| 1 | /* |
| 2 | * Copyright (C) 2016 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
| 20 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 21 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 22 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 23 | */ |
| 24 | |
| 25 | #include "config.h" |
| 26 | #include "TextDecoder.h" |
| 27 | |
| 28 | #include "HTMLParserIdioms.h" |
| 29 | #include <wtf/Optional.h> |
| 30 | |
| 31 | namespace WebCore { |
| 32 | |
| 33 | ExceptionOr<Ref<TextDecoder>> TextDecoder::create(const String& label, Options options) |
| 34 | { |
| 35 | String strippedLabel = stripLeadingAndTrailingHTMLSpaces(label); |
| 36 | const UChar nullCharacter = '\0'; |
| 37 | if (strippedLabel.contains(nullCharacter)) |
| 38 | return Exception { RangeError }; |
| 39 | auto decoder = adoptRef(*new TextDecoder(strippedLabel.utf8().data(), options)); |
| 40 | if (!decoder->m_textEncoding.isValid() || !strcmp(decoder->m_textEncoding.name(), "replacement" )) |
| 41 | return Exception { RangeError }; |
| 42 | return decoder; |
| 43 | } |
| 44 | |
| 45 | TextDecoder::TextDecoder(const char* label, Options options) |
| 46 | : m_textEncoding(label) |
| 47 | , m_options(options) |
| 48 | { |
| 49 | } |
| 50 | |
| 51 | void TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length) |
| 52 | { |
| 53 | const uint8_t utf8BOMBytes[3] = {0xEF, 0xBB, 0xBF}; |
| 54 | const uint8_t utf16BEBOMBytes[2] = {0xFE, 0xFF}; |
| 55 | const uint8_t utf16LEBOMBytes[2] = {0xFF, 0xFE}; |
| 56 | |
| 57 | if (m_textEncoding == UTF8Encoding() |
| 58 | && length >= sizeof(utf8BOMBytes) |
| 59 | && data[0] == utf8BOMBytes[0] |
| 60 | && data[1] == utf8BOMBytes[1] |
| 61 | && data[2] == utf8BOMBytes[2]) { |
| 62 | data += sizeof(utf8BOMBytes); |
| 63 | length -= sizeof(utf8BOMBytes); |
| 64 | } else if (m_textEncoding == UTF16BigEndianEncoding() |
| 65 | && length >= sizeof(utf16BEBOMBytes) |
| 66 | && data[0] == utf16BEBOMBytes[0] |
| 67 | && data[1] == utf16BEBOMBytes[1]) { |
| 68 | data += sizeof(utf16BEBOMBytes); |
| 69 | length -= sizeof(utf16BEBOMBytes); |
| 70 | } else if (m_textEncoding == UTF16LittleEndianEncoding() |
| 71 | && length >= sizeof(utf16LEBOMBytes) |
| 72 | && data[0] == utf16LEBOMBytes[0] |
| 73 | && data[1] == utf16LEBOMBytes[1]) { |
| 74 | data += sizeof(utf16LEBOMBytes); |
| 75 | length -= sizeof(utf16LEBOMBytes); |
| 76 | } |
| 77 | } |
| 78 | |
| 79 | String TextDecoder::prependBOMIfNecessary(const String& decoded) |
| 80 | { |
| 81 | if (m_hasDecoded || !m_options.ignoreBOM) |
| 82 | return decoded; |
| 83 | const UChar utf16BEBOM[2] = {0xFEFF, '\0'}; |
| 84 | |
| 85 | // FIXME: Make TextCodec::decode take a flag for prepending BOM so we don't need to do this extra allocation and copy. |
| 86 | return makeString(utf16BEBOM, decoded); |
| 87 | } |
| 88 | |
| 89 | static size_t codeUnitByteSize(const TextEncoding& encoding) |
| 90 | { |
| 91 | return encoding.isByteBasedEncoding() ? 1 : 2; |
| 92 | } |
| 93 | |
| 94 | ExceptionOr<String> TextDecoder::decode(Optional<BufferSource::VariantType> input, DecodeOptions options) |
| 95 | { |
| 96 | Optional<BufferSource> inputBuffer; |
| 97 | const uint8_t* data = nullptr; |
| 98 | size_t length = 0; |
| 99 | if (input) { |
| 100 | inputBuffer = BufferSource(WTFMove(input.value())); |
| 101 | data = inputBuffer->data(); |
| 102 | length = inputBuffer->length(); |
| 103 | } |
| 104 | |
| 105 | ignoreBOMIfNecessary(data, length); |
| 106 | |
| 107 | if (m_buffer.size()) { |
| 108 | m_buffer.append(data, length); |
| 109 | data = m_buffer.data(); |
| 110 | length = m_buffer.size(); |
| 111 | } |
| 112 | |
| 113 | const bool stopOnError = true; |
| 114 | bool sawError = false; |
| 115 | if (length % codeUnitByteSize(m_textEncoding)) |
| 116 | sawError = true; |
| 117 | const char* charData = reinterpret_cast<const char*>(data); |
| 118 | String result; |
| 119 | if (!sawError) |
| 120 | result = prependBOMIfNecessary(m_textEncoding.decode(charData, length, stopOnError, sawError)); |
| 121 | |
| 122 | if (sawError) { |
| 123 | if (options.stream) { |
| 124 | result = String(); |
| 125 | if (!m_buffer.size()) |
| 126 | m_buffer.append(data, length); |
| 127 | } else { |
| 128 | if (m_options.fatal) |
| 129 | return Exception { TypeError }; |
| 130 | result = prependBOMIfNecessary(m_textEncoding.decode(charData, length)); |
| 131 | } |
| 132 | } else |
| 133 | m_buffer.clear(); |
| 134 | |
| 135 | m_hasDecoded = true; |
| 136 | return result; |
| 137 | } |
| 138 | |
| 139 | String TextDecoder::encoding() const |
| 140 | { |
| 141 | return String(m_textEncoding.name()).convertToASCIILowercase(); |
| 142 | } |
| 143 | |
| 144 | } |
| 145 | |