TextDecoder.cpp source code [webkit/Source/WebCore/dom/TextDecoder.cpp]

1	/*
2	* Copyright (C) 2016 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23	*/
24
25	#include "config.h"
26	#include "TextDecoder.h"
27
28	#include "HTMLParserIdioms.h"
29	#include <wtf/Optional.h>
30
31	namespace WebCore {
32
33	ExceptionOr<Ref<TextDecoder>> TextDecoder::create(const String& label, Options options)
34	{
35	String strippedLabel = stripLeadingAndTrailingHTMLSpaces(label);
36	const UChar nullCharacter = `'\0'`;
37	if (strippedLabel.contains(nullCharacter))
38	return Exception { RangeError };
39	auto decoder = adoptRef(*new TextDecoder (strippedLabel.utf8().data(), options));
40	if (!decoder ->m_textEncoding.isValid() \|\| !strcmp(decoder ->m_textEncoding.name(), "replacement"))
41	return Exception { RangeError };
42	return decoder;
43	}
44
45	TextDecoder::TextDecoder(const char* label, Options options)
46	: m_textEncoding (label)
47	, m_options (options)
48	{
49	}
50
51	void TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length)
52	{
53	const uint8_t utf8BOMBytes[`3`] = {`0xEF`, `0xBB`, `0xBF`};
54	const uint8_t utf16BEBOMBytes[`2`] = {`0xFE`, `0xFF`};
55	const uint8_t utf16LEBOMBytes[`2`] = {`0xFF`, `0xFE`};
56
57	if (m_textEncoding == UTF8Encoding()
58	&& length >= sizeof(utf8BOMBytes)
59	&& data[`0`] == utf8BOMBytes[`0`]
60	&& data[`1`] == utf8BOMBytes[`1`]
61	&& data[`2`] == utf8BOMBytes[`2`]) {
62	data += sizeof(utf8BOMBytes);
63	length -= sizeof(utf8BOMBytes);
64	} else if (m_textEncoding == UTF16BigEndianEncoding()
65	&& length >= sizeof(utf16BEBOMBytes)
66	&& data[`0`] == utf16BEBOMBytes[`0`]
67	&& data[`1`] == utf16BEBOMBytes[`1`]) {
68	data += sizeof(utf16BEBOMBytes);
69	length -= sizeof(utf16BEBOMBytes);
70	} else if (m_textEncoding == UTF16LittleEndianEncoding()
71	&& length >= sizeof(utf16LEBOMBytes)
72	&& data[`0`] == utf16LEBOMBytes[`0`]
73	&& data[`1`] == utf16LEBOMBytes[`1`]) {
74	data += sizeof(utf16LEBOMBytes);
75	length -= sizeof(utf16LEBOMBytes);
76	}
77	}
78
79	String TextDecoder::prependBOMIfNecessary(const String& decoded)
80	{
81	if (m_hasDecoded \|\| !m_options.ignoreBOM)
82	return decoded;
83	const UChar utf16BEBOM[`2`] = {`0xFEFF`, `'\0'`};
84
85	// FIXME: Make TextCodec::decode take a flag for prepending BOM so we don't need to do this extra allocation and copy.
86	return makeString(utf16BEBOM, decoded);
87	}
88
89	static size_t codeUnitByteSize(const TextEncoding& encoding)
90	{
91	return encoding.isByteBasedEncoding() ? `1` : `2`;
92	}
93
94	ExceptionOr<String> TextDecoder::decode(Optional<BufferSource::VariantType> input, DecodeOptions options)
95	{
96	Optional<BufferSource> inputBuffer;
97	const uint8_t* data = nullptr;
98	size_t length = `0`;
99	if (input) {
100	inputBuffer = BufferSource (WTFMove(input.value()));
101	data = inputBuffer ->data();
102	length = inputBuffer ->length();
103	}
104
105	ignoreBOMIfNecessary(data, length);
106
107	if (m_buffer.size()) {
108	m_buffer.append(data, length);
109	data = m_buffer.data();
110	length = m_buffer.size();
111	}
112
113	const bool stopOnError = true;
114	bool sawError = false;
115	if (length % codeUnitByteSize(m_textEncoding))
116	sawError = true;
117	const char* charData = reinterpret_cast<const char*>(data);
118	String result;
119	if (!sawError)
120	result = prependBOMIfNecessary(m_textEncoding.decode(charData, length, stopOnError, sawError));
121
122	if (sawError) {
123	if (options.stream) {
124	result = String ();
125	if (!m_buffer.size())
126	m_buffer.append(data, length);
127	} else {
128	if (m_options.fatal)
129	return Exception { TypeError };
130	result = prependBOMIfNecessary(m_textEncoding.decode(charData, length));
131	}
132	} else
133	m_buffer.clear();
134
135	m_hasDecoded = true;
136	return result;
137	}
138
139	String TextDecoder::encoding() const
140	{
141	return String (m_textEncoding.name()).convertToASCIILowercase();
142	}
143
144	}
145

Browse the source code of webkit/Source/WebCore/dom/TextDecoder.cpp