/*
 * Copyright (C) 2010-2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
24
25#include "config.h"
26#include "HTMLParserIdioms.h"
27
28#include "Decimal.h"
29#include "QualifiedName.h"
30#include <limits>
31#include <wtf/MathExtras.h>
32#include <wtf/Optional.h>
33#include <wtf/URL.h>
34#include <wtf/Vector.h>
35#include <wtf/dtoa.h>
36
37namespace WebCore {
38
39template <typename CharType>
40static String stripLeadingAndTrailingHTMLSpaces(String string, CharType characters, unsigned length)
41{
42 unsigned numLeadingSpaces = 0;
43 unsigned numTrailingSpaces = 0;
44
45 for (; numLeadingSpaces < length; ++numLeadingSpaces) {
46 if (isNotHTMLSpace(characters[numLeadingSpaces]))
47 break;
48 }
49
50 if (numLeadingSpaces == length)
51 return string.isNull() ? string : emptyAtom().string();
52
53 for (; numTrailingSpaces < length; ++numTrailingSpaces) {
54 if (isNotHTMLSpace(characters[length - numTrailingSpaces - 1]))
55 break;
56 }
57
58 ASSERT(numLeadingSpaces + numTrailingSpaces < length);
59
60 if (!(numLeadingSpaces | numTrailingSpaces))
61 return string;
62
63 return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces));
64}
65
66String stripLeadingAndTrailingHTMLSpaces(const String& string)
67{
68 unsigned length = string.length();
69
70 if (!length)
71 return string.isNull() ? string : emptyAtom().string();
72
73 if (string.is8Bit())
74 return stripLeadingAndTrailingHTMLSpaces(string, string.characters8(), length);
75
76 return stripLeadingAndTrailingHTMLSpaces(string, string.characters16(), length);
77}
78
79String serializeForNumberType(const Decimal& number)
80{
81 if (number.isZero()) {
82 // Decimal::toString appends exponent, e.g. "0e-18"
83 return number.isNegative() ? "-0" : "0";
84 }
85 return number.toString();
86}
87
88String serializeForNumberType(double number)
89{
90 // According to HTML5, "the best representation of the number n as a floating
91 // point number" is a string produced by applying ToString() to n.
92 return String::numberToStringECMAScript(number);
93}
94
95Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue)
96{
97 // See HTML5 2.5.4.3 `Real numbers.' and parseToDoubleForNumberType
98
99 // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
100 const UChar firstCharacter = string[0];
101 if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
102 return fallbackValue;
103
104 const Decimal value = Decimal::fromString(string);
105 if (!value.isFinite())
106 return fallbackValue;
107
108 // Numbers are considered finite IEEE 754 single-precision floating point values.
109 // See HTML5 2.5.4.3 `Real numbers.'
110 // FIXME: We should use numeric_limits<double>::max for number input type.
111 const Decimal floatMax = Decimal::fromDouble(std::numeric_limits<float>::max());
112 if (value < -floatMax || value > floatMax)
113 return fallbackValue;
114
115 // We return +0 for -0 case.
116 return value.isZero() ? Decimal(0) : value;
117}
118
119Decimal parseToDecimalForNumberType(const String& string)
120{
121 return parseToDecimalForNumberType(string, Decimal::nan());
122}
123
124double parseToDoubleForNumberType(const String& string, double fallbackValue)
125{
126 // See HTML5 2.5.4.3 `Real numbers.'
127
128 // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
129 UChar firstCharacter = string[0];
130 if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
131 return fallbackValue;
132
133 bool valid = false;
134 double value = string.toDouble(&valid);
135 if (!valid)
136 return fallbackValue;
137
138 // NaN and infinity are considered valid by String::toDouble, but not valid here.
139 if (!std::isfinite(value))
140 return fallbackValue;
141
142 // Numbers are considered finite IEEE 754 single-precision floating point values.
143 // See HTML5 2.5.4.3 `Real numbers.'
144 if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
145 return fallbackValue;
146
147 // The following expression converts -0 to +0.
148 return value ? value : 0;
149}
150
151double parseToDoubleForNumberType(const String& string)
152{
153 return parseToDoubleForNumberType(string, std::numeric_limits<double>::quiet_NaN());
154}
155
156template <typename CharacterType>
157static Expected<int, HTMLIntegerParsingError> parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end)
158{
159 while (position < end && isHTMLSpace(*position))
160 ++position;
161
162 if (position == end)
163 return makeUnexpected(HTMLIntegerParsingError::Other);
164
165 bool isNegative = false;
166 if (*position == '-') {
167 isNegative = true;
168 ++position;
169 } else if (*position == '+')
170 ++position;
171
172 if (position == end || !isASCIIDigit(*position))
173 return makeUnexpected(HTMLIntegerParsingError::Other);
174
175 constexpr int intMax = std::numeric_limits<int>::max();
176 constexpr int base = 10;
177 constexpr int maxMultiplier = intMax / base;
178
179 unsigned result = 0;
180 do {
181 int digitValue = *position - '0';
182
183 if (result > maxMultiplier || (result == maxMultiplier && digitValue > (intMax % base) + isNegative))
184 return makeUnexpected(isNegative ? HTMLIntegerParsingError::NegativeOverflow : HTMLIntegerParsingError::PositiveOverflow);
185
186 result = base * result + digitValue;
187 ++position;
188 } while (position < end && isASCIIDigit(*position));
189
190 return isNegative ? -result : result;
191}
192
193// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-integers
194Expected<int, HTMLIntegerParsingError> parseHTMLInteger(StringView input)
195{
196 unsigned length = input.length();
197 if (!length)
198 return makeUnexpected(HTMLIntegerParsingError::Other);
199
200 if (LIKELY(input.is8Bit())) {
201 auto* start = input.characters8();
202 return parseHTMLIntegerInternal(start, start + length);
203 }
204
205 auto* start = input.characters16();
206 return parseHTMLIntegerInternal(start, start + length);
207}
208
209// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-non-negative-integers
210Expected<unsigned, HTMLIntegerParsingError> parseHTMLNonNegativeInteger(StringView input)
211{
212 auto optionalSignedResult = parseHTMLInteger(input);
213 if (!optionalSignedResult)
214 return makeUnexpected(WTFMove(optionalSignedResult.error()));
215
216 if (optionalSignedResult.value() < 0)
217 return makeUnexpected(HTMLIntegerParsingError::NegativeOverflow);
218
219 return static_cast<unsigned>(optionalSignedResult.value());
220}
221
222template <typename CharacterType>
223static Optional<int> parseValidHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end)
224{
225 // A string is a valid non-negative integer if it consists of one or more ASCII digits.
226 for (auto* c = position; c < end; ++c) {
227 if (!isASCIIDigit(*c))
228 return WTF::nullopt;
229 }
230
231 auto optionalSignedValue = parseHTMLIntegerInternal(position, end);
232 if (!optionalSignedValue || optionalSignedValue.value() < 0)
233 return WTF::nullopt;
234
235 return optionalSignedValue.value();
236}
237
238// https://html.spec.whatwg.org/#valid-non-negative-integer
239Optional<int> parseValidHTMLNonNegativeInteger(StringView input)
240{
241 if (input.isEmpty())
242 return WTF::nullopt;
243
244 if (LIKELY(input.is8Bit())) {
245 auto* start = input.characters8();
246 return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length());
247 }
248
249 auto* start = input.characters16();
250 return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length());
251}
252
253template <typename CharacterType>
254static Optional<double> parseValidHTMLFloatingPointNumberInternal(const CharacterType* position, size_t length)
255{
256 ASSERT(length > 0);
257
258 // parseDouble() allows the string to start with a '+' or to end with a '.' but those
259 // are not valid floating point numbers as per HTML.
260 if (*position == '+' || *(position + length - 1) == '.')
261 return WTF::nullopt;
262
263 size_t parsedLength = 0;
264 double number = parseDouble(position, length, parsedLength);
265 return parsedLength == length && std::isfinite(number) ? number : Optional<double>();
266}
267
268// https://html.spec.whatwg.org/#valid-floating-point-number
269Optional<double> parseValidHTMLFloatingPointNumber(StringView input)
270{
271 if (input.isEmpty())
272 return WTF::nullopt;
273
274 if (LIKELY(input.is8Bit())) {
275 auto* start = input.characters8();
276 return parseValidHTMLFloatingPointNumberInternal(start, input.length());
277 }
278
279 auto* start = input.characters16();
280 return parseValidHTMLFloatingPointNumberInternal(start, input.length());
281}
282
283static inline bool isHTMLSpaceOrDelimiter(UChar character)
284{
285 return isHTMLSpace(character) || character == ',' || character == ';';
286}
287
288static inline bool isNumberStart(UChar character)
289{
290 return isASCIIDigit(character) || character == '.' || character == '-';
291}
292
293// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-floating-point-number-values
294template <typename CharacterType>
295static Vector<double> parseHTMLListOfOfFloatingPointNumberValuesInternal(const CharacterType* position, const CharacterType* end)
296{
297 Vector<double> numbers;
298
299 // This skips past any leading delimiters.
300 while (position < end && isHTMLSpaceOrDelimiter(*position))
301 ++position;
302
303 while (position < end) {
304 // This skips past leading garbage.
305 while (position < end && !(isHTMLSpaceOrDelimiter(*position) || isNumberStart(*position)))
306 ++position;
307
308 const CharacterType* numberStart = position;
309 while (position < end && !isHTMLSpaceOrDelimiter(*position))
310 ++position;
311
312 size_t parsedLength = 0;
313 double number = parseDouble(numberStart, position - numberStart, parsedLength);
314 numbers.append(parsedLength > 0 && std::isfinite(number) ? number : 0);
315
316 // This skips past the delimiter.
317 while (position < end && isHTMLSpaceOrDelimiter(*position))
318 ++position;
319 }
320
321 return numbers;
322}
323
324Vector<double> parseHTMLListOfOfFloatingPointNumberValues(StringView input)
325{
326 if (LIKELY(input.is8Bit())) {
327 auto* start = input.characters8();
328 return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length());
329 }
330
331 auto* start = input.characters16();
332 return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length());
333}
334
335static bool threadSafeEqual(const StringImpl& a, const StringImpl& b)
336{
337 if (&a == &b)
338 return true;
339 if (a.hash() != b.hash())
340 return false;
341 return equal(a, b);
342}
343
344bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
345{
346 return threadSafeEqual(*a.localName().impl(), *b.localName().impl());
347}
348
349String parseCORSSettingsAttribute(const AtomicString& value)
350{
351 if (value.isNull())
352 return String();
353 if (equalIgnoringASCIICase(value, "use-credentials"))
354 return "use-credentials"_s;
355 return "anonymous"_s;
356}
357
358// https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-refresh
359template <typename CharacterType>
360static bool parseHTTPRefreshInternal(const CharacterType* position, const CharacterType* end, double& parsedDelay, String& parsedURL)
361{
362 while (position < end && isHTMLSpace(*position))
363 ++position;
364
365 unsigned time = 0;
366
367 const CharacterType* numberStart = position;
368 while (position < end && isASCIIDigit(*position))
369 ++position;
370
371 StringView timeString(numberStart, position - numberStart);
372 if (timeString.isEmpty()) {
373 if (position >= end || *position != '.')
374 return false;
375 } else {
376 auto optionalNumber = parseHTMLNonNegativeInteger(timeString);
377 if (!optionalNumber)
378 return false;
379 time = optionalNumber.value();
380 }
381
382 while (position < end && (isASCIIDigit(*position) || *position == '.'))
383 ++position;
384
385 if (position == end) {
386 parsedDelay = time;
387 return true;
388 }
389
390 if (*position != ';' && *position != ',' && !isHTMLSpace(*position))
391 return false;
392
393 parsedDelay = time;
394
395 while (position < end && isHTMLSpace(*position))
396 ++position;
397
398 if (position < end && (*position == ';' || *position == ','))
399 ++position;
400
401 while (position < end && isHTMLSpace(*position))
402 ++position;
403
404 if (position == end)
405 return true;
406
407 if (*position == 'U' || *position == 'u') {
408 StringView url(position, end - position);
409
410 ++position;
411
412 if (position < end && (*position == 'R' || *position == 'r'))
413 ++position;
414 else {
415 parsedURL = url.toString();
416 return true;
417 }
418
419 if (position < end && (*position == 'L' || *position == 'l'))
420 ++position;
421 else {
422 parsedURL = url.toString();
423 return true;
424 }
425
426 while (position < end && isHTMLSpace(*position))
427 ++position;
428
429 if (position < end && *position == '=')
430 ++position;
431 else {
432 parsedURL = url.toString();
433 return true;
434 }
435
436 while (position < end && isHTMLSpace(*position))
437 ++position;
438 }
439
440 CharacterType quote;
441 if (position < end && (*position == '\'' || *position == '"')) {
442 quote = *position;
443 ++position;
444 } else
445 quote = '\0';
446
447 StringView url(position, end - position);
448
449 if (quote != '\0') {
450 size_t index = url.find(quote);
451 if (index != notFound)
452 url = url.substring(0, index);
453 }
454
455 parsedURL = url.toString();
456 return true;
457}
458
459bool parseMetaHTTPEquivRefresh(const StringView& input, double& delay, String& url)
460{
461 if (LIKELY(input.is8Bit())) {
462 auto* start = input.characters8();
463 return parseHTTPRefreshInternal(start, start + input.length(), delay, url);
464 }
465
466 auto* start = input.characters16();
467 return parseHTTPRefreshInternal(start, start + input.length(), delay, url);
468}
469
470// https://html.spec.whatwg.org/#rules-for-parsing-a-hash-name-reference
471AtomicString parseHTMLHashNameReference(StringView usemap)
472{
473 size_t numberSignIndex = usemap.find('#');
474 if (numberSignIndex == notFound)
475 return nullAtom();
476 return usemap.substring(numberSignIndex + 1).toAtomicString();
477}
478
479}
480