1/*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#pragma once
28
29#include <wtf/text/StringBuilder.h>
30
31namespace WebCore {
32
// Returns the characters accumulated in |consumedCharacters| to |source| by
// converting the builder to a string and handing it to source.pushBack().
// The builder itself is not cleared here.
inline void unconsumeCharacters(SegmentedString& source, StringBuilder& consumedCharacters)
{
    source.pushBack(consumedCharacters.toString());
}
37
38template <typename ParserFunctions>
39bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCharacter, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
40{
41 ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
42 ASSERT(!notEnoughCharacters);
43 ASSERT(decodedCharacter.isEmpty());
44
45 enum {
46 Initial,
47 Number,
48 MaybeHexLowerCaseX,
49 MaybeHexUpperCaseX,
50 Hex,
51 Decimal,
52 Named
53 } state = Initial;
54 UChar32 result = 0;
55 bool overflow = false;
56 StringBuilder consumedCharacters;
57
58 while (!source.isEmpty()) {
59 UChar character = source.currentCharacter();
60 switch (state) {
61 case Initial:
62 if (character == '\x09' || character == '\x0A' || character == '\x0C' || character == ' ' || character == '<' || character == '&')
63 return false;
64 if (additionalAllowedCharacter && character == additionalAllowedCharacter)
65 return false;
66 if (character == '#') {
67 state = Number;
68 break;
69 }
70 if (isASCIIAlpha(character)) {
71 state = Named;
72 goto Named;
73 }
74 return false;
75 case Number:
76 if (character == 'x') {
77 state = MaybeHexLowerCaseX;
78 break;
79 }
80 if (character == 'X') {
81 state = MaybeHexUpperCaseX;
82 break;
83 }
84 if (isASCIIDigit(character)) {
85 state = Decimal;
86 goto Decimal;
87 }
88 source.pushBack("#"_s);
89 return false;
90 case MaybeHexLowerCaseX:
91 if (isASCIIHexDigit(character)) {
92 state = Hex;
93 goto Hex;
94 }
95 source.pushBack("#x"_s);
96 return false;
97 case MaybeHexUpperCaseX:
98 if (isASCIIHexDigit(character)) {
99 state = Hex;
100 goto Hex;
101 }
102 source.pushBack("#X"_s);
103 return false;
104 case Hex:
105 Hex:
106 if (isASCIIHexDigit(character)) {
107 result = result * 16 + toASCIIHexValue(character);
108 if (result > UCHAR_MAX_VALUE)
109 overflow = true;
110 break;
111 }
112 if (character == ';') {
113 source.advancePastNonNewline();
114 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
115 return true;
116 }
117 if (ParserFunctions::acceptMalformed()) {
118 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
119 return true;
120 }
121 unconsumeCharacters(source, consumedCharacters);
122 return false;
123 case Decimal:
124 Decimal:
125 if (isASCIIDigit(character)) {
126 result = result * 10 + character - '0';
127 if (result > UCHAR_MAX_VALUE)
128 overflow = true;
129 break;
130 }
131 if (character == ';') {
132 source.advancePastNonNewline();
133 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
134 return true;
135 }
136 if (ParserFunctions::acceptMalformed()) {
137 decodedCharacter.append(ParserFunctions::legalEntityFor(overflow ? 0 : result));
138 return true;
139 }
140 unconsumeCharacters(source, consumedCharacters);
141 return false;
142 case Named:
143 Named:
144 return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, character);
145 }
146 consumedCharacters.append(character);
147 source.advancePastNonNewline();
148 }
149 ASSERT(source.isEmpty());
150 notEnoughCharacters = true;
151 unconsumeCharacters(source, consumedCharacters);
152 return false;
153}
154
155} // namespace WebCore
156