| 1 | /* |
| 2 | Copyright (C) 2004-2016 Apple Inc. All rights reserved. |
| 3 | |
| 4 | This library is free software; you can redistribute it and/or |
| 5 | modify it under the terms of the GNU Library General Public |
| 6 | License as published by the Free Software Foundation; either |
| 7 | version 2 of the License, or (at your option) any later version. |
| 8 | |
| 9 | This library is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | Library General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU Library General Public License |
| 15 | along with this library; see the file COPYING.LIB. If not, write to |
| 16 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 17 | Boston, MA 02110-1301, USA. |
| 18 | */ |
| 19 | |
| 20 | #pragma once |
| 21 | |
| 22 | #include <wtf/Deque.h> |
| 23 | #include <wtf/text/WTFString.h> |
| 24 | |
| 25 | namespace WebCore { |
| 26 | |
| 27 | // FIXME: This should not start with "k". |
| 28 | // FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now. |
| 29 | constexpr LChar kEndOfFileMarker = 0; |
| 30 | |
| 31 | class SegmentedString { |
| 32 | public: |
| 33 | SegmentedString() = default; |
| 34 | SegmentedString(String&&); |
| 35 | SegmentedString(const String&); |
| 36 | |
| 37 | SegmentedString(SegmentedString&&) = delete; |
| 38 | SegmentedString(const SegmentedString&) = delete; |
| 39 | |
| 40 | SegmentedString& operator=(SegmentedString&&); |
| 41 | SegmentedString& operator=(const SegmentedString&) = default; |
| 42 | |
| 43 | void clear(); |
| 44 | void close(); |
| 45 | |
| 46 | void append(SegmentedString&&); |
| 47 | void append(const SegmentedString&); |
| 48 | |
| 49 | void append(String&&); |
| 50 | void append(const String&); |
| 51 | |
| 52 | void pushBack(String&&); |
| 53 | |
| 54 | void setExcludeLineNumbers(); |
| 55 | |
| 56 | bool isEmpty() const { return !m_currentSubstring.length; } |
| 57 | unsigned length() const; |
| 58 | |
| 59 | bool isClosed() const { return m_isClosed; } |
| 60 | |
| 61 | void advance(); |
| 62 | void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline. |
| 63 | void advancePastNewline(); // Faster than calling advance when we know the current character is a newline. |
| 64 | |
| 65 | enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters }; |
| 66 | template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); } |
| 67 | template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); } |
| 68 | |
| 69 | unsigned numberOfCharactersConsumed() const; |
| 70 | |
| 71 | String toString() const; |
| 72 | |
| 73 | UChar currentCharacter() const { return m_currentCharacter; } |
| 74 | |
| 75 | OrdinalNumber currentColumn() const; |
| 76 | OrdinalNumber currentLine() const; |
| 77 | |
| 78 | // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog |
| 79 | // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. |
| 80 | void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength); |
| 81 | |
| 82 | private: |
| 83 | struct Substring { |
| 84 | Substring() = default; |
| 85 | Substring(String&&); |
| 86 | |
| 87 | UChar currentCharacter() const; |
| 88 | UChar currentCharacterPreIncrement(); |
| 89 | |
| 90 | unsigned numberOfCharactersConsumed() const; |
| 91 | void appendTo(StringBuilder&) const; |
| 92 | |
| 93 | String string; |
| 94 | unsigned length { 0 }; |
| 95 | bool is8Bit; |
| 96 | union { |
| 97 | const LChar* currentCharacter8; |
| 98 | const UChar* currentCharacter16; |
| 99 | }; |
| 100 | bool doNotExcludeLineNumbers { true }; |
| 101 | }; |
| 102 | |
| 103 | enum FastPathFlags { |
| 104 | NoFastPath = 0, |
| 105 | Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, |
| 106 | Use8BitAdvance = 1 << 1, |
| 107 | }; |
| 108 | |
| 109 | void appendSubstring(Substring&&); |
| 110 | |
| 111 | void processPossibleNewline(); |
| 112 | void startNewLine(); |
| 113 | |
| 114 | void advanceWithoutUpdatingLineNumber(); |
| 115 | void advanceWithoutUpdatingLineNumber16(); |
| 116 | void advanceAndUpdateLineNumber16(); |
| 117 | void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber(); |
| 118 | void advancePastSingleCharacterSubstring(); |
| 119 | void advanceEmpty(); |
| 120 | |
| 121 | void updateAdvanceFunctionPointers(); |
| 122 | void updateAdvanceFunctionPointersForEmptyString(); |
| 123 | void updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| 124 | |
| 125 | void decrementAndCheckLength(); |
| 126 | |
| 127 | template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase); |
| 128 | template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]); |
| 129 | AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase); |
| 130 | |
| 131 | Substring m_currentSubstring; |
| 132 | Deque<Substring> m_otherSubstrings; |
| 133 | |
| 134 | bool m_isClosed { false }; |
| 135 | |
| 136 | UChar m_currentCharacter { 0 }; |
| 137 | |
| 138 | unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 }; |
| 139 | unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 }; |
| 140 | int m_currentLine { 0 }; |
| 141 | |
| 142 | unsigned char m_fastPathFlags { NoFastPath }; |
| 143 | void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty }; |
| 144 | void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty }; |
| 145 | }; |
| 146 | |
| 147 | inline SegmentedString::Substring::Substring(String&& passedString) |
| 148 | : string(WTFMove(passedString)) |
| 149 | , length(string.length()) |
| 150 | { |
| 151 | if (length) { |
| 152 | is8Bit = string.impl()->is8Bit(); |
| 153 | if (is8Bit) |
| 154 | currentCharacter8 = string.impl()->characters8(); |
| 155 | else |
| 156 | currentCharacter16 = string.impl()->characters16(); |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const |
| 161 | { |
| 162 | return string.length() - length; |
| 163 | } |
| 164 | |
| 165 | ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const |
| 166 | { |
| 167 | ASSERT(length); |
| 168 | return is8Bit ? *currentCharacter8 : *currentCharacter16; |
| 169 | } |
| 170 | |
| 171 | ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement() |
| 172 | { |
| 173 | ASSERT(length); |
| 174 | return is8Bit ? *++currentCharacter8 : *++currentCharacter16; |
| 175 | } |
| 176 | |
| 177 | inline SegmentedString::SegmentedString(String&& string) |
| 178 | : m_currentSubstring(WTFMove(string)) |
| 179 | { |
| 180 | if (m_currentSubstring.length) { |
| 181 | m_currentCharacter = m_currentSubstring.currentCharacter(); |
| 182 | updateAdvanceFunctionPointers(); |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | inline SegmentedString::SegmentedString(const String& string) |
| 187 | : SegmentedString(String { string }) |
| 188 | { |
| 189 | } |
| 190 | |
| 191 | ALWAYS_INLINE void SegmentedString::decrementAndCheckLength() |
| 192 | { |
| 193 | ASSERT(m_currentSubstring.length > 1); |
| 194 | if (UNLIKELY(--m_currentSubstring.length == 1)) |
| 195 | updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| 196 | } |
| 197 | |
| 198 | ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber() |
| 199 | { |
| 200 | if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { |
| 201 | m_currentCharacter = *++m_currentSubstring.currentCharacter8; |
| 202 | decrementAndCheckLength(); |
| 203 | return; |
| 204 | } |
| 205 | |
| 206 | (this->*m_advanceWithoutUpdatingLineNumberFunction)(); |
| 207 | } |
| 208 | |
| 209 | inline void SegmentedString::startNewLine() |
| 210 | { |
| 211 | ++m_currentLine; |
| 212 | m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed(); |
| 213 | } |
| 214 | |
| 215 | inline void SegmentedString::processPossibleNewline() |
| 216 | { |
| 217 | if (m_currentCharacter == '\n') |
| 218 | startNewLine(); |
| 219 | } |
| 220 | |
| 221 | inline void SegmentedString::advance() |
| 222 | { |
| 223 | if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { |
| 224 | ASSERT(m_currentSubstring.length > 1); |
| 225 | bool lastCharacterWasNewline = m_currentCharacter == '\n'; |
| 226 | m_currentCharacter = *++m_currentSubstring.currentCharacter8; |
| 227 | bool haveOneCharacterLeft = --m_currentSubstring.length == 1; |
| 228 | if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft))) |
| 229 | return; |
| 230 | if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers)) |
| 231 | startNewLine(); |
| 232 | if (haveOneCharacterLeft) |
| 233 | updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| 234 | return; |
| 235 | } |
| 236 | |
| 237 | (this->*m_advanceAndUpdateLineNumberFunction)(); |
| 238 | } |
| 239 | |
| 240 | ALWAYS_INLINE void SegmentedString::advancePastNonNewline() |
| 241 | { |
| 242 | ASSERT(m_currentCharacter != '\n'); |
| 243 | advanceWithoutUpdatingLineNumber(); |
| 244 | } |
| 245 | |
| 246 | inline void SegmentedString::advancePastNewline() |
| 247 | { |
| 248 | ASSERT(m_currentCharacter == '\n'); |
| 249 | if (m_currentSubstring.length > 1) { |
| 250 | if (m_currentSubstring.doNotExcludeLineNumbers) |
| 251 | startNewLine(); |
| 252 | m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement(); |
| 253 | decrementAndCheckLength(); |
| 254 | return; |
| 255 | } |
| 256 | |
| 257 | (this->*m_advanceAndUpdateLineNumberFunction)(); |
| 258 | } |
| 259 | |
| 260 | inline unsigned SegmentedString::numberOfCharactersConsumed() const |
| 261 | { |
| 262 | return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed(); |
| 263 | } |
| 264 | |
| 265 | template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase) |
| 266 | { |
| 267 | return lettersIgnoringASCIICase ? !isASCIIAlphaCaselessEqual(a, b) : a != b; |
| 268 | } |
| 269 | |
| 270 | template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator]) |
| 271 | { |
| 272 | constexpr unsigned length = lengthIncludingTerminator - 1; |
| 273 | ASSERT(!literal[length]); |
| 274 | ASSERT(!strchr(literal, '\n')); |
| 275 | if (length + 1 < m_currentSubstring.length) { |
| 276 | if (m_currentSubstring.is8Bit) { |
| 277 | for (unsigned i = 0; i < length; ++i) { |
| 278 | if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase)) |
| 279 | return DidNotMatch; |
| 280 | } |
| 281 | m_currentSubstring.currentCharacter8 += length; |
| 282 | m_currentCharacter = *m_currentSubstring.currentCharacter8; |
| 283 | } else { |
| 284 | for (unsigned i = 0; i < length; ++i) { |
| 285 | if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase)) |
| 286 | return DidNotMatch; |
| 287 | } |
| 288 | m_currentSubstring.currentCharacter16 += length; |
| 289 | m_currentCharacter = *m_currentSubstring.currentCharacter16; |
| 290 | } |
| 291 | m_currentSubstring.length -= length; |
| 292 | return DidMatch; |
| 293 | } |
| 294 | return advancePastSlowCase(literal, lettersIgnoringASCIICase); |
| 295 | } |
| 296 | |
| 297 | inline void SegmentedString::updateAdvanceFunctionPointers() |
| 298 | { |
| 299 | if (m_currentSubstring.length > 1) { |
| 300 | if (m_currentSubstring.is8Bit) { |
| 301 | m_fastPathFlags = Use8BitAdvance; |
| 302 | if (m_currentSubstring.doNotExcludeLineNumbers) |
| 303 | m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; |
| 304 | return; |
| 305 | } |
| 306 | m_fastPathFlags = NoFastPath; |
| 307 | m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; |
| 308 | if (m_currentSubstring.doNotExcludeLineNumbers) |
| 309 | m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16; |
| 310 | else |
| 311 | m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; |
| 312 | return; |
| 313 | } |
| 314 | |
| 315 | if (!m_currentSubstring.length) { |
| 316 | updateAdvanceFunctionPointersForEmptyString(); |
| 317 | return; |
| 318 | } |
| 319 | |
| 320 | updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| 321 | } |
| 322 | |
| 323 | } |
| 324 | |