1 | /* |
2 | * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
3 | * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
4 | * Copyright (C) 2013 Google, Inc. All Rights Reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | */ |
27 | |
28 | #pragma once |
29 | |
30 | #include "SegmentedString.h" |
31 | #include <wtf/unicode/CharacterNames.h> |
32 | |
33 | namespace WebCore { |
34 | |
35 | // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream |
36 | template <typename Tokenizer> |
37 | class InputStreamPreprocessor { |
38 | public: |
39 | explicit InputStreamPreprocessor(Tokenizer& tokenizer) |
40 | : m_tokenizer(tokenizer) |
41 | { |
42 | } |
43 | |
44 | ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; } |
45 | |
46 | // Returns whether we succeeded in peeking at the next character. |
47 | // The only way we can fail to peek is if there are no more |
48 | // characters in |source| (after collapsing \r\n, etc). |
49 | ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false) |
50 | { |
51 | if (UNLIKELY(source.isEmpty())) |
52 | return false; |
53 | |
54 | m_nextInputCharacter = source.currentCharacter(); |
55 | |
56 | // Every branch in this function is expensive, so we have a |
57 | // fast-reject branch for characters that don't require special |
58 | // handling. Please run the parser benchmark whenever you touch |
59 | // this function. It's very hot. |
60 | constexpr UChar specialCharacterMask = '\n' | '\r' | '\0'; |
61 | if (LIKELY(m_nextInputCharacter & ~specialCharacterMask)) { |
62 | m_skipNextNewLine = false; |
63 | return true; |
64 | } |
65 | |
66 | return processNextInputCharacter(source, skipNullCharacters); |
67 | } |
68 | |
69 | // Returns whether there are more characters in |source| after advancing. |
70 | ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false) |
71 | { |
72 | source.advance(); |
73 | return peek(source, skipNullCharacters); |
74 | } |
75 | ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false) |
76 | { |
77 | source.advancePastNonNewline(); |
78 | return peek(source, skipNullCharacters); |
79 | } |
80 | |
81 | private: |
82 | bool processNextInputCharacter(SegmentedString& source, bool skipNullCharacters) |
83 | { |
84 | ProcessAgain: |
85 | ASSERT(m_nextInputCharacter == source.currentCharacter()); |
86 | if (m_nextInputCharacter == '\n' && m_skipNextNewLine) { |
87 | m_skipNextNewLine = false; |
88 | source.advancePastNewline(); |
89 | if (source.isEmpty()) |
90 | return false; |
91 | m_nextInputCharacter = source.currentCharacter(); |
92 | } |
93 | if (m_nextInputCharacter == '\r') { |
94 | m_nextInputCharacter = '\n'; |
95 | m_skipNextNewLine = true; |
96 | return true; |
97 | } |
98 | m_skipNextNewLine = false; |
99 | if (m_nextInputCharacter || isAtEndOfFile(source)) |
100 | return true; |
101 | if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) { |
102 | source.advancePastNonNewline(); |
103 | if (source.isEmpty()) |
104 | return false; |
105 | m_nextInputCharacter = source.currentCharacter(); |
106 | goto ProcessAgain; |
107 | } |
108 | m_nextInputCharacter = replacementCharacter; |
109 | return true; |
110 | } |
111 | |
112 | static bool isAtEndOfFile(SegmentedString& source) |
113 | { |
114 | return source.isClosed() && source.length() == 1; |
115 | } |
116 | |
117 | Tokenizer& m_tokenizer; |
118 | |
119 | // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character |
120 | UChar m_nextInputCharacter { 0 }; |
121 | bool m_skipNextNewLine { false }; |
122 | }; |
123 | |
124 | } // namespace WebCore |
125 | |