1/*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#pragma once
29
30#include "SegmentedString.h"
31#include <wtf/unicode/CharacterNames.h>
32
33namespace WebCore {
34
35// http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
36template <typename Tokenizer>
37class InputStreamPreprocessor {
38public:
39 explicit InputStreamPreprocessor(Tokenizer& tokenizer)
40 : m_tokenizer(tokenizer)
41 {
42 }
43
44 ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; }
45
46 // Returns whether we succeeded in peeking at the next character.
47 // The only way we can fail to peek is if there are no more
48 // characters in |source| (after collapsing \r\n, etc).
49 ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false)
50 {
51 if (UNLIKELY(source.isEmpty()))
52 return false;
53
54 m_nextInputCharacter = source.currentCharacter();
55
56 // Every branch in this function is expensive, so we have a
57 // fast-reject branch for characters that don't require special
58 // handling. Please run the parser benchmark whenever you touch
59 // this function. It's very hot.
60 constexpr UChar specialCharacterMask = '\n' | '\r' | '\0';
61 if (LIKELY(m_nextInputCharacter & ~specialCharacterMask)) {
62 m_skipNextNewLine = false;
63 return true;
64 }
65
66 return processNextInputCharacter(source, skipNullCharacters);
67 }
68
69 // Returns whether there are more characters in |source| after advancing.
70 ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false)
71 {
72 source.advance();
73 return peek(source, skipNullCharacters);
74 }
75 ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false)
76 {
77 source.advancePastNonNewline();
78 return peek(source, skipNullCharacters);
79 }
80
81private:
82 bool processNextInputCharacter(SegmentedString& source, bool skipNullCharacters)
83 {
84 ProcessAgain:
85 ASSERT(m_nextInputCharacter == source.currentCharacter());
86 if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
87 m_skipNextNewLine = false;
88 source.advancePastNewline();
89 if (source.isEmpty())
90 return false;
91 m_nextInputCharacter = source.currentCharacter();
92 }
93 if (m_nextInputCharacter == '\r') {
94 m_nextInputCharacter = '\n';
95 m_skipNextNewLine = true;
96 return true;
97 }
98 m_skipNextNewLine = false;
99 if (m_nextInputCharacter || isAtEndOfFile(source))
100 return true;
101 if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) {
102 source.advancePastNonNewline();
103 if (source.isEmpty())
104 return false;
105 m_nextInputCharacter = source.currentCharacter();
106 goto ProcessAgain;
107 }
108 m_nextInputCharacter = replacementCharacter;
109 return true;
110 }
111
112 static bool isAtEndOfFile(SegmentedString& source)
113 {
114 return source.isClosed() && source.length() == 1;
115 }
116
117 Tokenizer& m_tokenizer;
118
119 // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
120 UChar m_nextInputCharacter { 0 };
121 bool m_skipNextNewLine { false };
122};
123
124} // namespace WebCore
125