| 1 | /* |
| 2 | * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 | * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| 4 | * Copyright (C) 2013 Google, Inc. All Rights Reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * |
| 15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
| 28 | #pragma once |
| 29 | |
| 30 | #include "SegmentedString.h" |
| 31 | #include <wtf/unicode/CharacterNames.h> |
| 32 | |
| 33 | namespace WebCore { |
| 34 | |
| 35 | // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream |
| 36 | template <typename Tokenizer> |
| 37 | class InputStreamPreprocessor { |
| 38 | public: |
| 39 | explicit InputStreamPreprocessor(Tokenizer& tokenizer) |
| 40 | : m_tokenizer(tokenizer) |
| 41 | { |
| 42 | } |
| 43 | |
| 44 | ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; } |
| 45 | |
| 46 | // Returns whether we succeeded in peeking at the next character. |
| 47 | // The only way we can fail to peek is if there are no more |
| 48 | // characters in |source| (after collapsing \r\n, etc). |
| 49 | ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false) |
| 50 | { |
| 51 | if (UNLIKELY(source.isEmpty())) |
| 52 | return false; |
| 53 | |
| 54 | m_nextInputCharacter = source.currentCharacter(); |
| 55 | |
| 56 | // Every branch in this function is expensive, so we have a |
| 57 | // fast-reject branch for characters that don't require special |
| 58 | // handling. Please run the parser benchmark whenever you touch |
| 59 | // this function. It's very hot. |
| 60 | constexpr UChar specialCharacterMask = '\n' | '\r' | '\0'; |
| 61 | if (LIKELY(m_nextInputCharacter & ~specialCharacterMask)) { |
| 62 | m_skipNextNewLine = false; |
| 63 | return true; |
| 64 | } |
| 65 | |
| 66 | return processNextInputCharacter(source, skipNullCharacters); |
| 67 | } |
| 68 | |
| 69 | // Returns whether there are more characters in |source| after advancing. |
| 70 | ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false) |
| 71 | { |
| 72 | source.advance(); |
| 73 | return peek(source, skipNullCharacters); |
| 74 | } |
| 75 | ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false) |
| 76 | { |
| 77 | source.advancePastNonNewline(); |
| 78 | return peek(source, skipNullCharacters); |
| 79 | } |
| 80 | |
| 81 | private: |
| 82 | bool processNextInputCharacter(SegmentedString& source, bool skipNullCharacters) |
| 83 | { |
| 84 | ProcessAgain: |
| 85 | ASSERT(m_nextInputCharacter == source.currentCharacter()); |
| 86 | if (m_nextInputCharacter == '\n' && m_skipNextNewLine) { |
| 87 | m_skipNextNewLine = false; |
| 88 | source.advancePastNewline(); |
| 89 | if (source.isEmpty()) |
| 90 | return false; |
| 91 | m_nextInputCharacter = source.currentCharacter(); |
| 92 | } |
| 93 | if (m_nextInputCharacter == '\r') { |
| 94 | m_nextInputCharacter = '\n'; |
| 95 | m_skipNextNewLine = true; |
| 96 | return true; |
| 97 | } |
| 98 | m_skipNextNewLine = false; |
| 99 | if (m_nextInputCharacter || isAtEndOfFile(source)) |
| 100 | return true; |
| 101 | if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) { |
| 102 | source.advancePastNonNewline(); |
| 103 | if (source.isEmpty()) |
| 104 | return false; |
| 105 | m_nextInputCharacter = source.currentCharacter(); |
| 106 | goto ProcessAgain; |
| 107 | } |
| 108 | m_nextInputCharacter = replacementCharacter; |
| 109 | return true; |
| 110 | } |
| 111 | |
| 112 | static bool isAtEndOfFile(SegmentedString& source) |
| 113 | { |
| 114 | return source.isClosed() && source.length() == 1; |
| 115 | } |
| 116 | |
| 117 | Tokenizer& m_tokenizer; |
| 118 | |
| 119 | // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character |
| 120 | UChar m_nextInputCharacter { 0 }; |
| 121 | bool m_skipNextNewLine { false }; |
| 122 | }; |
| 123 | |
| 124 | } // namespace WebCore |
| 125 | |