1/*
2 * Copyright (c) 2013, Opera Software ASA. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. Neither the name of Opera Software ASA nor the names of its
13 * contributors may be used to endorse or promote products derived
14 * from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27 * OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#pragma once
31
32#include "ParsingUtilities.h"
33#include <wtf/text/WTFString.h>
34
35namespace WebCore {
36
37// Helper class for "scanning" an input string and performing parsing of
38// "micro-syntax"-like constructs.
39//
// There are two primary operations: match and scan.
41//
42// The 'match' operation matches an explicitly or implicitly specified sequence
43// against the characters ahead of the current input pointer, and returns true
44// if the sequence can be matched.
45//
// The 'scan' operation performs a 'match', and if the match is successful it
// advances the input pointer past the matched sequence.
48class VTTScanner {
49 WTF_MAKE_NONCOPYABLE(VTTScanner);
50public:
51 explicit VTTScanner(const String& line);
52
53 typedef const LChar* Position;
54
55 class Run {
56 public:
57 Run(Position start, Position end, bool is8Bit)
58 : m_start(start), m_end(end), m_is8Bit(is8Bit) { }
59
60 Position start() const { return m_start; }
61 Position end() const { return m_end; }
62
63 bool isEmpty() const { return m_start == m_end; }
64 size_t length() const;
65
66 private:
67 Position m_start;
68 Position m_end;
69 bool m_is8Bit;
70 };
71
72 // Check if the input pointer points at the specified position.
73 bool isAt(Position checkPosition) const { return position() == checkPosition; }
74 // Check if the input pointer points at the end of the input.
75 bool isAtEnd() const { return position() == end(); }
76 // Match the character |c| against the character at the input pointer (~lookahead).
77 bool match(char c) const { return !isAtEnd() && currentChar() == c; }
78 // Scan the character |c|.
79 bool scan(char);
80 // Scan the first |charactersCount| characters of the string |characters|.
81 bool scan(const LChar* characters, size_t charactersCount);
82
83 // Scan the literal |characters|.
84 template<unsigned charactersCount>
85 bool scan(const char (&characters)[charactersCount]);
86
87 // Skip (advance the input pointer) as long as the specified
88 // |characterPredicate| returns true, and the input pointer is not passed
89 // the end of the input.
90 template<bool characterPredicate(UChar)>
91 void skipWhile();
92
93 // Like skipWhile, but using a negated predicate.
94 template<bool characterPredicate(UChar)>
95 void skipUntil();
96
97 // Return the run of characters for which the specified
98 // |characterPredicate| returns true. The start of the run will be the
99 // current input pointer.
100 template<bool characterPredicate(UChar)>
101 Run collectWhile();
102
103 // Like collectWhile, but using a negated predicate.
104 template<bool characterPredicate(UChar)>
105 Run collectUntil();
106
107 // Scan the string |toMatch|, using the specified |run| as the sequence to
108 // match against.
109 bool scanRun(const Run&, const String& toMatch);
110
111 // Skip to the end of the specified |run|.
112 void skipRun(const Run&);
113
114 // Return the String made up of the characters in |run|, and advance the
115 // input pointer to the end of the run.
116 String extractString(const Run&);
117
118 // Return a String constructed from the rest of the input (between input
119 // pointer and end of input), and advance the input pointer accordingly.
120 String restOfInputAsString();
121
122 // Scan a set of ASCII digits from the input. Return the number of digits
123 // scanned, and set |number| to the computed value. If the digits make up a
124 // number that does not fit the 'int' type, |number| is set to INT_MAX.
125 // Note: Does not handle sign.
126 unsigned scanDigits(int& number);
127
128 // Scan a floating point value on one of the forms: \d+\.? \d+\.\d+ \.\d+
129 bool scanFloat(float& number, bool* isNegative = nullptr);
130
131protected:
132 Position position() const { return m_data.characters8; }
133 Position end() const { return m_end.characters8; }
134 void seekTo(Position);
135 UChar currentChar() const;
136 void advance(unsigned amount = 1);
137 // Adapt a UChar-predicate to an LChar-predicate.
138 // (For use with skipWhile/Until from ParsingUtilities.h).
139 template<bool characterPredicate(UChar)>
140 static inline bool LCharPredicateAdapter(LChar c) { return characterPredicate(c); }
141 union {
142 const LChar* characters8;
143 const UChar* characters16;
144 } m_data;
145 union {
146 const LChar* characters8;
147 const UChar* characters16;
148 } m_end;
149 bool m_is8Bit;
150};
151
152inline size_t VTTScanner::Run::length() const
153{
154 if (m_is8Bit)
155 return m_end - m_start;
156 return reinterpret_cast<const UChar*>(m_end) - reinterpret_cast<const UChar*>(m_start);
157}
158
159template<unsigned charactersCount>
160inline bool VTTScanner::scan(const char (&characters)[charactersCount])
161{
162 return scan(reinterpret_cast<const LChar*>(characters), charactersCount - 1);
163}
164
165template<bool characterPredicate(UChar)>
166inline void VTTScanner::skipWhile()
167{
168 if (m_is8Bit)
169 WebCore::skipWhile<LChar, LCharPredicateAdapter<characterPredicate> >(m_data.characters8, m_end.characters8);
170 else
171 WebCore::skipWhile<UChar, characterPredicate>(m_data.characters16, m_end.characters16);
172}
173
174template<bool characterPredicate(UChar)>
175inline void VTTScanner::skipUntil()
176{
177 if (m_is8Bit)
178 WebCore::skipUntil<LChar, LCharPredicateAdapter<characterPredicate> >(m_data.characters8, m_end.characters8);
179 else
180 WebCore::skipUntil<UChar, characterPredicate>(m_data.characters16, m_end.characters16);
181}
182
183template<bool characterPredicate(UChar)>
184inline VTTScanner::Run VTTScanner::collectWhile()
185{
186 if (m_is8Bit) {
187 const LChar* current = m_data.characters8;
188 WebCore::skipWhile<LChar, LCharPredicateAdapter<characterPredicate> >(current, m_end.characters8);
189 return Run(position(), current, m_is8Bit);
190 }
191 const UChar* current = m_data.characters16;
192 WebCore::skipWhile<UChar, characterPredicate>(current, m_end.characters16);
193 return Run(position(), reinterpret_cast<Position>(current), m_is8Bit);
194}
195
196template<bool characterPredicate(UChar)>
197inline VTTScanner::Run VTTScanner::collectUntil()
198{
199 if (m_is8Bit) {
200 const LChar* current = m_data.characters8;
201 WebCore::skipUntil<LChar, LCharPredicateAdapter<characterPredicate> >(current, m_end.characters8);
202 return Run(position(), current, m_is8Bit);
203 }
204 const UChar* current = m_data.characters16;
205 WebCore::skipUntil<UChar, characterPredicate>(current, m_end.characters16);
206 return Run(position(), reinterpret_cast<Position>(current), m_is8Bit);
207}
208
209inline void VTTScanner::seekTo(Position position)
210{
211 ASSERT(position <= end());
212 m_data.characters8 = position;
213}
214
215inline UChar VTTScanner::currentChar() const
216{
217 ASSERT(position() < end());
218 return m_is8Bit ? *m_data.characters8 : *m_data.characters16;
219}
220
221inline void VTTScanner::advance(unsigned amount)
222{
223 ASSERT(position() < end());
224 if (m_is8Bit)
225 m_data.characters8 += amount;
226 else
227 m_data.characters16 += amount;
228}
229
230} // namespace WebCore
231