1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008, 2016 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 */
22
23#pragma once
24
25#include "RegExp.h"
26#include "JSCInlines.h"
27#include "Yarr.h"
28#include "YarrInterpreter.h"
29#include "YarrJIT.h"
30
31#define REGEXP_FUNC_TEST_DATA_GEN 0
32
33#if REGEXP_FUNC_TEST_DATA_GEN
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#endif
38
39namespace JSC {
40
41#if REGEXP_FUNC_TEST_DATA_GEN
42class RegExpFunctionalTestCollector {
43 // This class is not thread safe.
44protected:
45 static const char* const s_fileName;
46
47public:
48 static RegExpFunctionalTestCollector* get();
49
50 ~RegExpFunctionalTestCollector();
51
52 void outputOneTest(RegExp*, String, int, int*, int);
53 void clearRegExp(RegExp* regExp)
54 {
55 if (regExp == m_lastRegExp)
56 m_lastRegExp = 0;
57 }
58
59private:
60 RegExpFunctionalTestCollector();
61
62 void outputEscapedString(const String&, bool escapeSlash = false);
63
64 static RegExpFunctionalTestCollector* s_instance;
65 FILE* m_file;
66 RegExp* m_lastRegExp;
67};
68#endif // REGEXP_FUNC_TEST_DATA_GEN
69
70ALWAYS_INLINE bool RegExp::hasCodeFor(Yarr::YarrCharSize charSize)
71{
72 if (hasCode()) {
73#if ENABLE(YARR_JIT)
74 if (m_state != JITCode)
75 return true;
76 ASSERT(m_regExpJITCode);
77 if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCode()))
78 return true;
79 if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCode()))
80 return true;
81#else
82 UNUSED_PARAM(charSize);
83 return true;
84#endif
85 }
86 return false;
87}
88
89#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
90class PatternContextBufferHolder {
91public:
92 PatternContextBufferHolder(VM& vm, bool needBuffer)
93 : m_vm(vm)
94 , m_needBuffer(needBuffer)
95 {
96 if (m_needBuffer) {
97 m_buffer = m_vm.acquireRegExpPatternContexBuffer();
98 m_size = VM::patternContextBufferSize;
99 } else {
100 m_buffer = nullptr;
101 m_size = 0;
102 }
103 }
104
105 ~PatternContextBufferHolder()
106 {
107 if (m_needBuffer)
108 m_vm.releaseRegExpPatternContexBuffer();
109 }
110
111 void* buffer() { return m_buffer; }
112 unsigned size() { return m_size; }
113
114private:
115 VM& m_vm;
116 bool m_needBuffer;
117 void* m_buffer;
118 unsigned m_size;
119};
120#endif
121
122ALWAYS_INLINE void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize)
123{
124 if (hasCodeFor(charSize))
125 return;
126
127 if (m_state == ParseError)
128 return;
129
130 compile(&vm, charSize);
131}
132
133template<typename VectorType>
134ALWAYS_INLINE int RegExp::matchInline(VM& vm, const String& s, unsigned startOffset, VectorType& ovector)
135{
136#if ENABLE(REGEXP_TRACING)
137 m_rtMatchCallCount++;
138 m_rtMatchTotalSubjectStringLen += (double)(s.length() - startOffset);
139#endif
140
141 compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
142
143 if (m_state == ParseError) {
144 auto throwScope = DECLARE_THROW_SCOPE(vm);
145 ExecState* exec = vm.topCallFrame;
146 throwScope.throwException(exec, errorToThrow(exec));
147 if (!hasHardError(m_constructionErrorCode))
148 reset();
149 return -1;
150 }
151
152 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
153 ovector.resize(offsetVectorSize);
154 int* offsetVector = ovector.data();
155
156 int result;
157#if ENABLE(YARR_JIT)
158 if (m_state == JITCode) {
159 {
160 ASSERT(m_regExpJITCode);
161#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
162 PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
163
164#define EXTRA_JIT_PARAMS , patternContextBufferHolder.buffer(), patternContextBufferHolder.size()
165#else
166#define EXTRA_JIT_PARAMS
167#endif
168
169 if (s.is8Bit())
170 result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
171 else
172 result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
173
174#undef EXTRA_JIT_PARAMS
175 }
176
177 if (result == Yarr::JSRegExpJITCodeFailure) {
178 // JIT'ed code couldn't handle expression, so punt back to the interpreter.
179 byteCodeCompileIfNecessary(&vm);
180 result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
181 }
182
183#if ENABLE(YARR_JIT_DEBUG)
184 if (m_state == JITCode) {
185 byteCodeCompileIfNecessary(&vm);
186 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
187 }
188#endif
189 } else
190#endif
191 result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
192
193 // FIXME: The YARR engine should handle unsigned or size_t length matches.
194 // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed.
195 // The offset vector handling needs to change as well.
196 // Right now we convert a match where the offsets overflowed into match failure.
197 // There are two places in WebCore that call the interpreter directly that need to
198 // have their offsets changed to int as well. They are yarr/RegularExpression.cpp
199 // and inspector/ContentSearchUtilities.cpp
200 if (s.length() > INT_MAX) {
201 bool overflowed = false;
202
203 if (result < -1)
204 overflowed = true;
205
206 for (unsigned i = 0; i <= m_numSubpatterns; i++) {
207 if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) {
208 overflowed = true;
209 offsetVector[i*2] = -1;
210 offsetVector[i*2+1] = -1;
211 }
212 }
213
214 if (overflowed)
215 result = -1;
216 }
217
218 ASSERT(result >= -1);
219
220#if REGEXP_FUNC_TEST_DATA_GEN
221 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
222#endif
223
224#if ENABLE(REGEXP_TRACING)
225 if (result != -1)
226 m_rtMatchFoundCount++;
227#endif
228
229 return result;
230}
231
232ALWAYS_INLINE bool RegExp::hasMatchOnlyCodeFor(Yarr::YarrCharSize charSize)
233{
234 if (hasCode()) {
235#if ENABLE(YARR_JIT)
236 if (m_state != JITCode)
237 return true;
238 ASSERT(m_regExpJITCode);
239 if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCodeMatchOnly()))
240 return true;
241 if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCodeMatchOnly()))
242 return true;
243#else
244 UNUSED_PARAM(charSize);
245 return true;
246#endif
247 }
248
249 return false;
250}
251
252ALWAYS_INLINE void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize)
253{
254 if (hasMatchOnlyCodeFor(charSize))
255 return;
256
257 if (m_state == ParseError)
258 return;
259
260 compileMatchOnly(&vm, charSize);
261}
262
263ALWAYS_INLINE MatchResult RegExp::matchInline(VM& vm, const String& s, unsigned startOffset)
264{
265#if ENABLE(REGEXP_TRACING)
266 m_rtMatchOnlyCallCount++;
267 m_rtMatchOnlyTotalSubjectStringLen += (double)(s.length() - startOffset);
268#endif
269
270 compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
271
272 if (m_state == ParseError) {
273 auto throwScope = DECLARE_THROW_SCOPE(vm);
274 ExecState* exec = vm.topCallFrame;
275 throwScope.throwException(exec, errorToThrow(exec));
276 if (!hasHardError(m_constructionErrorCode))
277 reset();
278 return MatchResult::failed();
279 }
280
281#if ENABLE(YARR_JIT)
282 MatchResult result;
283
284 if (m_state == JITCode) {
285 {
286 ASSERT(m_regExpJITCode);
287#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
288 PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
289
290#define EXTRA_JIT_PARAMS , patternContextBufferHolder.buffer(), patternContextBufferHolder.size()
291#else
292#define EXTRA_JIT_PARAMS
293#endif
294
295 if (s.is8Bit())
296 result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length() EXTRA_JIT_PARAMS);
297 else
298 result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length() EXTRA_JIT_PARAMS);
299
300#undef EXTRA_JIT_PARAMS
301 }
302
303#if ENABLE(REGEXP_TRACING)
304 if (!result)
305 m_rtMatchOnlyFoundCount++;
306#endif
307 if (result.start != static_cast<size_t>(Yarr::JSRegExpJITCodeFailure))
308 return result;
309
310 // JIT'ed code couldn't handle expression, so punt back to the interpreter.
311 byteCodeCompileIfNecessary(&vm);
312 }
313#endif
314
315 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
316 int* offsetVector;
317 Vector<int, 32> nonReturnedOvector;
318 nonReturnedOvector.grow(offsetVectorSize);
319 offsetVector = nonReturnedOvector.data();
320 int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
321#if REGEXP_FUNC_TEST_DATA_GEN
322 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
323#endif
324
325 if (r >= 0) {
326#if ENABLE(REGEXP_TRACING)
327 m_rtMatchOnlyFoundCount++;
328#endif
329 return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]);
330 }
331
332 return MatchResult::failed();
333}
334
335} // namespace JSC
336