1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
3 | * Copyright (C) 2003-2017 Apple Inc. All Rights Reserved. |
4 | * |
5 | * This library is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU Lesser General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2 of the License, or (at your option) any later version. |
9 | * |
10 | * This library is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Lesser General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU Lesser General Public |
16 | * License along with this library; if not, write to the Free Software |
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | * |
19 | */ |
20 | |
21 | #include "config.h" |
22 | #include "RegExpPrototype.h" |
23 | |
24 | #include "ArrayPrototype.h" |
25 | #include "BuiltinNames.h" |
26 | #include "Error.h" |
27 | #include "JSArray.h" |
28 | #include "JSCBuiltins.h" |
29 | #include "JSCInlines.h" |
30 | #include "JSCJSValue.h" |
31 | #include "JSFunction.h" |
32 | #include "JSStringInlines.h" |
33 | #include "Lexer.h" |
34 | #include "ObjectPrototype.h" |
35 | #include "RegExpCache.h" |
36 | #include "RegExpObject.h" |
37 | #include "RegExpObjectInlines.h" |
38 | #include "StringObject.h" |
39 | #include "StringRecursionChecker.h" |
40 | #include "YarrFlags.h" |
41 | #include <wtf/text/StringBuilder.h> |
42 | |
43 | namespace JSC { |
44 | |
45 | static EncodedJSValue JSC_HOST_CALL regExpProtoFuncExec(ExecState*); |
46 | static EncodedJSValue JSC_HOST_CALL regExpProtoFuncCompile(ExecState*); |
47 | static EncodedJSValue JSC_HOST_CALL regExpProtoFuncToString(ExecState*); |
48 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterGlobal(ExecState*); |
49 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterIgnoreCase(ExecState*); |
50 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterMultiline(ExecState*); |
51 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterDotAll(ExecState*); |
52 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterSticky(ExecState*); |
53 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState*); |
54 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterSource(ExecState*); |
55 | static EncodedJSValue JSC_HOST_CALL regExpProtoGetterFlags(ExecState*); |
56 | |
57 | const ClassInfo RegExpPrototype::s_info = { "Object" , &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(RegExpPrototype) }; |
58 | |
59 | RegExpPrototype::RegExpPrototype(VM& vm, Structure* structure) |
60 | : JSNonFinalObject(vm, structure) |
61 | { |
62 | } |
63 | |
64 | void RegExpPrototype::finishCreation(VM& vm, JSGlobalObject* globalObject) |
65 | { |
66 | Base::finishCreation(vm); |
67 | ASSERT(inherits(vm, info())); |
68 | JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->compile, regExpProtoFuncCompile, static_cast<unsigned>(PropertyAttribute::DontEnum), 2); |
69 | JSC_NATIVE_INTRINSIC_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->exec, regExpProtoFuncExec, static_cast<unsigned>(PropertyAttribute::DontEnum), 1, RegExpExecIntrinsic); |
70 | JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->toString, regExpProtoFuncToString, static_cast<unsigned>(PropertyAttribute::DontEnum), 0); |
71 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->global, regExpProtoGetterGlobal, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
72 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->dotAll, regExpProtoGetterDotAll, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
73 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->ignoreCase, regExpProtoGetterIgnoreCase, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
74 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->multiline, regExpProtoGetterMultiline, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
75 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->sticky, regExpProtoGetterSticky, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
76 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->unicode, regExpProtoGetterUnicode, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
77 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->source, regExpProtoGetterSource, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
78 | JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->flags, regExpProtoGetterFlags, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); |
79 | JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->matchSymbol, regExpPrototypeMatchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); |
80 | JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->replaceSymbol, regExpPrototypeReplaceCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); |
81 | JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->searchSymbol, regExpPrototypeSearchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); |
82 | JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->splitSymbol, regExpPrototypeSplitCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); |
83 | JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->test, regExpPrototypeTestCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); |
84 | } |
85 | |
86 | // ------------------------------ Functions --------------------------- |
87 | |
88 | EncodedJSValue JSC_HOST_CALL regExpProtoFuncTestFast(ExecState* exec) |
89 | { |
90 | VM& vm = exec->vm(); |
91 | auto scope = DECLARE_THROW_SCOPE(vm); |
92 | |
93 | JSValue thisValue = exec->thisValue(); |
94 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
95 | if (UNLIKELY(!regexp)) |
96 | return throwVMTypeError(exec, scope); |
97 | JSString* string = exec->argument(0).toStringOrNull(exec); |
98 | EXCEPTION_ASSERT(!!scope.exception() == !string); |
99 | if (!string) |
100 | return JSValue::encode(jsUndefined()); |
101 | RELEASE_AND_RETURN(scope, JSValue::encode(jsBoolean(regexp->test(exec, exec->lexicalGlobalObject(), string)))); |
102 | } |
103 | |
104 | EncodedJSValue JSC_HOST_CALL regExpProtoFuncExec(ExecState* exec) |
105 | { |
106 | VM& vm = exec->vm(); |
107 | auto scope = DECLARE_THROW_SCOPE(vm); |
108 | |
109 | JSValue thisValue = exec->thisValue(); |
110 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
111 | if (UNLIKELY(!regexp)) |
112 | return throwVMTypeError(exec, scope, "Builtin RegExp exec can only be called on a RegExp object" ); |
113 | JSString* string = exec->argument(0).toStringOrNull(exec); |
114 | EXCEPTION_ASSERT(!!scope.exception() == !string); |
115 | if (!string) |
116 | return JSValue::encode(jsUndefined()); |
117 | RELEASE_AND_RETURN(scope, JSValue::encode(regexp->exec(exec, exec->lexicalGlobalObject(), string))); |
118 | } |
119 | |
120 | EncodedJSValue JSC_HOST_CALL regExpProtoFuncMatchFast(ExecState* exec) |
121 | { |
122 | RegExpObject* thisObject = jsCast<RegExpObject*>(exec->thisValue()); |
123 | JSString* string = jsCast<JSString*>(exec->uncheckedArgument(0)); |
124 | if (!thisObject->regExp()->global()) |
125 | return JSValue::encode(thisObject->exec(exec, exec->lexicalGlobalObject(), string)); |
126 | return JSValue::encode(thisObject->matchGlobal(exec, exec->lexicalGlobalObject(), string)); |
127 | } |
128 | |
129 | EncodedJSValue JSC_HOST_CALL regExpProtoFuncCompile(ExecState* exec) |
130 | { |
131 | VM& vm = exec->vm(); |
132 | auto scope = DECLARE_THROW_SCOPE(vm); |
133 | |
134 | JSValue thisValue = exec->thisValue(); |
135 | auto* thisRegExp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
136 | if (UNLIKELY(!thisRegExp)) |
137 | return throwVMTypeError(exec, scope); |
138 | |
139 | RegExp* regExp; |
140 | JSValue arg0 = exec->argument(0); |
141 | JSValue arg1 = exec->argument(1); |
142 | |
143 | if (auto* regExpObject = jsDynamicCast<RegExpObject*>(vm, arg0)) { |
144 | if (!arg1.isUndefined()) |
145 | return throwVMTypeError(exec, scope, "Cannot supply flags when constructing one RegExp from another."_s ); |
146 | regExp = regExpObject->regExp(); |
147 | } else { |
148 | String pattern = arg0.isUndefined() ? emptyString() : arg0.toWTFString(exec); |
149 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
150 | |
151 | auto flags = arg1.isUndefined() ? makeOptional(OptionSet<Yarr::Flags> { }) : Yarr::parseFlags(arg1.toWTFString(exec)); |
152 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
153 | if (!flags) |
154 | return throwVMError(exec, scope, createSyntaxError(exec, "Invalid flags supplied to RegExp constructor."_s )); |
155 | |
156 | regExp = RegExp::create(vm, pattern, flags.value()); |
157 | } |
158 | |
159 | if (!regExp->isValid()) |
160 | return throwVMError(exec, scope, regExp->errorToThrow(exec)); |
161 | |
162 | thisRegExp->setRegExp(vm, regExp); |
163 | scope.release(); |
164 | thisRegExp->setLastIndex(exec, 0); |
165 | return JSValue::encode(thisRegExp); |
166 | } |
167 | |
// Fixed-size buffer for a flags string: 6 different flags and a null character terminator.
using FlagsString = std::array<char, 6 + 1>;
169 | |
170 | static inline FlagsString flagsString(ExecState* exec, JSObject* regexp) |
171 | { |
172 | FlagsString string; |
173 | string[0] = 0; |
174 | |
175 | VM& vm = exec->vm(); |
176 | auto scope = DECLARE_THROW_SCOPE(vm); |
177 | |
178 | JSValue globalValue = regexp->get(exec, vm.propertyNames->global); |
179 | RETURN_IF_EXCEPTION(scope, string); |
180 | JSValue ignoreCaseValue = regexp->get(exec, vm.propertyNames->ignoreCase); |
181 | RETURN_IF_EXCEPTION(scope, string); |
182 | JSValue multilineValue = regexp->get(exec, vm.propertyNames->multiline); |
183 | RETURN_IF_EXCEPTION(scope, string); |
184 | JSValue dotAllValue = regexp->get(exec, vm.propertyNames->dotAll); |
185 | RETURN_IF_EXCEPTION(scope, string); |
186 | JSValue unicodeValue = regexp->get(exec, vm.propertyNames->unicode); |
187 | RETURN_IF_EXCEPTION(scope, string); |
188 | JSValue stickyValue = regexp->get(exec, vm.propertyNames->sticky); |
189 | RETURN_IF_EXCEPTION(scope, string); |
190 | |
191 | unsigned index = 0; |
192 | if (globalValue.toBoolean(exec)) |
193 | string[index++] = 'g'; |
194 | if (ignoreCaseValue.toBoolean(exec)) |
195 | string[index++] = 'i'; |
196 | if (multilineValue.toBoolean(exec)) |
197 | string[index++] = 'm'; |
198 | if (dotAllValue.toBoolean(exec)) |
199 | string[index++] = 's'; |
200 | if (unicodeValue.toBoolean(exec)) |
201 | string[index++] = 'u'; |
202 | if (stickyValue.toBoolean(exec)) |
203 | string[index++] = 'y'; |
204 | ASSERT(index < string.size()); |
205 | string[index] = 0; |
206 | return string; |
207 | } |
208 | |
209 | EncodedJSValue JSC_HOST_CALL regExpProtoFuncToString(ExecState* exec) |
210 | { |
211 | VM& vm = exec->vm(); |
212 | auto scope = DECLARE_THROW_SCOPE(vm); |
213 | |
214 | JSValue thisValue = exec->thisValue(); |
215 | if (!thisValue.isObject()) |
216 | return throwVMTypeError(exec, scope); |
217 | |
218 | JSObject* thisObject = asObject(thisValue); |
219 | |
220 | StringRecursionChecker checker(exec, thisObject); |
221 | EXCEPTION_ASSERT(!scope.exception() || checker.earlyReturnValue()); |
222 | if (JSValue earlyReturnValue = checker.earlyReturnValue()) |
223 | return JSValue::encode(earlyReturnValue); |
224 | |
225 | JSValue sourceValue = thisObject->get(exec, vm.propertyNames->source); |
226 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
227 | String source = sourceValue.toWTFString(exec); |
228 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
229 | |
230 | JSValue flagsValue = thisObject->get(exec, vm.propertyNames->flags); |
231 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
232 | String flags = flagsValue.toWTFString(exec); |
233 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
234 | |
235 | RELEASE_AND_RETURN(scope, JSValue::encode(jsMakeNontrivialString(exec, '/', source, '/', flags))); |
236 | } |
237 | |
238 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterGlobal(ExecState* exec) |
239 | { |
240 | VM& vm = exec->vm(); |
241 | auto scope = DECLARE_THROW_SCOPE(vm); |
242 | |
243 | JSValue thisValue = exec->thisValue(); |
244 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
245 | if (UNLIKELY(!regexp)) { |
246 | if (thisValue.inherits<RegExpPrototype>(vm)) |
247 | return JSValue::encode(jsUndefined()); |
248 | return throwVMTypeError(exec, scope, "The RegExp.prototype.global getter can only be called on a RegExp object"_s ); |
249 | } |
250 | |
251 | return JSValue::encode(jsBoolean(regexp->regExp()->global())); |
252 | } |
253 | |
254 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterIgnoreCase(ExecState* exec) |
255 | { |
256 | VM& vm = exec->vm(); |
257 | auto scope = DECLARE_THROW_SCOPE(vm); |
258 | |
259 | JSValue thisValue = exec->thisValue(); |
260 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
261 | if (UNLIKELY(!regexp)) { |
262 | if (thisValue.inherits<RegExpPrototype>(vm)) |
263 | return JSValue::encode(jsUndefined()); |
264 | return throwVMTypeError(exec, scope, "The RegExp.prototype.ignoreCase getter can only be called on a RegExp object"_s ); |
265 | } |
266 | |
267 | return JSValue::encode(jsBoolean(regexp->regExp()->ignoreCase())); |
268 | } |
269 | |
270 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterMultiline(ExecState* exec) |
271 | { |
272 | VM& vm = exec->vm(); |
273 | auto scope = DECLARE_THROW_SCOPE(vm); |
274 | |
275 | JSValue thisValue = exec->thisValue(); |
276 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
277 | if (UNLIKELY(!regexp)) { |
278 | if (thisValue.inherits<RegExpPrototype>(vm)) |
279 | return JSValue::encode(jsUndefined()); |
280 | return throwVMTypeError(exec, scope, "The RegExp.prototype.multiline getter can only be called on a RegExp object"_s ); |
281 | } |
282 | |
283 | return JSValue::encode(jsBoolean(regexp->regExp()->multiline())); |
284 | } |
285 | |
286 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterDotAll(ExecState* exec) |
287 | { |
288 | VM& vm = exec->vm(); |
289 | auto scope = DECLARE_THROW_SCOPE(vm); |
290 | |
291 | JSValue thisValue = exec->thisValue(); |
292 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
293 | if (UNLIKELY(!regexp)) { |
294 | if (thisValue.inherits<RegExpPrototype>(vm)) |
295 | return JSValue::encode(jsUndefined()); |
296 | return throwVMTypeError(exec, scope, "The RegExp.prototype.dotAll getter can only be called on a RegExp object"_s ); |
297 | } |
298 | |
299 | return JSValue::encode(jsBoolean(regexp->regExp()->dotAll())); |
300 | } |
301 | |
302 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterSticky(ExecState* exec) |
303 | { |
304 | VM& vm = exec->vm(); |
305 | auto scope = DECLARE_THROW_SCOPE(vm); |
306 | |
307 | JSValue thisValue = exec->thisValue(); |
308 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
309 | if (UNLIKELY(!regexp)) { |
310 | if (thisValue.inherits<RegExpPrototype>(vm)) |
311 | return JSValue::encode(jsUndefined()); |
312 | return throwVMTypeError(exec, scope, "The RegExp.prototype.sticky getter can only be called on a RegExp object"_s ); |
313 | } |
314 | |
315 | return JSValue::encode(jsBoolean(regexp->regExp()->sticky())); |
316 | } |
317 | |
318 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState* exec) |
319 | { |
320 | VM& vm = exec->vm(); |
321 | auto scope = DECLARE_THROW_SCOPE(vm); |
322 | |
323 | JSValue thisValue = exec->thisValue(); |
324 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
325 | if (UNLIKELY(!regexp)) { |
326 | if (thisValue.inherits<RegExpPrototype>(vm)) |
327 | return JSValue::encode(jsUndefined()); |
328 | return throwVMTypeError(exec, scope, "The RegExp.prototype.unicode getter can only be called on a RegExp object"_s ); |
329 | } |
330 | |
331 | return JSValue::encode(jsBoolean(regexp->regExp()->unicode())); |
332 | } |
333 | |
334 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterFlags(ExecState* exec) |
335 | { |
336 | VM& vm = exec->vm(); |
337 | auto scope = DECLARE_THROW_SCOPE(vm); |
338 | |
339 | JSValue thisValue = exec->thisValue(); |
340 | if (UNLIKELY(!thisValue.isObject())) |
341 | return throwVMTypeError(exec, scope, "The RegExp.prototype.flags getter can only be called on an object"_s ); |
342 | |
343 | auto flags = flagsString(exec, asObject(thisValue)); |
344 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
345 | |
346 | return JSValue::encode(jsString(exec, flags.data())); |
347 | } |
348 | |
349 | template <typename CharacterType> |
350 | static inline void appendLineTerminatorEscape(StringBuilder&, CharacterType); |
351 | |
352 | template <> |
353 | inline void appendLineTerminatorEscape<LChar>(StringBuilder& builder, LChar lineTerminator) |
354 | { |
355 | if (lineTerminator == '\n') |
356 | builder.append('n'); |
357 | else |
358 | builder.append('r'); |
359 | } |
360 | |
361 | template <> |
362 | inline void appendLineTerminatorEscape<UChar>(StringBuilder& builder, UChar lineTerminator) |
363 | { |
364 | if (lineTerminator == '\n') |
365 | builder.append('n'); |
366 | else if (lineTerminator == '\r') |
367 | builder.append('r'); |
368 | else if (lineTerminator == 0x2028) |
369 | builder.appendLiteral("u2028" ); |
370 | else |
371 | builder.appendLiteral("u2029" ); |
372 | } |
373 | |
374 | template <typename CharacterType> |
375 | static inline JSValue regExpProtoGetterSourceInternal(ExecState* exec, const String& pattern, const CharacterType* characters, unsigned length) |
376 | { |
377 | bool previousCharacterWasBackslash = false; |
378 | bool inBrackets = false; |
379 | bool shouldEscape = false; |
380 | |
381 | // 15.10.6.4 specifies that RegExp.prototype.toString must return '/' + source + '/', |
382 | // and also states that the result must be a valid RegularExpressionLiteral. '//' is |
383 | // not a valid RegularExpressionLiteral (since it is a single line comment), and hence |
384 | // source cannot ever validly be "". If the source is empty, return a different Pattern |
385 | // that would match the same thing. |
386 | if (!length) |
387 | return jsNontrivialString(exec, "(?:)"_s ); |
388 | |
389 | // early return for strings that don't contain a forwards slash and LineTerminator |
390 | for (unsigned i = 0; i < length; ++i) { |
391 | CharacterType ch = characters[i]; |
392 | if (!previousCharacterWasBackslash) { |
393 | if (inBrackets) { |
394 | if (ch == ']') |
395 | inBrackets = false; |
396 | } else { |
397 | if (ch == '/') { |
398 | shouldEscape = true; |
399 | break; |
400 | } |
401 | if (ch == '[') |
402 | inBrackets = true; |
403 | } |
404 | } |
405 | |
406 | if (Lexer<CharacterType>::isLineTerminator(ch)) { |
407 | shouldEscape = true; |
408 | break; |
409 | } |
410 | |
411 | if (previousCharacterWasBackslash) |
412 | previousCharacterWasBackslash = false; |
413 | else |
414 | previousCharacterWasBackslash = ch == '\\'; |
415 | } |
416 | |
417 | if (!shouldEscape) |
418 | return jsString(exec, pattern); |
419 | |
420 | previousCharacterWasBackslash = false; |
421 | inBrackets = false; |
422 | StringBuilder result; |
423 | for (unsigned i = 0; i < length; ++i) { |
424 | CharacterType ch = characters[i]; |
425 | if (!previousCharacterWasBackslash) { |
426 | if (inBrackets) { |
427 | if (ch == ']') |
428 | inBrackets = false; |
429 | } else { |
430 | if (ch == '/') |
431 | result.append('\\'); |
432 | else if (ch == '[') |
433 | inBrackets = true; |
434 | } |
435 | } |
436 | |
437 | // escape LineTerminator |
438 | if (Lexer<CharacterType>::isLineTerminator(ch)) { |
439 | if (!previousCharacterWasBackslash) |
440 | result.append('\\'); |
441 | |
442 | appendLineTerminatorEscape<CharacterType>(result, ch); |
443 | } else |
444 | result.append(ch); |
445 | |
446 | if (previousCharacterWasBackslash) |
447 | previousCharacterWasBackslash = false; |
448 | else |
449 | previousCharacterWasBackslash = ch == '\\'; |
450 | } |
451 | |
452 | return jsString(exec, result.toString()); |
453 | } |
454 | |
455 | EncodedJSValue JSC_HOST_CALL regExpProtoGetterSource(ExecState* exec) |
456 | { |
457 | VM& vm = exec->vm(); |
458 | auto scope = DECLARE_THROW_SCOPE(vm); |
459 | |
460 | JSValue thisValue = exec->thisValue(); |
461 | auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); |
462 | if (UNLIKELY(!regexp)) { |
463 | if (thisValue.inherits<RegExpPrototype>(vm)) |
464 | return JSValue::encode(jsString(exec, "(?:)"_s )); |
465 | return throwVMTypeError(exec, scope, "The RegExp.prototype.source getter can only be called on a RegExp object"_s ); |
466 | } |
467 | |
468 | String pattern = regexp->regExp()->pattern(); |
469 | if (pattern.is8Bit()) |
470 | return JSValue::encode(regExpProtoGetterSourceInternal(exec, pattern, pattern.characters8(), pattern.length())); |
471 | return JSValue::encode(regExpProtoGetterSourceInternal(exec, pattern, pattern.characters16(), pattern.length())); |
472 | } |
473 | |
474 | EncodedJSValue JSC_HOST_CALL regExpProtoFuncSearchFast(ExecState* exec) |
475 | { |
476 | VM& vm = exec->vm(); |
477 | auto scope = DECLARE_THROW_SCOPE(vm); |
478 | JSValue thisValue = exec->thisValue(); |
479 | RegExp* regExp = jsCast<RegExpObject*>(thisValue)->regExp(); |
480 | |
481 | JSString* string = exec->uncheckedArgument(0).toString(exec); |
482 | String s = string->value(exec); |
483 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
484 | |
485 | JSGlobalObject* globalObject = exec->lexicalGlobalObject(); |
486 | MatchResult result = globalObject->regExpGlobalData().performMatch(vm, globalObject, regExp, string, s, 0); |
487 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
488 | return JSValue::encode(result ? jsNumber(result.start) : jsNumber(-1)); |
489 | } |
490 | |
491 | static inline unsigned advanceStringIndex(String str, unsigned strSize, unsigned index, bool isUnicode) |
492 | { |
493 | if (!isUnicode) |
494 | return ++index; |
495 | return advanceStringUnicode(str, strSize, index); |
496 | } |
497 | |
// Value returned by the genericSplit() callbacks to steer the loop.
enum SplitControl {
    ContinueSplit, // keep splitting
    AbortSplit // stop and return from genericSplit() immediately
};
502 | |
503 | template<typename ControlFunc, typename PushFunc> |
504 | void genericSplit( |
505 | VM& vm, RegExp* regexp, const String& input, unsigned inputSize, unsigned& position, |
506 | unsigned& matchPosition, bool regExpIsSticky, bool regExpIsUnicode, |
507 | const ControlFunc& control, const PushFunc& push) |
508 | { |
509 | Vector<int> ovector; |
510 | |
511 | while (matchPosition < inputSize) { |
512 | if (control() == AbortSplit) |
513 | return; |
514 | |
515 | ovector.shrink(0); |
516 | |
517 | // a. Perform ? Set(splitter, "lastIndex", q, true). |
518 | // b. Let z be ? RegExpExec(splitter, S). |
519 | int mpos = regexp->match(vm, input, matchPosition, ovector); |
520 | |
521 | // c. If z is null, let q be AdvanceStringIndex(S, q, unicodeMatching). |
522 | if (mpos < 0) { |
523 | if (!regExpIsSticky) |
524 | break; |
525 | matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode); |
526 | continue; |
527 | } |
528 | if (static_cast<unsigned>(mpos) >= inputSize) { |
529 | // The spec redoes the RegExpExec starting at the next character of the input. |
530 | // But in our case, mpos < 0 means that the native regexp already searched all permutations |
531 | // and know that we won't be able to find a match for the separator even if we redo the |
532 | // RegExpExec starting at the next character of the input. So, just bail. |
533 | break; |
534 | } |
535 | |
536 | // d. Else, z is not null |
537 | // i. Let e be ? ToLength(? Get(splitter, "lastIndex")). |
538 | // ii. Let e be min(e, size). |
539 | matchPosition = mpos; |
540 | unsigned matchEnd = ovector[1]; |
541 | |
542 | // iii. If e = p, let q be AdvanceStringIndex(S, q, unicodeMatching). |
543 | if (matchEnd == position) { |
544 | matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode); |
545 | continue; |
546 | } |
547 | // if matchEnd == 0 then position should also be zero and thus matchEnd should equal position. |
548 | ASSERT(matchEnd); |
549 | |
550 | // iv. Else e != p, |
551 | unsigned numberOfCaptures = regexp->numSubpatterns(); |
552 | |
553 | // 1. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through q (exclusive). |
554 | // 2. Perform ! CreateDataProperty(A, ! ToString(lengthA), T). |
555 | if (push(true, position, matchPosition - position) == AbortSplit) |
556 | return; |
557 | |
558 | // 5. Let p be e. |
559 | position = matchEnd; |
560 | |
561 | // 6. Let numberOfCaptures be ? ToLength(? Get(z, "length")). |
562 | // 7. Let numberOfCaptures be max(numberOfCaptures-1, 0). |
563 | // 8. Let i be 1. |
564 | // 9. Repeat, while i <= numberOfCaptures, |
565 | for (unsigned i = 1; i <= numberOfCaptures; ++i) { |
566 | // a. Let nextCapture be ? Get(z, ! ToString(i)). |
567 | // b. Perform ! CreateDataProperty(A, ! ToString(lengthA), nextCapture). |
568 | int sub = ovector[i * 2]; |
569 | if (push(sub >= 0, sub, ovector[i * 2 + 1] - sub) == AbortSplit) |
570 | return; |
571 | } |
572 | |
573 | // 10. Let q be p. |
574 | matchPosition = position; |
575 | } |
576 | } |
577 | |
578 | // ES 21.2.5.11 RegExp.prototype[@@split](string, limit) |
579 | EncodedJSValue JSC_HOST_CALL regExpProtoFuncSplitFast(ExecState* exec) |
580 | { |
581 | VM& vm = exec->vm(); |
582 | auto scope = DECLARE_THROW_SCOPE(vm); |
583 | |
584 | // 1. [handled by JS builtin] Let rx be the this value. |
585 | // 2. [handled by JS builtin] If Type(rx) is not Object, throw a TypeError exception. |
586 | JSValue thisValue = exec->thisValue(); |
587 | RegExp* regexp = jsCast<RegExpObject*>(thisValue)->regExp(); |
588 | |
589 | // 3. [handled by JS builtin] Let S be ? ToString(string). |
590 | JSString* inputString = exec->argument(0).toString(exec); |
591 | String input = inputString->value(exec); |
592 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
593 | ASSERT(!input.isNull()); |
594 | |
595 | // 4. [handled by JS builtin] Let C be ? SpeciesConstructor(rx, %RegExp%). |
596 | // 5. [handled by JS builtin] Let flags be ? ToString(? Get(rx, "flags")). |
597 | // 6. [handled by JS builtin] If flags contains "u", let unicodeMatching be true. |
598 | // 7. [handled by JS builtin] Else, let unicodeMatching be false. |
599 | // 8. [handled by JS builtin] If flags contains "y", let newFlags be flags. |
600 | // 9. [handled by JS builtin] Else, let newFlags be the string that is the concatenation of flags and "y". |
601 | // 10. [handled by JS builtin] Let splitter be ? Construct(C, « rx, newFlags »). |
602 | |
603 | // 11. Let A be ArrayCreate(0). |
604 | // 12. Let lengthA be 0. |
605 | JSArray* result = constructEmptyArray(exec, 0); |
606 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
607 | unsigned resultLength = 0; |
608 | |
609 | // 13. If limit is undefined, let lim be 2^32-1; else let lim be ? ToUint32(limit). |
610 | JSValue limitValue = exec->argument(1); |
611 | unsigned limit = limitValue.isUndefined() ? 0xFFFFFFFFu : limitValue.toUInt32(exec); |
612 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
613 | |
614 | // 14. Let size be the number of elements in S. |
615 | unsigned inputSize = input.length(); |
616 | |
617 | // 15. Let p = 0. |
618 | unsigned position = 0; |
619 | |
620 | // 16. If lim == 0, return A. |
621 | if (!limit) |
622 | return JSValue::encode(result); |
623 | |
624 | // 17. If size == 0, then |
625 | if (input.isEmpty()) { |
626 | // a. Let z be ? RegExpExec(splitter, S). |
627 | // b. If z is not null, return A. |
628 | // c. Perform ! CreateDataProperty(A, "0", S). |
629 | // d. Return A. |
630 | if (!regexp->match(vm, input, 0)) { |
631 | result->putDirectIndex(exec, 0, inputString); |
632 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
633 | } |
634 | return JSValue::encode(result); |
635 | } |
636 | |
637 | // 18. Let q = p. |
638 | unsigned matchPosition = position; |
639 | // 19. Repeat, while q < size |
640 | bool regExpIsSticky = regexp->sticky(); |
641 | bool regExpIsUnicode = regexp->unicode(); |
642 | |
643 | unsigned maxSizeForDirectPath = 100000; |
644 | |
645 | genericSplit( |
646 | vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode, |
647 | [&] () -> SplitControl { |
648 | if (resultLength >= maxSizeForDirectPath) |
649 | return AbortSplit; |
650 | return ContinueSplit; |
651 | }, |
652 | [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl { |
653 | result->putDirectIndex(exec, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined()); |
654 | RETURN_IF_EXCEPTION(scope, AbortSplit); |
655 | if (resultLength >= limit) |
656 | return AbortSplit; |
657 | return ContinueSplit; |
658 | }); |
659 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
660 | |
661 | if (resultLength >= limit) |
662 | return JSValue::encode(result); |
663 | if (resultLength < maxSizeForDirectPath) { |
664 | // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive). |
665 | // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T). |
666 | scope.release(); |
667 | result->putDirectIndex(exec, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position)); |
668 | |
669 | // 22. Return A. |
670 | return JSValue::encode(result); |
671 | } |
672 | |
673 | // Now do a dry run to see how big things get. Give up if they get absurd. |
674 | unsigned savedPosition = position; |
675 | unsigned savedMatchPosition = matchPosition; |
676 | unsigned dryRunCount = 0; |
677 | genericSplit( |
678 | vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode, |
679 | [&] () -> SplitControl { |
680 | if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH) |
681 | return AbortSplit; |
682 | return ContinueSplit; |
683 | }, |
684 | [&] (bool, unsigned, unsigned) -> SplitControl { |
685 | dryRunCount++; |
686 | if (resultLength + dryRunCount >= limit) |
687 | return AbortSplit; |
688 | return ContinueSplit; |
689 | }); |
690 | |
691 | if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH) { |
692 | throwOutOfMemoryError(exec, scope); |
693 | return encodedJSValue(); |
694 | } |
695 | |
696 | // OK, we know that if we finish the split, we won't have to OOM. |
697 | position = savedPosition; |
698 | matchPosition = savedMatchPosition; |
699 | |
700 | genericSplit( |
701 | vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode, |
702 | [&] () -> SplitControl { |
703 | return ContinueSplit; |
704 | }, |
705 | [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl { |
706 | result->putDirectIndex(exec, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined()); |
707 | RETURN_IF_EXCEPTION(scope, AbortSplit); |
708 | if (resultLength >= limit) |
709 | return AbortSplit; |
710 | return ContinueSplit; |
711 | }); |
712 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
713 | |
714 | if (resultLength >= limit) |
715 | return JSValue::encode(result); |
716 | |
717 | // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive). |
718 | // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T). |
719 | scope.release(); |
720 | result->putDirectIndex(exec, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position)); |
721 | // 22. Return A. |
722 | return JSValue::encode(result); |
723 | } |
724 | |
725 | } // namespace JSC |
726 | |