1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2003-2017 Apple Inc. All Rights Reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21#include "config.h"
22#include "RegExpPrototype.h"
23
24#include "ArrayPrototype.h"
25#include "BuiltinNames.h"
26#include "Error.h"
27#include "JSArray.h"
28#include "JSCBuiltins.h"
29#include "JSCInlines.h"
30#include "JSCJSValue.h"
31#include "JSFunction.h"
32#include "JSStringInlines.h"
33#include "Lexer.h"
34#include "ObjectPrototype.h"
35#include "RegExpCache.h"
36#include "RegExpObject.h"
37#include "RegExpObjectInlines.h"
38#include "StringObject.h"
39#include "StringRecursionChecker.h"
40#include "YarrFlags.h"
41#include <wtf/text/StringBuilder.h>
42
43namespace JSC {
44
45static EncodedJSValue JSC_HOST_CALL regExpProtoFuncExec(ExecState*);
46static EncodedJSValue JSC_HOST_CALL regExpProtoFuncCompile(ExecState*);
47static EncodedJSValue JSC_HOST_CALL regExpProtoFuncToString(ExecState*);
48static EncodedJSValue JSC_HOST_CALL regExpProtoGetterGlobal(ExecState*);
49static EncodedJSValue JSC_HOST_CALL regExpProtoGetterIgnoreCase(ExecState*);
50static EncodedJSValue JSC_HOST_CALL regExpProtoGetterMultiline(ExecState*);
51static EncodedJSValue JSC_HOST_CALL regExpProtoGetterDotAll(ExecState*);
52static EncodedJSValue JSC_HOST_CALL regExpProtoGetterSticky(ExecState*);
53static EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState*);
54static EncodedJSValue JSC_HOST_CALL regExpProtoGetterSource(ExecState*);
55static EncodedJSValue JSC_HOST_CALL regExpProtoGetterFlags(ExecState*);
56
57const ClassInfo RegExpPrototype::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(RegExpPrototype) };
58
59RegExpPrototype::RegExpPrototype(VM& vm, Structure* structure)
60 : JSNonFinalObject(vm, structure)
61{
62}
63
64void RegExpPrototype::finishCreation(VM& vm, JSGlobalObject* globalObject)
65{
66 Base::finishCreation(vm);
67 ASSERT(inherits(vm, info()));
68 JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->compile, regExpProtoFuncCompile, static_cast<unsigned>(PropertyAttribute::DontEnum), 2);
69 JSC_NATIVE_INTRINSIC_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->exec, regExpProtoFuncExec, static_cast<unsigned>(PropertyAttribute::DontEnum), 1, RegExpExecIntrinsic);
70 JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->toString, regExpProtoFuncToString, static_cast<unsigned>(PropertyAttribute::DontEnum), 0);
71 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->global, regExpProtoGetterGlobal, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
72 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->dotAll, regExpProtoGetterDotAll, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
73 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->ignoreCase, regExpProtoGetterIgnoreCase, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
74 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->multiline, regExpProtoGetterMultiline, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
75 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->sticky, regExpProtoGetterSticky, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
76 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->unicode, regExpProtoGetterUnicode, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
77 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->source, regExpProtoGetterSource, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
78 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->flags, regExpProtoGetterFlags, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
79 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->matchSymbol, regExpPrototypeMatchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
80 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->replaceSymbol, regExpPrototypeReplaceCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
81 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->searchSymbol, regExpPrototypeSearchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
82 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->splitSymbol, regExpPrototypeSplitCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
83 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->test, regExpPrototypeTestCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
84}
85
86// ------------------------------ Functions ---------------------------
87
88EncodedJSValue JSC_HOST_CALL regExpProtoFuncTestFast(ExecState* exec)
89{
90 VM& vm = exec->vm();
91 auto scope = DECLARE_THROW_SCOPE(vm);
92
93 JSValue thisValue = exec->thisValue();
94 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
95 if (UNLIKELY(!regexp))
96 return throwVMTypeError(exec, scope);
97 JSString* string = exec->argument(0).toStringOrNull(exec);
98 EXCEPTION_ASSERT(!!scope.exception() == !string);
99 if (!string)
100 return JSValue::encode(jsUndefined());
101 RELEASE_AND_RETURN(scope, JSValue::encode(jsBoolean(regexp->test(exec, exec->lexicalGlobalObject(), string))));
102}
103
104EncodedJSValue JSC_HOST_CALL regExpProtoFuncExec(ExecState* exec)
105{
106 VM& vm = exec->vm();
107 auto scope = DECLARE_THROW_SCOPE(vm);
108
109 JSValue thisValue = exec->thisValue();
110 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
111 if (UNLIKELY(!regexp))
112 return throwVMTypeError(exec, scope, "Builtin RegExp exec can only be called on a RegExp object");
113 JSString* string = exec->argument(0).toStringOrNull(exec);
114 EXCEPTION_ASSERT(!!scope.exception() == !string);
115 if (!string)
116 return JSValue::encode(jsUndefined());
117 RELEASE_AND_RETURN(scope, JSValue::encode(regexp->exec(exec, exec->lexicalGlobalObject(), string)));
118}
119
120EncodedJSValue JSC_HOST_CALL regExpProtoFuncMatchFast(ExecState* exec)
121{
122 RegExpObject* thisObject = jsCast<RegExpObject*>(exec->thisValue());
123 JSString* string = jsCast<JSString*>(exec->uncheckedArgument(0));
124 if (!thisObject->regExp()->global())
125 return JSValue::encode(thisObject->exec(exec, exec->lexicalGlobalObject(), string));
126 return JSValue::encode(thisObject->matchGlobal(exec, exec->lexicalGlobalObject(), string));
127}
128
129EncodedJSValue JSC_HOST_CALL regExpProtoFuncCompile(ExecState* exec)
130{
131 VM& vm = exec->vm();
132 auto scope = DECLARE_THROW_SCOPE(vm);
133
134 JSValue thisValue = exec->thisValue();
135 auto* thisRegExp = jsDynamicCast<RegExpObject*>(vm, thisValue);
136 if (UNLIKELY(!thisRegExp))
137 return throwVMTypeError(exec, scope);
138
139 RegExp* regExp;
140 JSValue arg0 = exec->argument(0);
141 JSValue arg1 = exec->argument(1);
142
143 if (auto* regExpObject = jsDynamicCast<RegExpObject*>(vm, arg0)) {
144 if (!arg1.isUndefined())
145 return throwVMTypeError(exec, scope, "Cannot supply flags when constructing one RegExp from another."_s);
146 regExp = regExpObject->regExp();
147 } else {
148 String pattern = arg0.isUndefined() ? emptyString() : arg0.toWTFString(exec);
149 RETURN_IF_EXCEPTION(scope, encodedJSValue());
150
151 auto flags = arg1.isUndefined() ? makeOptional(OptionSet<Yarr::Flags> { }) : Yarr::parseFlags(arg1.toWTFString(exec));
152 RETURN_IF_EXCEPTION(scope, encodedJSValue());
153 if (!flags)
154 return throwVMError(exec, scope, createSyntaxError(exec, "Invalid flags supplied to RegExp constructor."_s));
155
156 regExp = RegExp::create(vm, pattern, flags.value());
157 }
158
159 if (!regExp->isValid())
160 return throwVMError(exec, scope, regExp->errorToThrow(exec));
161
162 thisRegExp->setRegExp(vm, regExp);
163 scope.release();
164 thisRegExp->setLastIndex(exec, 0);
165 return JSValue::encode(thisRegExp);
166}
167
168typedef std::array<char, 6 + 1> FlagsString; // 6 different flags and a null character terminator.
169
170static inline FlagsString flagsString(ExecState* exec, JSObject* regexp)
171{
172 FlagsString string;
173 string[0] = 0;
174
175 VM& vm = exec->vm();
176 auto scope = DECLARE_THROW_SCOPE(vm);
177
178 JSValue globalValue = regexp->get(exec, vm.propertyNames->global);
179 RETURN_IF_EXCEPTION(scope, string);
180 JSValue ignoreCaseValue = regexp->get(exec, vm.propertyNames->ignoreCase);
181 RETURN_IF_EXCEPTION(scope, string);
182 JSValue multilineValue = regexp->get(exec, vm.propertyNames->multiline);
183 RETURN_IF_EXCEPTION(scope, string);
184 JSValue dotAllValue = regexp->get(exec, vm.propertyNames->dotAll);
185 RETURN_IF_EXCEPTION(scope, string);
186 JSValue unicodeValue = regexp->get(exec, vm.propertyNames->unicode);
187 RETURN_IF_EXCEPTION(scope, string);
188 JSValue stickyValue = regexp->get(exec, vm.propertyNames->sticky);
189 RETURN_IF_EXCEPTION(scope, string);
190
191 unsigned index = 0;
192 if (globalValue.toBoolean(exec))
193 string[index++] = 'g';
194 if (ignoreCaseValue.toBoolean(exec))
195 string[index++] = 'i';
196 if (multilineValue.toBoolean(exec))
197 string[index++] = 'm';
198 if (dotAllValue.toBoolean(exec))
199 string[index++] = 's';
200 if (unicodeValue.toBoolean(exec))
201 string[index++] = 'u';
202 if (stickyValue.toBoolean(exec))
203 string[index++] = 'y';
204 ASSERT(index < string.size());
205 string[index] = 0;
206 return string;
207}
208
209EncodedJSValue JSC_HOST_CALL regExpProtoFuncToString(ExecState* exec)
210{
211 VM& vm = exec->vm();
212 auto scope = DECLARE_THROW_SCOPE(vm);
213
214 JSValue thisValue = exec->thisValue();
215 if (!thisValue.isObject())
216 return throwVMTypeError(exec, scope);
217
218 JSObject* thisObject = asObject(thisValue);
219
220 StringRecursionChecker checker(exec, thisObject);
221 EXCEPTION_ASSERT(!scope.exception() || checker.earlyReturnValue());
222 if (JSValue earlyReturnValue = checker.earlyReturnValue())
223 return JSValue::encode(earlyReturnValue);
224
225 JSValue sourceValue = thisObject->get(exec, vm.propertyNames->source);
226 RETURN_IF_EXCEPTION(scope, encodedJSValue());
227 String source = sourceValue.toWTFString(exec);
228 RETURN_IF_EXCEPTION(scope, encodedJSValue());
229
230 JSValue flagsValue = thisObject->get(exec, vm.propertyNames->flags);
231 RETURN_IF_EXCEPTION(scope, encodedJSValue());
232 String flags = flagsValue.toWTFString(exec);
233 RETURN_IF_EXCEPTION(scope, encodedJSValue());
234
235 RELEASE_AND_RETURN(scope, JSValue::encode(jsMakeNontrivialString(exec, '/', source, '/', flags)));
236}
237
238EncodedJSValue JSC_HOST_CALL regExpProtoGetterGlobal(ExecState* exec)
239{
240 VM& vm = exec->vm();
241 auto scope = DECLARE_THROW_SCOPE(vm);
242
243 JSValue thisValue = exec->thisValue();
244 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
245 if (UNLIKELY(!regexp)) {
246 if (thisValue.inherits<RegExpPrototype>(vm))
247 return JSValue::encode(jsUndefined());
248 return throwVMTypeError(exec, scope, "The RegExp.prototype.global getter can only be called on a RegExp object"_s);
249 }
250
251 return JSValue::encode(jsBoolean(regexp->regExp()->global()));
252}
253
254EncodedJSValue JSC_HOST_CALL regExpProtoGetterIgnoreCase(ExecState* exec)
255{
256 VM& vm = exec->vm();
257 auto scope = DECLARE_THROW_SCOPE(vm);
258
259 JSValue thisValue = exec->thisValue();
260 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
261 if (UNLIKELY(!regexp)) {
262 if (thisValue.inherits<RegExpPrototype>(vm))
263 return JSValue::encode(jsUndefined());
264 return throwVMTypeError(exec, scope, "The RegExp.prototype.ignoreCase getter can only be called on a RegExp object"_s);
265 }
266
267 return JSValue::encode(jsBoolean(regexp->regExp()->ignoreCase()));
268}
269
270EncodedJSValue JSC_HOST_CALL regExpProtoGetterMultiline(ExecState* exec)
271{
272 VM& vm = exec->vm();
273 auto scope = DECLARE_THROW_SCOPE(vm);
274
275 JSValue thisValue = exec->thisValue();
276 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
277 if (UNLIKELY(!regexp)) {
278 if (thisValue.inherits<RegExpPrototype>(vm))
279 return JSValue::encode(jsUndefined());
280 return throwVMTypeError(exec, scope, "The RegExp.prototype.multiline getter can only be called on a RegExp object"_s);
281 }
282
283 return JSValue::encode(jsBoolean(regexp->regExp()->multiline()));
284}
285
286EncodedJSValue JSC_HOST_CALL regExpProtoGetterDotAll(ExecState* exec)
287{
288 VM& vm = exec->vm();
289 auto scope = DECLARE_THROW_SCOPE(vm);
290
291 JSValue thisValue = exec->thisValue();
292 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
293 if (UNLIKELY(!regexp)) {
294 if (thisValue.inherits<RegExpPrototype>(vm))
295 return JSValue::encode(jsUndefined());
296 return throwVMTypeError(exec, scope, "The RegExp.prototype.dotAll getter can only be called on a RegExp object"_s);
297 }
298
299 return JSValue::encode(jsBoolean(regexp->regExp()->dotAll()));
300}
301
302EncodedJSValue JSC_HOST_CALL regExpProtoGetterSticky(ExecState* exec)
303{
304 VM& vm = exec->vm();
305 auto scope = DECLARE_THROW_SCOPE(vm);
306
307 JSValue thisValue = exec->thisValue();
308 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
309 if (UNLIKELY(!regexp)) {
310 if (thisValue.inherits<RegExpPrototype>(vm))
311 return JSValue::encode(jsUndefined());
312 return throwVMTypeError(exec, scope, "The RegExp.prototype.sticky getter can only be called on a RegExp object"_s);
313 }
314
315 return JSValue::encode(jsBoolean(regexp->regExp()->sticky()));
316}
317
318EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState* exec)
319{
320 VM& vm = exec->vm();
321 auto scope = DECLARE_THROW_SCOPE(vm);
322
323 JSValue thisValue = exec->thisValue();
324 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
325 if (UNLIKELY(!regexp)) {
326 if (thisValue.inherits<RegExpPrototype>(vm))
327 return JSValue::encode(jsUndefined());
328 return throwVMTypeError(exec, scope, "The RegExp.prototype.unicode getter can only be called on a RegExp object"_s);
329 }
330
331 return JSValue::encode(jsBoolean(regexp->regExp()->unicode()));
332}
333
334EncodedJSValue JSC_HOST_CALL regExpProtoGetterFlags(ExecState* exec)
335{
336 VM& vm = exec->vm();
337 auto scope = DECLARE_THROW_SCOPE(vm);
338
339 JSValue thisValue = exec->thisValue();
340 if (UNLIKELY(!thisValue.isObject()))
341 return throwVMTypeError(exec, scope, "The RegExp.prototype.flags getter can only be called on an object"_s);
342
343 auto flags = flagsString(exec, asObject(thisValue));
344 RETURN_IF_EXCEPTION(scope, encodedJSValue());
345
346 return JSValue::encode(jsString(exec, flags.data()));
347}
348
349template <typename CharacterType>
350static inline void appendLineTerminatorEscape(StringBuilder&, CharacterType);
351
352template <>
353inline void appendLineTerminatorEscape<LChar>(StringBuilder& builder, LChar lineTerminator)
354{
355 if (lineTerminator == '\n')
356 builder.append('n');
357 else
358 builder.append('r');
359}
360
361template <>
362inline void appendLineTerminatorEscape<UChar>(StringBuilder& builder, UChar lineTerminator)
363{
364 if (lineTerminator == '\n')
365 builder.append('n');
366 else if (lineTerminator == '\r')
367 builder.append('r');
368 else if (lineTerminator == 0x2028)
369 builder.appendLiteral("u2028");
370 else
371 builder.appendLiteral("u2029");
372}
373
374template <typename CharacterType>
375static inline JSValue regExpProtoGetterSourceInternal(ExecState* exec, const String& pattern, const CharacterType* characters, unsigned length)
376{
377 bool previousCharacterWasBackslash = false;
378 bool inBrackets = false;
379 bool shouldEscape = false;
380
381 // 15.10.6.4 specifies that RegExp.prototype.toString must return '/' + source + '/',
382 // and also states that the result must be a valid RegularExpressionLiteral. '//' is
383 // not a valid RegularExpressionLiteral (since it is a single line comment), and hence
384 // source cannot ever validly be "". If the source is empty, return a different Pattern
385 // that would match the same thing.
386 if (!length)
387 return jsNontrivialString(exec, "(?:)"_s);
388
389 // early return for strings that don't contain a forwards slash and LineTerminator
390 for (unsigned i = 0; i < length; ++i) {
391 CharacterType ch = characters[i];
392 if (!previousCharacterWasBackslash) {
393 if (inBrackets) {
394 if (ch == ']')
395 inBrackets = false;
396 } else {
397 if (ch == '/') {
398 shouldEscape = true;
399 break;
400 }
401 if (ch == '[')
402 inBrackets = true;
403 }
404 }
405
406 if (Lexer<CharacterType>::isLineTerminator(ch)) {
407 shouldEscape = true;
408 break;
409 }
410
411 if (previousCharacterWasBackslash)
412 previousCharacterWasBackslash = false;
413 else
414 previousCharacterWasBackslash = ch == '\\';
415 }
416
417 if (!shouldEscape)
418 return jsString(exec, pattern);
419
420 previousCharacterWasBackslash = false;
421 inBrackets = false;
422 StringBuilder result;
423 for (unsigned i = 0; i < length; ++i) {
424 CharacterType ch = characters[i];
425 if (!previousCharacterWasBackslash) {
426 if (inBrackets) {
427 if (ch == ']')
428 inBrackets = false;
429 } else {
430 if (ch == '/')
431 result.append('\\');
432 else if (ch == '[')
433 inBrackets = true;
434 }
435 }
436
437 // escape LineTerminator
438 if (Lexer<CharacterType>::isLineTerminator(ch)) {
439 if (!previousCharacterWasBackslash)
440 result.append('\\');
441
442 appendLineTerminatorEscape<CharacterType>(result, ch);
443 } else
444 result.append(ch);
445
446 if (previousCharacterWasBackslash)
447 previousCharacterWasBackslash = false;
448 else
449 previousCharacterWasBackslash = ch == '\\';
450 }
451
452 return jsString(exec, result.toString());
453}
454
455EncodedJSValue JSC_HOST_CALL regExpProtoGetterSource(ExecState* exec)
456{
457 VM& vm = exec->vm();
458 auto scope = DECLARE_THROW_SCOPE(vm);
459
460 JSValue thisValue = exec->thisValue();
461 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
462 if (UNLIKELY(!regexp)) {
463 if (thisValue.inherits<RegExpPrototype>(vm))
464 return JSValue::encode(jsString(exec, "(?:)"_s));
465 return throwVMTypeError(exec, scope, "The RegExp.prototype.source getter can only be called on a RegExp object"_s);
466 }
467
468 String pattern = regexp->regExp()->pattern();
469 if (pattern.is8Bit())
470 return JSValue::encode(regExpProtoGetterSourceInternal(exec, pattern, pattern.characters8(), pattern.length()));
471 return JSValue::encode(regExpProtoGetterSourceInternal(exec, pattern, pattern.characters16(), pattern.length()));
472}
473
474EncodedJSValue JSC_HOST_CALL regExpProtoFuncSearchFast(ExecState* exec)
475{
476 VM& vm = exec->vm();
477 auto scope = DECLARE_THROW_SCOPE(vm);
478 JSValue thisValue = exec->thisValue();
479 RegExp* regExp = jsCast<RegExpObject*>(thisValue)->regExp();
480
481 JSString* string = exec->uncheckedArgument(0).toString(exec);
482 String s = string->value(exec);
483 RETURN_IF_EXCEPTION(scope, encodedJSValue());
484
485 JSGlobalObject* globalObject = exec->lexicalGlobalObject();
486 MatchResult result = globalObject->regExpGlobalData().performMatch(vm, globalObject, regExp, string, s, 0);
487 RETURN_IF_EXCEPTION(scope, encodedJSValue());
488 return JSValue::encode(result ? jsNumber(result.start) : jsNumber(-1));
489}
490
491static inline unsigned advanceStringIndex(String str, unsigned strSize, unsigned index, bool isUnicode)
492{
493 if (!isUnicode)
494 return ++index;
495 return advanceStringUnicode(str, strSize, index);
496}
497
// Verdict returned by the callbacks handed to genericSplit().
enum SplitControl {
    ContinueSplit = 0, // keep splitting
    AbortSplit = 1 // stop; genericSplit() returns immediately
};
502
503template<typename ControlFunc, typename PushFunc>
504void genericSplit(
505 VM& vm, RegExp* regexp, const String& input, unsigned inputSize, unsigned& position,
506 unsigned& matchPosition, bool regExpIsSticky, bool regExpIsUnicode,
507 const ControlFunc& control, const PushFunc& push)
508{
509 Vector<int> ovector;
510
511 while (matchPosition < inputSize) {
512 if (control() == AbortSplit)
513 return;
514
515 ovector.shrink(0);
516
517 // a. Perform ? Set(splitter, "lastIndex", q, true).
518 // b. Let z be ? RegExpExec(splitter, S).
519 int mpos = regexp->match(vm, input, matchPosition, ovector);
520
521 // c. If z is null, let q be AdvanceStringIndex(S, q, unicodeMatching).
522 if (mpos < 0) {
523 if (!regExpIsSticky)
524 break;
525 matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode);
526 continue;
527 }
528 if (static_cast<unsigned>(mpos) >= inputSize) {
529 // The spec redoes the RegExpExec starting at the next character of the input.
530 // But in our case, mpos < 0 means that the native regexp already searched all permutations
531 // and know that we won't be able to find a match for the separator even if we redo the
532 // RegExpExec starting at the next character of the input. So, just bail.
533 break;
534 }
535
536 // d. Else, z is not null
537 // i. Let e be ? ToLength(? Get(splitter, "lastIndex")).
538 // ii. Let e be min(e, size).
539 matchPosition = mpos;
540 unsigned matchEnd = ovector[1];
541
542 // iii. If e = p, let q be AdvanceStringIndex(S, q, unicodeMatching).
543 if (matchEnd == position) {
544 matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode);
545 continue;
546 }
547 // if matchEnd == 0 then position should also be zero and thus matchEnd should equal position.
548 ASSERT(matchEnd);
549
550 // iv. Else e != p,
551 unsigned numberOfCaptures = regexp->numSubpatterns();
552
553 // 1. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through q (exclusive).
554 // 2. Perform ! CreateDataProperty(A, ! ToString(lengthA), T).
555 if (push(true, position, matchPosition - position) == AbortSplit)
556 return;
557
558 // 5. Let p be e.
559 position = matchEnd;
560
561 // 6. Let numberOfCaptures be ? ToLength(? Get(z, "length")).
562 // 7. Let numberOfCaptures be max(numberOfCaptures-1, 0).
563 // 8. Let i be 1.
564 // 9. Repeat, while i <= numberOfCaptures,
565 for (unsigned i = 1; i <= numberOfCaptures; ++i) {
566 // a. Let nextCapture be ? Get(z, ! ToString(i)).
567 // b. Perform ! CreateDataProperty(A, ! ToString(lengthA), nextCapture).
568 int sub = ovector[i * 2];
569 if (push(sub >= 0, sub, ovector[i * 2 + 1] - sub) == AbortSplit)
570 return;
571 }
572
573 // 10. Let q be p.
574 matchPosition = position;
575 }
576}
577
578// ES 21.2.5.11 RegExp.prototype[@@split](string, limit)
579EncodedJSValue JSC_HOST_CALL regExpProtoFuncSplitFast(ExecState* exec)
580{
581 VM& vm = exec->vm();
582 auto scope = DECLARE_THROW_SCOPE(vm);
583
584 // 1. [handled by JS builtin] Let rx be the this value.
585 // 2. [handled by JS builtin] If Type(rx) is not Object, throw a TypeError exception.
586 JSValue thisValue = exec->thisValue();
587 RegExp* regexp = jsCast<RegExpObject*>(thisValue)->regExp();
588
589 // 3. [handled by JS builtin] Let S be ? ToString(string).
590 JSString* inputString = exec->argument(0).toString(exec);
591 String input = inputString->value(exec);
592 RETURN_IF_EXCEPTION(scope, encodedJSValue());
593 ASSERT(!input.isNull());
594
595 // 4. [handled by JS builtin] Let C be ? SpeciesConstructor(rx, %RegExp%).
596 // 5. [handled by JS builtin] Let flags be ? ToString(? Get(rx, "flags")).
597 // 6. [handled by JS builtin] If flags contains "u", let unicodeMatching be true.
598 // 7. [handled by JS builtin] Else, let unicodeMatching be false.
599 // 8. [handled by JS builtin] If flags contains "y", let newFlags be flags.
600 // 9. [handled by JS builtin] Else, let newFlags be the string that is the concatenation of flags and "y".
601 // 10. [handled by JS builtin] Let splitter be ? Construct(C, « rx, newFlags »).
602
603 // 11. Let A be ArrayCreate(0).
604 // 12. Let lengthA be 0.
605 JSArray* result = constructEmptyArray(exec, 0);
606 RETURN_IF_EXCEPTION(scope, encodedJSValue());
607 unsigned resultLength = 0;
608
609 // 13. If limit is undefined, let lim be 2^32-1; else let lim be ? ToUint32(limit).
610 JSValue limitValue = exec->argument(1);
611 unsigned limit = limitValue.isUndefined() ? 0xFFFFFFFFu : limitValue.toUInt32(exec);
612 RETURN_IF_EXCEPTION(scope, encodedJSValue());
613
614 // 14. Let size be the number of elements in S.
615 unsigned inputSize = input.length();
616
617 // 15. Let p = 0.
618 unsigned position = 0;
619
620 // 16. If lim == 0, return A.
621 if (!limit)
622 return JSValue::encode(result);
623
624 // 17. If size == 0, then
625 if (input.isEmpty()) {
626 // a. Let z be ? RegExpExec(splitter, S).
627 // b. If z is not null, return A.
628 // c. Perform ! CreateDataProperty(A, "0", S).
629 // d. Return A.
630 if (!regexp->match(vm, input, 0)) {
631 result->putDirectIndex(exec, 0, inputString);
632 RETURN_IF_EXCEPTION(scope, encodedJSValue());
633 }
634 return JSValue::encode(result);
635 }
636
637 // 18. Let q = p.
638 unsigned matchPosition = position;
639 // 19. Repeat, while q < size
640 bool regExpIsSticky = regexp->sticky();
641 bool regExpIsUnicode = regexp->unicode();
642
643 unsigned maxSizeForDirectPath = 100000;
644
645 genericSplit(
646 vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
647 [&] () -> SplitControl {
648 if (resultLength >= maxSizeForDirectPath)
649 return AbortSplit;
650 return ContinueSplit;
651 },
652 [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl {
653 result->putDirectIndex(exec, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined());
654 RETURN_IF_EXCEPTION(scope, AbortSplit);
655 if (resultLength >= limit)
656 return AbortSplit;
657 return ContinueSplit;
658 });
659 RETURN_IF_EXCEPTION(scope, encodedJSValue());
660
661 if (resultLength >= limit)
662 return JSValue::encode(result);
663 if (resultLength < maxSizeForDirectPath) {
664 // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive).
665 // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T).
666 scope.release();
667 result->putDirectIndex(exec, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position));
668
669 // 22. Return A.
670 return JSValue::encode(result);
671 }
672
673 // Now do a dry run to see how big things get. Give up if they get absurd.
674 unsigned savedPosition = position;
675 unsigned savedMatchPosition = matchPosition;
676 unsigned dryRunCount = 0;
677 genericSplit(
678 vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
679 [&] () -> SplitControl {
680 if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH)
681 return AbortSplit;
682 return ContinueSplit;
683 },
684 [&] (bool, unsigned, unsigned) -> SplitControl {
685 dryRunCount++;
686 if (resultLength + dryRunCount >= limit)
687 return AbortSplit;
688 return ContinueSplit;
689 });
690
691 if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH) {
692 throwOutOfMemoryError(exec, scope);
693 return encodedJSValue();
694 }
695
696 // OK, we know that if we finish the split, we won't have to OOM.
697 position = savedPosition;
698 matchPosition = savedMatchPosition;
699
700 genericSplit(
701 vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
702 [&] () -> SplitControl {
703 return ContinueSplit;
704 },
705 [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl {
706 result->putDirectIndex(exec, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined());
707 RETURN_IF_EXCEPTION(scope, AbortSplit);
708 if (resultLength >= limit)
709 return AbortSplit;
710 return ContinueSplit;
711 });
712 RETURN_IF_EXCEPTION(scope, encodedJSValue());
713
714 if (resultLength >= limit)
715 return JSValue::encode(result);
716
717 // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive).
718 // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T).
719 scope.release();
720 result->putDirectIndex(exec, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position));
721 // 22. Return A.
722 return JSValue::encode(result);
723}
724
725} // namespace JSC
726