1/*
2 * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "YarrJIT.h"
28
29#include <wtf/ASCIICType.h>
30#include "LinkBuffer.h"
31#include "Options.h"
32#include "VM.h"
33#include "Yarr.h"
34#include "YarrCanonicalize.h"
35#include "YarrDisassembler.h"
36
37#if ENABLE(YARR_JIT)
38
39namespace JSC { namespace Yarr {
40
41template<YarrJITCompileMode compileMode>
42class YarrGenerator : public YarrJITInfo, private MacroAssembler {
43
44#if CPU(ARM_THUMB2)
45 static const RegisterID input = ARMRegisters::r0;
46 static const RegisterID index = ARMRegisters::r1;
47 static const RegisterID length = ARMRegisters::r2;
48 static const RegisterID output = ARMRegisters::r3;
49
50 static const RegisterID regT0 = ARMRegisters::r4;
51 static const RegisterID regT1 = ARMRegisters::r5;
52 static const RegisterID initialStart = ARMRegisters::r8;
53
54 static const RegisterID returnRegister = ARMRegisters::r0;
55 static const RegisterID returnRegister2 = ARMRegisters::r1;
56
57#define HAVE_INITIAL_START_REG
58#elif CPU(ARM64)
59 // Argument registers
60 static const RegisterID input = ARM64Registers::x0;
61 static const RegisterID index = ARM64Registers::x1;
62 static const RegisterID length = ARM64Registers::x2;
63 static const RegisterID output = ARM64Registers::x3;
64 static const RegisterID freelistRegister = ARM64Registers::x4;
65 static const RegisterID freelistSizeRegister = ARM64Registers::x5;
66
67 // Scratch registers
68 static const RegisterID regT0 = ARM64Registers::x6;
69 static const RegisterID regT1 = ARM64Registers::x7;
70 static const RegisterID regT2 = ARM64Registers::x8;
71 static const RegisterID remainingMatchCount = ARM64Registers::x9;
72 static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
73 static const RegisterID initialStart = ARM64Registers::x11;
74 static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
75 static const RegisterID leadingSurrogateTag = ARM64Registers::x13;
76 static const RegisterID trailingSurrogateTag = ARM64Registers::x14;
77 static const RegisterID endOfStringAddress = ARM64Registers::x15;
78
79 static const RegisterID returnRegister = ARM64Registers::x0;
80 static const RegisterID returnRegister2 = ARM64Registers::x1;
81
82 const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
83#define HAVE_INITIAL_START_REG
84#define JIT_UNICODE_EXPRESSIONS
85#elif CPU(MIPS)
86 static const RegisterID input = MIPSRegisters::a0;
87 static const RegisterID index = MIPSRegisters::a1;
88 static const RegisterID length = MIPSRegisters::a2;
89 static const RegisterID output = MIPSRegisters::a3;
90
91 static const RegisterID regT0 = MIPSRegisters::t4;
92 static const RegisterID regT1 = MIPSRegisters::t5;
93 static const RegisterID initialStart = MIPSRegisters::t6;
94
95 static const RegisterID returnRegister = MIPSRegisters::v0;
96 static const RegisterID returnRegister2 = MIPSRegisters::v1;
97
98#define HAVE_INITIAL_START_REG
99#elif CPU(X86)
100 static const RegisterID input = X86Registers::eax;
101 static const RegisterID index = X86Registers::edx;
102 static const RegisterID length = X86Registers::ecx;
103 static const RegisterID output = X86Registers::edi;
104
105 static const RegisterID regT0 = X86Registers::ebx;
106 static const RegisterID regT1 = X86Registers::esi;
107
108 static const RegisterID returnRegister = X86Registers::eax;
109 static const RegisterID returnRegister2 = X86Registers::edx;
110#elif CPU(X86_64)
111#if !OS(WINDOWS)
112 // Argument registers
113 static const RegisterID input = X86Registers::edi;
114 static const RegisterID index = X86Registers::esi;
115 static const RegisterID length = X86Registers::edx;
116 static const RegisterID output = X86Registers::ecx;
117 static const RegisterID freelistRegister = X86Registers::r8;
118 static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
119#else
120 // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
121 // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
122 COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
123 static const RegisterID input = X86Registers::edx;
124 static const RegisterID index = X86Registers::r8;
125 static const RegisterID length = X86Registers::r9;
126 static const RegisterID output = X86Registers::r10;
127#endif
128
129 // Scratch registers
130 static const RegisterID regT0 = X86Registers::eax;
131#if !OS(WINDOWS)
132 static const RegisterID regT1 = X86Registers::r9;
133 static const RegisterID regT2 = X86Registers::r10;
134#else
135 static const RegisterID regT1 = X86Registers::ecx;
136 static const RegisterID regT2 = X86Registers::edi;
137#endif
138
139 static const RegisterID initialStart = X86Registers::ebx;
140#if !OS(WINDOWS)
141 static const RegisterID remainingMatchCount = X86Registers::r12;
142#else
143 static const RegisterID remainingMatchCount = X86Registers::esi;
144#endif
145 static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
146 static const RegisterID leadingSurrogateTag = X86Registers::r14;
147 static const RegisterID endOfStringAddress = X86Registers::r15;
148
149 static const RegisterID returnRegister = X86Registers::eax;
150 static const RegisterID returnRegister2 = X86Registers::edx;
151
152 const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
153 const TrustedImm32 trailingSurrogateTag = TrustedImm32(0xdc00);
154 const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
155#define HAVE_INITIAL_START_REG
156#define JIT_UNICODE_EXPRESSIONS
157#endif
158
159#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
160 struct ParenContextSizes {
161 size_t m_numSubpatterns;
162 size_t m_frameSlots;
163
164 ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
165 : m_numSubpatterns(numSubpatterns)
166 , m_frameSlots(frameSlots)
167 {
168 }
169
170 size_t numSubpatterns() { return m_numSubpatterns; }
171
172 size_t frameSlots() { return m_frameSlots; }
173 };
174
175 struct ParenContext {
176 struct ParenContext* next;
177 uint32_t begin;
178 uint32_t matchAmount;
179 uintptr_t returnAddress;
180 struct Subpatterns {
181 unsigned start;
182 unsigned end;
183 } subpatterns[0];
184 uintptr_t frameSlots[0];
185
186 static size_t sizeFor(ParenContextSizes& parenContextSizes)
187 {
188 return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
189 }
190
191 static ptrdiff_t nextOffset()
192 {
193 return offsetof(ParenContext, next);
194 }
195
196 static ptrdiff_t beginOffset()
197 {
198 return offsetof(ParenContext, begin);
199 }
200
201 static ptrdiff_t matchAmountOffset()
202 {
203 return offsetof(ParenContext, matchAmount);
204 }
205
206 static ptrdiff_t returnAddressOffset()
207 {
208 return offsetof(ParenContext, returnAddress);
209 }
210
211 static ptrdiff_t subpatternOffset(size_t subpattern)
212 {
213 return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
214 }
215
216 static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
217 {
218 return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
219 }
220 };
221
222 void initParenContextFreeList()
223 {
224 RegisterID parenContextPointer = regT0;
225 RegisterID nextParenContextPointer = regT2;
226
227 size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
228
229 parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);
230
231 // Check that the paren context is a reasonable size.
232 if (parenContextSize > INT16_MAX)
233 m_abortExecution.append(jump());
234
235 Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
236 move(freelistRegister, parenContextPointer);
237 addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
238 addPtr(freelistRegister, freelistSizeRegister);
239 subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);
240
241 Label loopTop(this);
242 Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
243 storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
244 move(nextParenContextPointer, parenContextPointer);
245 addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
246 jump(loopTop);
247
248 initDone.link(this);
249 storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
250 emptyFreeList.link(this);
251 }
252
253 void allocateParenContext(RegisterID result)
254 {
255 m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
256 sub32(TrustedImm32(1), remainingMatchCount);
257 m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
258 move(freelistRegister, result);
259 loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
260 }
261
262 void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
263 {
264 loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
265 storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
266 move(headPtrRegister, freelistRegister);
267 }
268
269 void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
270 {
271 store32(index, Address(parenContextReg, ParenContext::beginOffset()));
272 loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
273 store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
274 loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
275 storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
276 if (compileMode == IncludeSubpatterns) {
277 for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
278 loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
279 storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
280 clearSubpatternStart(subpattern);
281 }
282 }
283 subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
284 for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
285 loadFromFrame(frameLocation, tempReg);
286 storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
287 }
288 }
289
290 void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
291 {
292 load32(Address(parenContextReg, ParenContext::beginOffset()), index);
293 storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
294 load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
295 storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
296 loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
297 storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
298 if (compileMode == IncludeSubpatterns) {
299 for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
300 loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
301 storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
302 }
303 }
304 subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
305 for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
306 loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
307 storeToFrame(tempReg, frameLocation);
308 }
309 }
310#endif
311
312 void optimizeAlternative(PatternAlternative* alternative)
313 {
314 if (!alternative->m_terms.size())
315 return;
316
317 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
318 PatternTerm& term = alternative->m_terms[i];
319 PatternTerm& nextTerm = alternative->m_terms[i + 1];
320
321 // We can move BMP only character classes after fixed character terms.
322 if ((term.type == PatternTerm::TypeCharacterClass)
323 && (term.quantityType == QuantifierFixedCount)
324 && (!m_decodeSurrogatePairs || (term.characterClass->hasOneCharacterSize() && !term.m_invert))
325 && (nextTerm.type == PatternTerm::TypePatternCharacter)
326 && (nextTerm.quantityType == QuantifierFixedCount)) {
327 PatternTerm termCopy = term;
328 alternative->m_terms[i] = nextTerm;
329 alternative->m_terms[i + 1] = termCopy;
330 }
331 }
332 }
333
334 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
335 {
336 do {
337 // pick which range we're going to generate
338 int which = count >> 1;
339 char lo = ranges[which].begin;
340 char hi = ranges[which].end;
341
342 // check if there are any ranges or matches below lo. If not, just jl to failure -
343 // if there is anything else to check, check that first, if it falls through jmp to failure.
344 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
345 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
346
347 // generate code for all ranges before this one
348 if (which)
349 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
350
351 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
352 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
353 ++*matchIndex;
354 }
355 failures.append(jump());
356
357 loOrAbove.link(this);
358 } else if (which) {
359 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
360
361 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
362 failures.append(jump());
363
364 loOrAbove.link(this);
365 } else
366 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
367
368 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
369 ++*matchIndex;
370
371 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
372 // fall through to here, the value is above hi.
373
374 // shuffle along & loop around if there are any more matches to handle.
375 unsigned next = which + 1;
376 ranges += next;
377 count -= next;
378 } while (count);
379 }
380
381 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
382 {
383 if (charClass->m_table && !m_decodeSurrogatePairs) {
384 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
385 matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
386 return;
387 }
388
389 JumpList unicodeFail;
390 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
391 JumpList isAscii;
392 if (charClass->m_matches.size() || charClass->m_ranges.size())
393 isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));
394
395 if (charClass->m_matchesUnicode.size()) {
396 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
397 UChar32 ch = charClass->m_matchesUnicode[i];
398 matchDest.append(branch32(Equal, character, Imm32(ch)));
399 }
400 }
401
402 if (charClass->m_rangesUnicode.size()) {
403 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
404 UChar32 lo = charClass->m_rangesUnicode[i].begin;
405 UChar32 hi = charClass->m_rangesUnicode[i].end;
406
407 Jump below = branch32(LessThan, character, Imm32(lo));
408 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
409 below.link(this);
410 }
411 }
412
413 if (charClass->m_matches.size() || charClass->m_ranges.size())
414 unicodeFail = jump();
415 isAscii.link(this);
416 }
417
418 if (charClass->m_ranges.size()) {
419 unsigned matchIndex = 0;
420 JumpList failures;
421 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
422 while (matchIndex < charClass->m_matches.size())
423 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
424
425 failures.link(this);
426 } else if (charClass->m_matches.size()) {
427 // optimization: gather 'a','A' etc back together, can mask & test once.
428 Vector<char> matchesAZaz;
429
430 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
431 char ch = charClass->m_matches[i];
432 if (m_pattern.ignoreCase()) {
433 if (isASCIILower(ch)) {
434 matchesAZaz.append(ch);
435 continue;
436 }
437 if (isASCIIUpper(ch))
438 continue;
439 }
440 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
441 }
442
443 if (unsigned countAZaz = matchesAZaz.size()) {
444 or32(TrustedImm32(32), character);
445 for (unsigned i = 0; i < countAZaz; ++i)
446 matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
447 }
448 }
449
450 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
451 unicodeFail.link(this);
452 }
453
454#ifdef JIT_UNICODE_EXPRESSIONS
455 void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failures, const RegisterID character)
456 {
457 ASSERT(term->type == PatternTerm::TypeCharacterClass);
458
459 if (term->characterClass->hasOneCharacterSize() && !term->invert())
460 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
461 else {
462 add32(TrustedImm32(1), index);
463 failures.append(atEndOfInput());
464 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
465 add32(TrustedImm32(1), index);
466 isBMPChar.link(this);
467 }
468 }
469#endif
470
471 // Jumps if input not available; will have (incorrectly) incremented already!
472 Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
473 {
474 if (countToCheck)
475 add32(Imm32(countToCheck), index);
476 return branch32(Above, index, length);
477 }
478
479 Jump jumpIfAvailableInput(unsigned countToCheck)
480 {
481 add32(Imm32(countToCheck), index);
482 return branch32(BelowOrEqual, index, length);
483 }
484
485 Jump checkNotEnoughInput(RegisterID additionalAmount)
486 {
487 add32(index, additionalAmount);
488 return branch32(Above, additionalAmount, length);
489 }
490
491 Jump checkInput()
492 {
493 return branch32(BelowOrEqual, index, length);
494 }
495
496 Jump atEndOfInput()
497 {
498 return branch32(Equal, index, length);
499 }
500
501 Jump notAtEndOfInput()
502 {
503 return branch32(NotEqual, index, length);
504 }
505
506 BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
507 {
508 RegisterID base = input;
509
510 // BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
511 // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
512 // for addressing in extreme cases where we might underflow. Therefore we check to see if
513 // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
514 // If so, we do our own address calculating by adjusting the base, using the result register
515 // as a temp address register.
516 unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
517 unsigned offsetAdjustAmount = 0x40000000;
518 if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
519 base = tempReg;
520 move(input, base);
521 while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
522 subPtr(TrustedImm32(offsetAdjustAmount), base);
523 if (m_charSize != Char8)
524 subPtr(TrustedImm32(offsetAdjustAmount), base);
525 negativeCharacterOffset -= offsetAdjustAmount;
526 }
527 }
528
529 Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));
530
531 if (m_charSize == Char8)
532 return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());
533
534 return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
535 }
536
537#ifdef JIT_UNICODE_EXPRESSIONS
538 void tryReadUnicodeCharImpl(RegisterID resultReg)
539 {
540 ASSERT(m_charSize == Char16);
541
542 JumpList notUnicode;
543
544 load16Unaligned(regUnicodeInputAndTrail, resultReg);
545 and32(surrogateTagMask, resultReg, regT2);
546 notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
547 addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
548 notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, endOfStringAddress));
549 load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
550 and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
551 notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
552 sub32(leadingSurrogateTag, resultReg);
553 sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
554 lshift32(TrustedImm32(10), resultReg);
555 or32(regUnicodeInputAndTrail, resultReg);
556 add32(supplementaryPlanesBase, resultReg);
557 notUnicode.link(this);
558 }
559
560 void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
561 {
562 ASSERT(m_charSize == Char16);
563
564 getEffectiveAddress(address, regUnicodeInputAndTrail);
565
566 if (resultReg == regT0)
567 m_tryReadUnicodeCharacterCalls.append(nearCall());
568 else
569 tryReadUnicodeCharImpl(resultReg);
570 }
571#endif
572
573 void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
574 {
575 BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
576
577 if (m_charSize == Char8)
578 load8(address, resultReg);
579 else
580 load16Unaligned(address, resultReg);
581 }
582
583 void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
584 {
585 BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
586
587 if (m_charSize == Char8)
588 load8(address, resultReg);
589#ifdef JIT_UNICODE_EXPRESSIONS
590 else if (m_decodeSurrogatePairs)
591 tryReadUnicodeChar(address, resultReg);
592#endif
593 else
594 load16Unaligned(address, resultReg);
595 }
596
597 Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
598 {
599 readCharacter(negativeCharacterOffset, character);
600
601 // For case-insesitive compares, non-ascii characters that have different
602 // upper & lower case representations are converted to a character class.
603 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
604 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
605 or32(TrustedImm32(0x20), character);
606 ch |= 0x20;
607 }
608
609 return branch32(NotEqual, character, Imm32(ch));
610 }
611
612 void storeToFrame(RegisterID reg, unsigned frameLocation)
613 {
614 poke(reg, frameLocation);
615 }
616
617 void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
618 {
619 poke(imm, frameLocation);
620 }
621
622#if CPU(ARM64) || CPU(X86_64)
623 void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
624 {
625 poke(imm, frameLocation);
626 }
627#endif
628
629 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
630 {
631 return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
632 }
633
634 void loadFromFrame(unsigned frameLocation, RegisterID reg)
635 {
636 peek(reg, frameLocation);
637 }
638
639 void loadFromFrameAndJump(unsigned frameLocation)
640 {
641 jump(Address(stackPointerRegister, frameLocation * sizeof(void*)), YarrBacktrackPtrTag);
642 }
643
644 unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
645 {
646 if (!callFrameSize)
647 return 0;
648
649 callFrameSize *= sizeof(void*);
650 if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
651 CRASH();
652 callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
653 return callFrameSize;
654 }
655 void initCallFrame()
656 {
657 unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
658 if (callFrameSizeInBytes) {
659#if CPU(X86_64) || CPU(ARM64)
660 if (Options::zeroStackFrame()) {
661 // We need to start from the stack pointer, because we could have spilled callee saves
662 move(stackPointerRegister, regT0);
663 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
664 if (callFrameSizeInBytes <= 128) {
665 for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
666 storePtr(TrustedImm32(0), Address(regT0, -8 - offset));
667 } else {
668 Label zeroLoop = label();
669 subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
670#if CPU(ARM64)
671 storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
672#else
673 storePtr(TrustedImm32(0), Address(regT0));
674 storePtr(TrustedImm32(0), Address(regT0, sizeof(intptr_t)));
675#endif
676 branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
677 }
678 } else
679#endif
680 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
681
682 }
683 }
684 void removeCallFrame()
685 {
686 unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
687 if (callFrameSizeInBytes)
688 addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
689 }
690
691 void generateFailReturn()
692 {
693 move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
694 move(TrustedImm32(0), returnRegister2);
695 generateReturn();
696 }
697
698 void generateJITFailReturn()
699 {
700 if (m_abortExecution.empty() && m_hitMatchLimit.empty())
701 return;
702
703 JumpList finishExiting;
704 if (!m_abortExecution.empty()) {
705 m_abortExecution.link(this);
706 move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
707 finishExiting.append(jump());
708 }
709
710 if (!m_hitMatchLimit.empty()) {
711 m_hitMatchLimit.link(this);
712 move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
713 }
714
715 finishExiting.link(this);
716 removeCallFrame();
717 move(TrustedImm32(0), returnRegister2);
718 generateReturn();
719 }
720
721 // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
722 void setSubpatternStart(RegisterID reg, unsigned subpattern)
723 {
724 ASSERT(subpattern);
725 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
726 store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
727 }
728 void setSubpatternEnd(RegisterID reg, unsigned subpattern)
729 {
730 ASSERT(subpattern);
731 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
732 store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
733 }
734 void clearSubpatternStart(unsigned subpattern)
735 {
736 ASSERT(subpattern);
737 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
738 store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
739 }
740
741 void clearMatches(unsigned subpattern, unsigned lastSubpattern)
742 {
743 for (; subpattern <= lastSubpattern; subpattern++)
744 clearSubpatternStart(subpattern);
745 }
746
747 // We use one of three different strategies to track the start of the current match,
748 // while matching.
749 // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
750 // at the end of matching. This is irrespective of compileMode, and in this case
751 // these methods should never be called.
752 // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
753 // vector, store the match start in the output vector.
754 // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
755 // in this register.
756 void setMatchStart(RegisterID reg)
757 {
758 ASSERT(!m_pattern.m_body->m_hasFixedSize);
759 if (compileMode == IncludeSubpatterns)
760 store32(reg, output);
761 else
762 move(reg, output);
763 }
764 void getMatchStart(RegisterID reg)
765 {
766 ASSERT(!m_pattern.m_body->m_hasFixedSize);
767 if (compileMode == IncludeSubpatterns)
768 load32(output, reg);
769 else
770 move(output, reg);
771 }
772
773 enum YarrOpCode : uint8_t {
774 // These nodes wrap body alternatives - those in the main disjunction,
775 // rather than subpatterns or assertions. These are chained together in
776 // a doubly linked list, with a 'begin' node for the first alternative,
777 // a 'next' node for each subsequent alternative, and an 'end' node at
778 // the end. In the case of repeating alternatives, the 'end' node also
779 // has a reference back to 'begin'.
780 OpBodyAlternativeBegin,
781 OpBodyAlternativeNext,
782 OpBodyAlternativeEnd,
783 // Similar to the body alternatives, but used for subpatterns with two
784 // or more alternatives.
785 OpNestedAlternativeBegin,
786 OpNestedAlternativeNext,
787 OpNestedAlternativeEnd,
788 // Used for alternatives in subpatterns where there is only a single
789 // alternative (backtracking is easier in these cases), or for alternatives
790 // which never need to be backtracked (those in parenthetical assertions,
791 // terminal subpatterns).
792 OpSimpleNestedAlternativeBegin,
793 OpSimpleNestedAlternativeNext,
794 OpSimpleNestedAlternativeEnd,
795 // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
796 OpParenthesesSubpatternOnceBegin,
797 OpParenthesesSubpatternOnceEnd,
798 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
799 OpParenthesesSubpatternTerminalBegin,
800 OpParenthesesSubpatternTerminalEnd,
801 // Used to wrap generic captured matches
802 OpParenthesesSubpatternBegin,
803 OpParenthesesSubpatternEnd,
804 // Used to wrap parenthetical assertions.
805 OpParentheticalAssertionBegin,
806 OpParentheticalAssertionEnd,
807 // Wraps all simple terms (pattern characters, character classes).
808 OpTerm,
809 // Where an expression contains only 'once through' body alternatives
810 // and no repeating ones, this op is used to return match failure.
811 OpMatchFailed
812 };
813
814 // This structure is used to hold the compiled opcode information,
815 // including reference back to the original PatternTerm/PatternAlternatives,
816 // and JIT compilation data structures.
817 struct YarrOp {
818 explicit YarrOp(PatternTerm* term)
819 : m_term(term)
820 , m_op(OpTerm)
821 , m_isDeadCode(false)
822 {
823 }
824
825 explicit YarrOp(YarrOpCode op)
826 : m_op(op)
827 , m_isDeadCode(false)
828 {
829 }
830
831 // For alternatives, this holds the PatternAlternative and doubly linked
832 // references to this alternative's siblings. In the case of the
833 // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
834 // m_nextOp will reference the OpBodyAlternativeBegin node of the first
835 // repeating alternative.
836 PatternAlternative* m_alternative;
837 size_t m_previousOp;
838 size_t m_nextOp;
839
840 // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
841 PatternTerm* m_term;
842 YarrOpCode m_op;
843
844 // Used to record a set of Jumps out of the generated code, typically
845 // used for jumps out to backtracking code, and a single reentry back
846 // into the code for a node (likely where a backtrack will trigger
847 // rematching).
848 Label m_reentry;
849 JumpList m_jumps;
850
851 // Used for backtracking when the prior alternative did not consume any
852 // characters but matched.
853 Jump m_zeroLengthMatch;
854
855 // This flag is used to null out the second pattern character, when
856 // two are fused to match a pair together.
857 bool m_isDeadCode;
858
859 // Currently used in the case of some of the more complex management of
860 // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
861 // recalculating it.
862 Checked<unsigned> m_checkAdjust;
863
864 // Used by OpNestedAlternativeNext/End to hold the pointer to the
865 // value that will be pushed into the pattern's frame to return to,
866 // upon backtracking back into the disjunction.
867 DataLabelPtr m_returnAddress;
868 };
869
870 // BacktrackingState
871 // This class encapsulates information about the state of code generation
872 // whilst generating the code for backtracking, when a term fails to match.
873 // Upon entry to code generation of the backtracking code for a given node,
874 // the Backtracking state will hold references to all control flow sources
875 // that are outputs in need of further backtracking from the prior node
876 // generated (which is the subsequent operation in the regular expression,
877 // and in the m_ops Vector, since we generated backtracking backwards).
878 // These references to control flow take the form of:
879 // - A jump list of jumps, to be linked to code that will backtrack them
880 // further.
881 // - A set of DataLabelPtr values, to be populated with values to be
882 // treated effectively as return addresses backtracking into complex
883 // subpatterns.
884 // - A flag indicating that the current sequence of generated code up to
885 // this point requires backtracking.
886 class BacktrackingState {
887 public:
888 BacktrackingState()
889 : m_pendingFallthrough(false)
890 {
891 }
892
893 // Add a jump or jumps, a return address, or set the flag indicating
894 // that the current 'fallthrough' control flow requires backtracking.
895 void append(const Jump& jump)
896 {
897 m_laterFailures.append(jump);
898 }
899 void append(JumpList& jumpList)
900 {
901 m_laterFailures.append(jumpList);
902 }
903 void append(const DataLabelPtr& returnAddress)
904 {
905 m_pendingReturns.append(returnAddress);
906 }
907 void fallthrough()
908 {
909 ASSERT(!m_pendingFallthrough);
910 m_pendingFallthrough = true;
911 }
912
913 // These methods clear the backtracking state, either linking to the
914 // current location, a provided label, or copying the backtracking out
915 // to a JumpList. All actions may require code generation to take place,
916 // and as such are passed a pointer to the assembler.
917 void link(MacroAssembler* assembler)
918 {
919 if (m_pendingReturns.size()) {
920 Label here(assembler);
921 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
922 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
923 m_pendingReturns.clear();
924 }
925 m_laterFailures.link(assembler);
926 m_laterFailures.clear();
927 m_pendingFallthrough = false;
928 }
929 void linkTo(Label label, MacroAssembler* assembler)
930 {
931 if (m_pendingReturns.size()) {
932 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
933 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
934 m_pendingReturns.clear();
935 }
936 if (m_pendingFallthrough)
937 assembler->jump(label);
938 m_laterFailures.linkTo(label, assembler);
939 m_laterFailures.clear();
940 m_pendingFallthrough = false;
941 }
942 void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
943 {
944 if (m_pendingReturns.size()) {
945 Label here(assembler);
946 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
947 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
948 m_pendingReturns.clear();
949 m_pendingFallthrough = true;
950 }
951 if (m_pendingFallthrough)
952 jumpList.append(assembler->jump());
953 jumpList.append(m_laterFailures);
954 m_laterFailures.clear();
955 m_pendingFallthrough = false;
956 }
957
958 bool isEmpty()
959 {
960 return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
961 }
962
963 // Called at the end of code generation to link all return addresses.
964 void linkDataLabels(LinkBuffer& linkBuffer)
965 {
966 ASSERT(isEmpty());
967 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
968 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf<YarrBacktrackPtrTag>(m_backtrackRecords[i].m_backtrackLocation));
969 }
970
971 private:
972 struct ReturnAddressRecord {
973 ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
974 : m_dataLabel(dataLabel)
975 , m_backtrackLocation(backtrackLocation)
976 {
977 }
978
979 DataLabelPtr m_dataLabel;
980 Label m_backtrackLocation;
981 };
982
983 JumpList m_laterFailures;
984 bool m_pendingFallthrough;
985 Vector<DataLabelPtr, 4> m_pendingReturns;
986 Vector<ReturnAddressRecord, 4> m_backtrackRecords;
987 };
988
989 // Generation methods:
990 // ===================
991
992 // This method provides a default implementation of backtracking common
993 // to many terms; terms commonly jump out of the forwards matching path
994 // on any failed conditions, and add these jumps to the m_jumps list. If
995 // no special handling is required we can often just backtrack to m_jumps.
996 void backtrackTermDefault(size_t opIndex)
997 {
998 YarrOp& op = m_ops[opIndex];
999 m_backtrackingState.append(op.m_jumps);
1000 }
1001
1002 void generateAssertionBOL(size_t opIndex)
1003 {
1004 YarrOp& op = m_ops[opIndex];
1005 PatternTerm* term = op.m_term;
1006
1007 if (m_pattern.multiline()) {
1008 const RegisterID character = regT0;
1009
1010 JumpList matchDest;
1011 if (!term->inputPosition)
1012 matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));
1013
1014 readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1015 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1016 op.m_jumps.append(jump());
1017
1018 matchDest.link(this);
1019 } else {
1020 // Erk, really should poison out these alternatives early. :-/
1021 if (term->inputPosition)
1022 op.m_jumps.append(jump());
1023 else
1024 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
1025 }
1026 }
1027 void backtrackAssertionBOL(size_t opIndex)
1028 {
1029 backtrackTermDefault(opIndex);
1030 }
1031
1032 void generateAssertionEOL(size_t opIndex)
1033 {
1034 YarrOp& op = m_ops[opIndex];
1035 PatternTerm* term = op.m_term;
1036
1037 if (m_pattern.multiline()) {
1038 const RegisterID character = regT0;
1039
1040 JumpList matchDest;
1041 if (term->inputPosition == m_checkedOffset.unsafeGet())
1042 matchDest.append(atEndOfInput());
1043
1044 readCharacter(m_checkedOffset - term->inputPosition, character);
1045 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1046 op.m_jumps.append(jump());
1047
1048 matchDest.link(this);
1049 } else {
1050 if (term->inputPosition == m_checkedOffset.unsafeGet())
1051 op.m_jumps.append(notAtEndOfInput());
1052 // Erk, really should poison out these alternatives early. :-/
1053 else
1054 op.m_jumps.append(jump());
1055 }
1056 }
1057 void backtrackAssertionEOL(size_t opIndex)
1058 {
1059 backtrackTermDefault(opIndex);
1060 }
1061
1062 // Also falls though on nextIsNotWordChar.
1063 void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
1064 {
1065 YarrOp& op = m_ops[opIndex];
1066 PatternTerm* term = op.m_term;
1067
1068 const RegisterID character = regT0;
1069
1070 if (term->inputPosition == m_checkedOffset.unsafeGet())
1071 nextIsNotWordChar.append(atEndOfInput());
1072
1073 readCharacter(m_checkedOffset - term->inputPosition, character);
1074
1075 CharacterClass* wordcharCharacterClass;
1076
1077 if (m_unicodeIgnoreCase)
1078 wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1079 else
1080 wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1081
1082 matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
1083 }
1084
1085 void generateAssertionWordBoundary(size_t opIndex)
1086 {
1087 YarrOp& op = m_ops[opIndex];
1088 PatternTerm* term = op.m_term;
1089
1090 const RegisterID character = regT0;
1091
1092 Jump atBegin;
1093 JumpList matchDest;
1094 if (!term->inputPosition)
1095 atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
1096 readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1097
1098 CharacterClass* wordcharCharacterClass;
1099
1100 if (m_unicodeIgnoreCase)
1101 wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1102 else
1103 wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1104
1105 matchCharacterClass(character, matchDest, wordcharCharacterClass);
1106 if (!term->inputPosition)
1107 atBegin.link(this);
1108
1109 // We fall through to here if the last character was not a wordchar.
1110 JumpList nonWordCharThenWordChar;
1111 JumpList nonWordCharThenNonWordChar;
1112 if (term->invert()) {
1113 matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
1114 nonWordCharThenWordChar.append(jump());
1115 } else {
1116 matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
1117 nonWordCharThenNonWordChar.append(jump());
1118 }
1119 op.m_jumps.append(nonWordCharThenNonWordChar);
1120
1121 // We jump here if the last character was a wordchar.
1122 matchDest.link(this);
1123 JumpList wordCharThenWordChar;
1124 JumpList wordCharThenNonWordChar;
1125 if (term->invert()) {
1126 matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
1127 wordCharThenWordChar.append(jump());
1128 } else {
1129 matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
1130 // This can fall-though!
1131 }
1132
1133 op.m_jumps.append(wordCharThenWordChar);
1134
1135 nonWordCharThenWordChar.link(this);
1136 wordCharThenNonWordChar.link(this);
1137 }
1138 void backtrackAssertionWordBoundary(size_t opIndex)
1139 {
1140 backtrackTermDefault(opIndex);
1141 }
1142
1143#if ENABLE(YARR_JIT_BACKREFERENCES)
1144 void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
1145 {
1146 YarrOp& op = m_ops[opIndex];
1147 PatternTerm* term = op.m_term;
1148 unsigned subpatternId = term->backReferenceSubpatternId;
1149
1150 Label loop(this);
1151
1152 readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
1153 readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
1154
1155 if (!m_pattern.ignoreCase())
1156 characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1157 else {
1158 Jump charactersMatch = branch32(Equal, character, patternCharacter);
1159 ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1160 load16(characterTableEntry, character);
1161 ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1162 load16(patternTableEntry, patternCharacter);
1163 characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1164 charactersMatch.link(this);
1165 }
1166
1167
1168 add32(TrustedImm32(1), index);
1169 add32(TrustedImm32(1), patternIndex);
1170
1171 branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
1172 }
1173
1174 void generateBackReference(size_t opIndex)
1175 {
1176 YarrOp& op = m_ops[opIndex];
1177 PatternTerm* term = op.m_term;
1178
1179 if (m_pattern.ignoreCase() && m_charSize != Char8) {
1180 m_failureReason = JITFailureReason::BackReference;
1181 return;
1182 }
1183
1184 unsigned subpatternId = term->backReferenceSubpatternId;
1185 unsigned parenthesesFrameLocation = term->frameLocation;
1186
1187 const RegisterID characterOrTemp = regT0;
1188 const RegisterID patternIndex = regT1;
1189 const RegisterID patternTemp = regT2;
1190
1191 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1192 if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
1193 storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1194
1195 JumpList matches;
1196
1197 if (term->quantityType != QuantifierNonGreedy) {
1198 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1199 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1200
1201 // An empty match is successful without consuming characters
1202 if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
1203 matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
1204 matches.append(branch32(Equal, patternIndex, patternTemp));
1205 } else {
1206 Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1207 Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1208 zeroLengthMatch.link(this);
1209 storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1210 matches.append(jump());
1211 tryNonZeroMatch.link(this);
1212 }
1213 }
1214
1215 switch (term->quantityType) {
1216 case QuantifierFixedCount: {
1217 Label outerLoop(this);
1218
1219 // PatternTemp should contain pattern end index at this point
1220 sub32(patternIndex, patternTemp);
1221 op.m_jumps.append(checkNotEnoughInput(patternTemp));
1222
1223 matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
1224
1225 if (term->quantityMaxCount != 1) {
1226 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1227 add32(TrustedImm32(1), characterOrTemp);
1228 storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1229 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1230 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1231 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1232 jump(outerLoop);
1233 }
1234 matches.link(this);
1235 break;
1236 }
1237
1238 case QuantifierGreedy: {
1239 JumpList incompleteMatches;
1240
1241 Label outerLoop(this);
1242
1243 // PatternTemp should contain pattern end index at this point
1244 sub32(patternIndex, patternTemp);
1245 matches.append(checkNotEnoughInput(patternTemp));
1246
1247 matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1248
1249 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1250 add32(TrustedImm32(1), characterOrTemp);
1251 storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1252 if (term->quantityMaxCount != quantifyInfinite)
1253 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1254 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1255 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1256
1257 // Store current index in frame for restoring after a partial match
1258 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1259 jump(outerLoop);
1260
1261 incompleteMatches.link(this);
1262 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1263
1264 matches.link(this);
1265 op.m_reentry = label();
1266 break;
1267 }
1268
1269 case QuantifierNonGreedy: {
1270 JumpList incompleteMatches;
1271
1272 matches.append(jump());
1273
1274 op.m_reentry = label();
1275
1276 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1277 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1278
1279 // An empty match is successful without consuming characters
1280 Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1281 Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1282 zeroLengthMatch.link(this);
1283 storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1284 matches.append(jump());
1285 tryNonZeroMatch.link(this);
1286
1287 // Check if we have input remaining to match
1288 sub32(patternIndex, patternTemp);
1289 matches.append(checkNotEnoughInput(patternTemp));
1290
1291 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1292
1293 matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1294
1295 matches.append(jump());
1296
1297 incompleteMatches.link(this);
1298 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1299
1300 matches.link(this);
1301 break;
1302 }
1303 }
1304 }
1305 void backtrackBackReference(size_t opIndex)
1306 {
1307 YarrOp& op = m_ops[opIndex];
1308 PatternTerm* term = op.m_term;
1309
1310 unsigned subpatternId = term->backReferenceSubpatternId;
1311
1312 m_backtrackingState.link(this);
1313 op.m_jumps.link(this);
1314
1315 JumpList failures;
1316
1317 unsigned parenthesesFrameLocation = term->frameLocation;
1318 switch (term->quantityType) {
1319 case QuantifierFixedCount:
1320 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1321 break;
1322
1323 case QuantifierGreedy: {
1324 const RegisterID matchAmount = regT0;
1325 const RegisterID patternStartIndex = regT1;
1326 const RegisterID patternEndIndexOrLen = regT2;
1327
1328 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1329 failures.append(branchTest32(Zero, matchAmount));
1330
1331 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
1332 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
1333 sub32(patternStartIndex, patternEndIndexOrLen);
1334 sub32(patternEndIndexOrLen, index);
1335
1336 sub32(TrustedImm32(1), matchAmount);
1337 storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1338 jump(op.m_reentry);
1339 break;
1340 }
1341
1342 case QuantifierNonGreedy: {
1343 const RegisterID matchAmount = regT0;
1344
1345 failures.append(atEndOfInput());
1346 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1347 if (term->quantityMaxCount != quantifyInfinite)
1348 failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
1349 add32(TrustedImm32(1), matchAmount);
1350 storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1351 jump(op.m_reentry);
1352 break;
1353 }
1354 }
1355 failures.link(this);
1356 m_backtrackingState.fallthrough();
1357 }
1358#endif
1359
1360 void generatePatternCharacterOnce(size_t opIndex)
1361 {
1362 YarrOp& op = m_ops[opIndex];
1363
1364 if (op.m_isDeadCode)
1365 return;
1366
1367 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
1368 // node, so there must always be at least one more node.
1369 ASSERT(opIndex + 1 < m_ops.size());
1370 YarrOp* nextOp = &m_ops[opIndex + 1];
1371
1372 PatternTerm* term = op.m_term;
1373 UChar32 ch = term->patternCharacter;
1374
1375 if ((ch > 0xff) && (m_charSize == Char8)) {
1376 // Have a 16 bit pattern character and an 8 bit string - short circuit
1377 op.m_jumps.append(jump());
1378 return;
1379 }
1380
1381 const RegisterID character = regT0;
1382#if CPU(X86_64) || CPU(ARM64)
1383 unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
1384#else
1385 unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
1386#endif
1387 uint64_t ignoreCaseMask = 0;
1388#if CPU(BIG_ENDIAN)
1389 uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
1390#else
1391 uint64_t allCharacters = ch;
1392#endif
1393 unsigned numberCharacters;
1394 unsigned startTermPosition = term->inputPosition;
1395
1396 // For case-insesitive compares, non-ascii characters that have different
1397 // upper & lower case representations are converted to a character class.
1398 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1399
1400 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1401#if CPU(BIG_ENDIAN)
1402 ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
1403#else
1404 ignoreCaseMask |= 32;
1405#endif
1406 }
1407
1408 for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
1409 PatternTerm* nextTerm = nextOp->m_term;
1410
1411 // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
1412 // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
1413 if (nextTerm->type != PatternTerm::TypePatternCharacter
1414 || nextTerm->quantityType != QuantifierFixedCount
1415 || nextTerm->quantityMaxCount != 1
1416 || nextTerm->inputPosition != (startTermPosition + numberCharacters)
1417 || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
1418 break;
1419
1420 nextOp->m_isDeadCode = true;
1421
1422#if CPU(BIG_ENDIAN)
1423 int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
1424#else
1425 int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
1426#endif
1427
1428 UChar32 currentCharacter = nextTerm->patternCharacter;
1429
1430 if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
1431 // Have a 16 bit pattern character and an 8 bit string - short circuit
1432 op.m_jumps.append(jump());
1433 return;
1434 }
1435
1436 // For case-insesitive compares, non-ascii characters that have different
1437 // upper & lower case representations are converted to a character class.
1438 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));
1439
1440 allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
1441
1442 if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
1443 ignoreCaseMask |= 32ULL << shiftAmount;
1444 }
1445
1446 if (m_decodeSurrogatePairs)
1447 op.m_jumps.append(jumpIfNoAvailableInput());
1448
1449 if (m_charSize == Char8) {
1450 auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1451 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1452 };
1453
1454 auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
1455 load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
1456 if (mask)
1457 or32(Imm32(mask), character);
1458 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1459 };
1460
1461 auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1462 if (mask) {
1463 load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1464 if (mask)
1465 or32(Imm32(mask), character);
1466 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1467 return;
1468 }
1469 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1470 };
1471
1472#if CPU(X86_64) || CPU(ARM64)
1473 auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1474 load64(negativeOffsetIndexedAddress(offset, character), character);
1475 if (mask)
1476 or64(TrustedImm64(mask), character);
1477 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1478 };
1479#endif
1480
1481 switch (numberCharacters) {
1482 case 1:
1483 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1484 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1485 return;
1486 case 2: {
1487 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1488 return;
1489 }
1490 case 3: {
1491 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1492 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
1493 return;
1494 }
1495 case 4: {
1496 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1497 return;
1498 }
1499#if CPU(X86_64) || CPU(ARM64)
1500 case 5: {
1501 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1502 check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
1503 return;
1504 }
1505 case 6: {
1506 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1507 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1508 return;
1509 }
1510 case 7: {
1511 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1512 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1513 check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
1514 return;
1515 }
1516 case 8: {
1517 check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1518 return;
1519 }
1520#endif
1521 }
1522 } else {
1523 auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1524 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1525 };
1526
1527 auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1528 if (mask) {
1529 load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1530 if (mask)
1531 or32(Imm32(mask), character);
1532 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1533 return;
1534 }
1535 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1536 };
1537
1538#if CPU(X86_64) || CPU(ARM64)
1539 auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1540 load64(negativeOffsetIndexedAddress(offset, character), character);
1541 if (mask)
1542 or64(TrustedImm64(mask), character);
1543 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1544 };
1545#endif
1546
1547 switch (numberCharacters) {
1548 case 1:
1549 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1550 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1551 return;
1552 case 2:
1553 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1554 return;
1555#if CPU(X86_64) || CPU(ARM64)
1556 case 3:
1557 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1558 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
1559 return;
1560 case 4:
1561 check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1562 return;
1563#endif
1564 }
1565 }
1566 }
1567 void backtrackPatternCharacterOnce(size_t opIndex)
1568 {
1569 backtrackTermDefault(opIndex);
1570 }
1571
1572 void generatePatternCharacterFixed(size_t opIndex)
1573 {
1574 YarrOp& op = m_ops[opIndex];
1575 PatternTerm* term = op.m_term;
1576 UChar32 ch = term->patternCharacter;
1577
1578 const RegisterID character = regT0;
1579 const RegisterID countRegister = regT1;
1580
1581 if (m_decodeSurrogatePairs)
1582 op.m_jumps.append(jumpIfNoAvailableInput());
1583
1584 move(index, countRegister);
1585 Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1586 scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
1587 sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1588
1589 Label loop(this);
1590 readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1591 // For case-insesitive compares, non-ascii characters that have different
1592 // upper & lower case representations are converted to a character class.
1593 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1594 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1595 or32(TrustedImm32(0x20), character);
1596 ch |= 0x20;
1597 }
1598
1599 op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
1600#ifdef JIT_UNICODE_EXPRESSIONS
1601 if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
1602 add32(TrustedImm32(2), countRegister);
1603 else
1604#endif
1605 add32(TrustedImm32(1), countRegister);
1606 branch32(NotEqual, countRegister, index).linkTo(loop, this);
1607 }
1608 void backtrackPatternCharacterFixed(size_t opIndex)
1609 {
1610 backtrackTermDefault(opIndex);
1611 }
1612
1613 void generatePatternCharacterGreedy(size_t opIndex)
1614 {
1615 YarrOp& op = m_ops[opIndex];
1616 PatternTerm* term = op.m_term;
1617 UChar32 ch = term->patternCharacter;
1618
1619 const RegisterID character = regT0;
1620 const RegisterID countRegister = regT1;
1621
1622 move(TrustedImm32(0), countRegister);
1623
1624 // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1625 if (!((ch > 0xff) && (m_charSize == Char8))) {
1626 JumpList failures;
1627 Label loop(this);
1628 failures.append(atEndOfInput());
1629 failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1630
1631 add32(TrustedImm32(1), index);
1632#ifdef JIT_UNICODE_EXPRESSIONS
1633 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1634 Jump surrogatePairOk = notAtEndOfInput();
1635 sub32(TrustedImm32(1), index);
1636 failures.append(jump());
1637 surrogatePairOk.link(this);
1638 add32(TrustedImm32(1), index);
1639 }
1640#endif
1641 add32(TrustedImm32(1), countRegister);
1642
1643 if (term->quantityMaxCount == quantifyInfinite)
1644 jump(loop);
1645 else
1646 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1647
1648 failures.link(this);
1649 }
1650 op.m_reentry = label();
1651
1652 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1653 }
1654 void backtrackPatternCharacterGreedy(size_t opIndex)
1655 {
1656 YarrOp& op = m_ops[opIndex];
1657 PatternTerm* term = op.m_term;
1658
1659 const RegisterID countRegister = regT1;
1660
1661 m_backtrackingState.link(this);
1662
1663 loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1664 m_backtrackingState.append(branchTest32(Zero, countRegister));
1665 sub32(TrustedImm32(1), countRegister);
1666 if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
1667 sub32(TrustedImm32(1), index);
1668 else
1669 sub32(TrustedImm32(2), index);
1670 jump(op.m_reentry);
1671 }
1672
1673 void generatePatternCharacterNonGreedy(size_t opIndex)
1674 {
1675 YarrOp& op = m_ops[opIndex];
1676 PatternTerm* term = op.m_term;
1677
1678 const RegisterID countRegister = regT1;
1679
1680 move(TrustedImm32(0), countRegister);
1681 op.m_reentry = label();
1682 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1683 }
1684 void backtrackPatternCharacterNonGreedy(size_t opIndex)
1685 {
1686 YarrOp& op = m_ops[opIndex];
1687 PatternTerm* term = op.m_term;
1688 UChar32 ch = term->patternCharacter;
1689
1690 const RegisterID character = regT0;
1691 const RegisterID countRegister = regT1;
1692
1693 m_backtrackingState.link(this);
1694
1695 loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1696
1697 // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1698 if (!((ch > 0xff) && (m_charSize == Char8))) {
1699 JumpList nonGreedyFailures;
1700 nonGreedyFailures.append(atEndOfInput());
1701 if (term->quantityMaxCount != quantifyInfinite)
1702 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1703 nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1704
1705 add32(TrustedImm32(1), index);
1706#ifdef JIT_UNICODE_EXPRESSIONS
1707 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1708 Jump surrogatePairOk = notAtEndOfInput();
1709 sub32(TrustedImm32(1), index);
1710 nonGreedyFailures.append(jump());
1711 surrogatePairOk.link(this);
1712 add32(TrustedImm32(1), index);
1713 }
1714#endif
1715 add32(TrustedImm32(1), countRegister);
1716
1717 jump(op.m_reentry);
1718 nonGreedyFailures.link(this);
1719 }
1720
1721 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1722 // subtract countRegister*2 for non-BMP characters
1723 lshift32(TrustedImm32(1), countRegister);
1724 }
1725
1726 sub32(countRegister, index);
1727 m_backtrackingState.fallthrough();
1728 }
1729
1730 void generateCharacterClassOnce(size_t opIndex)
1731 {
1732 YarrOp& op = m_ops[opIndex];
1733 PatternTerm* term = op.m_term;
1734
1735 const RegisterID character = regT0;
1736
1737 if (m_decodeSurrogatePairs) {
1738 op.m_jumps.append(jumpIfNoAvailableInput());
1739 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1740 }
1741
1742 JumpList matchDest;
1743 readCharacter(m_checkedOffset - term->inputPosition, character);
1744 // If we are matching the "any character" builtin class we only need to read the
1745 // character and don't need to match as it will always succeed.
1746 if (term->invert() || !term->characterClass->m_anyCharacter) {
1747 matchCharacterClass(character, matchDest, term->characterClass);
1748
1749 if (term->invert())
1750 op.m_jumps.append(matchDest);
1751 else {
1752 op.m_jumps.append(jump());
1753 matchDest.link(this);
1754 }
1755 }
1756#ifdef JIT_UNICODE_EXPRESSIONS
1757 if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
1758 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1759 add32(TrustedImm32(1), index);
1760 isBMPChar.link(this);
1761 }
1762#endif
1763 }
1764 void backtrackCharacterClassOnce(size_t opIndex)
1765 {
1766#ifdef JIT_UNICODE_EXPRESSIONS
1767 if (m_decodeSurrogatePairs) {
1768 YarrOp& op = m_ops[opIndex];
1769 PatternTerm* term = op.m_term;
1770
1771 m_backtrackingState.link(this);
1772 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1773 m_backtrackingState.fallthrough();
1774 }
1775#endif
1776 backtrackTermDefault(opIndex);
1777 }
1778
1779 void generateCharacterClassFixed(size_t opIndex)
1780 {
1781 YarrOp& op = m_ops[opIndex];
1782 PatternTerm* term = op.m_term;
1783
1784 const RegisterID character = regT0;
1785 const RegisterID countRegister = regT1;
1786
1787 if (m_decodeSurrogatePairs)
1788 op.m_jumps.append(jumpIfNoAvailableInput());
1789
1790 move(index, countRegister);
1791
1792 Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1793
1794#ifdef JIT_UNICODE_EXPRESSIONS
1795 if (m_decodeSurrogatePairs && term->characterClass->hasOnlyNonBMPCharacters() && !term->invert())
1796 scaledMaxCount *= 2;
1797#endif
1798 sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1799
1800 Label loop(this);
1801 JumpList matchDest;
1802 readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1803 // If we are matching the "any character" builtin class we only need to read the
1804 // character and don't need to match as it will always succeed.
1805 if (term->invert() || !term->characterClass->m_anyCharacter) {
1806 matchCharacterClass(character, matchDest, term->characterClass);
1807
1808 if (term->invert())
1809 op.m_jumps.append(matchDest);
1810 else {
1811 op.m_jumps.append(jump());
1812 matchDest.link(this);
1813 }
1814 }
1815
1816#ifdef JIT_UNICODE_EXPRESSIONS
1817 if (m_decodeSurrogatePairs) {
1818 if (term->characterClass->hasOneCharacterSize() && !term->invert())
1819 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
1820 else {
1821 add32(TrustedImm32(1), countRegister);
1822 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1823 op.m_jumps.append(atEndOfInput());
1824 add32(TrustedImm32(1), countRegister);
1825 add32(TrustedImm32(1), index);
1826 isBMPChar.link(this);
1827 }
1828 } else
1829#endif
1830 add32(TrustedImm32(1), countRegister);
1831 branch32(NotEqual, countRegister, index).linkTo(loop, this);
1832 }
1833 void backtrackCharacterClassFixed(size_t opIndex)
1834 {
1835 backtrackTermDefault(opIndex);
1836 }
1837
1838 void generateCharacterClassGreedy(size_t opIndex)
1839 {
1840 YarrOp& op = m_ops[opIndex];
1841 PatternTerm* term = op.m_term;
1842
1843 const RegisterID character = regT0;
1844 const RegisterID countRegister = regT1;
1845
1846 if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert()))
1847 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1848 move(TrustedImm32(0), countRegister);
1849
1850 JumpList failures;
1851 Label loop(this);
1852#ifdef JIT_UNICODE_EXPRESSIONS
1853 if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
1854 move(TrustedImm32(1), character);
1855 failures.append(checkNotEnoughInput(character));
1856 } else
1857#endif
1858 failures.append(atEndOfInput());
1859
1860 if (term->invert()) {
1861 readCharacter(m_checkedOffset - term->inputPosition, character);
1862 matchCharacterClass(character, failures, term->characterClass);
1863 } else {
1864 JumpList matchDest;
1865 readCharacter(m_checkedOffset - term->inputPosition, character);
1866 // If we are matching the "any character" builtin class for non-unicode patterns,
1867 // we only need to read the character and don't need to match as it will always succeed.
1868 if (!term->characterClass->m_anyCharacter) {
1869 matchCharacterClass(character, matchDest, term->characterClass);
1870 failures.append(jump());
1871 }
1872 matchDest.link(this);
1873 }
1874
1875#ifdef JIT_UNICODE_EXPRESSIONS
1876 if (m_decodeSurrogatePairs)
1877 advanceIndexAfterCharacterClassTermMatch(term, failures, character);
1878 else
1879#endif
1880 add32(TrustedImm32(1), index);
1881 add32(TrustedImm32(1), countRegister);
1882
1883 if (term->quantityMaxCount != quantifyInfinite) {
1884 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1885 failures.append(jump());
1886 } else
1887 jump(loop);
1888
1889 failures.link(this);
1890 op.m_reentry = label();
1891
1892 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1893 }
1894 void backtrackCharacterClassGreedy(size_t opIndex)
1895 {
1896 YarrOp& op = m_ops[opIndex];
1897 PatternTerm* term = op.m_term;
1898
1899 const RegisterID countRegister = regT1;
1900
1901 m_backtrackingState.link(this);
1902
1903 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1904 m_backtrackingState.append(branchTest32(Zero, countRegister));
1905 sub32(TrustedImm32(1), countRegister);
1906 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1907
1908 if (!m_decodeSurrogatePairs)
1909 sub32(TrustedImm32(1), index);
1910 else if (term->characterClass->hasOneCharacterSize() && !term->invert())
1911 sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
1912 else {
1913 // Rematch one less
1914 const RegisterID character = regT0;
1915
1916 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1917
1918 Label rematchLoop(this);
1919 readCharacter(m_checkedOffset - term->inputPosition, character);
1920
1921 sub32(TrustedImm32(1), countRegister);
1922 add32(TrustedImm32(1), index);
1923
1924#ifdef JIT_UNICODE_EXPRESSIONS
1925 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1926 add32(TrustedImm32(1), index);
1927 isBMPChar.link(this);
1928#endif
1929
1930 branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
1931
1932 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1933 }
1934 jump(op.m_reentry);
1935 }
1936
1937 void generateCharacterClassNonGreedy(size_t opIndex)
1938 {
1939 YarrOp& op = m_ops[opIndex];
1940 PatternTerm* term = op.m_term;
1941
1942 const RegisterID countRegister = regT1;
1943
1944 move(TrustedImm32(0), countRegister);
1945 op.m_reentry = label();
1946
1947#ifdef JIT_UNICODE_EXPRESSIONS
1948 if (m_decodeSurrogatePairs) {
1949 if (!term->characterClass->hasOneCharacterSize() || term->invert())
1950 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1951 }
1952#endif
1953
1954 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1955 }
1956
1957 void backtrackCharacterClassNonGreedy(size_t opIndex)
1958 {
1959 YarrOp& op = m_ops[opIndex];
1960 PatternTerm* term = op.m_term;
1961
1962 const RegisterID character = regT0;
1963 const RegisterID countRegister = regT1;
1964
1965 JumpList nonGreedyFailures;
1966
1967 m_backtrackingState.link(this);
1968
1969#ifdef JIT_UNICODE_EXPRESSIONS
1970 if (m_decodeSurrogatePairs) {
1971 if (!term->characterClass->hasOneCharacterSize() || term->invert())
1972 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1973 }
1974#endif
1975
1976 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1977
1978 nonGreedyFailures.append(atEndOfInput());
1979 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1980
1981 JumpList matchDest;
1982 readCharacter(m_checkedOffset - term->inputPosition, character);
1983 // If we are matching the "any character" builtin class for non-unicode patterns,
1984 // we only need to read the character and don't need to match as it will always succeed.
1985 if (term->invert() || !term->characterClass->m_anyCharacter) {
1986 matchCharacterClass(character, matchDest, term->characterClass);
1987
1988 if (term->invert())
1989 nonGreedyFailures.append(matchDest);
1990 else {
1991 nonGreedyFailures.append(jump());
1992 matchDest.link(this);
1993 }
1994 }
1995
1996#ifdef JIT_UNICODE_EXPRESSIONS
1997 if (m_decodeSurrogatePairs)
1998 advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailures, character);
1999 else
2000#endif
2001 add32(TrustedImm32(1), index);
2002 add32(TrustedImm32(1), countRegister);
2003
2004 jump(op.m_reentry);
2005
2006 nonGreedyFailures.link(this);
2007 sub32(countRegister, index);
2008 m_backtrackingState.fallthrough();
2009 }
2010
2011 void generateDotStarEnclosure(size_t opIndex)
2012 {
2013 YarrOp& op = m_ops[opIndex];
2014 PatternTerm* term = op.m_term;
2015
2016 const RegisterID character = regT0;
2017 const RegisterID matchPos = regT1;
2018#ifndef HAVE_INITIAL_START_REG
2019 const RegisterID initialStart = character;
2020#endif
2021
2022 JumpList foundBeginningNewLine;
2023 JumpList saveStartIndex;
2024 JumpList foundEndingNewLine;
2025
2026 if (m_pattern.dotAll()) {
2027 move(TrustedImm32(0), matchPos);
2028 setMatchStart(matchPos);
2029 move(length, index);
2030 return;
2031 }
2032
2033 ASSERT(!m_pattern.m_body->m_hasFixedSize);
2034 getMatchStart(matchPos);
2035
2036#ifndef HAVE_INITIAL_START_REG
2037 loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2038#endif
2039 saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
2040 Label findBOLLoop(this);
2041 sub32(TrustedImm32(1), matchPos);
2042 if (m_charSize == Char8)
2043 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2044 else
2045 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2046 matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
2047
2048#ifndef HAVE_INITIAL_START_REG
2049 loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2050#endif
2051 branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
2052 saveStartIndex.append(jump());
2053
2054 foundBeginningNewLine.link(this);
2055 add32(TrustedImm32(1), matchPos); // Advance past newline
2056 saveStartIndex.link(this);
2057
2058 if (!m_pattern.multiline() && term->anchors.bolAnchor)
2059 op.m_jumps.append(branchTest32(NonZero, matchPos));
2060
2061 ASSERT(!m_pattern.m_body->m_hasFixedSize);
2062 setMatchStart(matchPos);
2063
2064 move(index, matchPos);
2065
2066 Label findEOLLoop(this);
2067 foundEndingNewLine.append(branch32(Equal, matchPos, length));
2068 if (m_charSize == Char8)
2069 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2070 else
2071 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2072 matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
2073 add32(TrustedImm32(1), matchPos);
2074 jump(findEOLLoop);
2075
2076 foundEndingNewLine.link(this);
2077
2078 if (!m_pattern.multiline() && term->anchors.eolAnchor)
2079 op.m_jumps.append(branch32(NotEqual, matchPos, length));
2080
2081 move(matchPos, index);
2082 }
2083
2084 void backtrackDotStarEnclosure(size_t opIndex)
2085 {
2086 backtrackTermDefault(opIndex);
2087 }
2088
2089 // Code generation/backtracking for simple terms
2090 // (pattern characters, character classes, and assertions).
2091 // These methods farm out work to the set of functions above.
2092 void generateTerm(size_t opIndex)
2093 {
2094 YarrOp& op = m_ops[opIndex];
2095 PatternTerm* term = op.m_term;
2096
2097 switch (term->type) {
2098 case PatternTerm::TypePatternCharacter:
2099 switch (term->quantityType) {
2100 case QuantifierFixedCount:
2101 if (term->quantityMaxCount == 1)
2102 generatePatternCharacterOnce(opIndex);
2103 else
2104 generatePatternCharacterFixed(opIndex);
2105 break;
2106 case QuantifierGreedy:
2107 generatePatternCharacterGreedy(opIndex);
2108 break;
2109 case QuantifierNonGreedy:
2110 generatePatternCharacterNonGreedy(opIndex);
2111 break;
2112 }
2113 break;
2114
2115 case PatternTerm::TypeCharacterClass:
2116 switch (term->quantityType) {
2117 case QuantifierFixedCount:
2118 if (term->quantityMaxCount == 1)
2119 generateCharacterClassOnce(opIndex);
2120 else
2121 generateCharacterClassFixed(opIndex);
2122 break;
2123 case QuantifierGreedy:
2124 generateCharacterClassGreedy(opIndex);
2125 break;
2126 case QuantifierNonGreedy:
2127 generateCharacterClassNonGreedy(opIndex);
2128 break;
2129 }
2130 break;
2131
2132 case PatternTerm::TypeAssertionBOL:
2133 generateAssertionBOL(opIndex);
2134 break;
2135
2136 case PatternTerm::TypeAssertionEOL:
2137 generateAssertionEOL(opIndex);
2138 break;
2139
2140 case PatternTerm::TypeAssertionWordBoundary:
2141 generateAssertionWordBoundary(opIndex);
2142 break;
2143
2144 case PatternTerm::TypeForwardReference:
2145 m_failureReason = JITFailureReason::ForwardReference;
2146 break;
2147
2148 case PatternTerm::TypeParenthesesSubpattern:
2149 case PatternTerm::TypeParentheticalAssertion:
2150 RELEASE_ASSERT_NOT_REACHED();
2151
2152 case PatternTerm::TypeBackReference:
2153#if ENABLE(YARR_JIT_BACKREFERENCES)
2154 generateBackReference(opIndex);
2155#else
2156 m_failureReason = JITFailureReason::BackReference;
2157#endif
2158 break;
2159 case PatternTerm::TypeDotStarEnclosure:
2160 generateDotStarEnclosure(opIndex);
2161 break;
2162 }
2163 }
2164 void backtrackTerm(size_t opIndex)
2165 {
2166 YarrOp& op = m_ops[opIndex];
2167 PatternTerm* term = op.m_term;
2168
2169 switch (term->type) {
2170 case PatternTerm::TypePatternCharacter:
2171 switch (term->quantityType) {
2172 case QuantifierFixedCount:
2173 if (term->quantityMaxCount == 1)
2174 backtrackPatternCharacterOnce(opIndex);
2175 else
2176 backtrackPatternCharacterFixed(opIndex);
2177 break;
2178 case QuantifierGreedy:
2179 backtrackPatternCharacterGreedy(opIndex);
2180 break;
2181 case QuantifierNonGreedy:
2182 backtrackPatternCharacterNonGreedy(opIndex);
2183 break;
2184 }
2185 break;
2186
2187 case PatternTerm::TypeCharacterClass:
2188 switch (term->quantityType) {
2189 case QuantifierFixedCount:
2190 if (term->quantityMaxCount == 1)
2191 backtrackCharacterClassOnce(opIndex);
2192 else
2193 backtrackCharacterClassFixed(opIndex);
2194 break;
2195 case QuantifierGreedy:
2196 backtrackCharacterClassGreedy(opIndex);
2197 break;
2198 case QuantifierNonGreedy:
2199 backtrackCharacterClassNonGreedy(opIndex);
2200 break;
2201 }
2202 break;
2203
2204 case PatternTerm::TypeAssertionBOL:
2205 backtrackAssertionBOL(opIndex);
2206 break;
2207
2208 case PatternTerm::TypeAssertionEOL:
2209 backtrackAssertionEOL(opIndex);
2210 break;
2211
2212 case PatternTerm::TypeAssertionWordBoundary:
2213 backtrackAssertionWordBoundary(opIndex);
2214 break;
2215
2216 case PatternTerm::TypeForwardReference:
2217 m_failureReason = JITFailureReason::ForwardReference;
2218 break;
2219
2220 case PatternTerm::TypeParenthesesSubpattern:
2221 case PatternTerm::TypeParentheticalAssertion:
2222 RELEASE_ASSERT_NOT_REACHED();
2223
2224 case PatternTerm::TypeBackReference:
2225#if ENABLE(YARR_JIT_BACKREFERENCES)
2226 backtrackBackReference(opIndex);
2227#else
2228 m_failureReason = JITFailureReason::BackReference;
2229#endif
2230 break;
2231
2232 case PatternTerm::TypeDotStarEnclosure:
2233 backtrackDotStarEnclosure(opIndex);
2234 break;
2235 }
2236 }
2237
2238 void generate()
2239 {
2240 // Forwards generate the matching code.
2241 ASSERT(m_ops.size());
2242 size_t opIndex = 0;
2243
2244 do {
2245 if (m_disassembler)
2246 m_disassembler->setForGenerate(opIndex, label());
2247
2248 YarrOp& op = m_ops[opIndex];
2249 switch (op.m_op) {
2250
2251 case OpTerm:
2252 generateTerm(opIndex);
2253 break;
2254
2255 // OpBodyAlternativeBegin/Next/End
2256 //
2257 // These nodes wrap the set of alternatives in the body of the regular expression.
2258 // There may be either one or two chains of OpBodyAlternative nodes, one representing
2259 // the 'once through' sequence of alternatives (if any exist), and one representing
2260 // the repeating alternatives (again, if any exist).
2261 //
2262 // Upon normal entry to the Begin alternative, we will check that input is available.
2263 // Reentry to the Begin alternative will take place after the check has taken place,
2264 // and will assume that the input position has already been progressed as appropriate.
2265 //
2266 // Entry to subsequent Next/End alternatives occurs when the prior alternative has
2267 // successfully completed a match - return a success state from JIT code.
2268 //
2269 // Next alternatives allow for reentry optimized to suit backtracking from its
2270 // preceding alternative. It expects the input position to still be set to a position
2271 // appropriate to its predecessor, and it will only perform an input check if the
2272 // predecessor had a minimum size less than its own.
2273 //
2274 // In the case 'once through' expressions, the End node will also have a reentry
2275 // point to jump to when the last alternative fails. Again, this expects the input
2276 // position to still reflect that expected by the prior alternative.
2277 case OpBodyAlternativeBegin: {
2278 PatternAlternative* alternative = op.m_alternative;
2279
2280 // Upon entry at the head of the set of alternatives, check if input is available
2281 // to run the first alternative. (This progresses the input position).
2282 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
2283 // We will reenter after the check, and assume the input position to have been
2284 // set as appropriate to this alternative.
2285 op.m_reentry = label();
2286
2287 m_checkedOffset += alternative->m_minimumSize;
2288 break;
2289 }
2290 case OpBodyAlternativeNext:
2291 case OpBodyAlternativeEnd: {
2292 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2293 PatternAlternative* alternative = op.m_alternative;
2294
2295 // If we get here, the prior alternative matched - return success.
2296
2297 // Adjust the stack pointer to remove the pattern's frame.
2298 removeCallFrame();
2299
2300 // Load appropriate values into the return register and the first output
2301 // slot, and return. In the case of pattern with a fixed size, we will
2302 // not have yet set the value in the first
2303 ASSERT(index != returnRegister);
2304 if (m_pattern.m_body->m_hasFixedSize) {
2305 move(index, returnRegister);
2306 if (priorAlternative->m_minimumSize)
2307 sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
2308 if (compileMode == IncludeSubpatterns)
2309 store32(returnRegister, output);
2310 } else
2311 getMatchStart(returnRegister);
2312 if (compileMode == IncludeSubpatterns)
2313 store32(index, Address(output, 4));
2314 move(index, returnRegister2);
2315
2316 generateReturn();
2317
2318 // This is the divide between the tail of the prior alternative, above, and
2319 // the head of the subsequent alternative, below.
2320
2321 if (op.m_op == OpBodyAlternativeNext) {
2322 // This is the reentry point for the Next alternative. We expect any code
2323 // that jumps here to do so with the input position matching that of the
2324 // PRIOR alteranative, and we will only check input availability if we
2325 // need to progress it forwards.
2326 op.m_reentry = label();
2327 if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2328 add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2329 op.m_jumps.append(jumpIfNoAvailableInput());
2330 } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2331 sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2332 } else if (op.m_nextOp == notFound) {
2333 // This is the reentry point for the End of 'once through' alternatives,
2334 // jumped to when the last alternative fails to match.
2335 op.m_reentry = label();
2336 sub32(Imm32(priorAlternative->m_minimumSize), index);
2337 }
2338
2339 if (op.m_op == OpBodyAlternativeNext)
2340 m_checkedOffset += alternative->m_minimumSize;
2341 m_checkedOffset -= priorAlternative->m_minimumSize;
2342 break;
2343 }
2344
2345 // OpSimpleNestedAlternativeBegin/Next/End
2346 // OpNestedAlternativeBegin/Next/End
2347 //
2348 // These nodes are used to handle sets of alternatives that are nested within
2349 // subpatterns and parenthetical assertions. The 'simple' forms are used where
2350 // we do not need to be able to backtrack back into any alternative other than
2351 // the last, the normal forms allow backtracking into any alternative.
2352 //
2353 // Each Begin/Next node is responsible for planting an input check to ensure
2354 // sufficient input is available on entry. Next nodes additionally need to
2355 // jump to the end - Next nodes use the End node's m_jumps list to hold this
2356 // set of jumps.
2357 //
2358 // In the non-simple forms, successful alternative matches must store a
2359 // 'return address' using a DataLabelPtr, used to store the address to jump
2360 // to when backtracking, to get to the code for the appropriate alternative.
2361 case OpSimpleNestedAlternativeBegin:
2362 case OpNestedAlternativeBegin: {
2363 PatternTerm* term = op.m_term;
2364 PatternAlternative* alternative = op.m_alternative;
2365 PatternDisjunction* disjunction = term->parentheses.disjunction;
2366
2367 // Calculate how much input we need to check for, and if non-zero check.
2368 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2369 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2370 op.m_checkAdjust -= disjunction->m_minimumSize;
2371 if (op.m_checkAdjust)
2372 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2373
2374 m_checkedOffset += op.m_checkAdjust;
2375 break;
2376 }
2377 case OpSimpleNestedAlternativeNext:
2378 case OpNestedAlternativeNext: {
2379 PatternTerm* term = op.m_term;
2380 PatternAlternative* alternative = op.m_alternative;
2381 PatternDisjunction* disjunction = term->parentheses.disjunction;
2382
2383 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2384 if (op.m_op == OpNestedAlternativeNext) {
2385 unsigned parenthesesFrameLocation = term->frameLocation;
2386 op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2387 }
2388
2389 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2390 // If the previous alternative matched without consuming characters then
2391 // backtrack to try to match while consumming some input.
2392 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2393 }
2394
2395 // If we reach here then the last alternative has matched - jump to the
2396 // End node, to skip over any further alternatives.
2397 //
2398 // FIXME: this is logically O(N^2) (though N can be expected to be very
2399 // small). We could avoid this either by adding an extra jump to the JIT
2400 // data structures, or by making backtracking code that jumps to Next
2401 // alternatives are responsible for checking that input is available (if
2402 // we didn't need to plant the input checks, then m_jumps would be free).
2403 YarrOp* endOp = &m_ops[op.m_nextOp];
2404 while (endOp->m_nextOp != notFound) {
2405 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2406 endOp = &m_ops[endOp->m_nextOp];
2407 }
2408 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2409 endOp->m_jumps.append(jump());
2410
2411 // This is the entry point for the next alternative.
2412 op.m_reentry = label();
2413
2414 // Calculate how much input we need to check for, and if non-zero check.
2415 op.m_checkAdjust = alternative->m_minimumSize;
2416 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2417 op.m_checkAdjust -= disjunction->m_minimumSize;
2418 if (op.m_checkAdjust)
2419 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2420
2421 YarrOp& lastOp = m_ops[op.m_previousOp];
2422 m_checkedOffset -= lastOp.m_checkAdjust;
2423 m_checkedOffset += op.m_checkAdjust;
2424 break;
2425 }
2426 case OpSimpleNestedAlternativeEnd:
2427 case OpNestedAlternativeEnd: {
2428 PatternTerm* term = op.m_term;
2429
2430 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2431 if (op.m_op == OpNestedAlternativeEnd) {
2432 unsigned parenthesesFrameLocation = term->frameLocation;
2433 op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2434 }
2435
2436 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2437 // If the previous alternative matched without consuming characters then
2438 // backtrack to try to match while consumming some input.
2439 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2440 }
2441
2442 // If this set of alternatives contains more than one alternative,
2443 // then the Next nodes will have planted jumps to the End, and added
2444 // them to this node's m_jumps list.
2445 op.m_jumps.link(this);
2446 op.m_jumps.clear();
2447
2448 YarrOp& lastOp = m_ops[op.m_previousOp];
2449 m_checkedOffset -= lastOp.m_checkAdjust;
2450 break;
2451 }
2452
2453 // OpParenthesesSubpatternOnceBegin/End
2454 //
2455 // These nodes support (optionally) capturing subpatterns, that have a
2456 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
2457 case OpParenthesesSubpatternOnceBegin: {
2458 PatternTerm* term = op.m_term;
2459 unsigned parenthesesFrameLocation = term->frameLocation;
2460 const RegisterID indexTemporary = regT0;
2461 ASSERT(term->quantityMaxCount == 1);
2462
2463 // Upon entry to a Greedy quantified set of parenthese store the index.
2464 // We'll use this for two purposes:
2465 // - To indicate which iteration we are on of mathing the remainder of
2466 // the expression after the parentheses - the first, including the
2467 // match within the parentheses, or the second having skipped over them.
2468 // - To check for empty matches, which must be rejected.
2469 //
2470 // At the head of a NonGreedy set of parentheses we'll immediately set the
2471 // value on the stack to -1 (indicating a match skipping the subpattern),
2472 // and plant a jump to the end. We'll also plant a label to backtrack to
2473 // to reenter the subpattern later, with a store to set up index on the
2474 // second iteration.
2475 //
2476 // FIXME: for capturing parens, could use the index in the capture array?
2477 if (term->quantityType == QuantifierGreedy)
2478 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2479 else if (term->quantityType == QuantifierNonGreedy) {
2480 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2481 op.m_jumps.append(jump());
2482 op.m_reentry = label();
2483 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2484 }
2485
2486 // If the parenthese are capturing, store the starting index value to the
2487 // captures array, offsetting as necessary.
2488 //
2489 // FIXME: could avoid offsetting this value in JIT code, apply
2490 // offsets only afterwards, at the point the results array is
2491 // being accessed.
2492 if (term->capture() && compileMode == IncludeSubpatterns) {
2493 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2494 if (term->quantityType == QuantifierFixedCount)
2495 inputOffset += term->parentheses.disjunction->m_minimumSize;
2496 if (inputOffset) {
2497 move(index, indexTemporary);
2498 sub32(Imm32(inputOffset), indexTemporary);
2499 setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2500 } else
2501 setSubpatternStart(index, term->parentheses.subpatternId);
2502 }
2503 break;
2504 }
2505 case OpParenthesesSubpatternOnceEnd: {
2506 PatternTerm* term = op.m_term;
2507 const RegisterID indexTemporary = regT0;
2508 ASSERT(term->quantityMaxCount == 1);
2509
2510 // Runtime ASSERT to make sure that the nested alternative handled the
2511 // "no input consumed" check.
2512 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2513 Jump pastBreakpoint;
2514 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2515 abortWithReason(YARRNoInputConsumed);
2516 pastBreakpoint.link(this);
2517 }
2518
2519 // If the parenthese are capturing, store the ending index value to the
2520 // captures array, offsetting as necessary.
2521 //
2522 // FIXME: could avoid offsetting this value in JIT code, apply
2523 // offsets only afterwards, at the point the results array is
2524 // being accessed.
2525 if (term->capture() && compileMode == IncludeSubpatterns) {
2526 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2527 if (inputOffset) {
2528 move(index, indexTemporary);
2529 sub32(Imm32(inputOffset), indexTemporary);
2530 setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2531 } else
2532 setSubpatternEnd(index, term->parentheses.subpatternId);
2533 }
2534
2535 // If the parentheses are quantified Greedy then add a label to jump back
2536 // to if we get a failed match from after the parentheses. For NonGreedy
2537 // parentheses, link the jump from before the subpattern to here.
2538 if (term->quantityType == QuantifierGreedy)
2539 op.m_reentry = label();
2540 else if (term->quantityType == QuantifierNonGreedy) {
2541 YarrOp& beginOp = m_ops[op.m_previousOp];
2542 beginOp.m_jumps.link(this);
2543 }
2544 break;
2545 }
2546
2547 // OpParenthesesSubpatternTerminalBegin/End
2548 case OpParenthesesSubpatternTerminalBegin: {
2549 PatternTerm* term = op.m_term;
2550 ASSERT(term->quantityType == QuantifierGreedy);
2551 ASSERT(term->quantityMaxCount == quantifyInfinite);
2552 ASSERT(!term->capture());
2553
2554 // Upon entry set a label to loop back to.
2555 op.m_reentry = label();
2556
2557 // Store the start index of the current match; we need to reject zero
2558 // length matches.
2559 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2560 break;
2561 }
2562 case OpParenthesesSubpatternTerminalEnd: {
2563 YarrOp& beginOp = m_ops[op.m_previousOp];
2564 if (!ASSERT_DISABLED) {
2565 PatternTerm* term = op.m_term;
2566
2567 // Runtime ASSERT to make sure that the nested alternative handled the
2568 // "no input consumed" check.
2569 Jump pastBreakpoint;
2570 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2571 abortWithReason(YARRNoInputConsumed);
2572 pastBreakpoint.link(this);
2573 }
2574
2575 // We know that the match is non-zero, we can accept it and
2576 // loop back up to the head of the subpattern.
2577 jump(beginOp.m_reentry);
2578
2579 // This is the entry point to jump to when we stop matching - we will
2580 // do so once the subpattern cannot match any more.
2581 op.m_reentry = label();
2582 break;
2583 }
2584
2585 // OpParenthesesSubpatternBegin/End
2586 //
2587 // These nodes support generic subpatterns.
2588 case OpParenthesesSubpatternBegin: {
2589#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2590 PatternTerm* term = op.m_term;
2591 unsigned parenthesesFrameLocation = term->frameLocation;
2592
2593 // Upon entry to a Greedy quantified set of parenthese store the index.
2594 // We'll use this for two purposes:
2595 // - To indicate which iteration we are on of mathing the remainder of
2596 // the expression after the parentheses - the first, including the
2597 // match within the parentheses, or the second having skipped over them.
2598 // - To check for empty matches, which must be rejected.
2599 //
2600 // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2601 // in the backtrack info to -1 (indicating a match skipping the subpattern),
2602 // and plant a jump to the end. We'll also plant a label to backtrack to
2603 // to reenter the subpattern later, with a store to set 'begin' to current index
2604 // on the second iteration.
2605 //
2606 // FIXME: for capturing parens, could use the index in the capture array?
2607 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2608 storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2609 storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2610
2611 if (term->quantityType == QuantifierNonGreedy) {
2612 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2613 op.m_jumps.append(jump());
2614 }
2615
2616 op.m_reentry = label();
2617 RegisterID currParenContextReg = regT0;
2618 RegisterID newParenContextReg = regT1;
2619
2620 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2621 allocateParenContext(newParenContextReg);
2622 storePtr(currParenContextReg, newParenContextReg);
2623 storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2624 saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2625 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2626 }
2627
2628 // If the parenthese are capturing, store the starting index value to the
2629 // captures array, offsetting as necessary.
2630 //
2631 // FIXME: could avoid offsetting this value in JIT code, apply
2632 // offsets only afterwards, at the point the results array is
2633 // being accessed.
2634 if (term->capture() && compileMode == IncludeSubpatterns) {
2635 const RegisterID indexTemporary = regT0;
2636 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2637 if (term->quantityType == QuantifierFixedCount)
2638 inputOffset += term->parentheses.disjunction->m_minimumSize;
2639 if (inputOffset) {
2640 move(index, indexTemporary);
2641 sub32(Imm32(inputOffset), indexTemporary);
2642 setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2643 } else
2644 setSubpatternStart(index, term->parentheses.subpatternId);
2645 }
2646#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2647 RELEASE_ASSERT_NOT_REACHED();
2648#endif
2649 break;
2650 }
2651 case OpParenthesesSubpatternEnd: {
2652#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2653 PatternTerm* term = op.m_term;
2654 unsigned parenthesesFrameLocation = term->frameLocation;
2655
2656 // Runtime ASSERT to make sure that the nested alternative handled the
2657 // "no input consumed" check.
2658 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2659 Jump pastBreakpoint;
2660 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
2661 abortWithReason(YARRNoInputConsumed);
2662 pastBreakpoint.link(this);
2663 }
2664
2665 const RegisterID countTemporary = regT1;
2666
2667 YarrOp& beginOp = m_ops[op.m_previousOp];
2668 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2669 add32(TrustedImm32(1), countTemporary);
2670 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2671
2672 // If the parenthese are capturing, store the ending index value to the
2673 // captures array, offsetting as necessary.
2674 //
2675 // FIXME: could avoid offsetting this value in JIT code, apply
2676 // offsets only afterwards, at the point the results array is
2677 // being accessed.
2678 if (term->capture() && compileMode == IncludeSubpatterns) {
2679 const RegisterID indexTemporary = regT0;
2680
2681 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2682 if (inputOffset) {
2683 move(index, indexTemporary);
2684 sub32(Imm32(inputOffset), indexTemporary);
2685 setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2686 } else
2687 setSubpatternEnd(index, term->parentheses.subpatternId);
2688 }
2689
2690 // If the parentheses are quantified Greedy then add a label to jump back
2691 // to if we get a failed match from after the parentheses. For NonGreedy
2692 // parentheses, link the jump from before the subpattern to here.
2693 if (term->quantityType == QuantifierGreedy) {
2694 if (term->quantityMaxCount != quantifyInfinite)
2695 branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2696 else
2697 jump(beginOp.m_reentry);
2698
2699 op.m_reentry = label();
2700 } else if (term->quantityType == QuantifierNonGreedy) {
2701 YarrOp& beginOp = m_ops[op.m_previousOp];
2702 beginOp.m_jumps.link(this);
2703 op.m_reentry = label();
2704 }
2705#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2706 RELEASE_ASSERT_NOT_REACHED();
2707#endif
2708 break;
2709 }
2710
2711 // OpParentheticalAssertionBegin/End
2712 case OpParentheticalAssertionBegin: {
2713 PatternTerm* term = op.m_term;
2714
2715 // Store the current index - assertions should not update index, so
2716 // we will need to restore it upon a successful match.
2717 unsigned parenthesesFrameLocation = term->frameLocation;
2718 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2719
2720 // Check
2721 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2722 if (op.m_checkAdjust)
2723 sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2724
2725 m_checkedOffset -= op.m_checkAdjust;
2726 break;
2727 }
2728 case OpParentheticalAssertionEnd: {
2729 PatternTerm* term = op.m_term;
2730
2731 // Restore the input index value.
2732 unsigned parenthesesFrameLocation = term->frameLocation;
2733 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2734
2735 // If inverted, a successful match of the assertion must be treated
2736 // as a failure, so jump to backtracking.
2737 if (term->invert()) {
2738 op.m_jumps.append(jump());
2739 op.m_reentry = label();
2740 }
2741
2742 YarrOp& lastOp = m_ops[op.m_previousOp];
2743 m_checkedOffset += lastOp.m_checkAdjust;
2744 break;
2745 }
2746
2747 case OpMatchFailed:
2748 removeCallFrame();
2749 generateFailReturn();
2750 break;
2751 }
2752
2753 ++opIndex;
2754 } while (opIndex < m_ops.size());
2755 }
2756
2757 void backtrack()
2758 {
2759 // Backwards generate the backtracking code.
2760 size_t opIndex = m_ops.size();
2761 ASSERT(opIndex);
2762
2763 do {
2764 --opIndex;
2765
2766 if (m_disassembler)
2767 m_disassembler->setForBacktrack(opIndex, label());
2768
2769 YarrOp& op = m_ops[opIndex];
2770 switch (op.m_op) {
2771
2772 case OpTerm:
2773 backtrackTerm(opIndex);
2774 break;
2775
2776 // OpBodyAlternativeBegin/Next/End
2777 //
2778 // For each Begin/Next node representing an alternative, we need to decide what to do
2779 // in two circumstances:
2780 // - If we backtrack back into this node, from within the alternative.
2781 // - If the input check at the head of the alternative fails (if this exists).
2782 //
2783 // We treat these two cases differently since in the former case we have slightly
2784 // more information - since we are backtracking out of a prior alternative we know
2785 // that at least enough input was available to run it. For example, given the regular
2786 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2787 // character match of 'a'), then we need not perform an additional input availability
2788 // check before running the second alternative.
2789 //
2790 // Backtracking required differs for the last alternative, which in the case of the
2791 // repeating set of alternatives must loop. The code generated for the last alternative
2792 // will also be used to handle all input check failures from any prior alternatives -
2793 // these require similar functionality, in seeking the next available alternative for
2794 // which there is sufficient input.
2795 //
2796 // Since backtracking of all other alternatives simply requires us to link backtracks
2797 // to the reentry point for the subsequent alternative, we will only be generating any
2798 // code when backtracking the last alternative.
2799 case OpBodyAlternativeBegin:
2800 case OpBodyAlternativeNext: {
2801 PatternAlternative* alternative = op.m_alternative;
2802
2803 if (op.m_op == OpBodyAlternativeNext) {
2804 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2805 m_checkedOffset += priorAlternative->m_minimumSize;
2806 }
2807 m_checkedOffset -= alternative->m_minimumSize;
2808
2809 // Is this the last alternative? If not, then if we backtrack to this point we just
2810 // need to jump to try to match the next alternative.
2811 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2812 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2813 break;
2814 }
2815 YarrOp& endOp = m_ops[op.m_nextOp];
2816
2817 YarrOp* beginOp = &op;
2818 while (beginOp->m_op != OpBodyAlternativeBegin) {
2819 ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2820 beginOp = &m_ops[beginOp->m_previousOp];
2821 }
2822
2823 bool onceThrough = endOp.m_nextOp == notFound;
2824
2825 JumpList lastStickyAlternativeFailures;
2826
2827 // First, generate code to handle cases where we backtrack out of an attempted match
2828 // of the last alternative. If this is a 'once through' set of alternatives then we
2829 // have nothing to do - link this straight through to the End.
2830 if (onceThrough)
2831 m_backtrackingState.linkTo(endOp.m_reentry, this);
2832 else {
2833 // If we don't need to move the input poistion, and the pattern has a fixed size
2834 // (in which case we omit the store of the start index until the pattern has matched)
2835 // then we can just link the backtrack out of the last alternative straight to the
2836 // head of the first alternative.
2837 if (m_pattern.m_body->m_hasFixedSize
2838 && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2839 && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2840 m_backtrackingState.linkTo(beginOp->m_reentry, this);
2841 else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2842 // It is a sticky pattern and the last alternative failed, jump to the end.
2843 m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2844 } else {
2845 // We need to generate a trampoline of code to execute before looping back
2846 // around to the first alternative.
2847 m_backtrackingState.link(this);
2848
2849 // No need to advance and retry for a sticky pattern.
2850 if (!m_pattern.sticky()) {
2851 // If the pattern size is not fixed, then store the start index for use if we match.
2852 if (!m_pattern.m_body->m_hasFixedSize) {
2853 if (alternative->m_minimumSize == 1)
2854 setMatchStart(index);
2855 else {
2856 move(index, regT0);
2857 if (alternative->m_minimumSize)
2858 sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2859 else
2860 add32(TrustedImm32(1), regT0);
2861 setMatchStart(regT0);
2862 }
2863 }
2864
2865 // Generate code to loop. Check whether the last alternative is longer than the
2866 // first (e.g. /a|xy/ or /a|xyz/).
2867 if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2868 // We want to loop, and increment input position. If the delta is 1, it is
2869 // already correctly incremented, if more than one then decrement as appropriate.
2870 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2871 ASSERT(delta);
2872 if (delta != 1)
2873 sub32(Imm32(delta - 1), index);
2874 jump(beginOp->m_reentry);
2875 } else {
2876 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2877 // be sufficent input available to handle this, so just fall through.
2878 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2879 if (delta != 0xFFFFFFFFu) {
2880 // We need to check input because we are incrementing the input.
2881 add32(Imm32(delta + 1), index);
2882 checkInput().linkTo(beginOp->m_reentry, this);
2883 }
2884 }
2885 }
2886 }
2887 }
2888
2889 // We can reach this point in the code in two ways:
2890 // - Fallthrough from the code above (a repeating alternative backtracked out of its
2891 // last alternative, and did not have sufficent input to run the first).
2892 // - We will loop back up to the following label when a repeating alternative loops,
2893 // following a failed input check.
2894 //
2895 // Either way, we have just failed the input check for the first alternative.
2896 Label firstInputCheckFailed(this);
2897
2898 // Generate code to handle input check failures from alternatives except the last.
2899 // prevOp is the alternative we're handling a bail out from (initially Begin), and
2900 // nextOp is the alternative we will be attempting to reenter into.
2901 //
2902 // We will link input check failures from the forwards matching path back to the code
2903 // that can handle them.
2904 YarrOp* prevOp = beginOp;
2905 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2906 while (nextOp->m_op != OpBodyAlternativeEnd) {
2907 prevOp->m_jumps.link(this);
2908
2909 // We only get here if an input check fails, it is only worth checking again
2910 // if the next alternative has a minimum size less than the last.
2911 if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2912 // FIXME: if we added an extra label to YarrOp, we could avoid needing to
2913 // subtract delta back out, and reduce this code. Should performance test
2914 // the benefit of this.
2915 unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2916 sub32(Imm32(delta), index);
2917 Jump fail = jumpIfNoAvailableInput();
2918 add32(Imm32(delta), index);
2919 jump(nextOp->m_reentry);
2920 fail.link(this);
2921 } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2922 add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2923 prevOp = nextOp;
2924 nextOp = &m_ops[nextOp->m_nextOp];
2925 }
2926
2927 // We fall through to here if there is insufficient input to run the last alternative.
2928
2929 // If there is insufficient input to run the last alternative, then for 'once through'
2930 // alternatives we are done - just jump back up into the forwards matching path at the End.
2931 if (onceThrough) {
2932 op.m_jumps.linkTo(endOp.m_reentry, this);
2933 jump(endOp.m_reentry);
2934 break;
2935 }
2936
2937 // For repeating alternatives, link any input check failure from the last alternative to
2938 // this point.
2939 op.m_jumps.link(this);
2940
2941 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2942
2943 // Check for cases where input position is already incremented by 1 for the last
2944 // alternative (this is particularly useful where the minimum size of the body
2945 // disjunction is 0, e.g. /a*|b/).
2946 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
2947 // index is already incremented by 1, so just store it now!
2948 setMatchStart(index);
2949 needsToUpdateMatchStart = false;
2950 }
2951
2952 if (!m_pattern.sticky()) {
2953 // Check whether there is sufficient input to loop. Increment the input position by
2954 // one, and check. Also add in the minimum disjunction size before checking - there
2955 // is no point in looping if we're just going to fail all the input checks around
2956 // the next iteration.
2957 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2958 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2959 // If the last alternative had the same minimum size as the disjunction,
2960 // just simply increment input pos by 1, no adjustment based on minimum size.
2961 add32(TrustedImm32(1), index);
2962 } else {
2963 // If the minumum for the last alternative was one greater than than that
2964 // for the disjunction, we're already progressed by 1, nothing to do!
2965 unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
2966 if (delta)
2967 sub32(Imm32(delta), index);
2968 }
2969 Jump matchFailed = jumpIfNoAvailableInput();
2970
2971 if (needsToUpdateMatchStart) {
2972 if (!m_pattern.m_body->m_minimumSize)
2973 setMatchStart(index);
2974 else {
2975 move(index, regT0);
2976 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2977 setMatchStart(regT0);
2978 }
2979 }
2980
2981 // Calculate how much more input the first alternative requires than the minimum
2982 // for the body as a whole. If no more is needed then we dont need an additional
2983 // input check here - jump straight back up to the start of the first alternative.
2984 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2985 jump(beginOp->m_reentry);
2986 else {
2987 if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2988 add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2989 else
2990 sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2991 checkInput().linkTo(beginOp->m_reentry, this);
2992 jump(firstInputCheckFailed);
2993 }
2994
2995 // We jump to here if we iterate to the point that there is insufficient input to
2996 // run any matches, and need to return a failure state from JIT code.
2997 matchFailed.link(this);
2998 }
2999
3000 lastStickyAlternativeFailures.link(this);
3001 removeCallFrame();
3002 generateFailReturn();
3003 break;
3004 }
3005 case OpBodyAlternativeEnd: {
3006 // We should never backtrack back into a body disjunction.
3007 ASSERT(m_backtrackingState.isEmpty());
3008
3009 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
3010 m_checkedOffset += priorAlternative->m_minimumSize;
3011 break;
3012 }
3013
3014 // OpSimpleNestedAlternativeBegin/Next/End
3015 // OpNestedAlternativeBegin/Next/End
3016 //
3017 // Generate code for when we backtrack back out of an alternative into
3018 // a Begin or Next node, or when the entry input count check fails. If
3019 // there are more alternatives we need to jump to the next alternative,
3020 // if not we backtrack back out of the current set of parentheses.
3021 //
3022 // In the case of non-simple nested assertions we need to also link the
3023 // 'return address' appropriately to backtrack back out into the correct
3024 // alternative.
3025 case OpSimpleNestedAlternativeBegin:
3026 case OpSimpleNestedAlternativeNext:
3027 case OpNestedAlternativeBegin:
3028 case OpNestedAlternativeNext: {
3029 YarrOp& nextOp = m_ops[op.m_nextOp];
3030 bool isBegin = op.m_previousOp == notFound;
3031 bool isLastAlternative = nextOp.m_nextOp == notFound;
3032 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
3033 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
3034
3035 // Treat an input check failure the same as a failed match.
3036 m_backtrackingState.append(op.m_jumps);
3037
3038 // Set the backtracks to jump to the appropriate place. We may need
3039 // to link the backtracks in one of three different way depending on
3040 // the type of alternative we are dealing with:
3041 // - A single alternative, with no simplings.
3042 // - The last alternative of a set of two or more.
3043 // - An alternative other than the last of a set of two or more.
3044 //
3045 // In the case of a single alternative on its own, we don't need to
3046 // jump anywhere - if the alternative fails to match we can just
3047 // continue to backtrack out of the parentheses without jumping.
3048 //
3049 // In the case of the last alternative in a set of more than one, we
3050 // need to jump to return back out to the beginning. We'll do so by
3051 // adding a jump to the End node's m_jumps list, and linking this
3052 // when we come to generate the Begin node. For alternatives other
3053 // than the last, we need to jump to the next alternative.
3054 //
3055 // If the alternative had adjusted the input position we must link
3056 // backtracking to here, correct, and then jump on. If not we can
3057 // link the backtracks directly to their destination.
3058 if (op.m_checkAdjust) {
3059 // Handle the cases where we need to link the backtracks here.
3060 m_backtrackingState.link(this);
3061 sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3062 if (!isLastAlternative) {
3063 // An alternative that is not the last should jump to its successor.
3064 jump(nextOp.m_reentry);
3065 } else if (!isBegin) {
3066 // The last of more than one alternatives must jump back to the beginning.
3067 nextOp.m_jumps.append(jump());
3068 } else {
3069 // A single alternative on its own can fall through.
3070 m_backtrackingState.fallthrough();
3071 }
3072 } else {
3073 // Handle the cases where we can link the backtracks directly to their destinations.
3074 if (!isLastAlternative) {
3075 // An alternative that is not the last should jump to its successor.
3076 m_backtrackingState.linkTo(nextOp.m_reentry, this);
3077 } else if (!isBegin) {
3078 // The last of more than one alternatives must jump back to the beginning.
3079 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
3080 }
3081 // In the case of a single alternative on its own do nothing - it can fall through.
3082 }
3083
3084 // If there is a backtrack jump from a zero length match link it here.
3085 if (op.m_zeroLengthMatch.isSet())
3086 m_backtrackingState.append(op.m_zeroLengthMatch);
3087
3088 // At this point we've handled the backtracking back into this node.
3089 // Now link any backtracks that need to jump to here.
3090
3091 // For non-simple alternatives, link the alternative's 'return address'
3092 // so that we backtrack back out into the previous alternative.
3093 if (op.m_op == OpNestedAlternativeNext)
3094 m_backtrackingState.append(op.m_returnAddress);
3095
3096 // If there is more than one alternative, then the last alternative will
3097 // have planted a jump to be linked to the end. This jump was added to the
3098 // End node's m_jumps list. If we are back at the beginning, link it here.
3099 if (isBegin) {
3100 YarrOp* endOp = &m_ops[op.m_nextOp];
3101 while (endOp->m_nextOp != notFound) {
3102 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
3103 endOp = &m_ops[endOp->m_nextOp];
3104 }
3105 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
3106 m_backtrackingState.append(endOp->m_jumps);
3107 }
3108
3109 if (!isBegin) {
3110 YarrOp& lastOp = m_ops[op.m_previousOp];
3111 m_checkedOffset += lastOp.m_checkAdjust;
3112 }
3113 m_checkedOffset -= op.m_checkAdjust;
3114 break;
3115 }
3116 case OpSimpleNestedAlternativeEnd:
3117 case OpNestedAlternativeEnd: {
3118 PatternTerm* term = op.m_term;
3119
3120 // If there is a backtrack jump from a zero length match link it here.
3121 if (op.m_zeroLengthMatch.isSet())
3122 m_backtrackingState.append(op.m_zeroLengthMatch);
3123
3124 // If we backtrack into the end of a simple subpattern do nothing;
3125 // just continue through into the last alternative. If we backtrack
3126 // into the end of a non-simple set of alterntives we need to jump
3127 // to the backtracking return address set up during generation.
3128 if (op.m_op == OpNestedAlternativeEnd) {
3129 m_backtrackingState.link(this);
3130
3131 // Plant a jump to the return address.
3132 unsigned parenthesesFrameLocation = term->frameLocation;
3133 loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
3134
3135 // Link the DataLabelPtr associated with the end of the last
3136 // alternative to this point.
3137 m_backtrackingState.append(op.m_returnAddress);
3138 }
3139
3140 YarrOp& lastOp = m_ops[op.m_previousOp];
3141 m_checkedOffset += lastOp.m_checkAdjust;
3142 break;
3143 }
3144
3145 // OpParenthesesSubpatternOnceBegin/End
3146 //
3147 // When we are backtracking back out of a capturing subpattern we need
3148 // to clear the start index in the matches output array, to record that
3149 // this subpattern has not been captured.
3150 //
3151 // When backtracking back out of a Greedy quantified subpattern we need
3152 // to catch this, and try running the remainder of the alternative after
3153 // the subpattern again, skipping the parentheses.
3154 //
3155 // Upon backtracking back into a quantified set of parentheses we need to
3156 // check whether we were currently skipping the subpattern. If not, we
3157 // can backtrack into them, if we were we need to either backtrack back
3158 // out of the start of the parentheses, or jump back to the forwards
3159 // matching start, depending of whether the match is Greedy or NonGreedy.
3160 case OpParenthesesSubpatternOnceBegin: {
3161 PatternTerm* term = op.m_term;
3162 ASSERT(term->quantityMaxCount == 1);
3163
3164 // We only need to backtrack to this point if capturing or greedy.
3165 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
3166 m_backtrackingState.link(this);
3167
3168 // If capturing, clear the capture (we only need to reset start).
3169 if (term->capture() && compileMode == IncludeSubpatterns)
3170 clearSubpatternStart(term->parentheses.subpatternId);
3171
3172 // If Greedy, jump to the end.
3173 if (term->quantityType == QuantifierGreedy) {
3174 // Clear the flag in the stackframe indicating we ran through the subpattern.
3175 unsigned parenthesesFrameLocation = term->frameLocation;
3176 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
3177 // Jump to after the parentheses, skipping the subpattern.
3178 jump(m_ops[op.m_nextOp].m_reentry);
3179 // A backtrack from after the parentheses, when skipping the subpattern,
3180 // will jump back to here.
3181 op.m_jumps.link(this);
3182 }
3183
3184 m_backtrackingState.fallthrough();
3185 }
3186 break;
3187 }
3188 case OpParenthesesSubpatternOnceEnd: {
3189 PatternTerm* term = op.m_term;
3190
3191 if (term->quantityType != QuantifierFixedCount) {
3192 m_backtrackingState.link(this);
3193
3194 // Check whether we should backtrack back into the parentheses, or if we
3195 // are currently in a state where we had skipped over the subpattern
3196 // (in which case the flag value on the stack will be -1).
3197 unsigned parenthesesFrameLocation = term->frameLocation;
3198 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
3199
3200 if (term->quantityType == QuantifierGreedy) {
3201 // For Greedy parentheses, we skip after having already tried going
3202 // through the subpattern, so if we get here we're done.
3203 YarrOp& beginOp = m_ops[op.m_previousOp];
3204 beginOp.m_jumps.append(hadSkipped);
3205 } else {
3206 // For NonGreedy parentheses, we try skipping the subpattern first,
3207 // so if we get here we need to try running through the subpattern
3208 // next. Jump back to the start of the parentheses in the forwards
3209 // matching path.
3210 ASSERT(term->quantityType == QuantifierNonGreedy);
3211 YarrOp& beginOp = m_ops[op.m_previousOp];
3212 hadSkipped.linkTo(beginOp.m_reentry, this);
3213 }
3214
3215 m_backtrackingState.fallthrough();
3216 }
3217
3218 m_backtrackingState.append(op.m_jumps);
3219 break;
3220 }
3221
3222 // OpParenthesesSubpatternTerminalBegin/End
3223 //
3224 // Terminal subpatterns will always match - there is nothing after them to
3225 // force a backtrack, and they have a minimum count of 0, and as such will
3226 // always produce an acceptable result.
3227 case OpParenthesesSubpatternTerminalBegin: {
3228 // We will backtrack to this point once the subpattern cannot match any
3229 // more. Since no match is accepted as a successful match (we are Greedy
3230 // quantified with a minimum of zero) jump back to the forwards matching
3231 // path at the end.
3232 YarrOp& endOp = m_ops[op.m_nextOp];
3233 m_backtrackingState.linkTo(endOp.m_reentry, this);
3234 break;
3235 }
3236 case OpParenthesesSubpatternTerminalEnd:
3237 // We should never be backtracking to here (hence the 'terminal' in the name).
3238 ASSERT(m_backtrackingState.isEmpty());
3239 m_backtrackingState.append(op.m_jumps);
3240 break;
3241
3242 // OpParenthesesSubpatternBegin/End
3243 //
3244 // When we are backtracking back out of a capturing subpattern we need
3245 // to clear the start index in the matches output array, to record that
3246 // this subpattern has not been captured.
3247 //
3248 // When backtracking back out of a Greedy quantified subpattern we need
3249 // to catch this, and try running the remainder of the alternative after
3250 // the subpattern again, skipping the parentheses.
3251 //
3252 // Upon backtracking back into a quantified set of parentheses we need to
3253 // check whether we were currently skipping the subpattern. If not, we
3254 // can backtrack into them, if we were we need to either backtrack back
3255 // out of the start of the parentheses, or jump back to the forwards
3256 // matching start, depending of whether the match is Greedy or NonGreedy.
3257 case OpParenthesesSubpatternBegin: {
3258#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3259 PatternTerm* term = op.m_term;
3260 unsigned parenthesesFrameLocation = term->frameLocation;
3261
3262 if (term->quantityType != QuantifierFixedCount) {
3263 m_backtrackingState.link(this);
3264
3265 RegisterID currParenContextReg = regT0;
3266 RegisterID newParenContextReg = regT1;
3267
3268 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
3269
3270 restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
3271
3272 freeParenContext(currParenContextReg, newParenContextReg);
3273 storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
3274
3275 const RegisterID countTemporary = regT0;
3276 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
3277 Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
3278
3279 sub32(TrustedImm32(1), countTemporary);
3280 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
3281
3282 jump(m_ops[op.m_nextOp].m_reentry);
3283
3284 zeroLengthMatch.link(this);
3285
3286 // Clear the flag in the stackframe indicating we didn't run through the subpattern.
3287 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
3288
3289 if (term->quantityType == QuantifierGreedy)
3290 jump(m_ops[op.m_nextOp].m_reentry);
3291
3292 // If Greedy, jump to the end.
3293 if (term->quantityType == QuantifierGreedy) {
3294 // A backtrack from after the parentheses, when skipping the subpattern,
3295 // will jump back to here.
3296 op.m_jumps.link(this);
3297 }
3298
3299 m_backtrackingState.fallthrough();
3300 }
3301#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3302 RELEASE_ASSERT_NOT_REACHED();
3303#endif
3304 break;
3305 }
3306 case OpParenthesesSubpatternEnd: {
3307#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3308 PatternTerm* term = op.m_term;
3309
3310 if (term->quantityType != QuantifierFixedCount) {
3311 m_backtrackingState.link(this);
3312
3313 unsigned parenthesesFrameLocation = term->frameLocation;
3314
3315 if (term->quantityType == QuantifierGreedy) {
3316 // Check whether we should backtrack back into the parentheses, or if we
3317 // are currently in a state where we had skipped over the subpattern
3318 // (in which case the flag value on the stack will be -1).
3319 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
3320
3321 // For Greedy parentheses, we skip after having already tried going
3322 // through the subpattern, so if we get here we're done.
3323 YarrOp& beginOp = m_ops[op.m_previousOp];
3324 beginOp.m_jumps.append(hadSkipped);
3325 } else {
3326 // For NonGreedy parentheses, we try skipping the subpattern first,
3327 // so if we get here we need to try running through the subpattern
3328 // next. Jump back to the start of the parentheses in the forwards
3329 // matching path.
3330 ASSERT(term->quantityType == QuantifierNonGreedy);
3331
3332 const RegisterID beginTemporary = regT0;
3333 const RegisterID countTemporary = regT1;
3334
3335 YarrOp& beginOp = m_ops[op.m_previousOp];
3336
3337 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary);
3338 branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this);
3339
3340 JumpList exceededMatchLimit;
3341
3342 if (term->quantityMaxCount != quantifyInfinite) {
3343 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
3344 exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())));
3345 }
3346
3347 branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);
3348
3349 exceededMatchLimit.link(this);
3350 }
3351
3352 m_backtrackingState.fallthrough();
3353 }
3354
3355 m_backtrackingState.append(op.m_jumps);
3356#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3357 RELEASE_ASSERT_NOT_REACHED();
3358#endif
3359 break;
3360 }
3361
3362 // OpParentheticalAssertionBegin/End
3363 case OpParentheticalAssertionBegin: {
3364 PatternTerm* term = op.m_term;
3365 YarrOp& endOp = m_ops[op.m_nextOp];
3366
3367 // We need to handle the backtracks upon backtracking back out
3368 // of a parenthetical assertion if either we need to correct
3369 // the input index, or the assertion was inverted.
3370 if (op.m_checkAdjust || term->invert()) {
3371 m_backtrackingState.link(this);
3372
3373 if (op.m_checkAdjust)
3374 add32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3375
3376 // In an inverted assertion failure to match the subpattern
3377 // is treated as a successful match - jump to the end of the
3378 // subpattern. We already have adjusted the input position
3379 // back to that before the assertion, which is correct.
3380 if (term->invert())
3381 jump(endOp.m_reentry);
3382
3383 m_backtrackingState.fallthrough();
3384 }
3385
3386 // The End node's jump list will contain any backtracks into
3387 // the end of the assertion. Also, if inverted, we will have
3388 // added the failure caused by a successful match to this.
3389 m_backtrackingState.append(endOp.m_jumps);
3390
3391 m_checkedOffset += op.m_checkAdjust;
3392 break;
3393 }
3394 case OpParentheticalAssertionEnd: {
3395 // FIXME: We should really be clearing any nested subpattern
3396 // matches on bailing out from after the pattern. Firefox has
3397 // this bug too (presumably because they use YARR!)
3398
3399 // Never backtrack into an assertion; later failures bail to before the begin.
3400 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
3401
3402 YarrOp& lastOp = m_ops[op.m_previousOp];
3403 m_checkedOffset -= lastOp.m_checkAdjust;
3404 break;
3405 }
3406
3407 case OpMatchFailed:
3408 break;
3409 }
3410
3411 } while (opIndex);
3412 }
3413
3414 // Compilation methods:
3415 // ====================
3416
3417 // opCompileParenthesesSubpattern
3418 // Emits ops for a subpattern (set of parentheses). These consist
3419 // of a set of alternatives wrapped in an outer set of nodes for
3420 // the parentheses.
3421 // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
3422 // 'Terminal' (non-capturing parentheses quantified as greedy
3423 // and infinite), and 0 based greedy / non-greedy quantified parentheses.
3424 // Alternatives will use the 'Simple' set of ops if either the
3425 // subpattern is terminal (in which case we will never need to
3426 // backtrack), or if the subpattern only contains one alternative.
3427 void opCompileParenthesesSubpattern(PatternTerm* term)
3428 {
3429 YarrOpCode parenthesesBeginOpCode;
3430 YarrOpCode parenthesesEndOpCode;
3431 YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
3432 YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
3433 YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
3434
3435 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3436 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3437 return;
3438 }
3439
3440 // We can currently only compile quantity 1 subpatterns that are
3441 // not copies. We generate a copy in the case of a range quantifier,
3442 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
3443 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
3444 // comes where the subpattern is capturing, in which case we would
3445 // need to restore the capture from the first subpattern upon a
3446 // failure in the second.
3447 if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
3448 m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
3449 return;
3450 }
3451
3452 if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
3453 // Select the 'Once' nodes.
3454 parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
3455 parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
3456
3457 // If there is more than one alternative we cannot use the 'simple' nodes.
3458 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
3459 alternativeBeginOpCode = OpNestedAlternativeBegin;
3460 alternativeNextOpCode = OpNestedAlternativeNext;
3461 alternativeEndOpCode = OpNestedAlternativeEnd;
3462 }
3463 } else if (term->parentheses.isTerminal) {
3464 // Select the 'Terminal' nodes.
3465 parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
3466 parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
3467 } else {
3468#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3469 // We only handle generic parenthesis with non-fixed counts.
3470 if (term->quantityType == QuantifierFixedCount) {
3471 // This subpattern is not supported by the JIT.
3472 m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
3473 return;
3474 }
3475
3476 m_containsNestedSubpatterns = true;
3477
3478 // Select the 'Generic' nodes.
3479 parenthesesBeginOpCode = OpParenthesesSubpatternBegin;
3480 parenthesesEndOpCode = OpParenthesesSubpatternEnd;
3481
3482 // If there is more than one alternative we cannot use the 'simple' nodes.
3483 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
3484 alternativeBeginOpCode = OpNestedAlternativeBegin;
3485 alternativeNextOpCode = OpNestedAlternativeNext;
3486 alternativeEndOpCode = OpNestedAlternativeEnd;
3487 }
3488#else
3489 // This subpattern is not supported by the JIT.
3490 m_failureReason = JITFailureReason::ParenthesizedSubpattern;
3491 return;
3492#endif
3493 }
3494
3495 size_t parenBegin = m_ops.size();
3496 m_ops.append(parenthesesBeginOpCode);
3497
3498 m_ops.append(alternativeBeginOpCode);
3499 m_ops.last().m_previousOp = notFound;
3500 m_ops.last().m_term = term;
3501 Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3502 for (unsigned i = 0; i < alternatives.size(); ++i) {
3503 size_t lastOpIndex = m_ops.size() - 1;
3504
3505 PatternAlternative* nestedAlternative = alternatives[i].get();
3506 opCompileAlternative(nestedAlternative);
3507
3508 size_t thisOpIndex = m_ops.size();
3509 m_ops.append(YarrOp(alternativeNextOpCode));
3510
3511 YarrOp& lastOp = m_ops[lastOpIndex];
3512 YarrOp& thisOp = m_ops[thisOpIndex];
3513
3514 lastOp.m_alternative = nestedAlternative;
3515 lastOp.m_nextOp = thisOpIndex;
3516 thisOp.m_previousOp = lastOpIndex;
3517 thisOp.m_term = term;
3518 }
3519 YarrOp& lastOp = m_ops.last();
3520 ASSERT(lastOp.m_op == alternativeNextOpCode);
3521 lastOp.m_op = alternativeEndOpCode;
3522 lastOp.m_alternative = 0;
3523 lastOp.m_nextOp = notFound;
3524
3525 size_t parenEnd = m_ops.size();
3526 m_ops.append(parenthesesEndOpCode);
3527
3528 m_ops[parenBegin].m_term = term;
3529 m_ops[parenBegin].m_previousOp = notFound;
3530 m_ops[parenBegin].m_nextOp = parenEnd;
3531 m_ops[parenEnd].m_term = term;
3532 m_ops[parenEnd].m_previousOp = parenBegin;
3533 m_ops[parenEnd].m_nextOp = notFound;
3534 }
3535
3536 // opCompileParentheticalAssertion
3537 // Emits ops for a parenthetical assertion. These consist of an
3538 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
3539 // the alternatives, with these wrapped by an outer pair of
3540 // OpParentheticalAssertionBegin/End nodes.
3541 // We can always use the OpSimpleNestedAlternative nodes in the
3542 // case of parenthetical assertions since these only ever match
3543 // once, and will never backtrack back into the assertion.
3544 void opCompileParentheticalAssertion(PatternTerm* term)
3545 {
3546 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3547 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3548 return;
3549 }
3550
3551 size_t parenBegin = m_ops.size();
3552 m_ops.append(OpParentheticalAssertionBegin);
3553
3554 m_ops.append(OpSimpleNestedAlternativeBegin);
3555 m_ops.last().m_previousOp = notFound;
3556 m_ops.last().m_term = term;
3557 Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3558 for (unsigned i = 0; i < alternatives.size(); ++i) {
3559 size_t lastOpIndex = m_ops.size() - 1;
3560
3561 PatternAlternative* nestedAlternative = alternatives[i].get();
3562 opCompileAlternative(nestedAlternative);
3563
3564 size_t thisOpIndex = m_ops.size();
3565 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
3566
3567 YarrOp& lastOp = m_ops[lastOpIndex];
3568 YarrOp& thisOp = m_ops[thisOpIndex];
3569
3570 lastOp.m_alternative = nestedAlternative;
3571 lastOp.m_nextOp = thisOpIndex;
3572 thisOp.m_previousOp = lastOpIndex;
3573 thisOp.m_term = term;
3574 }
3575 YarrOp& lastOp = m_ops.last();
3576 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
3577 lastOp.m_op = OpSimpleNestedAlternativeEnd;
3578 lastOp.m_alternative = 0;
3579 lastOp.m_nextOp = notFound;
3580
3581 size_t parenEnd = m_ops.size();
3582 m_ops.append(OpParentheticalAssertionEnd);
3583
3584 m_ops[parenBegin].m_term = term;
3585 m_ops[parenBegin].m_previousOp = notFound;
3586 m_ops[parenBegin].m_nextOp = parenEnd;
3587 m_ops[parenEnd].m_term = term;
3588 m_ops[parenEnd].m_previousOp = parenBegin;
3589 m_ops[parenEnd].m_nextOp = notFound;
3590 }
3591
3592 // opCompileAlternative
3593 // Called to emit nodes for all terms in an alternative.
3594 void opCompileAlternative(PatternAlternative* alternative)
3595 {
3596 optimizeAlternative(alternative);
3597
3598 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
3599 PatternTerm* term = &alternative->m_terms[i];
3600
3601 switch (term->type) {
3602 case PatternTerm::TypeParenthesesSubpattern:
3603 opCompileParenthesesSubpattern(term);
3604 break;
3605
3606 case PatternTerm::TypeParentheticalAssertion:
3607 opCompileParentheticalAssertion(term);
3608 break;
3609
3610 default:
3611 m_ops.append(term);
3612 }
3613 }
3614 }
3615
3616 // opCompileBody
3617 // This method compiles the body disjunction of the regular expression.
3618 // The body consists of two sets of alternatives - zero or more 'once
3619 // through' (BOL anchored) alternatives, followed by zero or more
3620 // repeated alternatives.
3621 // For each of these two sets of alteratives, if not empty they will be
3622 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
3623 // 'begin' node referencing the first alternative, and 'next' nodes
3624 // referencing any further alternatives. The begin/next/end nodes are
3625 // linked together in a doubly linked list. In the case of repeating
3626 // alternatives, the end node is also linked back to the beginning.
3627 // If no repeating alternatives exist, then a OpMatchFailed node exists
3628 // to return the failing result.
3629 void opCompileBody(PatternDisjunction* disjunction)
3630 {
3631 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3632 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3633 return;
3634 }
3635
3636 Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
3637 size_t currentAlternativeIndex = 0;
3638
3639 // Emit the 'once through' alternatives.
3640 if (alternatives.size() && alternatives[0]->onceThrough()) {
3641 m_ops.append(YarrOp(OpBodyAlternativeBegin));
3642 m_ops.last().m_previousOp = notFound;
3643
3644 do {
3645 size_t lastOpIndex = m_ops.size() - 1;
3646 PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
3647 opCompileAlternative(alternative);
3648
3649 size_t thisOpIndex = m_ops.size();
3650 m_ops.append(YarrOp(OpBodyAlternativeNext));
3651
3652 YarrOp& lastOp = m_ops[lastOpIndex];
3653 YarrOp& thisOp = m_ops[thisOpIndex];
3654
3655 lastOp.m_alternative = alternative;
3656 lastOp.m_nextOp = thisOpIndex;
3657 thisOp.m_previousOp = lastOpIndex;
3658
3659 ++currentAlternativeIndex;
3660 } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
3661
3662 YarrOp& lastOp = m_ops.last();
3663
3664 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3665 lastOp.m_op = OpBodyAlternativeEnd;
3666 lastOp.m_alternative = 0;
3667 lastOp.m_nextOp = notFound;
3668 }
3669
3670 if (currentAlternativeIndex == alternatives.size()) {
3671 m_ops.append(YarrOp(OpMatchFailed));
3672 return;
3673 }
3674
3675 // Emit the repeated alternatives.
3676 size_t repeatLoop = m_ops.size();
3677 m_ops.append(YarrOp(OpBodyAlternativeBegin));
3678 m_ops.last().m_previousOp = notFound;
3679 do {
3680 size_t lastOpIndex = m_ops.size() - 1;
3681 PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
3682 ASSERT(!alternative->onceThrough());
3683 opCompileAlternative(alternative);
3684
3685 size_t thisOpIndex = m_ops.size();
3686 m_ops.append(YarrOp(OpBodyAlternativeNext));
3687
3688 YarrOp& lastOp = m_ops[lastOpIndex];
3689 YarrOp& thisOp = m_ops[thisOpIndex];
3690
3691 lastOp.m_alternative = alternative;
3692 lastOp.m_nextOp = thisOpIndex;
3693 thisOp.m_previousOp = lastOpIndex;
3694
3695 ++currentAlternativeIndex;
3696 } while (currentAlternativeIndex < alternatives.size());
3697 YarrOp& lastOp = m_ops.last();
3698 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3699 lastOp.m_op = OpBodyAlternativeEnd;
3700 lastOp.m_alternative = 0;
3701 lastOp.m_nextOp = repeatLoop;
3702 }
3703
3704 void generateTryReadUnicodeCharacterHelper()
3705 {
3706#ifdef JIT_UNICODE_EXPRESSIONS
3707 if (m_tryReadUnicodeCharacterCalls.isEmpty())
3708 return;
3709
3710 ASSERT(m_decodeSurrogatePairs);
3711
3712 m_tryReadUnicodeCharacterEntry = label();
3713
3714 tagReturnAddress();
3715
3716 tryReadUnicodeCharImpl(regT0);
3717
3718 ret();
3719#endif
3720 }
3721
3722 void generateEnter()
3723 {
3724#if CPU(X86_64)
3725 push(X86Registers::ebp);
3726 move(stackPointerRegister, X86Registers::ebp);
3727
3728 if (m_pattern.m_saveInitialStartValue)
3729 push(X86Registers::ebx);
3730
3731#if OS(WINDOWS)
3732 push(X86Registers::edi);
3733#endif
3734#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3735 if (m_containsNestedSubpatterns) {
3736#if OS(WINDOWS)
3737 push(X86Registers::esi);
3738#endif
3739 push(X86Registers::r12);
3740 }
3741#endif
3742
3743 if (m_decodeSurrogatePairs) {
3744 push(X86Registers::r13);
3745 push(X86Registers::r14);
3746 push(X86Registers::r15);
3747
3748 move(TrustedImm32(0xd800), leadingSurrogateTag);
3749 }
3750 // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
3751 zeroExtend32ToPtr(index, index);
3752 zeroExtend32ToPtr(length, length);
3753#if OS(WINDOWS)
3754 if (compileMode == IncludeSubpatterns)
3755 loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
3756 // rcx is the pointer to the allocated space for result in x64 Windows.
3757 push(X86Registers::ecx);
3758#endif
3759#elif CPU(X86)
3760 push(X86Registers::ebp);
3761 move(stackPointerRegister, X86Registers::ebp);
3762 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
3763 push(X86Registers::ebx);
3764 push(X86Registers::edi);
3765 push(X86Registers::esi);
3766 // load output into edi (2 = saved ebp + return address).
3767 #if COMPILER(MSVC)
3768 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
3769 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
3770 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
3771 if (compileMode == IncludeSubpatterns)
3772 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
3773 #else
3774 if (compileMode == IncludeSubpatterns)
3775 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
3776 #endif
3777#elif CPU(ARM64)
3778 tagReturnAddress();
3779 if (m_decodeSurrogatePairs) {
3780 pushPair(framePointerRegister, linkRegister);
3781 move(TrustedImm32(0x10000), supplementaryPlanesBase);
3782 move(TrustedImm32(0xd800), leadingSurrogateTag);
3783 move(TrustedImm32(0xdc00), trailingSurrogateTag);
3784 }
3785
3786 // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
3787 zeroExtend32ToPtr(index, index);
3788 zeroExtend32ToPtr(length, length);
3789#elif CPU(ARM_THUMB2)
3790 push(ARMRegisters::r4);
3791 push(ARMRegisters::r5);
3792 push(ARMRegisters::r6);
3793 push(ARMRegisters::r8);
3794#elif CPU(MIPS)
3795 // Do nothing.
3796#endif
3797
3798 store8(TrustedImm32(1), &m_vm->isExecutingInRegExpJIT);
3799 }
3800
3801 void generateReturn()
3802 {
3803 store8(TrustedImm32(0), &m_vm->isExecutingInRegExpJIT);
3804
3805#if CPU(X86_64)
3806#if OS(WINDOWS)
3807 // Store the return value in the allocated space pointed by rcx.
3808 pop(X86Registers::ecx);
3809 store64(returnRegister, Address(X86Registers::ecx));
3810 store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
3811 move(X86Registers::ecx, returnRegister);
3812#endif
3813 if (m_decodeSurrogatePairs) {
3814 pop(X86Registers::r15);
3815 pop(X86Registers::r14);
3816 pop(X86Registers::r13);
3817 }
3818
3819#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3820 if (m_containsNestedSubpatterns) {
3821 pop(X86Registers::r12);
3822#if OS(WINDOWS)
3823 pop(X86Registers::esi);
3824#endif
3825 }
3826#endif
3827#if OS(WINDOWS)
3828 pop(X86Registers::edi);
3829#endif
3830
3831 if (m_pattern.m_saveInitialStartValue)
3832 pop(X86Registers::ebx);
3833 pop(X86Registers::ebp);
3834#elif CPU(X86)
3835 pop(X86Registers::esi);
3836 pop(X86Registers::edi);
3837 pop(X86Registers::ebx);
3838 pop(X86Registers::ebp);
3839#elif CPU(ARM64)
3840 if (m_decodeSurrogatePairs)
3841 popPair(framePointerRegister, linkRegister);
3842#elif CPU(ARM_THUMB2)
3843 pop(ARMRegisters::r8);
3844 pop(ARMRegisters::r6);
3845 pop(ARMRegisters::r5);
3846 pop(ARMRegisters::r4);
3847#elif CPU(MIPS)
3848 // Do nothing
3849#endif
3850 ret();
3851 }
3852
3853public:
3854 YarrGenerator(VM* vm, YarrPattern& pattern, String& patternString, YarrCodeBlock& codeBlock, YarrCharSize charSize)
3855 : m_vm(vm)
3856 , m_pattern(pattern)
3857 , m_patternString(patternString)
3858 , m_codeBlock(codeBlock)
3859 , m_charSize(charSize)
3860 , m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
3861 , m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase())
3862 , m_fixedSizedAlternative(false)
3863 , m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
3864#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3865 , m_containsNestedSubpatterns(false)
3866 , m_parenContextSizes(compileMode == IncludeSubpatterns ? m_pattern.m_numSubpatterns : 0, m_pattern.m_body->m_callFrameSize)
3867#endif
3868 {
3869 }
3870
3871 void compile()
3872 {
3873 YarrCodeBlock& codeBlock = m_codeBlock;
3874
3875#ifndef JIT_UNICODE_EXPRESSIONS
3876 if (m_decodeSurrogatePairs) {
3877 codeBlock.setFallBackWithFailureReason(JITFailureReason::DecodeSurrogatePair);
3878 return;
3879 }
3880#endif
3881
3882 if (m_pattern.m_containsBackreferences
3883#if ENABLE(YARR_JIT_BACKREFERENCES)
3884 && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
3885#endif
3886 ) {
3887 codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
3888 return;
3889 }
3890
3891 // We need to compile before generating code since we set flags based on compilation that
3892 // are used during generation.
3893 opCompileBody(m_pattern.m_body);
3894
3895 if (m_failureReason) {
3896 codeBlock.setFallBackWithFailureReason(*m_failureReason);
3897 return;
3898 }
3899
3900 if (UNLIKELY(Options::dumpDisassembly() || Options::dumpRegExpDisassembly()))
3901 m_disassembler = std::make_unique<YarrDisassembler>(this);
3902
3903 if (m_disassembler)
3904 m_disassembler->setStartOfCode(label());
3905
3906#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3907 if (m_containsNestedSubpatterns)
3908 codeBlock.setUsesPatternContextBuffer();
3909#endif
3910
3911 generateEnter();
3912
3913 Jump hasInput = checkInput();
3914 generateFailReturn();
3915 hasInput.link(this);
3916
3917#ifdef JIT_UNICODE_EXPRESSIONS
3918 if (m_decodeSurrogatePairs)
3919 getEffectiveAddress(BaseIndex(input, length, TimesTwo), endOfStringAddress);
3920#endif
3921
3922#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3923 if (m_containsNestedSubpatterns)
3924 move(TrustedImm32(matchLimit), remainingMatchCount);
3925#endif
3926
3927 if (compileMode == IncludeSubpatterns) {
3928 for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
3929 store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
3930 }
3931
3932 if (!m_pattern.m_body->m_hasFixedSize)
3933 setMatchStart(index);
3934
3935 initCallFrame();
3936
3937#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3938 if (m_containsNestedSubpatterns)
3939 initParenContextFreeList();
3940#endif
3941
3942 if (m_pattern.m_saveInitialStartValue) {
3943#ifdef HAVE_INITIAL_START_REG
3944 move(index, initialStart);
3945#else
3946 storeToFrame(index, m_pattern.m_initialStartValueFrameLocation);
3947#endif
3948 }
3949
3950 generate();
3951 if (m_disassembler)
3952 m_disassembler->setEndOfGenerate(label());
3953 backtrack();
3954 if (m_disassembler)
3955 m_disassembler->setEndOfBacktrack(label());
3956
3957 generateTryReadUnicodeCharacterHelper();
3958
3959 generateJITFailReturn();
3960
3961 if (m_disassembler)
3962 m_disassembler->setEndOfCode(label());
3963
3964 LinkBuffer linkBuffer(*this, REGEXP_CODE_ID, JITCompilationCanFail);
3965 if (linkBuffer.didFailToAllocate()) {
3966 codeBlock.setFallBackWithFailureReason(JITFailureReason::ExecutableMemoryAllocationFailure);
3967 return;
3968 }
3969
3970 if (!m_tryReadUnicodeCharacterCalls.isEmpty()) {
3971 CodeLocationLabel<NoPtrTag> tryReadUnicodeCharacterHelper = linkBuffer.locationOf<NoPtrTag>(m_tryReadUnicodeCharacterEntry);
3972
3973 for (auto call : m_tryReadUnicodeCharacterCalls)
3974 linkBuffer.link(call, tryReadUnicodeCharacterHelper);
3975 }
3976
3977 m_backtrackingState.linkDataLabels(linkBuffer);
3978
3979 if (m_disassembler)
3980 m_disassembler->dump(linkBuffer);
3981
3982 if (compileMode == MatchOnly) {
3983 if (m_charSize == Char8)
3984 codeBlock.set8BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly8BitPtrTag, "Match-only 8-bit regular expression"));
3985 else
3986 codeBlock.set16BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly16BitPtrTag, "Match-only 16-bit regular expression"));
3987 } else {
3988 if (m_charSize == Char8)
3989 codeBlock.set8BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr8BitPtrTag, "8-bit regular expression"));
3990 else
3991 codeBlock.set16BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr16BitPtrTag, "16-bit regular expression"));
3992 }
3993 if (m_failureReason)
3994 codeBlock.setFallBackWithFailureReason(*m_failureReason);
3995 }
3996
3997 const char* variant() override
3998 {
3999 if (compileMode == MatchOnly) {
4000 if (m_charSize == Char8)
4001 return "Match-only 8-bit regular expression";
4002
4003 return "Match-only 16-bit regular expression";
4004 }
4005
4006 if (m_charSize == Char8)
4007 return "8-bit regular expression";
4008
4009 return "16-bit regular expression";
4010 }
4011
4012 unsigned opCount() override
4013 {
4014 return m_ops.size();
4015 }
4016
4017 void dumpPatternString(PrintStream& out) override
4018 {
4019 m_pattern.dumpPatternString(out, m_patternString);
4020 }
4021
4022 int dumpFor(PrintStream& out, unsigned opIndex) override
4023 {
4024 if (opIndex >= opCount())
4025 return 0;
4026
4027 out.printf("%4d:", opIndex);
4028
4029 YarrOp& op = m_ops[opIndex];
4030 PatternTerm* term = op.m_term;
4031 switch (op.m_op) {
4032 case OpTerm: {
4033 out.print("OpTerm ");
4034 switch (term->type) {
4035 case PatternTerm::TypeAssertionBOL:
4036 out.print("Assert BOL");
4037 break;
4038
4039 case PatternTerm::TypeAssertionEOL:
4040 out.print("Assert EOL");
4041 break;
4042
4043 case PatternTerm::TypeBackReference:
4044 out.printf("BackReference pattern #%u", term->backReferenceSubpatternId);
4045 term->dumpQuantifier(out);
4046 break;
4047
4048 case PatternTerm::TypePatternCharacter:
4049 out.print("TypePatternCharacter ");
4050 dumpUChar32(out, term->patternCharacter);
4051 if (m_pattern.ignoreCase())
4052 out.print(" ignore case");
4053
4054 term->dumpQuantifier(out);
4055 break;
4056
4057 case PatternTerm::TypeCharacterClass:
4058 out.print("TypePatternCharacterClass ");
4059 if (term->invert())
4060 out.print("not ");
4061 dumpCharacterClass(out, &m_pattern, term->characterClass);
4062 term->dumpQuantifier(out);
4063 break;
4064
4065 case PatternTerm::TypeAssertionWordBoundary:
4066 out.printf("%sword boundary", term->invert() ? "non-" : "");
4067 break;
4068
4069 case PatternTerm::TypeDotStarEnclosure:
4070 out.print(".* enclosure");
4071 break;
4072
4073 case PatternTerm::TypeForwardReference:
4074 out.print("TypeForwardReference <not handled>");
4075 break;
4076
4077 case PatternTerm::TypeParenthesesSubpattern:
4078 case PatternTerm::TypeParentheticalAssertion:
4079 RELEASE_ASSERT_NOT_REACHED();
4080 break;
4081 }
4082
4083 if (op.m_isDeadCode)
4084 out.print(" already handled");
4085 out.print("\n");
4086 return(0);
4087 }
4088
4089 case OpBodyAlternativeBegin:
4090 out.printf("OpBodyAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4091 return(0);
4092
4093 case OpBodyAlternativeNext:
4094 out.printf("OpBodyAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4095 return(0);
4096
4097 case OpBodyAlternativeEnd:
4098 out.print("OpBodyAlternativeEnd\n");
4099 return(0);
4100
4101 case OpSimpleNestedAlternativeBegin:
4102 out.printf("OpSimpleNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4103 return(1);
4104
4105 case OpNestedAlternativeBegin:
4106 out.printf("OpNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4107 return(1);
4108
4109 case OpSimpleNestedAlternativeNext:
4110 out.printf("OpSimpleNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4111 return(0);
4112
4113 case OpNestedAlternativeNext:
4114 out.printf("OpNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4115 return(0);
4116
4117 case OpSimpleNestedAlternativeEnd:
4118 out.print("OpSimpleNestedAlternativeEnd");
4119 term->dumpQuantifier(out);
4120 out.print("\n");
4121 return(-1);
4122
4123 case OpNestedAlternativeEnd:
4124 out.print("OpNestedAlternativeEnd");
4125 term->dumpQuantifier(out);
4126 out.print("\n");
4127 return(-1);
4128
4129 case OpParenthesesSubpatternOnceBegin:
4130 out.print("OpParenthesesSubpatternOnceBegin ");
4131 if (term->capture())
4132 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4133 else
4134 out.print("non-capturing");
4135 term->dumpQuantifier(out);
4136 out.print("\n");
4137 return(0);
4138
4139 case OpParenthesesSubpatternOnceEnd:
4140 out.print("OpParenthesesSubpatternOnceEnd ");
4141 if (term->capture())
4142 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4143 else
4144 out.print("non-capturing");
4145 term->dumpQuantifier(out);
4146 out.print("\n");
4147 return(0);
4148
4149 case OpParenthesesSubpatternTerminalBegin:
4150 out.print("OpParenthesesSubpatternTerminalBegin ");
4151 if (term->capture())
4152 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
4153 else
4154 out.print("non-capturing\n");
4155 return(0);
4156
4157 case OpParenthesesSubpatternTerminalEnd:
4158 out.print("OpParenthesesSubpatternTerminalEnd ");
4159 if (term->capture())
4160 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
4161 else
4162 out.print("non-capturing\n");
4163 return(0);
4164
4165 case OpParenthesesSubpatternBegin:
4166 out.print("OpParenthesesSubpatternBegin ");
4167 if (term->capture())
4168 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4169 else
4170 out.print("non-capturing");
4171 term->dumpQuantifier(out);
4172 out.print("\n");
4173 return(0);
4174
4175 case OpParenthesesSubpatternEnd:
4176 out.print("OpParenthesesSubpatternEnd ");
4177 if (term->capture())
4178 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4179 else
4180 out.print("non-capturing");
4181 term->dumpQuantifier(out);
4182 out.print("\n");
4183 return(0);
4184
4185 case OpParentheticalAssertionBegin:
4186 out.printf("OpParentheticalAssertionBegin%s\n", term->invert() ? " inverted" : "");
4187 return(0);
4188
4189 case OpParentheticalAssertionEnd:
4190 out.printf("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
4191 return(0);
4192
4193 case OpMatchFailed:
4194 out.print("OpMatchFailed\n");
4195 return(0);
4196 }
4197
4198 return(0);
4199 }
4200
4201private:
4202 VM* m_vm;
4203
4204 YarrPattern& m_pattern;
4205 String& m_patternString;
4206
4207 YarrCodeBlock& m_codeBlock;
4208 YarrCharSize m_charSize;
4209
4210 // Used to detect regular expression constructs that are not currently
4211 // supported in the JIT; fall back to the interpreter when this is detected.
4212 Optional<JITFailureReason> m_failureReason;
4213
4214 bool m_decodeSurrogatePairs;
4215 bool m_unicodeIgnoreCase;
4216 bool m_fixedSizedAlternative;
4217 CanonicalMode m_canonicalMode;
4218#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
4219 bool m_containsNestedSubpatterns;
4220 ParenContextSizes m_parenContextSizes;
4221#endif
4222 JumpList m_abortExecution;
4223 JumpList m_hitMatchLimit;
4224 Vector<Call> m_tryReadUnicodeCharacterCalls;
4225 Label m_tryReadUnicodeCharacterEntry;
4226
4227 // The regular expression expressed as a linear sequence of operations.
4228 Vector<YarrOp, 128> m_ops;
4229
4230 // This records the current input offset being applied due to the current
4231 // set of alternatives we are nested within. E.g. when matching the
4232 // character 'b' within the regular expression /abc/, we will know that
4233 // the minimum size for the alternative is 3, checked upon entry to the
4234 // alternative, and that 'b' is at offset 1 from the start, and as such
4235 // when matching 'b' we need to apply an offset of -2 to the load.
4236 //
4237 // FIXME: This should go away. Rather than tracking this value throughout
4238 // code generation, we should gather this information up front & store it
4239 // on the YarrOp structure.
4240 Checked<unsigned> m_checkedOffset;
4241
4242 // This class records state whilst generating the backtracking path of code.
4243 BacktrackingState m_backtrackingState;
4244
4245 std::unique_ptr<YarrDisassembler> m_disassembler;
4246};
4247
4248static void dumpCompileFailure(JITFailureReason failure)
4249{
4250 switch (failure) {
4251 case JITFailureReason::DecodeSurrogatePair:
4252 dataLog("Can't JIT a pattern decoding surrogate pairs\n");
4253 break;
4254 case JITFailureReason::BackReference:
4255 dataLog("Can't JIT some patterns containing back references\n");
4256 break;
4257 case JITFailureReason::ForwardReference:
4258 dataLog("Can't JIT a pattern containing forward references\n");
4259 break;
4260 case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
4261 dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
4262 break;
4263 case JITFailureReason::ParenthesizedSubpattern:
4264 dataLog("Can't JIT a pattern containing parenthesized subpatterns\n");
4265 break;
4266 case JITFailureReason::FixedCountParenthesizedSubpattern:
4267 dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
4268 break;
4269 case JITFailureReason::ParenthesisNestedTooDeep:
4270 dataLog("Can't JIT pattern due to parentheses nested too deeply\n");
4271 break;
4272 case JITFailureReason::ExecutableMemoryAllocationFailure:
4273 dataLog("Can't JIT because of failure of allocation of executable memory\n");
4274 break;
4275 }
4276}
4277
4278void jitCompile(YarrPattern& pattern, String& patternString, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode)
4279{
4280 if (mode == MatchOnly)
4281 YarrGenerator<MatchOnly>(vm, pattern, patternString, codeBlock, charSize).compile();
4282 else
4283 YarrGenerator<IncludeSubpatterns>(vm, pattern, patternString, codeBlock, charSize).compile();
4284
4285 if (auto failureReason = codeBlock.failureReason()) {
4286 if (Options::dumpCompiledRegExpPatterns()) {
4287 pattern.dumpPatternString(WTF::dataFile(), patternString);
4288 dataLog(" : ");
4289 dumpCompileFailure(*failureReason);
4290 }
4291 }
4292}
4293
4294}}
4295
4296#endif
4297