YarrInterpreter.cpp source code [webkit/Source/JavaScriptCore/yarr/YarrInterpreter.cpp]

1	/*
2	* Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
3	* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright
11	* notice, this list of conditions and the following disclaimer in the
12	* documentation and/or other materials provided with the distribution.
13	*
14	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	*/
26
27	#include "config.h"
28	#include "YarrInterpreter.h"
29
30	#include "Options.h"
31	#include "SuperSampler.h"
32	#include "Yarr.h"
33	#include "YarrCanonicalize.h"
34	#include <wtf/BumpPointerAllocator.h>
35	#include <wtf/CheckedArithmetic.h>
36	#include <wtf/DataLog.h>
37	#include <wtf/text/CString.h>
38	#include <wtf/text/WTFString.h>
39
40	namespace JSC { namespace Yarr {
41
42	template<typename CharType>
43	class Interpreter {
44	public:
45	struct ParenthesesDisjunctionContext;
46
47	struct BackTrackInfoParentheses {
48	uintptr_t matchAmount;
49	ParenthesesDisjunctionContext* lastContext;
50	};
51
52	static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
53	{
54	context->next = backTrack->lastContext;
55	backTrack->lastContext = context;
56	++backTrack->matchAmount;
57	}
58
59	static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
60	{
61	RELEASE_ASSERT(backTrack->matchAmount);
62	RELEASE_ASSERT(backTrack->lastContext);
63	backTrack->lastContext = backTrack->lastContext->next;
64	--backTrack->matchAmount;
65	}
66
67	struct DisjunctionContext
68	{
69	DisjunctionContext() = default;
70
71	void* operator new(size_t, void* where)
72	{
73	return where;
74	}
75
76	static size_t allocationSize(unsigned numberOfFrames)
77	{
78	static_assert(alignof(DisjunctionContext) <= sizeof(void*), "");
79	size_t rawSize = (sizeof(DisjunctionContext) - sizeof(uintptr_t) + Checked<size_t>(numberOfFrames) * sizeof(uintptr_t)).unsafeGet();
80	size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
81	RELEASE_ASSERT(roundedSize >= rawSize);
82	return roundedSize;
83	}
84
85	int term { `0` };
86	unsigned matchBegin;
87	unsigned matchEnd;
88	uintptr_t frame[`1`];
89	};
90
91	DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
92	{
93	size_t size = DisjunctionContext::allocationSize(disjunction->m_frameSize);
94	allocatorPool = allocatorPool->ensureCapacity(size);
95	RELEASE_ASSERT(allocatorPool);
96	return new (allocatorPool->alloc(size)) DisjunctionContext();
97	}
98
99	void freeDisjunctionContext(DisjunctionContext* context)
100	{
101	allocatorPool = allocatorPool->dealloc(context);
102	}
103
104	struct ParenthesesDisjunctionContext
105	{
106	ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
107	{
108	unsigned firstSubpatternId = term.atom.subpatternId;
109	unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
110
111	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i) {
112	subpatternBackup[i] = output[(firstSubpatternId << `1`) + i];
113	output[(firstSubpatternId << `1`) + i] = offsetNoMatch;
114	}
115
116	new (getDisjunctionContext(term)) DisjunctionContext();
117	}
118
119	void* operator new(size_t, void* where)
120	{
121	return where;
122	}
123
124	void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
125	{
126	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i)
127	output[(firstSubpatternId << `1`) + i] = subpatternBackup[i];
128	}
129
130	DisjunctionContext* getDisjunctionContext(ByteTerm& term)
131	{
132	return bitwise_cast<DisjunctionContext>(bitwise_cast<uintptr_t>(this*) + allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns));
133	}
134
135	static size_t allocationSize(unsigned numberOfSubpatterns)
136	{
137	static_assert(alignof(ParenthesesDisjunctionContext) <= sizeof(void*), "");
138	size_t rawSize = (sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (Checked<size_t>(numberOfSubpatterns) * `2U`) * sizeof(unsigned)).unsafeGet();
139	size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
140	RELEASE_ASSERT(roundedSize >= rawSize);
141	return roundedSize;
142	}
143
144	ParenthesesDisjunctionContext* next { nullptr };
145	unsigned subpatternBackup[`1`];
146	};
147
148	ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
149	{
150	size_t size = (Checked<size_t>(ParenthesesDisjunctionContext::allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)) + DisjunctionContext::allocationSize(disjunction->m_frameSize)).unsafeGet();
151	allocatorPool = allocatorPool->ensureCapacity(size);
152	RELEASE_ASSERT(allocatorPool);
153	return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
154	}
155
156	void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
157	{
158	allocatorPool = allocatorPool->dealloc(context);
159	}
160
161	class InputStream {
162	public:
163	InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
164	: input(input)
165	, pos(start)
166	, length(length)
167	, decodeSurrogatePairs(decodeSurrogatePairs)
168	{
169	}
170
171	void next()
172	{
173	++pos;
174	}
175
176	void rewind(unsigned amount)
177	{
178	ASSERT(pos >= amount);
179	pos -= amount;
180	}
181
182	int read()
183	{
184	ASSERT(pos < length);
185	if (pos < length)
186	return input[pos];
187	return -`1`;
188	}
189
190	int readPair()
191	{
192	ASSERT(pos + `1` < length);
193	return input[pos] \| input[pos + `1`] << `16`;
194	}
195
196	int readChecked(unsigned negativePositionOffest)
197	{
198	RELEASE_ASSERT(pos >= negativePositionOffest);
199	unsigned p = pos - negativePositionOffest;
200	ASSERT(p < length);
201	int result = input[p];
202	if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + `1` < length && U16_IS_TRAIL(input[p + `1`])) {
203	if (atEnd())
204	return -`1`;
205
206	result = U16_GET_SUPPLEMENTARY(result, input[p + `1`]);
207	next();
208	}
209	return result;
210	}
211
212	int readSurrogatePairChecked(unsigned negativePositionOffset)
213	{
214	RELEASE_ASSERT(pos >= negativePositionOffset);
215	unsigned p = pos - negativePositionOffset;
216	ASSERT(p < length);
217	if (p + `1` >= length)
218	return -`1`;
219
220	int first = input[p];
221	int second = input[p + `1`];
222	if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
223	return U16_GET_SUPPLEMENTARY(first, second);
224
225	return -`1`;
226	}
227
228	int reread(unsigned from)
229	{
230	ASSERT(from < length);
231	int result = input[from];
232	if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + `1` < length && U16_IS_TRAIL(input[from + `1`]))
233	result = U16_GET_SUPPLEMENTARY(result, input[from + `1`]);
234	return result;
235	}
236
237	int prev()
238	{
239	ASSERT(!(pos > length));
240	if (pos && length)
241	return input[pos - `1`];
242	return -`1`;
243	}
244
245	unsigned getPos()
246	{
247	return pos;
248	}
249
250	void setPos(unsigned p)
251	{
252	pos = p;
253	}
254
255	bool atStart()
256	{
257	return pos == `0`;
258	}
259
260	bool atEnd()
261	{
262	return pos == length;
263	}
264
265	unsigned end()
266	{
267	return length;
268	}
269
270	bool checkInput(unsigned count)
271	{
272	if (((pos + count) <= length) && ((pos + count) >= pos)) {
273	pos += count;
274	return true;
275	}
276	return false;
277	}
278
279	void uncheckInput(unsigned count)
280	{
281	RELEASE_ASSERT(pos >= count);
282	pos -= count;
283	}
284
285	bool atStart(unsigned negativePositionOffset)
286	{
287	return pos == negativePositionOffset;
288	}
289
290	bool atEnd(unsigned negativePositionOffest)
291	{
292	RELEASE_ASSERT(pos >= negativePositionOffest);
293	return (pos - negativePositionOffest) == length;
294	}
295
296	bool isAvailableInput(unsigned offset)
297	{
298	return (((pos + offset) <= length) && ((pos + offset) >= pos));
299	}
300
301	private:
302	const CharType* input;
303	unsigned pos;
304	unsigned length;
305	bool decodeSurrogatePairs;
306	};
307
308	bool testCharacterClass(CharacterClass* characterClass, int ch)
309	{
310	auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) {
311	for (unsigned i = `0`; i < matches.size(); ++i) {
312	if (ch == matches [i])
313	return true;
314	}
315
316	return false;
317	};
318
319	auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) {
320	size_t low = `0`;
321	size_t high = matches.size() - `1`;
322
323	while (low <= high) {
324	size_t mid = low + (high - low) / `2`;
325	int diff = ch - matches [mid];
326	if (!diff)
327	return true;
328
329	if (diff < `0`) {
330	if (mid == low)
331	return false;
332	high = mid - `1`;
333	} else
334	low = mid + `1`;
335	}
336	return false;
337	};
338
339	auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
340	for (unsigned i = `0`; i < ranges.size(); ++i) {
341	if ((ch >= ranges [i].begin) && (ch <= ranges [i].end))
342	return true;
343	}
344
345	return false;
346	};
347
348	auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
349	size_t low = `0`;
350	size_t high = ranges.size() - `1`;
351
352	while (low <= high) {
353	size_t mid = low + (high - low) / `2`;
354	int rangeBeginDiff = ch - ranges [mid].begin;
355	if (rangeBeginDiff >= `0` && ch <= ranges [mid].end)
356	return true;
357
358	if (rangeBeginDiff < `0`) {
359	if (mid == low)
360	return false;
361	high = mid - `1`;
362	} else
363	low = mid + `1`;
364	}
365	return false;
366	};
367
368	if (characterClass->m_anyCharacter)
369	return true;
370
371	const size_t thresholdForBinarySearch = `6`;
372
373	if (!isASCII(ch)) {
374	if (characterClass->m_matchesUnicode.size()) {
375	if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) {
376	if (binarySearchMatches(characterClass->m_matchesUnicode))
377	return true;
378	} else if (linearSearchMatches(characterClass->m_matchesUnicode))
379	return true;
380	}
381
382	if (characterClass->m_rangesUnicode.size()) {
383	if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) {
384	if (binarySearchRanges(characterClass->m_rangesUnicode))
385	return true;
386	} else if (linearSearchRanges(characterClass->m_rangesUnicode))
387	return true;
388	}
389	} else {
390	if (characterClass->m_matches.size()) {
391	if (characterClass->m_matches.size() > thresholdForBinarySearch) {
392	if (binarySearchMatches(characterClass->m_matches))
393	return true;
394	} else if (linearSearchMatches(characterClass->m_matches))
395	return true;
396	}
397
398	if (characterClass->m_ranges.size()) {
399	if (characterClass->m_ranges.size() > thresholdForBinarySearch) {
400	if (binarySearchRanges(characterClass->m_ranges))
401	return true;
402	} else if (linearSearchRanges(characterClass->m_ranges))
403	return true;
404	}
405	}
406
407	return false;
408	}
409
410	bool checkCharacter(int testChar, unsigned negativeInputOffset)
411	{
412	return testChar == input.readChecked(negativeInputOffset);
413	}
414
415	bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
416	{
417	return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
418	}
419
420	bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
421	{
422	int ch = input.readChecked(negativeInputOffset);
423	return (loChar == ch) \|\| (hiChar == ch);
424	}
425
426	bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
427	{
428	bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
429	return invert ? !match : match;
430	}
431
432	bool checkCharacterClassDontAdvanceInputForNonBMP(CharacterClass* characterClass, unsigned negativeInputOffset)
433	{
434	int readCharacter = characterClass->hasOnlyNonBMPCharacters() ? input.readSurrogatePairChecked(negativeInputOffset) : input.readChecked(negativeInputOffset);
435	return testCharacterClass(characterClass, readCharacter);
436	}
437
438	bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
439	{
440	unsigned matchSize = (unsigned)(matchEnd - matchBegin);
441
442	if (!input.checkInput(matchSize))
443	return false;
444
445	for (unsigned i = `0`; i < matchSize; ++i) {
446	int oldCh = input.reread(matchBegin + i);
447	int ch;
448	if (!U_IS_BMP(oldCh)) {
449	ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
450	++i;
451	} else
452	ch = input.readChecked(negativeInputOffset + matchSize - i);
453
454	if (oldCh == ch)
455	continue;
456
457	if (pattern->ignoreCase()) {
458	// See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
459	// patterns, Unicode values are never allowed to match against ASCII ones.
460	// For Unicode, we need to check all canonical equivalents of a character.
461	if (!unicode && (isASCII(oldCh) \|\| isASCII(ch))) {
462	if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
463	continue;
464	} else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
465	continue;
466	}
467
468	input.uncheckInput(matchSize);
469	return false;
470	}
471
472	return true;
473	}
474
475	bool matchAssertionBOL(ByteTerm& term)
476	{
477	return (input.atStart(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + `1`)));
478	}
479
480	bool matchAssertionEOL(ByteTerm& term)
481	{
482	if (term.inputPosition)
483	return (input.atEnd(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
484
485	return (input.atEnd()) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
486	}
487
488	bool matchAssertionWordBoundary(ByteTerm& term)
489	{
490	bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + `1`));
491	bool readIsWordchar;
492	if (term.inputPosition)
493	readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
494	else
495	readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
496
497	bool wordBoundary = prevIsWordchar != readIsWordchar;
498	return term.invert() ? !wordBoundary : wordBoundary;
499	}
500
501	bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
502	{
503	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
504
505	switch (term.atom.quantityType) {
506	case QuantifierFixedCount:
507	break;
508
509	case QuantifierGreedy:
510	if (backTrack->matchAmount) {
511	--backTrack->matchAmount;
512	input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
513	return true;
514	}
515	break;
516
517	case QuantifierNonGreedy:
518	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
519	++backTrack->matchAmount;
520	if (checkCharacter(term.atom.patternCharacter, term.inputPosition + `1`))
521	return true;
522	}
523	input.setPos(backTrack->begin);
524	break;
525	}
526
527	return false;
528	}
529
530	bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
531	{
532	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
533
534	switch (term.atom.quantityType) {
535	case QuantifierFixedCount:
536	break;
537
538	case QuantifierGreedy:
539	if (backTrack->matchAmount) {
540	--backTrack->matchAmount;
541	input.uncheckInput(`1`);
542	return true;
543	}
544	break;
545
546	case QuantifierNonGreedy:
547	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
548	++backTrack->matchAmount;
549	if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + `1`))
550	return true;
551	}
552	input.uncheckInput(backTrack->matchAmount);
553	break;
554	}
555
556	return false;
557	}
558
559	bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
560	{
561	ASSERT(term.type == ByteTerm::TypeCharacterClass);
562	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
563
564	switch (term.atom.quantityType) {
565	case QuantifierFixedCount: {
566	if (unicode) {
567	CharacterClass* charClass = term.atom.characterClass;
568	backTrack->begin = input.getPos();
569	unsigned matchAmount = `0`;
570	for (matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
571	if (term.invert()) {
572	if (!checkCharacterClass(charClass, term.invert(), term.inputPosition - matchAmount)) {
573	input.setPos(backTrack->begin);
574	return false;
575	}
576	} else {
577	unsigned matchOffset = matchAmount * (charClass->hasOnlyNonBMPCharacters() ? `2` : `1`);
578	if (!checkCharacterClassDontAdvanceInputForNonBMP(charClass, term.inputPosition - matchOffset)) {
579	input.setPos(backTrack->begin);
580	return false;
581	}
582	}
583	}
584
585	return true;
586	}
587
588	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
589	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
590	return false;
591	}
592	return true;
593	}
594
595	case QuantifierGreedy: {
596	unsigned position = input.getPos();
597	backTrack->begin = position;
598	unsigned matchAmount = `0`;
599	while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
600	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`)) {
601	input.setPos(position);
602	break;
603	}
604	++matchAmount;
605	position = input.getPos();
606	}
607	backTrack->matchAmount = matchAmount;
608
609	return true;
610	}
611
612	case QuantifierNonGreedy:
613	backTrack->begin = input.getPos();
614	backTrack->matchAmount = `0`;
615	return true;
616	}
617
618	RELEASE_ASSERT_NOT_REACHED();
619	return false;
620	}
621
622	bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
623	{
624	ASSERT(term.type == ByteTerm::TypeCharacterClass);
625	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
626
627	switch (term.atom.quantityType) {
628	case QuantifierFixedCount:
629	if (unicode)
630	input.setPos(backTrack->begin);
631	break;
632
633	case QuantifierGreedy:
634	if (backTrack->matchAmount) {
635	if (unicode) {
636	// Rematch one less match
637	input.setPos(backTrack->begin);
638	--backTrack->matchAmount;
639	for (unsigned matchAmount = `0`; (matchAmount < backTrack->matchAmount) && input.checkInput(`1`); ++matchAmount) {
640	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`)) {
641	input.uncheckInput(`1`);
642	break;
643	}
644	}
645	return true;
646	}
647	--backTrack->matchAmount;
648	input.uncheckInput(`1`);
649	return true;
650	}
651	break;
652
653	case QuantifierNonGreedy:
654	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
655	++backTrack->matchAmount;
656	if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`))
657	return true;
658	}
659	input.setPos(backTrack->begin);
660	break;
661	}
662
663	return false;
664	}
665
666	bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
667	{
668	ASSERT(term.type == ByteTerm::TypeBackReference);
669	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
670
671	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
672	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
673
674	// If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
675	// In this case the result of match is empty string like when it references to a parentheses with zero-width match.
676	// Eg.: /(a\1)/
677	if (matchEnd == offsetNoMatch)
678	return true;
679
680	if (matchBegin == offsetNoMatch)
681	return true;
682
683	ASSERT(matchBegin <= matchEnd);
684
685	if (matchBegin == matchEnd)
686	return true;
687
688	switch (term.atom.quantityType) {
689	case QuantifierFixedCount: {
690	backTrack->begin = input.getPos();
691	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
692	if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
693	input.setPos(backTrack->begin);
694	return false;
695	}
696	}
697	return true;
698	}
699
700	case QuantifierGreedy: {
701	unsigned matchAmount = `0`;
702	while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
703	++matchAmount;
704	backTrack->matchAmount = matchAmount;
705	return true;
706	}
707
708	case QuantifierNonGreedy:
709	backTrack->begin = input.getPos();
710	backTrack->matchAmount = `0`;
711	return true;
712	}
713
714	RELEASE_ASSERT_NOT_REACHED();
715	return false;
716	}
717
718	bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
719	{
720	ASSERT(term.type == ByteTerm::TypeBackReference);
721	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
722
723	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
724	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
725
726	if (matchBegin == offsetNoMatch)
727	return false;
728
729	ASSERT(matchBegin <= matchEnd);
730
731	if (matchBegin == matchEnd)
732	return false;
733
734	switch (term.atom.quantityType) {
735	case QuantifierFixedCount:
736	// for quantityMaxCount == 1, could rewind.
737	input.setPos(backTrack->begin);
738	break;
739
740	case QuantifierGreedy:
741	if (backTrack->matchAmount) {
742	--backTrack->matchAmount;
743	input.rewind(matchEnd - matchBegin);
744	return true;
745	}
746	break;
747
748	case QuantifierNonGreedy:
749	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
750	++backTrack->matchAmount;
751	return true;
752	}
753	input.setPos(backTrack->begin);
754	break;
755	}
756
757	return false;
758	}
759
760	void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
761	{
762	if (term.capture()) {
763	unsigned subpatternId = term.atom.subpatternId;
764	output[(subpatternId << `1`)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
765	output[(subpatternId << `1`) + `1`] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
766	}
767	}
768	void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
769	{
770	unsigned firstSubpatternId = term.atom.subpatternId;
771	unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
772	context->restoreOutput(output, firstSubpatternId, count);
773	}
774	JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
775	{
776	while (backTrack->matchAmount) {
777	ParenthesesDisjunctionContext* context = backTrack->lastContext;
778
779	JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
780	if (result == JSRegExpMatch)
781	return JSRegExpMatch;
782
783	resetMatches(term, context);
784	popParenthesesDisjunctionContext(backTrack);
785	freeParenthesesDisjunctionContext(context);
786
787	if (result != JSRegExpNoMatch)
788	return result;
789	}
790
791	return JSRegExpNoMatch;
792	}
793
794	bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
795	{
796	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
797	ASSERT(term.atom.quantityMaxCount == `1`);
798
799	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
800
801	switch (term.atom.quantityType) {
802	case QuantifierGreedy: {
803	// set this speculatively; if we get to the parens end this will be true.
804	backTrack->begin = input.getPos();
805	break;
806	}
807	case QuantifierNonGreedy: {
808	backTrack->begin = notFound;
809	context->term += term.atom.parenthesesWidth;
810	return true;
811	}
812	case QuantifierFixedCount:
813	break;
814	}
815
816	if (term.capture()) {
817	unsigned subpatternId = term.atom.subpatternId;
818	output[(subpatternId << `1`)] = input.getPos() - term.inputPosition;
819	}
820
821	return true;
822	}
823
824	bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
825	{
826	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
827	ASSERT(term.atom.quantityMaxCount == `1`);
828
829	if (term.capture()) {
830	unsigned subpatternId = term.atom.subpatternId;
831	output[(subpatternId << `1`) + `1`] = input.getPos() - term.inputPosition;
832	}
833
834	if (term.atom.quantityType == QuantifierFixedCount)
835	return true;
836
837	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
838	return backTrack->begin != input.getPos();
839	}
840
841	bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
842	{
843	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
844	ASSERT(term.atom.quantityMaxCount == `1`);
845
846	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
847
848	if (term.capture()) {
849	unsigned subpatternId = term.atom.subpatternId;
850	output[(subpatternId << `1`)] = offsetNoMatch;
851	output[(subpatternId << `1`) + `1`] = offsetNoMatch;
852	}
853
854	switch (term.atom.quantityType) {
855	case QuantifierGreedy:
856	// if we backtrack to this point, there is another chance - try matching nothing.
857	ASSERT(backTrack->begin != notFound);
858	backTrack->begin = notFound;
859	context->term += term.atom.parenthesesWidth;
860	return true;
861	case QuantifierNonGreedy:
862	ASSERT(backTrack->begin != notFound);
863	FALLTHROUGH;
864	case QuantifierFixedCount:
865	break;
866	}
867
868	return false;
869	}
870
871	bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
872	{
873	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
874	ASSERT(term.atom.quantityMaxCount == `1`);
875
876	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
877
878	switch (term.atom.quantityType) {
879	case QuantifierGreedy:
880	if (backTrack->begin == notFound) {
881	context->term -= term.atom.parenthesesWidth;
882	return false;
883	}
884	FALLTHROUGH;
885	case QuantifierNonGreedy:
886	if (backTrack->begin == notFound) {
887	backTrack->begin = input.getPos();
888	if (term.capture()) {
889	// Technically this access to inputPosition should be accessing the begin term's
890	// inputPosition, but for repeats other than fixed these values should be
891	// the same anyway! (We don't pre-check for greedy or non-greedy matches.)
892	ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
893	ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
894	unsigned subpatternId = term.atom.subpatternId;
895	output[subpatternId << `1`] = input.getPos() - term.inputPosition;
896	}
897	context->term -= term.atom.parenthesesWidth;
898	return true;
899	}
900	FALLTHROUGH;
901	case QuantifierFixedCount:
902	break;
903	}
904
905	return false;
906	}
907
908	bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
909	{
910	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
911	ASSERT(term.atom.quantityType == QuantifierGreedy);
912	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
913	ASSERT(!term.capture());
914
915	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
916	backTrack->begin = input.getPos();
917	return true;
918	}
919
920	bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
921	{
922	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
923
924	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
925	// Empty match is a failed match.
926	if (backTrack->begin == input.getPos())
927	return false;
928
929	// Successful match! Okay, what's next? - loop around and try to match more!
930	context->term -= (term.atom.parenthesesWidth + `1`);
931	return true;
932	}
933
934	bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
935	{
936	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
937	ASSERT(term.atom.quantityType == QuantifierGreedy);
938	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
939	ASSERT(!term.capture());
940
941	// If we backtrack to this point, we have failed to match this iteration of the parens.
942	// Since this is greedy / zero minimum a failed is also accepted as a match!
943	context->term += term.atom.parenthesesWidth;
944	return true;
945	}
946
947	bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
948	{
949	// 'Terminal' parentheses are at the end of the regex, and as such a match past end
950	// should always be returned as a successful match - we should never backtrack to here.
951	RELEASE_ASSERT_NOT_REACHED();
952	return false;
953	}
954
955	bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
956	{
957	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
958	ASSERT(term.atom.quantityMaxCount == `1`);
959
960	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
961
962	backTrack->begin = input.getPos();
963	return true;
964	}
965
966	bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
967	{
968	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
969	ASSERT(term.atom.quantityMaxCount == `1`);
970
971	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
972
973	input.setPos(backTrack->begin);
974
975	// We've reached the end of the parens; if they are inverted, this is failure.
976	if (term.invert()) {
977	context->term -= term.atom.parenthesesWidth;
978	return false;
979	}
980
981	return true;
982	}
983
984	bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
985	{
986	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
987	ASSERT(term.atom.quantityMaxCount == `1`);
988
989	// We've failed to match parens; if they are inverted, this is win!
990	if (term.invert()) {
991	context->term += term.atom.parenthesesWidth;
992	return true;
993	}
994
995	return false;
996	}
997
998	bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
999	{
1000	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
1001	ASSERT(term.atom.quantityMaxCount == `1`);
1002
1003	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
1004
1005	input.setPos(backTrack->begin);
1006
1007	context->term -= term.atom.parenthesesWidth;
1008	return false;
1009	}
1010
1011	JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
1012	{
1013	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1014
1015	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1016	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1017
1018	backTrack->matchAmount = `0`;
1019	backTrack->lastContext = `0`;
1020
1021	ASSERT(term.atom.quantityType != QuantifierFixedCount \|\| term.atom.quantityMinCount == term.atom.quantityMaxCount);
1022
1023	unsigned minimumMatchCount = term.atom.quantityMinCount;
1024	JSRegExpResult fixedMatchResult;
1025
1026	// Handle fixed matches and the minimum part of a variable length match.
1027	if (minimumMatchCount) {
1028	// While we haven't yet reached our fixed limit,
1029	while (backTrack->matchAmount < minimumMatchCount) {
1030	// Try to do a match, and it it succeeds, add it to the list.
1031	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1032	fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1033	if (fixedMatchResult == JSRegExpMatch)
1034	appendParenthesesDisjunctionContext(backTrack, context);
1035	else {
1036	// The match failed; try to find an alternate point to carry on from.
1037	resetMatches(term, context);
1038	freeParenthesesDisjunctionContext(context);
1039
1040	if (fixedMatchResult != JSRegExpNoMatch)
1041	return fixedMatchResult;
1042	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1043	if (backtrackResult != JSRegExpMatch)
1044	return backtrackResult;
1045	}
1046	}
1047
1048	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1049	recordParenthesesMatch(term, context);
1050	}
1051
1052	switch (term.atom.quantityType) {
1053	case QuantifierFixedCount: {
1054	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1055	return JSRegExpMatch;
1056	}
1057
1058	case QuantifierGreedy: {
1059	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1060	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1061	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1062	if (result == JSRegExpMatch)
1063	appendParenthesesDisjunctionContext(backTrack, context);
1064	else {
1065	resetMatches(term, context);
1066	freeParenthesesDisjunctionContext(context);
1067
1068	if (result != JSRegExpNoMatch)
1069	return result;
1070
1071	break;
1072	}
1073	}
1074
1075	if (backTrack->matchAmount) {
1076	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1077	recordParenthesesMatch(term, context);
1078	}
1079	return JSRegExpMatch;
1080	}
1081
1082	case QuantifierNonGreedy:
1083	return JSRegExpMatch;
1084	}
1085
1086	RELEASE_ASSERT_NOT_REACHED();
1087	return JSRegExpErrorNoMatch;
1088	}
1089
1090	// Rules for backtracking differ depending on whether this is greedy or non-greedy.
1091	//
1092	// Greedy matches never should try just adding more - you should already have done
1093	// the 'more' cases. Always backtrack, at least a leetle bit. However cases where
1094	// you backtrack an item off the list needs checking, since we'll never have matched
1095	// the one less case. Tracking forwards, still add as much as possible.
1096	//
1097	// Non-greedy, we've already done the one less case, so don't match on popping.
1098	// We haven't done the one more case, so always try to add that.
1099	//
1100	JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1101	{
1102	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1103
1104	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1105	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1106
1107	switch (term.atom.quantityType) {
1108	case QuantifierFixedCount: {
1109	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1110
1111	ParenthesesDisjunctionContext* context = `0`;
1112	JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1113
1114	if (result != JSRegExpMatch)
1115	return result;
1116
1117	// While we haven't yet reached our fixed limit,
1118	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1119	// Try to do a match, and it it succeeds, add it to the list.
1120	context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1121	result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1122
1123	if (result == JSRegExpMatch)
1124	appendParenthesesDisjunctionContext(backTrack, context);
1125	else {
1126	// The match failed; try to find an alternate point to carry on from.
1127	resetMatches(term, context);
1128	freeParenthesesDisjunctionContext(context);
1129
1130	if (result != JSRegExpNoMatch)
1131	return result;
1132	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1133	if (backtrackResult != JSRegExpMatch)
1134	return backtrackResult;
1135	}
1136	}
1137
1138	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1139	context = backTrack->lastContext;
1140	recordParenthesesMatch(term, context);
1141	return JSRegExpMatch;
1142	}
1143
1144	case QuantifierGreedy: {
1145	if (!backTrack->matchAmount)
1146	return JSRegExpNoMatch;
1147
1148	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1149	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1150	if (result == JSRegExpMatch) {
1151	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1152	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1153	JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1154	if (parenthesesResult == JSRegExpMatch)
1155	appendParenthesesDisjunctionContext(backTrack, context);
1156	else {
1157	resetMatches(term, context);
1158	freeParenthesesDisjunctionContext(context);
1159
1160	if (parenthesesResult != JSRegExpNoMatch)
1161	return parenthesesResult;
1162
1163	break;
1164	}
1165	}
1166	} else {
1167	resetMatches(term, context);
1168	popParenthesesDisjunctionContext(backTrack);
1169	freeParenthesesDisjunctionContext(context);
1170
1171	if (result != JSRegExpNoMatch \|\| backTrack->matchAmount < term.atom.quantityMinCount)
1172	return result;
1173	}
1174
1175	if (backTrack->matchAmount) {
1176	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1177	recordParenthesesMatch(term, context);
1178	}
1179	return JSRegExpMatch;
1180	}
1181
1182	case QuantifierNonGreedy: {
1183	// If we've not reached the limit, try to add one more match.
1184	if (backTrack->matchAmount < term.atom.quantityMaxCount) {
1185	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1186	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1187	if (result == JSRegExpMatch) {
1188	appendParenthesesDisjunctionContext(backTrack, context);
1189	recordParenthesesMatch(term, context);
1190	return JSRegExpMatch;
1191	}
1192
1193	resetMatches(term, context);
1194	freeParenthesesDisjunctionContext(context);
1195
1196	if (result != JSRegExpNoMatch)
1197	return result;
1198	}
1199
1200	// Nope - okay backtrack looking for an alternative.
1201	while (backTrack->matchAmount) {
1202	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1203	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1204	if (result == JSRegExpMatch) {
1205	// successful backtrack! we're back in the game!
1206	if (backTrack->matchAmount) {
1207	context = backTrack->lastContext;
1208	recordParenthesesMatch(term, context);
1209	}
1210	return JSRegExpMatch;
1211	}
1212
1213	// pop a match off the stack
1214	resetMatches(term, context);
1215	popParenthesesDisjunctionContext(backTrack);
1216	freeParenthesesDisjunctionContext(context);
1217
1218	if (result != JSRegExpNoMatch)
1219	return result;
1220	}
1221
1222	return JSRegExpNoMatch;
1223	}
1224	}
1225
1226	RELEASE_ASSERT_NOT_REACHED();
1227	return JSRegExpErrorNoMatch;
1228	}
1229
1230	bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1231	{
1232	UNUSED_PARAM(term);
1233
1234	if (pattern->dotAll()) {
1235	context->matchBegin = startOffset;
1236	context->matchEnd = input.end();
1237	return true;
1238	}
1239
1240	unsigned matchBegin = context->matchBegin;
1241
1242	if (matchBegin > startOffset) {
1243	for (matchBegin--; true; matchBegin--) {
1244	if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1245	++matchBegin;
1246	break;
1247	}
1248
1249	if (matchBegin == startOffset)
1250	break;
1251	}
1252	}
1253
1254	unsigned matchEnd = input.getPos();
1255
1256	for (; (matchEnd != input.end())
1257	&& (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1258
1259	if (((matchBegin && term.anchors.m_bol)
1260	\|\| ((matchEnd != input.end()) && term.anchors.m_eol))
1261	&& !pattern->multiline())
1262	return false;
1263
1264	context->matchBegin = matchBegin;
1265	context->matchEnd = matchEnd;
1266	return true;
1267	}
1268
1269	#define MATCH_NEXT() { ++context->term; goto matchAgain; }
1270	#define BACKTRACK() { --context->term; goto backtrack; }
1271	#define currentTerm() (disjunction->terms[context->term])
1272	JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1273	{
1274	if (!--remainingMatchCount)
1275	return JSRegExpErrorHitLimit;
1276
1277	if (btrack)
1278	BACKTRACK();
1279
1280	context->matchBegin = input.getPos();
1281	context->term = `0`;
1282
1283	matchAgain:
1284	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1285
1286	switch (currentTerm().type) {
1287	case ByteTerm::TypeSubpatternBegin:
1288	MATCH_NEXT();
1289	case ByteTerm::TypeSubpatternEnd:
1290	context->matchEnd = input.getPos();
1291	return JSRegExpMatch;
1292
1293	case ByteTerm::TypeBodyAlternativeBegin:
1294	MATCH_NEXT();
1295	case ByteTerm::TypeBodyAlternativeDisjunction:
1296	case ByteTerm::TypeBodyAlternativeEnd:
1297	context->matchEnd = input.getPos();
1298	return JSRegExpMatch;
1299
1300	case ByteTerm::TypeAlternativeBegin:
1301	MATCH_NEXT();
1302	case ByteTerm::TypeAlternativeDisjunction:
1303	case ByteTerm::TypeAlternativeEnd: {
1304	int offset = currentTerm().alternative.end;
1305	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1306	backTrack->offset = offset;
1307	context->term += offset;
1308	MATCH_NEXT();
1309	}
1310
1311	case ByteTerm::TypeAssertionBOL:
1312	if (matchAssertionBOL(currentTerm()))
1313	MATCH_NEXT();
1314	BACKTRACK();
1315	case ByteTerm::TypeAssertionEOL:
1316	if (matchAssertionEOL(currentTerm()))
1317	MATCH_NEXT();
1318	BACKTRACK();
1319	case ByteTerm::TypeAssertionWordBoundary:
1320	if (matchAssertionWordBoundary(currentTerm()))
1321	MATCH_NEXT();
1322	BACKTRACK();
1323
1324	case ByteTerm::TypePatternCharacterOnce:
1325	case ByteTerm::TypePatternCharacterFixed: {
1326	if (unicode) {
1327	if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1328	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1329	if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - `2` * matchAmount)) {
1330	BACKTRACK();
1331	}
1332	}
1333	MATCH_NEXT();
1334	}
1335	}
1336	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1337
1338	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1339	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1340	input.setPos(position);
1341	BACKTRACK();
1342	}
1343	}
1344	MATCH_NEXT();
1345	}
1346	case ByteTerm::TypePatternCharacterGreedy: {
1347	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1348	unsigned matchAmount = `0`;
1349	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1350	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1351	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + `1`)) {
1352	input.setPos(position);
1353	break;
1354	}
1355	++matchAmount;
1356	position = input.getPos();
1357	}
1358	backTrack->matchAmount = matchAmount;
1359
1360	MATCH_NEXT();
1361	}
1362	case ByteTerm::TypePatternCharacterNonGreedy: {
1363	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1364	backTrack->begin = input.getPos();
1365	backTrack->matchAmount = `0`;
1366	MATCH_NEXT();
1367	}
1368
1369	case ByteTerm::TypePatternCasedCharacterOnce:
1370	case ByteTerm::TypePatternCasedCharacterFixed: {
1371	if (unicode) {
1372	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1373	ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1374
1375	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1376
1377	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1378	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1379	input.setPos(position);
1380	BACKTRACK();
1381	}
1382	}
1383	MATCH_NEXT();
1384	}
1385
1386	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1387	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1388	BACKTRACK();
1389	}
1390	MATCH_NEXT();
1391	}
1392	case ByteTerm::TypePatternCasedCharacterGreedy: {
1393	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1394
1395	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1396	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1397
1398	unsigned matchAmount = `0`;
1399	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1400	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + `1`)) {
1401	input.uncheckInput(`1`);
1402	break;
1403	}
1404	++matchAmount;
1405	}
1406	backTrack->matchAmount = matchAmount;
1407
1408	MATCH_NEXT();
1409	}
1410	case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1411	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1412
1413	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1414	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1415
1416	backTrack->matchAmount = `0`;
1417	MATCH_NEXT();
1418	}
1419
1420	case ByteTerm::TypeCharacterClass:
1421	if (matchCharacterClass(currentTerm(), context))
1422	MATCH_NEXT();
1423	BACKTRACK();
1424	case ByteTerm::TypeBackReference:
1425	if (matchBackReference(currentTerm(), context))
1426	MATCH_NEXT();
1427	BACKTRACK();
1428	case ByteTerm::TypeParenthesesSubpattern: {
1429	JSRegExpResult result = matchParentheses(currentTerm(), context);
1430
1431	if (result == JSRegExpMatch) {
1432	MATCH_NEXT();
1433	} else if (result != JSRegExpNoMatch)
1434	return result;
1435
1436	BACKTRACK();
1437	}
1438	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1439	if (matchParenthesesOnceBegin(currentTerm(), context))
1440	MATCH_NEXT();
1441	BACKTRACK();
1442	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1443	if (matchParenthesesOnceEnd(currentTerm(), context))
1444	MATCH_NEXT();
1445	BACKTRACK();
1446	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1447	if (matchParenthesesTerminalBegin(currentTerm(), context))
1448	MATCH_NEXT();
1449	BACKTRACK();
1450	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1451	if (matchParenthesesTerminalEnd(currentTerm(), context))
1452	MATCH_NEXT();
1453	BACKTRACK();
1454	case ByteTerm::TypeParentheticalAssertionBegin:
1455	if (matchParentheticalAssertionBegin(currentTerm(), context))
1456	MATCH_NEXT();
1457	BACKTRACK();
1458	case ByteTerm::TypeParentheticalAssertionEnd:
1459	if (matchParentheticalAssertionEnd(currentTerm(), context))
1460	MATCH_NEXT();
1461	BACKTRACK();
1462
1463	case ByteTerm::TypeCheckInput:
1464	if (input.checkInput(currentTerm().checkInputCount))
1465	MATCH_NEXT();
1466	BACKTRACK();
1467
1468	case ByteTerm::TypeUncheckInput:
1469	input.uncheckInput(currentTerm().checkInputCount);
1470	MATCH_NEXT();
1471
1472	case ByteTerm::TypeDotStarEnclosure:
1473	if (matchDotStarEnclosure(currentTerm(), context))
1474	return JSRegExpMatch;
1475	BACKTRACK();
1476	}
1477
1478	// We should never fall-through to here.
1479	RELEASE_ASSERT_NOT_REACHED();
1480
1481	backtrack:
1482	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1483
1484	switch (currentTerm().type) {
1485	case ByteTerm::TypeSubpatternBegin:
1486	return JSRegExpNoMatch;
1487	case ByteTerm::TypeSubpatternEnd:
1488	RELEASE_ASSERT_NOT_REACHED();
1489
1490	case ByteTerm::TypeBodyAlternativeBegin:
1491	case ByteTerm::TypeBodyAlternativeDisjunction: {
1492	int offset = currentTerm().alternative.next;
1493	context->term += offset;
1494	if (offset > `0`)
1495	MATCH_NEXT();
1496
1497	if (input.atEnd() \|\| pattern->sticky())
1498	return JSRegExpNoMatch;
1499
1500	input.next();
1501
1502	context->matchBegin = input.getPos();
1503
1504	if (currentTerm().alternative.onceThrough)
1505	context->term += currentTerm().alternative.next;
1506
1507	MATCH_NEXT();
1508	}
1509	case ByteTerm::TypeBodyAlternativeEnd:
1510	RELEASE_ASSERT_NOT_REACHED();
1511
1512	case ByteTerm::TypeAlternativeBegin:
1513	case ByteTerm::TypeAlternativeDisjunction: {
1514	int offset = currentTerm().alternative.next;
1515	context->term += offset;
1516	if (offset > `0`)
1517	MATCH_NEXT();
1518	BACKTRACK();
1519	}
1520	case ByteTerm::TypeAlternativeEnd: {
1521	// We should never backtrack back into an alternative of the main body of the regex.
1522	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1523	unsigned offset = backTrack->offset;
1524	context->term -= offset;
1525	BACKTRACK();
1526	}
1527
1528	case ByteTerm::TypeAssertionBOL:
1529	case ByteTerm::TypeAssertionEOL:
1530	case ByteTerm::TypeAssertionWordBoundary:
1531	BACKTRACK();
1532
1533	case ByteTerm::TypePatternCharacterOnce:
1534	case ByteTerm::TypePatternCharacterFixed:
1535	case ByteTerm::TypePatternCharacterGreedy:
1536	case ByteTerm::TypePatternCharacterNonGreedy:
1537	if (backtrackPatternCharacter(currentTerm(), context))
1538	MATCH_NEXT();
1539	BACKTRACK();
1540	case ByteTerm::TypePatternCasedCharacterOnce:
1541	case ByteTerm::TypePatternCasedCharacterFixed:
1542	case ByteTerm::TypePatternCasedCharacterGreedy:
1543	case ByteTerm::TypePatternCasedCharacterNonGreedy:
1544	if (backtrackPatternCasedCharacter(currentTerm(), context))
1545	MATCH_NEXT();
1546	BACKTRACK();
1547	case ByteTerm::TypeCharacterClass:
1548	if (backtrackCharacterClass(currentTerm(), context))
1549	MATCH_NEXT();
1550	BACKTRACK();
1551	case ByteTerm::TypeBackReference:
1552	if (backtrackBackReference(currentTerm(), context))
1553	MATCH_NEXT();
1554	BACKTRACK();
1555	case ByteTerm::TypeParenthesesSubpattern: {
1556	JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1557
1558	if (result == JSRegExpMatch) {
1559	MATCH_NEXT();
1560	} else if (result != JSRegExpNoMatch)
1561	return result;
1562
1563	BACKTRACK();
1564	}
1565	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1566	if (backtrackParenthesesOnceBegin(currentTerm(), context))
1567	MATCH_NEXT();
1568	BACKTRACK();
1569	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1570	if (backtrackParenthesesOnceEnd(currentTerm(), context))
1571	MATCH_NEXT();
1572	BACKTRACK();
1573	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1574	if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1575	MATCH_NEXT();
1576	BACKTRACK();
1577	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1578	if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1579	MATCH_NEXT();
1580	BACKTRACK();
1581	case ByteTerm::TypeParentheticalAssertionBegin:
1582	if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1583	MATCH_NEXT();
1584	BACKTRACK();
1585	case ByteTerm::TypeParentheticalAssertionEnd:
1586	if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1587	MATCH_NEXT();
1588	BACKTRACK();
1589
1590	case ByteTerm::TypeCheckInput:
1591	input.uncheckInput(currentTerm().checkInputCount);
1592	BACKTRACK();
1593
1594	case ByteTerm::TypeUncheckInput:
1595	input.checkInput(currentTerm().checkInputCount);
1596	BACKTRACK();
1597
1598	case ByteTerm::TypeDotStarEnclosure:
1599	RELEASE_ASSERT_NOT_REACHED();
1600	}
1601
1602	RELEASE_ASSERT_NOT_REACHED();
1603	return JSRegExpErrorNoMatch;
1604	}
1605
1606	JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1607	{
1608	JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1609
1610	if (result == JSRegExpMatch) {
1611	while (context->matchBegin == context->matchEnd) {
1612	result = matchDisjunction(disjunction, context, true);
1613	if (result != JSRegExpMatch)
1614	return result;
1615	}
1616	return JSRegExpMatch;
1617	}
1618
1619	return result;
1620	}
1621
1622	unsigned interpret()
1623	{
1624	// FIXME: https://bugs.webkit.org/show_bug.cgi?id=195970
1625	// [Yarr Interpreter] The interpreter doesn't have checks for stack overflow due to deep recursion
1626	if (!input.isAvailableInput(`0`))
1627	return offsetNoMatch;
1628
1629	if (pattern->m_lock)
1630	pattern->m_lock->lock();
1631
1632	for (unsigned i = `0`; i < pattern->m_body ->m_numSubpatterns + `1`; ++i)
1633	output[i << `1`] = offsetNoMatch;
1634
1635	allocatorPool = pattern->m_allocator->startAllocator();
1636	RELEASE_ASSERT(allocatorPool);
1637
1638	DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1639
1640	JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1641	if (result == JSRegExpMatch) {
1642	output[`0`] = context->matchBegin;
1643	output[`1`] = context->matchEnd;
1644	}
1645
1646	freeDisjunctionContext(context);
1647
1648	pattern->m_allocator->stopAllocator();
1649
1650	ASSERT((result == JSRegExpMatch) == (output[`0`] != offsetNoMatch));
1651
1652	if (pattern->m_lock)
1653	pattern->m_lock->unlock();
1654
1655	return output[`0`];
1656	}
1657
1658	Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1659	: pattern(pattern)
1660	, unicode(pattern->unicode())
1661	, output(output)
1662	, input(input, start, length, pattern->unicode())
1663	, startOffset(start)
1664	, remainingMatchCount(matchLimit)
1665	{
1666	}
1667
1668	private:
1669	BytecodePattern* pattern;
1670	bool unicode;
1671	unsigned* output;
1672	InputStream input;
1673	WTF::BumpPointerPool* allocatorPool { nullptr };
1674	unsigned startOffset;
1675	unsigned remainingMatchCount;
1676	};
1677
1678	class ByteCompiler {
1679	struct ParenthesesStackEntry {
1680	unsigned beginTerm;
1681	unsigned savedAlternativeIndex;
1682	ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/, unsigned subpatternId, bool capture = false/)
1683	: beginTerm(beginTerm)
1684	, savedAlternativeIndex(savedAlternativeIndex)
1685	{
1686	}
1687	};
1688
1689	public:
1690	ByteCompiler(YarrPattern& pattern)
1691	: m_pattern(pattern)
1692	{
1693	m_currentAlternativeIndex = `0`;
1694	}
1695
1696	std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
1697	{
1698	regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives [`0`]->onceThrough());
1699	emitDisjunction(m_pattern.m_body);
1700	regexEnd();
1701
1702	#ifndef NDEBUG
1703	if (Options::dumpCompiledRegExpPatterns())
1704	dumpDisjunction(m_bodyDisjunction.get());
1705	#endif
1706
1707	return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
1708	}
1709
1710	void checkInput(unsigned count)
1711	{
1712	m_bodyDisjunction ->terms.append(ByteTerm::CheckInput(count));
1713	}
1714
1715	void uncheckInput(unsigned count)
1716	{
1717	m_bodyDisjunction ->terms.append(ByteTerm::UncheckInput(count));
1718	}
1719
1720	void assertionBOL(unsigned inputPosition)
1721	{
1722	m_bodyDisjunction ->terms.append(ByteTerm::BOL(inputPosition));
1723	}
1724
1725	void assertionEOL(unsigned inputPosition)
1726	{
1727	m_bodyDisjunction ->terms.append(ByteTerm::EOL(inputPosition));
1728	}
1729
1730	void assertionWordBoundary(bool invert, unsigned inputPosition)
1731	{
1732	m_bodyDisjunction ->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1733	}
1734
1735	void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1736	{
1737	if (m_pattern.ignoreCase()) {
1738	UChar32 lo = u_tolower(ch);
1739	UChar32 hi = u_toupper(ch);
1740
1741	if (lo != hi) {
1742	m_bodyDisjunction ->terms.append(ByteTerm (lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
1743	return;
1744	}
1745	}
1746
1747	m_bodyDisjunction ->terms.append(ByteTerm (ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
1748	}
1749
1750	void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1751	{
1752	m_bodyDisjunction ->terms.append(ByteTerm (characterClass, invert, inputPosition));
1753
1754	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1755	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityType = quantityType;
1756	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1757	}
1758
1759	void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1760	{
1761	ASSERT(subpatternId);
1762
1763	m_bodyDisjunction ->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1764
1765	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1766	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityType = quantityType;
1767	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1768	}
1769
1770	void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1771	{
1772	int beginTerm = m_bodyDisjunction ->terms.size();
1773
1774	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1775	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1776	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1777	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1778
1779	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1780	m_currentAlternativeIndex = beginTerm + `1`;
1781	}
1782
1783	void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1784	{
1785	int beginTerm = m_bodyDisjunction ->terms.size();
1786
1787	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1788	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1789	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1790	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1791
1792	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1793	m_currentAlternativeIndex = beginTerm + `1`;
1794	}
1795
1796	void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1797	{
1798	// Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1799	// then fix this up at the end! - simplifying this should make it much clearer.
1800	// https://bugs.webkit.org/show_bug.cgi?id=50136
1801
1802	int beginTerm = m_bodyDisjunction ->terms.size();
1803
1804	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1805	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1806	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1807	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1808
1809	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1810	m_currentAlternativeIndex = beginTerm + `1`;
1811	}
1812
1813	void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1814	{
1815	int beginTerm = m_bodyDisjunction ->terms.size();
1816
1817	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, `0`));
1818	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1819	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1820	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1821
1822	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1823	m_currentAlternativeIndex = beginTerm + `1`;
1824	}
1825
1826	void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1827	{
1828	unsigned beginTerm = popParenthesesStack();
1829	closeAlternative(beginTerm + `1`);
1830	unsigned endTerm = m_bodyDisjunction ->terms.size();
1831
1832	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1833
1834	bool invert = m_bodyDisjunction ->terms [beginTerm].invert();
1835	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1836
1837	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1838	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1839	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1840	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1841
1842	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1843	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1844	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1845	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1846	}
1847
1848	void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1849	{
1850	m_bodyDisjunction ->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1851	}
1852
1853	unsigned popParenthesesStack()
1854	{
1855	ASSERT(m_parenthesesStack.size());
1856	int stackEnd = m_parenthesesStack.size() - `1`;
1857	unsigned beginTerm = m_parenthesesStack [stackEnd].beginTerm;
1858	m_currentAlternativeIndex = m_parenthesesStack [stackEnd].savedAlternativeIndex;
1859	m_parenthesesStack.shrink(stackEnd);
1860
1861	ASSERT(beginTerm < m_bodyDisjunction ->terms.size());
1862	ASSERT(m_currentAlternativeIndex < m_bodyDisjunction ->terms.size());
1863
1864	return beginTerm;
1865	}
1866
1867	void closeAlternative(int beginTerm)
1868	{
1869	int origBeginTerm = beginTerm;
1870	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1871	int endIndex = m_bodyDisjunction ->terms.size();
1872
1873	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1874
1875	if (!m_bodyDisjunction ->terms [beginTerm].alternative.next)
1876	m_bodyDisjunction ->terms.remove(beginTerm);
1877	else {
1878	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1879	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1880	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1881	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1882	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1883	}
1884
1885	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1886
1887	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeEnd());
1888	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1889	}
1890	}
1891
1892	void closeBodyAlternative()
1893	{
1894	int beginTerm = `0`;
1895	int origBeginTerm = `0`;
1896	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1897	int endIndex = m_bodyDisjunction ->terms.size();
1898
1899	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1900
1901	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1902	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1903	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1904	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1905	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1906	}
1907
1908	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1909
1910	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeEnd());
1911	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1912	}
1913
1914	void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = `0`)
1915	{
1916	unsigned beginTerm = popParenthesesStack();
1917	closeAlternative(beginTerm + `1`);
1918	unsigned endTerm = m_bodyDisjunction ->terms.size();
1919
1920	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1921
1922	ByteTerm& parenthesesBegin = m_bodyDisjunction ->terms [beginTerm];
1923
1924	bool capture = parenthesesBegin.capture();
1925	unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1926
1927	unsigned numSubpatterns = lastSubpatternId - subpatternId + `1`;
1928	auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1929
1930	unsigned firstTermInParentheses = beginTerm + `1`;
1931	parenthesesDisjunction ->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + `2`);
1932
1933	parenthesesDisjunction ->terms.append(ByteTerm::SubpatternBegin());
1934	for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1935	parenthesesDisjunction ->terms.append(m_bodyDisjunction ->terms [termInParentheses]);
1936	parenthesesDisjunction ->terms.append(ByteTerm::SubpatternEnd());
1937
1938	m_bodyDisjunction ->terms.shrink(beginTerm);
1939
1940	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1941	m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1942
1943	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1944	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1945	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1946	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1947	}
1948
1949	void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1950	{
1951	unsigned beginTerm = popParenthesesStack();
1952	closeAlternative(beginTerm + `1`);
1953	unsigned endTerm = m_bodyDisjunction ->terms.size();
1954
1955	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1956
1957	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1958	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1959
1960	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1961	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1962	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1963	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1964
1965	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1966	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1967	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1968	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1969	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1970	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1971	}
1972
1973	void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1974	{
1975	unsigned beginTerm = popParenthesesStack();
1976	closeAlternative(beginTerm + `1`);
1977	unsigned endTerm = m_bodyDisjunction ->terms.size();
1978
1979	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1980
1981	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1982	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1983
1984	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1985	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1986	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1987	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1988
1989	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1990	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1991	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1992	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1993	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1994	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1995	}
1996
1997	void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1998	{
1999	m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
2000	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
2001	m_bodyDisjunction ->terms [`0`].frameLocation = `0`;
2002	m_currentAlternativeIndex = `0`;
2003	}
2004
2005	void regexEnd()
2006	{
2007	closeBodyAlternative();
2008	}
2009
2010	void alternativeBodyDisjunction(bool onceThrough)
2011	{
2012	int newAlternativeIndex = m_bodyDisjunction ->terms.size();
2013	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2014	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
2015
2016	m_currentAlternativeIndex = newAlternativeIndex;
2017	}
2018
2019	void alternativeDisjunction()
2020	{
2021	int newAlternativeIndex = m_bodyDisjunction ->terms.size();
2022	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2023	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeDisjunction());
2024
2025	m_currentAlternativeIndex = newAlternativeIndex;
2026	}
2027
2028	void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = `0`, unsigned parenthesesInputCountAlreadyChecked = `0`)
2029	{
2030	for (unsigned alt = `0`; alt < disjunction->m_alternatives.size(); ++alt) {
2031	unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
2032
2033	PatternAlternative* alternative = disjunction->m_alternatives [alt].get();
2034
2035	if (alt) {
2036	if (disjunction == m_pattern.m_body)
2037	alternativeBodyDisjunction(alternative->onceThrough());
2038	else
2039	alternativeDisjunction();
2040	}
2041
2042	unsigned minimumSize = alternative->m_minimumSize;
2043	ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
2044	unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
2045
2046	if (countToCheck) {
2047	checkInput(countToCheck);
2048	currentCountAlreadyChecked += countToCheck;
2049	}
2050
2051	for (auto& term : alternative->m_terms) {
2052	switch (term.type) {
2053	case PatternTerm::TypeAssertionBOL:
2054	assertionBOL(currentCountAlreadyChecked - term.inputPosition);
2055	break;
2056
2057	case PatternTerm::TypeAssertionEOL:
2058	assertionEOL(currentCountAlreadyChecked - term.inputPosition);
2059	break;
2060
2061	case PatternTerm::TypeAssertionWordBoundary:
2062	assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
2063	break;
2064
2065	case PatternTerm::TypePatternCharacter:
2066	atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2067	break;
2068
2069	case PatternTerm::TypeCharacterClass:
2070	atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2071	break;
2072
2073	case PatternTerm::TypeBackReference:
2074	atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
2075	break;
2076
2077	case PatternTerm::TypeForwardReference:
2078	break;
2079
2080	case PatternTerm::TypeParenthesesSubpattern: {
2081	unsigned disjunctionAlreadyCheckedCount = `0`;
2082	if (term.quantityMaxCount == `1` && !term.parentheses.isCopy) {
2083	unsigned alternativeFrameLocation = term.frameLocation;
2084	// For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
2085	if (term.quantityType == QuantifierFixedCount)
2086	disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
2087	else
2088	alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2089	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2090	unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2091	atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
2092	emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
2093	atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2094	} else if (term.parentheses.isTerminal) {
2095	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2096	unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2097	atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesTerminal);
2098	emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
2099	atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2100	} else {
2101	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2102	unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2103	atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, `0`);
2104	emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, `0`);
2105	atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
2106	}
2107	break;
2108	}
2109
2110	case PatternTerm::TypeParentheticalAssertion: {
2111	unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
2112
2113	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2114	unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
2115	unsigned uncheckAmount = `0`;
2116	if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2117	uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2118	uncheckInput(uncheckAmount);
2119	currentCountAlreadyChecked -= uncheckAmount;
2120	}
2121
2122	atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
2123	emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
2124	atomParentheticalAssertionEnd(`0`, term.frameLocation, term.quantityMaxCount, term.quantityType);
2125	if (uncheckAmount) {
2126	checkInput(uncheckAmount);
2127	currentCountAlreadyChecked += uncheckAmount;
2128	}
2129	break;
2130	}
2131
2132	case PatternTerm::TypeDotStarEnclosure:
2133	assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
2134	break;
2135	}
2136	}
2137	}
2138	}
2139	#ifndef NDEBUG
2140	void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = `0`)
2141	{
2142	PrintStream& out = WTF::dataFile();
2143
2144	unsigned termIndexNest = `0`;
2145
2146	if (!nesting) {
2147	out.printf("ByteDisjunction(%p):\n", disjunction);
2148	nesting = `1`;
2149	} else {
2150	termIndexNest = nesting - `1`;
2151	nesting = `2`;
2152	}
2153
2154	auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
2155	for (unsigned nestingDepth = `0`; nestingDepth < termIndexNest; nestingDepth++)
2156	out.print(" ");
2157	out.printf("%4zu", index);
2158	for (unsigned nestingDepth = `0`; nestingDepth < termNesting; nestingDepth++)
2159	out.print(" ");
2160	};
2161
2162	auto dumpQuantity = [&](ByteTerm& term) {
2163	if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == `1` && term.atom.quantityMaxCount == `1`)
2164	return;
2165
2166	out.print(" {", term.atom.quantityMinCount);
2167	if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
2168	if (term.atom.quantityMaxCount == UINT_MAX)
2169	out.print(",inf");
2170	else
2171	out.print(",", term.atom.quantityMaxCount);
2172	}
2173	out.print("}");
2174	if (term.atom.quantityType == QuantifierGreedy)
2175	out.print(" greedy");
2176	else if (term.atom.quantityType == QuantifierNonGreedy)
2177	out.print(" non-greedy");
2178	};
2179
2180	auto dumpCaptured = [&](ByteTerm& term) {
2181	if (term.capture())
2182	out.print(" captured (#", term.atom.subpatternId, ")");
2183	};
2184
2185	auto dumpInverted = [&](ByteTerm& term) {
2186	if (term.invert())
2187	out.print(" inverted");
2188	};
2189
2190	auto dumpInputPosition = [&](ByteTerm& term) {
2191	out.printf(" inputPosition %u", term.inputPosition);
2192	};
2193
2194	auto dumpFrameLocation = [&](ByteTerm& term) {
2195	out.printf(" frameLocation %u", term.frameLocation);
2196	};
2197
2198	auto dumpCharacter = [&](ByteTerm& term) {
2199	out.print(" ");
2200	dumpUChar32(out, term.atom.patternCharacter);
2201	};
2202
2203	auto dumpCharClass = [&](ByteTerm& term) {
2204	out.print(" ");
2205	dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
2206	};
2207
2208	for (size_t idx = `0`; idx < disjunction->terms.size(); ++idx) {
2209	ByteTerm term = disjunction->terms [idx];
2210
2211	bool outputNewline = true;
2212
2213	switch (term.type) {
2214	case ByteTerm::TypeBodyAlternativeBegin:
2215	outputTermIndexAndNest(idx, nesting++);
2216	out.print("BodyAlternativeBegin");
2217	if (term.alternative.onceThrough)
2218	out.print(" onceThrough");
2219	dumpFrameLocation(term);
2220	break;
2221	case ByteTerm::TypeBodyAlternativeDisjunction:
2222	outputTermIndexAndNest(idx, nesting - `1`);
2223	out.print("BodyAlternativeDisjunction");
2224	dumpFrameLocation(term);
2225	break;
2226	case ByteTerm::TypeBodyAlternativeEnd:
2227	outputTermIndexAndNest(idx, --nesting);
2228	out.print("BodyAlternativeEnd");
2229	dumpFrameLocation(term);
2230	break;
2231	case ByteTerm::TypeAlternativeBegin:
2232	outputTermIndexAndNest(idx, nesting++);
2233	out.print("AlternativeBegin");
2234	dumpFrameLocation(term);
2235	break;
2236	case ByteTerm::TypeAlternativeDisjunction:
2237	outputTermIndexAndNest(idx, nesting - `1`);
2238	out.print("AlternativeDisjunction");
2239	dumpFrameLocation(term);
2240	break;
2241	case ByteTerm::TypeAlternativeEnd:
2242	outputTermIndexAndNest(idx, --nesting);
2243	out.print("AlternativeEnd");
2244	dumpFrameLocation(term);
2245	break;
2246	case ByteTerm::TypeSubpatternBegin:
2247	outputTermIndexAndNest(idx, nesting++);
2248	out.print("SubpatternBegin");
2249	break;
2250	case ByteTerm::TypeSubpatternEnd:
2251	outputTermIndexAndNest(idx, --nesting);
2252	out.print("SubpatternEnd");
2253	break;
2254	case ByteTerm::TypeAssertionBOL:
2255	outputTermIndexAndNest(idx, nesting);
2256	out.print("AssertionBOL");
2257	break;
2258	case ByteTerm::TypeAssertionEOL:
2259	outputTermIndexAndNest(idx, nesting);
2260	out.print("AssertionEOL");
2261	break;
2262	case ByteTerm::TypeAssertionWordBoundary:
2263	outputTermIndexAndNest(idx, nesting);
2264	out.print("AssertionWordBoundary");
2265	break;
2266	case ByteTerm::TypePatternCharacterOnce:
2267	outputTermIndexAndNest(idx, nesting);
2268	out.print("PatternCharacterOnce");
2269	dumpInverted(term);
2270	dumpInputPosition(term);
2271	dumpFrameLocation(term);
2272	dumpCharacter(term);
2273	dumpQuantity(term);
2274	break;
2275	case ByteTerm::TypePatternCharacterFixed:
2276	outputTermIndexAndNest(idx, nesting);
2277	out.print("PatternCharacterFixed");
2278	dumpInverted(term);
2279	dumpInputPosition(term);
2280	dumpFrameLocation(term);
2281	dumpCharacter(term);
2282	out.print(" {", term.atom.quantityMinCount, "}");
2283	break;
2284	case ByteTerm::TypePatternCharacterGreedy:
2285	outputTermIndexAndNest(idx, nesting);
2286	out.print("PatternCharacterGreedy");
2287	dumpInverted(term);
2288	dumpInputPosition(term);
2289	dumpFrameLocation(term);
2290	dumpCharacter(term);
2291	dumpQuantity(term);
2292	break;
2293	case ByteTerm::TypePatternCharacterNonGreedy:
2294	outputTermIndexAndNest(idx, nesting);
2295	out.print("PatternCharacterNonGreedy");
2296	dumpInverted(term);
2297	dumpInputPosition(term);
2298	dumpFrameLocation(term);
2299	dumpCharacter(term);
2300	dumpQuantity(term);
2301	break;
2302	case ByteTerm::TypePatternCasedCharacterOnce:
2303	outputTermIndexAndNest(idx, nesting);
2304	out.print("PatternCasedCharacterOnce");
2305	break;
2306	case ByteTerm::TypePatternCasedCharacterFixed:
2307	outputTermIndexAndNest(idx, nesting);
2308	out.print("PatternCasedCharacterFixed");
2309	break;
2310	case ByteTerm::TypePatternCasedCharacterGreedy:
2311	outputTermIndexAndNest(idx, nesting);
2312	out.print("PatternCasedCharacterGreedy");
2313	break;
2314	case ByteTerm::TypePatternCasedCharacterNonGreedy:
2315	outputTermIndexAndNest(idx, nesting);
2316	out.print("PatternCasedCharacterNonGreedy");
2317	break;
2318	case ByteTerm::TypeCharacterClass:
2319	outputTermIndexAndNest(idx, nesting);
2320	out.print("CharacterClass");
2321	dumpInverted(term);
2322	dumpInputPosition(term);
2323	dumpFrameLocation(term);
2324	dumpCharClass(term);
2325	dumpQuantity(term);
2326	break;
2327	case ByteTerm::TypeBackReference:
2328	outputTermIndexAndNest(idx, nesting);
2329	out.print("BackReference #", term.atom.subpatternId);
2330	dumpQuantity(term);
2331	break;
2332	case ByteTerm::TypeParenthesesSubpattern:
2333	outputTermIndexAndNest(idx, nesting);
2334	out.print("ParenthesesSubpattern");
2335	dumpCaptured(term);
2336	dumpInverted(term);
2337	dumpInputPosition(term);
2338	dumpFrameLocation(term);
2339	dumpQuantity(term);
2340	out.print("\n");
2341	outputNewline = false;
2342	dumpDisjunction(term.atom.parenthesesDisjunction, nesting);
2343	break;
2344	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
2345	outputTermIndexAndNest(idx, nesting++);
2346	out.print("ParenthesesSubpatternOnceBegin");
2347	dumpCaptured(term);
2348	dumpInverted(term);
2349	dumpInputPosition(term);
2350	dumpFrameLocation(term);
2351	break;
2352	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
2353	outputTermIndexAndNest(idx, --nesting);
2354	out.print("ParenthesesSubpatternOnceEnd");
2355	dumpFrameLocation(term);
2356	break;
2357	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
2358	outputTermIndexAndNest(idx, nesting++);
2359	out.print("ParenthesesSubpatternTerminalBegin");
2360	dumpInverted(term);
2361	dumpInputPosition(term);
2362	dumpFrameLocation(term);
2363	break;
2364	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
2365	outputTermIndexAndNest(idx, --nesting);
2366	out.print("ParenthesesSubpatternTerminalEnd");
2367	dumpFrameLocation(term);
2368	break;
2369	case ByteTerm::TypeParentheticalAssertionBegin:
2370	outputTermIndexAndNest(idx, nesting++);
2371	out.print("ParentheticalAssertionBegin");
2372	dumpInverted(term);
2373	dumpInputPosition(term);
2374	dumpFrameLocation(term);
2375	break;
2376	case ByteTerm::TypeParentheticalAssertionEnd:
2377	outputTermIndexAndNest(idx, --nesting);
2378	out.print("ParentheticalAssertionEnd");
2379	dumpFrameLocation(term);
2380	break;
2381	case ByteTerm::TypeCheckInput:
2382	outputTermIndexAndNest(idx, nesting);
2383	out.print("CheckInput ", term.checkInputCount);
2384	break;
2385	case ByteTerm::TypeUncheckInput:
2386	outputTermIndexAndNest(idx, nesting);
2387	out.print("UncheckInput ", term.checkInputCount);
2388	break;
2389	case ByteTerm::TypeDotStarEnclosure:
2390	outputTermIndexAndNest(idx, nesting);
2391	out.print("DotStarEnclosure");
2392	break;
2393	}
2394	if (outputNewline)
2395	out.print("\n");
2396	}
2397	}
2398	#endif
2399
2400	private:
2401	YarrPattern& m_pattern;
2402	std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2403	unsigned m_currentAlternativeIndex;
2404	Vector<ParenthesesStackEntry> m_parenthesesStack;
2405	Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2406	};
2407
2408	std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
2409	{
2410	return ByteCompiler (pattern).compile(allocator, lock);
2411	}
2412
2413	unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2414	{
2415	SuperSamplerScope superSamplerScope(false);
2416	if (input.is8Bit())
2417	return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
2418	return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
2419	}
2420
2421	unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2422	{
2423	SuperSamplerScope superSamplerScope(false);
2424	return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2425	}
2426
2427	unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2428	{
2429	SuperSamplerScope superSamplerScope(false);
2430	return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2431	}
2432
2433	// These should be the same for both UChar & LChar.
2434	COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2435	COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2436	COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2437	COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2438	COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2439	COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2440	COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2441
2442
2443	} }
2444

Browse the source code of webkit/Source/JavaScriptCore/yarr/YarrInterpreter.cpp