1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011-2017 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "HTMLTreeBuilder.h"
29
30#include "DocumentFragment.h"
31#include "HTMLDocument.h"
32#include "HTMLDocumentParser.h"
33#include "HTMLFormControlElement.h"
34#include "HTMLFormElement.h"
35#include "HTMLInputElement.h"
36#include "HTMLOptGroupElement.h"
37#include "HTMLOptionElement.h"
38#include "HTMLParserIdioms.h"
39#include "HTMLScriptElement.h"
40#include "HTMLTableElement.h"
41#include "JSCustomElementInterface.h"
42#include "LocalizedStrings.h"
43#include "NotImplemented.h"
44#include "SVGScriptElement.h"
45#include "XLinkNames.h"
46#include "XMLNSNames.h"
47#include "XMLNames.h"
48#include <wtf/NeverDestroyed.h>
49#include <wtf/unicode/CharacterNames.h>
50
51#if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
52#include "TelephoneNumberDetector.h"
53#endif
54
55namespace WebCore {
56
57using namespace HTMLNames;
58
59CustomElementConstructionData::CustomElementConstructionData(Ref<JSCustomElementInterface>&& customElementInterface, const AtomicString& name, Vector<Attribute>&& attributes)
60 : elementInterface(WTFMove(customElementInterface))
61 , name(name)
62 , attributes(WTFMove(attributes))
63{
64}
65
// Destructor defined outside the class body; it is the compiler-generated one.
66CustomElementConstructionData::~CustomElementConstructionData() = default;
67
68namespace {
69
70inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
71{
72 return isHTMLSpace(character) || character == replacementCharacter;
73}
74
75}
76
77static inline TextPosition uninitializedPositionValue1()
78{
79 return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber());
80}
81
82static inline bool isAllWhitespace(const String& string)
83{
84 return string.isAllSpecialCharacters<isHTMLSpace>();
85}
86
87static inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
88{
89 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
90}
91
92static bool isNumberedHeaderTag(const AtomicString& tagName)
93{
94 return tagName == h1Tag
95 || tagName == h2Tag
96 || tagName == h3Tag
97 || tagName == h4Tag
98 || tagName == h5Tag
99 || tagName == h6Tag;
100}
101
102static bool isCaptionColOrColgroupTag(const AtomicString& tagName)
103{
104 return tagName == captionTag || tagName == colTag || tagName == colgroupTag;
105}
106
107static bool isTableCellContextTag(const AtomicString& tagName)
108{
109 return tagName == thTag || tagName == tdTag;
110}
111
112static bool isTableBodyContextTag(const AtomicString& tagName)
113{
114 return tagName == tbodyTag || tagName == tfootTag || tagName == theadTag;
115}
116
117static bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
118{
119 return tagName == bTag
120 || tagName == bigTag
121 || tagName == codeTag
122 || tagName == emTag
123 || tagName == fontTag
124 || tagName == iTag
125 || tagName == sTag
126 || tagName == smallTag
127 || tagName == strikeTag
128 || tagName == strongTag
129 || tagName == ttTag
130 || tagName == uTag;
131}
132
133static bool isNonAnchorFormattingTag(const AtomicString& tagName)
134{
135 return tagName == nobrTag || isNonAnchorNonNobrFormattingTag(tagName);
136}
137
138// https://html.spec.whatwg.org/multipage/syntax.html#formatting
139bool HTMLConstructionSite::isFormattingTag(const AtomicString& tagName)
140{
141 return tagName == aTag || isNonAnchorFormattingTag(tagName);
142}
143
144class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
145public:
146 explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
147 : m_text(token.characters(), token.charactersLength())
148 , m_isAll8BitData(token.charactersIsAll8BitData())
149 {
150 ASSERT(!isEmpty());
151 }
152
153 explicit ExternalCharacterTokenBuffer(const String& string)
154 : m_text(string)
155 , m_isAll8BitData(m_text.is8Bit())
156 {
157 ASSERT(!isEmpty());
158 }
159
160 ~ExternalCharacterTokenBuffer()
161 {
162 ASSERT(isEmpty());
163 }
164
165 bool isEmpty() const { return m_text.isEmpty(); }
166
167 bool isAll8BitData() const { return m_isAll8BitData; }
168
169 void skipAtMostOneLeadingNewline()
170 {
171 ASSERT(!isEmpty());
172 if (m_text[0] == '\n')
173 m_text = m_text.substring(1);
174 }
175
176 void skipLeadingWhitespace()
177 {
178 skipLeading<isHTMLSpace>();
179 }
180
181 String takeLeadingWhitespace()
182 {
183 return takeLeading<isHTMLSpace>();
184 }
185
186 void skipLeadingNonWhitespace()
187 {
188 skipLeading<isNotHTMLSpace>();
189 }
190
191 String takeRemaining()
192 {
193 String result = makeString(m_text);
194 m_text = StringView();
195 return result;
196 }
197
198 void giveRemainingTo(StringBuilder& recipient)
199 {
200 recipient.append(m_text);
201 m_text = StringView();
202 }
203
204 String takeRemainingWhitespace()
205 {
206 ASSERT(!isEmpty());
207 Vector<LChar, 8> whitespace;
208 do {
209 UChar character = m_text[0];
210 if (isHTMLSpace(character))
211 whitespace.append(character);
212 m_text = m_text.substring(1);
213 } while (!m_text.isEmpty());
214
215 // Returning the null string when there aren't any whitespace
216 // characters is slightly cleaner semantically because we don't want
217 // to insert a text node (as opposed to inserting an empty text node).
218 if (whitespace.isEmpty())
219 return String();
220
221 return String::adopt(WTFMove(whitespace));
222 }
223
224private:
225 template<bool characterPredicate(UChar)> void skipLeading()
226 {
227 ASSERT(!isEmpty());
228 while (characterPredicate(m_text[0])) {
229 m_text = m_text.substring(1);
230 if (m_text.isEmpty())
231 return;
232 }
233 }
234
235 template<bool characterPredicate(UChar)> String takeLeading()
236 {
237 ASSERT(!isEmpty());
238 StringView start = m_text;
239 skipLeading<characterPredicate>();
240 if (start.length() == m_text.length())
241 return String();
242 return makeString(start.substring(0, start.length() - m_text.length()));
243 }
244
245 String makeString(StringView stringView) const
246 {
247 if (stringView.is8Bit() || !isAll8BitData())
248 return stringView.toString();
249 return String::make8BitFrom16BitSource(stringView.characters16(), stringView.length());
250 }
251
252 StringView m_text;
253 bool m_isAll8BitData;
254};
255
256inline bool HTMLTreeBuilder::isParsingTemplateContents() const
257{
258 return m_tree.openElements().hasTemplateInHTMLScope();
259}
260
261inline bool HTMLTreeBuilder::isParsingFragmentOrTemplateContents() const
262{
263 return isParsingFragment() || isParsingTemplateContents();
264}
265
266HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser& parser, HTMLDocument& document, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
267 : m_parser(parser)
268 , m_options(options)
269 , m_tree(document, parserContentPolicy, options.maximumDOMTreeDepth)
270 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
271{
272#if !ASSERT_DISABLED
273 m_destructionProhibited = false;
274#endif
275}
276
277HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser& parser, DocumentFragment& fragment, Element& contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
278 : m_parser(parser)
279 , m_options(options)
280 , m_fragmentContext(fragment, contextElement)
281 , m_tree(fragment, parserContentPolicy, options.maximumDOMTreeDepth)
282 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
283{
284 ASSERT(isMainThread());
285
286 // https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
287 // For efficiency, we skip step 5 ("Let root be a new html element with no attributes") and instead use the DocumentFragment as a root node.
288 m_tree.openElements().pushRootNode(HTMLStackItem::create(fragment));
289
290 if (contextElement.hasTagName(templateTag))
291 m_templateInsertionModes.append(InsertionMode::TemplateContents);
292
293 resetInsertionModeAppropriately();
294
295 m_tree.setForm(is<HTMLFormElement>(contextElement) ? &downcast<HTMLFormElement>(contextElement) : HTMLFormElement::findClosestFormAncestor(contextElement));
296
297#if !ASSERT_DISABLED
298 m_destructionProhibited = false;
299#endif
300}
301
302HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
303{
304}
305
306HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment& fragment, Element& contextElement)
307 : m_fragment(&fragment)
308{
309 ASSERT(!fragment.hasChildNodes());
310 m_contextElementStackItem = HTMLStackItem::create(contextElement);
311}
312
313inline Element& HTMLTreeBuilder::FragmentParsingContext::contextElement() const
314{
315 return contextElementStackItem().element();
316}
317
318inline HTMLStackItem& HTMLTreeBuilder::FragmentParsingContext::contextElementStackItem() const
319{
320 ASSERT(m_fragment);
321 return *m_contextElementStackItem;
322}
323
324RefPtr<ScriptElement> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
325{
326 ASSERT(!m_destroyed);
327
328 if (!m_scriptToProcess)
329 return nullptr;
330
331 // Unpause ourselves, callers may pause us again when processing the script.
332 // The HTML5 spec is written as though scripts are executed inside the tree builder.
333 // We pause the parser to exit the tree builder, and then resume before running scripts.
334 scriptStartPosition = m_scriptToProcessStartPosition;
335 m_scriptToProcessStartPosition = uninitializedPositionValue1();
336 return WTFMove(m_scriptToProcess);
337}
338
339void HTMLTreeBuilder::constructTree(AtomicHTMLToken&& token)
340{
341#if !ASSERT_DISABLED
342 ASSERT(!m_destroyed);
343 ASSERT(!m_destructionProhibited);
344 m_destructionProhibited = true;
345#endif
346
347 if (shouldProcessTokenInForeignContent(token))
348 processTokenInForeignContent(WTFMove(token));
349 else
350 processToken(WTFMove(token));
351
352 bool inForeignContent = !m_tree.isEmpty()
353 && !isInHTMLNamespace(adjustedCurrentStackItem())
354 && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
355 && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
356
357 m_parser.tokenizer().setForceNullCharacterReplacement(m_insertionMode == InsertionMode::Text || inForeignContent);
358 m_parser.tokenizer().setShouldAllowCDATA(inForeignContent);
359
360#if !ASSERT_DISABLED
361 m_destructionProhibited = false;
362#endif
363
364 m_tree.executeQueuedTasks();
365 // The tree builder might have been destroyed as an indirect result of executing the queued tasks.
366}
367
368void HTMLTreeBuilder::processToken(AtomicHTMLToken&& token)
369{
370 switch (token.type()) {
371 case HTMLToken::Uninitialized:
372 ASSERT_NOT_REACHED();
373 break;
374 case HTMLToken::DOCTYPE:
375 m_shouldSkipLeadingNewline = false;
376 processDoctypeToken(WTFMove(token));
377 break;
378 case HTMLToken::StartTag:
379 m_shouldSkipLeadingNewline = false;
380 processStartTag(WTFMove(token));
381 break;
382 case HTMLToken::EndTag:
383 m_shouldSkipLeadingNewline = false;
384 processEndTag(WTFMove(token));
385 break;
386 case HTMLToken::Comment:
387 m_shouldSkipLeadingNewline = false;
388 processComment(WTFMove(token));
389 return;
390 case HTMLToken::Character:
391 processCharacter(WTFMove(token));
392 break;
393 case HTMLToken::EndOfFile:
394 m_shouldSkipLeadingNewline = false;
395 processEndOfFile(WTFMove(token));
396 break;
397 }
398}
399
400void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken&& token)
401{
402 ASSERT(token.type() == HTMLToken::DOCTYPE);
403 if (m_insertionMode == InsertionMode::Initial) {
404 m_tree.insertDoctype(WTFMove(token));
405 m_insertionMode = InsertionMode::BeforeHTML;
406 return;
407 }
408 if (m_insertionMode == InsertionMode::InTableText) {
409 defaultForInTableText();
410 processDoctypeToken(WTFMove(token));
411 return;
412 }
413 parseError(token);
414}
415
416void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, Vector<Attribute>&& attributes)
417{
418 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
419 AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), WTFMove(attributes));
420 processStartTag(WTFMove(fakeToken));
421}
422
423void HTMLTreeBuilder::processFakeEndTag(const AtomicString& tagName)
424{
425 AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName);
426 processEndTag(WTFMove(fakeToken));
427}
428
429void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
430{
431 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
432 processFakeEndTag(tagName.localName());
433}
434
435void HTMLTreeBuilder::processFakeCharacters(const String& characters)
436{
437 ASSERT(!characters.isEmpty());
438 ExternalCharacterTokenBuffer buffer(characters);
439 processCharacterBuffer(buffer);
440}
441
442void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
443{
444 if (!m_tree.openElements().inButtonScope(pTag->localName()))
445 return;
446 AtomicHTMLToken endP(HTMLToken::EndTag, pTag->localName());
447 processEndTag(WTFMove(endP));
448}
449
450namespace {
451
452bool isLi(const HTMLStackItem& item)
453{
454 return item.hasTagName(liTag);
455}
456
457bool isDdOrDt(const HTMLStackItem& item)
458{
459 return item.hasTagName(ddTag) || item.hasTagName(dtTag);
460}
461
462}
463
464template <bool shouldClose(const HTMLStackItem&)> void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken&& token)
465{
466 m_framesetOk = false;
467 for (auto* nodeRecord = &m_tree.openElements().topRecord(); ; nodeRecord = nodeRecord->next()) {
468 HTMLStackItem& item = nodeRecord->stackItem();
469 if (shouldClose(item)) {
470 ASSERT(item.isElement());
471 processFakeEndTag(item.localName());
472 break;
473 }
474 if (isSpecialNode(item) && !item.hasTagName(addressTag) && !item.hasTagName(divTag) && !item.hasTagName(pTag))
475 break;
476 }
477 processFakePEndTagIfPInButtonScope();
478 m_tree.insertHTMLElement(WTFMove(token));
479}
480
481template <typename TableQualifiedName> static HashMap<AtomicString, QualifiedName> createCaseMap(const TableQualifiedName* const names[], unsigned length)
482{
483 HashMap<AtomicString, QualifiedName> map;
484 for (unsigned i = 0; i < length; ++i) {
485 const QualifiedName& name = *names[i];
486 const AtomicString& localName = name.localName();
487 AtomicString loweredLocalName = localName.convertToASCIILowercase();
488 if (loweredLocalName != localName)
489 map.add(loweredLocalName, name);
490 }
491 return map;
492}
493
494static void adjustSVGTagNameCase(AtomicHTMLToken& token)
495{
496 static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createCaseMap(SVGNames::getSVGTags(), SVGNames::SVGTagsCount);
497 const QualifiedName& casedName = map.get().get(token.name());
498 if (casedName.localName().isNull())
499 return;
500 token.setName(casedName.localName());
501}
502
503static inline void adjustAttributes(HashMap<AtomicString, QualifiedName>& map, AtomicHTMLToken& token)
504{
505 for (auto& attribute : token.attributes()) {
506 const QualifiedName& casedName = map.get(attribute.localName());
507 if (!casedName.localName().isNull())
508 attribute.parserSetName(casedName);
509 }
510}
511
512template<const QualifiedName* const* attributesTable(), unsigned attributesTableLength> static void adjustAttributes(AtomicHTMLToken& token)
513{
514 static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createCaseMap(attributesTable(), attributesTableLength);
515 adjustAttributes(map, token);
516}
517
518static inline void adjustSVGAttributes(AtomicHTMLToken& token)
519{
520 adjustAttributes<SVGNames::getSVGAttrs, SVGNames::SVGAttrsCount>(token);
521}
522
523static inline void adjustMathMLAttributes(AtomicHTMLToken& token)
524{
525 adjustAttributes<MathMLNames::getMathMLAttrs, MathMLNames::MathMLAttrsCount>(token);
526}
527
528static void addNamesWithPrefix(HashMap<AtomicString, QualifiedName>& map, const AtomicString& prefix, const QualifiedName* const names[], unsigned length)
529{
530 for (unsigned i = 0; i < length; ++i) {
531 const QualifiedName& name = *names[i];
532 const AtomicString& localName = name.localName();
533 map.add(prefix + ':' + localName, QualifiedName(prefix, localName, name.namespaceURI()));
534 }
535}
536
537static HashMap<AtomicString, QualifiedName> createForeignAttributesMap()
538{
539 HashMap<AtomicString, QualifiedName> map;
540
541 AtomicString xlinkName("xlink", AtomicString::ConstructFromLiteral);
542 addNamesWithPrefix(map, xlinkName, XLinkNames::getXLinkAttrs(), XLinkNames::XLinkAttrsCount);
543 addNamesWithPrefix(map, xmlAtom(), XMLNames::getXMLAttrs(), XMLNames::XMLAttrsCount);
544
545 map.add(WTF::xmlnsAtom(), XMLNSNames::xmlnsAttr);
546 map.add("xmlns:xlink", QualifiedName(xmlnsAtom(), xlinkName, XMLNSNames::xmlnsNamespaceURI));
547
548 return map;
549}
550
551static void adjustForeignAttributes(AtomicHTMLToken& token)
552{
553 static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createForeignAttributesMap();
554 adjustAttributes(map, token);
555}
556
// Handles a start tag token for the in-body case.
//
// The function is one long sequence of checks on token.name(). Each matching
// check handles the token and returns, with three deliberate exceptions where
// control continues to later checks: the plugin-policy check (applet / embed /
// object), the image check (which renames the token to img), and everything
// that matches no check at all (handled at the very end as a generic element).
// Because of those fall-throughs the ORDER of the checks must be preserved.
//
// |token| must be a start tag (asserted). It is moved from on the paths that
// insert an element or delegate to another handler.
557void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken&& token)
558{
559 ASSERT(token.type() == HTMLToken::StartTag);
560 if (token.name() == htmlTag) {
561 processHtmlStartTagForInBody(WTFMove(token));
562 return;
563 }
 // These tags are delegated to the in-head start tag handling, which is
 // expected to process them (asserted).
564 if (token.name() == baseTag
565 || token.name() == basefontTag
566 || token.name() == bgsoundTag
567 || token.name() == commandTag
568 || token.name() == linkTag
569 || token.name() == metaTag
570 || token.name() == noframesTag
571 || token.name() == scriptTag
572 || token.name() == styleTag
573 || token.name() == titleTag) {
574 bool didProcess = processStartTagForInHead(WTFMove(token));
575 ASSERT_UNUSED(didProcess, didProcess);
576 return;
577 }
 // body: always a parse error here. The token is dropped when the second
 // element on the stack is not the body element, when the stack has only one
 // element, or when a template is in HTML scope.
578 if (token.name() == bodyTag) {
579 parseError(token);
580 bool fragmentOrTemplateCase = !m_tree.openElements().secondElementIsHTMLBodyElement() || m_tree.openElements().hasOnlyOneElement()
581 || m_tree.openElements().hasTemplateInHTMLScope();
582 if (fragmentOrTemplateCase) {
583 ASSERT(isParsingFragmentOrTemplateContents());
584 return;
585 }
586 m_framesetOk = false;
587 m_tree.insertHTMLBodyStartTagInBody(WTFMove(token));
588 return;
589 }
 // frameset: always a parse error here, and only acted upon while
 // m_framesetOk is still set. In that case the body element is removed, the
 // stack is popped down past it, and the frameset element is inserted.
590 if (token.name() == framesetTag) {
591 parseError(token);
592 if (!m_tree.openElements().secondElementIsHTMLBodyElement() || m_tree.openElements().hasOnlyOneElement()) {
593 ASSERT(isParsingFragmentOrTemplateContents());
594 return;
595 }
596 if (!m_framesetOk)
597 return;
598 m_tree.openElements().bodyElement().remove();
599 m_tree.openElements().popUntil(m_tree.openElements().bodyElement());
600 m_tree.openElements().popHTMLBodyElement();
601 // Note: in the fragment case the root is a DocumentFragment instead of a proper html element which is a quirk / optimization in WebKit.
602 ASSERT(!isParsingFragment() || is<DocumentFragment>(m_tree.openElements().topNode()));
603 ASSERT(isParsingFragment() || &m_tree.openElements().top() == &m_tree.openElements().htmlElement());
604 m_tree.insertHTMLElement(WTFMove(token));
605 m_insertionMode = InsertionMode::InFrameset;
606 return;
607 }
 // Tags that first close an open p element in button scope and are then
 // inserted as ordinary HTML elements.
608 if (token.name() == addressTag
609 || token.name() == articleTag
610 || token.name() == asideTag
611 || token.name() == blockquoteTag
612 || token.name() == centerTag
613 || token.name() == detailsTag
614 || token.name() == dirTag
615 || token.name() == divTag
616 || token.name() == dlTag
617 || token.name() == fieldsetTag
618 || token.name() == figcaptionTag
619 || token.name() == figureTag
620 || token.name() == footerTag
621 || token.name() == headerTag
622 || token.name() == hgroupTag
623 || token.name() == mainTag
624 || token.name() == menuTag
625 || token.name() == navTag
626 || token.name() == olTag
627 || token.name() == pTag
628 || token.name() == sectionTag
629 || token.name() == summaryTag
630 || token.name() == ulTag) {
631 processFakePEndTagIfPInButtonScope();
632 m_tree.insertHTMLElement(WTFMove(token));
633 return;
634 }
 // h1-h6: a numbered header that is the current element is popped (with a
 // parse error) before the new one is inserted.
635 if (isNumberedHeaderTag(token.name())) {
636 processFakePEndTagIfPInButtonScope();
637 if (isNumberedHeaderElement(m_tree.currentStackItem())) {
638 parseError(token);
639 m_tree.openElements().pop();
640 }
641 m_tree.insertHTMLElement(WTFMove(token));
642 return;
643 }
 // pre / listing: sets m_shouldSkipLeadingNewline after inserting the element.
644 if (token.name() == preTag || token.name() == listingTag) {
645 processFakePEndTagIfPInButtonScope();
646 m_tree.insertHTMLElement(WTFMove(token));
647 m_shouldSkipLeadingNewline = true;
648 m_framesetOk = false;
649 return;
650 }
 // form: ignored (parse error) when a form is already set on the construction
 // site, unless template contents are being parsed.
651 if (token.name() == formTag) {
652 if (m_tree.form() && !isParsingTemplateContents()) {
653 parseError(token);
654 return;
655 }
656 processFakePEndTagIfPInButtonScope();
657 m_tree.insertHTMLFormElement(WTFMove(token));
658 return;
659 }
660 if (token.name() == liTag) {
661 processCloseWhenNestedTag<isLi>(WTFMove(token));
662 return;
663 }
664 if (token.name() == ddTag || token.name() == dtTag) {
665 processCloseWhenNestedTag<isDdOrDt>(WTFMove(token));
666 return;
667 }
 // plaintext: after insertion the tokenizer is switched to its PLAINTEXT state.
668 if (token.name() == plaintextTag) {
669 processFakePEndTagIfPInButtonScope();
670 m_tree.insertHTMLElement(WTFMove(token));
671 m_parser.tokenizer().setPLAINTEXTState();
672 return;
673 }
 // button: a button already in scope is closed with a synthesized end tag and
 // the start tag is then reprocessed from the top.
674 if (token.name() == buttonTag) {
675 if (m_tree.openElements().inScope(buttonTag)) {
676 parseError(token);
677 processFakeEndTag(buttonTag);
678 processStartTag(WTFMove(token)); // FIXME: Could we just fall through here?
679 return;
680 }
681 m_tree.reconstructTheActiveFormattingElements();
682 m_tree.insertHTMLElement(WTFMove(token));
683 m_framesetOk = false;
684 return;
685 }
 // a: an a element found among the active formatting elements is closed and
 // removed from both the formatting list and the open element stack first.
686 if (token.name() == aTag) {
687 RefPtr<Element> activeATag = m_tree.activeFormattingElements().closestElementInScopeWithName(aTag->localName());
688 if (activeATag) {
689 parseError(token);
690 processFakeEndTag(aTag);
691 m_tree.activeFormattingElements().remove(*activeATag);
692 if (m_tree.openElements().contains(*activeATag))
693 m_tree.openElements().remove(*activeATag);
694 }
695 m_tree.reconstructTheActiveFormattingElements();
696 m_tree.insertFormattingElement(WTFMove(token));
697 return;
698 }
699 if (isNonAnchorNonNobrFormattingTag(token.name())) {
700 m_tree.reconstructTheActiveFormattingElements();
701 m_tree.insertFormattingElement(WTFMove(token));
702 return;
703 }
704 if (token.name() == nobrTag) {
705 m_tree.reconstructTheActiveFormattingElements();
706 if (m_tree.openElements().inScope(nobrTag)) {
707 parseError(token);
708 processFakeEndTag(nobrTag);
709 m_tree.reconstructTheActiveFormattingElements();
710 }
711 m_tree.insertFormattingElement(WTFMove(token));
712 return;
713 }
 // Plugin tags are dropped when the parser content policy does not allow
 // plugin content. Otherwise control deliberately continues: applet and
 // object are handled by the next check, embed by the self-closing check
 // further down.
714 if (token.name() == appletTag || token.name() == embedTag || token.name() == objectTag) {
715 if (!pluginContentIsAllowed(m_tree.parserContentPolicy()))
716 return;
717 }
 // applet / marquee / object: inserted, followed by a marker in the list of
 // active formatting elements.
718 if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) {
719 m_tree.reconstructTheActiveFormattingElements();
720 m_tree.insertHTMLElement(WTFMove(token));
721 m_tree.activeFormattingElements().appendMarker();
722 m_framesetOk = false;
723 return;
724 }
 // table: an open p in button scope is closed first, except in quirks mode.
725 if (token.name() == tableTag) {
726 if (!m_tree.inQuirksMode() && m_tree.openElements().inButtonScope(pTag))
727 processFakeEndTag(pTag);
728 m_tree.insertHTMLElement(WTFMove(token));
729 m_framesetOk = false;
730 m_insertionMode = InsertionMode::InTable;
731 return;
732 }
733 if (token.name() == imageTag) {
734 parseError(token);
735 // Apparently we're not supposed to ask.
736 token.setName(imgTag->localName());
737 // Note the fall through to the imgTag handling below!
738 }
 // Elements inserted through insertSelfClosingHTMLElement(); all of them
 // clear m_framesetOk.
739 if (token.name() == areaTag
740 || token.name() == brTag
741 || token.name() == embedTag
742 || token.name() == imgTag
743 || token.name() == keygenTag
744 || token.name() == wbrTag) {
745 m_tree.reconstructTheActiveFormattingElements();
746 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
747 m_framesetOk = false;
748 return;
749 }
 // input: m_framesetOk is left untouched only for type="hidden" (compared
 // ASCII case-insensitively). The attribute is inspected before the token is
 // moved from.
750 if (token.name() == inputTag) {
751 m_tree.reconstructTheActiveFormattingElements();
752 auto* typeAttribute = findAttribute(token.attributes(), typeAttr);
753 bool shouldClearFramesetOK = !typeAttribute || !equalLettersIgnoringASCIICase(typeAttribute->value(), "hidden");
754 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
755 if (shouldClearFramesetOK)
756 m_framesetOk = false;
757 return;
758 }
759 if (token.name() == paramTag || token.name() == sourceTag || token.name() == trackTag) {
760 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
761 return;
762 }
763 if (token.name() == hrTag) {
764 processFakePEndTagIfPInButtonScope();
765 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
766 m_framesetOk = false;
767 return;
768 }
 // textarea: switches the tokenizer to RCDATA and the tree builder to the Text
 // insertion mode, remembering the current mode in m_originalInsertionMode.
769 if (token.name() == textareaTag) {
770 m_tree.insertHTMLElement(WTFMove(token));
771 m_shouldSkipLeadingNewline = true;
772 m_parser.tokenizer().setRCDATAState();
773 m_originalInsertionMode = m_insertionMode;
774 m_framesetOk = false;
775 m_insertionMode = InsertionMode::Text;
776 return;
777 }
 // xmp, iframe, noembed and (only when scripting is enabled) noscript are
 // handed to the generic raw text start tag handling.
778 if (token.name() == xmpTag) {
779 processFakePEndTagIfPInButtonScope();
780 m_tree.reconstructTheActiveFormattingElements();
781 m_framesetOk = false;
782 processGenericRawTextStartTag(WTFMove(token));
783 return;
784 }
785 if (token.name() == iframeTag) {
786 m_framesetOk = false;
787 processGenericRawTextStartTag(WTFMove(token));
788 return;
789 }
790 if (token.name() == noembedTag) {
791 processGenericRawTextStartTag(WTFMove(token));
792 return;
793 }
794 if (token.name() == noscriptTag && m_options.scriptEnabled) {
795 processGenericRawTextStartTag(WTFMove(token));
796 return;
797 }
 // select: the next insertion mode depends on whether the current mode is one
 // of the table-related modes.
798 if (token.name() == selectTag) {
799 m_tree.reconstructTheActiveFormattingElements();
800 m_tree.insertHTMLElement(WTFMove(token));
801 m_framesetOk = false;
802 if (m_insertionMode == InsertionMode::InTable
803 || m_insertionMode == InsertionMode::InCaption
804 || m_insertionMode == InsertionMode::InColumnGroup
805 || m_insertionMode == InsertionMode::InTableBody
806 || m_insertionMode == InsertionMode::InRow
807 || m_insertionMode == InsertionMode::InCell)
808 m_insertionMode = InsertionMode::InSelectInTable;
809 else
810 m_insertionMode = InsertionMode::InSelect;
811 return;
812 }
 // optgroup / option: an option that is the current node is closed first.
813 if (token.name() == optgroupTag || token.name() == optionTag) {
814 if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
815 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag->localName());
816 processEndTag(WTFMove(endOption));
817 }
818 m_tree.reconstructTheActiveFormattingElements();
819 m_tree.insertHTMLElement(WTFMove(token));
820 return;
821 }
 // Ruby annotation tags: when a ruby element is in scope, implied end tags are
 // generated first; for rt / rp an open rtc is excluded from that.
822 if (token.name() == rbTag || token.name() == rtcTag) {
823 if (m_tree.openElements().inScope(rubyTag->localName())) {
824 m_tree.generateImpliedEndTags();
825 if (!m_tree.currentStackItem().hasTagName(rubyTag))
826 parseError(token);
827 }
828 m_tree.insertHTMLElement(WTFMove(token));
829 return;
830 }
831 if (token.name() == rtTag || token.name() == rpTag) {
832 if (m_tree.openElements().inScope(rubyTag->localName())) {
833 m_tree.generateImpliedEndTagsWithExclusion(rtcTag->localName());
834 if (!m_tree.currentStackItem().hasTagName(rubyTag) && !m_tree.currentStackItem().hasTagName(rtcTag))
835 parseError(token);
836 }
837 m_tree.insertHTMLElement(WTFMove(token));
838 return;
839 }
 // math / svg: attribute names are adjusted and the element is inserted as a
 // foreign element in the MathML or SVG namespace respectively.
840 if (token.name() == MathMLNames::mathTag->localName()) {
841 m_tree.reconstructTheActiveFormattingElements();
842 adjustMathMLAttributes(token);
843 adjustForeignAttributes(token);
844 m_tree.insertForeignElement(WTFMove(token), MathMLNames::mathmlNamespaceURI);
845 return;
846 }
847 if (token.name() == SVGNames::svgTag->localName()) {
848 m_tree.reconstructTheActiveFormattingElements();
849 adjustSVGAttributes(token);
850 adjustForeignAttributes(token);
851 m_tree.insertForeignElement(WTFMove(token), SVGNames::svgNamespaceURI);
852 return;
853 }
 // Start tags that are ignored here, with a parse error.
854 if (isCaptionColOrColgroupTag(token.name())
855 || token.name() == frameTag
856 || token.name() == headTag
857 || isTableBodyContextTag(token.name())
858 || isTableCellContextTag(token.name())
859 || token.name() == trTag) {
860 parseError(token);
861 return;
862 }
863 if (token.name() == templateTag) {
864 m_framesetOk = false;
865 processTemplateStartTag(WTFMove(token));
866 return;
867 }
 // Any other start tag is inserted as a generic HTML element.
868 m_tree.reconstructTheActiveFormattingElements();
869 insertGenericHTMLElement(WTFMove(token));
870}
871
872inline void HTMLTreeBuilder::insertGenericHTMLElement(AtomicHTMLToken&& token)
873{
874 m_customElementToConstruct = m_tree.insertHTMLElementOrFindCustomElementInterface(WTFMove(token));
875}
876
877void HTMLTreeBuilder::didCreateCustomOrFallbackElement(Ref<Element>&& element, CustomElementConstructionData& data)
878{
879 m_tree.insertCustomElement(WTFMove(element), data.name, WTFMove(data.attributes));
880}
881
882void HTMLTreeBuilder::processTemplateStartTag(AtomicHTMLToken&& token)
883{
884 m_tree.activeFormattingElements().appendMarker();
885 m_tree.insertHTMLElement(WTFMove(token));
886 m_templateInsertionModes.append(InsertionMode::TemplateContents);
887 m_insertionMode = InsertionMode::TemplateContents;
888}
889
890bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken&& token)
891{
892 ASSERT(token.name() == templateTag->localName());
893 if (!m_tree.openElements().hasTemplateInHTMLScope()) {
894 ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && m_fragmentContext.contextElement().hasTagName(templateTag)));
895 parseError(token);
896 return false;
897 }
898 m_tree.generateImpliedEndTags();
899 if (!m_tree.currentStackItem().hasTagName(templateTag))
900 parseError(token);
901 m_tree.openElements().popUntilPopped(templateTag);
902 m_tree.activeFormattingElements().clearToLastMarker();
903 m_templateInsertionModes.removeLast();
904 resetInsertionModeAppropriately();
905 return true;
906}
907
908bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken&& token)
909{
910 AtomicHTMLToken endTemplate(HTMLToken::EndTag, templateTag->localName());
911 if (!processTemplateEndTag(WTFMove(endTemplate)))
912 return false;
913
914 processEndOfFile(WTFMove(token));
915 return true;
916}
917
918bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
919{
920 bool ignoreFakeEndTag = m_tree.currentIsRootNode() || m_tree.currentNode().hasTagName(templateTag);
921
922 if (ignoreFakeEndTag) {
923 ASSERT(isParsingFragmentOrTemplateContents());
924 // FIXME: parse error
925 return false;
926 }
927 m_tree.openElements().pop();
928 m_insertionMode = InsertionMode::InTable;
929 return true;
930}
931
932// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
933void HTMLTreeBuilder::closeTheCell()
934{
935 ASSERT(m_insertionMode == InsertionMode::InCell);
936 if (m_tree.openElements().inTableScope(tdTag)) {
937 ASSERT(!m_tree.openElements().inTableScope(thTag));
938 processFakeEndTag(tdTag);
939 return;
940 }
941 ASSERT(m_tree.openElements().inTableScope(thTag));
942 processFakeEndTag(thTag);
943 ASSERT(m_insertionMode == InsertionMode::InRow);
944}
945
946void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken&& token)
947{
948 ASSERT(token.type() == HTMLToken::StartTag);
949 if (token.name() == captionTag) {
950 m_tree.openElements().popUntilTableScopeMarker();
951 m_tree.activeFormattingElements().appendMarker();
952 m_tree.insertHTMLElement(WTFMove(token));
953 m_insertionMode = InsertionMode::InCaption;
954 return;
955 }
956 if (token.name() == colgroupTag) {
957 m_tree.openElements().popUntilTableScopeMarker();
958 m_tree.insertHTMLElement(WTFMove(token));
959 m_insertionMode = InsertionMode::InColumnGroup;
960 return;
961 }
962 if (token.name() == colTag) {
963 processFakeStartTag(colgroupTag);
964 ASSERT(m_insertionMode == InsertionMode::InColumnGroup);
965 processStartTag(WTFMove(token));
966 return;
967 }
968 if (isTableBodyContextTag(token.name())) {
969 m_tree.openElements().popUntilTableScopeMarker();
970 m_tree.insertHTMLElement(WTFMove(token));
971 m_insertionMode = InsertionMode::InTableBody;
972 return;
973 }
974 if (isTableCellContextTag(token.name()) || token.name() == trTag) {
975 processFakeStartTag(tbodyTag);
976 ASSERT(m_insertionMode == InsertionMode::InTableBody);
977 processStartTag(WTFMove(token));
978 return;
979 }
980 if (token.name() == tableTag) {
981 parseError(token);
982 if (!processTableEndTagForInTable()) {
983 ASSERT(isParsingFragmentOrTemplateContents());
984 return;
985 }
986 processStartTag(WTFMove(token));
987 return;
988 }
989 if (token.name() == styleTag || token.name() == scriptTag) {
990 processStartTagForInHead(WTFMove(token));
991 return;
992 }
993 if (token.name() == inputTag) {
994 auto* typeAttribute = findAttribute(token.attributes(), typeAttr);
995 if (typeAttribute && equalLettersIgnoringASCIICase(typeAttribute->value(), "hidden")) {
996 parseError(token);
997 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
998 return;
999 }
1000 // Fall through to "anything else" case.
1001 }
1002 if (token.name() == formTag) {
1003 parseError(token);
1004 if (m_tree.form() && !isParsingTemplateContents())
1005 return;
1006 m_tree.insertHTMLFormElement(WTFMove(token), true);
1007 m_tree.openElements().pop();
1008 return;
1009 }
1010 if (token.name() == templateTag) {
1011 processTemplateStartTag(WTFMove(token));
1012 return;
1013 }
1014 parseError(token);
1015 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1016 processStartTagForInBody(WTFMove(token));
1017}
1018
1019void HTMLTreeBuilder::processStartTag(AtomicHTMLToken&& token)
1020{
1021 ASSERT(token.type() == HTMLToken::StartTag);
1022 switch (m_insertionMode) {
1023 case InsertionMode::Initial:
1024 defaultForInitial();
1025 ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
1026 FALLTHROUGH;
1027 case InsertionMode::BeforeHTML:
1028 if (token.name() == htmlTag) {
1029 m_tree.insertHTMLHtmlStartTagBeforeHTML(WTFMove(token));
1030 m_insertionMode = InsertionMode::BeforeHead;
1031 return;
1032 }
1033 defaultForBeforeHTML();
1034 ASSERT(m_insertionMode == InsertionMode::BeforeHead);
1035 FALLTHROUGH;
1036 case InsertionMode::BeforeHead:
1037 if (token.name() == htmlTag) {
1038 processHtmlStartTagForInBody(WTFMove(token));
1039 return;
1040 }
1041 if (token.name() == headTag) {
1042 m_tree.insertHTMLHeadElement(WTFMove(token));
1043 m_insertionMode = InsertionMode::InHead;
1044 return;
1045 }
1046 defaultForBeforeHead();
1047 ASSERT(m_insertionMode == InsertionMode::InHead);
1048 FALLTHROUGH;
1049 case InsertionMode::InHead:
1050 if (processStartTagForInHead(WTFMove(token)))
1051 return;
1052 defaultForInHead();
1053 ASSERT(m_insertionMode == InsertionMode::AfterHead);
1054 FALLTHROUGH;
1055 case InsertionMode::AfterHead:
1056 if (token.name() == htmlTag) {
1057 processHtmlStartTagForInBody(WTFMove(token));
1058 return;
1059 }
1060 if (token.name() == bodyTag) {
1061 m_framesetOk = false;
1062 m_tree.insertHTMLBodyElement(WTFMove(token));
1063 m_insertionMode = InsertionMode::InBody;
1064 return;
1065 }
1066 if (token.name() == framesetTag) {
1067 m_tree.insertHTMLElement(WTFMove(token));
1068 m_insertionMode = InsertionMode::InFrameset;
1069 return;
1070 }
1071 if (token.name() == baseTag
1072 || token.name() == basefontTag
1073 || token.name() == bgsoundTag
1074 || token.name() == linkTag
1075 || token.name() == metaTag
1076 || token.name() == noframesTag
1077 || token.name() == scriptTag
1078 || token.name() == styleTag
1079 || token.name() == templateTag
1080 || token.name() == titleTag) {
1081 parseError(token);
1082 ASSERT(m_tree.headStackItem());
1083 m_tree.openElements().pushHTMLHeadElement(*m_tree.headStackItem());
1084 processStartTagForInHead(WTFMove(token));
1085 m_tree.openElements().removeHTMLHeadElement(m_tree.head());
1086 return;
1087 }
1088 if (token.name() == headTag) {
1089 parseError(token);
1090 return;
1091 }
1092 defaultForAfterHead();
1093 ASSERT(m_insertionMode == InsertionMode::InBody);
1094 FALLTHROUGH;
1095 case InsertionMode::InBody:
1096 processStartTagForInBody(WTFMove(token));
1097 break;
1098 case InsertionMode::InTable:
1099 processStartTagForInTable(WTFMove(token));
1100 break;
1101 case InsertionMode::InCaption:
1102 if (isCaptionColOrColgroupTag(token.name())
1103 || isTableBodyContextTag(token.name())
1104 || isTableCellContextTag(token.name())
1105 || token.name() == trTag) {
1106 parseError(token);
1107 if (!processCaptionEndTagForInCaption()) {
1108 ASSERT(isParsingFragment());
1109 return;
1110 }
1111 processStartTag(WTFMove(token));
1112 return;
1113 }
1114 processStartTagForInBody(WTFMove(token));
1115 break;
1116 case InsertionMode::InColumnGroup:
1117 if (token.name() == htmlTag) {
1118 processHtmlStartTagForInBody(WTFMove(token));
1119 return;
1120 }
1121 if (token.name() == colTag) {
1122 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
1123 return;
1124 }
1125 if (token.name() == templateTag) {
1126 processTemplateStartTag(WTFMove(token));
1127 return;
1128 }
1129 if (!processColgroupEndTagForInColumnGroup()) {
1130 ASSERT(isParsingFragmentOrTemplateContents());
1131 return;
1132 }
1133 processStartTag(WTFMove(token));
1134 break;
1135 case InsertionMode::InTableBody:
1136 if (token.name() == trTag) {
1137 m_tree.openElements().popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1138 m_tree.insertHTMLElement(WTFMove(token));
1139 m_insertionMode = InsertionMode::InRow;
1140 return;
1141 }
1142 if (isTableCellContextTag(token.name())) {
1143 parseError(token);
1144 processFakeStartTag(trTag);
1145 ASSERT(m_insertionMode == InsertionMode::InRow);
1146 processStartTag(WTFMove(token));
1147 return;
1148 }
1149 if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1150 // FIXME: This is slow.
1151 if (!m_tree.openElements().inTableScope(tbodyTag) && !m_tree.openElements().inTableScope(theadTag) && !m_tree.openElements().inTableScope(tfootTag)) {
1152 ASSERT(isParsingFragmentOrTemplateContents());
1153 parseError(token);
1154 return;
1155 }
1156 m_tree.openElements().popUntilTableBodyScopeMarker();
1157 ASSERT(isTableBodyContextTag(m_tree.currentStackItem().localName()));
1158 processFakeEndTag(m_tree.currentStackItem().localName());
1159 processStartTag(WTFMove(token));
1160 return;
1161 }
1162 processStartTagForInTable(WTFMove(token));
1163 break;
1164 case InsertionMode::InRow:
1165 if (isTableCellContextTag(token.name())) {
1166 m_tree.openElements().popUntilTableRowScopeMarker();
1167 m_tree.insertHTMLElement(WTFMove(token));
1168 m_insertionMode = InsertionMode::InCell;
1169 m_tree.activeFormattingElements().appendMarker();
1170 return;
1171 }
1172 if (token.name() == trTag
1173 || isCaptionColOrColgroupTag(token.name())
1174 || isTableBodyContextTag(token.name())) {
1175 if (!processTrEndTagForInRow()) {
1176 ASSERT(isParsingFragmentOrTemplateContents());
1177 return;
1178 }
1179 ASSERT(m_insertionMode == InsertionMode::InTableBody);
1180 processStartTag(WTFMove(token));
1181 return;
1182 }
1183 processStartTagForInTable(WTFMove(token));
1184 break;
1185 case InsertionMode::InCell:
1186 if (isCaptionColOrColgroupTag(token.name())
1187 || isTableCellContextTag(token.name())
1188 || token.name() == trTag
1189 || isTableBodyContextTag(token.name())) {
1190 // FIXME: This could be more efficient.
1191 if (!m_tree.openElements().inTableScope(tdTag) && !m_tree.openElements().inTableScope(thTag)) {
1192 ASSERT(isParsingFragment());
1193 parseError(token);
1194 return;
1195 }
1196 closeTheCell();
1197 processStartTag(WTFMove(token));
1198 return;
1199 }
1200 processStartTagForInBody(WTFMove(token));
1201 break;
1202 case InsertionMode::AfterBody:
1203 case InsertionMode::AfterAfterBody:
1204 if (token.name() == htmlTag) {
1205 processHtmlStartTagForInBody(WTFMove(token));
1206 return;
1207 }
1208 m_insertionMode = InsertionMode::InBody;
1209 processStartTag(WTFMove(token));
1210 break;
1211 case InsertionMode::InHeadNoscript:
1212 if (token.name() == htmlTag) {
1213 processHtmlStartTagForInBody(WTFMove(token));
1214 return;
1215 }
1216 if (token.name() == basefontTag
1217 || token.name() == bgsoundTag
1218 || token.name() == linkTag
1219 || token.name() == metaTag
1220 || token.name() == noframesTag
1221 || token.name() == styleTag) {
1222 bool didProcess = processStartTagForInHead(WTFMove(token));
1223 ASSERT_UNUSED(didProcess, didProcess);
1224 return;
1225 }
1226 if (token.name() == htmlTag || token.name() == noscriptTag) {
1227 parseError(token);
1228 return;
1229 }
1230 defaultForInHeadNoscript();
1231 processToken(WTFMove(token));
1232 break;
1233 case InsertionMode::InFrameset:
1234 if (token.name() == htmlTag) {
1235 processHtmlStartTagForInBody(WTFMove(token));
1236 return;
1237 }
1238 if (token.name() == framesetTag) {
1239 m_tree.insertHTMLElement(WTFMove(token));
1240 return;
1241 }
1242 if (token.name() == frameTag) {
1243 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
1244 return;
1245 }
1246 if (token.name() == noframesTag) {
1247 processStartTagForInHead(WTFMove(token));
1248 return;
1249 }
1250 parseError(token);
1251 break;
1252 case InsertionMode::AfterFrameset:
1253 case InsertionMode::AfterAfterFrameset:
1254 if (token.name() == htmlTag) {
1255 processHtmlStartTagForInBody(WTFMove(token));
1256 return;
1257 }
1258 if (token.name() == noframesTag) {
1259 processStartTagForInHead(WTFMove(token));
1260 return;
1261 }
1262 parseError(token);
1263 break;
1264 case InsertionMode::InSelectInTable:
1265 if (token.name() == captionTag
1266 || token.name() == tableTag
1267 || isTableBodyContextTag(token.name())
1268 || token.name() == trTag
1269 || isTableCellContextTag(token.name())) {
1270 parseError(token);
1271 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
1272 processEndTag(WTFMove(endSelect));
1273 processStartTag(WTFMove(token));
1274 return;
1275 }
1276 FALLTHROUGH;
1277 case InsertionMode::InSelect:
1278 if (token.name() == htmlTag) {
1279 processHtmlStartTagForInBody(WTFMove(token));
1280 return;
1281 }
1282 if (token.name() == optionTag) {
1283 if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
1284 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag->localName());
1285 processEndTag(WTFMove(endOption));
1286 }
1287 m_tree.insertHTMLElement(WTFMove(token));
1288 return;
1289 }
1290 if (token.name() == optgroupTag) {
1291 if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
1292 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag->localName());
1293 processEndTag(WTFMove(endOption));
1294 }
1295 if (is<HTMLOptGroupElement>(m_tree.currentStackItem().node())) {
1296 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag->localName());
1297 processEndTag(WTFMove(endOptgroup));
1298 }
1299 m_tree.insertHTMLElement(WTFMove(token));
1300 return;
1301 }
1302 if (token.name() == selectTag) {
1303 parseError(token);
1304 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
1305 processEndTag(WTFMove(endSelect));
1306 return;
1307 }
1308 if (token.name() == inputTag || token.name() == keygenTag || token.name() == textareaTag) {
1309 parseError(token);
1310 if (!m_tree.openElements().inSelectScope(selectTag)) {
1311 ASSERT(isParsingFragment());
1312 return;
1313 }
1314 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
1315 processEndTag(WTFMove(endSelect));
1316 processStartTag(WTFMove(token));
1317 return;
1318 }
1319 if (token.name() == scriptTag) {
1320 bool didProcess = processStartTagForInHead(WTFMove(token));
1321 ASSERT_UNUSED(didProcess, didProcess);
1322 return;
1323 }
1324 if (token.name() == templateTag) {
1325 processTemplateStartTag(WTFMove(token));
1326 return;
1327 }
1328 break;
1329 case InsertionMode::InTableText:
1330 defaultForInTableText();
1331 processStartTag(WTFMove(token));
1332 break;
1333 case InsertionMode::Text:
1334 ASSERT_NOT_REACHED();
1335 break;
1336 case InsertionMode::TemplateContents:
1337 if (token.name() == templateTag) {
1338 processTemplateStartTag(WTFMove(token));
1339 return;
1340 }
1341
1342 if (token.name() == linkTag
1343 || token.name() == scriptTag
1344 || token.name() == styleTag
1345 || token.name() == metaTag) {
1346 processStartTagForInHead(WTFMove(token));
1347 return;
1348 }
1349
1350 InsertionMode insertionMode = InsertionMode::TemplateContents;
1351 if (token.name() == colTag)
1352 insertionMode = InsertionMode::InColumnGroup;
1353 else if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name()))
1354 insertionMode = InsertionMode::InTable;
1355 else if (token.name() == trTag)
1356 insertionMode = InsertionMode::InTableBody;
1357 else if (isTableCellContextTag(token.name()))
1358 insertionMode = InsertionMode::InRow;
1359 else
1360 insertionMode = InsertionMode::InBody;
1361
1362 ASSERT(insertionMode != InsertionMode::TemplateContents);
1363 ASSERT(m_templateInsertionModes.last() == InsertionMode::TemplateContents);
1364 m_templateInsertionModes.last() = insertionMode;
1365 m_insertionMode = insertionMode;
1366
1367 processStartTag(WTFMove(token));
1368 break;
1369 }
1370}
1371
1372void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken&& token)
1373{
1374 parseError(token);
1375 if (m_tree.openElements().hasTemplateInHTMLScope()) {
1376 ASSERT(isParsingTemplateContents());
1377 return;
1378 }
1379 m_tree.insertHTMLHtmlStartTagInBody(WTFMove(token));
1380}
1381
1382bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken&& token)
1383{
1384 ASSERT(token.type() == HTMLToken::EndTag);
1385 ASSERT(token.name() == bodyTag);
1386 if (!m_tree.openElements().inScope(bodyTag->localName())) {
1387 parseError(token);
1388 return false;
1389 }
1390 notImplemented(); // Emit a more specific parse error based on stack contents.
1391 m_insertionMode = InsertionMode::AfterBody;
1392 return true;
1393}
1394
1395void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken&& token)
1396{
1397 ASSERT(token.type() == HTMLToken::EndTag);
1398 for (auto* record = &m_tree.openElements().topRecord(); ; record = record->next()) {
1399 HTMLStackItem& item = record->stackItem();
1400 if (item.matchesHTMLTag(token.name())) {
1401 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1402 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1403 parseError(token);
1404 m_tree.openElements().popUntilPopped(item.element());
1405 return;
1406 }
1407 if (isSpecialNode(item)) {
1408 parseError(token);
1409 return;
1410 }
1411 }
1412}
1413
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
//
// Runs the adoption agency algorithm for an end tag whose name is a formatting
// tag. The numbered comments below refer to the steps of that algorithm.
// The function returns early when there is no matching formatting element
// (the token is then handled as "any other end tag"), when the formatting
// element is on the stack but not in scope, when it is not on the stack at
// all, or when there is no furthest block.
void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
{
    // The adoption agency algorithm is N^2. We limit the number of iterations
    // to stop from hanging the whole browser. This limit is specified in the
    // adoption agency algorithm:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
    static const int outerIterationLimit = 8;
    static const int innerIterationLimit = 3;

    // 1, 2, 3 and 16 are covered by the for() loop.
    for (int i = 0; i < outerIterationLimit; ++i) {
        // 4.
        RefPtr<Element> formattingElement = m_tree.activeFormattingElements().closestElementInScopeWithName(token.name());
        // 4.a
        // No candidate formatting element: fall back to the generic end tag handling.
        if (!formattingElement)
            return processAnyOtherEndTagForInBody(WTFMove(token));
        // 4.c
        // Note: checked before 4.b; on the stack but not in scope means the token is ignored.
        if ((m_tree.openElements().contains(*formattingElement)) && !m_tree.openElements().inScope(*formattingElement)) {
            parseError(token);
            notImplemented(); // Check the stack of open elements for a more specific parse error.
            return;
        }
        // 4.b
        // Not on the stack at all: just drop it from the active formatting elements.
        auto* formattingElementRecord = m_tree.openElements().find(*formattingElement);
        if (!formattingElementRecord) {
            parseError(token);
            m_tree.activeFormattingElements().remove(*formattingElement);
            return;
        }
        // 4.d
        if (formattingElement != &m_tree.currentElement())
            parseError(token);
        // 5.
        auto* furthestBlock = m_tree.openElements().furthestBlockForFormattingElement(*formattingElement);
        // 6.
        // No furthest block: pop through the formatting element and forget it.
        if (!furthestBlock) {
            m_tree.openElements().popUntilPopped(*formattingElement);
            m_tree.activeFormattingElements().remove(*formattingElement);
            return;
        }
        // 7.
        ASSERT(furthestBlock->isAbove(*formattingElementRecord));
        Ref<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem();
        // 8.
        HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements().bookmarkFor(*formattingElement);
        // 9.
        auto* node = furthestBlock;
        auto* nextNode = node->next();
        auto* lastNode = furthestBlock;
        // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
        // Note: this inner counter shadows the outer loop's `i`.
        for (int i = 0; i < innerIterationLimit; ++i) {
            // 9.4
            node = nextNode;
            ASSERT(node);
            nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 9.5.
            // 9.5
            // Records that are not active formatting elements are removed from the stack.
            if (!m_tree.activeFormattingElements().contains(node->element())) {
                m_tree.openElements().remove(node->element());
                node = 0;
                continue;
            }
            // 9.6
            if (node == formattingElementRecord)
                break;
            // 9.7
            // Replace the element in both the formatting list entry and the stack record
            // with a new element created from the saved token.
            auto newItem = m_tree.createElementFromSavedToken(node->stackItem());

            HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements().find(node->element());
            nodeEntry->replaceElement(newItem.copyRef());
            node->replaceElement(WTFMove(newItem));

            // 9.8
            if (lastNode == furthestBlock)
                bookmark.moveToAfter(*nodeEntry);
            // 9.9
            m_tree.reparent(*node, *lastNode);
            // 9.10
            lastNode = node;
        }
        // 10.
        m_tree.insertAlreadyParsedChild(commonAncestor.get(), *lastNode);
        // 11.
        auto newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem());
        // 12. & 13.
        m_tree.takeAllChildrenAndReparent(newItem, *furthestBlock);
        // 14.
        m_tree.activeFormattingElements().swapTo(*formattingElement, newItem.copyRef(), bookmark);
        // 15.
        m_tree.openElements().remove(*formattingElement);
        m_tree.openElements().insertAbove(WTFMove(newItem), *furthestBlock);
    }
}
1507
// Recomputes m_insertionMode from the stack of open elements.
// Walks the stack from the top record downwards and returns as soon as a
// record's tag determines the mode. When the walk reaches the root node while
// parsing a fragment, the fragment's context element item is examined in its
// place. The order of the checks below is significant.
void HTMLTreeBuilder::resetInsertionModeAppropriately()
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
    bool last = false;
    for (auto* record = &m_tree.openElements().topRecord(); ; record = record->next()) {
        RefPtr<HTMLStackItem> item = &record->stackItem();
        // Reached the bottom of the stack: this is the last item to consider.
        if (&item->node() == &m_tree.openElements().rootNode()) {
            last = true;
            bool shouldCreateItem = isParsingFragment();
            if (shouldCreateItem)
                item = &m_fragmentContext.contextElementStackItem();
        }

        // A template restores the innermost recorded template insertion mode.
        if (item->hasTagName(templateTag)) {
            m_insertionMode = m_templateInsertionModes.last();
            return;
        }

        if (item->hasTagName(selectTag)) {
            if (!last) {
                // Look further down the stack (stopping at the root node or a template)
                // for a table element; note that this advances the outer loop's record.
                while (&item->node() != &m_tree.openElements().rootNode() && !item->hasTagName(templateTag)) {
                    record = record->next();
                    item = &record->stackItem();
                    if (is<HTMLTableElement>(item->node())) {
                        m_insertionMode = InsertionMode::InSelectInTable;
                        return;
                    }
                }
            }
            m_insertionMode = InsertionMode::InSelect;
            return;
        }
        if (item->hasTagName(tdTag) || item->hasTagName(thTag)) {
            m_insertionMode = InsertionMode::InCell;
            return;
        }
        if (item->hasTagName(trTag)) {
            m_insertionMode = InsertionMode::InRow;
            return;
        }
        if (item->hasTagName(tbodyTag) || item->hasTagName(theadTag) || item->hasTagName(tfootTag)) {
            m_insertionMode = InsertionMode::InTableBody;
            return;
        }
        if (item->hasTagName(captionTag)) {
            m_insertionMode = InsertionMode::InCaption;
            return;
        }
        if (item->hasTagName(colgroupTag)) {
            m_insertionMode = InsertionMode::InColumnGroup;
            return;
        }
        if (is<HTMLTableElement>(item->node())) {
            m_insertionMode = InsertionMode::InTable;
            return;
        }
        if (item->hasTagName(headTag)) {
            // A head element that is the fragment's context element yields "in body";
            // any other head element yields "in head".
            if (!m_fragmentContext.fragment() || &m_fragmentContext.contextElement() != &item->node()) {
                m_insertionMode = InsertionMode::InHead;
                return;
            }
            m_insertionMode = InsertionMode::InBody;
            return;
        }
        if (item->hasTagName(bodyTag)) {
            m_insertionMode = InsertionMode::InBody;
            return;
        }
        if (item->hasTagName(framesetTag)) {
            m_insertionMode = InsertionMode::InFrameset;
            return;
        }
        if (item->hasTagName(htmlTag)) {
            // With a head stack item present the mode is "after head"; otherwise "before head".
            if (m_tree.headStackItem()) {
                m_insertionMode = InsertionMode::AfterHead;
                return;
            }
            ASSERT(isParsingFragment());
            m_insertionMode = InsertionMode::BeforeHead;
            return;
        }
        // Nothing matched on the last item: default to "in body".
        if (last) {
            ASSERT(isParsingFragment());
            m_insertionMode = InsertionMode::InBody;
            return;
        }
    }
}
1596
1597void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken&& token)
1598{
1599 ASSERT(token.type() == HTMLToken::EndTag);
1600 if (isTableBodyContextTag(token.name())) {
1601 if (!m_tree.openElements().inTableScope(token.name())) {
1602 parseError(token);
1603 return;
1604 }
1605 m_tree.openElements().popUntilTableBodyScopeMarker();
1606 m_tree.openElements().pop();
1607 m_insertionMode = InsertionMode::InTable;
1608 return;
1609 }
1610 if (token.name() == tableTag) {
1611 // FIXME: This is slow.
1612 if (!m_tree.openElements().inTableScope(tbodyTag) && !m_tree.openElements().inTableScope(theadTag) && !m_tree.openElements().inTableScope(tfootTag)) {
1613 ASSERT(isParsingFragmentOrTemplateContents());
1614 parseError(token);
1615 return;
1616 }
1617 m_tree.openElements().popUntilTableBodyScopeMarker();
1618 ASSERT(isTableBodyContextTag(m_tree.currentStackItem().localName()));
1619 processFakeEndTag(m_tree.currentStackItem().localName());
1620 processEndTag(WTFMove(token));
1621 return;
1622 }
1623 if (token.name() == bodyTag
1624 || isCaptionColOrColgroupTag(token.name())
1625 || token.name() == htmlTag
1626 || isTableCellContextTag(token.name())
1627 || token.name() == trTag) {
1628 parseError(token);
1629 return;
1630 }
1631 processEndTagForInTable(WTFMove(token));
1632}
1633
1634void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken&& token)
1635{
1636 ASSERT(token.type() == HTMLToken::EndTag);
1637 if (token.name() == trTag) {
1638 processTrEndTagForInRow();
1639 return;
1640 }
1641 if (token.name() == tableTag) {
1642 if (!processTrEndTagForInRow()) {
1643 ASSERT(isParsingFragmentOrTemplateContents());
1644 return;
1645 }
1646 ASSERT(m_insertionMode == InsertionMode::InTableBody);
1647 processEndTag(WTFMove(token));
1648 return;
1649 }
1650 if (isTableBodyContextTag(token.name())) {
1651 if (!m_tree.openElements().inTableScope(token.name())) {
1652 parseError(token);
1653 return;
1654 }
1655 processFakeEndTag(trTag);
1656 ASSERT(m_insertionMode == InsertionMode::InTableBody);
1657 processEndTag(WTFMove(token));
1658 return;
1659 }
1660 if (token.name() == bodyTag
1661 || isCaptionColOrColgroupTag(token.name())
1662 || token.name() == htmlTag
1663 || isTableCellContextTag(token.name())) {
1664 parseError(token);
1665 return;
1666 }
1667 processEndTagForInTable(WTFMove(token));
1668}
1669
1670void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken&& token)
1671{
1672 ASSERT(token.type() == HTMLToken::EndTag);
1673 if (isTableCellContextTag(token.name())) {
1674 if (!m_tree.openElements().inTableScope(token.name())) {
1675 parseError(token);
1676 return;
1677 }
1678 m_tree.generateImpliedEndTags();
1679 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1680 parseError(token);
1681 m_tree.openElements().popUntilPopped(token.name());
1682 m_tree.activeFormattingElements().clearToLastMarker();
1683 m_insertionMode = InsertionMode::InRow;
1684 return;
1685 }
1686 if (token.name() == bodyTag
1687 || isCaptionColOrColgroupTag(token.name())
1688 || token.name() == htmlTag) {
1689 parseError(token);
1690 return;
1691 }
1692 if (token.name() == tableTag
1693 || token.name() == trTag
1694 || isTableBodyContextTag(token.name())) {
1695 if (!m_tree.openElements().inTableScope(token.name())) {
1696 ASSERT(isTableBodyContextTag(token.name()) || m_tree.openElements().inTableScope(templateTag) || isParsingFragment());
1697 parseError(token);
1698 return;
1699 }
1700 closeTheCell();
1701 processEndTag(WTFMove(token));
1702 return;
1703 }
1704 processEndTagForInBody(WTFMove(token));
1705}
1706
1707void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken&& token)
1708{
1709 ASSERT(token.type() == HTMLToken::EndTag);
1710 if (token.name() == bodyTag) {
1711 processBodyEndTagForInBody(WTFMove(token));
1712 return;
1713 }
1714 if (token.name() == htmlTag) {
1715 AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag->localName());
1716 if (processBodyEndTagForInBody(WTFMove(endBody)))
1717 processEndTag(WTFMove(token));
1718 return;
1719 }
1720 if (token.name() == addressTag
1721 || token.name() == articleTag
1722 || token.name() == asideTag
1723 || token.name() == blockquoteTag
1724 || token.name() == buttonTag
1725 || token.name() == centerTag
1726 || token.name() == detailsTag
1727 || token.name() == dirTag
1728 || token.name() == divTag
1729 || token.name() == dlTag
1730 || token.name() == fieldsetTag
1731 || token.name() == figcaptionTag
1732 || token.name() == figureTag
1733 || token.name() == footerTag
1734 || token.name() == headerTag
1735 || token.name() == hgroupTag
1736 || token.name() == listingTag
1737 || token.name() == mainTag
1738 || token.name() == menuTag
1739 || token.name() == navTag
1740 || token.name() == olTag
1741 || token.name() == preTag
1742 || token.name() == sectionTag
1743 || token.name() == summaryTag
1744 || token.name() == ulTag) {
1745 if (!m_tree.openElements().inScope(token.name())) {
1746 parseError(token);
1747 return;
1748 }
1749 m_tree.generateImpliedEndTags();
1750 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1751 parseError(token);
1752 m_tree.openElements().popUntilPopped(token.name());
1753 return;
1754 }
1755 if (token.name() == formTag) {
1756 if (!isParsingTemplateContents()) {
1757 RefPtr<Element> formElement = m_tree.takeForm();
1758 if (!formElement || !m_tree.openElements().inScope(*formElement)) {
1759 parseError(token);
1760 return;
1761 }
1762 m_tree.generateImpliedEndTags();
1763 if (&m_tree.currentNode() != formElement.get())
1764 parseError(token);
1765 m_tree.openElements().remove(*formElement);
1766 } else {
1767 if (!m_tree.openElements().inScope(token.name())) {
1768 parseError(token);
1769 return;
1770 }
1771 m_tree.generateImpliedEndTags();
1772 if (!m_tree.currentNode().hasTagName(formTag))
1773 parseError(token);
1774 m_tree.openElements().popUntilPopped(token.name());
1775 }
1776 }
1777 if (token.name() == pTag) {
1778 if (!m_tree.openElements().inButtonScope(token.name())) {
1779 parseError(token);
1780 processFakeStartTag(pTag);
1781 ASSERT(m_tree.openElements().inScope(token.name()));
1782 processEndTag(WTFMove(token));
1783 return;
1784 }
1785 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1786 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1787 parseError(token);
1788 m_tree.openElements().popUntilPopped(token.name());
1789 return;
1790 }
1791 if (token.name() == liTag) {
1792 if (!m_tree.openElements().inListItemScope(token.name())) {
1793 parseError(token);
1794 return;
1795 }
1796 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1797 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1798 parseError(token);
1799 m_tree.openElements().popUntilPopped(token.name());
1800 return;
1801 }
1802 if (token.name() == ddTag || token.name() == dtTag) {
1803 if (!m_tree.openElements().inScope(token.name())) {
1804 parseError(token);
1805 return;
1806 }
1807 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1808 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1809 parseError(token);
1810 m_tree.openElements().popUntilPopped(token.name());
1811 return;
1812 }
1813 if (isNumberedHeaderTag(token.name())) {
1814 if (!m_tree.openElements().hasNumberedHeaderElementInScope()) {
1815 parseError(token);
1816 return;
1817 }
1818 m_tree.generateImpliedEndTags();
1819 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1820 parseError(token);
1821 m_tree.openElements().popUntilNumberedHeaderElementPopped();
1822 return;
1823 }
1824 if (HTMLConstructionSite::isFormattingTag(token.name())) {
1825 callTheAdoptionAgency(token);
1826 return;
1827 }
1828 if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) {
1829 if (!m_tree.openElements().inScope(token.name())) {
1830 parseError(token);
1831 return;
1832 }
1833 m_tree.generateImpliedEndTags();
1834 if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1835 parseError(token);
1836 m_tree.openElements().popUntilPopped(token.name());
1837 m_tree.activeFormattingElements().clearToLastMarker();
1838 return;
1839 }
1840 if (token.name() == brTag) {
1841 parseError(token);
1842 processFakeStartTag(brTag);
1843 return;
1844 }
1845 if (token.name() == templateTag) {
1846 processTemplateEndTag(WTFMove(token));
1847 return;
1848 }
1849 processAnyOtherEndTagForInBody(WTFMove(token));
1850}
1851
1852bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1853{
1854 if (!m_tree.openElements().inTableScope(captionTag->localName())) {
1855 ASSERT(isParsingFragment());
1856 // FIXME: parse error
1857 return false;
1858 }
1859 m_tree.generateImpliedEndTags();
1860 // FIXME: parse error if (!m_tree.currentStackItem().hasTagName(captionTag))
1861 m_tree.openElements().popUntilPopped(captionTag->localName());
1862 m_tree.activeFormattingElements().clearToLastMarker();
1863 m_insertionMode = InsertionMode::InTable;
1864 return true;
1865}
1866
1867bool HTMLTreeBuilder::processTrEndTagForInRow()
1868{
1869 if (!m_tree.openElements().inTableScope(trTag)) {
1870 ASSERT(isParsingFragmentOrTemplateContents());
1871 // FIXME: parse error
1872 return false;
1873 }
1874 m_tree.openElements().popUntilTableRowScopeMarker();
1875 ASSERT(m_tree.currentStackItem().hasTagName(trTag));
1876 m_tree.openElements().pop();
1877 m_insertionMode = InsertionMode::InTableBody;
1878 return true;
1879}
1880
1881bool HTMLTreeBuilder::processTableEndTagForInTable()
1882{
1883 if (!m_tree.openElements().inTableScope(tableTag)) {
1884 ASSERT(isParsingFragmentOrTemplateContents());
1885 // FIXME: parse error.
1886 return false;
1887 }
1888 m_tree.openElements().popUntilPopped(tableTag->localName());
1889 resetInsertionModeAppropriately();
1890 return true;
1891}
1892
1893void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken&& token)
1894{
1895 ASSERT(token.type() == HTMLToken::EndTag);
1896 if (token.name() == tableTag) {
1897 processTableEndTagForInTable();
1898 return;
1899 }
1900 if (token.name() == bodyTag
1901 || isCaptionColOrColgroupTag(token.name())
1902 || token.name() == htmlTag
1903 || isTableBodyContextTag(token.name())
1904 || isTableCellContextTag(token.name())
1905 || token.name() == trTag) {
1906 parseError(token);
1907 return;
1908 }
1909 parseError(token);
1910 // Is this redirection necessary here?
1911 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1912 processEndTagForInBody(WTFMove(token));
1913}
1914
// Processes an end tag token using the rules of the current insertion mode.
//
// The cases for Initial, BeforeHTML, BeforeHead, InHead and AfterHead are chained with
// FALLTHROUGH: each one either consumes/ignores the token or runs its defaultFor*()
// step and lets the next case examine the same token. Several other cases change
// m_insertionMode (directly or through a helper) and then call processEndTag() again
// so that the token is reprocessed under the new mode.
void HTMLTreeBuilder::processEndTag(AtomicHTMLToken&& token)
{
    ASSERT(token.type() == HTMLToken::EndTag);
    switch (m_insertionMode) {
    case InsertionMode::Initial:
        defaultForInitial();
        ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
        FALLTHROUGH;
    case InsertionMode::BeforeHTML:
        // Only </head>, </body>, </html> and </br> proceed; any other end tag is ignored.
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHTML();
        ASSERT(m_insertionMode == InsertionMode::BeforeHead);
        FALLTHROUGH;
    case InsertionMode::BeforeHead:
        // Same filter as BeforeHTML: everything but </head>, </body>, </html>, </br> is ignored.
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHead();
        ASSERT(m_insertionMode == InsertionMode::InHead);
        FALLTHROUGH;
    case InsertionMode::InHead:
        // FIXME: This case should be broken out into processEndTagForInHead,
        // because other end tag cases now refer to it ("process the token for using the rules of the "in head" insertion mode").
        // but because the logic falls through to InsertionMode::AfterHead, that gets a little messy.
        if (token.name() == templateTag) {
            processTemplateEndTag(WTFMove(token));
            return;
        }
        if (token.name() == headTag) {
            m_tree.openElements().popHTMLHeadElement();
            m_insertionMode = InsertionMode::AfterHead;
            return;
        }
        // Only </body>, </html> and </br> proceed past the head; other end tags are ignored.
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHead();
        ASSERT(m_insertionMode == InsertionMode::AfterHead);
        FALLTHROUGH;
    case InsertionMode::AfterHead:
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForAfterHead();
        ASSERT(m_insertionMode == InsertionMode::InBody);
        FALLTHROUGH;
    case InsertionMode::InBody:
        processEndTagForInBody(WTFMove(token));
        break;
    case InsertionMode::InTable:
        processEndTagForInTable(WTFMove(token));
        break;
    case InsertionMode::InCaption:
        if (token.name() == captionTag) {
            processCaptionEndTagForInCaption();
            return;
        }
        if (token.name() == tableTag) {
            // </table> inside a caption: close the caption first, then reprocess the
            // token under whatever mode closing the caption left us in.
            parseError(token);
            if (!processCaptionEndTagForInCaption()) {
                ASSERT(isParsingFragment());
                return;
            }
            processEndTag(WTFMove(token));
            return;
        }
        // These end tags are parse errors and are ignored in a caption.
        if (token.name() == bodyTag
            || token.name() == colTag
            || token.name() == colgroupTag
            || token.name() == htmlTag
            || isTableBodyContextTag(token.name())
            || isTableCellContextTag(token.name())
            || token.name() == trTag) {
            parseError(token);
            return;
        }
        processEndTagForInBody(WTFMove(token));
        break;
    case InsertionMode::InColumnGroup:
        if (token.name() == colgroupTag) {
            processColgroupEndTagForInColumnGroup();
            return;
        }
        if (token.name() == colTag) {
            parseError(token);
            return;
        }
        if (token.name() == templateTag) {
            processTemplateEndTag(WTFMove(token));
            return;
        }
        // Any other end tag: act as if </colgroup> had been seen and, if that
        // succeeded, reprocess the token.
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        processEndTag(WTFMove(token));
        break;
    case InsertionMode::InRow:
        processEndTagForInRow(WTFMove(token));
        break;
    case InsertionMode::InCell:
        processEndTagForInCell(WTFMove(token));
        break;
    case InsertionMode::InTableBody:
        processEndTagForInTableBody(WTFMove(token));
        break;
    case InsertionMode::AfterBody:
        if (token.name() == htmlTag) {
            // </html> is ignored (as a parse error) when parsing a fragment.
            if (isParsingFragment()) {
                parseError(token);
                return;
            }
            m_insertionMode = InsertionMode::AfterAfterBody;
            return;
        }
        FALLTHROUGH;
    case InsertionMode::AfterAfterBody:
        // Any other end tag: parse error, switch back to InBody and reprocess the token.
        ASSERT(m_insertionMode == InsertionMode::AfterBody || m_insertionMode == InsertionMode::AfterAfterBody);
        parseError(token);
        m_insertionMode = InsertionMode::InBody;
        processEndTag(WTFMove(token));
        break;
    case InsertionMode::InHeadNoscript:
        if (token.name() == noscriptTag) {
            ASSERT(m_tree.currentStackItem().hasTagName(noscriptTag));
            m_tree.openElements().pop();
            ASSERT(m_tree.currentStackItem().hasTagName(headTag));
            m_insertionMode = InsertionMode::InHead;
            return;
        }
        // Every end tag other than </br> is ignored here.
        if (token.name() != brTag) {
            parseError(token);
            return;
        }
        // </br>: run the default step for this mode, then reprocess the token.
        defaultForInHeadNoscript();
        processToken(WTFMove(token));
        break;
    case InsertionMode::Text:
        if (token.name() == scriptTag) {
            // Pause ourselves so that parsing stops until the script can be processed by the caller.
            ASSERT(m_tree.currentStackItem().hasTagName(scriptTag));
            if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
                m_scriptToProcess = &downcast<HTMLScriptElement>(m_tree.currentElement());
            m_tree.openElements().pop();
            m_insertionMode = m_originalInsertionMode;

            // This token will not have been created by the tokenizer if a
            // self-closing script tag was encountered and pre-HTML5 parser
            // quirks are enabled. We must set the tokenizer's state to
            // DataState explicitly if the tokenizer didn't have a chance to.
            ASSERT(m_parser.tokenizer().isInDataState() || m_options.usePreHTML5ParserQuirks);
            m_parser.tokenizer().setDataState();
            return;
        }
        // Any other end tag: pop the current element and return to the insertion mode
        // that was saved in m_originalInsertionMode.
        m_tree.openElements().pop();
        m_insertionMode = m_originalInsertionMode;
        break;
    case InsertionMode::InFrameset:
        if (token.name() == framesetTag) {
            bool ignoreFramesetForFragmentParsing = m_tree.currentIsRootNode() || m_tree.openElements().hasTemplateInHTMLScope();
            if (ignoreFramesetForFragmentParsing) {
                ASSERT(isParsingFragmentOrTemplateContents());
                parseError(token);
                return;
            }
            m_tree.openElements().pop();
            // Leave frameset mode only once the new current node is no longer a
            // <frameset>, and never while parsing a fragment.
            if (!isParsingFragment() && !m_tree.currentStackItem().hasTagName(framesetTag))
                m_insertionMode = InsertionMode::AfterFrameset;
            return;
        }
        break;
    case InsertionMode::AfterFrameset:
        if (token.name() == htmlTag) {
            m_insertionMode = InsertionMode::AfterAfterFrameset;
            return;
        }
        FALLTHROUGH;
    case InsertionMode::AfterAfterFrameset:
        ASSERT(m_insertionMode == InsertionMode::AfterFrameset || m_insertionMode == InsertionMode::AfterAfterFrameset);
        parseError(token);
        break;
    case InsertionMode::InSelectInTable:
        if (token.name() == captionTag
            || token.name() == tableTag
            || isTableBodyContextTag(token.name())
            || token.name() == trTag
            || isTableCellContextTag(token.name())) {
            parseError(token);
            // If a matching element is in table scope, process a synthesized </select>
            // and then reprocess the original token; otherwise the token is ignored.
            if (m_tree.openElements().inTableScope(token.name())) {
                AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
                processEndTag(WTFMove(endSelect));
                processEndTag(WTFMove(token));
            }
            return;
        }
        FALLTHROUGH;
    case InsertionMode::InSelect:
        ASSERT(m_insertionMode == InsertionMode::InSelect || m_insertionMode == InsertionMode::InSelectInTable);
        if (token.name() == optgroupTag) {
            // An <option> directly inside an <optgroup> is closed first with a fake </option>.
            if (is<HTMLOptionElement>(m_tree.currentStackItem().node()) && m_tree.oneBelowTop() && is<HTMLOptGroupElement>(m_tree.oneBelowTop()->node()))
                processFakeEndTag(optionTag);
            if (is<HTMLOptGroupElement>(m_tree.currentStackItem().node())) {
                m_tree.openElements().pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == optionTag) {
            if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
                m_tree.openElements().pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == selectTag) {
            if (!m_tree.openElements().inSelectScope(token.name())) {
                ASSERT(isParsingFragment());
                parseError(token);
                return;
            }
            m_tree.openElements().popUntilPopped(selectTag->localName());
            resetInsertionModeAppropriately();
            return;
        }
        if (token.name() == templateTag) {
            processTemplateEndTag(WTFMove(token));
            return;
        }
        break;
    case InsertionMode::InTableText:
        // Flush the pending table characters (which also restores the original
        // insertion mode), then reprocess the token.
        defaultForInTableText();
        processEndTag(WTFMove(token));
        break;
    case InsertionMode::TemplateContents:
        // Only </template> is acted upon here; other end tags do nothing.
        if (token.name() == templateTag) {
            processTemplateEndTag(WTFMove(token));
            return;
        }
        break;
    }
}
2164
2165void HTMLTreeBuilder::processComment(AtomicHTMLToken&& token)
2166{
2167 ASSERT(token.type() == HTMLToken::Comment);
2168 if (m_insertionMode == InsertionMode::Initial
2169 || m_insertionMode == InsertionMode::BeforeHTML
2170 || m_insertionMode == InsertionMode::AfterAfterBody
2171 || m_insertionMode == InsertionMode::AfterAfterFrameset) {
2172 m_tree.insertCommentOnDocument(WTFMove(token));
2173 return;
2174 }
2175 if (m_insertionMode == InsertionMode::AfterBody) {
2176 m_tree.insertCommentOnHTMLHtmlElement(WTFMove(token));
2177 return;
2178 }
2179 if (m_insertionMode == InsertionMode::InTableText) {
2180 defaultForInTableText();
2181 processComment(WTFMove(token));
2182 return;
2183 }
2184 m_tree.insertComment(WTFMove(token));
2185}
2186
2187void HTMLTreeBuilder::processCharacter(AtomicHTMLToken&& token)
2188{
2189 ASSERT(token.type() == HTMLToken::Character);
2190 ExternalCharacterTokenBuffer buffer(token);
2191 processCharacterBuffer(buffer);
2192}
2193
2194#if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
2195
2196// FIXME: Extract the following iOS-specific code into a separate file.
2197// From the string 4089961010, creates a link of the form <a href="tel:4089961010">4089961010</a> and inserts it.
2198void HTMLTreeBuilder::insertPhoneNumberLink(const String& string)
2199{
2200 Vector<Attribute> attributes;
2201 attributes.append(Attribute(HTMLNames::hrefAttr, makeString("tel:"_s, string)));
2202
2203 const AtomicString& aTagLocalName = aTag->localName();
2204 AtomicHTMLToken aStartToken(HTMLToken::StartTag, aTagLocalName, WTFMove(attributes));
2205 AtomicHTMLToken aEndToken(HTMLToken::EndTag, aTagLocalName);
2206
2207 processStartTag(WTFMove(aStartToken));
2208 m_tree.executeQueuedTasks();
2209 m_tree.insertTextNode(string);
2210 processEndTag(WTFMove(aEndToken));
2211}
2212
2213// Locates the phone numbers in the string and deals with it
2214// 1. Appends the text before the phone number as a text node.
2215// 2. Wraps the phone number in a tel: link.
2216// 3. Goes back to step 1 if a phone number is found in the rest of the string.
2217// 4. Appends the rest of the string as a text node.
2218void HTMLTreeBuilder::linkifyPhoneNumbers(const String& string)
2219{
2220 ASSERT(TelephoneNumberDetector::isSupported());
2221
2222 // relativeStartPosition and relativeEndPosition are the endpoints of the phone number range,
2223 // relative to the scannerPosition
2224 unsigned length = string.length();
2225 unsigned scannerPosition = 0;
2226 int relativeStartPosition = 0;
2227 int relativeEndPosition = 0;
2228
2229 auto characters = StringView(string).upconvertedCharacters();
2230
2231 // While there's a phone number in the rest of the string...
2232 while (scannerPosition < length && TelephoneNumberDetector::find(&characters[scannerPosition], length - scannerPosition, &relativeStartPosition, &relativeEndPosition)) {
2233 // The convention in the Data Detectors framework is that the end position is the first character NOT in the phone number
2234 // (that is, the length of the range is relativeEndPosition - relativeStartPosition). So substract 1 to get the same
2235 // convention as the old WebCore phone number parser (so that the rest of the code is still valid if we want to go back
2236 // to the old parser).
2237 --relativeEndPosition;
2238
2239 ASSERT(scannerPosition + relativeEndPosition < length);
2240
2241 m_tree.insertTextNode(string.substring(scannerPosition, relativeStartPosition));
2242 insertPhoneNumberLink(string.substring(scannerPosition + relativeStartPosition, relativeEndPosition - relativeStartPosition + 1));
2243
2244 scannerPosition += relativeEndPosition + 1;
2245 }
2246
2247 // Append the rest as a text node.
2248 if (scannerPosition > 0) {
2249 if (scannerPosition < length) {
2250 String after = string.substring(scannerPosition, length - scannerPosition);
2251 m_tree.insertTextNode(after);
2252 }
2253 } else
2254 m_tree.insertTextNode(string);
2255}
2256
2257// Looks at the ancestors of the element to determine whether we're inside an element which disallows parsing phone numbers.
2258static inline bool disallowTelephoneNumberParsing(const ContainerNode& node)
2259{
2260 return node.isLink()
2261 || node.hasTagName(scriptTag)
2262 || is<HTMLFormControlElement>(node)
2263 || node.hasTagName(styleTag)
2264 || node.hasTagName(ttTag)
2265 || node.hasTagName(preTag)
2266 || node.hasTagName(codeTag);
2267}
2268
2269static inline bool shouldParseTelephoneNumbersInNode(const ContainerNode& node)
2270{
2271 for (const ContainerNode* ancestor = &node; ancestor; ancestor = ancestor->parentNode()) {
2272 if (disallowTelephoneNumberParsing(*ancestor))
2273 return false;
2274 }
2275 return true;
2276}
2277
2278#endif // ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
2279
// Consumes the characters in the buffer according to the current insertion mode.
//
// The early modes (Initial through AfterHead) strip or insert leading whitespace, run
// their defaultFor*() step when non-whitespace remains, and fall through to the next
// case. Modes that have to change the insertion mode before the remaining characters
// can be handled jump back to ReprocessBuffer, which re-dispatches on the new mode.
void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
{
ReprocessBuffer:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
    // Note that this logic is different than the generic \r\n collapsing
    // handled in the input stream preprocessor. This logic is here as an
    // "authoring convenience" so folks can write:
    //
    // <pre>
    // lorem ipsum
    // lorem ipsum
    // </pre>
    //
    // without getting an extra newline at the start of their <pre> element.
    if (m_shouldSkipLeadingNewline) {
        m_shouldSkipLeadingNewline = false;
        buffer.skipAtMostOneLeadingNewline();
        if (buffer.isEmpty())
            return;
    }

    switch (m_insertionMode) {
    case InsertionMode::Initial:
        // Leading whitespace is dropped in the first three modes.
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForInitial();
        ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
        FALLTHROUGH;
    case InsertionMode::BeforeHTML:
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHTML();
        ASSERT(m_insertionMode == InsertionMode::BeforeHead);
        FALLTHROUGH;
    case InsertionMode::BeforeHead:
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHead();
        ASSERT(m_insertionMode == InsertionMode::InHead);
        FALLTHROUGH;
    case InsertionMode::InHead: {
        // From here on leading whitespace is inserted as a text node instead of dropped.
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHead();
        ASSERT(m_insertionMode == InsertionMode::AfterHead);
        FALLTHROUGH;
    }
    case InsertionMode::AfterHead: {
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForAfterHead();
        ASSERT(m_insertionMode == InsertionMode::InBody);
        FALLTHROUGH;
    }
    case InsertionMode::InBody:
    case InsertionMode::InCaption:
    case InsertionMode::InCell:
    case InsertionMode::TemplateContents:
        processCharacterBufferForInBody(buffer);
        break;
    case InsertionMode::InTable:
    case InsertionMode::InTableBody:
    case InsertionMode::InRow:
        ASSERT(m_pendingTableCharacters.isEmpty());
        // When the current node is a table, tbody, tfoot, thead or tr, remember the
        // current mode and switch to InTableText so the characters get buffered.
        if (is<HTMLTableElement>(m_tree.currentStackItem().node())
            || m_tree.currentStackItem().hasTagName(HTMLNames::tbodyTag)
            || m_tree.currentStackItem().hasTagName(HTMLNames::tfootTag)
            || m_tree.currentStackItem().hasTagName(HTMLNames::theadTag)
            || m_tree.currentStackItem().hasTagName(HTMLNames::trTag)) {

            m_originalInsertionMode = m_insertionMode;
            m_insertionMode = InsertionMode::InTableText;
            // Note that we fall through to the InsertionMode::InTableText case below.
        } else {
            // Otherwise handle the characters with the "in body" rules while insertions
            // are redirected to the foster parent.
            HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
            processCharacterBufferForInBody(buffer);
            break;
        }
        FALLTHROUGH;
    case InsertionMode::InTableText:
        // Accumulate the characters; they are flushed later by defaultForInTableText().
        buffer.giveRemainingTo(m_pendingTableCharacters);
        break;
    case InsertionMode::InColumnGroup: {
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        // Non-whitespace remains: act as if </colgroup> had been seen, then reprocess.
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            // The spec tells us to drop these characters on the floor.
            buffer.skipLeadingNonWhitespace();
            if (buffer.isEmpty())
                return;
        }
        goto ReprocessBuffer;
    }
    case InsertionMode::AfterBody:
    case InsertionMode::AfterAfterBody:
        // Switch back to InBody and reprocess the characters under that mode.
        // FIXME: parse error
        m_insertionMode = InsertionMode::InBody;
        goto ReprocessBuffer;
    case InsertionMode::Text:
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    case InsertionMode::InHeadNoscript: {
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        // Non-whitespace remains: run the default step for this mode, then reprocess.
        defaultForInHeadNoscript();
        goto ReprocessBuffer;
    }
    case InsertionMode::InFrameset:
    case InsertionMode::AfterFrameset: {
        // Only the whitespace returned by takeRemainingWhitespace() is inserted.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    case InsertionMode::InSelectInTable:
    case InsertionMode::InSelect:
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    case InsertionMode::AfterAfterFrameset: {
        // Like the frameset modes above, but the active formatting elements are
        // reconstructed before the whitespace is inserted.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty()) {
            m_tree.reconstructTheActiveFormattingElements();
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        }
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    }
}
2428
2429void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
2430{
2431 m_tree.reconstructTheActiveFormattingElements();
2432 String characters = buffer.takeRemaining();
2433#if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
2434 if (!isParsingFragment() && m_tree.isTelephoneNumberParsingEnabled() && shouldParseTelephoneNumbersInNode(m_tree.currentNode()) && TelephoneNumberDetector::isSupported())
2435 linkifyPhoneNumbers(characters);
2436 else
2437 m_tree.insertTextNode(characters);
2438#else
2439 m_tree.insertTextNode(characters);
2440#endif
2441 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2442 m_framesetOk = false;
2443}
2444
// Processes the end-of-file token according to the current insertion mode.
//
// The early modes run their defaultFor*() steps and fall through until InBody is
// reached. Modes that must first leave a transient state (InHeadNoscript, InTableText,
// Text) restore the appropriate mode and re-enter processEndOfFile(). Cases that
// `break` out of the switch end by popping every open element; cases that `return`
// skip that final step.
void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken&& token)
{
    ASSERT(token.type() == HTMLToken::EndOfFile);
    switch (m_insertionMode) {
    case InsertionMode::Initial:
        defaultForInitial();
        ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
        FALLTHROUGH;
    case InsertionMode::BeforeHTML:
        defaultForBeforeHTML();
        ASSERT(m_insertionMode == InsertionMode::BeforeHead);
        FALLTHROUGH;
    case InsertionMode::BeforeHead:
        defaultForBeforeHead();
        ASSERT(m_insertionMode == InsertionMode::InHead);
        FALLTHROUGH;
    case InsertionMode::InHead:
        defaultForInHead();
        ASSERT(m_insertionMode == InsertionMode::AfterHead);
        FALLTHROUGH;
    case InsertionMode::AfterHead:
        defaultForAfterHead();
        ASSERT(m_insertionMode == InsertionMode::InBody);
        FALLTHROUGH;
    case InsertionMode::InBody:
    case InsertionMode::InCell:
    case InsertionMode::InCaption:
    case InsertionMode::InRow:
        notImplemented(); // Emit parse error based on what elements are still open.
        // With template insertion modes still pending, let the template-contents EOF
        // handler run; a true return means nothing more is done here.
        if (!m_templateInsertionModes.isEmpty()) {
            if (processEndOfFileForInTemplateContents(WTFMove(token)))
                return;
        }
        break;
    case InsertionMode::AfterBody:
    case InsertionMode::AfterAfterBody:
        break;
    case InsertionMode::InHeadNoscript:
        // Run the default step for this mode, then reprocess the EOF token.
        defaultForInHeadNoscript();
        processEndOfFile(WTFMove(token));
        return;
    case InsertionMode::AfterFrameset:
    case InsertionMode::AfterAfterFrameset:
        break;
    case InsertionMode::InColumnGroup:
        if (m_tree.currentIsRootNode()) {
            ASSERT(isParsingFragment());
            return; // FIXME: Should we break here instead of returning?
        }
        ASSERT(m_tree.currentNode().hasTagName(colgroupTag) || m_tree.currentNode().hasTagName(templateTag));
        // Act as if </colgroup> had been seen; the return value is not checked here.
        processColgroupEndTagForInColumnGroup();
        FALLTHROUGH;
    case InsertionMode::InFrameset:
    case InsertionMode::InTable:
    case InsertionMode::InTableBody:
    case InsertionMode::InSelectInTable:
    case InsertionMode::InSelect:
        ASSERT(m_insertionMode == InsertionMode::InSelect || m_insertionMode == InsertionMode::InSelectInTable || m_insertionMode == InsertionMode::InTable || m_insertionMode == InsertionMode::InFrameset || m_insertionMode == InsertionMode::InTableBody || m_insertionMode == InsertionMode::InColumnGroup);
        // Reaching EOF with anything other than the root node as the current node is a parse error.
        if (&m_tree.currentNode() != &m_tree.openElements().rootNode())
            parseError(token);
        if (!m_templateInsertionModes.isEmpty()) {
            if (processEndOfFileForInTemplateContents(WTFMove(token)))
                return;
        }
        break;
    case InsertionMode::InTableText:
        // Flush the pending table characters (which restores the original insertion
        // mode), then reprocess the EOF token.
        defaultForInTableText();
        processEndOfFile(WTFMove(token));
        return;
    case InsertionMode::Text:
        // EOF inside a text-mode element: parse error, pop the element, restore the
        // saved insertion mode and reprocess the EOF token there.
        parseError(token);
        if (m_tree.currentStackItem().hasTagName(scriptTag))
            notImplemented(); // mark the script element as "already started".
        m_tree.openElements().pop();
        ASSERT(m_originalInsertionMode != InsertionMode::Text);
        m_insertionMode = m_originalInsertionMode;
        processEndOfFile(WTFMove(token));
        return;
    case InsertionMode::TemplateContents:
        if (processEndOfFileForInTemplateContents(WTFMove(token)))
            return;
        break;
    }
    // Common tail for every case that broke out of the switch.
    m_tree.openElements().popAll();
}
2530
// Default step for the Initial insertion mode: sets the default compatibility mode on
// the construction site and advances to InsertionMode::BeforeHTML.
void HTMLTreeBuilder::defaultForInitial()
{
    notImplemented();
    m_tree.setDefaultCompatibilityMode();
    // FIXME: parse error
    m_insertionMode = InsertionMode::BeforeHTML;
}
2538
2539void HTMLTreeBuilder::defaultForBeforeHTML()
2540{
2541 AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag->localName());
2542 m_tree.insertHTMLHtmlStartTagBeforeHTML(WTFMove(startHTML));
2543 m_insertionMode = InsertionMode::BeforeHead;
2544}
2545
2546void HTMLTreeBuilder::defaultForBeforeHead()
2547{
2548 AtomicHTMLToken startHead(HTMLToken::StartTag, headTag->localName());
2549 processStartTag(WTFMove(startHead));
2550}
2551
2552void HTMLTreeBuilder::defaultForInHead()
2553{
2554 AtomicHTMLToken endHead(HTMLToken::EndTag, headTag->localName());
2555 processEndTag(WTFMove(endHead));
2556}
2557
2558void HTMLTreeBuilder::defaultForInHeadNoscript()
2559{
2560 AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag->localName());
2561 processEndTag(WTFMove(endNoscript));
2562}
2563
2564void HTMLTreeBuilder::defaultForAfterHead()
2565{
2566 AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag->localName());
2567 processStartTag(WTFMove(startBody));
2568 m_framesetOk = true;
2569}
2570
2571void HTMLTreeBuilder::defaultForInTableText()
2572{
2573 String characters = m_pendingTableCharacters.toString();
2574 m_pendingTableCharacters.clear();
2575 if (!isAllWhitespace(characters)) {
2576 // FIXME: parse error
2577 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2578 m_tree.reconstructTheActiveFormattingElements();
2579 m_tree.insertTextNode(characters, NotAllWhitespace);
2580 m_framesetOk = false;
2581 m_insertionMode = m_originalInsertionMode;
2582 return;
2583 }
2584 m_tree.insertTextNode(characters);
2585 m_insertionMode = m_originalInsertionMode;
2586}
2587
2588bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken&& token)
2589{
2590 ASSERT(token.type() == HTMLToken::StartTag);
2591 if (token.name() == htmlTag) {
2592 processHtmlStartTagForInBody(WTFMove(token));
2593 return true;
2594 }
2595 if (token.name() == baseTag
2596 || token.name() == basefontTag
2597 || token.name() == bgsoundTag
2598 || token.name() == commandTag
2599 || token.name() == linkTag
2600 || token.name() == metaTag) {
2601 m_tree.insertSelfClosingHTMLElement(WTFMove(token));
2602 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2603 return true;
2604 }
2605 if (token.name() == titleTag) {
2606 processGenericRCDATAStartTag(WTFMove(token));
2607 return true;
2608 }
2609 if (token.name() == noscriptTag) {
2610 if (m_options.scriptEnabled) {
2611 processGenericRawTextStartTag(WTFMove(token));
2612 return true;
2613 }
2614 m_tree.insertHTMLElement(WTFMove(token));
2615 m_insertionMode = InsertionMode::InHeadNoscript;
2616 return true;
2617 }
2618 if (token.name() == noframesTag || token.name() == styleTag) {
2619 processGenericRawTextStartTag(WTFMove(token));
2620 return true;
2621 }
2622 if (token.name() == scriptTag) {
2623 bool isSelfClosing = token.selfClosing();
2624 processScriptStartTag(WTFMove(token));
2625 if (m_options.usePreHTML5ParserQuirks && isSelfClosing)
2626 processFakeEndTag(scriptTag);
2627 return true;
2628 }
2629 if (token.name() == templateTag) {
2630 m_framesetOk = false;
2631 processTemplateStartTag(WTFMove(token));
2632 return true;
2633 }
2634 if (token.name() == headTag) {
2635 parseError(token);
2636 return true;
2637 }
2638 return false;
2639}
2640
// Inserts the element for the start tag, switches the tokenizer to its RCDATA state,
// saves the current insertion mode in m_originalInsertionMode and enters
// InsertionMode::Text.
void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken&& token)
{
    ASSERT(token.type() == HTMLToken::StartTag);
    m_tree.insertHTMLElement(WTFMove(token));
    m_parser.tokenizer().setRCDATAState();
    // Remember where to return to once the text-mode element ends.
    m_originalInsertionMode = m_insertionMode;
    m_insertionMode = InsertionMode::Text;
}
2649
// Inserts the element for the start tag, switches the tokenizer to its RAWTEXT state,
// saves the current insertion mode in m_originalInsertionMode and enters
// InsertionMode::Text.
void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken&& token)
{
    ASSERT(token.type() == HTMLToken::StartTag);
    m_tree.insertHTMLElement(WTFMove(token));
    m_parser.tokenizer().setRAWTEXTState();
    // Remember where to return to once the text-mode element ends.
    m_originalInsertionMode = m_insertionMode;
    m_insertionMode = InsertionMode::Text;
}
2658
2659void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken&& token)
2660{
2661 ASSERT(token.type() == HTMLToken::StartTag);
2662 m_tree.insertScriptElement(WTFMove(token));
2663 m_parser.tokenizer().setScriptDataState();
2664 m_originalInsertionMode = m_insertionMode;
2665
2666 TextPosition position = m_parser.textPosition();
2667
2668 m_scriptToProcessStartPosition = position;
2669
2670 m_insertionMode = InsertionMode::Text;
2671}
2672
2673// http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
2674HTMLStackItem& HTMLTreeBuilder::adjustedCurrentStackItem() const
2675{
2676 ASSERT(!m_tree.isEmpty());
2677 if (isParsingFragment() && m_tree.openElements().hasOnlyOneElement())
2678 return m_fragmentContext.contextElementStackItem();
2679
2680 return m_tree.currentStackItem();
2681}
2682
2683// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
2684bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(const AtomicHTMLToken& token)
2685{
2686 if (m_tree.isEmpty())
2687 return false;
2688 HTMLStackItem& adjustedCurrentNode = adjustedCurrentStackItem();
2689 if (isInHTMLNamespace(adjustedCurrentNode))
2690 return false;
2691 if (HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode)) {
2692 if (token.type() == HTMLToken::StartTag
2693 && token.name() != MathMLNames::mglyphTag
2694 && token.name() != MathMLNames::malignmarkTag)
2695 return false;
2696 if (token.type() == HTMLToken::Character)
2697 return false;
2698 }
2699 if (adjustedCurrentNode.hasTagName(MathMLNames::annotation_xmlTag)
2700 && token.type() == HTMLToken::StartTag
2701 && token.name() == SVGNames::svgTag)
2702 return false;
2703 if (HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)) {
2704 if (token.type() == HTMLToken::StartTag)
2705 return false;
2706 if (token.type() == HTMLToken::Character)
2707 return false;
2708 }
2709 if (token.type() == HTMLToken::EndOfFile)
2710 return false;
2711 return true;
2712}
2713
2714static bool hasAttribute(const AtomicHTMLToken& token, const QualifiedName& name)
2715{
2716 return findAttribute(token.attributes(), name);
2717}
2718
// Processes a token using the foreign content rules, dispatching on the token type.
// Start tags from a fixed list of HTML element names break out of foreign content;
// other start tags are inserted as foreign elements in the adjusted current node's
// namespace. End tags are matched against the foreign elements on the stack of open
// elements and otherwise fall back to processEndTag(). Uninitialized and EndOfFile
// tokens are not expected to reach this function.
void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken&& token)
{
    HTMLStackItem& adjustedCurrentNode = adjustedCurrentStackItem();

    switch (token.type()) {
    case HTMLToken::Uninitialized:
        ASSERT_NOT_REACHED();
        break;
    case HTMLToken::DOCTYPE:
        parseError(token);
        break;
    case HTMLToken::StartTag: {
        // Start tags with these names (and <font> carrying a color, face or size
        // attribute) are a parse error: pop back to the foreign content scope marker
        // and process the tag through processStartTag() instead.
        if (token.name() == bTag
            || token.name() == bigTag
            || token.name() == blockquoteTag
            || token.name() == bodyTag
            || token.name() == brTag
            || token.name() == centerTag
            || token.name() == codeTag
            || token.name() == ddTag
            || token.name() == divTag
            || token.name() == dlTag
            || token.name() == dtTag
            || token.name() == emTag
            || token.name() == embedTag
            || isNumberedHeaderTag(token.name())
            || token.name() == headTag
            || token.name() == hrTag
            || token.name() == iTag
            || token.name() == imgTag
            || token.name() == liTag
            || token.name() == listingTag
            || token.name() == menuTag
            || token.name() == metaTag
            || token.name() == nobrTag
            || token.name() == olTag
            || token.name() == pTag
            || token.name() == preTag
            || token.name() == rubyTag
            || token.name() == sTag
            || token.name() == smallTag
            || token.name() == spanTag
            || token.name() == strongTag
            || token.name() == strikeTag
            || token.name() == subTag
            || token.name() == supTag
            || token.name() == tableTag
            || token.name() == ttTag
            || token.name() == uTag
            || token.name() == ulTag
            || token.name() == varTag
            || (token.name() == fontTag && (hasAttribute(token, colorAttr) || hasAttribute(token, faceAttr) || hasAttribute(token, sizeAttr)))) {
            parseError(token);
            m_tree.openElements().popUntilForeignContentScopeMarker();
            processStartTag(WTFMove(token));
            return;
        }
        // Otherwise adjust the token for the namespace of the adjusted current node
        // and insert it as a foreign element in that namespace.
        const AtomicString& currentNamespace = adjustedCurrentNode.namespaceURI();
        if (currentNamespace == MathMLNames::mathmlNamespaceURI)
            adjustMathMLAttributes(token);
        if (currentNamespace == SVGNames::svgNamespaceURI) {
            adjustSVGTagNameCase(token);
            adjustSVGAttributes(token);
        }
        adjustForeignAttributes(token);
        m_tree.insertForeignElement(WTFMove(token), currentNamespace);
        break;
    }
    case HTMLToken::EndTag: {
        if (adjustedCurrentNode.namespaceURI() == SVGNames::svgNamespaceURI)
            adjustSVGTagNameCase(token);

        // </script> closing an SVG script element: remember the element for the
        // caller (when scripting content is allowed) and pop it.
        if (token.name() == SVGNames::scriptTag && m_tree.currentStackItem().hasTagName(SVGNames::scriptTag)) {
            if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
                m_scriptToProcess = &downcast<SVGScriptElement>(m_tree.currentElement());
            m_tree.openElements().pop();
            return;
        }
        if (!isInHTMLNamespace(m_tree.currentStackItem())) {
            // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
            auto* nodeRecord = &m_tree.openElements().topRecord();
            // A mismatch with the current node is a parse error, but the search below still runs.
            if (nodeRecord->stackItem().localName() != token.name())
                parseError(token);
            // Walk down the stack of open elements: pop through the first record whose
            // local name matches the token, or stop at the first HTML-namespace record.
            while (1) {
                if (nodeRecord->stackItem().localName() == token.name()) {
                    m_tree.openElements().popUntilPopped(nodeRecord->element());
                    return;
                }
                // NOTE(review): next() is dereferenced without a null check; this presumably
                // relies on an HTML-namespace record always existing lower in the stack - confirm.
                nodeRecord = nodeRecord->next();

                if (isInHTMLNamespace(nodeRecord->stackItem()))
                    break;
            }
        }
        // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
        processEndTag(WTFMove(token));
        break;
    }
    case HTMLToken::Comment:
        m_tree.insertComment(WTFMove(token));
        return;
    case HTMLToken::Character: {
        String characters = String(token.characters(), token.charactersLength());
        m_tree.insertTextNode(characters);
        // Text that is not entirely whitespace/replacement characters clears m_framesetOk.
        if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
            m_framesetOk = false;
        break;
    }
    case HTMLToken::EndOfFile:
        ASSERT_NOT_REACHED();
        break;
    }
}
2832
2833void HTMLTreeBuilder::finished()
2834{
2835 ASSERT(!m_destroyed);
2836
2837 if (isParsingFragment())
2838 return;
2839
2840 ASSERT(m_templateInsertionModes.isEmpty());
2841
2842 m_tree.finishedParsing();
2843 // The tree builder might have been destroyed as an indirect result of finishing the parsing.
2844}
2845
// Hook invoked wherever the tree builder detects a parse error. The body is empty,
// so calling it currently has no effect; the token parameter is unnamed and unused.
inline void HTMLTreeBuilder::parseError(const AtomicHTMLToken&)
{
}
2849
2850}
2851