1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011-2017 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "HTMLTreeBuilder.h"
29
30#include "Comment.h"
31#include "CustomElementRegistry.h"
32#include "DOMWindow.h"
33#include "DocumentFragment.h"
34#include "DocumentType.h"
35#include "Frame.h"
36#include "FrameLoader.h"
37#include "FrameLoaderClient.h"
38#include "HTMLElementFactory.h"
39#include "HTMLFormElement.h"
40#include "HTMLHtmlElement.h"
41#include "HTMLImageElement.h"
42#include "HTMLOptGroupElement.h"
43#include "HTMLOptionElement.h"
44#include "HTMLParserIdioms.h"
45#include "HTMLPictureElement.h"
46#include "HTMLScriptElement.h"
47#include "HTMLTemplateElement.h"
48#include "HTMLUnknownElement.h"
49#include "JSCustomElementInterface.h"
50#include "NotImplemented.h"
51#include "SVGElement.h"
52#include "Text.h"
53
54namespace WebCore {
55
56using namespace HTMLNames;
57
58static inline void setAttributes(Element& element, Vector<Attribute>& attributes, ParserContentPolicy parserContentPolicy)
59{
60 if (!scriptingContentIsAllowed(parserContentPolicy))
61 element.stripScriptingAttributes(attributes);
62 element.parserSetAttributes(attributes);
63}
64
65static inline void setAttributes(Element& element, AtomicHTMLToken& token, ParserContentPolicy parserContentPolicy)
66{
67 setAttributes(element, token.attributes(), parserContentPolicy);
68}
69
70static bool hasImpliedEndTag(const HTMLStackItem& item)
71{
72 return item.hasTagName(ddTag)
73 || item.hasTagName(dtTag)
74 || item.hasTagName(liTag)
75 || is<HTMLOptionElement>(item.node())
76 || is<HTMLOptGroupElement>(item.node())
77 || item.hasTagName(pTag)
78 || item.hasTagName(rbTag)
79 || item.hasTagName(rpTag)
80 || item.hasTagName(rtTag)
81 || item.hasTagName(rtcTag);
82}
83
84static bool shouldUseLengthLimit(const ContainerNode& node)
85{
86 return !node.hasTagName(scriptTag) && !node.hasTagName(styleTag) && !node.hasTagName(SVGNames::scriptTag);
87}
88
89static inline bool causesFosterParenting(const HTMLStackItem& item)
90{
91 return item.hasTagName(HTMLNames::tableTag)
92 || item.hasTagName(HTMLNames::tbodyTag)
93 || item.hasTagName(HTMLNames::tfootTag)
94 || item.hasTagName(HTMLNames::theadTag)
95 || item.hasTagName(HTMLNames::trTag);
96}
97
98static inline bool isAllWhitespace(const String& string)
99{
100 return string.isAllSpecialCharacters<isHTMLSpace>();
101}
102
103static inline void insert(HTMLConstructionSiteTask& task)
104{
105 if (is<HTMLTemplateElement>(*task.parent)) {
106 task.parent = &downcast<HTMLTemplateElement>(*task.parent).content();
107 task.nextChild = nullptr;
108 }
109
110 ASSERT(!task.child->parentNode());
111 if (task.nextChild)
112 task.parent->parserInsertBefore(*task.child, *task.nextChild);
113 else
114 task.parent->parserAppendChild(*task.child);
115}
116
117static inline void executeInsertTask(HTMLConstructionSiteTask& task)
118{
119 ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
120
121 insert(task);
122
123 task.child->beginParsingChildren();
124
125 if (task.selfClosing)
126 task.child->finishParsingChildren();
127}
128
129static inline void executeReparentTask(HTMLConstructionSiteTask& task)
130{
131 ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
132 ASSERT(!task.nextChild);
133
134 if (auto parent = makeRefPtr(task.child->parentNode()))
135 parent->parserRemoveChild(*task.child);
136
137 if (task.child->parentNode())
138 return;
139
140 task.parent->parserAppendChild(*task.child);
141}
142
143static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
144{
145 ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
146
147 if (RefPtr<ContainerNode> parent = task.child->parentNode())
148 parent->parserRemoveChild(*task.child);
149
150 if (task.child->parentNode())
151 return;
152
153 if (task.nextChild && task.nextChild->parentNode() != task.parent)
154 return;
155
156 insert(task);
157}
158
159static inline void executeTakeAllChildrenAndReparentTask(HTMLConstructionSiteTask& task)
160{
161 ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildrenAndReparent);
162 ASSERT(!task.nextChild);
163
164 auto furthestBlock = makeRefPtr(task.oldParent());
165 task.parent->takeAllChildrenFrom(furthestBlock.get());
166
167 RELEASE_ASSERT(!task.parent->parentNode());
168 furthestBlock->parserAppendChild(*task.parent);
169}
170
171static inline void executeTask(HTMLConstructionSiteTask& task)
172{
173 switch (task.operation) {
174 case HTMLConstructionSiteTask::Insert:
175 executeInsertTask(task);
176 return;
177 // All the cases below this point are only used by the adoption agency.
178 case HTMLConstructionSiteTask::InsertAlreadyParsedChild:
179 executeInsertAlreadyParsedChildTask(task);
180 return;
181 case HTMLConstructionSiteTask::Reparent:
182 executeReparentTask(task);
183 return;
184 case HTMLConstructionSiteTask::TakeAllChildrenAndReparent:
185 executeTakeAllChildrenAndReparentTask(task);
186 return;
187 }
188 ASSERT_NOT_REACHED();
189}
190
191void HTMLConstructionSite::attachLater(ContainerNode& parent, Ref<Node>&& child, bool selfClosing)
192{
193 ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !is<Element>(child) || !isScriptElement(downcast<Element>(child.get())));
194 ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !child->isPluginElement());
195
196 if (shouldFosterParent()) {
197 fosterParent(WTFMove(child));
198 return;
199 }
200
201 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
202 task.parent = &parent;
203 task.child = WTFMove(child);
204 task.selfClosing = selfClosing;
205
206 // Add as a sibling of the parent if we have reached the maximum depth allowed.
207 if (m_openElements.stackDepth() > m_maximumDOMTreeDepth && task.parent->parentNode())
208 task.parent = task.parent->parentNode();
209
210 ASSERT(task.parent);
211 m_taskQueue.append(WTFMove(task));
212}
213
214void HTMLConstructionSite::executeQueuedTasks()
215{
216 if (m_taskQueue.isEmpty())
217 return;
218
219 // Copy the task queue into a local variable in case executeTask
220 // re-enters the parser.
221 TaskQueue queue = WTFMove(m_taskQueue);
222
223 for (auto& task : queue)
224 executeTask(task);
225
226 // We might be detached now.
227}
228
// Constructs a construction site for whole-document parsing: nodes are attached
// directly to |document|, fragment mode is off, and the initial quirks-mode flag
// is taken from the document.
HTMLConstructionSite::HTMLConstructionSite(Document& document, ParserContentPolicy parserContentPolicy, unsigned maximumDOMTreeDepth)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
    , m_inQuirksMode(document.inQuirksMode())
{
    ASSERT(m_document.isHTMLDocument() || m_document.isXHTMLDocument());
}
240
// Constructs a construction site for fragment parsing: nodes are attached to
// |fragment|, while the owning document and the initial quirks-mode flag come
// from the fragment's document.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment& fragment, ParserContentPolicy parserContentPolicy, unsigned maximumDOMTreeDepth)
    : m_document(fragment.document())
    , m_attachmentRoot(fragment)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
    , m_inQuirksMode(fragment.document().inQuirksMode())
{
    ASSERT(m_document.isHTMLDocument() || m_document.isXHTMLDocument());
}
252
// Compiler-generated destructor, defined out of line in this translation unit.
HTMLConstructionSite::~HTMLConstructionSite() = default;
254
// Sets the form element pointer. Must only be called while no form is set
// (asserted below).
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
261
262RefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
263{
264 return WTFMove(m_form);
265}
266
267void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
268{
269 if (m_isParsingFragment)
270 return;
271
272 if (auto frame = makeRefPtr(m_document.frame()))
273 frame->injectUserScripts(InjectAtDocumentStart);
274}
275
276void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken&& token)
277{
278 auto element = HTMLHtmlElement::create(m_document);
279 setAttributes(element, token, m_parserContentPolicy);
280 attachLater(m_attachmentRoot, element.copyRef());
281 m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element.copyRef(), WTFMove(token)));
282
283 executeQueuedTasks();
284 element->insertedByParser();
285 dispatchDocumentElementAvailableIfNeeded();
286}
287
288void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken&& token, Element& element)
289{
290 if (token.attributes().isEmpty())
291 return;
292
293 for (auto& tokenAttribute : token.attributes()) {
294 if (!element.elementData() || !element.findAttributeByName(tokenAttribute.name()))
295 element.setAttribute(tokenAttribute.name(), tokenAttribute.value());
296 }
297}
298
299void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken&& token)
300{
301 // Fragments do not have a root HTML element, so any additional HTML elements
302 // encountered during fragment parsing should be ignored.
303 if (m_isParsingFragment)
304 return;
305
306 mergeAttributesFromTokenIntoElement(WTFMove(token), m_openElements.htmlElement());
307}
308
309void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken&& token)
310{
311 mergeAttributesFromTokenIntoElement(WTFMove(token), m_openElements.bodyElement());
312}
313
314void HTMLConstructionSite::setDefaultCompatibilityMode()
315{
316 if (m_isParsingFragment)
317 return;
318 if (m_document.isSrcdocDocument())
319 return;
320 setCompatibilityMode(DocumentCompatibilityMode::QuirksMode);
321}
322
323void HTMLConstructionSite::setCompatibilityMode(DocumentCompatibilityMode mode)
324{
325 m_inQuirksMode = (mode == DocumentCompatibilityMode::QuirksMode);
326 m_document.setCompatibilityMode(mode);
327}
328
// Chooses the document compatibility mode from the DOCTYPE's name, public
// identifier and system identifier, and applies it via setCompatibilityMode().
// The checks run in order: quirks, then limited quirks, then no quirks.
void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
{
    // There are three possible compatibility modes:
    // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
    // be omitted from numbers.
    // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
    // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.

    // Check for Quirks Mode.
    // Most public identifiers below are matched as prefixes; a few are matched exactly
    // (the equalLettersIgnoringASCIICase entries). All comparisons ignore ASCII case.
    if (name != "html"
        || startsWithLettersIgnoringASCIICase(publicId, "+//silmaril//dtd html pro v0r11 19970101//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//advasoft ltd//dtd html 3.0 aswedit + extensions//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//as//dtd html 3.0 aswedit + extensions//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 2.0 level 1//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 2.0 level 2//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 2.0 strict level 1//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 2.0 strict level 2//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 2.0 strict//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 2.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 2.1e//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 3.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 3.2 final//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 3.2//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html 3//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html level 0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html level 1//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html level 2//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html level 3//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html strict level 0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html strict level 1//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html strict level 2//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html strict level 3//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html strict//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//ietf//dtd html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//metrius//dtd metrius presentational//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//microsoft//dtd internet explorer 2.0 html strict//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//microsoft//dtd internet explorer 2.0 html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//microsoft//dtd internet explorer 2.0 tables//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//microsoft//dtd internet explorer 3.0 html strict//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//microsoft//dtd internet explorer 3.0 html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//microsoft//dtd internet explorer 3.0 tables//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//netscape comm. corp.//dtd html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//netscape comm. corp.//dtd strict html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//o'reilly and associates//dtd html 2.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//o'reilly and associates//dtd html extended 1.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//o'reilly and associates//dtd html extended relaxed 1.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//spyglass//dtd html 2.0 extended//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//sq//dtd html 2.0 hotmetal + extensions//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//sun microsystems corp.//dtd hotjava html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//sun microsystems corp.//dtd hotjava strict html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 3 1995-03-24//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 3.2 draft//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 3.2 final//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 3.2//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 3.2s draft//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 4.0 frameset//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 4.0 transitional//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html experimental 19960712//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html experimental 970421//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd w3 html//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3o//dtd w3 html 3.0//")
        || equalLettersIgnoringASCIICase(publicId, "-//w3o//dtd w3 html strict 3.0//en//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//webtechs//dtd mozilla html 2.0//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//webtechs//dtd mozilla html//")
        || equalLettersIgnoringASCIICase(publicId, "-/w3c/dtd html 4.0 transitional/en")
        || equalLettersIgnoringASCIICase(publicId, "html")
        || equalLettersIgnoringASCIICase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
        || (systemId.isEmpty() && startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 4.01 frameset//"))
        || (systemId.isEmpty() && startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 4.01 transitional//"))) {
        setCompatibilityMode(DocumentCompatibilityMode::QuirksMode);
        return;
    }

    // Check for Limited Quirks Mode.
    // The HTML 4.01 frameset/transitional identifiers land here only when a system
    // identifier is present; with an empty one they were handled as full quirks above.
    if (startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd xhtml 1.0 frameset//")
        || startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd xhtml 1.0 transitional//")
        || (!systemId.isEmpty() && startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 4.01 frameset//"))
        || (!systemId.isEmpty() && startsWithLettersIgnoringASCIICase(publicId, "-//w3c//dtd html 4.01 transitional//"))) {
        setCompatibilityMode(DocumentCompatibilityMode::LimitedQuirksMode);
        return;
    }

    // Otherwise we are No Quirks Mode.
    setCompatibilityMode(DocumentCompatibilityMode::NoQuirksMode);
}
416
// Forwards the end-of-parsing notification to the document.
void HTMLConstructionSite::finishedParsing()
{
    m_document.finishedParsing();
}
421
422void HTMLConstructionSite::insertDoctype(AtomicHTMLToken&& token)
423{
424 ASSERT(token.type() == HTMLToken::DOCTYPE);
425
426 String publicId = token.publicIdentifier();
427 String systemId = token.systemIdentifier();
428
429 attachLater(m_attachmentRoot, DocumentType::create(m_document, token.name(), publicId, systemId));
430
431 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
432 // never occurs. However, if we ever chose to support such, this code is subtly wrong,
433 // because context-less fragments can determine their own quirks mode, and thus change
434 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code
435 // in a fragment, as changing the owning document's compatibility mode would be wrong.
436 ASSERT(!m_isParsingFragment);
437 if (m_isParsingFragment)
438 return;
439
440 if (token.forceQuirks())
441 setCompatibilityMode(DocumentCompatibilityMode::QuirksMode);
442 else
443 setCompatibilityModeFromDoctype(token.name(), publicId, systemId);
444}
445
446void HTMLConstructionSite::insertComment(AtomicHTMLToken&& token)
447{
448 ASSERT(token.type() == HTMLToken::Comment);
449 attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token.comment()));
450}
451
452void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken&& token)
453{
454 ASSERT(token.type() == HTMLToken::Comment);
455 attachLater(m_attachmentRoot, Comment::create(m_document, token.comment()));
456}
457
458void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken&& token)
459{
460 ASSERT(token.type() == HTMLToken::Comment);
461 ContainerNode& parent = m_openElements.rootNode();
462 attachLater(parent, Comment::create(parent.document(), token.comment()));
463}
464
465void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken&& token)
466{
467 ASSERT(!shouldFosterParent());
468 m_head = HTMLStackItem::create(createHTMLElement(token), WTFMove(token));
469 attachLater(currentNode(), m_head->element());
470 m_openElements.pushHTMLHeadElement(*m_head);
471}
472
473void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken&& token)
474{
475 ASSERT(!shouldFosterParent());
476 auto body = createHTMLElement(token);
477 attachLater(currentNode(), body.copyRef());
478 m_openElements.pushHTMLBodyElement(HTMLStackItem::create(WTFMove(body), WTFMove(token)));
479}
480
481void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken&& token, bool isDemoted)
482{
483 auto element = createHTMLElement(token);
484 auto& formElement = downcast<HTMLFormElement>(element.get());
485 // If there is no template element on the stack of open elements, set the
486 // form element pointer to point to the element created.
487 if (!openElements().hasTemplateInHTMLScope())
488 m_form = &formElement;
489 formElement.setDemoted(isDemoted);
490 attachLater(currentNode(), formElement);
491 m_openElements.push(HTMLStackItem::create(formElement, WTFMove(token)));
492}
493
494void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken&& token)
495{
496 auto element = createHTMLElement(token);
497 attachLater(currentNode(), element.copyRef());
498 m_openElements.push(HTMLStackItem::create(WTFMove(element), WTFMove(token)));
499}
500
501std::unique_ptr<CustomElementConstructionData> HTMLConstructionSite::insertHTMLElementOrFindCustomElementInterface(AtomicHTMLToken&& token)
502{
503 JSCustomElementInterface* elementInterface = nullptr;
504 RefPtr<Element> element = createHTMLElementOrFindCustomElementInterface(token, &elementInterface);
505 if (UNLIKELY(elementInterface))
506 return std::make_unique<CustomElementConstructionData>(*elementInterface, token.name(), WTFMove(token.attributes()));
507 attachLater(currentNode(), *element);
508 m_openElements.push(HTMLStackItem::create(element.releaseNonNull(), WTFMove(token)));
509 return nullptr;
510}
511
// Inserts an already-constructed custom element: applies |attributes| to it,
// queues its insertion under the current node, pushes it on the stack of open
// elements and then runs the queued tasks immediately.
void HTMLConstructionSite::insertCustomElement(Ref<Element>&& element, const AtomicString& localName, Vector<Attribute>&& attributes)
{
    setAttributes(element, attributes, m_parserContentPolicy);
    attachLater(currentNode(), element.copyRef());
    // |attributes| is moved from here, so it must not be used after this line.
    m_openElements.push(HTMLStackItem::create(WTFMove(element), localName, WTFMove(attributes)));
    executeQueuedTasks();
}
519
520void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken&& token)
521{
522 ASSERT(token.type() == HTMLToken::StartTag);
523 // Normally HTMLElementStack is responsible for calling finishParsingChildren,
524 // but self-closing elements are never in the element stack so the stack
525 // doesn't get a chance to tell them that we're done parsing their children.
526 attachLater(currentNode(), createHTMLElement(token), true);
527 // FIXME: Do we want to acknowledge the token's self-closing flag?
528 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
529}
530
// Inserts a formatting element for |token| and appends the resulting current
// stack item to the list of active formatting elements.
void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken&& token)
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    // Possible active formatting elements include:
    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    ASSERT(isFormattingTag(token.name()));
    insertHTMLElement(WTFMove(token));
    // insertHTMLElement pushed the new element, so it is now the current stack item.
    m_activeFormattingElements.append(currentStackItem());
}
540
541void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken&& token)
542{
543 // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
544 // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
545 // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
546 // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
547 // those flags or effects thereof.
548 const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
549 const bool alreadyStarted = m_isParsingFragment && parserInserted;
550 auto element = HTMLScriptElement::create(scriptTag, ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
551 setAttributes(element, token, m_parserContentPolicy);
552 if (scriptingContentIsAllowed(m_parserContentPolicy))
553 attachLater(currentNode(), element.copyRef());
554 m_openElements.push(HTMLStackItem::create(WTFMove(element), WTFMove(token)));
555}
556
557void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken&& token, const AtomicString& namespaceURI)
558{
559 ASSERT(token.type() == HTMLToken::StartTag);
560 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
561
562 auto element = createElement(token, namespaceURI);
563 if (scriptingContentIsAllowed(m_parserContentPolicy) || !isScriptElement(element.get()))
564 attachLater(currentNode(), element.copyRef(), token.selfClosing());
565 if (!token.selfClosing())
566 m_openElements.push(HTMLStackItem::create(WTFMove(element), WTFMove(token), namespaceURI));
567}
568
569void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMode whitespaceMode)
570{
571 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
572 task.parent = &currentNode();
573
574 if (shouldFosterParent())
575 findFosterSite(task);
576
577 // Strings composed entirely of whitespace are likely to be repeated.
578 // Turn them into AtomicString so we share a single string for each.
579 bool shouldUseAtomicString = whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(characters));
580
581 unsigned currentPosition = 0;
582 unsigned lengthLimit = shouldUseLengthLimit(*task.parent) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
583
584 // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
585 // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
586
587 RefPtr<Node> previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
588 if (is<Text>(previousChild)) {
589 // FIXME: We're only supposed to append to this text node if it
590 // was the last text node inserted by the parser.
591 currentPosition = downcast<Text>(*previousChild).parserAppendData(characters, 0, lengthLimit);
592 }
593
594 while (currentPosition < characters.length()) {
595 auto textNode = Text::createWithLengthLimit(task.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition, lengthLimit);
596 // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
597 if (!textNode->length()) {
598 String substring = characters.substring(currentPosition);
599 textNode = Text::create(task.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
600 }
601
602 currentPosition += textNode->length();
603 ASSERT(currentPosition <= characters.length());
604 task.child = WTFMove(textNode);
605
606 executeTask(task);
607 }
608}
609
// Queues a Reparent task that moves |child|'s element under |newParent|'s node.
// The DOM is not touched until the task queue is executed.
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord& newParent, HTMLElementStack::ElementRecord& child)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    task.parent = &newParent.node();
    task.child = &child.element();
    m_taskQueue.append(WTFMove(task));
}
617
618void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem& newParent, HTMLElementStack::ElementRecord& child)
619{
620 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
621 if (causesFosterParenting(newParent)) {
622 findFosterSite(task);
623 ASSERT(task.parent);
624 } else
625 task.parent = &newParent.node();
626 task.child = &child.element();
627 m_taskQueue.append(WTFMove(task));
628}
629
// Queues a TakeAllChildrenAndReparent task. Note that for this operation the
// task's |child| slot carries the old parent's node, not a child to insert.
void HTMLConstructionSite::takeAllChildrenAndReparent(HTMLStackItem& newParent, HTMLElementStack::ElementRecord& oldParent)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildrenAndReparent);
    task.parent = &newParent.node();
    task.child = &oldParent.node();
    m_taskQueue.append(WTFMove(task));
}
637
638Ref<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
639{
640 QualifiedName tagName(nullAtom(), token.name(), namespaceURI);
641 auto element = ownerDocumentForCurrentNode().createElement(tagName, true);
642 setAttributes(element, token, m_parserContentPolicy);
643 return element;
644}
645
646inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
647{
648 if (is<HTMLTemplateElement>(currentNode()))
649 return downcast<HTMLTemplateElement>(currentNode()).content().document();
650 return currentNode().document();
651}
652
653static inline JSCustomElementInterface* findCustomElementInterface(Document& ownerDocument, const AtomicString& localName)
654{
655 auto* window = ownerDocument.domWindow();
656 if (!window)
657 return nullptr;
658
659 auto* registry = window->customElementRegistry();
660 if (LIKELY(!registry))
661 return nullptr;
662
663 return registry->findInterface(localName);
664}
665
666RefPtr<Element> HTMLConstructionSite::createHTMLElementOrFindCustomElementInterface(AtomicHTMLToken& token, JSCustomElementInterface** customElementInterface)
667{
668 auto& localName = token.name();
669 // FIXME: This can't use HTMLConstructionSite::createElement because we
670 // have to pass the current form element. We should rework form association
671 // to occur after construction to allow better code sharing here.
672 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#create-an-element-for-the-token
673 Document& ownerDocument = ownerDocumentForCurrentNode();
674 bool insideTemplateElement = !ownerDocument.frame();
675 auto element = HTMLElementFactory::createKnownElement(localName, ownerDocument, insideTemplateElement ? nullptr : form(), true);
676 if (UNLIKELY(!element)) {
677 if (auto* elementInterface = findCustomElementInterface(ownerDocument, localName)) {
678 if (!m_isParsingFragment) {
679 *customElementInterface = elementInterface;
680 return nullptr;
681 }
682 element = HTMLElement::create(QualifiedName { nullAtom(), localName, xhtmlNamespaceURI }, ownerDocument);
683 element->setIsCustomElementUpgradeCandidate();
684 element->enqueueToUpgrade(*elementInterface);
685 } else {
686 QualifiedName qualifiedName { nullAtom(), localName, xhtmlNamespaceURI };
687 if (Document::validateCustomElementName(localName) == CustomElementNameValidationStatus::Valid) {
688 element = HTMLElement::create(qualifiedName, ownerDocument);
689 element->setIsCustomElementUpgradeCandidate();
690 } else
691 element = HTMLUnknownElement::create(qualifiedName, ownerDocument);
692 }
693 }
694 ASSERT(element);
695
696 // FIXME: This is a hack to connect images to pictures before the image has
697 // been inserted into the document. It can be removed once asynchronous image
698 // loading is working.
699 if (is<HTMLPictureElement>(currentNode()) && is<HTMLImageElement>(*element))
700 downcast<HTMLImageElement>(*element).setPictureElement(&downcast<HTMLPictureElement>(currentNode()));
701
702 setAttributes(*element, token, m_parserContentPolicy);
703 ASSERT(element->isHTMLElement());
704 return element;
705}
706
707Ref<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
708{
709 RefPtr<Element> element = createHTMLElementOrFindCustomElementInterface(token, nullptr);
710 ASSERT(element);
711 return element.releaseNonNull();
712}
713
714Ref<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem& item)
715{
716 // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
717 AtomicHTMLToken fakeToken(HTMLToken::StartTag, item.localName(), Vector<Attribute>(item.attributes()));
718 ASSERT(item.namespaceURI() == HTMLNames::xhtmlNamespaceURI);
719 ASSERT(isFormattingTag(item.localName()));
720 return HTMLStackItem::create(createHTMLElement(fakeToken), WTFMove(fakeToken), item.namespaceURI());
721}
722
723Optional<unsigned> HTMLConstructionSite::indexOfFirstUnopenFormattingElement() const
724{
725 if (m_activeFormattingElements.isEmpty())
726 return WTF::nullopt;
727 unsigned index = m_activeFormattingElements.size();
728 do {
729 --index;
730 const auto& entry = m_activeFormattingElements.at(index);
731 if (entry.isMarker() || m_openElements.contains(entry.element())) {
732 unsigned firstUnopenElementIndex = index + 1;
733 return firstUnopenElementIndex < m_activeFormattingElements.size() ? firstUnopenElementIndex : Optional<unsigned>(WTF::nullopt);
734 }
735 } while (index);
736
737 return index;
738}
739
740void HTMLConstructionSite::reconstructTheActiveFormattingElements()
741{
742 Optional<unsigned> firstUnopenElementIndex = indexOfFirstUnopenFormattingElement();
743 if (!firstUnopenElementIndex)
744 return;
745
746 ASSERT(firstUnopenElementIndex.value() < m_activeFormattingElements.size());
747 for (unsigned unopenEntryIndex = firstUnopenElementIndex.value(); unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
748 auto& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
749 ASSERT(unopenedEntry.stackItem());
750 auto reconstructed = createElementFromSavedToken(*unopenedEntry.stackItem());
751 attachLater(currentNode(), reconstructed->node());
752 m_openElements.push(reconstructed.copyRef());
753 unopenedEntry.replaceElement(WTFMove(reconstructed));
754 }
755}
756
757void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
758{
759 while (hasImpliedEndTag(currentStackItem()) && !currentStackItem().matchesHTMLTag(tagName))
760 m_openElements.pop();
761}
762
763void HTMLConstructionSite::generateImpliedEndTags()
764{
765 while (hasImpliedEndTag(currentStackItem()))
766 m_openElements.pop();
767}
768
769void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
770{
771 // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
772 auto* lastTemplateElement = m_openElements.topmost(templateTag->localName());
773 if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
774 task.parent = &lastTemplateElement->element();
775 return;
776 }
777
778 if (auto* lastTableElementRecord = m_openElements.topmost(tableTag->localName())) {
779 auto& lastTableElement = lastTableElementRecord->element();
780 auto parent = makeRefPtr(lastTableElement.parentNode());
781 // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
782 // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
783 bool parentCanBeFosterParent = parent && (parent->isElementNode() || (m_isParsingFragment && parent == &m_openElements.rootNode()));
784 parentCanBeFosterParent = parentCanBeFosterParent || (is<DocumentFragment>(parent) && downcast<DocumentFragment>(parent.get())->isTemplateContent());
785 if (parentCanBeFosterParent) {
786 task.parent = parent;
787 task.nextChild = &lastTableElement;
788 return;
789 }
790 task.parent = &lastTableElementRecord->next()->element();
791 return;
792 }
793 // Fragment case
794 task.parent = &m_openElements.rootNode(); // DocumentFragment
795}
796
797bool HTMLConstructionSite::shouldFosterParent() const
798{
799 return m_redirectAttachToFosterParent && causesFosterParenting(currentStackItem());
800}
801
802void HTMLConstructionSite::fosterParent(Ref<Node>&& node)
803{
804 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
805 findFosterSite(task);
806 task.child = WTFMove(node);
807 ASSERT(task.parent);
808
809 m_taskQueue.append(WTFMove(task));
810}
811
812}
813