1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011-2017 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#pragma once
28
29#include "HTMLConstructionSite.h"
30#include "HTMLParserOptions.h"
31#include <wtf/text/StringBuilder.h>
32#include <wtf/text/TextPosition.h>
33
34namespace WebCore {
35
// Forward declarations: these types are only named by reference or smart
// pointer in this header, so their full definitions are not required here.
class HTMLDocumentParser;
class JSCustomElementInterface;
class ScriptElement;
39
40struct CustomElementConstructionData {
41 WTF_MAKE_STRUCT_FAST_ALLOCATED;
42
43 CustomElementConstructionData(Ref<JSCustomElementInterface>&&, const AtomicString& name, Vector<Attribute>&&);
44 ~CustomElementConstructionData();
45
46 Ref<JSCustomElementInterface> elementInterface;
47 AtomicString name;
48 Vector<Attribute> attributes;
49};
50
51class HTMLTreeBuilder {
52 WTF_MAKE_FAST_ALLOCATED;
53public:
54 HTMLTreeBuilder(HTMLDocumentParser&, HTMLDocument&, ParserContentPolicy, const HTMLParserOptions&);
55 HTMLTreeBuilder(HTMLDocumentParser&, DocumentFragment&, Element& contextElement, ParserContentPolicy, const HTMLParserOptions&);
56 void setShouldSkipLeadingNewline(bool);
57
58 ~HTMLTreeBuilder();
59
60 bool isParsingFragment() const;
61
62 void constructTree(AtomicHTMLToken&&);
63
64 bool isParsingTemplateContents() const;
65 bool hasParserBlockingScriptWork() const;
66
67 // Must be called to take the parser-blocking script before calling the parser again.
68 RefPtr<ScriptElement> takeScriptToProcess(TextPosition& scriptStartPosition);
69
70 std::unique_ptr<CustomElementConstructionData> takeCustomElementConstructionData() { return WTFMove(m_customElementToConstruct); }
71 void didCreateCustomOrFallbackElement(Ref<Element>&&, CustomElementConstructionData&);
72
73 // Done, close any open tags, etc.
74 void finished();
75
76private:
77 class ExternalCharacterTokenBuffer;
78
79 // Represents HTML5 "insertion mode"
80 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
81 enum class InsertionMode {
82 Initial,
83 BeforeHTML,
84 BeforeHead,
85 InHead,
86 InHeadNoscript,
87 AfterHead,
88 TemplateContents,
89 InBody,
90 Text,
91 InTable,
92 InTableText,
93 InCaption,
94 InColumnGroup,
95 InTableBody,
96 InRow,
97 InCell,
98 InSelect,
99 InSelectInTable,
100 AfterBody,
101 InFrameset,
102 AfterFrameset,
103 AfterAfterBody,
104 AfterAfterFrameset,
105 };
106
107 bool isParsingFragmentOrTemplateContents() const;
108
109#if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
110 void insertPhoneNumberLink(const String&);
111 void linkifyPhoneNumbers(const String&);
112#endif
113
114 void processToken(AtomicHTMLToken&&);
115
116 void processDoctypeToken(AtomicHTMLToken&&);
117 void processStartTag(AtomicHTMLToken&&);
118 void processEndTag(AtomicHTMLToken&&);
119 void processComment(AtomicHTMLToken&&);
120 void processCharacter(AtomicHTMLToken&&);
121 void processEndOfFile(AtomicHTMLToken&&);
122
123 bool processStartTagForInHead(AtomicHTMLToken&&);
124 void processStartTagForInBody(AtomicHTMLToken&&);
125 void processStartTagForInTable(AtomicHTMLToken&&);
126 void processEndTagForInBody(AtomicHTMLToken&&);
127 void processEndTagForInTable(AtomicHTMLToken&&);
128 void processEndTagForInTableBody(AtomicHTMLToken&&);
129 void processEndTagForInRow(AtomicHTMLToken&&);
130 void processEndTagForInCell(AtomicHTMLToken&&);
131
132 void processHtmlStartTagForInBody(AtomicHTMLToken&&);
133 bool processBodyEndTagForInBody(AtomicHTMLToken&&);
134 bool processTableEndTagForInTable();
135 bool processCaptionEndTagForInCaption();
136 bool processColgroupEndTagForInColumnGroup();
137 bool processTrEndTagForInRow();
138
139 void processAnyOtherEndTagForInBody(AtomicHTMLToken&&);
140
141 void processCharacterBuffer(ExternalCharacterTokenBuffer&);
142 inline void processCharacterBufferForInBody(ExternalCharacterTokenBuffer&);
143
144 void processFakeStartTag(const QualifiedName&, Vector<Attribute>&& attributes = Vector<Attribute>());
145 void processFakeEndTag(const QualifiedName&);
146 void processFakeEndTag(const AtomicString&);
147 void processFakeCharacters(const String&);
148 void processFakePEndTagIfPInButtonScope();
149
150 void processGenericRCDATAStartTag(AtomicHTMLToken&&);
151 void processGenericRawTextStartTag(AtomicHTMLToken&&);
152 void processScriptStartTag(AtomicHTMLToken&&);
153
154 // Default processing for the different insertion modes.
155 void defaultForInitial();
156 void defaultForBeforeHTML();
157 void defaultForBeforeHead();
158 void defaultForInHead();
159 void defaultForInHeadNoscript();
160 void defaultForAfterHead();
161 void defaultForInTableText();
162
163 bool shouldProcessTokenInForeignContent(const AtomicHTMLToken&);
164 void processTokenInForeignContent(AtomicHTMLToken&&);
165
166 HTMLStackItem& adjustedCurrentStackItem() const;
167
168 void callTheAdoptionAgency(AtomicHTMLToken&);
169
170 void closeTheCell();
171
172 template <bool shouldClose(const HTMLStackItem&)> void processCloseWhenNestedTag(AtomicHTMLToken&&);
173
174 void parseError(const AtomicHTMLToken&);
175
176 void resetInsertionModeAppropriately();
177
178 void insertGenericHTMLElement(AtomicHTMLToken&&);
179
180 void processTemplateStartTag(AtomicHTMLToken&&);
181 bool processTemplateEndTag(AtomicHTMLToken&&);
182 bool processEndOfFileForInTemplateContents(AtomicHTMLToken&&);
183
184 class FragmentParsingContext {
185 public:
186 FragmentParsingContext();
187 FragmentParsingContext(DocumentFragment&, Element& contextElement);
188
189 DocumentFragment* fragment() const;
190 Element& contextElement() const;
191 HTMLStackItem& contextElementStackItem() const;
192
193 private:
194 DocumentFragment* m_fragment { nullptr };
195 RefPtr<HTMLStackItem> m_contextElementStackItem;
196 };
197
198 HTMLDocumentParser& m_parser;
199 const HTMLParserOptions m_options;
200 const FragmentParsingContext m_fragmentContext;
201
202 HTMLConstructionSite m_tree;
203
204 // https://html.spec.whatwg.org/multipage/syntax.html#the-insertion-mode
205 InsertionMode m_insertionMode { InsertionMode::Initial };
206 InsertionMode m_originalInsertionMode { InsertionMode::Initial };
207 Vector<InsertionMode, 1> m_templateInsertionModes;
208
209 // https://html.spec.whatwg.org/multipage/syntax.html#concept-pending-table-char-tokens
210 StringBuilder m_pendingTableCharacters;
211
212 RefPtr<ScriptElement> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
213 TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
214
215 std::unique_ptr<CustomElementConstructionData> m_customElementToConstruct;
216
217 bool m_shouldSkipLeadingNewline { false };
218
219 bool m_framesetOk { true };
220
221#if !ASSERT_DISABLED
222 bool m_destroyed { false };
223 bool m_destructionProhibited { true };
224#endif
225};
226
227inline HTMLTreeBuilder::~HTMLTreeBuilder()
228{
229#if !ASSERT_DISABLED
230 ASSERT(!m_destroyed);
231 ASSERT(!m_destructionProhibited);
232 m_destroyed = true;
233#endif
234}
235
236inline void HTMLTreeBuilder::setShouldSkipLeadingNewline(bool shouldSkip)
237{
238 ASSERT(!m_destroyed);
239 m_shouldSkipLeadingNewline = shouldSkip;
240}
241
242inline bool HTMLTreeBuilder::isParsingFragment() const
243{
244 ASSERT(!m_destroyed);
245 return !!m_fragmentContext.fragment();
246}
247
248inline bool HTMLTreeBuilder::hasParserBlockingScriptWork() const
249{
250 ASSERT(!m_destroyed);
251 ASSERT(!(m_scriptToProcess && m_customElementToConstruct));
252 return m_scriptToProcess || m_customElementToConstruct;
253}
254
255inline DocumentFragment* HTMLTreeBuilder::FragmentParsingContext::fragment() const
256{
257 return m_fragment;
258}
259
260} // namespace WebCore
261