| 1 | /* |
| 2 | * Copyright (C) 2000 Peter Kelly (pmk@post.com) |
| 3 | * Copyright (C) 2005-2017 Apple Inc. All rights reserved. |
| 4 | * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org) |
| 5 | * Copyright (C) 2007 Samuel Weinig (sam@webkit.org) |
| 6 | * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) |
| 7 | * Copyright (C) 2008 Holger Hans Peter Freyther |
| 8 | * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
| 9 | * |
| 10 | * This library is free software; you can redistribute it and/or |
| 11 | * modify it under the terms of the GNU Library General Public |
| 12 | * License as published by the Free Software Foundation; either |
| 13 | * version 2 of the License, or (at your option) any later version. |
| 14 | * |
| 15 | * This library is distributed in the hope that it will be useful, |
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 18 | * Library General Public License for more details. |
| 19 | * |
| 20 | * You should have received a copy of the GNU Library General Public License |
| 21 | * along with this library; see the file COPYING.LIB. If not, write to |
| 22 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 23 | * Boston, MA 02110-1301, USA. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | #include "XMLDocumentParser.h" |
| 28 | |
| 29 | #include "CDATASection.h" |
| 30 | #include "Comment.h" |
| 31 | #include "Document.h" |
| 32 | #include "DocumentFragment.h" |
| 33 | #include "DocumentType.h" |
| 34 | #include "Frame.h" |
| 35 | #include "FrameLoader.h" |
| 36 | #include "FrameView.h" |
| 37 | #include "HTMLLinkElement.h" |
| 38 | #include "HTMLNames.h" |
| 39 | #include "HTMLStyleElement.h" |
| 40 | #include "ImageLoader.h" |
| 41 | #include "PendingScript.h" |
| 42 | #include "ProcessingInstruction.h" |
| 43 | #include "ResourceError.h" |
| 44 | #include "ResourceRequest.h" |
| 45 | #include "ResourceResponse.h" |
| 46 | #include "SVGNames.h" |
| 47 | #include "SVGStyleElement.h" |
| 48 | #include "ScriptElement.h" |
| 49 | #include "ScriptSourceCode.h" |
| 50 | #include "StyleScope.h" |
| 51 | #include "TextResourceDecoder.h" |
| 52 | #include "TreeDepthLimit.h" |
| 53 | #include <wtf/Ref.h> |
| 54 | #include <wtf/Threading.h> |
| 55 | #include <wtf/Vector.h> |
| 56 | |
| 57 | namespace WebCore { |
| 58 | |
| 59 | using namespace HTMLNames; |
| 60 | |
| 61 | void XMLDocumentParser::pushCurrentNode(ContainerNode* n) |
| 62 | { |
| 63 | ASSERT(n); |
| 64 | ASSERT(m_currentNode); |
| 65 | if (n != document()) |
| 66 | n->ref(); |
| 67 | m_currentNodeStack.append(m_currentNode); |
| 68 | m_currentNode = n; |
| 69 | if (m_currentNodeStack.size() > maxDOMTreeDepth) |
| 70 | handleError(XMLErrors::fatal, "Excessive node nesting." , textPosition()); |
| 71 | } |
| 72 | |
| 73 | void XMLDocumentParser::popCurrentNode() |
| 74 | { |
| 75 | if (!m_currentNode) |
| 76 | return; |
| 77 | ASSERT(m_currentNodeStack.size()); |
| 78 | |
| 79 | if (m_currentNode != document()) |
| 80 | m_currentNode->deref(); |
| 81 | |
| 82 | m_currentNode = m_currentNodeStack.last(); |
| 83 | m_currentNodeStack.removeLast(); |
| 84 | } |
| 85 | |
| 86 | void XMLDocumentParser::clearCurrentNodeStack() |
| 87 | { |
| 88 | if (m_currentNode && m_currentNode != document()) |
| 89 | m_currentNode->deref(); |
| 90 | m_currentNode = nullptr; |
| 91 | m_leafTextNode = nullptr; |
| 92 | |
| 93 | if (m_currentNodeStack.size()) { // Aborted parsing. |
| 94 | for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i) |
| 95 | m_currentNodeStack[i]->deref(); |
| 96 | if (m_currentNodeStack[0] && m_currentNodeStack[0] != document()) |
| 97 | m_currentNodeStack[0]->deref(); |
| 98 | m_currentNodeStack.clear(); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | void XMLDocumentParser::insert(SegmentedString&&) |
| 103 | { |
| 104 | ASSERT_NOT_REACHED(); |
| 105 | } |
| 106 | |
| 107 | void XMLDocumentParser::append(RefPtr<StringImpl>&& inputSource) |
| 108 | { |
| 109 | String source { WTFMove(inputSource) }; |
| 110 | |
| 111 | if (m_sawXSLTransform || !m_sawFirstElement) |
| 112 | m_originalSourceForTransform.append(source); |
| 113 | |
| 114 | if (isStopped() || m_sawXSLTransform) |
| 115 | return; |
| 116 | |
| 117 | if (m_parserPaused) { |
| 118 | m_pendingSrc.append(source); |
| 119 | return; |
| 120 | } |
| 121 | |
| 122 | doWrite(source); |
| 123 | |
| 124 | // After parsing, dispatch image beforeload events. |
| 125 | ImageLoader::dispatchPendingBeforeLoadEvents(); |
| 126 | } |
| 127 | |
| 128 | void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* m, TextPosition position) |
| 129 | { |
| 130 | if (!m_xmlErrors) |
| 131 | m_xmlErrors = std::make_unique<XMLErrors>(*document()); |
| 132 | m_xmlErrors->handleError(type, m, position); |
| 133 | if (type != XMLErrors::warning) |
| 134 | m_sawError = true; |
| 135 | if (type == XMLErrors::fatal) |
| 136 | stopParsing(); |
| 137 | } |
| 138 | |
| 139 | void XMLDocumentParser::createLeafTextNode() |
| 140 | { |
| 141 | if (m_leafTextNode) |
| 142 | return; |
| 143 | |
| 144 | ASSERT(m_bufferedText.size() == 0); |
| 145 | ASSERT(!m_leafTextNode); |
| 146 | m_leafTextNode = Text::create(m_currentNode->document(), "" ); |
| 147 | m_currentNode->parserAppendChild(*m_leafTextNode); |
| 148 | } |
| 149 | |
| 150 | bool XMLDocumentParser::updateLeafTextNode() |
| 151 | { |
| 152 | if (isStopped()) |
| 153 | return false; |
| 154 | |
| 155 | if (!m_leafTextNode) |
| 156 | return true; |
| 157 | |
| 158 | // This operation might fire mutation event, see below. |
| 159 | m_leafTextNode->appendData(String::fromUTF8(reinterpret_cast<const char*>(m_bufferedText.data()), m_bufferedText.size())); |
| 160 | m_bufferedText = { }; |
| 161 | |
| 162 | m_leafTextNode = nullptr; |
| 163 | |
| 164 | // Hence, we need to check again whether the parser is stopped, since mutation |
| 165 | // event handlers executed by appendData might have detached this parser. |
| 166 | return !isStopped(); |
| 167 | } |
| 168 | |
| 169 | void XMLDocumentParser::detach() |
| 170 | { |
| 171 | clearCurrentNodeStack(); |
| 172 | ScriptableDocumentParser::detach(); |
| 173 | } |
| 174 | |
| 175 | void XMLDocumentParser::end() |
| 176 | { |
| 177 | // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called. |
| 178 | // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case. |
| 179 | ASSERT(!m_parsingFragment); |
| 180 | |
| 181 | doEnd(); |
| 182 | |
| 183 | // doEnd() call above can detach the parser and null out its document. |
| 184 | // In that case, we just bail out. |
| 185 | if (isDetached()) |
| 186 | return; |
| 187 | |
| 188 | // doEnd() could process a script tag, thus pausing parsing. |
| 189 | if (m_parserPaused) |
| 190 | return; |
| 191 | |
| 192 | if (m_sawError && !isStopped()) { |
| 193 | insertErrorMessageBlock(); |
| 194 | if (isDetached()) // Inserting an error message may have ran arbitrary scripts. |
| 195 | return; |
| 196 | } else { |
| 197 | updateLeafTextNode(); |
| 198 | document()->styleScope().didChangeStyleSheetEnvironment(); |
| 199 | } |
| 200 | |
| 201 | if (isParsing()) |
| 202 | prepareToStopParsing(); |
| 203 | document()->setReadyState(Document::Interactive); |
| 204 | clearCurrentNodeStack(); |
| 205 | document()->finishedParsing(); |
| 206 | } |
| 207 | |
| 208 | void XMLDocumentParser::finish() |
| 209 | { |
| 210 | // FIXME: We should ASSERT(!m_parserStopped) here, since it does not |
| 211 | // makes sense to call any methods on DocumentParser once it's been stopped. |
| 212 | // However, FrameLoader::stop calls DocumentParser::finish unconditionally. |
| 213 | |
| 214 | Ref<XMLDocumentParser> protectedThis(*this); |
| 215 | |
| 216 | if (m_parserPaused) |
| 217 | m_finishCalled = true; |
| 218 | else |
| 219 | end(); |
| 220 | } |
| 221 | |
| 222 | void XMLDocumentParser::insertErrorMessageBlock() |
| 223 | { |
| 224 | ASSERT(m_xmlErrors); |
| 225 | m_xmlErrors->insertErrorMessageBlock(); |
| 226 | } |
| 227 | |
| 228 | void XMLDocumentParser::notifyFinished(PendingScript& pendingScript) |
| 229 | { |
| 230 | ASSERT(&pendingScript == m_pendingScript.get()); |
| 231 | |
| 232 | // JavaScript can detach this parser, make sure it's kept alive even if detached. |
| 233 | Ref<XMLDocumentParser> protectedThis(*this); |
| 234 | |
| 235 | m_pendingScript = nullptr; |
| 236 | pendingScript.clearClient(); |
| 237 | |
| 238 | pendingScript.element().executePendingScript(pendingScript); |
| 239 | |
| 240 | if (!isDetached() && !m_requestingScript) |
| 241 | resumeParsing(); |
| 242 | } |
| 243 | |
| 244 | bool XMLDocumentParser::isWaitingForScripts() const |
| 245 | { |
| 246 | return m_pendingScript; |
| 247 | } |
| 248 | |
| 249 | void XMLDocumentParser::pauseParsing() |
| 250 | { |
| 251 | ASSERT(!m_parserPaused); |
| 252 | |
| 253 | if (m_parsingFragment) |
| 254 | return; |
| 255 | |
| 256 | m_parserPaused = true; |
| 257 | } |
| 258 | |
| 259 | bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment& fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) |
| 260 | { |
| 261 | if (!chunk.length()) |
| 262 | return true; |
| 263 | |
| 264 | // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm: |
| 265 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm |
| 266 | // For now we have a hack for script/style innerHTML support: |
| 267 | if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag->localName()) || contextElement->hasLocalName(HTMLNames::styleTag->localName()))) { |
| 268 | fragment.parserAppendChild(fragment.document().createTextNode(chunk)); |
| 269 | return true; |
| 270 | } |
| 271 | |
| 272 | auto parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy); |
| 273 | bool wellFormed = parser->appendFragmentSource(chunk); |
| 274 | // Do not call finish(). The finish() and doEnd() implementations touch the main document and loader and can cause crashes in the fragment case. |
| 275 | parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. |
| 276 | return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than Document::wellFormed(). |
| 277 | } |
| 278 | |
| 279 | } // namespace WebCore |
| 280 | |