1/*
2 * Copyright (C) 2000 Peter Kelly <pmk@post.com>
3 * Copyright (C) 2005-2017 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
5 * Copyright (C) 2007 Samuel Weinig <sam@webkit.org>
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
10 * Copyright (C) 2013 Samsung Electronics. All rights reserved.
11 *
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Library General Public
14 * License as published by the Free Software Foundation; either
15 * version 2 of the License, or (at your option) any later version.
16 *
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public License
23 * along with this library; see the file COPYING.LIB. If not, write to
24 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 * Boston, MA 02110-1301, USA.
26 */
27
28#include "config.h"
29#include "XMLDocumentParser.h"
30
31#include "CDATASection.h"
32#include "Comment.h"
33#include "CachedResourceLoader.h"
34#include "Document.h"
35#include "DocumentFragment.h"
36#include "DocumentType.h"
37#include "Frame.h"
38#include "FrameLoader.h"
39#include "HTMLEntityParser.h"
40#include "HTMLHtmlElement.h"
41#include "HTMLTemplateElement.h"
42#include "InlineClassicScript.h"
43#include "PendingScript.h"
44#include "ProcessingInstruction.h"
45#include "ResourceError.h"
46#include "ResourceResponse.h"
47#include "ScriptElement.h"
48#include "ScriptSourceCode.h"
49#include "Settings.h"
50#include "SharedBuffer.h"
51#include "StyleScope.h"
52#include "TransformSource.h"
53#include "XMLNSNames.h"
54#include "XMLDocumentParserScope.h"
55#include <libxml/parserInternals.h>
56#include <wtf/unicode/UTF8Conversion.h>
57
58#if ENABLE(XSLT)
59#include "XMLTreeViewer.h"
60#include <libxslt/xslt.h>
61#endif
62
63namespace WebCore {
64
65#if ENABLE(XSLT)
66
67static inline bool shouldRenderInXMLTreeViewerMode(Document& document)
68{
69 if (document.sawElementsInKnownNamespaces())
70 return false;
71
72 if (document.transformSourceDocument())
73 return false;
74
75 auto* frame = document.frame();
76 if (!frame)
77 return false;
78
79 if (!frame->settings().developerExtrasEnabled())
80 return false;
81
82 if (frame->tree().parent())
83 return false; // This document is not in a top frame
84
85 return true;
86}
87
88#endif
89
90class PendingCallbacks {
91 WTF_MAKE_FAST_ALLOCATED;
92public:
93 void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** attributes)
94 {
95 auto callback = std::make_unique<PendingStartElementNSCallback>();
96
97 callback->xmlLocalName = xmlStrdup(xmlLocalName);
98 callback->xmlPrefix = xmlStrdup(xmlPrefix);
99 callback->xmlURI = xmlStrdup(xmlURI);
100 callback->numNamespaces = numNamespaces;
101 callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * numNamespaces * 2));
102 for (int i = 0; i < numNamespaces * 2 ; i++)
103 callback->namespaces[i] = xmlStrdup(namespaces[i]);
104 callback->numAttributes = numAttributes;
105 callback->numDefaulted = numDefaulted;
106 callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * numAttributes * 5));
107 for (int i = 0; i < numAttributes; i++) {
108 // Each attribute has 5 elements in the array:
109 // name, prefix, uri, value and an end pointer.
110
111 for (int j = 0; j < 3; j++)
112 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
113
114 int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
115
116 callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
117 callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
118 }
119
120 m_callbacks.append(WTFMove(callback));
121 }
122
123 void appendEndElementNSCallback()
124 {
125 m_callbacks.append(std::make_unique<PendingEndElementNSCallback>());
126 }
127
128 void appendCharactersCallback(const xmlChar* s, int len)
129 {
130 auto callback = std::make_unique<PendingCharactersCallback>();
131
132 callback->s = xmlStrndup(s, len);
133 callback->len = len;
134
135 m_callbacks.append(WTFMove(callback));
136 }
137
138 void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
139 {
140 auto callback = std::make_unique<PendingProcessingInstructionCallback>();
141
142 callback->target = xmlStrdup(target);
143 callback->data = xmlStrdup(data);
144
145 m_callbacks.append(WTFMove(callback));
146 }
147
148 void appendCDATABlockCallback(const xmlChar* s, int len)
149 {
150 auto callback = std::make_unique<PendingCDATABlockCallback>();
151
152 callback->s = xmlStrndup(s, len);
153 callback->len = len;
154
155 m_callbacks.append(WTFMove(callback));
156 }
157
158 void appendCommentCallback(const xmlChar* s)
159 {
160 auto callback = std::make_unique<PendingCommentCallback>();
161
162 callback->s = xmlStrdup(s);
163
164 m_callbacks.append(WTFMove(callback));
165 }
166
167 void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
168 {
169 auto callback = std::make_unique<PendingInternalSubsetCallback>();
170
171 callback->name = xmlStrdup(name);
172 callback->externalID = xmlStrdup(externalID);
173 callback->systemID = xmlStrdup(systemID);
174
175 m_callbacks.append(WTFMove(callback));
176 }
177
178 void appendErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
179 {
180 auto callback = std::make_unique<PendingErrorCallback>();
181
182 callback->message = xmlStrdup(message);
183 callback->type = type;
184 callback->lineNumber = lineNumber;
185 callback->columnNumber = columnNumber;
186
187 m_callbacks.append(WTFMove(callback));
188 }
189
190 void callAndRemoveFirstCallback(XMLDocumentParser* parser)
191 {
192 std::unique_ptr<PendingCallback> callback = m_callbacks.takeFirst();
193 callback->call(parser);
194 }
195
196 bool isEmpty() const { return m_callbacks.isEmpty(); }
197
198private:
199 struct PendingCallback {
200 virtual ~PendingCallback() = default;
201 virtual void call(XMLDocumentParser* parser) = 0;
202 };
203
204 struct PendingStartElementNSCallback : public PendingCallback {
205 virtual ~PendingStartElementNSCallback()
206 {
207 xmlFree(xmlLocalName);
208 xmlFree(xmlPrefix);
209 xmlFree(xmlURI);
210 for (int i = 0; i < numNamespaces * 2; i++)
211 xmlFree(namespaces[i]);
212 xmlFree(namespaces);
213 for (int i = 0; i < numAttributes; i++) {
214 for (int j = 0; j < 4; j++)
215 xmlFree(attributes[i * 5 + j]);
216 }
217 xmlFree(attributes);
218 }
219
220 void call(XMLDocumentParser* parser) override
221 {
222 parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI, numNamespaces, const_cast<const xmlChar**>(namespaces), numAttributes, numDefaulted, const_cast<const xmlChar**>(attributes));
223 }
224
225 xmlChar* xmlLocalName;
226 xmlChar* xmlPrefix;
227 xmlChar* xmlURI;
228 int numNamespaces;
229 xmlChar** namespaces;
230 int numAttributes;
231 int numDefaulted;
232 xmlChar** attributes;
233 };
234
235 struct PendingEndElementNSCallback : public PendingCallback {
236 void call(XMLDocumentParser* parser) override
237 {
238 parser->endElementNs();
239 }
240 };
241
242 struct PendingCharactersCallback : public PendingCallback {
243 virtual ~PendingCharactersCallback()
244 {
245 xmlFree(s);
246 }
247
248 void call(XMLDocumentParser* parser) override
249 {
250 parser->characters(s, len);
251 }
252
253 xmlChar* s;
254 int len;
255 };
256
257 struct PendingProcessingInstructionCallback : public PendingCallback {
258 virtual ~PendingProcessingInstructionCallback()
259 {
260 xmlFree(target);
261 xmlFree(data);
262 }
263
264 void call(XMLDocumentParser* parser) override
265 {
266 parser->processingInstruction(target, data);
267 }
268
269 xmlChar* target;
270 xmlChar* data;
271 };
272
273 struct PendingCDATABlockCallback : public PendingCallback {
274 virtual ~PendingCDATABlockCallback()
275 {
276 xmlFree(s);
277 }
278
279 void call(XMLDocumentParser* parser) override
280 {
281 parser->cdataBlock(s, len);
282 }
283
284 xmlChar* s;
285 int len;
286 };
287
288 struct PendingCommentCallback : public PendingCallback {
289 virtual ~PendingCommentCallback()
290 {
291 xmlFree(s);
292 }
293
294 void call(XMLDocumentParser* parser) override
295 {
296 parser->comment(s);
297 }
298
299 xmlChar* s;
300 };
301
302 struct PendingInternalSubsetCallback : public PendingCallback {
303 virtual ~PendingInternalSubsetCallback()
304 {
305 xmlFree(name);
306 xmlFree(externalID);
307 xmlFree(systemID);
308 }
309
310 void call(XMLDocumentParser* parser) override
311 {
312 parser->internalSubset(name, externalID, systemID);
313 }
314
315 xmlChar* name;
316 xmlChar* externalID;
317 xmlChar* systemID;
318 };
319
320 struct PendingErrorCallback: public PendingCallback {
321 virtual ~PendingErrorCallback()
322 {
323 xmlFree(message);
324 }
325
326 void call(XMLDocumentParser* parser) override
327 {
328 parser->handleError(type, reinterpret_cast<char*>(message), TextPosition(lineNumber, columnNumber));
329 }
330
331 XMLErrors::ErrorType type;
332 xmlChar* message;
333 OrdinalNumber lineNumber;
334 OrdinalNumber columnNumber;
335 };
336
337 Deque<std::unique_ptr<PendingCallback>> m_callbacks;
338};
339// --------------------------------
340
341static int globalDescriptor = 0;
342static Thread* libxmlLoaderThread { nullptr };
343
344static int matchFunc(const char*)
345{
346 // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
347 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
348 return XMLDocumentParserScope::currentCachedResourceLoader && libxmlLoaderThread == &Thread::current();
349}
350
351class OffsetBuffer {
352 WTF_MAKE_FAST_ALLOCATED;
353public:
354 OffsetBuffer(Vector<char> buffer)
355 : m_buffer(WTFMove(buffer))
356 , m_currentOffset(0)
357 {
358 }
359
360 int readOutBytes(char* outputBuffer, unsigned askedToRead)
361 {
362 unsigned bytesLeft = m_buffer.size() - m_currentOffset;
363 unsigned lenToCopy = std::min(askedToRead, bytesLeft);
364 if (lenToCopy) {
365 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
366 m_currentOffset += lenToCopy;
367 }
368 return lenToCopy;
369 }
370
371private:
372 Vector<char> m_buffer;
373 unsigned m_currentOffset;
374};
375
376static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
377{
378 if (!scriptingContentIsAllowed(parserContentPolicy))
379 element->stripScriptingAttributes(attributeVector);
380 element->parserSetAttributes(attributeVector);
381}
382
383static void switchToUTF16(xmlParserCtxtPtr ctxt)
384{
385 // Hack around libxml2's lack of encoding overide support by manually
386 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
387 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
388 // and switch encodings, causing the parse to fail.
389
390 // FIXME: Can we just use XML_PARSE_IGNORE_ENC now?
391
392 const UChar BOM = 0xFEFF;
393 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
394 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
395}
396
397static bool shouldAllowExternalLoad(const URL& url)
398{
399 String urlString = url.string();
400
401 // On non-Windows platforms libxml asks for this URL, the "XML_XML_DEFAULT_CATALOG", on initialization.
402 if (urlString == "file:///etc/xml/catalog")
403 return false;
404
405 // On Windows, libxml computes a URL relative to where its DLL resides.
406 if (startsWithLettersIgnoringASCIICase(urlString, "file:///") && urlString.endsWithIgnoringASCIICase("/etc/catalog"))
407 return false;
408
409 // The most common DTD. There isn't much point in hammering www.w3c.org by requesting this for every XHTML document.
410 if (startsWithLettersIgnoringASCIICase(urlString, "http://www.w3.org/tr/xhtml"))
411 return false;
412
413 // Similarly, there isn't much point in requesting the SVG DTD.
414 if (startsWithLettersIgnoringASCIICase(urlString, "http://www.w3.org/graphics/svg"))
415 return false;
416
417 // The libxml doesn't give us a lot of context for deciding whether to
418 // allow this request. In the worst case, this load could be for an
419 // external entity and the resulting document could simply read the
420 // retrieved content. If we had more context, we could potentially allow
421 // the parser to load a DTD. As things stand, we take the conservative
422 // route and allow same-origin requests only.
423 if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin().canRequest(url)) {
424 XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
425 return false;
426 }
427
428 return true;
429}
430
431static void* openFunc(const char* uri)
432{
433 ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
434 ASSERT(libxmlLoaderThread == &Thread::current());
435
436 URL url(URL(), uri);
437
438 if (!shouldAllowExternalLoad(url))
439 return &globalDescriptor;
440
441 ResourceError error;
442 ResourceResponse response;
443 RefPtr<SharedBuffer> data;
444
445
446 {
447 CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
448 XMLDocumentParserScope scope(nullptr);
449 // FIXME: We should restore the original global error handler as well.
450
451 if (cachedResourceLoader->frame()) {
452 FetchOptions options;
453 options.mode = FetchOptions::Mode::SameOrigin;
454 options.credentials = FetchOptions::Credentials::Include;
455 cachedResourceLoader->frame()->loader().loadResourceSynchronously(url, ClientCredentialPolicy::MayAskClientForCredentials, options, { }, error, response, data);
456 }
457 }
458
459 // We have to check the URL again after the load to catch redirects.
460 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
461 if (!shouldAllowExternalLoad(response.url()))
462 return &globalDescriptor;
463 Vector<char> buffer;
464 if (data)
465 buffer.append(data->data(), data->size());
466 return new OffsetBuffer(WTFMove(buffer));
467}
468
469static int readFunc(void* context, char* buffer, int len)
470{
471 // Do 0-byte reads in case of a null descriptor
472 if (context == &globalDescriptor)
473 return 0;
474
475 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
476 return data->readOutBytes(buffer, len);
477}
478
// libxml2 I/O "write" callback. Discards its input and reports that zero
// bytes were written.
static int writeFunc(void*, const char*, int)
{
    // Always just do 0-byte writes
    constexpr int bytesWritten = 0;
    return bytesWritten;
}
484
485static int closeFunc(void* context)
486{
487 if (context != &globalDescriptor) {
488 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
489 delete data;
490 }
491 return 0;
492}
493
494#if ENABLE(XSLT)
// printf-style error sink that deliberately ignores every message it is given.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
    return;
}
499#endif
500
501static void initializeXMLParser()
502{
503 static std::once_flag flag;
504 std::call_once(flag, [&] {
505 xmlInitParser();
506 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
507 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
508 libxmlLoaderThread = &Thread::current();
509 });
510}
511
512Ref<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
513{
514 initializeXMLParser();
515
516 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
517 parser->_private = userData;
518
519 // Substitute entities.
520 xmlCtxtUseOptions(parser, XML_PARSE_NOENT | XML_PARSE_HUGE);
521
522 switchToUTF16(parser);
523
524 return adoptRef(*new XMLParserContext(parser));
525}
526
527
528// Chunk should be encoded in UTF-8
529RefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
530{
531 initializeXMLParser();
532
533 // appendFragmentSource() checks that the length doesn't overflow an int.
534 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
535
536 if (!parser)
537 return 0;
538
539 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
540
541 // Substitute entities.
542 // FIXME: Why is XML_PARSE_NODICT needed? This is different from what createStringParser does.
543 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT | XML_PARSE_HUGE);
544
545 // Internal initialization
546 parser->sax2 = 1;
547 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
548 parser->depth = 0;
549 parser->str_xml = xmlDictLookup(parser->dict, reinterpret_cast<xmlChar*>(const_cast<char*>("xml")), 3);
550 parser->str_xmlns = xmlDictLookup(parser->dict, reinterpret_cast<xmlChar*>(const_cast<char*>("xmlns")), 5);
551 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
552 parser->_private = userData;
553
554 return adoptRef(*new XMLParserContext(parser));
555}
556
557// --------------------------------
558
559bool XMLDocumentParser::supportsXMLVersion(const String& version)
560{
561 return version == "1.0";
562}
563
// Constructs a parser that builds directly into |document|: the document
// itself is the initial current node, |frameView| is kept in m_view, and an
// empty PendingCallbacks queue is allocated. The script start position starts
// out as TextPosition::belowRangePosition().
XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
    : ScriptableDocumentParser(document)
    , m_view(frameView)
    , m_pendingCallbacks(std::make_unique<PendingCallbacks>())
    , m_currentNode(&document)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
{
}
572
573XMLDocumentParser::XMLDocumentParser(DocumentFragment& fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
574 : ScriptableDocumentParser(fragment.document(), parserContentPolicy)
575 , m_pendingCallbacks(std::make_unique<PendingCallbacks>())
576 , m_currentNode(&fragment)
577 , m_scriptStartPosition(TextPosition::belowRangePosition())
578 , m_parsingFragment(true)
579{
580 fragment.ref();
581
582 // Add namespaces based on the parent node
583 Vector<Element*> elemStack;
584 while (parentElement) {
585 elemStack.append(parentElement);
586
587 ContainerNode* node = parentElement->parentNode();
588 if (!is<Element>(node))
589 break;
590 parentElement = downcast<Element>(node);
591 }
592
593 if (elemStack.isEmpty())
594 return;
595
596 // FIXME: Share code with isDefaultNamespace() per http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#parsing-xhtml-fragments
597 for (; !elemStack.isEmpty(); elemStack.removeLast()) {
598 Element* element = elemStack.last();
599 if (element->hasAttributes()) {
600 for (const Attribute& attribute : element->attributesIterator()) {
601 if (attribute.localName() == xmlnsAtom())
602 m_defaultNamespaceURI = attribute.value();
603 else if (attribute.prefix() == xmlnsAtom())
604 m_prefixToNamespaceMap.set(attribute.localName(), attribute.value());
605 }
606 }
607 }
608
609 if (m_defaultNamespaceURI.isNull())
610 m_defaultNamespaceURI = parentElement->namespaceURI();
611}
612
613XMLParserContext::~XMLParserContext()
614{
615 if (m_context->myDoc)
616 xmlFreeDoc(m_context->myDoc);
617 xmlFreeParserCtxt(m_context);
618}
619
620XMLDocumentParser::~XMLDocumentParser()
621{
622 // The XMLDocumentParser will always be detached before being destroyed.
623 ASSERT(m_currentNodeStack.isEmpty());
624 ASSERT(!m_currentNode);
625
626 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
627 if (m_pendingScript)
628 m_pendingScript->clearClient();
629}
630
631void XMLDocumentParser::doWrite(const String& parseString)
632{
633 ASSERT(!isDetached());
634 if (!m_context)
635 initializeParserContext();
636
637 // Protect the libxml context from deletion during a callback
638 RefPtr<XMLParserContext> context = m_context;
639
640 // libXML throws an error if you try to switch the encoding for an empty string.
641 if (parseString.length()) {
642 // JavaScript may cause the parser to detach during xmlParseChunk
643 // keep this alive until this function is done.
644 Ref<XMLDocumentParser> protectedThis(*this);
645
646 XMLDocumentParserScope scope(&document()->cachedResourceLoader());
647
648 // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16?
649 switchToUTF16(context->context());
650 xmlParseChunk(context->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), sizeof(UChar) * parseString.length(), 0);
651
652 // JavaScript (which may be run under the xmlParseChunk callstack) may
653 // cause the parser to be stopped or detached.
654 if (isStopped())
655 return;
656 }
657
658 // FIXME: Why is this here? And why is it after we process the passed source?
659 if (document()->decoder() && document()->decoder()->sawError()) {
660 // If the decoder saw an error, report it as fatal (stops parsing)
661 TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
662 handleError(XMLErrors::fatal, "Encoding error", position);
663 }
664}
665
666static inline String toString(const xmlChar* string, size_t size)
667{
668 return String::fromUTF8(reinterpret_cast<const char*>(string), size);
669}
670
671static inline String toString(const xmlChar* string)
672{
673 return String::fromUTF8(reinterpret_cast<const char*>(string));
674}
675
676static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
677{
678 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
679}
680
681static inline AtomicString toAtomicString(const xmlChar* string)
682{
683 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
684}
685
686struct _xmlSAX2Namespace {
687 const xmlChar* prefix;
688 const xmlChar* uri;
689};
690typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
691
692static inline bool handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int numNamespaces)
693{
694 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
695 for (int i = 0; i < numNamespaces; i++) {
696 AtomicString namespaceQName = xmlnsAtom();
697 AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
698 if (namespaces[i].prefix)
699 namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
700
701 auto result = Element::parseAttributeName(XMLNSNames::xmlnsNamespaceURI, namespaceQName);
702 if (result.hasException())
703 return false;
704
705 prefixedAttributes.append(Attribute(result.releaseReturnValue(), namespaceURI));
706 }
707 return true;
708}
709
710struct _xmlSAX2Attributes {
711 const xmlChar* localname;
712 const xmlChar* prefix;
713 const xmlChar* uri;
714 const xmlChar* value;
715 const xmlChar* end;
716};
717typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
718
719static inline bool handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int numAttributes)
720{
721 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
722 for (int i = 0; i < numAttributes; i++) {
723 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
724 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
725 String attrPrefix = toString(attributes[i].prefix);
726 AtomicString attrURI = attrPrefix.isEmpty() ? nullAtom() : toAtomicString(attributes[i].uri);
727 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
728
729 auto result = Element::parseAttributeName(attrURI, attrQName);
730 if (result.hasException())
731 return false;
732
733 prefixedAttributes.append(Attribute(result.releaseReturnValue(), attrValue));
734 }
735 return true;
736}
737
// This is a hack around https://bugzilla.gnome.org/show_bug.cgi?id=502960
// Otherwise libxml doesn't include namespace for parsed entities, breaking entity
// expansion for all entities containing elements.
// Returns true when the work-around is needed, i.e. when building against a
// libxml older than 2.7.4, the release in which the bug was fixed.
static inline bool hackAroundLibXMLEntityParsingBug()
{
#if LIBXML_VERSION < 20704
    return true;
#else
    return false;
#endif
}
750
751void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** libxmlNamespaces, int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes)
752{
753 if (isStopped())
754 return;
755
756 if (m_parserPaused) {
757 m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, numNamespaces, libxmlNamespaces, numAttributes, numDefaulted, libxmlAttributes);
758 return;
759 }
760
761 if (!updateLeafTextNode())
762 return;
763
764 AtomicString localName = toAtomicString(xmlLocalName);
765 AtomicString uri = toAtomicString(xmlURI);
766 AtomicString prefix = toAtomicString(xmlPrefix);
767
768 if (m_parsingFragment && uri.isNull()) {
769 if (!prefix.isNull())
770 uri = m_prefixToNamespaceMap.get(prefix);
771 else
772 uri = m_defaultNamespaceURI;
773 }
774
775 // If libxml entity parsing is broken, transfer the currentNodes' namespaceURI to the new node,
776 // if we're currently expanding elements which originate from an entity declaration.
777 if (hackAroundLibXMLEntityParsingBug() && depthTriggeringEntityExpansion() != -1 && context()->depth > depthTriggeringEntityExpansion() && uri.isNull() && prefix.isNull())
778 uri = m_currentNode->namespaceURI();
779
780 bool isFirstElement = !m_sawFirstElement;
781 m_sawFirstElement = true;
782
783 QualifiedName qName(prefix, localName, uri);
784 auto newElement = m_currentNode->document().createElement(qName, true);
785
786 Vector<Attribute> prefixedAttributes;
787 if (!handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, numNamespaces)) {
788 setAttributes(newElement.ptr(), prefixedAttributes, parserContentPolicy());
789 stopParsing();
790 return;
791 }
792
793 bool success = handleElementAttributes(prefixedAttributes, libxmlAttributes, numAttributes);
794 setAttributes(newElement.ptr(), prefixedAttributes, parserContentPolicy());
795 if (!success) {
796 stopParsing();
797 return;
798 }
799
800 newElement->beginParsingChildren();
801
802 if (isScriptElement(newElement.get()))
803 m_scriptStartPosition = textPosition();
804
805 m_currentNode->parserAppendChild(newElement);
806 if (!m_currentNode) // Synchronous DOM events may have removed the current node.
807 return;
808
809 if (is<HTMLTemplateElement>(newElement))
810 pushCurrentNode(&downcast<HTMLTemplateElement>(newElement.get()).content());
811 else
812 pushCurrentNode(newElement.ptr());
813
814 if (is<HTMLHtmlElement>(newElement))
815 downcast<HTMLHtmlElement>(newElement.get()).insertedByParser();
816
817 if (!m_parsingFragment && isFirstElement && document()->frame())
818 document()->frame()->injectUserScripts(InjectAtDocumentStart);
819}
820
821void XMLDocumentParser::endElementNs()
822{
823 if (isStopped())
824 return;
825
826 if (m_parserPaused) {
827 m_pendingCallbacks->appendEndElementNSCallback();
828 return;
829 }
830
831 // JavaScript can detach the parser. Make sure this is not released
832 // before the end of this method.
833 Ref<XMLDocumentParser> protectedThis(*this);
834
835 if (!updateLeafTextNode())
836 return;
837
838 RefPtr<ContainerNode> node = m_currentNode;
839 node->finishParsingChildren();
840
841 // Once we reach the depth again where entity expansion started, stop executing the work-around.
842 if (hackAroundLibXMLEntityParsingBug() && context()->depth <= depthTriggeringEntityExpansion())
843 setDepthTriggeringEntityExpansion(-1);
844
845 if (!scriptingContentIsAllowed(parserContentPolicy()) && is<Element>(*node) && isScriptElement(downcast<Element>(*node))) {
846 popCurrentNode();
847 node->remove();
848 return;
849 }
850
851 if (!node->isElementNode() || !m_view) {
852 popCurrentNode();
853 return;
854 }
855
856 auto& element = downcast<Element>(*node);
857
858 // The element's parent may have already been removed from document.
859 // Parsing continues in this case, but scripts aren't executed.
860 if (!element.isConnected()) {
861 popCurrentNode();
862 return;
863 }
864
865 if (!isScriptElement(element)) {
866 popCurrentNode();
867 return;
868 }
869
870 // Don't load external scripts for standalone documents (for now).
871 ASSERT(!m_pendingScript);
872 m_requestingScript = true;
873
874 auto& scriptElement = downcastScriptElement(element);
875 if (scriptElement.prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute)) {
876 // FIXME: Script execution should be shared between
877 // the libxml2 and Qt XMLDocumentParser implementations.
878
879 if (scriptElement.readyToBeParserExecuted())
880 scriptElement.executeClassicScript(ScriptSourceCode(scriptElement.scriptContent(), URL(document()->url()), m_scriptStartPosition, JSC::SourceProviderSourceType::Program, InlineClassicScript::create(scriptElement)));
881 else if (scriptElement.willBeParserExecuted() && scriptElement.loadableScript()) {
882 m_pendingScript = PendingScript::create(scriptElement, *scriptElement.loadableScript());
883 m_pendingScript->setClient(*this);
884
885 // m_pendingScript will be nullptr if script was already loaded and setClient() executed it.
886 if (m_pendingScript)
887 pauseParsing();
888 }
889
890 // JavaScript may have detached the parser
891 if (isDetached())
892 return;
893 }
894 m_requestingScript = false;
895 popCurrentNode();
896}
897
898void XMLDocumentParser::characters(const xmlChar* characters, int length)
899{
900 if (isStopped())
901 return;
902
903 if (m_parserPaused) {
904 m_pendingCallbacks->appendCharactersCallback(characters, length);
905 return;
906 }
907
908 if (!m_leafTextNode)
909 createLeafTextNode();
910 m_bufferedText.append(characters, length);
911}
912
913void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
914{
915 if (isStopped())
916 return;
917
918 va_list preflightArgs;
919 va_copy(preflightArgs, args);
920 size_t stringLength = vsnprintf(nullptr, 0, message, preflightArgs);
921 va_end(preflightArgs);
922
923 Vector<char, 1024> buffer(stringLength + 1);
924 vsnprintf(buffer.data(), stringLength + 1, message, args);
925
926 TextPosition position = textPosition();
927 if (m_parserPaused)
928 m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(buffer.data()), position.m_line, position.m_column);
929 else
930 handleError(type, buffer.data(), textPosition());
931}
932
933void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
934{
935 if (isStopped())
936 return;
937
938 if (m_parserPaused) {
939 m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
940 return;
941 }
942
943 if (!updateLeafTextNode())
944 return;
945
946 auto result = m_currentNode->document().createProcessingInstruction(toString(target), toString(data));
947 if (result.hasException())
948 return;
949 auto pi = result.releaseReturnValue();
950
951 pi->setCreatedByParser(true);
952
953 m_currentNode->parserAppendChild(pi);
954
955 pi->finishParsingChildren();
956
957 if (pi->isCSS())
958 m_sawCSS = true;
959
960#if ENABLE(XSLT)
961 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
962 if (m_sawXSLTransform && !document()->transformSourceDocument())
963 stopParsing();
964#endif
965}
966
967void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
968{
969 if (isStopped())
970 return;
971
972 if (m_parserPaused) {
973 m_pendingCallbacks->appendCDATABlockCallback(s, len);
974 return;
975 }
976
977 if (!updateLeafTextNode())
978 return;
979
980 m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), toString(s, len)));
981}
982
983void XMLDocumentParser::comment(const xmlChar* s)
984{
985 if (isStopped())
986 return;
987
988 if (m_parserPaused) {
989 m_pendingCallbacks->appendCommentCallback(s);
990 return;
991 }
992
993 if (!updateLeafTextNode())
994 return;
995
996 m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), toString(s)));
997}
998
// Interpretation of the integer "standalone" value passed to startDocument().
// The enumerators count up from -2, so they evaluate to -2, -1, 0 and 1.
// NOTE(review): these presumably mirror libxml2's xmlParserCtxt::standalone
// encoding - confirm against the libxml2 documentation.
enum StandaloneInfo {
    StandaloneUnspecified = -2, // Treated as "no standalone value to record".
    NoXMlDeclaration, // -1: the document is recorded as having no XML declaration.
    StandaloneNo, // 0
    StandaloneYes // 1
};
1005
1006void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
1007{
1008 StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
1009 if (standaloneInfo == NoXMlDeclaration) {
1010 document()->setHasXMLDeclaration(false);
1011 return;
1012 }
1013
1014 if (version)
1015 document()->setXMLVersion(toString(version));
1016 if (standalone != StandaloneUnspecified)
1017 document()->setXMLStandalone(standaloneInfo == StandaloneYes);
1018 if (encoding)
1019 document()->setXMLEncoding(toString(encoding));
1020 document()->setHasXMLDeclaration(true);
1021}
1022
// End-of-document callback: calls updateLeafTextNode() so any open text node is
// closed. The boolean result is intentionally ignored here.
void XMLDocumentParser::endDocument()
{
    updateLeafTextNode();
}
1027
1028void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1029{
1030 if (isStopped())
1031 return;
1032
1033 if (m_parserPaused) {
1034 m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
1035 return;
1036 }
1037
1038 if (document())
1039 document()->parserAppendChild(DocumentType::create(*document(), toString(name), toString(externalID), toString(systemID)));
1040}
1041
1042static inline XMLDocumentParser* getParser(void* closure)
1043{
1044 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1045 return static_cast<XMLDocumentParser*>(ctxt->_private);
1046}
1047
1048// This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
1049// Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
1050static inline bool hackAroundLibXMLEntityBug(void* closure)
1051{
1052#if LIBXML_VERSION >= 20627
1053 // This bug has been fixed in libxml 2.6.27.
1054 UNUSED_PARAM(closure);
1055 return false;
1056#else
1057 return static_cast<xmlParserCtxtPtr>(closure)->node;
1058#endif
1059}
1060
1061static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes)
1062{
1063 if (hackAroundLibXMLEntityBug(closure))
1064 return;
1065
1066 getParser(closure)->startElementNs(localname, prefix, uri, numNamespaces, namespaces, numAttributes, numDefaulted, libxmlAttributes);
1067}
1068
1069static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1070{
1071 if (hackAroundLibXMLEntityBug(closure))
1072 return;
1073
1074 getParser(closure)->endElementNs();
1075}
1076
1077static void charactersHandler(void* closure, const xmlChar* s, int len)
1078{
1079 if (hackAroundLibXMLEntityBug(closure))
1080 return;
1081
1082 getParser(closure)->characters(s, len);
1083}
1084
1085static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1086{
1087 if (hackAroundLibXMLEntityBug(closure))
1088 return;
1089
1090 getParser(closure)->processingInstruction(target, data);
1091}
1092
1093static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
1094{
1095 if (hackAroundLibXMLEntityBug(closure))
1096 return;
1097
1098 getParser(closure)->cdataBlock(s, len);
1099}
1100
1101static void commentHandler(void* closure, const xmlChar* comment)
1102{
1103 if (hackAroundLibXMLEntityBug(closure))
1104 return;
1105
1106 getParser(closure)->comment(comment);
1107}
1108
1109WTF_ATTRIBUTE_PRINTF(2, 3)
1110static void warningHandler(void* closure, const char* message, ...)
1111{
1112 va_list args;
1113 va_start(args, message);
1114 getParser(closure)->error(XMLErrors::warning, message, args);
1115 va_end(args);
1116}
1117
1118WTF_ATTRIBUTE_PRINTF(2, 3)
1119static void fatalErrorHandler(void* closure, const char* message, ...)
1120{
1121 va_list args;
1122 va_start(args, message);
1123 getParser(closure)->error(XMLErrors::fatal, message, args);
1124 va_end(args);
1125}
1126
1127WTF_ATTRIBUTE_PRINTF(2, 3)
1128static void normalErrorHandler(void* closure, const char* message, ...)
1129{
1130 va_list args;
1131 va_start(args, message);
1132 getParser(closure)->error(XMLErrors::nonFatal, message, args);
1133 va_end(args);
1134}
1135
// Shared storage for the UTF-8 expansion of an XHTML named entity:
// getXHTMLEntity() writes the converted text here and sharedXHTMLEntity()
// points the static entity's orig/content fields at it.
// Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
// a hack to avoid malloc/free. Using a global variable like this could cause trouble
// if libxml implementation details were to change.
static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
1140
1141static xmlEntityPtr sharedXHTMLEntity()
1142{
1143 static xmlEntity entity;
1144 if (!entity.type) {
1145 entity.type = XML_ENTITY_DECL;
1146 entity.orig = sharedXHTMLEntityResult;
1147 entity.content = sharedXHTMLEntityResult;
1148 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1149 }
1150 return &entity;
1151}
1152
1153static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
1154{
1155 const char* originalTarget = target;
1156 auto conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1157 if (conversionResult != WTF::Unicode::ConversionOK)
1158 return 0;
1159
1160 // Even though we must pass the length, libxml expects the entity string to be null terminated.
1161 ASSERT(target >= originalTarget + 1);
1162 *target = '\0';
1163 return target - originalTarget;
1164}
1165
1166static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1167{
1168 UChar utf16DecodedEntity[4];
1169 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1170 if (!numberOfCodeUnits)
1171 return 0;
1172
1173 ASSERT(numberOfCodeUnits <= 4);
1174 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1175 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1176 if (!entityLengthInUTF8)
1177 return 0;
1178
1179 xmlEntityPtr entity = sharedXHTMLEntity();
1180 entity->length = entityLengthInUTF8;
1181 entity->name = name;
1182 return entity;
1183}
1184
1185static void entityDeclarationHandler(void* closure, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content)
1186{
1187 // Prevent the next call to getEntityHandler() to record the entity expansion depth.
1188 // We're parsing the entity declaration, so there's no need to record anything.
1189 // We only need to record the depth, if we're actually expanding the entity, when it's referenced.
1190 if (hackAroundLibXMLEntityParsingBug())
1191 getParser(closure)->setIsParsingEntityDeclaration(true);
1192 xmlSAX2EntityDecl(closure, name, type, publicId, systemId, content);
1193}
1194
1195static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1196{
1197 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1198
1199 XMLDocumentParser* parser = getParser(closure);
1200 if (hackAroundLibXMLEntityParsingBug()) {
1201 if (parser->isParsingEntityDeclaration()) {
1202 // We're parsing the entity declarations (not an entity reference), no need to do anything special.
1203 parser->setIsParsingEntityDeclaration(false);
1204 ASSERT(parser->depthTriggeringEntityExpansion() == -1);
1205 } else {
1206 // The entity will be used and eventually expanded. Record the current parser depth
1207 // so the next call to startElementNs() knows that the new element originates from
1208 // an entity declaration.
1209 parser->setDepthTriggeringEntityExpansion(ctxt->depth);
1210 }
1211 }
1212
1213 xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1214 if (ent) {
1215 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1216 return ent;
1217 }
1218
1219 ent = xmlGetDocEntity(ctxt->myDoc, name);
1220 if (!ent && parser->isXHTMLDocument()) {
1221 ent = getXHTMLEntity(name);
1222 if (ent)
1223 ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1224 }
1225
1226 return ent;
1227}
1228
1229static void startDocumentHandler(void* closure)
1230{
1231 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1232 switchToUTF16(ctxt);
1233 getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
1234 xmlSAX2StartDocument(closure);
1235}
1236
1237static void endDocumentHandler(void* closure)
1238{
1239 getParser(closure)->endDocument();
1240 xmlSAX2EndDocument(closure);
1241}
1242
1243static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1244{
1245 getParser(closure)->internalSubset(name, externalID, systemID);
1246 xmlSAX2InternalSubset(closure, name, externalID, systemID);
1247}
1248
1249static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1250{
1251 String extId = toString(externalId);
1252 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1253 || (extId == "-//W3C//DTD XHTML 1.1//EN")
1254 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1255 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1256 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1257 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1258 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1259 || (extId == "-//W3C//DTD MathML 2.0//EN")
1260 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1261 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN")
1262 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"))
1263 getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1264}
1265
// Intentionally empty SAX ignorable-whitespace callback; it exists only so the
// corresponding handler slot is not left null.
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // nothing to do, but we need this to work around a crasher
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1272
1273void XMLDocumentParser::initializeParserContext(const CString& chunk)
1274{
1275 xmlSAXHandler sax;
1276 memset(&sax, 0, sizeof(sax));
1277
1278 sax.error = normalErrorHandler;
1279 sax.fatalError = fatalErrorHandler;
1280 sax.characters = charactersHandler;
1281 sax.processingInstruction = processingInstructionHandler;
1282 sax.cdataBlock = cdataBlockHandler;
1283 sax.comment = commentHandler;
1284 sax.warning = warningHandler;
1285 sax.startElementNs = startElementNsHandler;
1286 sax.endElementNs = endElementNsHandler;
1287 sax.getEntity = getEntityHandler;
1288 sax.startDocument = startDocumentHandler;
1289 sax.endDocument = endDocumentHandler;
1290 sax.internalSubset = internalSubsetHandler;
1291 sax.externalSubset = externalSubsetHandler;
1292 sax.ignorableWhitespace = ignorableWhitespaceHandler;
1293 sax.entityDecl = entityDeclarationHandler;
1294 sax.initialized = XML_SAX2_MAGIC;
1295 DocumentParser::startParsing();
1296 m_sawError = false;
1297 m_sawCSS = false;
1298 m_sawXSLTransform = false;
1299 m_sawFirstElement = false;
1300
1301 XMLDocumentParserScope scope(&document()->cachedResourceLoader());
1302 if (m_parsingFragment)
1303 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1304 else {
1305 ASSERT(!chunk.data());
1306 m_context = XMLParserContext::createStringParser(&sax, this);
1307 }
1308}
1309
1310void XMLDocumentParser::doEnd()
1311{
1312 if (!isStopped()) {
1313 if (m_context) {
1314 // Tell libxml we're done.
1315 {
1316 XMLDocumentParserScope scope(&document()->cachedResourceLoader());
1317 xmlParseChunk(context(), 0, 0, 1);
1318 }
1319
1320 m_context = nullptr;
1321 }
1322 }
1323
1324#if ENABLE(XSLT)
1325 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && shouldRenderInXMLTreeViewerMode(*document());
1326 if (xmlViewerMode) {
1327 XMLTreeViewer xmlTreeViewer(*document());
1328 xmlTreeViewer.transformDocumentToTreeView();
1329 } else if (m_sawXSLTransform) {
1330 xmlDocPtr doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform.toString(), document()->url().string());
1331 document()->setTransformSource(std::make_unique<TransformSource>(doc));
1332
1333 document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
1334 document()->applyPendingXSLTransformsNowIfScheduled();
1335
1336 // styleResolverChanged() call can detach the parser and null out its document.
1337 // In that case, we just bail out.
1338 if (isDetached())
1339 return;
1340
1341 document()->setParsing(true);
1342 DocumentParser::stopParsing();
1343 }
1344#endif
1345}
1346
1347#if ENABLE(XSLT)
1348static inline const char* nativeEndianUTF16Encoding()
1349{
1350 const UChar BOM = 0xFEFF;
1351 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
1352 return BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE";
1353}
1354
1355xmlDocPtr xmlDocPtrForString(CachedResourceLoader& cachedResourceLoader, const String& source, const String& url)
1356{
1357 if (source.isEmpty())
1358 return nullptr;
1359
1360 // Parse in a single chunk into an xmlDocPtr
1361 // FIXME: Hook up error handlers so that a failure to parse the main document results in
1362 // good error messages.
1363
1364 const bool is8Bit = source.is8Bit();
1365 const char* characters = is8Bit ? reinterpret_cast<const char*>(source.characters8()) : reinterpret_cast<const char*>(source.characters16());
1366 size_t sizeInBytes = source.length() * (is8Bit ? sizeof(LChar) : sizeof(UChar));
1367 const char* encoding = is8Bit ? "iso-8859-1" : nativeEndianUTF16Encoding();
1368
1369 XMLDocumentParserScope scope(&cachedResourceLoader, errorFunc);
1370 return xmlReadMemory(characters, sizeInBytes, url.latin1().data(), encoding, XSLT_PARSE_OPTIONS);
1371}
1372#endif
1373
1374TextPosition XMLDocumentParser::textPosition() const
1375{
1376 xmlParserCtxtPtr context = this->context();
1377 if (!context)
1378 return TextPosition();
1379 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
1380 OrdinalNumber::fromOneBasedInt(context->input->col));
1381}
1382
1383bool XMLDocumentParser::shouldAssociateConsoleMessagesWithTextPosition() const
1384{
1385 return !m_parserPaused && !m_requestingScript;
1386}
1387
1388void XMLDocumentParser::stopParsing()
1389{
1390 if (m_sawError)
1391 insertErrorMessageBlock();
1392
1393 DocumentParser::stopParsing();
1394 if (context())
1395 xmlStopParser(context());
1396}
1397
1398void XMLDocumentParser::resumeParsing()
1399{
1400 ASSERT(!isDetached());
1401 ASSERT(m_parserPaused);
1402
1403 m_parserPaused = false;
1404
1405 // First, execute any pending callbacks
1406 while (!m_pendingCallbacks->isEmpty()) {
1407 m_pendingCallbacks->callAndRemoveFirstCallback(this);
1408
1409 // A callback paused the parser
1410 if (m_parserPaused)
1411 return;
1412 }
1413
1414 // There is normally only one string left, so toString() shouldn't copy.
1415 // In any case, the XML parser runs on the main thread and it's OK if
1416 // the passed string has more than one reference.
1417 auto rest = m_pendingSrc.toString();
1418 m_pendingSrc.clear();
1419 append(rest.impl());
1420
1421 // Finally, if finish() has been called and write() didn't result
1422 // in any further callbacks being queued, call end()
1423 if (m_finishCalled && m_pendingCallbacks->isEmpty())
1424 end();
1425}
1426
1427bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1428{
1429 ASSERT(!m_context);
1430 ASSERT(m_parsingFragment);
1431
1432 CString chunkAsUtf8 = chunk.utf8();
1433
1434 // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
1435 if (chunkAsUtf8.length() > INT_MAX)
1436 return false;
1437
1438 initializeParserContext(chunkAsUtf8);
1439 xmlParseContent(context());
1440 endDocument(); // Close any open text nodes.
1441
1442 // FIXME: If this code is actually needed, it should probably move to finish()
1443 // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1444 // Check if all the chunk has been processed.
1445 long bytesProcessed = xmlByteConsumed(context());
1446 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1447 // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
1448 // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1449 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1450 return false;
1451 }
1452
1453 // No error if the chunk is well formed or it is not but we have no error.
1454 return context()->wellFormed || !xmlCtxtGetLastError(context());
1455}
1456
1457// --------------------------------
1458
// State shared between parseAttributes() and its SAX start-element handler.
// It starts out empty and is set to a map of attribute qualified name -> value
// once an "attrs" element is seen.
using AttributeParseState = Optional<HashMap<String, String>>;
1460
1461static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, const xmlChar* /*xmlURI*/, int /*numNamespaces*/, const xmlChar** /*namespaces*/, int numAttributes, int /*numDefaulted*/, const xmlChar** libxmlAttributes)
1462{
1463 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1464 return;
1465
1466 auto& state = *static_cast<AttributeParseState*>(static_cast<xmlParserCtxtPtr>(closure)->_private);
1467
1468 state = HashMap<String, String> { };
1469
1470 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1471 for (int i = 0; i < numAttributes; i++) {
1472 String attrLocalName = toString(attributes[i].localname);
1473 int valueLength = (int) (attributes[i].end - attributes[i].value);
1474 String attrValue = toString(attributes[i].value, valueLength);
1475 String attrPrefix = toString(attributes[i].prefix);
1476 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1477
1478 state->set(attrQName, attrValue);
1479 }
1480}
1481
1482Optional<HashMap<String, String>> parseAttributes(const String& string)
1483{
1484 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1485
1486 AttributeParseState attributes;
1487
1488 xmlSAXHandler sax;
1489 memset(&sax, 0, sizeof(sax));
1490 sax.startElementNs = attributesStartElementNsHandler;
1491 sax.initialized = XML_SAX2_MAGIC;
1492
1493 auto parser = XMLParserContext::createStringParser(&sax, &attributes);
1494
1495 // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16?
1496 xmlParseChunk(parser->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), parseString.length() * sizeof(UChar), 1);
1497
1498 return attributes;
1499}
1500
1501}
1502