1 | /* |
2 | * Copyright (C) 2000 Peter Kelly <pmk@post.com> |
3 | * Copyright (C) 2005-2017 Apple Inc. All rights reserved. |
4 | * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org> |
5 | * Copyright (C) 2007 Samuel Weinig <sam@webkit.org> |
6 | * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) |
7 | * Copyright (C) 2008 Holger Hans Peter Freyther |
8 | * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
9 | * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> |
10 | * Copyright (C) 2013 Samsung Electronics. All rights reserved. |
11 | * |
12 | * This library is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU Library General Public |
14 | * License as published by the Free Software Foundation; either |
15 | * version 2 of the License, or (at your option) any later version. |
16 | * |
17 | * This library is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | * Library General Public License for more details. |
21 | * |
22 | * You should have received a copy of the GNU Library General Public License |
23 | * along with this library; see the file COPYING.LIB. If not, write to |
24 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
25 | * Boston, MA 02110-1301, USA. |
26 | */ |
27 | |
28 | #include "config.h" |
29 | #include "XMLDocumentParser.h" |
30 | |
31 | #include "CDATASection.h" |
32 | #include "Comment.h" |
33 | #include "CachedResourceLoader.h" |
34 | #include "Document.h" |
35 | #include "DocumentFragment.h" |
36 | #include "DocumentType.h" |
37 | #include "Frame.h" |
38 | #include "FrameLoader.h" |
39 | #include "HTMLEntityParser.h" |
40 | #include "HTMLHtmlElement.h" |
41 | #include "HTMLTemplateElement.h" |
42 | #include "InlineClassicScript.h" |
43 | #include "PendingScript.h" |
44 | #include "ProcessingInstruction.h" |
45 | #include "ResourceError.h" |
46 | #include "ResourceResponse.h" |
47 | #include "ScriptElement.h" |
48 | #include "ScriptSourceCode.h" |
49 | #include "Settings.h" |
50 | #include "SharedBuffer.h" |
51 | #include "StyleScope.h" |
52 | #include "TransformSource.h" |
53 | #include "XMLNSNames.h" |
54 | #include "XMLDocumentParserScope.h" |
55 | #include <libxml/parserInternals.h> |
56 | #include <wtf/unicode/UTF8Conversion.h> |
57 | |
58 | #if ENABLE(XSLT) |
59 | #include "XMLTreeViewer.h" |
60 | #include <libxslt/xslt.h> |
61 | #endif |
62 | |
63 | namespace WebCore { |
64 | |
65 | #if ENABLE(XSLT) |
66 | |
67 | static inline bool shouldRenderInXMLTreeViewerMode(Document& document) |
68 | { |
69 | if (document.sawElementsInKnownNamespaces()) |
70 | return false; |
71 | |
72 | if (document.transformSourceDocument()) |
73 | return false; |
74 | |
75 | auto* frame = document.frame(); |
76 | if (!frame) |
77 | return false; |
78 | |
79 | if (!frame->settings().developerExtrasEnabled()) |
80 | return false; |
81 | |
82 | if (frame->tree().parent()) |
83 | return false; // This document is not in a top frame |
84 | |
85 | return true; |
86 | } |
87 | |
88 | #endif |
89 | |
90 | class PendingCallbacks { |
91 | WTF_MAKE_FAST_ALLOCATED; |
92 | public: |
93 | void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** attributes) |
94 | { |
95 | auto callback = std::make_unique<PendingStartElementNSCallback>(); |
96 | |
97 | callback->xmlLocalName = xmlStrdup(xmlLocalName); |
98 | callback->xmlPrefix = xmlStrdup(xmlPrefix); |
99 | callback->xmlURI = xmlStrdup(xmlURI); |
100 | callback->numNamespaces = numNamespaces; |
101 | callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * numNamespaces * 2)); |
102 | for (int i = 0; i < numNamespaces * 2 ; i++) |
103 | callback->namespaces[i] = xmlStrdup(namespaces[i]); |
104 | callback->numAttributes = numAttributes; |
105 | callback->numDefaulted = numDefaulted; |
106 | callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * numAttributes * 5)); |
107 | for (int i = 0; i < numAttributes; i++) { |
108 | // Each attribute has 5 elements in the array: |
109 | // name, prefix, uri, value and an end pointer. |
110 | |
111 | for (int j = 0; j < 3; j++) |
112 | callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]); |
113 | |
114 | int len = attributes[i * 5 + 4] - attributes[i * 5 + 3]; |
115 | |
116 | callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len); |
117 | callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len; |
118 | } |
119 | |
120 | m_callbacks.append(WTFMove(callback)); |
121 | } |
122 | |
123 | void appendEndElementNSCallback() |
124 | { |
125 | m_callbacks.append(std::make_unique<PendingEndElementNSCallback>()); |
126 | } |
127 | |
128 | void appendCharactersCallback(const xmlChar* s, int len) |
129 | { |
130 | auto callback = std::make_unique<PendingCharactersCallback>(); |
131 | |
132 | callback->s = xmlStrndup(s, len); |
133 | callback->len = len; |
134 | |
135 | m_callbacks.append(WTFMove(callback)); |
136 | } |
137 | |
138 | void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data) |
139 | { |
140 | auto callback = std::make_unique<PendingProcessingInstructionCallback>(); |
141 | |
142 | callback->target = xmlStrdup(target); |
143 | callback->data = xmlStrdup(data); |
144 | |
145 | m_callbacks.append(WTFMove(callback)); |
146 | } |
147 | |
148 | void appendCDATABlockCallback(const xmlChar* s, int len) |
149 | { |
150 | auto callback = std::make_unique<PendingCDATABlockCallback>(); |
151 | |
152 | callback->s = xmlStrndup(s, len); |
153 | callback->len = len; |
154 | |
155 | m_callbacks.append(WTFMove(callback)); |
156 | } |
157 | |
158 | void (const xmlChar* s) |
159 | { |
160 | auto callback = std::make_unique<PendingCommentCallback>(); |
161 | |
162 | callback->s = xmlStrdup(s); |
163 | |
164 | m_callbacks.append(WTFMove(callback)); |
165 | } |
166 | |
167 | void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) |
168 | { |
169 | auto callback = std::make_unique<PendingInternalSubsetCallback>(); |
170 | |
171 | callback->name = xmlStrdup(name); |
172 | callback->externalID = xmlStrdup(externalID); |
173 | callback->systemID = xmlStrdup(systemID); |
174 | |
175 | m_callbacks.append(WTFMove(callback)); |
176 | } |
177 | |
178 | void appendErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber) |
179 | { |
180 | auto callback = std::make_unique<PendingErrorCallback>(); |
181 | |
182 | callback->message = xmlStrdup(message); |
183 | callback->type = type; |
184 | callback->lineNumber = lineNumber; |
185 | callback->columnNumber = columnNumber; |
186 | |
187 | m_callbacks.append(WTFMove(callback)); |
188 | } |
189 | |
190 | void callAndRemoveFirstCallback(XMLDocumentParser* parser) |
191 | { |
192 | std::unique_ptr<PendingCallback> callback = m_callbacks.takeFirst(); |
193 | callback->call(parser); |
194 | } |
195 | |
196 | bool isEmpty() const { return m_callbacks.isEmpty(); } |
197 | |
198 | private: |
199 | struct PendingCallback { |
200 | virtual ~PendingCallback() = default; |
201 | virtual void call(XMLDocumentParser* parser) = 0; |
202 | }; |
203 | |
204 | struct PendingStartElementNSCallback : public PendingCallback { |
205 | virtual ~PendingStartElementNSCallback() |
206 | { |
207 | xmlFree(xmlLocalName); |
208 | xmlFree(xmlPrefix); |
209 | xmlFree(xmlURI); |
210 | for (int i = 0; i < numNamespaces * 2; i++) |
211 | xmlFree(namespaces[i]); |
212 | xmlFree(namespaces); |
213 | for (int i = 0; i < numAttributes; i++) { |
214 | for (int j = 0; j < 4; j++) |
215 | xmlFree(attributes[i * 5 + j]); |
216 | } |
217 | xmlFree(attributes); |
218 | } |
219 | |
220 | void call(XMLDocumentParser* parser) override |
221 | { |
222 | parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI, numNamespaces, const_cast<const xmlChar**>(namespaces), numAttributes, numDefaulted, const_cast<const xmlChar**>(attributes)); |
223 | } |
224 | |
225 | xmlChar* xmlLocalName; |
226 | xmlChar* xmlPrefix; |
227 | xmlChar* xmlURI; |
228 | int numNamespaces; |
229 | xmlChar** namespaces; |
230 | int numAttributes; |
231 | int numDefaulted; |
232 | xmlChar** attributes; |
233 | }; |
234 | |
235 | struct PendingEndElementNSCallback : public PendingCallback { |
236 | void call(XMLDocumentParser* parser) override |
237 | { |
238 | parser->endElementNs(); |
239 | } |
240 | }; |
241 | |
242 | struct PendingCharactersCallback : public PendingCallback { |
243 | virtual ~PendingCharactersCallback() |
244 | { |
245 | xmlFree(s); |
246 | } |
247 | |
248 | void call(XMLDocumentParser* parser) override |
249 | { |
250 | parser->characters(s, len); |
251 | } |
252 | |
253 | xmlChar* s; |
254 | int len; |
255 | }; |
256 | |
257 | struct PendingProcessingInstructionCallback : public PendingCallback { |
258 | virtual ~PendingProcessingInstructionCallback() |
259 | { |
260 | xmlFree(target); |
261 | xmlFree(data); |
262 | } |
263 | |
264 | void call(XMLDocumentParser* parser) override |
265 | { |
266 | parser->processingInstruction(target, data); |
267 | } |
268 | |
269 | xmlChar* target; |
270 | xmlChar* data; |
271 | }; |
272 | |
273 | struct PendingCDATABlockCallback : public PendingCallback { |
274 | virtual ~PendingCDATABlockCallback() |
275 | { |
276 | xmlFree(s); |
277 | } |
278 | |
279 | void call(XMLDocumentParser* parser) override |
280 | { |
281 | parser->cdataBlock(s, len); |
282 | } |
283 | |
284 | xmlChar* s; |
285 | int len; |
286 | }; |
287 | |
288 | struct : public PendingCallback { |
289 | virtual () |
290 | { |
291 | xmlFree(s); |
292 | } |
293 | |
294 | void (XMLDocumentParser* parser) override |
295 | { |
296 | parser->comment(s); |
297 | } |
298 | |
299 | xmlChar* ; |
300 | }; |
301 | |
302 | struct PendingInternalSubsetCallback : public PendingCallback { |
303 | virtual ~PendingInternalSubsetCallback() |
304 | { |
305 | xmlFree(name); |
306 | xmlFree(externalID); |
307 | xmlFree(systemID); |
308 | } |
309 | |
310 | void call(XMLDocumentParser* parser) override |
311 | { |
312 | parser->internalSubset(name, externalID, systemID); |
313 | } |
314 | |
315 | xmlChar* name; |
316 | xmlChar* externalID; |
317 | xmlChar* systemID; |
318 | }; |
319 | |
320 | struct PendingErrorCallback: public PendingCallback { |
321 | virtual ~PendingErrorCallback() |
322 | { |
323 | xmlFree(message); |
324 | } |
325 | |
326 | void call(XMLDocumentParser* parser) override |
327 | { |
328 | parser->handleError(type, reinterpret_cast<char*>(message), TextPosition(lineNumber, columnNumber)); |
329 | } |
330 | |
331 | XMLErrors::ErrorType type; |
332 | xmlChar* message; |
333 | OrdinalNumber lineNumber; |
334 | OrdinalNumber columnNumber; |
335 | }; |
336 | |
337 | Deque<std::unique_ptr<PendingCallback>> m_callbacks; |
338 | }; |
339 | // -------------------------------- |
340 | |
// Sentinel: openFunc() returns its address for loads it refuses, and readFunc()/closeFunc()
// compare their context against that address. Its value is not read in the code visible here.
static int globalDescriptor = 0;
// Set by initializeXMLParser() to the thread that registered the libxml I/O callbacks;
// matchFunc() only matches loads made on this thread.
static Thread* libxmlLoaderThread { nullptr };
343 | |
344 | static int matchFunc(const char*) |
345 | { |
346 | // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid |
347 | // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353 |
348 | return XMLDocumentParserScope::currentCachedResourceLoader && libxmlLoaderThread == &Thread::current(); |
349 | } |
350 | |
351 | class OffsetBuffer { |
352 | WTF_MAKE_FAST_ALLOCATED; |
353 | public: |
354 | OffsetBuffer(Vector<char> buffer) |
355 | : m_buffer(WTFMove(buffer)) |
356 | , m_currentOffset(0) |
357 | { |
358 | } |
359 | |
360 | int readOutBytes(char* outputBuffer, unsigned askedToRead) |
361 | { |
362 | unsigned bytesLeft = m_buffer.size() - m_currentOffset; |
363 | unsigned lenToCopy = std::min(askedToRead, bytesLeft); |
364 | if (lenToCopy) { |
365 | memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy); |
366 | m_currentOffset += lenToCopy; |
367 | } |
368 | return lenToCopy; |
369 | } |
370 | |
371 | private: |
372 | Vector<char> m_buffer; |
373 | unsigned m_currentOffset; |
374 | }; |
375 | |
376 | static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy) |
377 | { |
378 | if (!scriptingContentIsAllowed(parserContentPolicy)) |
379 | element->stripScriptingAttributes(attributeVector); |
380 | element->parserSetAttributes(attributeVector); |
381 | } |
382 | |
383 | static void switchToUTF16(xmlParserCtxtPtr ctxt) |
384 | { |
385 | // Hack around libxml2's lack of encoding overide support by manually |
386 | // resetting the encoding to UTF-16 before every chunk. Otherwise libxml |
387 | // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks |
388 | // and switch encodings, causing the parse to fail. |
389 | |
390 | // FIXME: Can we just use XML_PARSE_IGNORE_ENC now? |
391 | |
392 | const UChar BOM = 0xFEFF; |
393 | const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
394 | xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); |
395 | } |
396 | |
397 | static bool shouldAllowExternalLoad(const URL& url) |
398 | { |
399 | String urlString = url.string(); |
400 | |
401 | // On non-Windows platforms libxml asks for this URL, the "XML_XML_DEFAULT_CATALOG", on initialization. |
402 | if (urlString == "file:///etc/xml/catalog" ) |
403 | return false; |
404 | |
405 | // On Windows, libxml computes a URL relative to where its DLL resides. |
406 | if (startsWithLettersIgnoringASCIICase(urlString, "file:///" ) && urlString.endsWithIgnoringASCIICase("/etc/catalog" )) |
407 | return false; |
408 | |
409 | // The most common DTD. There isn't much point in hammering www.w3c.org by requesting this for every XHTML document. |
410 | if (startsWithLettersIgnoringASCIICase(urlString, "http://www.w3.org/tr/xhtml" )) |
411 | return false; |
412 | |
413 | // Similarly, there isn't much point in requesting the SVG DTD. |
414 | if (startsWithLettersIgnoringASCIICase(urlString, "http://www.w3.org/graphics/svg" )) |
415 | return false; |
416 | |
417 | // The libxml doesn't give us a lot of context for deciding whether to |
418 | // allow this request. In the worst case, this load could be for an |
419 | // external entity and the resulting document could simply read the |
420 | // retrieved content. If we had more context, we could potentially allow |
421 | // the parser to load a DTD. As things stand, we take the conservative |
422 | // route and allow same-origin requests only. |
423 | if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin().canRequest(url)) { |
424 | XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url); |
425 | return false; |
426 | } |
427 | |
428 | return true; |
429 | } |
430 | |
// libxml I/O "open" callback. Turns uri into a URL and, when shouldAllowExternalLoad() permits it,
// loads the resource synchronously through the frame loader of the current CachedResourceLoader.
// Returns a heap-allocated OffsetBuffer holding the loaded bytes (deleted by closeFunc()), or the
// address of globalDescriptor when the load is refused before or after the fetch.
static void* openFunc(const char* uri)
{
    ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
    ASSERT(libxmlLoaderThread == &Thread::current());

    URL url(URL(), uri);

    if (!shouldAllowExternalLoad(url))
        return &globalDescriptor;

    ResourceError error;
    ResourceResponse response;
    RefPtr<SharedBuffer> data;


    {
        // Grab the loader before constructing the scope object with nullptr.
        // NOTE(review): presumably the scope clears the current loader for the duration of the
        // synchronous load and restores it on destruction — confirm in XMLDocumentParserScope.
        CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
        XMLDocumentParserScope scope(nullptr);
        // FIXME: We should restore the original global error handler as well.

        // Without a frame no load is attempted; response and data keep their default state.
        if (cachedResourceLoader->frame()) {
            FetchOptions options;
            options.mode = FetchOptions::Mode::SameOrigin;
            options.credentials = FetchOptions::Credentials::Include;
            cachedResourceLoader->frame()->loader().loadResourceSynchronously(url, ClientCredentialPolicy::MayAskClientForCredentials, options, { }, error, response, data);
        }
    }

    // We have to check the URL again after the load to catch redirects.
    // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    if (!shouldAllowExternalLoad(response.url()))
        return &globalDescriptor;
    // Copy whatever was loaded (possibly nothing) into a buffer owned by the returned object.
    Vector<char> buffer;
    if (data)
        buffer.append(data->data(), data->size());
    return new OffsetBuffer(WTFMove(buffer));
}
468 | |
469 | static int readFunc(void* context, char* buffer, int len) |
470 | { |
471 | // Do 0-byte reads in case of a null descriptor |
472 | if (context == &globalDescriptor) |
473 | return 0; |
474 | |
475 | OffsetBuffer* data = static_cast<OffsetBuffer*>(context); |
476 | return data->readOutBytes(buffer, len); |
477 | } |
478 | |
// libxml I/O "write" callback. All arguments are ignored and nothing is ever written.
static int writeFunc(void*, const char*, int)
{
    // Every write is reported as a 0-byte write.
    constexpr int bytesWritten = 0;
    return bytesWritten;
}
484 | |
485 | static int closeFunc(void* context) |
486 | { |
487 | if (context != &globalDescriptor) { |
488 | OffsetBuffer* data = static_cast<OffsetBuffer*>(context); |
489 | delete data; |
490 | } |
491 | return 0; |
492 | } |
493 | |
494 | #if ENABLE(XSLT) |
// printf-style error callback that deliberately discards every message it is given.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
499 | #endif |
500 | |
501 | static void initializeXMLParser() |
502 | { |
503 | static std::once_flag flag; |
504 | std::call_once(flag, [&] { |
505 | xmlInitParser(); |
506 | xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); |
507 | xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); |
508 | libxmlLoaderThread = &Thread::current(); |
509 | }); |
510 | } |
511 | |
512 | Ref<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData) |
513 | { |
514 | initializeXMLParser(); |
515 | |
516 | xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0); |
517 | parser->_private = userData; |
518 | |
519 | // Substitute entities. |
520 | xmlCtxtUseOptions(parser, XML_PARSE_NOENT | XML_PARSE_HUGE); |
521 | |
522 | switchToUTF16(parser); |
523 | |
524 | return adoptRef(*new XMLParserContext(parser)); |
525 | } |
526 | |
527 | |
528 | // Chunk should be encoded in UTF-8 |
529 | RefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk) |
530 | { |
531 | initializeXMLParser(); |
532 | |
533 | // appendFragmentSource() checks that the length doesn't overflow an int. |
534 | xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length()); |
535 | |
536 | if (!parser) |
537 | return 0; |
538 | |
539 | memcpy(parser->sax, handlers, sizeof(xmlSAXHandler)); |
540 | |
541 | // Substitute entities. |
542 | // FIXME: Why is XML_PARSE_NODICT needed? This is different from what createStringParser does. |
543 | xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT | XML_PARSE_HUGE); |
544 | |
545 | // Internal initialization |
546 | parser->sax2 = 1; |
547 | parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT |
548 | parser->depth = 0; |
549 | parser->str_xml = xmlDictLookup(parser->dict, reinterpret_cast<xmlChar*>(const_cast<char*>("xml" )), 3); |
550 | parser->str_xmlns = xmlDictLookup(parser->dict, reinterpret_cast<xmlChar*>(const_cast<char*>("xmlns" )), 5); |
551 | parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); |
552 | parser->_private = userData; |
553 | |
554 | return adoptRef(*new XMLParserContext(parser)); |
555 | } |
556 | |
557 | // -------------------------------- |
558 | |
559 | bool XMLDocumentParser::supportsXMLVersion(const String& version) |
560 | { |
561 | return version == "1.0" ; |
562 | } |
563 | |
// Creates a parser for a whole document. The document itself is the initial current node,
// frameView is kept in m_view, and an empty queue for pending callbacks is created.
XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
    : ScriptableDocumentParser(document)
    , m_view(frameView)
    , m_pendingCallbacks(std::make_unique<PendingCallbacks>())
    , m_currentNode(&document)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
{
}
572 | |
573 | XMLDocumentParser::XMLDocumentParser(DocumentFragment& fragment, Element* parentElement, ParserContentPolicy parserContentPolicy) |
574 | : ScriptableDocumentParser(fragment.document(), parserContentPolicy) |
575 | , m_pendingCallbacks(std::make_unique<PendingCallbacks>()) |
576 | , m_currentNode(&fragment) |
577 | , m_scriptStartPosition(TextPosition::belowRangePosition()) |
578 | , m_parsingFragment(true) |
579 | { |
580 | fragment.ref(); |
581 | |
582 | // Add namespaces based on the parent node |
583 | Vector<Element*> elemStack; |
584 | while (parentElement) { |
585 | elemStack.append(parentElement); |
586 | |
587 | ContainerNode* node = parentElement->parentNode(); |
588 | if (!is<Element>(node)) |
589 | break; |
590 | parentElement = downcast<Element>(node); |
591 | } |
592 | |
593 | if (elemStack.isEmpty()) |
594 | return; |
595 | |
596 | // FIXME: Share code with isDefaultNamespace() per http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#parsing-xhtml-fragments |
597 | for (; !elemStack.isEmpty(); elemStack.removeLast()) { |
598 | Element* element = elemStack.last(); |
599 | if (element->hasAttributes()) { |
600 | for (const Attribute& attribute : element->attributesIterator()) { |
601 | if (attribute.localName() == xmlnsAtom()) |
602 | m_defaultNamespaceURI = attribute.value(); |
603 | else if (attribute.prefix() == xmlnsAtom()) |
604 | m_prefixToNamespaceMap.set(attribute.localName(), attribute.value()); |
605 | } |
606 | } |
607 | } |
608 | |
609 | if (m_defaultNamespaceURI.isNull()) |
610 | m_defaultNamespaceURI = parentElement->namespaceURI(); |
611 | } |
612 | |
613 | XMLParserContext::~XMLParserContext() |
614 | { |
615 | if (m_context->myDoc) |
616 | xmlFreeDoc(m_context->myDoc); |
617 | xmlFreeParserCtxt(m_context); |
618 | } |
619 | |
// Destroys the parser. Expects the node stack and current node to have been cleared already,
// and unregisters this parser as client of a still-pending script.
XMLDocumentParser::~XMLDocumentParser()
{
    // The XMLDocumentParser will always be detached before being destroyed.
    ASSERT(m_currentNodeStack.isEmpty());
    ASSERT(!m_currentNode);

    // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    // A script that is still pending must not keep this parser as its client.
    if (m_pendingScript)
        m_pendingScript->clearClient();
}
630 | |
// Feeds parseString to libxml as one UTF-16 chunk, creating the parser context on first use.
// Afterwards, if the document's decoder saw an error, reports a fatal "Encoding error" at the
// parser's current input position.
void XMLDocumentParser::doWrite(const String& parseString)
{
    ASSERT(!isDetached());
    if (!m_context)
        initializeParserContext();

    // Protect the libxml context from deletion during a callback
    RefPtr<XMLParserContext> context = m_context;

    // libXML throws an error if you try to switch the encoding for an empty string.
    if (parseString.length()) {
        // JavaScript may cause the parser to detach during xmlParseChunk
        // keep this alive until this function is done.
        Ref<XMLDocumentParser> protectedThis(*this);

        // Makes the document's CachedResourceLoader the current one while libxml runs
        // (shouldAllowExternalLoad() and openFunc() read it).
        XMLDocumentParserScope scope(&document()->cachedResourceLoader());

        // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16?
        switchToUTF16(context->context());
        // The size is given in bytes, hence the sizeof(UChar) factor. The upconverted characters are
        // a temporary that lives until the end of this statement. The final 0 is libxml's
        // "terminate" flag: this is not the last chunk.
        xmlParseChunk(context->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), sizeof(UChar) * parseString.length(), 0);

        // JavaScript (which may be run under the xmlParseChunk callstack) may
        // cause the parser to be stopped or detached.
        if (isStopped())
            return;
    }

    // FIXME: Why is this here? And why is it after we process the passed source?
    if (document()->decoder() && document()->decoder()->sawError()) {
        // If the decoder saw an error, report it as fatal (stops parsing)
        TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
        handleError(XMLErrors::fatal, "Encoding error", position);
    }
}
665 | |
666 | static inline String toString(const xmlChar* string, size_t size) |
667 | { |
668 | return String::fromUTF8(reinterpret_cast<const char*>(string), size); |
669 | } |
670 | |
671 | static inline String toString(const xmlChar* string) |
672 | { |
673 | return String::fromUTF8(reinterpret_cast<const char*>(string)); |
674 | } |
675 | |
676 | static inline AtomicString toAtomicString(const xmlChar* string, size_t size) |
677 | { |
678 | return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size); |
679 | } |
680 | |
681 | static inline AtomicString toAtomicString(const xmlChar* string) |
682 | { |
683 | return AtomicString::fromUTF8(reinterpret_cast<const char*>(string)); |
684 | } |
685 | |
// View onto one entry of the namespace array that libxml passes to startElementNs():
// handleNamespaceAttributes() reinterprets that flat pointer array as an array of these pairs.
struct _xmlSAX2Namespace {
    const xmlChar* prefix; // Null for a default-namespace declaration (see handleNamespaceAttributes()).
    const xmlChar* uri;
};
typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
691 | |
692 | static inline bool handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int numNamespaces) |
693 | { |
694 | xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces); |
695 | for (int i = 0; i < numNamespaces; i++) { |
696 | AtomicString namespaceQName = xmlnsAtom(); |
697 | AtomicString namespaceURI = toAtomicString(namespaces[i].uri); |
698 | if (namespaces[i].prefix) |
699 | namespaceQName = "xmlns:" + toString(namespaces[i].prefix); |
700 | |
701 | auto result = Element::parseAttributeName(XMLNSNames::xmlnsNamespaceURI, namespaceQName); |
702 | if (result.hasException()) |
703 | return false; |
704 | |
705 | prefixedAttributes.append(Attribute(result.releaseReturnValue(), namespaceURI)); |
706 | } |
707 | return true; |
708 | } |
709 | |
// View onto one entry of the attribute array that libxml passes to startElementNs():
// handleElementAttributes() reinterprets that flat pointer array as an array of these
// five-pointer records.
struct _xmlSAX2Attributes {
    const xmlChar* localname;
    const xmlChar* prefix;
    const xmlChar* uri;
    const xmlChar* value; // Start of the value; its length is end - value.
    const xmlChar* end;
};
typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
718 | |
719 | static inline bool handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int numAttributes) |
720 | { |
721 | xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); |
722 | for (int i = 0; i < numAttributes; i++) { |
723 | int valueLength = static_cast<int>(attributes[i].end - attributes[i].value); |
724 | AtomicString attrValue = toAtomicString(attributes[i].value, valueLength); |
725 | String attrPrefix = toString(attributes[i].prefix); |
726 | AtomicString attrURI = attrPrefix.isEmpty() ? nullAtom() : toAtomicString(attributes[i].uri); |
727 | AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname); |
728 | |
729 | auto result = Element::parseAttributeName(attrURI, attrQName); |
730 | if (result.hasException()) |
731 | return false; |
732 | |
733 | prefixedAttributes.append(Attribute(result.releaseReturnValue(), attrValue)); |
734 | } |
735 | return true; |
736 | } |
737 | |
// Tells whether the work-around for https://bugzilla.gnome.org/show_bug.cgi?id=502960 is needed.
// With that bug libxml doesn't include the namespace for parsed entities, which breaks entity
// expansion for all entities containing elements.
static inline bool hackAroundLibXMLEntityParsingBug()
{
#if LIBXML_VERSION < 20704
    return true;
#else
    // This bug has been fixed in libxml 2.7.4.
    return false;
#endif
}
750 | |
// SAX2 start-element handler. While the parser is paused the event is queued for later replay.
// Otherwise a new element is created from the name, namespace and attribute data supplied by
// libxml, appended under the current node and pushed as the new current node. Parsing is
// stopped when a namespace declaration or attribute name cannot be parsed.
void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int numNamespaces, const xmlChar** libxmlNamespaces, int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes)
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, numNamespaces, libxmlNamespaces, numAttributes, numDefaulted, libxmlAttributes);
        return;
    }

    if (!updateLeafTextNode())
        return;

    AtomicString localName = toAtomicString(xmlLocalName);
    AtomicString uri = toAtomicString(xmlURI);
    AtomicString prefix = toAtomicString(xmlPrefix);

    // When parsing a fragment and libxml supplied no namespace, fall back to the namespace
    // information gathered from the context element's ancestors in the fragment constructor.
    if (m_parsingFragment && uri.isNull()) {
        if (!prefix.isNull())
            uri = m_prefixToNamespaceMap.get(prefix);
        else
            uri = m_defaultNamespaceURI;
    }

    // If libxml entity parsing is broken, transfer the currentNodes' namespaceURI to the new node,
    // if we're currently expanding elements which originate from an entity declaration.
    if (hackAroundLibXMLEntityParsingBug() && depthTriggeringEntityExpansion() != -1 && context()->depth > depthTriggeringEntityExpansion() && uri.isNull() && prefix.isNull())
        uri = m_currentNode->namespaceURI();

    // Remember whether this is the first element seen; used at the end to inject user scripts.
    bool isFirstElement = !m_sawFirstElement;
    m_sawFirstElement = true;

    QualifiedName qName(prefix, localName, uri);
    auto newElement = m_currentNode->document().createElement(qName, true);

    Vector<Attribute> prefixedAttributes;
    // On failure the attributes collected so far are still set on the element before stopping.
    if (!handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, numNamespaces)) {
        setAttributes(newElement.ptr(), prefixedAttributes, parserContentPolicy());
        stopParsing();
        return;
    }

    bool success = handleElementAttributes(prefixedAttributes, libxmlAttributes, numAttributes);
    setAttributes(newElement.ptr(), prefixedAttributes, parserContentPolicy());
    if (!success) {
        stopParsing();
        return;
    }

    newElement->beginParsingChildren();

    // Record where a script element starts; endElementNs() passes this position to the script.
    if (isScriptElement(newElement.get()))
        m_scriptStartPosition = textPosition();

    m_currentNode->parserAppendChild(newElement);
    if (!m_currentNode) // Synchronous DOM events may have removed the current node.
        return;

    // Children of a <template> element are parsed into its content, not into the element itself.
    if (is<HTMLTemplateElement>(newElement))
        pushCurrentNode(&downcast<HTMLTemplateElement>(newElement.get()).content());
    else
        pushCurrentNode(newElement.ptr());

    if (is<HTMLHtmlElement>(newElement))
        downcast<HTMLHtmlElement>(newElement.get()).insertedByParser();

    if (!m_parsingFragment && isFirstElement && document()->frame())
        document()->frame()->injectUserScripts(InjectAtDocumentStart);
}
820 | |
// SAX2 end-element handler. While the parser is paused the event is queued for later replay.
// Otherwise the current node finishes parsing its children and is popped from the node stack.
// A script element is removed when scripting content is not allowed; when there is a view and
// the element is connected, the script is prepared and then executed or left pending.
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks->appendEndElementNSCallback();
        return;
    }

    // JavaScript can detach the parser. Make sure this is not released
    // before the end of this method.
    Ref<XMLDocumentParser> protectedThis(*this);

    if (!updateLeafTextNode())
        return;

    RefPtr<ContainerNode> node = m_currentNode;
    node->finishParsingChildren();

    // Once we reach the depth again where entity expansion started, stop executing the work-around.
    if (hackAroundLibXMLEntityParsingBug() && context()->depth <= depthTriggeringEntityExpansion())
        setDepthTriggeringEntityExpansion(-1);

    // Script elements are dropped from the tree when the content policy forbids scripting content.
    if (!scriptingContentIsAllowed(parserContentPolicy()) && is<Element>(*node) && isScriptElement(downcast<Element>(*node))) {
        popCurrentNode();
        node->remove();
        return;
    }

    // Nothing more to do for non-element nodes or when the parser has no view.
    if (!node->isElementNode() || !m_view) {
        popCurrentNode();
        return;
    }

    auto& element = downcast<Element>(*node);

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
    if (!element.isConnected()) {
        popCurrentNode();
        return;
    }

    if (!isScriptElement(element)) {
        popCurrentNode();
        return;
    }

    // Don't load external scripts for standalone documents (for now).
    ASSERT(!m_pendingScript);
    m_requestingScript = true;

    auto& scriptElement = downcastScriptElement(element);
    if (scriptElement.prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute)) {
        // FIXME: Script execution should be shared between
        // the libxml2 and Qt XMLDocumentParser implementations.

        if (scriptElement.readyToBeParserExecuted())
            scriptElement.executeClassicScript(ScriptSourceCode(scriptElement.scriptContent(), URL(document()->url()), m_scriptStartPosition, JSC::SourceProviderSourceType::Program, InlineClassicScript::create(scriptElement)));
        else if (scriptElement.willBeParserExecuted() && scriptElement.loadableScript()) {
            m_pendingScript = PendingScript::create(scriptElement, *scriptElement.loadableScript());
            m_pendingScript->setClient(*this);

            // m_pendingScript will be nullptr if script was already loaded and setClient() executed it.
            if (m_pendingScript)
                pauseParsing();
        }

        // JavaScript may have detached the parser
        // In that case return without clearing m_requestingScript or popping the current node.
        if (isDetached())
            return;
    }
    m_requestingScript = false;
    popCurrentNode();
}
897 | |
898 | void XMLDocumentParser::characters(const xmlChar* characters, int length) |
899 | { |
900 | if (isStopped()) |
901 | return; |
902 | |
903 | if (m_parserPaused) { |
904 | m_pendingCallbacks->appendCharactersCallback(characters, length); |
905 | return; |
906 | } |
907 | |
908 | if (!m_leafTextNode) |
909 | createLeafTextNode(); |
910 | m_bufferedText.append(characters, length); |
911 | } |
912 | |
913 | void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args) |
914 | { |
915 | if (isStopped()) |
916 | return; |
917 | |
918 | va_list preflightArgs; |
919 | va_copy(preflightArgs, args); |
920 | size_t stringLength = vsnprintf(nullptr, 0, message, preflightArgs); |
921 | va_end(preflightArgs); |
922 | |
923 | Vector<char, 1024> buffer(stringLength + 1); |
924 | vsnprintf(buffer.data(), stringLength + 1, message, args); |
925 | |
926 | TextPosition position = textPosition(); |
927 | if (m_parserPaused) |
928 | m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(buffer.data()), position.m_line, position.m_column); |
929 | else |
930 | handleError(type, buffer.data(), textPosition()); |
931 | } |
932 | |
933 | void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data) |
934 | { |
935 | if (isStopped()) |
936 | return; |
937 | |
938 | if (m_parserPaused) { |
939 | m_pendingCallbacks->appendProcessingInstructionCallback(target, data); |
940 | return; |
941 | } |
942 | |
943 | if (!updateLeafTextNode()) |
944 | return; |
945 | |
946 | auto result = m_currentNode->document().createProcessingInstruction(toString(target), toString(data)); |
947 | if (result.hasException()) |
948 | return; |
949 | auto pi = result.releaseReturnValue(); |
950 | |
951 | pi->setCreatedByParser(true); |
952 | |
953 | m_currentNode->parserAppendChild(pi); |
954 | |
955 | pi->finishParsingChildren(); |
956 | |
957 | if (pi->isCSS()) |
958 | m_sawCSS = true; |
959 | |
960 | #if ENABLE(XSLT) |
961 | m_sawXSLTransform = !m_sawFirstElement && pi->isXSL(); |
962 | if (m_sawXSLTransform && !document()->transformSourceDocument()) |
963 | stopParsing(); |
964 | #endif |
965 | } |
966 | |
967 | void XMLDocumentParser::cdataBlock(const xmlChar* s, int len) |
968 | { |
969 | if (isStopped()) |
970 | return; |
971 | |
972 | if (m_parserPaused) { |
973 | m_pendingCallbacks->appendCDATABlockCallback(s, len); |
974 | return; |
975 | } |
976 | |
977 | if (!updateLeafTextNode()) |
978 | return; |
979 | |
980 | m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), toString(s, len))); |
981 | } |
982 | |
983 | void XMLDocumentParser::(const xmlChar* s) |
984 | { |
985 | if (isStopped()) |
986 | return; |
987 | |
988 | if (m_parserPaused) { |
989 | m_pendingCallbacks->appendCommentCallback(s); |
990 | return; |
991 | } |
992 | |
993 | if (!updateLeafTextNode()) |
994 | return; |
995 | |
996 | m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), toString(s))); |
997 | } |
998 | |
// Values startDocument() accepts for its |standalone| argument. The numeric
// values are spelled out because the argument arrives as a plain int and is
// cast to this enum.
enum StandaloneInfo {
    StandaloneUnspecified = -2,
    NoXMlDeclaration = -1,
    StandaloneNo = 0,
    StandaloneYes = 1
};
1005 | |
1006 | void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone) |
1007 | { |
1008 | StandaloneInfo standaloneInfo = (StandaloneInfo)standalone; |
1009 | if (standaloneInfo == NoXMlDeclaration) { |
1010 | document()->setHasXMLDeclaration(false); |
1011 | return; |
1012 | } |
1013 | |
1014 | if (version) |
1015 | document()->setXMLVersion(toString(version)); |
1016 | if (standalone != StandaloneUnspecified) |
1017 | document()->setXMLStandalone(standaloneInfo == StandaloneYes); |
1018 | if (encoding) |
1019 | document()->setXMLEncoding(toString(encoding)); |
1020 | document()->setHasXMLDeclaration(true); |
1021 | } |
1022 | |
// End-of-document callback: updates the leaf text node one last time. The
// result of updateLeafTextNode() is deliberately not inspected here.
void XMLDocumentParser::endDocument()
{
    updateLeafTextNode();
}
1027 | |
1028 | void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) |
1029 | { |
1030 | if (isStopped()) |
1031 | return; |
1032 | |
1033 | if (m_parserPaused) { |
1034 | m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID); |
1035 | return; |
1036 | } |
1037 | |
1038 | if (document()) |
1039 | document()->parserAppendChild(DocumentType::create(*document(), toString(name), toString(externalID), toString(systemID))); |
1040 | } |
1041 | |
1042 | static inline XMLDocumentParser* getParser(void* closure) |
1043 | { |
1044 | xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); |
1045 | return static_cast<XMLDocumentParser*>(ctxt->_private); |
1046 | } |
1047 | |
// This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
// Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
//
// Returns true when the calling SAX handler should ignore the current event.
// |closure| is the libxml parser context passed to the handler.
static inline bool hackAroundLibXMLEntityBug(void* closure)
{
#if LIBXML_VERSION >= 20627
    // This bug has been fixed in libxml 2.6.27.
    UNUSED_PARAM(closure);
    return false;
#else
    // Older libxml: ignore the event whenever the context's node field is set.
    return static_cast<xmlParserCtxtPtr>(closure)->node;
#endif
}
1060 | |
1061 | static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int numNamespaces, const xmlChar** namespaces, int numAttributes, int numDefaulted, const xmlChar** libxmlAttributes) |
1062 | { |
1063 | if (hackAroundLibXMLEntityBug(closure)) |
1064 | return; |
1065 | |
1066 | getParser(closure)->startElementNs(localname, prefix, uri, numNamespaces, namespaces, numAttributes, numDefaulted, libxmlAttributes); |
1067 | } |
1068 | |
1069 | static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*) |
1070 | { |
1071 | if (hackAroundLibXMLEntityBug(closure)) |
1072 | return; |
1073 | |
1074 | getParser(closure)->endElementNs(); |
1075 | } |
1076 | |
1077 | static void charactersHandler(void* closure, const xmlChar* s, int len) |
1078 | { |
1079 | if (hackAroundLibXMLEntityBug(closure)) |
1080 | return; |
1081 | |
1082 | getParser(closure)->characters(s, len); |
1083 | } |
1084 | |
1085 | static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data) |
1086 | { |
1087 | if (hackAroundLibXMLEntityBug(closure)) |
1088 | return; |
1089 | |
1090 | getParser(closure)->processingInstruction(target, data); |
1091 | } |
1092 | |
1093 | static void cdataBlockHandler(void* closure, const xmlChar* s, int len) |
1094 | { |
1095 | if (hackAroundLibXMLEntityBug(closure)) |
1096 | return; |
1097 | |
1098 | getParser(closure)->cdataBlock(s, len); |
1099 | } |
1100 | |
1101 | static void commentHandler(void* closure, const xmlChar* ) |
1102 | { |
1103 | if (hackAroundLibXMLEntityBug(closure)) |
1104 | return; |
1105 | |
1106 | getParser(closure)->comment(comment); |
1107 | } |
1108 | |
1109 | WTF_ATTRIBUTE_PRINTF(2, 3) |
1110 | static void warningHandler(void* closure, const char* message, ...) |
1111 | { |
1112 | va_list args; |
1113 | va_start(args, message); |
1114 | getParser(closure)->error(XMLErrors::warning, message, args); |
1115 | va_end(args); |
1116 | } |
1117 | |
1118 | WTF_ATTRIBUTE_PRINTF(2, 3) |
1119 | static void fatalErrorHandler(void* closure, const char* message, ...) |
1120 | { |
1121 | va_list args; |
1122 | va_start(args, message); |
1123 | getParser(closure)->error(XMLErrors::fatal, message, args); |
1124 | va_end(args); |
1125 | } |
1126 | |
1127 | WTF_ATTRIBUTE_PRINTF(2, 3) |
1128 | static void normalErrorHandler(void* closure, const char* message, ...) |
1129 | { |
1130 | va_list args; |
1131 | va_start(args, message); |
1132 | getParser(closure)->error(XMLErrors::nonFatal, message, args); |
1133 | va_end(args); |
1134 | } |
1135 | |
1136 | // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is |
1137 | // a hack to avoid malloc/free. Using a global variable like this could cause trouble |
1138 | // if libxml implementation details were to change |
1139 | static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; |
1140 | |
1141 | static xmlEntityPtr sharedXHTMLEntity() |
1142 | { |
1143 | static xmlEntity entity; |
1144 | if (!entity.type) { |
1145 | entity.type = XML_ENTITY_DECL; |
1146 | entity.orig = sharedXHTMLEntityResult; |
1147 | entity.content = sharedXHTMLEntityResult; |
1148 | entity.etype = XML_INTERNAL_PREDEFINED_ENTITY; |
1149 | } |
1150 | return &entity; |
1151 | } |
1152 | |
1153 | static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize) |
1154 | { |
1155 | const char* originalTarget = target; |
1156 | auto conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, utf16Entity + numberOfCodeUnits, &target, target + targetSize); |
1157 | if (conversionResult != WTF::Unicode::ConversionOK) |
1158 | return 0; |
1159 | |
1160 | // Even though we must pass the length, libxml expects the entity string to be null terminated. |
1161 | ASSERT(target >= originalTarget + 1); |
1162 | *target = '\0'; |
1163 | return target - originalTarget; |
1164 | } |
1165 | |
1166 | static xmlEntityPtr getXHTMLEntity(const xmlChar* name) |
1167 | { |
1168 | UChar utf16DecodedEntity[4]; |
1169 | size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity); |
1170 | if (!numberOfCodeUnits) |
1171 | return 0; |
1172 | |
1173 | ASSERT(numberOfCodeUnits <= 4); |
1174 | size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits, |
1175 | reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult)); |
1176 | if (!entityLengthInUTF8) |
1177 | return 0; |
1178 | |
1179 | xmlEntityPtr entity = sharedXHTMLEntity(); |
1180 | entity->length = entityLengthInUTF8; |
1181 | entity->name = name; |
1182 | return entity; |
1183 | } |
1184 | |
1185 | static void entityDeclarationHandler(void* closure, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content) |
1186 | { |
1187 | // Prevent the next call to getEntityHandler() to record the entity expansion depth. |
1188 | // We're parsing the entity declaration, so there's no need to record anything. |
1189 | // We only need to record the depth, if we're actually expanding the entity, when it's referenced. |
1190 | if (hackAroundLibXMLEntityParsingBug()) |
1191 | getParser(closure)->setIsParsingEntityDeclaration(true); |
1192 | xmlSAX2EntityDecl(closure, name, type, publicId, systemId, content); |
1193 | } |
1194 | |
1195 | static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name) |
1196 | { |
1197 | xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); |
1198 | |
1199 | XMLDocumentParser* parser = getParser(closure); |
1200 | if (hackAroundLibXMLEntityParsingBug()) { |
1201 | if (parser->isParsingEntityDeclaration()) { |
1202 | // We're parsing the entity declarations (not an entity reference), no need to do anything special. |
1203 | parser->setIsParsingEntityDeclaration(false); |
1204 | ASSERT(parser->depthTriggeringEntityExpansion() == -1); |
1205 | } else { |
1206 | // The entity will be used and eventually expanded. Record the current parser depth |
1207 | // so the next call to startElementNs() knows that the new element originates from |
1208 | // an entity declaration. |
1209 | parser->setDepthTriggeringEntityExpansion(ctxt->depth); |
1210 | } |
1211 | } |
1212 | |
1213 | xmlEntityPtr ent = xmlGetPredefinedEntity(name); |
1214 | if (ent) { |
1215 | ent->etype = XML_INTERNAL_PREDEFINED_ENTITY; |
1216 | return ent; |
1217 | } |
1218 | |
1219 | ent = xmlGetDocEntity(ctxt->myDoc, name); |
1220 | if (!ent && parser->isXHTMLDocument()) { |
1221 | ent = getXHTMLEntity(name); |
1222 | if (ent) |
1223 | ent->etype = XML_INTERNAL_GENERAL_ENTITY; |
1224 | } |
1225 | |
1226 | return ent; |
1227 | } |
1228 | |
1229 | static void startDocumentHandler(void* closure) |
1230 | { |
1231 | xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure); |
1232 | switchToUTF16(ctxt); |
1233 | getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone); |
1234 | xmlSAX2StartDocument(closure); |
1235 | } |
1236 | |
// libxml end-of-document callback: notifies the owning parser first, then runs
// libxml's default xmlSAX2EndDocument() handling.
static void endDocumentHandler(void* closure)
{
    getParser(closure)->endDocument();
    xmlSAX2EndDocument(closure);
}
1242 | |
// libxml internal-subset callback: forwards the doctype information to the
// owning parser, then runs libxml's default xmlSAX2InternalSubset() handling.
static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
{
    getParser(closure)->internalSubset(name, externalID, systemID);
    xmlSAX2InternalSubset(closure, name, externalID, systemID);
}
1248 | |
1249 | static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*) |
1250 | { |
1251 | String extId = toString(externalId); |
1252 | if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN" ) |
1253 | || (extId == "-//W3C//DTD XHTML 1.1//EN" ) |
1254 | || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN" ) |
1255 | || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN" ) |
1256 | || (extId == "-//W3C//DTD XHTML Basic 1.0//EN" ) |
1257 | || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" ) |
1258 | || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" ) |
1259 | || (extId == "-//W3C//DTD MathML 2.0//EN" ) |
1260 | || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" ) |
1261 | || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN" ) |
1262 | || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN" )) |
1263 | getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not. |
1264 | } |
1265 | |
// Intentionally empty libxml ignorable-whitespace callback; all arguments are
// ignored. It only exists so that a handler is installed.
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // nothing to do, but we need this to work around a crasher
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1272 | |
1273 | void XMLDocumentParser::initializeParserContext(const CString& chunk) |
1274 | { |
1275 | xmlSAXHandler sax; |
1276 | memset(&sax, 0, sizeof(sax)); |
1277 | |
1278 | sax.error = normalErrorHandler; |
1279 | sax.fatalError = fatalErrorHandler; |
1280 | sax.characters = charactersHandler; |
1281 | sax.processingInstruction = processingInstructionHandler; |
1282 | sax.cdataBlock = cdataBlockHandler; |
1283 | sax.comment = commentHandler; |
1284 | sax.warning = warningHandler; |
1285 | sax.startElementNs = startElementNsHandler; |
1286 | sax.endElementNs = endElementNsHandler; |
1287 | sax.getEntity = getEntityHandler; |
1288 | sax.startDocument = startDocumentHandler; |
1289 | sax.endDocument = endDocumentHandler; |
1290 | sax.internalSubset = internalSubsetHandler; |
1291 | sax.externalSubset = externalSubsetHandler; |
1292 | sax.ignorableWhitespace = ignorableWhitespaceHandler; |
1293 | sax.entityDecl = entityDeclarationHandler; |
1294 | sax.initialized = XML_SAX2_MAGIC; |
1295 | DocumentParser::startParsing(); |
1296 | m_sawError = false; |
1297 | m_sawCSS = false; |
1298 | m_sawXSLTransform = false; |
1299 | m_sawFirstElement = false; |
1300 | |
1301 | XMLDocumentParserScope scope(&document()->cachedResourceLoader()); |
1302 | if (m_parsingFragment) |
1303 | m_context = XMLParserContext::createMemoryParser(&sax, this, chunk); |
1304 | else { |
1305 | ASSERT(!chunk.data()); |
1306 | m_context = XMLParserContext::createStringParser(&sax, this); |
1307 | } |
1308 | } |
1309 | |
1310 | void XMLDocumentParser::doEnd() |
1311 | { |
1312 | if (!isStopped()) { |
1313 | if (m_context) { |
1314 | // Tell libxml we're done. |
1315 | { |
1316 | XMLDocumentParserScope scope(&document()->cachedResourceLoader()); |
1317 | xmlParseChunk(context(), 0, 0, 1); |
1318 | } |
1319 | |
1320 | m_context = nullptr; |
1321 | } |
1322 | } |
1323 | |
1324 | #if ENABLE(XSLT) |
1325 | bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && shouldRenderInXMLTreeViewerMode(*document()); |
1326 | if (xmlViewerMode) { |
1327 | XMLTreeViewer xmlTreeViewer(*document()); |
1328 | xmlTreeViewer.transformDocumentToTreeView(); |
1329 | } else if (m_sawXSLTransform) { |
1330 | xmlDocPtr doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform.toString(), document()->url().string()); |
1331 | document()->setTransformSource(std::make_unique<TransformSource>(doc)); |
1332 | |
1333 | document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets. |
1334 | document()->applyPendingXSLTransformsNowIfScheduled(); |
1335 | |
1336 | // styleResolverChanged() call can detach the parser and null out its document. |
1337 | // In that case, we just bail out. |
1338 | if (isDetached()) |
1339 | return; |
1340 | |
1341 | document()->setParsing(true); |
1342 | DocumentParser::stopParsing(); |
1343 | } |
1344 | #endif |
1345 | } |
1346 | |
1347 | #if ENABLE(XSLT) |
1348 | static inline const char* nativeEndianUTF16Encoding() |
1349 | { |
1350 | const UChar BOM = 0xFEFF; |
1351 | const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
1352 | return BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE" ; |
1353 | } |
1354 | |
1355 | xmlDocPtr xmlDocPtrForString(CachedResourceLoader& cachedResourceLoader, const String& source, const String& url) |
1356 | { |
1357 | if (source.isEmpty()) |
1358 | return nullptr; |
1359 | |
1360 | // Parse in a single chunk into an xmlDocPtr |
1361 | // FIXME: Hook up error handlers so that a failure to parse the main document results in |
1362 | // good error messages. |
1363 | |
1364 | const bool is8Bit = source.is8Bit(); |
1365 | const char* characters = is8Bit ? reinterpret_cast<const char*>(source.characters8()) : reinterpret_cast<const char*>(source.characters16()); |
1366 | size_t sizeInBytes = source.length() * (is8Bit ? sizeof(LChar) : sizeof(UChar)); |
1367 | const char* encoding = is8Bit ? "iso-8859-1" : nativeEndianUTF16Encoding(); |
1368 | |
1369 | XMLDocumentParserScope scope(&cachedResourceLoader, errorFunc); |
1370 | return xmlReadMemory(characters, sizeInBytes, url.latin1().data(), encoding, XSLT_PARSE_OPTIONS); |
1371 | } |
1372 | #endif |
1373 | |
1374 | TextPosition XMLDocumentParser::textPosition() const |
1375 | { |
1376 | xmlParserCtxtPtr context = this->context(); |
1377 | if (!context) |
1378 | return TextPosition(); |
1379 | return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), |
1380 | OrdinalNumber::fromOneBasedInt(context->input->col)); |
1381 | } |
1382 | |
1383 | bool XMLDocumentParser::shouldAssociateConsoleMessagesWithTextPosition() const |
1384 | { |
1385 | return !m_parserPaused && !m_requestingScript; |
1386 | } |
1387 | |
1388 | void XMLDocumentParser::stopParsing() |
1389 | { |
1390 | if (m_sawError) |
1391 | insertErrorMessageBlock(); |
1392 | |
1393 | DocumentParser::stopParsing(); |
1394 | if (context()) |
1395 | xmlStopParser(context()); |
1396 | } |
1397 | |
// Resumes a paused parse: delivers the queued SAX callbacks in order, then
// appends the source that was buffered in m_pendingSrc, and finally calls
// end() if finish() was already called and nothing is left queued. Returns
// early if one of the queued callbacks pauses the parser again.
void XMLDocumentParser::resumeParsing()
{
    ASSERT(!isDetached());
    ASSERT(m_parserPaused);

    m_parserPaused = false;

    // First, execute any pending callbacks
    while (!m_pendingCallbacks->isEmpty()) {
        m_pendingCallbacks->callAndRemoveFirstCallback(this);

        // A callback paused the parser
        if (m_parserPaused)
            return;
    }

    // There is normally only one string left, so toString() shouldn't copy.
    // In any case, the XML parser runs on the main thread and it's OK if
    // the passed string has more than one reference.
    auto rest = m_pendingSrc.toString();
    // Clear the buffer before appending the text taken from it.
    m_pendingSrc.clear();
    append(rest.impl());

    // Finally, if finish() has been called and write() didn't result
    // in any further callbacks being queued, call end()
    if (m_finishCalled && m_pendingCallbacks->isEmpty())
        end();
}
1426 | |
1427 | bool XMLDocumentParser::appendFragmentSource(const String& chunk) |
1428 | { |
1429 | ASSERT(!m_context); |
1430 | ASSERT(m_parsingFragment); |
1431 | |
1432 | CString chunkAsUtf8 = chunk.utf8(); |
1433 | |
1434 | // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB. |
1435 | if (chunkAsUtf8.length() > INT_MAX) |
1436 | return false; |
1437 | |
1438 | initializeParserContext(chunkAsUtf8); |
1439 | xmlParseContent(context()); |
1440 | endDocument(); // Close any open text nodes. |
1441 | |
1442 | // FIXME: If this code is actually needed, it should probably move to finish() |
1443 | // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). |
1444 | // Check if all the chunk has been processed. |
1445 | long bytesProcessed = xmlByteConsumed(context()); |
1446 | if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) { |
1447 | // FIXME: I don't believe we can hit this case without also having seen an error or a null byte. |
1448 | // If we hit this ASSERT, we've found a test case which demonstrates the need for this code. |
1449 | ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed])); |
1450 | return false; |
1451 | } |
1452 | |
1453 | // No error if the chunk is well formed or it is not but we have no error. |
1454 | return context()->wellFormed || !xmlCtxtGetLastError(context()); |
1455 | } |
1456 | |
1457 | // -------------------------------- |
1458 | |
1459 | using AttributeParseState = Optional<HashMap<String, String>>; |
1460 | |
1461 | static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, const xmlChar* /*xmlURI*/, int /*numNamespaces*/, const xmlChar** /*namespaces*/, int numAttributes, int /*numDefaulted*/, const xmlChar** libxmlAttributes) |
1462 | { |
1463 | if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs" ) != 0) |
1464 | return; |
1465 | |
1466 | auto& state = *static_cast<AttributeParseState*>(static_cast<xmlParserCtxtPtr>(closure)->_private); |
1467 | |
1468 | state = HashMap<String, String> { }; |
1469 | |
1470 | xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); |
1471 | for (int i = 0; i < numAttributes; i++) { |
1472 | String attrLocalName = toString(attributes[i].localname); |
1473 | int valueLength = (int) (attributes[i].end - attributes[i].value); |
1474 | String attrValue = toString(attributes[i].value, valueLength); |
1475 | String attrPrefix = toString(attributes[i].prefix); |
1476 | String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName; |
1477 | |
1478 | state->set(attrQName, attrValue); |
1479 | } |
1480 | } |
1481 | |
1482 | Optional<HashMap<String, String>> parseAttributes(const String& string) |
1483 | { |
1484 | String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />" ; |
1485 | |
1486 | AttributeParseState attributes; |
1487 | |
1488 | xmlSAXHandler sax; |
1489 | memset(&sax, 0, sizeof(sax)); |
1490 | sax.startElementNs = attributesStartElementNsHandler; |
1491 | sax.initialized = XML_SAX2_MAGIC; |
1492 | |
1493 | auto parser = XMLParserContext::createStringParser(&sax, &attributes); |
1494 | |
1495 | // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16? |
1496 | xmlParseChunk(parser->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), parseString.length() * sizeof(UChar), 1); |
1497 | |
1498 | return attributes; |
1499 | } |
1500 | |
1501 | } |
1502 | |