| 1 | /* |
| 2 | * Copyright (C) 2004-2017 Apple Inc. All rights reserved. |
| 3 | * Copyright (C) 2009, 2010 Google Inc. All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in the |
| 12 | * documentation and/or other materials provided with the distribution. |
| 13 | * |
| 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 15 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 16 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 17 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 18 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 19 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 20 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 21 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 22 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 25 | */ |
| 26 | |
| 27 | #include "config.h" |
| 28 | #include "MarkupAccumulator.h" |
| 29 | |
| 30 | #include "CDATASection.h" |
| 31 | #include "Comment.h" |
| 32 | #include "DocumentFragment.h" |
| 33 | #include "DocumentType.h" |
| 34 | #include "Editor.h" |
| 35 | #include "HTMLElement.h" |
| 36 | #include "HTMLNames.h" |
| 37 | #include "HTMLTemplateElement.h" |
| 38 | #include <wtf/URL.h> |
| 39 | #include "ProcessingInstruction.h" |
| 40 | #include "XLinkNames.h" |
| 41 | #include "XMLNSNames.h" |
| 42 | #include "XMLNames.h" |
| 43 | #include <wtf/NeverDestroyed.h> |
| 44 | #include <wtf/unicode/CharacterNames.h> |
| 45 | |
| 46 | namespace WebCore { |
| 47 | |
| 48 | using namespace HTMLNames; |
| 49 | |
| 50 | struct EntityDescription { |
| 51 | const char* characters; |
| 52 | unsigned char length; |
| 53 | unsigned char mask; |
| 54 | }; |
| 55 | |
| 56 | static const EntityDescription entitySubstitutionList[] = { |
| 57 | { "" , 0 , 0 }, |
| 58 | { "&" , 5 , EntityAmp }, |
| 59 | { "<" , 4, EntityLt }, |
| 60 | { ">" , 4, EntityGt }, |
| 61 | { """ , 6, EntityQuot }, |
| 62 | { " " , 6, EntityNbsp }, |
| 63 | }; |
| 64 | |
| 65 | enum EntitySubstitutionIndex { |
| 66 | EntitySubstitutionNullIndex = 0, |
| 67 | EntitySubstitutionAmpIndex = 1, |
| 68 | EntitySubstitutionLtIndex = 2, |
| 69 | EntitySubstitutionGtIndex = 3, |
| 70 | EntitySubstitutionQuotIndex = 4, |
| 71 | EntitySubstitutionNbspIndex = 5, |
| 72 | }; |
| 73 | |
| 74 | static const unsigned maximumEscapedentityCharacter = noBreakSpace; |
| 75 | static const uint8_t entityMap[maximumEscapedentityCharacter + 1] = { |
| 76 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 77 | EntitySubstitutionQuotIndex, // '"'. |
| 78 | 0, 0, 0, |
| 79 | EntitySubstitutionAmpIndex, // '&'. |
| 80 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 81 | EntitySubstitutionLtIndex, // '<'. |
| 82 | 0, |
| 83 | EntitySubstitutionGtIndex, // '>'. |
| 84 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 85 | EntitySubstitutionNbspIndex // noBreakSpace. |
| 86 | }; |
| 87 | |
| 88 | static bool elementCannotHaveEndTag(const Node& node) |
| 89 | { |
| 90 | if (!is<HTMLElement>(node)) |
| 91 | return false; |
| 92 | |
| 93 | // From https://html.spec.whatwg.org/#serialising-html-fragments: |
| 94 | // If current node is an area, base, basefont, bgsound, br, col, embed, frame, hr, img, |
| 95 | // input, keygen, link, meta, param, source, track or wbr element, then continue on to |
| 96 | // the next child node at this point. |
| 97 | static const AtomicStringImpl* const localNames[] = { |
| 98 | areaTag->localName().impl(), |
| 99 | baseTag->localName().impl(), |
| 100 | basefontTag->localName().impl(), |
| 101 | bgsoundTag->localName().impl(), |
| 102 | brTag->localName().impl(), |
| 103 | colTag->localName().impl(), |
| 104 | embedTag->localName().impl(), |
| 105 | frameTag->localName().impl(), |
| 106 | hrTag->localName().impl(), |
| 107 | imgTag->localName().impl(), |
| 108 | inputTag->localName().impl(), |
| 109 | keygenTag->localName().impl(), |
| 110 | linkTag->localName().impl(), |
| 111 | metaTag->localName().impl(), |
| 112 | paramTag->localName().impl(), |
| 113 | sourceTag->localName().impl(), |
| 114 | trackTag->localName().impl(), |
| 115 | wbrTag->localName().impl() |
| 116 | }; |
| 117 | |
| 118 | auto* const elementName = downcast<HTMLElement>(node).localName().impl(); |
| 119 | for (auto* name : localNames) { |
| 120 | if (name == elementName) |
| 121 | return true; |
| 122 | } |
| 123 | |
| 124 | return false; |
| 125 | } |
| 126 | |
| 127 | // Rules of self-closure |
| 128 | // 1. No elements in HTML documents use the self-closing syntax. |
| 129 | // 2. Elements w/ children never self-close because they use a separate end tag. |
| 130 | // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag. |
| 131 | // 4. Other elements self-close. |
| 132 | static bool shouldSelfClose(const Element& element, SerializationSyntax syntax) |
| 133 | { |
| 134 | if (syntax != SerializationSyntax::XML && element.document().isHTMLDocument()) |
| 135 | return false; |
| 136 | if (element.hasChildNodes()) |
| 137 | return false; |
| 138 | if (element.isHTMLElement() && !elementCannotHaveEndTag(element)) |
| 139 | return false; |
| 140 | return true; |
| 141 | } |
| 142 | |
| 143 | template<typename CharacterType> |
| 144 | static inline void appendCharactersReplacingEntitiesInternal(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask) |
| 145 | { |
| 146 | const CharacterType* text = source.characters<CharacterType>() + offset; |
| 147 | |
| 148 | size_t positionAfterLastEntity = 0; |
| 149 | for (size_t i = 0; i < length; ++i) { |
| 150 | CharacterType character = text[i]; |
| 151 | uint8_t substitution = character < WTF_ARRAY_LENGTH(entityMap) ? entityMap[character] : static_cast<uint8_t>(EntitySubstitutionNullIndex); |
| 152 | if (UNLIKELY(substitution != EntitySubstitutionNullIndex) && entitySubstitutionList[substitution].mask & entityMask) { |
| 153 | result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); |
| 154 | result.append(entitySubstitutionList[substitution].characters, entitySubstitutionList[substitution].length); |
| 155 | positionAfterLastEntity = i + 1; |
| 156 | } |
| 157 | } |
| 158 | result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); |
| 159 | } |
| 160 | |
| 161 | void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask) |
| 162 | { |
| 163 | if (!(offset + length)) |
| 164 | return; |
| 165 | |
| 166 | ASSERT(offset + length <= source.length()); |
| 167 | |
| 168 | if (source.is8Bit()) |
| 169 | appendCharactersReplacingEntitiesInternal<LChar>(result, source, offset, length, entityMask); |
| 170 | else |
| 171 | appendCharactersReplacingEntitiesInternal<UChar>(result, source, offset, length, entityMask); |
| 172 | } |
| 173 | |
| 174 | MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, ResolveURLs resolveURLs, SerializationSyntax serializationSyntax) |
| 175 | : m_nodes(nodes) |
| 176 | , m_resolveURLs(resolveURLs) |
| 177 | , m_serializationSyntax(serializationSyntax) |
| 178 | { |
| 179 | } |
| 180 | |
| 181 | MarkupAccumulator::~MarkupAccumulator() = default; |
| 182 | |
| 183 | String MarkupAccumulator::serializeNodes(Node& targetNode, SerializedNodes root, Vector<QualifiedName>* tagNamesToSkip) |
| 184 | { |
| 185 | serializeNodesWithNamespaces(targetNode, root, 0, tagNamesToSkip); |
| 186 | return m_markup.toString(); |
| 187 | } |
| 188 | |
| 189 | void MarkupAccumulator::serializeNodesWithNamespaces(Node& targetNode, SerializedNodes root, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip) |
| 190 | { |
| 191 | if (tagNamesToSkip && is<Element>(targetNode)) { |
| 192 | for (auto& name : *tagNamesToSkip) { |
| 193 | if (downcast<Element>(targetNode).hasTagName(name)) |
| 194 | return; |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | Namespaces namespaceHash; |
| 199 | if (namespaces) |
| 200 | namespaceHash = *namespaces; |
| 201 | else if (inXMLFragmentSerialization()) { |
| 202 | // Make sure xml prefix and namespace are always known to uphold the constraints listed at http://www.w3.org/TR/xml-names11/#xmlReserved. |
| 203 | namespaceHash.set(xmlAtom().impl(), XMLNames::xmlNamespaceURI->impl()); |
| 204 | namespaceHash.set(XMLNames::xmlNamespaceURI->impl(), xmlAtom().impl()); |
| 205 | } |
| 206 | |
| 207 | if (root == SerializedNodes::SubtreeIncludingNode) |
| 208 | startAppendingNode(targetNode, &namespaceHash); |
| 209 | |
| 210 | if (targetNode.document().isHTMLDocument() && elementCannotHaveEndTag(targetNode)) |
| 211 | return; |
| 212 | |
| 213 | Node* current = targetNode.hasTagName(templateTag) ? downcast<HTMLTemplateElement>(targetNode).content().firstChild() : targetNode.firstChild(); |
| 214 | for ( ; current; current = current->nextSibling()) |
| 215 | serializeNodesWithNamespaces(*current, SerializedNodes::SubtreeIncludingNode, &namespaceHash, tagNamesToSkip); |
| 216 | |
| 217 | if (root == SerializedNodes::SubtreeIncludingNode) |
| 218 | endAppendingNode(targetNode); |
| 219 | } |
| 220 | |
| 221 | String MarkupAccumulator::resolveURLIfNeeded(const Element& element, const String& urlString) const |
| 222 | { |
| 223 | switch (m_resolveURLs) { |
| 224 | case ResolveURLs::Yes: |
| 225 | return element.document().completeURL(urlString).string(); |
| 226 | |
| 227 | case ResolveURLs::YesExcludingLocalFileURLsForPrivacy: |
| 228 | if (!element.document().url().isLocalFile()) |
| 229 | return element.document().completeURL(urlString).string(); |
| 230 | break; |
| 231 | |
| 232 | case ResolveURLs::No: |
| 233 | break; |
| 234 | } |
| 235 | return urlString; |
| 236 | } |
| 237 | |
| 238 | void MarkupAccumulator::appendString(const String& string) |
| 239 | { |
| 240 | m_markup.append(string); |
| 241 | } |
| 242 | |
| 243 | void MarkupAccumulator::appendStringView(StringView view) |
| 244 | { |
| 245 | m_markup.append(view); |
| 246 | } |
| 247 | |
| 248 | void MarkupAccumulator::startAppendingNode(const Node& node, Namespaces* namespaces) |
| 249 | { |
| 250 | if (is<Element>(node)) |
| 251 | appendStartTag(m_markup, downcast<Element>(node), namespaces); |
| 252 | else |
| 253 | appendNonElementNode(m_markup, node, namespaces); |
| 254 | |
| 255 | if (m_nodes) |
| 256 | m_nodes->append(const_cast<Node*>(&node)); |
| 257 | } |
| 258 | |
| 259 | void MarkupAccumulator::appendEndTag(StringBuilder& result, const Element& element) |
| 260 | { |
| 261 | if (shouldSelfClose(element, m_serializationSyntax) || (!element.hasChildNodes() && elementCannotHaveEndTag(element))) |
| 262 | return; |
| 263 | result.append('<'); |
| 264 | result.append('/'); |
| 265 | result.append(element.nodeNamePreservingCase()); |
| 266 | result.append('>'); |
| 267 | } |
| 268 | |
| 269 | size_t MarkupAccumulator::totalLength(const Vector<String>& strings) |
| 270 | { |
| 271 | size_t length = 0; |
| 272 | for (auto& string : strings) |
| 273 | length += string.length(); |
| 274 | return length; |
| 275 | } |
| 276 | |
| 277 | void MarkupAccumulator::concatenateMarkup(StringBuilder& result) |
| 278 | { |
| 279 | result.append(m_markup); |
| 280 | } |
| 281 | |
| 282 | void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool isSerializingHTML) |
| 283 | { |
| 284 | appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), |
| 285 | isSerializingHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue); |
| 286 | } |
| 287 | |
| 288 | void MarkupAccumulator::appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) |
| 289 | { |
| 290 | } |
| 291 | |
| 292 | void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element& element, const Attribute& attribute) |
| 293 | { |
| 294 | ASSERT(element.isURLAttribute(attribute)); |
| 295 | String resolvedURLString = resolveURLIfNeeded(element, attribute.value()); |
| 296 | UChar quoteChar = '"'; |
| 297 | if (WTF::protocolIsJavaScript(resolvedURLString)) { |
| 298 | // minimal escaping for javascript urls |
| 299 | if (resolvedURLString.contains('"')) { |
| 300 | if (resolvedURLString.contains('\'')) |
| 301 | resolvedURLString.replaceWithLiteral('"', """ ); |
| 302 | else |
| 303 | quoteChar = '\''; |
| 304 | } |
| 305 | result.append(quoteChar); |
| 306 | result.append(resolvedURLString); |
| 307 | result.append(quoteChar); |
| 308 | return; |
| 309 | } |
| 310 | |
| 311 | // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML. |
| 312 | result.append(quoteChar); |
| 313 | appendAttributeValue(result, resolvedURLString, false); |
| 314 | result.append(quoteChar); |
| 315 | } |
| 316 | |
| 317 | bool MarkupAccumulator::shouldAddNamespaceElement(const Element& element) |
| 318 | { |
| 319 | // Don't add namespace attribute if it is already defined for this elem. |
| 320 | const AtomicString& prefix = element.prefix(); |
| 321 | if (prefix.isEmpty()) |
| 322 | return !element.hasAttribute(xmlnsAtom()); |
| 323 | |
| 324 | static NeverDestroyed<String> xmlnsWithColon(MAKE_STATIC_STRING_IMPL("xmlns:" )); |
| 325 | return !element.hasAttribute(xmlnsWithColon.get() + prefix); |
| 326 | } |
| 327 | |
| 328 | bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces) |
| 329 | { |
| 330 | namespaces.checkConsistency(); |
| 331 | |
| 332 | // Don't add namespace attributes twice |
| 333 | // HTML Parser will create xmlns attributes without namespace for HTML elements, allow those as well. |
| 334 | if (attribute.name().localName() == xmlnsAtom() && (attribute.namespaceURI().isEmpty() || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI)) { |
| 335 | namespaces.set(emptyAtom().impl(), attribute.value().impl()); |
| 336 | return false; |
| 337 | } |
| 338 | |
| 339 | QualifiedName xmlnsPrefixAttr(xmlnsAtom(), attribute.localName(), XMLNSNames::xmlnsNamespaceURI); |
| 340 | if (attribute.name() == xmlnsPrefixAttr) { |
| 341 | namespaces.set(attribute.localName().impl(), attribute.value().impl()); |
| 342 | namespaces.set(attribute.value().impl(), attribute.localName().impl()); |
| 343 | return false; |
| 344 | } |
| 345 | |
| 346 | return true; |
| 347 | } |
| 348 | |
| 349 | void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces, bool allowEmptyDefaultNS) |
| 350 | { |
| 351 | namespaces.checkConsistency(); |
| 352 | if (namespaceURI.isEmpty()) { |
| 353 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-serialization-algorithm |
| 354 | if (allowEmptyDefaultNS && namespaces.get(emptyAtom().impl())) { |
| 355 | result.append(' '); |
| 356 | result.append(xmlnsAtom().string()); |
| 357 | result.appendLiteral("=\"\"" ); |
| 358 | } |
| 359 | return; |
| 360 | } |
| 361 | |
| 362 | // Use emptyAtom()s's impl() for both null and empty strings since the HashMap can't handle 0 as a key |
| 363 | AtomicStringImpl* pre = prefix.isEmpty() ? emptyAtom().impl() : prefix.impl(); |
| 364 | AtomicStringImpl* foundNS = namespaces.get(pre); |
| 365 | if (foundNS != namespaceURI.impl()) { |
| 366 | namespaces.set(pre, namespaceURI.impl()); |
| 367 | // Add namespace to prefix pair so we can do constraint checking later. |
| 368 | if (inXMLFragmentSerialization() && !prefix.isEmpty()) |
| 369 | namespaces.set(namespaceURI.impl(), pre); |
| 370 | // Make sure xml prefix and namespace are always known to uphold the constraints listed at http://www.w3.org/TR/xml-names11/#xmlReserved. |
| 371 | if (namespaceURI.impl() == XMLNames::xmlNamespaceURI->impl()) |
| 372 | return; |
| 373 | result.append(' '); |
| 374 | result.append(xmlnsAtom().string()); |
| 375 | if (!prefix.isEmpty()) { |
| 376 | result.append(':'); |
| 377 | result.append(prefix); |
| 378 | } |
| 379 | |
| 380 | result.append('='); |
| 381 | result.append('"'); |
| 382 | appendAttributeValue(result, namespaceURI, false); |
| 383 | result.append('"'); |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | EntityMask MarkupAccumulator::entityMaskForText(const Text& text) const |
| 388 | { |
| 389 | if (!text.document().isHTMLDocument()) |
| 390 | return EntityMaskInPCDATA; |
| 391 | |
| 392 | const QualifiedName* parentName = nullptr; |
| 393 | if (text.parentElement()) |
| 394 | parentName = &text.parentElement()->tagQName(); |
| 395 | |
| 396 | if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag)) |
| 397 | return EntityMaskInCDATA; |
| 398 | return EntityMaskInHTMLPCDATA; |
| 399 | } |
| 400 | |
| 401 | void MarkupAccumulator::appendText(StringBuilder& result, const Text& text) |
| 402 | { |
| 403 | const String& textData = text.data(); |
| 404 | appendCharactersReplacingEntities(result, textData, 0, textData.length(), entityMaskForText(text)); |
| 405 | } |
| 406 | |
| 407 | static void (StringBuilder& result, const String& ) |
| 408 | { |
| 409 | // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->". |
| 410 | result.appendLiteral("<!--" ); |
| 411 | result.append(comment); |
| 412 | result.appendLiteral("-->" ); |
| 413 | } |
| 414 | |
| 415 | void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document& document) |
| 416 | { |
| 417 | if (!document.hasXMLDeclaration()) |
| 418 | return; |
| 419 | |
| 420 | result.appendLiteral("<?xml version=\"" ); |
| 421 | result.append(document.xmlVersion()); |
| 422 | const String& encoding = document.xmlEncoding(); |
| 423 | if (!encoding.isEmpty()) { |
| 424 | result.appendLiteral("\" encoding=\"" ); |
| 425 | result.append(encoding); |
| 426 | } |
| 427 | if (document.xmlStandaloneStatus() != Document::StandaloneStatus::Unspecified) { |
| 428 | result.appendLiteral("\" standalone=\"" ); |
| 429 | if (document.xmlStandalone()) |
| 430 | result.appendLiteral("yes" ); |
| 431 | else |
| 432 | result.appendLiteral("no" ); |
| 433 | } |
| 434 | |
| 435 | result.appendLiteral("\"?>" ); |
| 436 | } |
| 437 | |
| 438 | void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType& documentType) |
| 439 | { |
| 440 | if (documentType.name().isEmpty()) |
| 441 | return; |
| 442 | |
| 443 | result.appendLiteral("<!DOCTYPE " ); |
| 444 | result.append(documentType.name()); |
| 445 | if (!documentType.publicId().isEmpty()) { |
| 446 | result.appendLiteral(" PUBLIC \"" ); |
| 447 | result.append(documentType.publicId()); |
| 448 | result.append('"'); |
| 449 | if (!documentType.systemId().isEmpty()) { |
| 450 | result.append(' '); |
| 451 | result.append('"'); |
| 452 | result.append(documentType.systemId()); |
| 453 | result.append('"'); |
| 454 | } |
| 455 | } else if (!documentType.systemId().isEmpty()) { |
| 456 | result.appendLiteral(" SYSTEM \"" ); |
| 457 | result.append(documentType.systemId()); |
| 458 | result.append('"'); |
| 459 | } |
| 460 | result.append('>'); |
| 461 | } |
| 462 | |
| 463 | void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data) |
| 464 | { |
| 465 | // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>". |
| 466 | result.append('<'); |
| 467 | result.append('?'); |
| 468 | result.append(target); |
| 469 | result.append(' '); |
| 470 | result.append(data); |
| 471 | result.append('?'); |
| 472 | result.append('>'); |
| 473 | } |
| 474 | |
| 475 | void MarkupAccumulator::appendStartTag(StringBuilder& result, const Element& element, Namespaces* namespaces) |
| 476 | { |
| 477 | appendOpenTag(result, element, namespaces); |
| 478 | |
| 479 | if (element.hasAttributes()) { |
| 480 | for (const Attribute& attribute : element.attributesIterator()) |
| 481 | appendAttribute(result, element, attribute, namespaces); |
| 482 | } |
| 483 | |
| 484 | // Give an opportunity to subclasses to add their own attributes. |
| 485 | appendCustomAttributes(result, element, namespaces); |
| 486 | |
| 487 | appendCloseTag(result, element); |
| 488 | } |
| 489 | |
| 490 | void MarkupAccumulator::appendOpenTag(StringBuilder& result, const Element& element, Namespaces* namespaces) |
| 491 | { |
| 492 | result.append('<'); |
| 493 | if (inXMLFragmentSerialization() && namespaces && element.prefix().isEmpty()) { |
| 494 | // According to http://www.w3.org/TR/DOM-Level-3-Core/namespaces-algorithms.html#normalizeDocumentAlgo we now should create |
| 495 | // a default namespace declaration to make this namespace well-formed. However, http://www.w3.org/TR/xml-names11/#xmlReserved states |
| 496 | // "The prefix xml MUST NOT be declared as the default namespace.", so use the xml prefix explicitly. |
| 497 | if (element.namespaceURI() == XMLNames::xmlNamespaceURI) { |
| 498 | result.append(xmlAtom()); |
| 499 | result.append(':'); |
| 500 | } |
| 501 | } |
| 502 | result.append(element.nodeNamePreservingCase()); |
| 503 | if ((inXMLFragmentSerialization() || !element.document().isHTMLDocument()) && namespaces && shouldAddNamespaceElement(element)) |
| 504 | appendNamespace(result, element.prefix(), element.namespaceURI(), *namespaces, inXMLFragmentSerialization()); |
| 505 | } |
| 506 | |
| 507 | void MarkupAccumulator::appendCloseTag(StringBuilder& result, const Element& element) |
| 508 | { |
| 509 | if (shouldSelfClose(element, m_serializationSyntax)) { |
| 510 | if (element.isHTMLElement()) |
| 511 | result.append(' '); // XHTML 1.0 <-> HTML compatibility. |
| 512 | result.append('/'); |
| 513 | } |
| 514 | result.append('>'); |
| 515 | } |
| 516 | |
| 517 | static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) |
| 518 | { |
| 519 | return attribute.namespaceURI() == XMLNames::xmlNamespaceURI |
| 520 | || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI |
| 521 | || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; |
| 522 | } |
| 523 | |
| 524 | void MarkupAccumulator::generateUniquePrefix(QualifiedName& prefixedName, const Namespaces& namespaces) |
| 525 | { |
| 526 | // http://www.w3.org/TR/DOM-Level-3-Core/namespaces-algorithms.html#normalizeDocumentAlgo |
| 527 | // Find a prefix following the pattern "NS" + index (starting at 1) and make sure this |
| 528 | // prefix is not declared in the current scope. |
| 529 | StringBuilder builder; |
| 530 | do { |
| 531 | builder.clear(); |
| 532 | builder.appendLiteral("NS" ); |
| 533 | builder.appendNumber(++m_prefixLevel); |
| 534 | const AtomicString& name = builder.toAtomicString(); |
| 535 | if (!namespaces.get(name.impl())) { |
| 536 | prefixedName.setPrefix(name); |
| 537 | return; |
| 538 | } |
| 539 | } while (true); |
| 540 | } |
| 541 | |
| 542 | void MarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces) |
| 543 | { |
| 544 | bool isSerializingHTML = element.document().isHTMLDocument() && !inXMLFragmentSerialization(); |
| 545 | |
| 546 | result.append(' '); |
| 547 | |
| 548 | QualifiedName prefixedName = attribute.name(); |
| 549 | if (isSerializingHTML && !attributeIsInSerializedNamespace(attribute)) |
| 550 | result.append(attribute.name().localName()); |
| 551 | else { |
| 552 | if (!attribute.namespaceURI().isEmpty()) { |
| 553 | if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { |
| 554 | // Always use xml as prefix if the namespace is the XML namespace. |
| 555 | prefixedName.setPrefix(xmlAtom()); |
| 556 | } else { |
| 557 | AtomicStringImpl* foundNS = namespaces && attribute.prefix().impl() ? namespaces->get(attribute.prefix().impl()) : 0; |
| 558 | bool prefixIsAlreadyMappedToOtherNS = foundNS && foundNS != attribute.namespaceURI().impl(); |
| 559 | if (attribute.prefix().isEmpty() || !foundNS || prefixIsAlreadyMappedToOtherNS) { |
| 560 | if (AtomicStringImpl* prefix = namespaces ? namespaces->get(attribute.namespaceURI().impl()) : 0) |
| 561 | prefixedName.setPrefix(AtomicString(prefix)); |
| 562 | else { |
| 563 | bool shouldBeDeclaredUsingAppendNamespace = !attribute.prefix().isEmpty() && !foundNS; |
| 564 | if (!shouldBeDeclaredUsingAppendNamespace && attribute.localName() != xmlnsAtom() && namespaces) |
| 565 | generateUniquePrefix(prefixedName, *namespaces); |
| 566 | } |
| 567 | } |
| 568 | } |
| 569 | } |
| 570 | result.append(prefixedName.toString()); |
| 571 | } |
| 572 | |
| 573 | result.append('='); |
| 574 | |
| 575 | if (element.isURLAttribute(attribute)) |
| 576 | appendQuotedURLAttributeValue(result, element, attribute); |
| 577 | else { |
| 578 | result.append('"'); |
| 579 | appendAttributeValue(result, attribute.value(), isSerializingHTML); |
| 580 | result.append('"'); |
| 581 | } |
| 582 | |
| 583 | if (!isSerializingHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces)) |
| 584 | appendNamespace(result, prefixedName.prefix(), prefixedName.namespaceURI(), *namespaces); |
| 585 | } |
| 586 | |
| 587 | void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section) |
| 588 | { |
| 589 | // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>". |
| 590 | result.appendLiteral("<![CDATA[" ); |
| 591 | result.append(section); |
| 592 | result.appendLiteral("]]>" ); |
| 593 | } |
| 594 | |
| 595 | void MarkupAccumulator::appendNonElementNode(StringBuilder& result, const Node& node, Namespaces* namespaces) |
| 596 | { |
| 597 | if (namespaces) |
| 598 | namespaces->checkConsistency(); |
| 599 | |
| 600 | switch (node.nodeType()) { |
| 601 | case Node::TEXT_NODE: |
| 602 | appendText(result, downcast<Text>(node)); |
| 603 | break; |
| 604 | case Node::COMMENT_NODE: |
| 605 | appendComment(result, downcast<Comment>(node).data()); |
| 606 | break; |
| 607 | case Node::DOCUMENT_NODE: |
| 608 | appendXMLDeclaration(result, downcast<Document>(node)); |
| 609 | break; |
| 610 | case Node::DOCUMENT_FRAGMENT_NODE: |
| 611 | break; |
| 612 | case Node::DOCUMENT_TYPE_NODE: |
| 613 | appendDocumentType(result, downcast<DocumentType>(node)); |
| 614 | break; |
| 615 | case Node::PROCESSING_INSTRUCTION_NODE: |
| 616 | appendProcessingInstruction(result, downcast<ProcessingInstruction>(node).target(), downcast<ProcessingInstruction>(node).data()); |
| 617 | break; |
| 618 | case Node::ELEMENT_NODE: |
| 619 | ASSERT_NOT_REACHED(); |
| 620 | break; |
| 621 | case Node::CDATA_SECTION_NODE: |
| 622 | appendCDATASection(result, downcast<CDATASection>(node).data()); |
| 623 | break; |
| 624 | case Node::ATTRIBUTE_NODE: |
| 625 | ASSERT_NOT_REACHED(); |
| 626 | break; |
| 627 | } |
| 628 | } |
| 629 | |
| 630 | } |
| 631 | |