| 1 | /* |
| 2 | * Copyright (C) 2004-2017 Apple Inc. All rights reserved. |
| 3 | * Copyright (C) 2008, 2009, 2010, 2011 Google Inc. All rights reserved. |
| 4 | * Copyright (C) 2011 Igalia S.L. |
| 5 | * Copyright (C) 2011 Motorola Mobility. All rights reserved. |
| 6 | * |
| 7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions |
| 9 | * are met: |
| 10 | * 1. Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * 2. Redistributions in binary form must reproduce the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer in the |
| 14 | * documentation and/or other materials provided with the distribution. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 20 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 21 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 22 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 23 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 24 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "config.h" |
| 30 | #include "markup.h" |
| 31 | |
| 32 | #include "ArchiveResource.h" |
| 33 | #include "CSSPrimitiveValue.h" |
| 34 | #include "CSSPropertyNames.h" |
| 35 | #include "CSSValue.h" |
| 36 | #include "CSSValueKeywords.h" |
| 37 | #include "CacheStorageProvider.h" |
| 38 | #include "ChildListMutationScope.h" |
| 39 | #include "Comment.h" |
| 40 | #include "ComposedTreeIterator.h" |
| 41 | #include "DocumentFragment.h" |
| 42 | #include "DocumentLoader.h" |
| 43 | #include "DocumentType.h" |
| 44 | #include "Editing.h" |
| 45 | #include "Editor.h" |
| 46 | #include "EditorClient.h" |
| 47 | #include "ElementIterator.h" |
| 48 | #include "EmptyClients.h" |
| 49 | #include "File.h" |
| 50 | #include "Frame.h" |
| 51 | #include "FrameLoader.h" |
| 52 | #include "HTMLAttachmentElement.h" |
| 53 | #include "HTMLBRElement.h" |
| 54 | #include "HTMLBodyElement.h" |
| 55 | #include "HTMLDivElement.h" |
| 56 | #include "HTMLHeadElement.h" |
| 57 | #include "HTMLHtmlElement.h" |
| 58 | #include "HTMLImageElement.h" |
| 59 | #include "HTMLNames.h" |
| 60 | #include "HTMLStyleElement.h" |
| 61 | #include "HTMLTableElement.h" |
| 62 | #include "HTMLTextAreaElement.h" |
| 63 | #include "HTMLTextFormControlElement.h" |
| 64 | #include "LibWebRTCProvider.h" |
| 65 | #include "MarkupAccumulator.h" |
| 66 | #include "NodeList.h" |
| 67 | #include "Page.h" |
| 68 | #include "PageConfiguration.h" |
| 69 | #include "Range.h" |
| 70 | #include "RenderBlock.h" |
| 71 | #include "RuntimeEnabledFeatures.h" |
| 72 | #include "Settings.h" |
| 73 | #include "SocketProvider.h" |
| 74 | #include "StyleProperties.h" |
| 75 | #include "TextIterator.h" |
| 76 | #include "TypedElementDescendantIterator.h" |
| 77 | #include "VisibleSelection.h" |
| 78 | #include "VisibleUnits.h" |
| 79 | #include <wtf/StdLibExtras.h> |
| 80 | #include <wtf/URL.h> |
| 81 | #include <wtf/URLParser.h> |
| 82 | #include <wtf/text/StringBuilder.h> |
| 83 | |
| 84 | namespace WebCore { |
| 85 | |
| 86 | using namespace HTMLNames; |
| 87 | |
| 88 | static bool propertyMissingOrEqualToNone(StyleProperties*, CSSPropertyID); |
| 89 | |
| 90 | class AttributeChange { |
| 91 | public: |
| 92 | AttributeChange() |
| 93 | : m_name(nullAtom(), nullAtom(), nullAtom()) |
| 94 | { |
| 95 | } |
| 96 | |
| 97 | AttributeChange(Element* element, const QualifiedName& name, const String& value) |
| 98 | : m_element(element), m_name(name), m_value(value) |
| 99 | { |
| 100 | } |
| 101 | |
| 102 | void apply() |
| 103 | { |
| 104 | m_element->setAttribute(m_name, m_value); |
| 105 | } |
| 106 | |
| 107 | private: |
| 108 | RefPtr<Element> m_element; |
| 109 | QualifiedName m_name; |
| 110 | String m_value; |
| 111 | }; |
| 112 | |
| 113 | static void completeURLs(DocumentFragment* fragment, const String& baseURL) |
| 114 | { |
| 115 | Vector<AttributeChange> changes; |
| 116 | |
| 117 | URL parsedBaseURL({ }, baseURL); |
| 118 | |
| 119 | for (auto& element : descendantsOfType<Element>(*fragment)) { |
| 120 | if (!element.hasAttributes()) |
| 121 | continue; |
| 122 | for (const Attribute& attribute : element.attributesIterator()) { |
| 123 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) |
| 124 | changes.append(AttributeChange(&element, attribute.name(), element.completeURLsInAttributeValue(parsedBaseURL, attribute))); |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | for (auto& change : changes) |
| 129 | change.apply(); |
| 130 | } |
| 131 | |
| 132 | void replaceSubresourceURLs(Ref<DocumentFragment>&& fragment, HashMap<AtomicString, AtomicString>&& replacementMap) |
| 133 | { |
| 134 | Vector<AttributeChange> changes; |
| 135 | for (auto& element : descendantsOfType<Element>(fragment)) { |
| 136 | if (!element.hasAttributes()) |
| 137 | continue; |
| 138 | for (const Attribute& attribute : element.attributesIterator()) { |
| 139 | // FIXME: This won't work for srcset. |
| 140 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) { |
| 141 | auto replacement = replacementMap.get(attribute.value()); |
| 142 | if (!replacement.isNull()) |
| 143 | changes.append({ &element, attribute.name(), replacement }); |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | for (auto& change : changes) |
| 148 | change.apply(); |
| 149 | } |
| 150 | |
// Pairs an element with the name of one of its attributes. Used by
// removeSubresourceURLAttributes() to remember which attributes to remove
// once it has finished walking the fragment.
struct ElementAttribute {
    Ref<Element> element; // Element that carries the attribute.
    QualifiedName attributeName; // Name of the attribute on that element.
};
| 155 | |
| 156 | void removeSubresourceURLAttributes(Ref<DocumentFragment>&& fragment, WTF::Function<bool(const URL&)> shouldRemoveURL) |
| 157 | { |
| 158 | Vector<ElementAttribute> attributesToRemove; |
| 159 | for (auto& element : descendantsOfType<Element>(fragment)) { |
| 160 | if (!element.hasAttributes()) |
| 161 | continue; |
| 162 | for (const Attribute& attribute : element.attributesIterator()) { |
| 163 | // FIXME: This won't work for srcset. |
| 164 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) { |
| 165 | URL url({ }, attribute.value()); |
| 166 | if (shouldRemoveURL(url)) |
| 167 | attributesToRemove.append({ element, attribute.name() }); |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | for (auto& item : attributesToRemove) |
| 172 | item.element->removeAttribute(item.attributeName); |
| 173 | } |
| 174 | |
| 175 | std::unique_ptr<Page> createPageForSanitizingWebContent() |
| 176 | { |
| 177 | auto pageConfiguration = pageConfigurationWithEmptyClients(); |
| 178 | |
| 179 | auto page = std::make_unique<Page>(WTFMove(pageConfiguration)); |
| 180 | page->settings().setMediaEnabled(false); |
| 181 | page->settings().setScriptEnabled(false); |
| 182 | page->settings().setPluginsEnabled(false); |
| 183 | page->settings().setAcceleratedCompositingEnabled(false); |
| 184 | |
| 185 | Frame& frame = page->mainFrame(); |
| 186 | frame.setView(FrameView::create(frame, IntSize { 800, 600 })); |
| 187 | frame.init(); |
| 188 | |
| 189 | FrameLoader& loader = frame.loader(); |
| 190 | static char markup[] = "<!DOCTYPE html><html><body></body></html>" ; |
| 191 | ASSERT(loader.activeDocumentLoader()); |
| 192 | auto& writer = loader.activeDocumentLoader()->writer(); |
| 193 | writer.setMIMEType("text/html" ); |
| 194 | writer.begin(); |
| 195 | writer.insertDataSynchronously(String(markup)); |
| 196 | writer.end(); |
| 197 | RELEASE_ASSERT(page->mainFrame().document()->body()); |
| 198 | |
| 199 | return page; |
| 200 | } |
| 201 | |
| 202 | String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer) |
| 203 | { |
| 204 | auto page = createPageForSanitizingWebContent(); |
| 205 | Document* stagingDocument = page->mainFrame().document(); |
| 206 | ASSERT(stagingDocument); |
| 207 | |
| 208 | auto fragment = createFragmentFromMarkup(*stagingDocument, rawHTML, emptyString(), DisallowScriptingAndPluginContent); |
| 209 | |
| 210 | if (fragmentSanitizer) |
| 211 | (*fragmentSanitizer)(fragment); |
| 212 | |
| 213 | return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML); |
| 214 | } |
| 215 | |
// Tells StyledMarkupAccumulator whether to preserve Microsoft Office list
// markup (conditional comments, mso-list styles) while serializing.
enum class MSOListMode { Preserve, DoNotPreserve };
| 217 | class StyledMarkupAccumulator final : public MarkupAccumulator { |
| 218 | public: |
| 219 | enum RangeFullySelectsNode { DoesFullySelectNode, DoesNotFullySelectNode }; |
| 220 | |
| 221 | StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs, SerializeComposedTree, |
| 222 | AnnotateForInterchange, MSOListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized = nullptr); |
| 223 | |
| 224 | Node* serializeNodes(const Position& start, const Position& end); |
| 225 | void wrapWithNode(Node&, bool convertBlocksToInlines = false, RangeFullySelectsNode = DoesFullySelectNode); |
| 226 | void wrapWithStyleNode(StyleProperties*, Document&, bool isBlock = false); |
| 227 | String takeResults(); |
| 228 | |
| 229 | bool needRelativeStyleWrapper() const { return m_needRelativeStyleWrapper; } |
| 230 | bool needClearingDiv() const { return m_needClearingDiv; } |
| 231 | |
| 232 | using MarkupAccumulator::appendString; |
| 233 | |
| 234 | ContainerNode* parentNode(Node& node) |
| 235 | { |
| 236 | if (UNLIKELY(m_useComposedTree)) |
| 237 | return node.parentInComposedTree(); |
| 238 | return node.parentOrShadowHostNode(); |
| 239 | } |
| 240 | |
| 241 | private: |
| 242 | void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false); |
| 243 | const String& styleNodeCloseTag(bool isBlock = false); |
| 244 | |
| 245 | String renderedTextRespectingRange(const Text&); |
| 246 | String textContentRespectingRange(const Text&); |
| 247 | |
| 248 | bool shouldPreserveMSOListStyleForElement(const Element&); |
| 249 | |
| 250 | void appendStartTag(StringBuilder& out, const Element&, bool addDisplayInline, RangeFullySelectsNode); |
| 251 | void appendEndTag(StringBuilder& out, const Element&) override; |
| 252 | void appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) override; |
| 253 | |
| 254 | void appendText(StringBuilder& out, const Text&) override; |
| 255 | void appendStartTag(StringBuilder& out, const Element& element, Namespaces*) override |
| 256 | { |
| 257 | appendStartTag(out, element, false, DoesFullySelectNode); |
| 258 | } |
| 259 | |
| 260 | Node* firstChild(Node& node) |
| 261 | { |
| 262 | if (UNLIKELY(m_useComposedTree)) |
| 263 | return firstChildInComposedTreeIgnoringUserAgentShadow(node); |
| 264 | return node.firstChild(); |
| 265 | } |
| 266 | |
| 267 | Node* nextSibling(Node& node) |
| 268 | { |
| 269 | if (UNLIKELY(m_useComposedTree)) |
| 270 | return nextSiblingInComposedTreeIgnoringUserAgentShadow(node); |
| 271 | return node.nextSibling(); |
| 272 | } |
| 273 | |
| 274 | Node* nextSkippingChildren(Node& node) |
| 275 | { |
| 276 | if (UNLIKELY(m_useComposedTree)) |
| 277 | return nextSkippingChildrenInComposedTreeIgnoringUserAgentShadow(node); |
| 278 | return NodeTraversal::nextSkippingChildren(node); |
| 279 | } |
| 280 | |
| 281 | bool hasChildNodes(Node& node) |
| 282 | { |
| 283 | if (UNLIKELY(m_useComposedTree)) |
| 284 | return firstChildInComposedTreeIgnoringUserAgentShadow(node); |
| 285 | return node.hasChildNodes(); |
| 286 | } |
| 287 | |
| 288 | bool isDescendantOf(Node& node, Node& possibleAncestor) |
| 289 | { |
| 290 | if (UNLIKELY(m_useComposedTree)) |
| 291 | return node.isDescendantOrShadowDescendantOf(&possibleAncestor); |
| 292 | return node.isDescendantOf(&possibleAncestor); |
| 293 | } |
| 294 | |
| 295 | enum class NodeTraversalMode { EmitString, DoNotEmitString }; |
| 296 | Node* traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode); |
| 297 | |
| 298 | bool appendNodeToPreserveMSOList(Node&); |
| 299 | |
| 300 | bool shouldAnnotate() |
| 301 | { |
| 302 | return m_annotate == AnnotateForInterchange::Yes; |
| 303 | } |
| 304 | |
| 305 | bool shouldApplyWrappingStyle(const Node& node) const |
| 306 | { |
| 307 | return m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode() == node.parentNode() && m_wrappingStyle && m_wrappingStyle->style(); |
| 308 | } |
| 309 | |
| 310 | Position m_start; |
| 311 | Position m_end; |
| 312 | Vector<String> m_reversedPrecedingMarkup; |
| 313 | const AnnotateForInterchange m_annotate; |
| 314 | RefPtr<Node> m_highestNodeToBeSerialized; |
| 315 | RefPtr<EditingStyle> m_wrappingStyle; |
| 316 | bool m_useComposedTree; |
| 317 | bool m_needsPositionStyleConversion; |
| 318 | bool m_needRelativeStyleWrapper { false }; |
| 319 | bool m_needClearingDiv { false }; |
| 320 | bool m_shouldPreserveMSOList; |
| 321 | bool m_inMSOList { false }; |
| 322 | }; |
| 323 | |
| 324 | inline StyledMarkupAccumulator::StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree, |
| 325 | AnnotateForInterchange annotate, MSOListMode msoListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized) |
| 326 | : MarkupAccumulator(nodes, urlsToResolve) |
| 327 | , m_start(start) |
| 328 | , m_end(end) |
| 329 | , m_annotate(annotate) |
| 330 | , m_highestNodeToBeSerialized(highestNodeToBeSerialized) |
| 331 | , m_useComposedTree(serializeComposedTree == SerializeComposedTree::Yes) |
| 332 | , m_needsPositionStyleConversion(needsPositionStyleConversion) |
| 333 | , m_shouldPreserveMSOList(msoListMode == MSOListMode::Preserve) |
| 334 | { |
| 335 | } |
| 336 | |
| 337 | void StyledMarkupAccumulator::wrapWithNode(Node& node, bool convertBlocksToInlines, RangeFullySelectsNode rangeFullySelectsNode) |
| 338 | { |
| 339 | StringBuilder markup; |
| 340 | if (is<Element>(node)) |
| 341 | appendStartTag(markup, downcast<Element>(node), convertBlocksToInlines && isBlock(&node), rangeFullySelectsNode); |
| 342 | else |
| 343 | appendNonElementNode(markup, node, nullptr); |
| 344 | m_reversedPrecedingMarkup.append(markup.toString()); |
| 345 | endAppendingNode(node); |
| 346 | if (m_nodes) |
| 347 | m_nodes->append(&node); |
| 348 | } |
| 349 | |
| 350 | void StyledMarkupAccumulator::wrapWithStyleNode(StyleProperties* style, Document& document, bool isBlock) |
| 351 | { |
| 352 | StringBuilder openTag; |
| 353 | appendStyleNodeOpenTag(openTag, style, document, isBlock); |
| 354 | m_reversedPrecedingMarkup.append(openTag.toString()); |
| 355 | appendString(styleNodeCloseTag(isBlock)); |
| 356 | } |
| 357 | |
| 358 | void StyledMarkupAccumulator::appendStyleNodeOpenTag(StringBuilder& out, StyleProperties* style, Document& document, bool isBlock) |
| 359 | { |
| 360 | // wrappingStyleForSerialization should have removed -webkit-text-decorations-in-effect |
| 361 | ASSERT(propertyMissingOrEqualToNone(style, CSSPropertyWebkitTextDecorationsInEffect)); |
| 362 | if (isBlock) |
| 363 | out.appendLiteral("<div style=\"" ); |
| 364 | else |
| 365 | out.appendLiteral("<span style=\"" ); |
| 366 | appendAttributeValue(out, style->asText(), document.isHTMLDocument()); |
| 367 | out.appendLiteral("\">" ); |
| 368 | } |
| 369 | |
| 370 | const String& StyledMarkupAccumulator::styleNodeCloseTag(bool isBlock) |
| 371 | { |
| 372 | static NeverDestroyed<const String> divClose(MAKE_STATIC_STRING_IMPL("</div>" )); |
| 373 | static NeverDestroyed<const String> styleSpanClose(MAKE_STATIC_STRING_IMPL("</span>" )); |
| 374 | return isBlock ? divClose : styleSpanClose; |
| 375 | } |
| 376 | |
| 377 | String StyledMarkupAccumulator::takeResults() |
| 378 | { |
| 379 | StringBuilder result; |
| 380 | result.reserveCapacity(totalLength(m_reversedPrecedingMarkup) + length()); |
| 381 | |
| 382 | for (size_t i = m_reversedPrecedingMarkup.size(); i > 0; --i) |
| 383 | result.append(m_reversedPrecedingMarkup[i - 1]); |
| 384 | |
| 385 | concatenateMarkup(result); |
| 386 | |
| 387 | // We remove '\0' characters because they are not visibly rendered to the user. |
| 388 | return result.toString().replaceWithLiteral('\0', "" ); |
| 389 | } |
| 390 | |
| 391 | void StyledMarkupAccumulator::appendText(StringBuilder& out, const Text& text) |
| 392 | { |
| 393 | const bool parentIsTextarea = is<HTMLTextAreaElement>(text.parentElement()); |
| 394 | const bool wrappingSpan = shouldApplyWrappingStyle(text) && !parentIsTextarea; |
| 395 | if (wrappingSpan) { |
| 396 | RefPtr<EditingStyle> wrappingStyle = m_wrappingStyle->copy(); |
| 397 | // FIXME: <rdar://problem/5371536> Style rules that match pasted content can change it's appearance |
| 398 | // Make sure spans are inline style in paste side e.g. span { display: block }. |
| 399 | wrappingStyle->forceInline(); |
| 400 | // FIXME: Should this be included in forceInline? |
| 401 | wrappingStyle->style()->setProperty(CSSPropertyFloat, CSSValueNone); |
| 402 | |
| 403 | appendStyleNodeOpenTag(out, wrappingStyle->style(), text.document()); |
| 404 | } |
| 405 | |
| 406 | if (!shouldAnnotate() || parentIsTextarea) { |
| 407 | auto content = textContentRespectingRange(text); |
| 408 | appendCharactersReplacingEntities(out, content, 0, content.length(), entityMaskForText(text)); |
| 409 | } else { |
| 410 | const bool useRenderedText = !enclosingElementWithTag(firstPositionInNode(const_cast<Text*>(&text)), selectTag); |
| 411 | String content = useRenderedText ? renderedTextRespectingRange(text) : textContentRespectingRange(text); |
| 412 | StringBuilder buffer; |
| 413 | appendCharactersReplacingEntities(buffer, content, 0, content.length(), EntityMaskInPCDATA); |
| 414 | out.append(convertHTMLTextToInterchangeFormat(buffer.toString(), &text)); |
| 415 | } |
| 416 | |
| 417 | if (wrappingSpan) |
| 418 | out.append(styleNodeCloseTag()); |
| 419 | } |
| 420 | |
| 421 | String StyledMarkupAccumulator::renderedTextRespectingRange(const Text& text) |
| 422 | { |
| 423 | TextIteratorBehavior behavior = TextIteratorDefaultBehavior; |
| 424 | Position start = &text == m_start.containerNode() ? m_start : firstPositionInNode(const_cast<Text*>(&text)); |
| 425 | Position end; |
| 426 | if (&text == m_end.containerNode()) |
| 427 | end = m_end; |
| 428 | else { |
| 429 | end = lastPositionInNode(const_cast<Text*>(&text)); |
| 430 | if (!m_end.isNull()) |
| 431 | behavior = TextIteratorBehavesAsIfNodesFollowing; |
| 432 | } |
| 433 | |
| 434 | return plainText(Range::create(text.document(), start, end).ptr(), behavior); |
| 435 | } |
| 436 | |
| 437 | String StyledMarkupAccumulator::textContentRespectingRange(const Text& text) |
| 438 | { |
| 439 | if (m_start.isNull() && m_end.isNull()) |
| 440 | return text.data(); |
| 441 | |
| 442 | unsigned start = 0; |
| 443 | unsigned end = std::numeric_limits<unsigned>::max(); |
| 444 | if (&text == m_start.containerNode()) |
| 445 | start = m_start.offsetInContainerNode(); |
| 446 | if (&text == m_end.containerNode()) |
| 447 | end = m_end.offsetInContainerNode(); |
| 448 | ASSERT(start < end); |
| 449 | return text.data().substring(start, end - start); |
| 450 | } |
| 451 | |
| 452 | void StyledMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces) |
| 453 | { |
| 454 | #if ENABLE(ATTACHMENT_ELEMENT) |
| 455 | if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) |
| 456 | return; |
| 457 | |
| 458 | if (is<HTMLAttachmentElement>(element)) { |
| 459 | auto& attachment = downcast<HTMLAttachmentElement>(element); |
| 460 | appendAttribute(out, element, { webkitattachmentidAttr, attachment.uniqueIdentifier() }, namespaces); |
| 461 | if (auto* file = attachment.file()) { |
| 462 | // These attributes are only intended for File deserialization, and are removed from the generated attachment |
| 463 | // element after we've deserialized and set its backing File, in restoreAttachmentElementsInFragment. |
| 464 | appendAttribute(out, element, { webkitattachmentpathAttr, file->path() }, namespaces); |
| 465 | appendAttribute(out, element, { webkitattachmentbloburlAttr, file->url().string() }, namespaces); |
| 466 | } |
| 467 | } else if (is<HTMLImageElement>(element)) { |
| 468 | if (auto attachment = downcast<HTMLImageElement>(element).attachmentElement()) |
| 469 | appendAttribute(out, element, { webkitattachmentidAttr, attachment->uniqueIdentifier() }, namespaces); |
| 470 | } |
| 471 | #else |
| 472 | UNUSED_PARAM(out); |
| 473 | UNUSED_PARAM(element); |
| 474 | UNUSED_PARAM(namespaces); |
| 475 | #endif |
| 476 | } |
| 477 | |
| 478 | bool StyledMarkupAccumulator::shouldPreserveMSOListStyleForElement(const Element& element) |
| 479 | { |
| 480 | if (m_inMSOList) |
| 481 | return true; |
| 482 | if (m_shouldPreserveMSOList) { |
| 483 | auto style = element.getAttribute(styleAttr); |
| 484 | return style.startsWith("mso-list:" ) || style.contains(";mso-list:" ) || style.contains("\nmso-list:" ); |
| 485 | } |
| 486 | return false; |
| 487 | } |
| 488 | |
| 489 | void StyledMarkupAccumulator::appendStartTag(StringBuilder& out, const Element& element, bool addDisplayInline, RangeFullySelectsNode rangeFullySelectsNode) |
| 490 | { |
| 491 | const bool documentIsHTML = element.document().isHTMLDocument(); |
| 492 | const bool isSlotElement = is<HTMLSlotElement>(element); |
| 493 | if (UNLIKELY(isSlotElement)) |
| 494 | out.append("<span" ); |
| 495 | else |
| 496 | appendOpenTag(out, element, nullptr); |
| 497 | |
| 498 | appendCustomAttributes(out, element, nullptr); |
| 499 | |
| 500 | const bool shouldAnnotateOrForceInline = element.isHTMLElement() && (shouldAnnotate() || addDisplayInline); |
| 501 | bool shouldOverrideStyleAttr = (shouldAnnotateOrForceInline || shouldApplyWrappingStyle(element) || isSlotElement) && !shouldPreserveMSOListStyleForElement(element); |
| 502 | if (element.hasAttributes()) { |
| 503 | for (const Attribute& attribute : element.attributesIterator()) { |
| 504 | // We'll handle the style attribute separately, below. |
| 505 | if (attribute.name() == styleAttr && shouldOverrideStyleAttr) |
| 506 | continue; |
| 507 | if (element.isEventHandlerAttribute(attribute) || element.isJavaScriptURLAttribute(attribute)) |
| 508 | continue; |
| 509 | appendAttribute(out, element, attribute, 0); |
| 510 | } |
| 511 | } |
| 512 | |
| 513 | if (shouldOverrideStyleAttr) { |
| 514 | RefPtr<EditingStyle> newInlineStyle; |
| 515 | |
| 516 | if (shouldApplyWrappingStyle(element)) { |
| 517 | newInlineStyle = m_wrappingStyle->copy(); |
| 518 | newInlineStyle->removePropertiesInElementDefaultStyle(*const_cast<Element*>(&element)); |
| 519 | newInlineStyle->removeStyleConflictingWithStyleOfNode(*const_cast<Element*>(&element)); |
| 520 | } else |
| 521 | newInlineStyle = EditingStyle::create(); |
| 522 | |
| 523 | if (isSlotElement) |
| 524 | newInlineStyle->addDisplayContents(); |
| 525 | |
| 526 | if (is<StyledElement>(element) && downcast<StyledElement>(element).inlineStyle()) |
| 527 | newInlineStyle->overrideWithStyle(*downcast<StyledElement>(element).inlineStyle()); |
| 528 | |
| 529 | if (shouldAnnotateOrForceInline) { |
| 530 | if (shouldAnnotate()) |
| 531 | newInlineStyle->mergeStyleFromRulesForSerialization(downcast<HTMLElement>(*const_cast<Element*>(&element))); |
| 532 | |
| 533 | if (addDisplayInline) |
| 534 | newInlineStyle->forceInline(); |
| 535 | |
| 536 | if (m_needsPositionStyleConversion) { |
| 537 | m_needRelativeStyleWrapper |= newInlineStyle->convertPositionStyle(); |
| 538 | m_needClearingDiv |= newInlineStyle->isFloating(); |
| 539 | } |
| 540 | |
| 541 | // If the node is not fully selected by the range, then we don't want to keep styles that affect its relationship to the nodes around it |
| 542 | // only the ones that affect it and the nodes within it. |
| 543 | if (rangeFullySelectsNode == DoesNotFullySelectNode && newInlineStyle->style()) |
| 544 | newInlineStyle->style()->removeProperty(CSSPropertyFloat); |
| 545 | } |
| 546 | |
| 547 | if (!newInlineStyle->isEmpty()) { |
| 548 | out.appendLiteral(" style=\"" ); |
| 549 | appendAttributeValue(out, newInlineStyle->style()->asText(), documentIsHTML); |
| 550 | out.append('\"'); |
| 551 | } |
| 552 | } |
| 553 | |
| 554 | appendCloseTag(out, element); |
| 555 | } |
| 556 | |
| 557 | void StyledMarkupAccumulator::appendEndTag(StringBuilder& out, const Element& element) |
| 558 | { |
| 559 | if (UNLIKELY(is<HTMLSlotElement>(element))) |
| 560 | out.append("</span>" ); |
| 561 | else |
| 562 | MarkupAccumulator::appendEndTag(out, element); |
| 563 | } |
| 564 | |
| 565 | Node* StyledMarkupAccumulator::serializeNodes(const Position& start, const Position& end) |
| 566 | { |
| 567 | ASSERT(comparePositions(start, end) <= 0); |
| 568 | auto startNode = start.firstNode(); |
| 569 | Node* pastEnd = end.computeNodeAfterPosition(); |
| 570 | if (!pastEnd && end.containerNode()) |
| 571 | pastEnd = nextSkippingChildren(*end.containerNode()); |
| 572 | |
| 573 | if (!m_highestNodeToBeSerialized) { |
| 574 | Node* lastClosed = traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::DoNotEmitString); |
| 575 | m_highestNodeToBeSerialized = lastClosed; |
| 576 | } |
| 577 | |
| 578 | if (m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode()) |
| 579 | m_wrappingStyle = EditingStyle::wrappingStyleForSerialization(*m_highestNodeToBeSerialized->parentNode(), shouldAnnotate()); |
| 580 | |
| 581 | return traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::EmitString); |
| 582 | } |
| 583 | |
// Walks the tree from |startNode| up to (but not including) |pastEnd| in
// document order, entering and exiting nodes as it goes. With
// NodeTraversalMode::EmitString, markup is produced for the visited nodes;
// with DoNotEmitString, only the bookkeeping is performed. Returns the last
// node that was exited, or nullptr if none was.
Node* StyledMarkupAccumulator::traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode traversalMode)
{
    const bool shouldEmit = traversalMode == NodeTraversalMode::EmitString;

    // Every traversal starts outside of any MSO list section.
    m_inMSOList = false;

    // Number of nodes entered by this traversal that have not been exited yet.
    unsigned depth = 0;
    // Returns false when the node (and its subtree) must be skipped: either it
    // was consumed by the MSO list preservation path, or it has no renderer,
    // is not display:contents, and is not enclosed by a <select> element.
    auto enterNode = [&] (Node& node) {
        if (UNLIKELY(m_shouldPreserveMSOList) && shouldEmit) {
            if (appendNodeToPreserveMSOList(node))
                return false;
        }

        bool isDisplayContents = is<Element>(node) && downcast<Element>(node).hasDisplayContents();
        if (!node.renderer() && !isDisplayContents && !enclosingElementWithTag(firstPositionInOrBeforeNode(&node), selectTag))
            return false;

        ++depth;
        if (shouldEmit)
            startAppendingNode(node);

        return true;
    };

    Node* lastClosed = nullptr;
    // Exits |node|. If a node entered by this traversal is still open
    // (depth > 0), it is closed normally; otherwise the markup accumulated so
    // far is wrapped with |node|. In both cases |node| becomes lastClosed.
    auto exitNode = [&] (Node& node) {
        bool closing = depth;
        if (depth)
            --depth;
        if (shouldEmit) {
            if (closing)
                endAppendingNode(node);
            else
                wrapWithNode(node);
        }
        lastClosed = &node;
    };

    Node* lastNode = nullptr;
    Node* next = nullptr;
    for (auto* n = startNode; n != pastEnd; lastNode = n, n = next) {

        // Compute the next node in document order: first child, else next
        // sibling, else the next sibling of the closest ancestor that has one.
        // Ancestors passed on the way up are recorded so they can be exited.
        Vector<Node*, 8> exitedAncestors;
        next = nullptr;
        if (auto* child = firstChild(*n))
            next = child;
        else if (auto* sibling = nextSibling(*n))
            next = sibling;
        else {
            for (auto* ancestor = parentNode(*n); ancestor; ancestor = parentNode(*ancestor)) {
                exitedAncestors.append(ancestor);
                if (auto* sibling = nextSibling(*ancestor)) {
                    next = sibling;
                    break;
                }
            }
        }
        ASSERT(next || !pastEnd);

        if (isBlock(n) && canHaveChildrenForEditing(*n) && next == pastEnd) {
            // Don't write out empty block containers that aren't fully selected.
            continue;
        }

        if (!enterNode(*n)) {
            // The node was not entered, so its subtree is skipped as well.
            next = nextSkippingChildren(*n);
            // Don't skip over pastEnd.
            if (pastEnd && isDescendantOf(*pastEnd, *n))
                next = pastEnd;
            ASSERT(next || !pastEnd);
        } else {
            // A node without children is exited right after being entered.
            if (!hasChildNodes(*n))
                exitNode(*n);
        }

        // Exit the ancestors that were passed while computing |next|, but stop
        // once nothing is open and the traversal is about to end.
        for (auto* ancestor : exitedAncestors) {
            if (!depth && next == pastEnd)
                break;
            exitNode(*ancestor);
        }
    }

    ASSERT(lastNode || !depth);
    // Exit the nodes that are still open, walking up from the end point.
    if (depth) {
        for (auto* ancestor = parentNode(pastEnd ? *pastEnd : *lastNode); ancestor && depth; ancestor = parentNode(*ancestor))
            exitNode(*ancestor);
    }

    return lastClosed;
}
| 674 | |
| 675 | bool StyledMarkupAccumulator::appendNodeToPreserveMSOList(Node& node) |
| 676 | { |
| 677 | if (is<Comment>(node)) { |
| 678 | auto& = downcast<Comment>(node); |
| 679 | if (!m_inMSOList && commentNode.data() == "[if !supportLists]" ) |
| 680 | m_inMSOList = true; |
| 681 | else if (m_inMSOList && commentNode.data() == "[endif]" ) |
| 682 | m_inMSOList = false; |
| 683 | else |
| 684 | return false; |
| 685 | startAppendingNode(commentNode); |
| 686 | return true; |
| 687 | } |
| 688 | if (is<HTMLStyleElement>(node)) { |
| 689 | auto* firstChild = node.firstChild(); |
| 690 | if (!is<Text>(firstChild)) |
| 691 | return false; |
| 692 | |
| 693 | auto& textChild = downcast<Text>(*firstChild); |
| 694 | auto& styleContent = textChild.data(); |
| 695 | |
| 696 | const auto msoStyleDefinitionsStart = styleContent.find("/* Style Definitions */" ); |
| 697 | const auto msoListDefinitionsStart = styleContent.find("/* List Definitions */" ); |
| 698 | const auto lastListItem = styleContent.reverseFind("\n@list" ); |
| 699 | if (msoListDefinitionsStart == notFound || lastListItem == notFound) |
| 700 | return false; |
| 701 | const auto start = msoStyleDefinitionsStart != notFound && msoStyleDefinitionsStart < msoListDefinitionsStart ? msoStyleDefinitionsStart : msoListDefinitionsStart; |
| 702 | |
| 703 | const auto msoListDefinitionsEnd = styleContent.find(";}\n" , lastListItem); |
| 704 | if (msoListDefinitionsEnd == notFound || start >= msoListDefinitionsEnd) |
| 705 | return false; |
| 706 | |
| 707 | appendString("<head><style class=\"" WebKitMSOListQuirksStyle "\">\n<!--\n" ); |
| 708 | appendStringView(StringView(textChild.data()).substring(start, msoListDefinitionsEnd - start + 3)); |
| 709 | appendString("\n-->\n</style></head>" ); |
| 710 | |
| 711 | return true; |
| 712 | } |
| 713 | return false; |
| 714 | } |
| 715 | |
| 716 | static Node* ancestorToRetainStructureAndAppearanceForBlock(Node* commonAncestorBlock) |
| 717 | { |
| 718 | if (!commonAncestorBlock) |
| 719 | return nullptr; |
| 720 | |
| 721 | if (commonAncestorBlock->hasTagName(tbodyTag) || commonAncestorBlock->hasTagName(trTag)) { |
| 722 | ContainerNode* table = commonAncestorBlock->parentNode(); |
| 723 | while (table && !is<HTMLTableElement>(*table)) |
| 724 | table = table->parentNode(); |
| 725 | |
| 726 | return table; |
| 727 | } |
| 728 | |
| 729 | if (isNonTableCellHTMLBlockElement(commonAncestorBlock)) |
| 730 | return commonAncestorBlock; |
| 731 | |
| 732 | return nullptr; |
| 733 | } |
| 734 | |
| 735 | static inline Node* ancestorToRetainStructureAndAppearance(Node* commonAncestor) |
| 736 | { |
| 737 | return ancestorToRetainStructureAndAppearanceForBlock(enclosingBlock(commonAncestor)); |
| 738 | } |
| 739 | |
| 740 | static bool propertyMissingOrEqualToNone(StyleProperties* style, CSSPropertyID propertyID) |
| 741 | { |
| 742 | if (!style) |
| 743 | return false; |
| 744 | RefPtr<CSSValue> value = style->getPropertyCSSValue(propertyID); |
| 745 | if (!value) |
| 746 | return true; |
| 747 | if (!is<CSSPrimitiveValue>(*value)) |
| 748 | return false; |
| 749 | return downcast<CSSPrimitiveValue>(*value).valueID() == CSSValueNone; |
| 750 | } |
| 751 | |
| 752 | static bool needInterchangeNewlineAfter(const VisiblePosition& v) |
| 753 | { |
| 754 | VisiblePosition next = v.next(); |
| 755 | Node* upstreamNode = next.deepEquivalent().upstream().deprecatedNode(); |
| 756 | Node* downstreamNode = v.deepEquivalent().downstream().deprecatedNode(); |
| 757 | // Add an interchange newline if a paragraph break is selected and a br won't already be added to the markup to represent it. |
| 758 | return isEndOfParagraph(v) && isStartOfParagraph(next) && !(upstreamNode->hasTagName(brTag) && upstreamNode == downstreamNode); |
| 759 | } |
| 760 | |
| 761 | static RefPtr<EditingStyle> styleFromMatchedRulesAndInlineDecl(Node& node) |
| 762 | { |
| 763 | if (!is<HTMLElement>(node)) |
| 764 | return nullptr; |
| 765 | |
| 766 | auto& element = downcast<HTMLElement>(node); |
| 767 | auto style = EditingStyle::create(element.inlineStyle()); |
| 768 | style->mergeStyleFromRules(element); |
| 769 | return style; |
| 770 | } |
| 771 | |
| 772 | static bool isElementPresentational(const Node* node) |
| 773 | { |
| 774 | return node->hasTagName(uTag) || node->hasTagName(sTag) || node->hasTagName(strikeTag) |
| 775 | || node->hasTagName(iTag) || node->hasTagName(emTag) || node->hasTagName(bTag) || node->hasTagName(strongTag); |
| 776 | } |
| 777 | |
| 778 | static Node* highestAncestorToWrapMarkup(const Position& start, const Position& end, Node& commonAncestor, AnnotateForInterchange annotate) |
| 779 | { |
| 780 | Node* specialCommonAncestor = nullptr; |
| 781 | if (annotate == AnnotateForInterchange::Yes) { |
| 782 | // Include ancestors that aren't completely inside the range but are required to retain |
| 783 | // the structure and appearance of the copied markup. |
| 784 | specialCommonAncestor = ancestorToRetainStructureAndAppearance(&commonAncestor); |
| 785 | |
| 786 | if (auto* parentListNode = enclosingNodeOfType(start, isListItem)) { |
| 787 | if (!editingIgnoresContent(*parentListNode) && VisibleSelection::selectionFromContentsOfNode(parentListNode) == VisibleSelection(start, end)) { |
| 788 | specialCommonAncestor = parentListNode->parentNode(); |
| 789 | while (specialCommonAncestor && !isListHTMLElement(specialCommonAncestor)) |
| 790 | specialCommonAncestor = specialCommonAncestor->parentNode(); |
| 791 | } |
| 792 | } |
| 793 | |
| 794 | // Retain the Mail quote level by including all ancestor mail block quotes. |
| 795 | if (Node* highestMailBlockquote = highestEnclosingNodeOfType(start, isMailBlockquote, CanCrossEditingBoundary)) |
| 796 | specialCommonAncestor = highestMailBlockquote; |
| 797 | } |
| 798 | |
| 799 | auto* checkAncestor = specialCommonAncestor ? specialCommonAncestor : &commonAncestor; |
| 800 | if (checkAncestor->renderer() && checkAncestor->renderer()->containingBlock()) { |
| 801 | Node* newSpecialCommonAncestor = highestEnclosingNodeOfType(firstPositionInNode(checkAncestor), &isElementPresentational, CanCrossEditingBoundary, checkAncestor->renderer()->containingBlock()->element()); |
| 802 | if (newSpecialCommonAncestor) |
| 803 | specialCommonAncestor = newSpecialCommonAncestor; |
| 804 | } |
| 805 | |
| 806 | // If a single tab is selected, commonAncestor will be a text node inside a tab span. |
| 807 | // If two or more tabs are selected, commonAncestor will be the tab span. |
| 808 | // In either case, if there is a specialCommonAncestor already, it will necessarily be above |
| 809 | // any tab span that needs to be included. |
| 810 | if (!specialCommonAncestor && isTabSpanTextNode(&commonAncestor)) |
| 811 | specialCommonAncestor = commonAncestor.parentNode(); |
| 812 | if (!specialCommonAncestor && isTabSpanNode(&commonAncestor)) |
| 813 | specialCommonAncestor = &commonAncestor; |
| 814 | |
| 815 | if (auto* enclosingAnchor = enclosingElementWithTag(firstPositionInNode(specialCommonAncestor ? specialCommonAncestor : &commonAncestor), aTag)) |
| 816 | specialCommonAncestor = enclosingAnchor; |
| 817 | |
| 818 | return specialCommonAncestor; |
| 819 | } |
| 820 | |
| 821 | static String serializePreservingVisualAppearanceInternal(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree, |
| 822 | AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, MSOListMode msoListMode) |
| 823 | { |
| 824 | static NeverDestroyed<const String> interchangeNewlineString(MAKE_STATIC_STRING_IMPL("<br class=\"" AppleInterchangeNewline "\">" )); |
| 825 | |
| 826 | if (!comparePositions(start, end)) |
| 827 | return emptyString(); |
| 828 | |
| 829 | RefPtr<Node> commonAncestor = commonShadowIncludingAncestor(start, end); |
| 830 | if (!commonAncestor) |
| 831 | return emptyString(); |
| 832 | |
| 833 | auto& document = *start.document(); |
| 834 | document.updateLayoutIgnorePendingStylesheets(); |
| 835 | |
| 836 | VisiblePosition visibleStart { start }; |
| 837 | VisiblePosition visibleEnd { end }; |
| 838 | |
| 839 | auto body = makeRefPtr(enclosingElementWithTag(firstPositionInNode(commonAncestor.get()), bodyTag)); |
| 840 | RefPtr<Element> fullySelectedRoot; |
| 841 | // FIXME: Do this for all fully selected blocks, not just the body. |
| 842 | if (body && VisiblePosition(firstPositionInNode(body.get())) == visibleStart && VisiblePosition(lastPositionInNode(body.get())) == visibleEnd) |
| 843 | fullySelectedRoot = body; |
| 844 | bool needsPositionStyleConversion = body && fullySelectedRoot == body && document.settings().shouldConvertPositionStyleOnCopy(); |
| 845 | |
| 846 | Node* specialCommonAncestor = highestAncestorToWrapMarkup(start, end, *commonAncestor, annotate); |
| 847 | |
| 848 | StyledMarkupAccumulator accumulator(start, end, nodes, urlsToResolve, serializeComposedTree, annotate, msoListMode, needsPositionStyleConversion, specialCommonAncestor); |
| 849 | |
| 850 | Position startAdjustedForInterchangeNewline = start; |
| 851 | if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleStart)) { |
| 852 | if (visibleStart == visibleEnd.previous()) |
| 853 | return interchangeNewlineString; |
| 854 | |
| 855 | accumulator.appendString(interchangeNewlineString); |
| 856 | startAdjustedForInterchangeNewline = visibleStart.next().deepEquivalent(); |
| 857 | |
| 858 | if (comparePositions(startAdjustedForInterchangeNewline, end) >= 0) |
| 859 | return interchangeNewlineString; |
| 860 | } |
| 861 | |
| 862 | Node* lastClosed = accumulator.serializeNodes(startAdjustedForInterchangeNewline, end); |
| 863 | |
| 864 | if (specialCommonAncestor && lastClosed) { |
| 865 | // Also include all of the ancestors of lastClosed up to this special ancestor. |
| 866 | for (ContainerNode* ancestor = accumulator.parentNode(*lastClosed); ancestor; ancestor = accumulator.parentNode(*ancestor)) { |
| 867 | if (ancestor == fullySelectedRoot && convertBlocksToInlines == ConvertBlocksToInlines::No) { |
| 868 | RefPtr<EditingStyle> fullySelectedRootStyle = styleFromMatchedRulesAndInlineDecl(*fullySelectedRoot); |
| 869 | |
| 870 | // Bring the background attribute over, but not as an attribute because a background attribute on a div |
| 871 | // appears to have no effect. |
| 872 | if ((!fullySelectedRootStyle || !fullySelectedRootStyle->style() || !fullySelectedRootStyle->style()->getPropertyCSSValue(CSSPropertyBackgroundImage)) |
| 873 | && fullySelectedRoot->hasAttributeWithoutSynchronization(backgroundAttr)) |
| 874 | fullySelectedRootStyle->style()->setProperty(CSSPropertyBackgroundImage, "url('" + fullySelectedRoot->getAttribute(backgroundAttr) + "')" ); |
| 875 | |
| 876 | if (fullySelectedRootStyle->style()) { |
| 877 | // Reset the CSS properties to avoid an assertion error in addStyleMarkup(). |
| 878 | // This assertion is caused at least when we select all text of a <body> element whose |
| 879 | // 'text-decoration' property is "inherit", and copy it. |
| 880 | if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyTextDecoration)) |
| 881 | fullySelectedRootStyle->style()->setProperty(CSSPropertyTextDecoration, CSSValueNone); |
| 882 | if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyWebkitTextDecorationsInEffect)) |
| 883 | fullySelectedRootStyle->style()->setProperty(CSSPropertyWebkitTextDecorationsInEffect, CSSValueNone); |
| 884 | accumulator.wrapWithStyleNode(fullySelectedRootStyle->style(), document, true); |
| 885 | } |
| 886 | } else { |
| 887 | // Since this node and all the other ancestors are not in the selection we want to set RangeFullySelectsNode to DoesNotFullySelectNode |
| 888 | // so that styles that affect the exterior of the node are not included. |
| 889 | accumulator.wrapWithNode(*ancestor, convertBlocksToInlines == ConvertBlocksToInlines::Yes, StyledMarkupAccumulator::DoesNotFullySelectNode); |
| 890 | } |
| 891 | if (nodes) |
| 892 | nodes->append(ancestor); |
| 893 | |
| 894 | if (ancestor == specialCommonAncestor) |
| 895 | break; |
| 896 | } |
| 897 | } |
| 898 | |
| 899 | if (accumulator.needRelativeStyleWrapper() && needsPositionStyleConversion) { |
| 900 | if (accumulator.needClearingDiv()) |
| 901 | accumulator.appendString("<div style=\"clear: both;\"></div>" ); |
| 902 | RefPtr<EditingStyle> positionRelativeStyle = styleFromMatchedRulesAndInlineDecl(*body); |
| 903 | positionRelativeStyle->style()->setProperty(CSSPropertyPosition, CSSValueRelative); |
| 904 | accumulator.wrapWithStyleNode(positionRelativeStyle->style(), document, true); |
| 905 | } |
| 906 | |
| 907 | // FIXME: The interchange newline should be placed in the block that it's in, not after all of the content, unconditionally. |
| 908 | if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous())) |
| 909 | accumulator.appendString(interchangeNewlineString); |
| 910 | |
| 911 | return accumulator.takeResults(); |
| 912 | } |
| 913 | |
| 914 | String serializePreservingVisualAppearance(const Range& range, Vector<Node*>* nodes, AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, ResolveURLs urlsToReslve) |
| 915 | { |
| 916 | return serializePreservingVisualAppearanceInternal(range.startPosition(), range.endPosition(), nodes, urlsToReslve, SerializeComposedTree::No, |
| 917 | annotate, convertBlocksToInlines, MSOListMode::DoNotPreserve); |
| 918 | } |
| 919 | |
| 920 | String serializePreservingVisualAppearance(const VisibleSelection& selection, ResolveURLs resolveURLs, SerializeComposedTree serializeComposedTree, Vector<Node*>* nodes) |
| 921 | { |
| 922 | return serializePreservingVisualAppearanceInternal(selection.start(), selection.end(), nodes, resolveURLs, serializeComposedTree, |
| 923 | AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, MSOListMode::DoNotPreserve); |
| 924 | } |
| 925 | |
| 926 | |
| 927 | static bool shouldPreserveMSOLists(const String& markup) |
| 928 | { |
| 929 | if (!markup.startsWith("<html xmlns:" )) |
| 930 | return false; |
| 931 | auto tagClose = markup.find('>'); |
| 932 | if (tagClose == notFound) |
| 933 | return false; |
| 934 | auto htmlTag = markup.substring(0, tagClose); |
| 935 | return htmlTag.contains("xmlns:o=\"urn:schemas-microsoft-com:office:office\"" ) |
| 936 | && htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"" ); |
| 937 | } |
| 938 | |
| 939 | String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup) |
| 940 | { |
| 941 | MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup) |
| 942 | ? MSOListMode::Preserve : MSOListMode::DoNotPreserve; |
| 943 | |
| 944 | auto bodyElement = makeRefPtr(document.body()); |
| 945 | ASSERT(bodyElement); |
| 946 | bodyElement->appendChild(fragment.get()); |
| 947 | |
| 948 | // SerializeComposedTree::No because there can't be a shadow tree in the pasted fragment. |
| 949 | auto result = serializePreservingVisualAppearanceInternal(firstPositionInNode(bodyElement.get()), lastPositionInNode(bodyElement.get()), nullptr, |
| 950 | ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, msoListMode); |
| 951 | |
| 952 | if (msoListMode == MSOListMode::Preserve) { |
| 953 | StringBuilder builder; |
| 954 | builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n" |
| 955 | "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n" |
| 956 | "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n" |
| 957 | "xmlns=\"http://www.w3.org/TR/REC-html40\">" ); |
| 958 | builder.append(result); |
| 959 | builder.appendLiteral("</html>" ); |
| 960 | return builder.toString(); |
| 961 | } |
| 962 | |
| 963 | return result; |
| 964 | } |
| 965 | |
| 966 | static void restoreAttachmentElementsInFragment(DocumentFragment& fragment) |
| 967 | { |
| 968 | #if ENABLE(ATTACHMENT_ELEMENT) |
| 969 | if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) |
| 970 | return; |
| 971 | |
| 972 | // When creating a fragment we must strip the webkit-attachment-path attribute after restoring the File object. |
| 973 | Vector<Ref<HTMLAttachmentElement>> attachments; |
| 974 | for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment)) |
| 975 | attachments.append(attachment); |
| 976 | |
| 977 | for (auto& attachment : attachments) { |
| 978 | attachment->setUniqueIdentifier(attachment->attributeWithoutSynchronization(webkitattachmentidAttr)); |
| 979 | |
| 980 | auto attachmentPath = attachment->attachmentPath(); |
| 981 | auto blobURL = attachment->blobURL(); |
| 982 | if (!attachmentPath.isEmpty()) |
| 983 | attachment->setFile(File::create(attachmentPath)); |
| 984 | else if (!blobURL.isEmpty()) |
| 985 | attachment->setFile(File::deserialize({ }, blobURL, attachment->attachmentType(), attachment->attachmentTitle())); |
| 986 | |
| 987 | // Remove temporary attributes that were previously added in StyledMarkupAccumulator::appendCustomAttributes. |
| 988 | attachment->removeAttribute(webkitattachmentidAttr); |
| 989 | attachment->removeAttribute(webkitattachmentpathAttr); |
| 990 | attachment->removeAttribute(webkitattachmentbloburlAttr); |
| 991 | } |
| 992 | |
| 993 | Vector<Ref<HTMLImageElement>> images; |
| 994 | for (auto& image : descendantsOfType<HTMLImageElement>(fragment)) |
| 995 | images.append(image); |
| 996 | |
| 997 | for (auto& image : images) { |
| 998 | auto attachmentIdentifier = image->attributeWithoutSynchronization(webkitattachmentidAttr); |
| 999 | if (attachmentIdentifier.isEmpty()) |
| 1000 | continue; |
| 1001 | |
| 1002 | auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, *fragment.ownerDocument()); |
| 1003 | attachment->setUniqueIdentifier(attachmentIdentifier); |
| 1004 | image->setAttachmentElement(WTFMove(attachment)); |
| 1005 | image->removeAttribute(webkitattachmentidAttr); |
| 1006 | } |
| 1007 | #else |
| 1008 | UNUSED_PARAM(fragment); |
| 1009 | #endif |
| 1010 | } |
| 1011 | |
| 1012 | Ref<DocumentFragment> createFragmentFromMarkup(Document& document, const String& markup, const String& baseURL, ParserContentPolicy parserContentPolicy) |
| 1013 | { |
| 1014 | // We use a fake body element here to trick the HTML parser into using the InBody insertion mode. |
| 1015 | auto fakeBody = HTMLBodyElement::create(document); |
| 1016 | auto fragment = DocumentFragment::create(document); |
| 1017 | |
| 1018 | fragment->parseHTML(markup, fakeBody.ptr(), parserContentPolicy); |
| 1019 | restoreAttachmentElementsInFragment(fragment); |
| 1020 | if (!baseURL.isEmpty() && baseURL != WTF::blankURL() && baseURL != document.baseURL()) |
| 1021 | completeURLs(fragment.ptr(), baseURL); |
| 1022 | |
| 1023 | return fragment; |
| 1024 | } |
| 1025 | |
| 1026 | String serializeFragment(const Node& node, SerializedNodes root, Vector<Node*>* nodes, ResolveURLs urlsToResolve, Vector<QualifiedName>* tagNamesToSkip, SerializationSyntax serializationSyntax) |
| 1027 | { |
| 1028 | MarkupAccumulator accumulator(nodes, urlsToResolve, serializationSyntax); |
| 1029 | return accumulator.serializeNodes(const_cast<Node&>(node), root, tagNamesToSkip); |
| 1030 | } |
| 1031 | |
| 1032 | static void fillContainerFromString(ContainerNode& paragraph, const String& string) |
| 1033 | { |
| 1034 | Document& document = paragraph.document(); |
| 1035 | |
| 1036 | if (string.isEmpty()) { |
| 1037 | paragraph.appendChild(createBlockPlaceholderElement(document)); |
| 1038 | return; |
| 1039 | } |
| 1040 | |
| 1041 | ASSERT(string.find('\n') == notFound); |
| 1042 | |
| 1043 | Vector<String> tabList = string.splitAllowingEmptyEntries('\t'); |
| 1044 | String tabText = emptyString(); |
| 1045 | bool first = true; |
| 1046 | size_t numEntries = tabList.size(); |
| 1047 | for (size_t i = 0; i < numEntries; ++i) { |
| 1048 | const String& s = tabList[i]; |
| 1049 | |
| 1050 | // append the non-tab textual part |
| 1051 | if (!s.isEmpty()) { |
| 1052 | if (!tabText.isEmpty()) { |
| 1053 | paragraph.appendChild(createTabSpanElement(document, tabText)); |
| 1054 | tabText = emptyString(); |
| 1055 | } |
| 1056 | Ref<Node> textNode = document.createTextNode(stringWithRebalancedWhitespace(s, first, i + 1 == numEntries)); |
| 1057 | paragraph.appendChild(textNode); |
| 1058 | } |
| 1059 | |
| 1060 | // there is a tab after every entry, except the last entry |
| 1061 | // (if the last character is a tab, the list gets an extra empty entry) |
| 1062 | if (i + 1 != numEntries) |
| 1063 | tabText.append('\t'); |
| 1064 | else if (!tabText.isEmpty()) |
| 1065 | paragraph.appendChild(createTabSpanElement(document, tabText)); |
| 1066 | |
| 1067 | first = false; |
| 1068 | } |
| 1069 | } |
| 1070 | |
| 1071 | bool isPlainTextMarkup(Node* node) |
| 1072 | { |
| 1073 | ASSERT(node); |
| 1074 | if (!is<HTMLDivElement>(*node)) |
| 1075 | return false; |
| 1076 | |
| 1077 | HTMLDivElement& element = downcast<HTMLDivElement>(*node); |
| 1078 | if (element.hasAttributes()) |
| 1079 | return false; |
| 1080 | |
| 1081 | Node* firstChild = element.firstChild(); |
| 1082 | if (!firstChild) |
| 1083 | return false; |
| 1084 | |
| 1085 | Node* secondChild = firstChild->nextSibling(); |
| 1086 | if (!secondChild) |
| 1087 | return firstChild->isTextNode() || firstChild->firstChild(); |
| 1088 | |
| 1089 | if (secondChild->nextSibling()) |
| 1090 | return false; |
| 1091 | |
| 1092 | return isTabSpanTextNode(firstChild->firstChild()) && secondChild->isTextNode(); |
| 1093 | } |
| 1094 | |
| 1095 | static bool contextPreservesNewline(const Range& context) |
| 1096 | { |
| 1097 | VisiblePosition position(context.startPosition()); |
| 1098 | Node* container = position.deepEquivalent().containerNode(); |
| 1099 | if (!container || !container->renderer()) |
| 1100 | return false; |
| 1101 | |
| 1102 | return container->renderer()->style().preserveNewline(); |
| 1103 | } |
| 1104 | |
| 1105 | Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text) |
| 1106 | { |
| 1107 | Document& document = context.ownerDocument(); |
| 1108 | Ref<DocumentFragment> fragment = document.createDocumentFragment(); |
| 1109 | |
| 1110 | if (text.isEmpty()) |
| 1111 | return fragment; |
| 1112 | |
| 1113 | String string = text; |
| 1114 | string.replace("\r\n" , "\n" ); |
| 1115 | string.replace('\r', '\n'); |
| 1116 | |
| 1117 | if (contextPreservesNewline(context)) { |
| 1118 | fragment->appendChild(document.createTextNode(string)); |
| 1119 | if (string.endsWith('\n')) { |
| 1120 | auto element = HTMLBRElement::create(document); |
| 1121 | element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline); |
| 1122 | fragment->appendChild(element); |
| 1123 | } |
| 1124 | return fragment; |
| 1125 | } |
| 1126 | |
| 1127 | // A string with no newlines gets added inline, rather than being put into a paragraph. |
| 1128 | if (string.find('\n') == notFound) { |
| 1129 | fillContainerFromString(fragment, string); |
| 1130 | return fragment; |
| 1131 | } |
| 1132 | |
| 1133 | // Break string into paragraphs. Extra line breaks turn into empty paragraphs. |
| 1134 | Node* blockNode = enclosingBlock(context.firstNode()); |
| 1135 | Element* block = downcast<Element>(blockNode); |
| 1136 | bool useClonesOfEnclosingBlock = blockNode |
| 1137 | && blockNode->isElementNode() |
| 1138 | && !block->hasTagName(bodyTag) |
| 1139 | && !block->hasTagName(htmlTag) |
| 1140 | && block != editableRootForPosition(context.startPosition()); |
| 1141 | bool useLineBreak = enclosingTextFormControl(context.startPosition()); |
| 1142 | |
| 1143 | Vector<String> list = string.splitAllowingEmptyEntries('\n'); |
| 1144 | size_t numLines = list.size(); |
| 1145 | for (size_t i = 0; i < numLines; ++i) { |
| 1146 | const String& s = list[i]; |
| 1147 | |
| 1148 | RefPtr<Element> element; |
| 1149 | if (s.isEmpty() && i + 1 == numLines) { |
| 1150 | // For last line, use the "magic BR" rather than a P. |
| 1151 | element = HTMLBRElement::create(document); |
| 1152 | element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline); |
| 1153 | } else if (useLineBreak) { |
| 1154 | element = HTMLBRElement::create(document); |
| 1155 | fillContainerFromString(fragment, s); |
| 1156 | } else { |
| 1157 | if (useClonesOfEnclosingBlock) |
| 1158 | element = block->cloneElementWithoutChildren(document); |
| 1159 | else |
| 1160 | element = createDefaultParagraphElement(document); |
| 1161 | fillContainerFromString(*element, s); |
| 1162 | } |
| 1163 | fragment->appendChild(*element); |
| 1164 | } |
| 1165 | return fragment; |
| 1166 | } |
| 1167 | |
| 1168 | String documentTypeString(const Document& document) |
| 1169 | { |
| 1170 | DocumentType* documentType = document.doctype(); |
| 1171 | if (!documentType) |
| 1172 | return emptyString(); |
| 1173 | return serializeFragment(*documentType, SerializedNodes::SubtreeIncludingNode); |
| 1174 | } |
| 1175 | |
| 1176 | String urlToMarkup(const URL& url, const String& title) |
| 1177 | { |
| 1178 | StringBuilder markup; |
| 1179 | markup.appendLiteral("<a href=\"" ); |
| 1180 | markup.append(url.string()); |
| 1181 | markup.appendLiteral("\">" ); |
| 1182 | MarkupAccumulator::appendCharactersReplacingEntities(markup, title, 0, title.length(), EntityMaskInPCDATA); |
| 1183 | markup.appendLiteral("</a>" ); |
| 1184 | return markup.toString(); |
| 1185 | } |
| 1186 | |
| 1187 | ExceptionOr<Ref<DocumentFragment>> createFragmentForInnerOuterHTML(Element& contextElement, const String& markup, ParserContentPolicy parserContentPolicy) |
| 1188 | { |
| 1189 | auto* document = &contextElement.document(); |
| 1190 | if (contextElement.hasTagName(templateTag)) |
| 1191 | document = &document->ensureTemplateDocument(); |
| 1192 | auto fragment = DocumentFragment::create(*document); |
| 1193 | |
| 1194 | if (document->isHTMLDocument()) { |
| 1195 | fragment->parseHTML(markup, &contextElement, parserContentPolicy); |
| 1196 | return fragment; |
| 1197 | } |
| 1198 | |
| 1199 | bool wasValid = fragment->parseXML(markup, &contextElement, parserContentPolicy); |
| 1200 | if (!wasValid) |
| 1201 | return Exception { SyntaxError }; |
| 1202 | return fragment; |
| 1203 | } |
| 1204 | |
| 1205 | RefPtr<DocumentFragment> createFragmentForTransformToFragment(Document& outputDoc, const String& sourceString, const String& sourceMIMEType) |
| 1206 | { |
| 1207 | RefPtr<DocumentFragment> fragment = outputDoc.createDocumentFragment(); |
| 1208 | |
| 1209 | if (sourceMIMEType == "text/html" ) { |
| 1210 | // As far as I can tell, there isn't a spec for how transformToFragment is supposed to work. |
| 1211 | // Based on the documentation I can find, it looks like we want to start parsing the fragment in the InBody insertion mode. |
| 1212 | // Unfortunately, that's an implementation detail of the parser. |
| 1213 | // We achieve that effect here by passing in a fake body element as context for the fragment. |
| 1214 | auto fakeBody = HTMLBodyElement::create(outputDoc); |
| 1215 | fragment->parseHTML(sourceString, fakeBody.ptr()); |
| 1216 | } else if (sourceMIMEType == "text/plain" ) |
| 1217 | fragment->parserAppendChild(Text::create(outputDoc, sourceString)); |
| 1218 | else { |
| 1219 | bool successfulParse = fragment->parseXML(sourceString, 0); |
| 1220 | if (!successfulParse) |
| 1221 | return nullptr; |
| 1222 | } |
| 1223 | |
| 1224 | // FIXME: Do we need to mess with URLs here? |
| 1225 | |
| 1226 | return fragment; |
| 1227 | } |
| 1228 | |
| 1229 | Ref<DocumentFragment> createFragmentForImageAndURL(Document& document, const String& url) |
| 1230 | { |
| 1231 | auto imageElement = HTMLImageElement::create(document); |
| 1232 | imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, url); |
| 1233 | |
| 1234 | auto fragment = document.createDocumentFragment(); |
| 1235 | fragment->appendChild(imageElement); |
| 1236 | |
| 1237 | return fragment; |
| 1238 | } |
| 1239 | |
| 1240 | static Vector<Ref<HTMLElement>> collectElementsToRemoveFromFragment(ContainerNode& container) |
| 1241 | { |
| 1242 | Vector<Ref<HTMLElement>> toRemove; |
| 1243 | for (auto& element : childrenOfType<HTMLElement>(container)) { |
| 1244 | if (is<HTMLHtmlElement>(element)) { |
| 1245 | toRemove.append(element); |
| 1246 | collectElementsToRemoveFromFragment(element); |
| 1247 | continue; |
| 1248 | } |
| 1249 | if (is<HTMLHeadElement>(element) || is<HTMLBodyElement>(element)) |
| 1250 | toRemove.append(element); |
| 1251 | } |
| 1252 | return toRemove; |
| 1253 | } |
| 1254 | |
| 1255 | static void removeElementFromFragmentPreservingChildren(DocumentFragment& fragment, HTMLElement& element) |
| 1256 | { |
| 1257 | RefPtr<Node> nextChild; |
| 1258 | for (RefPtr<Node> child = element.firstChild(); child; child = nextChild) { |
| 1259 | nextChild = child->nextSibling(); |
| 1260 | element.removeChild(*child); |
| 1261 | fragment.insertBefore(*child, &element); |
| 1262 | } |
| 1263 | fragment.removeChild(element); |
| 1264 | } |
| 1265 | |
| 1266 | ExceptionOr<Ref<DocumentFragment>> createContextualFragment(Element& element, const String& markup, ParserContentPolicy parserContentPolicy) |
| 1267 | { |
| 1268 | auto result = createFragmentForInnerOuterHTML(element, markup, parserContentPolicy); |
| 1269 | if (result.hasException()) |
| 1270 | return result.releaseException(); |
| 1271 | |
| 1272 | auto fragment = result.releaseReturnValue(); |
| 1273 | |
| 1274 | // We need to pop <html> and <body> elements and remove <head> to |
| 1275 | // accommodate folks passing complete HTML documents to make the |
| 1276 | // child of an element. |
| 1277 | auto toRemove = collectElementsToRemoveFromFragment(fragment); |
| 1278 | for (auto& element : toRemove) |
| 1279 | removeElementFromFragmentPreservingChildren(fragment, element); |
| 1280 | |
| 1281 | return fragment; |
| 1282 | } |
| 1283 | |
| 1284 | static inline bool hasOneChild(ContainerNode& node) |
| 1285 | { |
| 1286 | Node* firstChild = node.firstChild(); |
| 1287 | return firstChild && !firstChild->nextSibling(); |
| 1288 | } |
| 1289 | |
| 1290 | static inline bool hasOneTextChild(ContainerNode& node) |
| 1291 | { |
| 1292 | return hasOneChild(node) && node.firstChild()->isTextNode(); |
| 1293 | } |
| 1294 | |
| 1295 | static inline bool hasMutationEventListeners(const Document& document) |
| 1296 | { |
| 1297 | return document.hasListenerType(Document::DOMSUBTREEMODIFIED_LISTENER) |
| 1298 | || document.hasListenerType(Document::DOMNODEINSERTED_LISTENER) |
| 1299 | || document.hasListenerType(Document::DOMNODEREMOVED_LISTENER) |
| 1300 | || document.hasListenerType(Document::DOMNODEREMOVEDFROMDOCUMENT_LISTENER) |
| 1301 | || document.hasListenerType(Document::DOMCHARACTERDATAMODIFIED_LISTENER); |
| 1302 | } |
| 1303 | |
| 1304 | // We can use setData instead of replacing Text node as long as script can't observe the difference. |
| 1305 | static inline bool canUseSetDataOptimization(const Text& containerChild, const ChildListMutationScope& mutationScope) |
| 1306 | { |
| 1307 | bool authorScriptMayHaveReference = containerChild.refCount(); |
| 1308 | return !authorScriptMayHaveReference && !mutationScope.canObserve() && !hasMutationEventListeners(containerChild.document()); |
| 1309 | } |
| 1310 | |
| 1311 | ExceptionOr<void> replaceChildrenWithFragment(ContainerNode& container, Ref<DocumentFragment>&& fragment) |
| 1312 | { |
| 1313 | Ref<ContainerNode> containerNode(container); |
| 1314 | ChildListMutationScope mutation(containerNode); |
| 1315 | |
| 1316 | if (!fragment->firstChild()) { |
| 1317 | containerNode->removeChildren(); |
| 1318 | return { }; |
| 1319 | } |
| 1320 | |
| 1321 | auto* containerChild = containerNode->firstChild(); |
| 1322 | if (containerChild && !containerChild->nextSibling()) { |
| 1323 | if (is<Text>(*containerChild) && hasOneTextChild(fragment) && canUseSetDataOptimization(downcast<Text>(*containerChild), mutation)) { |
| 1324 | ASSERT(!fragment->firstChild()->refCount()); |
| 1325 | downcast<Text>(*containerChild).setData(downcast<Text>(*fragment->firstChild()).data()); |
| 1326 | return { }; |
| 1327 | } |
| 1328 | |
| 1329 | return containerNode->replaceChild(fragment, *containerChild); |
| 1330 | } |
| 1331 | |
| 1332 | containerNode->removeChildren(); |
| 1333 | return containerNode->appendChild(fragment); |
| 1334 | } |
| 1335 | |
| 1336 | } |
| 1337 | |