1/*
2 * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
3 * Copyright (C) 2008, 2009, 2010, 2011 Google Inc. All rights reserved.
4 * Copyright (C) 2011 Igalia S.L.
5 * Copyright (C) 2011 Motorola Mobility. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include "markup.h"
31
32#include "ArchiveResource.h"
33#include "CSSPrimitiveValue.h"
34#include "CSSPropertyNames.h"
35#include "CSSValue.h"
36#include "CSSValueKeywords.h"
37#include "CacheStorageProvider.h"
38#include "ChildListMutationScope.h"
39#include "Comment.h"
40#include "ComposedTreeIterator.h"
41#include "DocumentFragment.h"
42#include "DocumentLoader.h"
43#include "DocumentType.h"
44#include "Editing.h"
45#include "Editor.h"
46#include "EditorClient.h"
47#include "ElementIterator.h"
48#include "EmptyClients.h"
49#include "File.h"
50#include "Frame.h"
51#include "FrameLoader.h"
52#include "HTMLAttachmentElement.h"
53#include "HTMLBRElement.h"
54#include "HTMLBodyElement.h"
55#include "HTMLDivElement.h"
56#include "HTMLHeadElement.h"
57#include "HTMLHtmlElement.h"
58#include "HTMLImageElement.h"
59#include "HTMLNames.h"
60#include "HTMLStyleElement.h"
61#include "HTMLTableElement.h"
62#include "HTMLTextAreaElement.h"
63#include "HTMLTextFormControlElement.h"
64#include "LibWebRTCProvider.h"
65#include "MarkupAccumulator.h"
66#include "NodeList.h"
67#include "Page.h"
68#include "PageConfiguration.h"
69#include "Range.h"
70#include "RenderBlock.h"
71#include "RuntimeEnabledFeatures.h"
72#include "Settings.h"
73#include "SocketProvider.h"
74#include "StyleProperties.h"
75#include "TextIterator.h"
76#include "TypedElementDescendantIterator.h"
77#include "VisibleSelection.h"
78#include "VisibleUnits.h"
79#include <wtf/StdLibExtras.h>
80#include <wtf/URL.h>
81#include <wtf/URLParser.h>
82#include <wtf/text/StringBuilder.h>
83
84namespace WebCore {
85
86using namespace HTMLNames;
87
88static bool propertyMissingOrEqualToNone(StyleProperties*, CSSPropertyID);
89
90class AttributeChange {
91public:
92 AttributeChange()
93 : m_name(nullAtom(), nullAtom(), nullAtom())
94 {
95 }
96
97 AttributeChange(Element* element, const QualifiedName& name, const String& value)
98 : m_element(element), m_name(name), m_value(value)
99 {
100 }
101
102 void apply()
103 {
104 m_element->setAttribute(m_name, m_value);
105 }
106
107private:
108 RefPtr<Element> m_element;
109 QualifiedName m_name;
110 String m_value;
111};
112
113static void completeURLs(DocumentFragment* fragment, const String& baseURL)
114{
115 Vector<AttributeChange> changes;
116
117 URL parsedBaseURL({ }, baseURL);
118
119 for (auto& element : descendantsOfType<Element>(*fragment)) {
120 if (!element.hasAttributes())
121 continue;
122 for (const Attribute& attribute : element.attributesIterator()) {
123 if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty())
124 changes.append(AttributeChange(&element, attribute.name(), element.completeURLsInAttributeValue(parsedBaseURL, attribute)));
125 }
126 }
127
128 for (auto& change : changes)
129 change.apply();
130}
131
132void replaceSubresourceURLs(Ref<DocumentFragment>&& fragment, HashMap<AtomicString, AtomicString>&& replacementMap)
133{
134 Vector<AttributeChange> changes;
135 for (auto& element : descendantsOfType<Element>(fragment)) {
136 if (!element.hasAttributes())
137 continue;
138 for (const Attribute& attribute : element.attributesIterator()) {
139 // FIXME: This won't work for srcset.
140 if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
141 auto replacement = replacementMap.get(attribute.value());
142 if (!replacement.isNull())
143 changes.append({ &element, attribute.name(), replacement });
144 }
145 }
146 }
147 for (auto& change : changes)
148 change.apply();
149}
150
// Pairs an element with the name of one of its attributes.
// removeSubresourceURLAttributes() uses it to queue attributes for removal.
struct ElementAttribute {
    Ref<Element> element; // Element that carries the attribute.
    QualifiedName attributeName; // Qualified name of the attribute on that element.
};
155
156void removeSubresourceURLAttributes(Ref<DocumentFragment>&& fragment, WTF::Function<bool(const URL&)> shouldRemoveURL)
157{
158 Vector<ElementAttribute> attributesToRemove;
159 for (auto& element : descendantsOfType<Element>(fragment)) {
160 if (!element.hasAttributes())
161 continue;
162 for (const Attribute& attribute : element.attributesIterator()) {
163 // FIXME: This won't work for srcset.
164 if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
165 URL url({ }, attribute.value());
166 if (shouldRemoveURL(url))
167 attributesToRemove.append({ element, attribute.name() });
168 }
169 }
170 }
171 for (auto& item : attributesToRemove)
172 item.element->removeAttribute(item.attributeName);
173}
174
175std::unique_ptr<Page> createPageForSanitizingWebContent()
176{
177 auto pageConfiguration = pageConfigurationWithEmptyClients();
178
179 auto page = std::make_unique<Page>(WTFMove(pageConfiguration));
180 page->settings().setMediaEnabled(false);
181 page->settings().setScriptEnabled(false);
182 page->settings().setPluginsEnabled(false);
183 page->settings().setAcceleratedCompositingEnabled(false);
184
185 Frame& frame = page->mainFrame();
186 frame.setView(FrameView::create(frame, IntSize { 800, 600 }));
187 frame.init();
188
189 FrameLoader& loader = frame.loader();
190 static char markup[] = "<!DOCTYPE html><html><body></body></html>";
191 ASSERT(loader.activeDocumentLoader());
192 auto& writer = loader.activeDocumentLoader()->writer();
193 writer.setMIMEType("text/html");
194 writer.begin();
195 writer.insertDataSynchronously(String(markup));
196 writer.end();
197 RELEASE_ASSERT(page->mainFrame().document()->body());
198
199 return page;
200}
201
202String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer)
203{
204 auto page = createPageForSanitizingWebContent();
205 Document* stagingDocument = page->mainFrame().document();
206 ASSERT(stagingDocument);
207
208 auto fragment = createFragmentFromMarkup(*stagingDocument, rawHTML, emptyString(), DisallowScriptingAndPluginContent);
209
210 if (fragmentSanitizer)
211 (*fragmentSanitizer)(fragment);
212
213 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML);
214}
215
// Tells StyledMarkupAccumulator whether its MSO-list preservation path is active.
enum class MSOListMode {
    Preserve,
    DoNotPreserve
};
217class StyledMarkupAccumulator final : public MarkupAccumulator {
218public:
219 enum RangeFullySelectsNode { DoesFullySelectNode, DoesNotFullySelectNode };
220
221 StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs, SerializeComposedTree,
222 AnnotateForInterchange, MSOListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized = nullptr);
223
224 Node* serializeNodes(const Position& start, const Position& end);
225 void wrapWithNode(Node&, bool convertBlocksToInlines = false, RangeFullySelectsNode = DoesFullySelectNode);
226 void wrapWithStyleNode(StyleProperties*, Document&, bool isBlock = false);
227 String takeResults();
228
229 bool needRelativeStyleWrapper() const { return m_needRelativeStyleWrapper; }
230 bool needClearingDiv() const { return m_needClearingDiv; }
231
232 using MarkupAccumulator::appendString;
233
234 ContainerNode* parentNode(Node& node)
235 {
236 if (UNLIKELY(m_useComposedTree))
237 return node.parentInComposedTree();
238 return node.parentOrShadowHostNode();
239 }
240
241private:
242 void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false);
243 const String& styleNodeCloseTag(bool isBlock = false);
244
245 String renderedTextRespectingRange(const Text&);
246 String textContentRespectingRange(const Text&);
247
248 bool shouldPreserveMSOListStyleForElement(const Element&);
249
250 void appendStartTag(StringBuilder& out, const Element&, bool addDisplayInline, RangeFullySelectsNode);
251 void appendEndTag(StringBuilder& out, const Element&) override;
252 void appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) override;
253
254 void appendText(StringBuilder& out, const Text&) override;
255 void appendStartTag(StringBuilder& out, const Element& element, Namespaces*) override
256 {
257 appendStartTag(out, element, false, DoesFullySelectNode);
258 }
259
260 Node* firstChild(Node& node)
261 {
262 if (UNLIKELY(m_useComposedTree))
263 return firstChildInComposedTreeIgnoringUserAgentShadow(node);
264 return node.firstChild();
265 }
266
267 Node* nextSibling(Node& node)
268 {
269 if (UNLIKELY(m_useComposedTree))
270 return nextSiblingInComposedTreeIgnoringUserAgentShadow(node);
271 return node.nextSibling();
272 }
273
274 Node* nextSkippingChildren(Node& node)
275 {
276 if (UNLIKELY(m_useComposedTree))
277 return nextSkippingChildrenInComposedTreeIgnoringUserAgentShadow(node);
278 return NodeTraversal::nextSkippingChildren(node);
279 }
280
281 bool hasChildNodes(Node& node)
282 {
283 if (UNLIKELY(m_useComposedTree))
284 return firstChildInComposedTreeIgnoringUserAgentShadow(node);
285 return node.hasChildNodes();
286 }
287
288 bool isDescendantOf(Node& node, Node& possibleAncestor)
289 {
290 if (UNLIKELY(m_useComposedTree))
291 return node.isDescendantOrShadowDescendantOf(&possibleAncestor);
292 return node.isDescendantOf(&possibleAncestor);
293 }
294
295 enum class NodeTraversalMode { EmitString, DoNotEmitString };
296 Node* traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode);
297
298 bool appendNodeToPreserveMSOList(Node&);
299
300 bool shouldAnnotate()
301 {
302 return m_annotate == AnnotateForInterchange::Yes;
303 }
304
305 bool shouldApplyWrappingStyle(const Node& node) const
306 {
307 return m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode() == node.parentNode() && m_wrappingStyle && m_wrappingStyle->style();
308 }
309
310 Position m_start;
311 Position m_end;
312 Vector<String> m_reversedPrecedingMarkup;
313 const AnnotateForInterchange m_annotate;
314 RefPtr<Node> m_highestNodeToBeSerialized;
315 RefPtr<EditingStyle> m_wrappingStyle;
316 bool m_useComposedTree;
317 bool m_needsPositionStyleConversion;
318 bool m_needRelativeStyleWrapper { false };
319 bool m_needClearingDiv { false };
320 bool m_shouldPreserveMSOList;
321 bool m_inMSOList { false };
322};
323
324inline StyledMarkupAccumulator::StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
325 AnnotateForInterchange annotate, MSOListMode msoListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized)
326 : MarkupAccumulator(nodes, urlsToResolve)
327 , m_start(start)
328 , m_end(end)
329 , m_annotate(annotate)
330 , m_highestNodeToBeSerialized(highestNodeToBeSerialized)
331 , m_useComposedTree(serializeComposedTree == SerializeComposedTree::Yes)
332 , m_needsPositionStyleConversion(needsPositionStyleConversion)
333 , m_shouldPreserveMSOList(msoListMode == MSOListMode::Preserve)
334{
335}
336
337void StyledMarkupAccumulator::wrapWithNode(Node& node, bool convertBlocksToInlines, RangeFullySelectsNode rangeFullySelectsNode)
338{
339 StringBuilder markup;
340 if (is<Element>(node))
341 appendStartTag(markup, downcast<Element>(node), convertBlocksToInlines && isBlock(&node), rangeFullySelectsNode);
342 else
343 appendNonElementNode(markup, node, nullptr);
344 m_reversedPrecedingMarkup.append(markup.toString());
345 endAppendingNode(node);
346 if (m_nodes)
347 m_nodes->append(&node);
348}
349
350void StyledMarkupAccumulator::wrapWithStyleNode(StyleProperties* style, Document& document, bool isBlock)
351{
352 StringBuilder openTag;
353 appendStyleNodeOpenTag(openTag, style, document, isBlock);
354 m_reversedPrecedingMarkup.append(openTag.toString());
355 appendString(styleNodeCloseTag(isBlock));
356}
357
358void StyledMarkupAccumulator::appendStyleNodeOpenTag(StringBuilder& out, StyleProperties* style, Document& document, bool isBlock)
359{
360 // wrappingStyleForSerialization should have removed -webkit-text-decorations-in-effect
361 ASSERT(propertyMissingOrEqualToNone(style, CSSPropertyWebkitTextDecorationsInEffect));
362 if (isBlock)
363 out.appendLiteral("<div style=\"");
364 else
365 out.appendLiteral("<span style=\"");
366 appendAttributeValue(out, style->asText(), document.isHTMLDocument());
367 out.appendLiteral("\">");
368}
369
370const String& StyledMarkupAccumulator::styleNodeCloseTag(bool isBlock)
371{
372 static NeverDestroyed<const String> divClose(MAKE_STATIC_STRING_IMPL("</div>"));
373 static NeverDestroyed<const String> styleSpanClose(MAKE_STATIC_STRING_IMPL("</span>"));
374 return isBlock ? divClose : styleSpanClose;
375}
376
377String StyledMarkupAccumulator::takeResults()
378{
379 StringBuilder result;
380 result.reserveCapacity(totalLength(m_reversedPrecedingMarkup) + length());
381
382 for (size_t i = m_reversedPrecedingMarkup.size(); i > 0; --i)
383 result.append(m_reversedPrecedingMarkup[i - 1]);
384
385 concatenateMarkup(result);
386
387 // We remove '\0' characters because they are not visibly rendered to the user.
388 return result.toString().replaceWithLiteral('\0', "");
389}
390
391void StyledMarkupAccumulator::appendText(StringBuilder& out, const Text& text)
392{
393 const bool parentIsTextarea = is<HTMLTextAreaElement>(text.parentElement());
394 const bool wrappingSpan = shouldApplyWrappingStyle(text) && !parentIsTextarea;
395 if (wrappingSpan) {
396 RefPtr<EditingStyle> wrappingStyle = m_wrappingStyle->copy();
397 // FIXME: <rdar://problem/5371536> Style rules that match pasted content can change it's appearance
398 // Make sure spans are inline style in paste side e.g. span { display: block }.
399 wrappingStyle->forceInline();
400 // FIXME: Should this be included in forceInline?
401 wrappingStyle->style()->setProperty(CSSPropertyFloat, CSSValueNone);
402
403 appendStyleNodeOpenTag(out, wrappingStyle->style(), text.document());
404 }
405
406 if (!shouldAnnotate() || parentIsTextarea) {
407 auto content = textContentRespectingRange(text);
408 appendCharactersReplacingEntities(out, content, 0, content.length(), entityMaskForText(text));
409 } else {
410 const bool useRenderedText = !enclosingElementWithTag(firstPositionInNode(const_cast<Text*>(&text)), selectTag);
411 String content = useRenderedText ? renderedTextRespectingRange(text) : textContentRespectingRange(text);
412 StringBuilder buffer;
413 appendCharactersReplacingEntities(buffer, content, 0, content.length(), EntityMaskInPCDATA);
414 out.append(convertHTMLTextToInterchangeFormat(buffer.toString(), &text));
415 }
416
417 if (wrappingSpan)
418 out.append(styleNodeCloseTag());
419}
420
421String StyledMarkupAccumulator::renderedTextRespectingRange(const Text& text)
422{
423 TextIteratorBehavior behavior = TextIteratorDefaultBehavior;
424 Position start = &text == m_start.containerNode() ? m_start : firstPositionInNode(const_cast<Text*>(&text));
425 Position end;
426 if (&text == m_end.containerNode())
427 end = m_end;
428 else {
429 end = lastPositionInNode(const_cast<Text*>(&text));
430 if (!m_end.isNull())
431 behavior = TextIteratorBehavesAsIfNodesFollowing;
432 }
433
434 return plainText(Range::create(text.document(), start, end).ptr(), behavior);
435}
436
437String StyledMarkupAccumulator::textContentRespectingRange(const Text& text)
438{
439 if (m_start.isNull() && m_end.isNull())
440 return text.data();
441
442 unsigned start = 0;
443 unsigned end = std::numeric_limits<unsigned>::max();
444 if (&text == m_start.containerNode())
445 start = m_start.offsetInContainerNode();
446 if (&text == m_end.containerNode())
447 end = m_end.offsetInContainerNode();
448 ASSERT(start < end);
449 return text.data().substring(start, end - start);
450}
451
452void StyledMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces)
453{
454#if ENABLE(ATTACHMENT_ELEMENT)
455 if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
456 return;
457
458 if (is<HTMLAttachmentElement>(element)) {
459 auto& attachment = downcast<HTMLAttachmentElement>(element);
460 appendAttribute(out, element, { webkitattachmentidAttr, attachment.uniqueIdentifier() }, namespaces);
461 if (auto* file = attachment.file()) {
462 // These attributes are only intended for File deserialization, and are removed from the generated attachment
463 // element after we've deserialized and set its backing File, in restoreAttachmentElementsInFragment.
464 appendAttribute(out, element, { webkitattachmentpathAttr, file->path() }, namespaces);
465 appendAttribute(out, element, { webkitattachmentbloburlAttr, file->url().string() }, namespaces);
466 }
467 } else if (is<HTMLImageElement>(element)) {
468 if (auto attachment = downcast<HTMLImageElement>(element).attachmentElement())
469 appendAttribute(out, element, { webkitattachmentidAttr, attachment->uniqueIdentifier() }, namespaces);
470 }
471#else
472 UNUSED_PARAM(out);
473 UNUSED_PARAM(element);
474 UNUSED_PARAM(namespaces);
475#endif
476}
477
478bool StyledMarkupAccumulator::shouldPreserveMSOListStyleForElement(const Element& element)
479{
480 if (m_inMSOList)
481 return true;
482 if (m_shouldPreserveMSOList) {
483 auto style = element.getAttribute(styleAttr);
484 return style.startsWith("mso-list:") || style.contains(";mso-list:") || style.contains("\nmso-list:");
485 }
486 return false;
487}
488
489void StyledMarkupAccumulator::appendStartTag(StringBuilder& out, const Element& element, bool addDisplayInline, RangeFullySelectsNode rangeFullySelectsNode)
490{
491 const bool documentIsHTML = element.document().isHTMLDocument();
492 const bool isSlotElement = is<HTMLSlotElement>(element);
493 if (UNLIKELY(isSlotElement))
494 out.append("<span");
495 else
496 appendOpenTag(out, element, nullptr);
497
498 appendCustomAttributes(out, element, nullptr);
499
500 const bool shouldAnnotateOrForceInline = element.isHTMLElement() && (shouldAnnotate() || addDisplayInline);
501 bool shouldOverrideStyleAttr = (shouldAnnotateOrForceInline || shouldApplyWrappingStyle(element) || isSlotElement) && !shouldPreserveMSOListStyleForElement(element);
502 if (element.hasAttributes()) {
503 for (const Attribute& attribute : element.attributesIterator()) {
504 // We'll handle the style attribute separately, below.
505 if (attribute.name() == styleAttr && shouldOverrideStyleAttr)
506 continue;
507 if (element.isEventHandlerAttribute(attribute) || element.isJavaScriptURLAttribute(attribute))
508 continue;
509 appendAttribute(out, element, attribute, 0);
510 }
511 }
512
513 if (shouldOverrideStyleAttr) {
514 RefPtr<EditingStyle> newInlineStyle;
515
516 if (shouldApplyWrappingStyle(element)) {
517 newInlineStyle = m_wrappingStyle->copy();
518 newInlineStyle->removePropertiesInElementDefaultStyle(*const_cast<Element*>(&element));
519 newInlineStyle->removeStyleConflictingWithStyleOfNode(*const_cast<Element*>(&element));
520 } else
521 newInlineStyle = EditingStyle::create();
522
523 if (isSlotElement)
524 newInlineStyle->addDisplayContents();
525
526 if (is<StyledElement>(element) && downcast<StyledElement>(element).inlineStyle())
527 newInlineStyle->overrideWithStyle(*downcast<StyledElement>(element).inlineStyle());
528
529 if (shouldAnnotateOrForceInline) {
530 if (shouldAnnotate())
531 newInlineStyle->mergeStyleFromRulesForSerialization(downcast<HTMLElement>(*const_cast<Element*>(&element)));
532
533 if (addDisplayInline)
534 newInlineStyle->forceInline();
535
536 if (m_needsPositionStyleConversion) {
537 m_needRelativeStyleWrapper |= newInlineStyle->convertPositionStyle();
538 m_needClearingDiv |= newInlineStyle->isFloating();
539 }
540
541 // If the node is not fully selected by the range, then we don't want to keep styles that affect its relationship to the nodes around it
542 // only the ones that affect it and the nodes within it.
543 if (rangeFullySelectsNode == DoesNotFullySelectNode && newInlineStyle->style())
544 newInlineStyle->style()->removeProperty(CSSPropertyFloat);
545 }
546
547 if (!newInlineStyle->isEmpty()) {
548 out.appendLiteral(" style=\"");
549 appendAttributeValue(out, newInlineStyle->style()->asText(), documentIsHTML);
550 out.append('\"');
551 }
552 }
553
554 appendCloseTag(out, element);
555}
556
557void StyledMarkupAccumulator::appendEndTag(StringBuilder& out, const Element& element)
558{
559 if (UNLIKELY(is<HTMLSlotElement>(element)))
560 out.append("</span>");
561 else
562 MarkupAccumulator::appendEndTag(out, element);
563}
564
565Node* StyledMarkupAccumulator::serializeNodes(const Position& start, const Position& end)
566{
567 ASSERT(comparePositions(start, end) <= 0);
568 auto startNode = start.firstNode();
569 Node* pastEnd = end.computeNodeAfterPosition();
570 if (!pastEnd && end.containerNode())
571 pastEnd = nextSkippingChildren(*end.containerNode());
572
573 if (!m_highestNodeToBeSerialized) {
574 Node* lastClosed = traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::DoNotEmitString);
575 m_highestNodeToBeSerialized = lastClosed;
576 }
577
578 if (m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode())
579 m_wrappingStyle = EditingStyle::wrappingStyleForSerialization(*m_highestNodeToBeSerialized->parentNode(), shouldAnnotate());
580
581 return traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::EmitString);
582}
583
// Walks the tree from |startNode| up to, but not including, |pastEnd|, visiting nodes in
// pre-order via firstChild() / nextSibling() / parentNode() (so either the DOM or the
// composed tree, depending on m_useComposedTree).
//
// In NodeTraversalMode::EmitString the visited nodes are appended to the accumulator; in
// DoNotEmitString the same walk is performed without emitting anything, which lets the
// caller learn the return value ahead of the real pass.
//
// Returns the last node that was exited, or nullptr if no node was exited.
Node* StyledMarkupAccumulator::traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode traversalMode)
{
    const bool shouldEmit = traversalMode == NodeTraversalMode::EmitString;

    // Every traversal starts outside of an MSO list section.
    m_inMSOList = false;

    // Number of nodes that have been entered but not yet exited.
    unsigned depth = 0;
    // Returns true if |node| was entered (its subtree should be visited), false if the
    // node and its subtree should be skipped.
    auto enterNode = [&] (Node& node) {
        // When emitting with MSO list preservation on, some nodes are written out by
        // appendNodeToPreserveMSOList() instead; those are not entered.
        if (UNLIKELY(m_shouldPreserveMSOList) && shouldEmit) {
            if (appendNodeToPreserveMSOList(node))
                return false;
        }

        // Skip nodes without a renderer, unless they have display:contents or are inside a <select>.
        bool isDisplayContents = is<Element>(node) && downcast<Element>(node).hasDisplayContents();
        if (!node.renderer() && !isDisplayContents && !enclosingElementWithTag(firstPositionInOrBeforeNode(&node), selectTag))
            return false;

        ++depth;
        if (shouldEmit)
            startAppendingNode(node);

        return true;
    };

    Node* lastClosed = nullptr;
    // Exits |node|. If something is still open (depth > 0), this closes it; otherwise the
    // node was never entered in this traversal and the accumulated markup is wrapped with it.
    auto exitNode = [&] (Node& node) {
        bool closing = depth;
        if (depth)
            --depth;
        if (shouldEmit) {
            if (closing)
                endAppendingNode(node);
            else
                wrapWithNode(node);
        }
        lastClosed = &node;
    };

    Node* lastNode = nullptr;
    Node* next = nullptr;
    for (auto* n = startNode; n != pastEnd; lastNode = n, n = next) {

        // Ancestors that the step from |n| to |next| walks out of; they are exited at the
        // end of this iteration.
        Vector<Node*, 8> exitedAncestors;
        next = nullptr;
        if (auto* child = firstChild(*n))
            next = child;
        else if (auto* sibling = nextSibling(*n))
            next = sibling;
        else {
            // No child and no sibling: climb until an ancestor with a next sibling is found.
            for (auto* ancestor = parentNode(*n); ancestor; ancestor = parentNode(*ancestor)) {
                exitedAncestors.append(ancestor);
                if (auto* sibling = nextSibling(*ancestor)) {
                    next = sibling;
                    break;
                }
            }
        }
        ASSERT(next || !pastEnd);

        if (isBlock(n) && canHaveChildrenForEditing(*n) && next == pastEnd) {
            // Don't write out empty block containers that aren't fully selected.
            continue;
        }

        if (!enterNode(*n)) {
            // The node was not entered, so its whole subtree is skipped.
            next = nextSkippingChildren(*n);
            // Don't skip over pastEnd.
            if (pastEnd && isDescendantOf(*pastEnd, *n))
                next = pastEnd;
            ASSERT(next || !pastEnd);
        } else {
            // A node without children is exited right away; one with children is exited
            // later, when the traversal walks back out of it.
            if (!hasChildNodes(*n))
                exitNode(*n);
        }

        for (auto* ancestor : exitedAncestors) {
            // Stop once nothing is open and the traversal is about to end.
            if (!depth && next == pastEnd)
                break;
            exitNode(*ancestor);
        }
    }

    ASSERT(lastNode || !depth);
    if (depth) {
        // Exit whatever is still open by walking up from pastEnd (or from the last visited node).
        for (auto* ancestor = parentNode(pastEnd ? *pastEnd : *lastNode); ancestor && depth; ancestor = parentNode(*ancestor))
            exitNode(*ancestor);
    }

    return lastClosed;
}
674
675bool StyledMarkupAccumulator::appendNodeToPreserveMSOList(Node& node)
676{
677 if (is<Comment>(node)) {
678 auto& commentNode = downcast<Comment>(node);
679 if (!m_inMSOList && commentNode.data() == "[if !supportLists]")
680 m_inMSOList = true;
681 else if (m_inMSOList && commentNode.data() == "[endif]")
682 m_inMSOList = false;
683 else
684 return false;
685 startAppendingNode(commentNode);
686 return true;
687 }
688 if (is<HTMLStyleElement>(node)) {
689 auto* firstChild = node.firstChild();
690 if (!is<Text>(firstChild))
691 return false;
692
693 auto& textChild = downcast<Text>(*firstChild);
694 auto& styleContent = textChild.data();
695
696 const auto msoStyleDefinitionsStart = styleContent.find("/* Style Definitions */");
697 const auto msoListDefinitionsStart = styleContent.find("/* List Definitions */");
698 const auto lastListItem = styleContent.reverseFind("\n@list");
699 if (msoListDefinitionsStart == notFound || lastListItem == notFound)
700 return false;
701 const auto start = msoStyleDefinitionsStart != notFound && msoStyleDefinitionsStart < msoListDefinitionsStart ? msoStyleDefinitionsStart : msoListDefinitionsStart;
702
703 const auto msoListDefinitionsEnd = styleContent.find(";}\n", lastListItem);
704 if (msoListDefinitionsEnd == notFound || start >= msoListDefinitionsEnd)
705 return false;
706
707 appendString("<head><style class=\"" WebKitMSOListQuirksStyle "\">\n<!--\n");
708 appendStringView(StringView(textChild.data()).substring(start, msoListDefinitionsEnd - start + 3));
709 appendString("\n-->\n</style></head>");
710
711 return true;
712 }
713 return false;
714}
715
716static Node* ancestorToRetainStructureAndAppearanceForBlock(Node* commonAncestorBlock)
717{
718 if (!commonAncestorBlock)
719 return nullptr;
720
721 if (commonAncestorBlock->hasTagName(tbodyTag) || commonAncestorBlock->hasTagName(trTag)) {
722 ContainerNode* table = commonAncestorBlock->parentNode();
723 while (table && !is<HTMLTableElement>(*table))
724 table = table->parentNode();
725
726 return table;
727 }
728
729 if (isNonTableCellHTMLBlockElement(commonAncestorBlock))
730 return commonAncestorBlock;
731
732 return nullptr;
733}
734
735static inline Node* ancestorToRetainStructureAndAppearance(Node* commonAncestor)
736{
737 return ancestorToRetainStructureAndAppearanceForBlock(enclosingBlock(commonAncestor));
738}
739
740static bool propertyMissingOrEqualToNone(StyleProperties* style, CSSPropertyID propertyID)
741{
742 if (!style)
743 return false;
744 RefPtr<CSSValue> value = style->getPropertyCSSValue(propertyID);
745 if (!value)
746 return true;
747 if (!is<CSSPrimitiveValue>(*value))
748 return false;
749 return downcast<CSSPrimitiveValue>(*value).valueID() == CSSValueNone;
750}
751
752static bool needInterchangeNewlineAfter(const VisiblePosition& v)
753{
754 VisiblePosition next = v.next();
755 Node* upstreamNode = next.deepEquivalent().upstream().deprecatedNode();
756 Node* downstreamNode = v.deepEquivalent().downstream().deprecatedNode();
757 // Add an interchange newline if a paragraph break is selected and a br won't already be added to the markup to represent it.
758 return isEndOfParagraph(v) && isStartOfParagraph(next) && !(upstreamNode->hasTagName(brTag) && upstreamNode == downstreamNode);
759}
760
761static RefPtr<EditingStyle> styleFromMatchedRulesAndInlineDecl(Node& node)
762{
763 if (!is<HTMLElement>(node))
764 return nullptr;
765
766 auto& element = downcast<HTMLElement>(node);
767 auto style = EditingStyle::create(element.inlineStyle());
768 style->mergeStyleFromRules(element);
769 return style;
770}
771
772static bool isElementPresentational(const Node* node)
773{
774 return node->hasTagName(uTag) || node->hasTagName(sTag) || node->hasTagName(strikeTag)
775 || node->hasTagName(iTag) || node->hasTagName(emTag) || node->hasTagName(bTag) || node->hasTagName(strongTag);
776}
777
778static Node* highestAncestorToWrapMarkup(const Position& start, const Position& end, Node& commonAncestor, AnnotateForInterchange annotate)
779{
780 Node* specialCommonAncestor = nullptr;
781 if (annotate == AnnotateForInterchange::Yes) {
782 // Include ancestors that aren't completely inside the range but are required to retain
783 // the structure and appearance of the copied markup.
784 specialCommonAncestor = ancestorToRetainStructureAndAppearance(&commonAncestor);
785
786 if (auto* parentListNode = enclosingNodeOfType(start, isListItem)) {
787 if (!editingIgnoresContent(*parentListNode) && VisibleSelection::selectionFromContentsOfNode(parentListNode) == VisibleSelection(start, end)) {
788 specialCommonAncestor = parentListNode->parentNode();
789 while (specialCommonAncestor && !isListHTMLElement(specialCommonAncestor))
790 specialCommonAncestor = specialCommonAncestor->parentNode();
791 }
792 }
793
794 // Retain the Mail quote level by including all ancestor mail block quotes.
795 if (Node* highestMailBlockquote = highestEnclosingNodeOfType(start, isMailBlockquote, CanCrossEditingBoundary))
796 specialCommonAncestor = highestMailBlockquote;
797 }
798
799 auto* checkAncestor = specialCommonAncestor ? specialCommonAncestor : &commonAncestor;
800 if (checkAncestor->renderer() && checkAncestor->renderer()->containingBlock()) {
801 Node* newSpecialCommonAncestor = highestEnclosingNodeOfType(firstPositionInNode(checkAncestor), &isElementPresentational, CanCrossEditingBoundary, checkAncestor->renderer()->containingBlock()->element());
802 if (newSpecialCommonAncestor)
803 specialCommonAncestor = newSpecialCommonAncestor;
804 }
805
806 // If a single tab is selected, commonAncestor will be a text node inside a tab span.
807 // If two or more tabs are selected, commonAncestor will be the tab span.
808 // In either case, if there is a specialCommonAncestor already, it will necessarily be above
809 // any tab span that needs to be included.
810 if (!specialCommonAncestor && isTabSpanTextNode(&commonAncestor))
811 specialCommonAncestor = commonAncestor.parentNode();
812 if (!specialCommonAncestor && isTabSpanNode(&commonAncestor))
813 specialCommonAncestor = &commonAncestor;
814
815 if (auto* enclosingAnchor = enclosingElementWithTag(firstPositionInNode(specialCommonAncestor ? specialCommonAncestor : &commonAncestor), aTag))
816 specialCommonAncestor = enclosingAnchor;
817
818 return specialCommonAncestor;
819}
820
821static String serializePreservingVisualAppearanceInternal(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
822 AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, MSOListMode msoListMode)
823{
824 static NeverDestroyed<const String> interchangeNewlineString(MAKE_STATIC_STRING_IMPL("<br class=\"" AppleInterchangeNewline "\">"));
825
826 if (!comparePositions(start, end))
827 return emptyString();
828
829 RefPtr<Node> commonAncestor = commonShadowIncludingAncestor(start, end);
830 if (!commonAncestor)
831 return emptyString();
832
833 auto& document = *start.document();
834 document.updateLayoutIgnorePendingStylesheets();
835
836 VisiblePosition visibleStart { start };
837 VisiblePosition visibleEnd { end };
838
839 auto body = makeRefPtr(enclosingElementWithTag(firstPositionInNode(commonAncestor.get()), bodyTag));
840 RefPtr<Element> fullySelectedRoot;
841 // FIXME: Do this for all fully selected blocks, not just the body.
842 if (body && VisiblePosition(firstPositionInNode(body.get())) == visibleStart && VisiblePosition(lastPositionInNode(body.get())) == visibleEnd)
843 fullySelectedRoot = body;
844 bool needsPositionStyleConversion = body && fullySelectedRoot == body && document.settings().shouldConvertPositionStyleOnCopy();
845
846 Node* specialCommonAncestor = highestAncestorToWrapMarkup(start, end, *commonAncestor, annotate);
847
848 StyledMarkupAccumulator accumulator(start, end, nodes, urlsToResolve, serializeComposedTree, annotate, msoListMode, needsPositionStyleConversion, specialCommonAncestor);
849
850 Position startAdjustedForInterchangeNewline = start;
851 if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleStart)) {
852 if (visibleStart == visibleEnd.previous())
853 return interchangeNewlineString;
854
855 accumulator.appendString(interchangeNewlineString);
856 startAdjustedForInterchangeNewline = visibleStart.next().deepEquivalent();
857
858 if (comparePositions(startAdjustedForInterchangeNewline, end) >= 0)
859 return interchangeNewlineString;
860 }
861
862 Node* lastClosed = accumulator.serializeNodes(startAdjustedForInterchangeNewline, end);
863
864 if (specialCommonAncestor && lastClosed) {
865 // Also include all of the ancestors of lastClosed up to this special ancestor.
866 for (ContainerNode* ancestor = accumulator.parentNode(*lastClosed); ancestor; ancestor = accumulator.parentNode(*ancestor)) {
867 if (ancestor == fullySelectedRoot && convertBlocksToInlines == ConvertBlocksToInlines::No) {
868 RefPtr<EditingStyle> fullySelectedRootStyle = styleFromMatchedRulesAndInlineDecl(*fullySelectedRoot);
869
870 // Bring the background attribute over, but not as an attribute because a background attribute on a div
871 // appears to have no effect.
872 if ((!fullySelectedRootStyle || !fullySelectedRootStyle->style() || !fullySelectedRootStyle->style()->getPropertyCSSValue(CSSPropertyBackgroundImage))
873 && fullySelectedRoot->hasAttributeWithoutSynchronization(backgroundAttr))
874 fullySelectedRootStyle->style()->setProperty(CSSPropertyBackgroundImage, "url('" + fullySelectedRoot->getAttribute(backgroundAttr) + "')");
875
876 if (fullySelectedRootStyle->style()) {
877 // Reset the CSS properties to avoid an assertion error in addStyleMarkup().
878 // This assertion is caused at least when we select all text of a <body> element whose
879 // 'text-decoration' property is "inherit", and copy it.
880 if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyTextDecoration))
881 fullySelectedRootStyle->style()->setProperty(CSSPropertyTextDecoration, CSSValueNone);
882 if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyWebkitTextDecorationsInEffect))
883 fullySelectedRootStyle->style()->setProperty(CSSPropertyWebkitTextDecorationsInEffect, CSSValueNone);
884 accumulator.wrapWithStyleNode(fullySelectedRootStyle->style(), document, true);
885 }
886 } else {
887 // Since this node and all the other ancestors are not in the selection we want to set RangeFullySelectsNode to DoesNotFullySelectNode
888 // so that styles that affect the exterior of the node are not included.
889 accumulator.wrapWithNode(*ancestor, convertBlocksToInlines == ConvertBlocksToInlines::Yes, StyledMarkupAccumulator::DoesNotFullySelectNode);
890 }
891 if (nodes)
892 nodes->append(ancestor);
893
894 if (ancestor == specialCommonAncestor)
895 break;
896 }
897 }
898
899 if (accumulator.needRelativeStyleWrapper() && needsPositionStyleConversion) {
900 if (accumulator.needClearingDiv())
901 accumulator.appendString("<div style=\"clear: both;\"></div>");
902 RefPtr<EditingStyle> positionRelativeStyle = styleFromMatchedRulesAndInlineDecl(*body);
903 positionRelativeStyle->style()->setProperty(CSSPropertyPosition, CSSValueRelative);
904 accumulator.wrapWithStyleNode(positionRelativeStyle->style(), document, true);
905 }
906
907 // FIXME: The interchange newline should be placed in the block that it's in, not after all of the content, unconditionally.
908 if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous()))
909 accumulator.appendString(interchangeNewlineString);
910
911 return accumulator.takeResults();
912}
913
914String serializePreservingVisualAppearance(const Range& range, Vector<Node*>* nodes, AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, ResolveURLs urlsToReslve)
915{
916 return serializePreservingVisualAppearanceInternal(range.startPosition(), range.endPosition(), nodes, urlsToReslve, SerializeComposedTree::No,
917 annotate, convertBlocksToInlines, MSOListMode::DoNotPreserve);
918}
919
920String serializePreservingVisualAppearance(const VisibleSelection& selection, ResolveURLs resolveURLs, SerializeComposedTree serializeComposedTree, Vector<Node*>* nodes)
921{
922 return serializePreservingVisualAppearanceInternal(selection.start(), selection.end(), nodes, resolveURLs, serializeComposedTree,
923 AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, MSOListMode::DoNotPreserve);
924}
925
926
927static bool shouldPreserveMSOLists(const String& markup)
928{
929 if (!markup.startsWith("<html xmlns:"))
930 return false;
931 auto tagClose = markup.find('>');
932 if (tagClose == notFound)
933 return false;
934 auto htmlTag = markup.substring(0, tagClose);
935 return htmlTag.contains("xmlns:o=\"urn:schemas-microsoft-com:office:office\"")
936 && htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"");
937}
938
939String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup)
940{
941 MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup)
942 ? MSOListMode::Preserve : MSOListMode::DoNotPreserve;
943
944 auto bodyElement = makeRefPtr(document.body());
945 ASSERT(bodyElement);
946 bodyElement->appendChild(fragment.get());
947
948 // SerializeComposedTree::No because there can't be a shadow tree in the pasted fragment.
949 auto result = serializePreservingVisualAppearanceInternal(firstPositionInNode(bodyElement.get()), lastPositionInNode(bodyElement.get()), nullptr,
950 ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, msoListMode);
951
952 if (msoListMode == MSOListMode::Preserve) {
953 StringBuilder builder;
954 builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n"
955 "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n"
956 "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n"
957 "xmlns=\"http://www.w3.org/TR/REC-html40\">");
958 builder.append(result);
959 builder.appendLiteral("</html>");
960 return builder.toString();
961 }
962
963 return result;
964}
965
966static void restoreAttachmentElementsInFragment(DocumentFragment& fragment)
967{
968#if ENABLE(ATTACHMENT_ELEMENT)
969 if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
970 return;
971
972 // When creating a fragment we must strip the webkit-attachment-path attribute after restoring the File object.
973 Vector<Ref<HTMLAttachmentElement>> attachments;
974 for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment))
975 attachments.append(attachment);
976
977 for (auto& attachment : attachments) {
978 attachment->setUniqueIdentifier(attachment->attributeWithoutSynchronization(webkitattachmentidAttr));
979
980 auto attachmentPath = attachment->attachmentPath();
981 auto blobURL = attachment->blobURL();
982 if (!attachmentPath.isEmpty())
983 attachment->setFile(File::create(attachmentPath));
984 else if (!blobURL.isEmpty())
985 attachment->setFile(File::deserialize({ }, blobURL, attachment->attachmentType(), attachment->attachmentTitle()));
986
987 // Remove temporary attributes that were previously added in StyledMarkupAccumulator::appendCustomAttributes.
988 attachment->removeAttribute(webkitattachmentidAttr);
989 attachment->removeAttribute(webkitattachmentpathAttr);
990 attachment->removeAttribute(webkitattachmentbloburlAttr);
991 }
992
993 Vector<Ref<HTMLImageElement>> images;
994 for (auto& image : descendantsOfType<HTMLImageElement>(fragment))
995 images.append(image);
996
997 for (auto& image : images) {
998 auto attachmentIdentifier = image->attributeWithoutSynchronization(webkitattachmentidAttr);
999 if (attachmentIdentifier.isEmpty())
1000 continue;
1001
1002 auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, *fragment.ownerDocument());
1003 attachment->setUniqueIdentifier(attachmentIdentifier);
1004 image->setAttachmentElement(WTFMove(attachment));
1005 image->removeAttribute(webkitattachmentidAttr);
1006 }
1007#else
1008 UNUSED_PARAM(fragment);
1009#endif
1010}
1011
1012Ref<DocumentFragment> createFragmentFromMarkup(Document& document, const String& markup, const String& baseURL, ParserContentPolicy parserContentPolicy)
1013{
1014 // We use a fake body element here to trick the HTML parser into using the InBody insertion mode.
1015 auto fakeBody = HTMLBodyElement::create(document);
1016 auto fragment = DocumentFragment::create(document);
1017
1018 fragment->parseHTML(markup, fakeBody.ptr(), parserContentPolicy);
1019 restoreAttachmentElementsInFragment(fragment);
1020 if (!baseURL.isEmpty() && baseURL != WTF::blankURL() && baseURL != document.baseURL())
1021 completeURLs(fragment.ptr(), baseURL);
1022
1023 return fragment;
1024}
1025
1026String serializeFragment(const Node& node, SerializedNodes root, Vector<Node*>* nodes, ResolveURLs urlsToResolve, Vector<QualifiedName>* tagNamesToSkip, SerializationSyntax serializationSyntax)
1027{
1028 MarkupAccumulator accumulator(nodes, urlsToResolve, serializationSyntax);
1029 return accumulator.serializeNodes(const_cast<Node&>(node), root, tagNamesToSkip);
1030}
1031
1032static void fillContainerFromString(ContainerNode& paragraph, const String& string)
1033{
1034 Document& document = paragraph.document();
1035
1036 if (string.isEmpty()) {
1037 paragraph.appendChild(createBlockPlaceholderElement(document));
1038 return;
1039 }
1040
1041 ASSERT(string.find('\n') == notFound);
1042
1043 Vector<String> tabList = string.splitAllowingEmptyEntries('\t');
1044 String tabText = emptyString();
1045 bool first = true;
1046 size_t numEntries = tabList.size();
1047 for (size_t i = 0; i < numEntries; ++i) {
1048 const String& s = tabList[i];
1049
1050 // append the non-tab textual part
1051 if (!s.isEmpty()) {
1052 if (!tabText.isEmpty()) {
1053 paragraph.appendChild(createTabSpanElement(document, tabText));
1054 tabText = emptyString();
1055 }
1056 Ref<Node> textNode = document.createTextNode(stringWithRebalancedWhitespace(s, first, i + 1 == numEntries));
1057 paragraph.appendChild(textNode);
1058 }
1059
1060 // there is a tab after every entry, except the last entry
1061 // (if the last character is a tab, the list gets an extra empty entry)
1062 if (i + 1 != numEntries)
1063 tabText.append('\t');
1064 else if (!tabText.isEmpty())
1065 paragraph.appendChild(createTabSpanElement(document, tabText));
1066
1067 first = false;
1068 }
1069}
1070
1071bool isPlainTextMarkup(Node* node)
1072{
1073 ASSERT(node);
1074 if (!is<HTMLDivElement>(*node))
1075 return false;
1076
1077 HTMLDivElement& element = downcast<HTMLDivElement>(*node);
1078 if (element.hasAttributes())
1079 return false;
1080
1081 Node* firstChild = element.firstChild();
1082 if (!firstChild)
1083 return false;
1084
1085 Node* secondChild = firstChild->nextSibling();
1086 if (!secondChild)
1087 return firstChild->isTextNode() || firstChild->firstChild();
1088
1089 if (secondChild->nextSibling())
1090 return false;
1091
1092 return isTabSpanTextNode(firstChild->firstChild()) && secondChild->isTextNode();
1093}
1094
1095static bool contextPreservesNewline(const Range& context)
1096{
1097 VisiblePosition position(context.startPosition());
1098 Node* container = position.deepEquivalent().containerNode();
1099 if (!container || !container->renderer())
1100 return false;
1101
1102 return container->renderer()->style().preserveNewline();
1103}
1104
1105Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text)
1106{
1107 Document& document = context.ownerDocument();
1108 Ref<DocumentFragment> fragment = document.createDocumentFragment();
1109
1110 if (text.isEmpty())
1111 return fragment;
1112
1113 String string = text;
1114 string.replace("\r\n", "\n");
1115 string.replace('\r', '\n');
1116
1117 if (contextPreservesNewline(context)) {
1118 fragment->appendChild(document.createTextNode(string));
1119 if (string.endsWith('\n')) {
1120 auto element = HTMLBRElement::create(document);
1121 element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline);
1122 fragment->appendChild(element);
1123 }
1124 return fragment;
1125 }
1126
1127 // A string with no newlines gets added inline, rather than being put into a paragraph.
1128 if (string.find('\n') == notFound) {
1129 fillContainerFromString(fragment, string);
1130 return fragment;
1131 }
1132
1133 // Break string into paragraphs. Extra line breaks turn into empty paragraphs.
1134 Node* blockNode = enclosingBlock(context.firstNode());
1135 Element* block = downcast<Element>(blockNode);
1136 bool useClonesOfEnclosingBlock = blockNode
1137 && blockNode->isElementNode()
1138 && !block->hasTagName(bodyTag)
1139 && !block->hasTagName(htmlTag)
1140 && block != editableRootForPosition(context.startPosition());
1141 bool useLineBreak = enclosingTextFormControl(context.startPosition());
1142
1143 Vector<String> list = string.splitAllowingEmptyEntries('\n');
1144 size_t numLines = list.size();
1145 for (size_t i = 0; i < numLines; ++i) {
1146 const String& s = list[i];
1147
1148 RefPtr<Element> element;
1149 if (s.isEmpty() && i + 1 == numLines) {
1150 // For last line, use the "magic BR" rather than a P.
1151 element = HTMLBRElement::create(document);
1152 element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline);
1153 } else if (useLineBreak) {
1154 element = HTMLBRElement::create(document);
1155 fillContainerFromString(fragment, s);
1156 } else {
1157 if (useClonesOfEnclosingBlock)
1158 element = block->cloneElementWithoutChildren(document);
1159 else
1160 element = createDefaultParagraphElement(document);
1161 fillContainerFromString(*element, s);
1162 }
1163 fragment->appendChild(*element);
1164 }
1165 return fragment;
1166}
1167
1168String documentTypeString(const Document& document)
1169{
1170 DocumentType* documentType = document.doctype();
1171 if (!documentType)
1172 return emptyString();
1173 return serializeFragment(*documentType, SerializedNodes::SubtreeIncludingNode);
1174}
1175
1176String urlToMarkup(const URL& url, const String& title)
1177{
1178 StringBuilder markup;
1179 markup.appendLiteral("<a href=\"");
1180 markup.append(url.string());
1181 markup.appendLiteral("\">");
1182 MarkupAccumulator::appendCharactersReplacingEntities(markup, title, 0, title.length(), EntityMaskInPCDATA);
1183 markup.appendLiteral("</a>");
1184 return markup.toString();
1185}
1186
1187ExceptionOr<Ref<DocumentFragment>> createFragmentForInnerOuterHTML(Element& contextElement, const String& markup, ParserContentPolicy parserContentPolicy)
1188{
1189 auto* document = &contextElement.document();
1190 if (contextElement.hasTagName(templateTag))
1191 document = &document->ensureTemplateDocument();
1192 auto fragment = DocumentFragment::create(*document);
1193
1194 if (document->isHTMLDocument()) {
1195 fragment->parseHTML(markup, &contextElement, parserContentPolicy);
1196 return fragment;
1197 }
1198
1199 bool wasValid = fragment->parseXML(markup, &contextElement, parserContentPolicy);
1200 if (!wasValid)
1201 return Exception { SyntaxError };
1202 return fragment;
1203}
1204
1205RefPtr<DocumentFragment> createFragmentForTransformToFragment(Document& outputDoc, const String& sourceString, const String& sourceMIMEType)
1206{
1207 RefPtr<DocumentFragment> fragment = outputDoc.createDocumentFragment();
1208
1209 if (sourceMIMEType == "text/html") {
1210 // As far as I can tell, there isn't a spec for how transformToFragment is supposed to work.
1211 // Based on the documentation I can find, it looks like we want to start parsing the fragment in the InBody insertion mode.
1212 // Unfortunately, that's an implementation detail of the parser.
1213 // We achieve that effect here by passing in a fake body element as context for the fragment.
1214 auto fakeBody = HTMLBodyElement::create(outputDoc);
1215 fragment->parseHTML(sourceString, fakeBody.ptr());
1216 } else if (sourceMIMEType == "text/plain")
1217 fragment->parserAppendChild(Text::create(outputDoc, sourceString));
1218 else {
1219 bool successfulParse = fragment->parseXML(sourceString, 0);
1220 if (!successfulParse)
1221 return nullptr;
1222 }
1223
1224 // FIXME: Do we need to mess with URLs here?
1225
1226 return fragment;
1227}
1228
1229Ref<DocumentFragment> createFragmentForImageAndURL(Document& document, const String& url)
1230{
1231 auto imageElement = HTMLImageElement::create(document);
1232 imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, url);
1233
1234 auto fragment = document.createDocumentFragment();
1235 fragment->appendChild(imageElement);
1236
1237 return fragment;
1238}
1239
1240static Vector<Ref<HTMLElement>> collectElementsToRemoveFromFragment(ContainerNode& container)
1241{
1242 Vector<Ref<HTMLElement>> toRemove;
1243 for (auto& element : childrenOfType<HTMLElement>(container)) {
1244 if (is<HTMLHtmlElement>(element)) {
1245 toRemove.append(element);
1246 collectElementsToRemoveFromFragment(element);
1247 continue;
1248 }
1249 if (is<HTMLHeadElement>(element) || is<HTMLBodyElement>(element))
1250 toRemove.append(element);
1251 }
1252 return toRemove;
1253}
1254
1255static void removeElementFromFragmentPreservingChildren(DocumentFragment& fragment, HTMLElement& element)
1256{
1257 RefPtr<Node> nextChild;
1258 for (RefPtr<Node> child = element.firstChild(); child; child = nextChild) {
1259 nextChild = child->nextSibling();
1260 element.removeChild(*child);
1261 fragment.insertBefore(*child, &element);
1262 }
1263 fragment.removeChild(element);
1264}
1265
1266ExceptionOr<Ref<DocumentFragment>> createContextualFragment(Element& element, const String& markup, ParserContentPolicy parserContentPolicy)
1267{
1268 auto result = createFragmentForInnerOuterHTML(element, markup, parserContentPolicy);
1269 if (result.hasException())
1270 return result.releaseException();
1271
1272 auto fragment = result.releaseReturnValue();
1273
1274 // We need to pop <html> and <body> elements and remove <head> to
1275 // accommodate folks passing complete HTML documents to make the
1276 // child of an element.
1277 auto toRemove = collectElementsToRemoveFromFragment(fragment);
1278 for (auto& element : toRemove)
1279 removeElementFromFragmentPreservingChildren(fragment, element);
1280
1281 return fragment;
1282}
1283
1284static inline bool hasOneChild(ContainerNode& node)
1285{
1286 Node* firstChild = node.firstChild();
1287 return firstChild && !firstChild->nextSibling();
1288}
1289
1290static inline bool hasOneTextChild(ContainerNode& node)
1291{
1292 return hasOneChild(node) && node.firstChild()->isTextNode();
1293}
1294
1295static inline bool hasMutationEventListeners(const Document& document)
1296{
1297 return document.hasListenerType(Document::DOMSUBTREEMODIFIED_LISTENER)
1298 || document.hasListenerType(Document::DOMNODEINSERTED_LISTENER)
1299 || document.hasListenerType(Document::DOMNODEREMOVED_LISTENER)
1300 || document.hasListenerType(Document::DOMNODEREMOVEDFROMDOCUMENT_LISTENER)
1301 || document.hasListenerType(Document::DOMCHARACTERDATAMODIFIED_LISTENER);
1302}
1303
1304// We can use setData instead of replacing Text node as long as script can't observe the difference.
1305static inline bool canUseSetDataOptimization(const Text& containerChild, const ChildListMutationScope& mutationScope)
1306{
1307 bool authorScriptMayHaveReference = containerChild.refCount();
1308 return !authorScriptMayHaveReference && !mutationScope.canObserve() && !hasMutationEventListeners(containerChild.document());
1309}
1310
1311ExceptionOr<void> replaceChildrenWithFragment(ContainerNode& container, Ref<DocumentFragment>&& fragment)
1312{
1313 Ref<ContainerNode> containerNode(container);
1314 ChildListMutationScope mutation(containerNode);
1315
1316 if (!fragment->firstChild()) {
1317 containerNode->removeChildren();
1318 return { };
1319 }
1320
1321 auto* containerChild = containerNode->firstChild();
1322 if (containerChild && !containerChild->nextSibling()) {
1323 if (is<Text>(*containerChild) && hasOneTextChild(fragment) && canUseSetDataOptimization(downcast<Text>(*containerChild), mutation)) {
1324 ASSERT(!fragment->firstChild()->refCount());
1325 downcast<Text>(*containerChild).setData(downcast<Text>(*fragment->firstChild()).data());
1326 return { };
1327 }
1328
1329 return containerNode->replaceChild(fragment, *containerChild);
1330 }
1331
1332 containerNode->removeChildren();
1333 return containerNode->appendChild(fragment);
1334}
1335
1336}
1337