1 | /* |
2 | * Copyright (C) 2004-2017 Apple Inc. All rights reserved. |
3 | * Copyright (C) 2008, 2009, 2010, 2011 Google Inc. All rights reserved. |
4 | * Copyright (C) 2011 Igalia S.L. |
5 | * Copyright (C) 2011 Motorola Mobility. All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
20 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
21 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
22 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
23 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
24 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "config.h" |
30 | #include "markup.h" |
31 | |
32 | #include "ArchiveResource.h" |
33 | #include "CSSPrimitiveValue.h" |
34 | #include "CSSPropertyNames.h" |
35 | #include "CSSValue.h" |
36 | #include "CSSValueKeywords.h" |
37 | #include "CacheStorageProvider.h" |
38 | #include "ChildListMutationScope.h" |
39 | #include "Comment.h" |
40 | #include "ComposedTreeIterator.h" |
41 | #include "DocumentFragment.h" |
42 | #include "DocumentLoader.h" |
43 | #include "DocumentType.h" |
44 | #include "Editing.h" |
45 | #include "Editor.h" |
46 | #include "EditorClient.h" |
47 | #include "ElementIterator.h" |
48 | #include "EmptyClients.h" |
49 | #include "File.h" |
50 | #include "Frame.h" |
51 | #include "FrameLoader.h" |
52 | #include "HTMLAttachmentElement.h" |
53 | #include "HTMLBRElement.h" |
54 | #include "HTMLBodyElement.h" |
55 | #include "HTMLDivElement.h" |
56 | #include "HTMLHeadElement.h" |
57 | #include "HTMLHtmlElement.h" |
58 | #include "HTMLImageElement.h" |
59 | #include "HTMLNames.h" |
60 | #include "HTMLStyleElement.h" |
61 | #include "HTMLTableElement.h" |
62 | #include "HTMLTextAreaElement.h" |
63 | #include "HTMLTextFormControlElement.h" |
64 | #include "LibWebRTCProvider.h" |
65 | #include "MarkupAccumulator.h" |
66 | #include "NodeList.h" |
67 | #include "Page.h" |
68 | #include "PageConfiguration.h" |
69 | #include "Range.h" |
70 | #include "RenderBlock.h" |
71 | #include "RuntimeEnabledFeatures.h" |
72 | #include "Settings.h" |
73 | #include "SocketProvider.h" |
74 | #include "StyleProperties.h" |
75 | #include "TextIterator.h" |
76 | #include "TypedElementDescendantIterator.h" |
77 | #include "VisibleSelection.h" |
78 | #include "VisibleUnits.h" |
79 | #include <wtf/StdLibExtras.h> |
80 | #include <wtf/URL.h> |
81 | #include <wtf/URLParser.h> |
82 | #include <wtf/text/StringBuilder.h> |
83 | |
84 | namespace WebCore { |
85 | |
86 | using namespace HTMLNames; |
87 | |
88 | static bool propertyMissingOrEqualToNone(StyleProperties*, CSSPropertyID); |
89 | |
90 | class AttributeChange { |
91 | public: |
92 | AttributeChange() |
93 | : m_name(nullAtom(), nullAtom(), nullAtom()) |
94 | { |
95 | } |
96 | |
97 | AttributeChange(Element* element, const QualifiedName& name, const String& value) |
98 | : m_element(element), m_name(name), m_value(value) |
99 | { |
100 | } |
101 | |
102 | void apply() |
103 | { |
104 | m_element->setAttribute(m_name, m_value); |
105 | } |
106 | |
107 | private: |
108 | RefPtr<Element> m_element; |
109 | QualifiedName m_name; |
110 | String m_value; |
111 | }; |
112 | |
113 | static void completeURLs(DocumentFragment* fragment, const String& baseURL) |
114 | { |
115 | Vector<AttributeChange> changes; |
116 | |
117 | URL parsedBaseURL({ }, baseURL); |
118 | |
119 | for (auto& element : descendantsOfType<Element>(*fragment)) { |
120 | if (!element.hasAttributes()) |
121 | continue; |
122 | for (const Attribute& attribute : element.attributesIterator()) { |
123 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) |
124 | changes.append(AttributeChange(&element, attribute.name(), element.completeURLsInAttributeValue(parsedBaseURL, attribute))); |
125 | } |
126 | } |
127 | |
128 | for (auto& change : changes) |
129 | change.apply(); |
130 | } |
131 | |
132 | void replaceSubresourceURLs(Ref<DocumentFragment>&& fragment, HashMap<AtomicString, AtomicString>&& replacementMap) |
133 | { |
134 | Vector<AttributeChange> changes; |
135 | for (auto& element : descendantsOfType<Element>(fragment)) { |
136 | if (!element.hasAttributes()) |
137 | continue; |
138 | for (const Attribute& attribute : element.attributesIterator()) { |
139 | // FIXME: This won't work for srcset. |
140 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) { |
141 | auto replacement = replacementMap.get(attribute.value()); |
142 | if (!replacement.isNull()) |
143 | changes.append({ &element, attribute.name(), replacement }); |
144 | } |
145 | } |
146 | } |
147 | for (auto& change : changes) |
148 | change.apply(); |
149 | } |
150 | |
151 | struct ElementAttribute { |
152 | Ref<Element> element; |
153 | QualifiedName attributeName; |
154 | }; |
155 | |
156 | void removeSubresourceURLAttributes(Ref<DocumentFragment>&& fragment, WTF::Function<bool(const URL&)> shouldRemoveURL) |
157 | { |
158 | Vector<ElementAttribute> attributesToRemove; |
159 | for (auto& element : descendantsOfType<Element>(fragment)) { |
160 | if (!element.hasAttributes()) |
161 | continue; |
162 | for (const Attribute& attribute : element.attributesIterator()) { |
163 | // FIXME: This won't work for srcset. |
164 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) { |
165 | URL url({ }, attribute.value()); |
166 | if (shouldRemoveURL(url)) |
167 | attributesToRemove.append({ element, attribute.name() }); |
168 | } |
169 | } |
170 | } |
171 | for (auto& item : attributesToRemove) |
172 | item.element->removeAttribute(item.attributeName); |
173 | } |
174 | |
175 | std::unique_ptr<Page> createPageForSanitizingWebContent() |
176 | { |
177 | auto pageConfiguration = pageConfigurationWithEmptyClients(); |
178 | |
179 | auto page = std::make_unique<Page>(WTFMove(pageConfiguration)); |
180 | page->settings().setMediaEnabled(false); |
181 | page->settings().setScriptEnabled(false); |
182 | page->settings().setPluginsEnabled(false); |
183 | page->settings().setAcceleratedCompositingEnabled(false); |
184 | |
185 | Frame& frame = page->mainFrame(); |
186 | frame.setView(FrameView::create(frame, IntSize { 800, 600 })); |
187 | frame.init(); |
188 | |
189 | FrameLoader& loader = frame.loader(); |
190 | static char markup[] = "<!DOCTYPE html><html><body></body></html>" ; |
191 | ASSERT(loader.activeDocumentLoader()); |
192 | auto& writer = loader.activeDocumentLoader()->writer(); |
193 | writer.setMIMEType("text/html" ); |
194 | writer.begin(); |
195 | writer.insertDataSynchronously(String(markup)); |
196 | writer.end(); |
197 | RELEASE_ASSERT(page->mainFrame().document()->body()); |
198 | |
199 | return page; |
200 | } |
201 | |
202 | String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer) |
203 | { |
204 | auto page = createPageForSanitizingWebContent(); |
205 | Document* stagingDocument = page->mainFrame().document(); |
206 | ASSERT(stagingDocument); |
207 | |
208 | auto fragment = createFragmentFromMarkup(*stagingDocument, rawHTML, emptyString(), DisallowScriptingAndPluginContent); |
209 | |
210 | if (fragmentSanitizer) |
211 | (*fragmentSanitizer)(fragment); |
212 | |
213 | return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML); |
214 | } |
215 | |
// Tells StyledMarkupAccumulator whether to run its MSO-list preservation path
// (see m_shouldPreserveMSOList) while serializing.
enum class MSOListMode {
    Preserve,
    DoNotPreserve
};
217 | class StyledMarkupAccumulator final : public MarkupAccumulator { |
218 | public: |
219 | enum RangeFullySelectsNode { DoesFullySelectNode, DoesNotFullySelectNode }; |
220 | |
221 | StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs, SerializeComposedTree, |
222 | AnnotateForInterchange, MSOListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized = nullptr); |
223 | |
224 | Node* serializeNodes(const Position& start, const Position& end); |
225 | void wrapWithNode(Node&, bool convertBlocksToInlines = false, RangeFullySelectsNode = DoesFullySelectNode); |
226 | void wrapWithStyleNode(StyleProperties*, Document&, bool isBlock = false); |
227 | String takeResults(); |
228 | |
229 | bool needRelativeStyleWrapper() const { return m_needRelativeStyleWrapper; } |
230 | bool needClearingDiv() const { return m_needClearingDiv; } |
231 | |
232 | using MarkupAccumulator::appendString; |
233 | |
234 | ContainerNode* parentNode(Node& node) |
235 | { |
236 | if (UNLIKELY(m_useComposedTree)) |
237 | return node.parentInComposedTree(); |
238 | return node.parentOrShadowHostNode(); |
239 | } |
240 | |
241 | private: |
242 | void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false); |
243 | const String& styleNodeCloseTag(bool isBlock = false); |
244 | |
245 | String renderedTextRespectingRange(const Text&); |
246 | String textContentRespectingRange(const Text&); |
247 | |
248 | bool shouldPreserveMSOListStyleForElement(const Element&); |
249 | |
250 | void appendStartTag(StringBuilder& out, const Element&, bool addDisplayInline, RangeFullySelectsNode); |
251 | void appendEndTag(StringBuilder& out, const Element&) override; |
252 | void appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) override; |
253 | |
254 | void appendText(StringBuilder& out, const Text&) override; |
255 | void appendStartTag(StringBuilder& out, const Element& element, Namespaces*) override |
256 | { |
257 | appendStartTag(out, element, false, DoesFullySelectNode); |
258 | } |
259 | |
260 | Node* firstChild(Node& node) |
261 | { |
262 | if (UNLIKELY(m_useComposedTree)) |
263 | return firstChildInComposedTreeIgnoringUserAgentShadow(node); |
264 | return node.firstChild(); |
265 | } |
266 | |
267 | Node* nextSibling(Node& node) |
268 | { |
269 | if (UNLIKELY(m_useComposedTree)) |
270 | return nextSiblingInComposedTreeIgnoringUserAgentShadow(node); |
271 | return node.nextSibling(); |
272 | } |
273 | |
274 | Node* nextSkippingChildren(Node& node) |
275 | { |
276 | if (UNLIKELY(m_useComposedTree)) |
277 | return nextSkippingChildrenInComposedTreeIgnoringUserAgentShadow(node); |
278 | return NodeTraversal::nextSkippingChildren(node); |
279 | } |
280 | |
281 | bool hasChildNodes(Node& node) |
282 | { |
283 | if (UNLIKELY(m_useComposedTree)) |
284 | return firstChildInComposedTreeIgnoringUserAgentShadow(node); |
285 | return node.hasChildNodes(); |
286 | } |
287 | |
288 | bool isDescendantOf(Node& node, Node& possibleAncestor) |
289 | { |
290 | if (UNLIKELY(m_useComposedTree)) |
291 | return node.isDescendantOrShadowDescendantOf(&possibleAncestor); |
292 | return node.isDescendantOf(&possibleAncestor); |
293 | } |
294 | |
295 | enum class NodeTraversalMode { EmitString, DoNotEmitString }; |
296 | Node* traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode); |
297 | |
298 | bool appendNodeToPreserveMSOList(Node&); |
299 | |
300 | bool shouldAnnotate() |
301 | { |
302 | return m_annotate == AnnotateForInterchange::Yes; |
303 | } |
304 | |
305 | bool shouldApplyWrappingStyle(const Node& node) const |
306 | { |
307 | return m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode() == node.parentNode() && m_wrappingStyle && m_wrappingStyle->style(); |
308 | } |
309 | |
310 | Position m_start; |
311 | Position m_end; |
312 | Vector<String> m_reversedPrecedingMarkup; |
313 | const AnnotateForInterchange m_annotate; |
314 | RefPtr<Node> m_highestNodeToBeSerialized; |
315 | RefPtr<EditingStyle> m_wrappingStyle; |
316 | bool m_useComposedTree; |
317 | bool m_needsPositionStyleConversion; |
318 | bool m_needRelativeStyleWrapper { false }; |
319 | bool m_needClearingDiv { false }; |
320 | bool m_shouldPreserveMSOList; |
321 | bool m_inMSOList { false }; |
322 | }; |
323 | |
324 | inline StyledMarkupAccumulator::StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree, |
325 | AnnotateForInterchange annotate, MSOListMode msoListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized) |
326 | : MarkupAccumulator(nodes, urlsToResolve) |
327 | , m_start(start) |
328 | , m_end(end) |
329 | , m_annotate(annotate) |
330 | , m_highestNodeToBeSerialized(highestNodeToBeSerialized) |
331 | , m_useComposedTree(serializeComposedTree == SerializeComposedTree::Yes) |
332 | , m_needsPositionStyleConversion(needsPositionStyleConversion) |
333 | , m_shouldPreserveMSOList(msoListMode == MSOListMode::Preserve) |
334 | { |
335 | } |
336 | |
337 | void StyledMarkupAccumulator::wrapWithNode(Node& node, bool convertBlocksToInlines, RangeFullySelectsNode rangeFullySelectsNode) |
338 | { |
339 | StringBuilder markup; |
340 | if (is<Element>(node)) |
341 | appendStartTag(markup, downcast<Element>(node), convertBlocksToInlines && isBlock(&node), rangeFullySelectsNode); |
342 | else |
343 | appendNonElementNode(markup, node, nullptr); |
344 | m_reversedPrecedingMarkup.append(markup.toString()); |
345 | endAppendingNode(node); |
346 | if (m_nodes) |
347 | m_nodes->append(&node); |
348 | } |
349 | |
350 | void StyledMarkupAccumulator::wrapWithStyleNode(StyleProperties* style, Document& document, bool isBlock) |
351 | { |
352 | StringBuilder openTag; |
353 | appendStyleNodeOpenTag(openTag, style, document, isBlock); |
354 | m_reversedPrecedingMarkup.append(openTag.toString()); |
355 | appendString(styleNodeCloseTag(isBlock)); |
356 | } |
357 | |
358 | void StyledMarkupAccumulator::appendStyleNodeOpenTag(StringBuilder& out, StyleProperties* style, Document& document, bool isBlock) |
359 | { |
360 | // wrappingStyleForSerialization should have removed -webkit-text-decorations-in-effect |
361 | ASSERT(propertyMissingOrEqualToNone(style, CSSPropertyWebkitTextDecorationsInEffect)); |
362 | if (isBlock) |
363 | out.appendLiteral("<div style=\"" ); |
364 | else |
365 | out.appendLiteral("<span style=\"" ); |
366 | appendAttributeValue(out, style->asText(), document.isHTMLDocument()); |
367 | out.appendLiteral("\">" ); |
368 | } |
369 | |
370 | const String& StyledMarkupAccumulator::styleNodeCloseTag(bool isBlock) |
371 | { |
372 | static NeverDestroyed<const String> divClose(MAKE_STATIC_STRING_IMPL("</div>" )); |
373 | static NeverDestroyed<const String> styleSpanClose(MAKE_STATIC_STRING_IMPL("</span>" )); |
374 | return isBlock ? divClose : styleSpanClose; |
375 | } |
376 | |
377 | String StyledMarkupAccumulator::takeResults() |
378 | { |
379 | StringBuilder result; |
380 | result.reserveCapacity(totalLength(m_reversedPrecedingMarkup) + length()); |
381 | |
382 | for (size_t i = m_reversedPrecedingMarkup.size(); i > 0; --i) |
383 | result.append(m_reversedPrecedingMarkup[i - 1]); |
384 | |
385 | concatenateMarkup(result); |
386 | |
387 | // We remove '\0' characters because they are not visibly rendered to the user. |
388 | return result.toString().replaceWithLiteral('\0', "" ); |
389 | } |
390 | |
391 | void StyledMarkupAccumulator::appendText(StringBuilder& out, const Text& text) |
392 | { |
393 | const bool parentIsTextarea = is<HTMLTextAreaElement>(text.parentElement()); |
394 | const bool wrappingSpan = shouldApplyWrappingStyle(text) && !parentIsTextarea; |
395 | if (wrappingSpan) { |
396 | RefPtr<EditingStyle> wrappingStyle = m_wrappingStyle->copy(); |
397 | // FIXME: <rdar://problem/5371536> Style rules that match pasted content can change it's appearance |
398 | // Make sure spans are inline style in paste side e.g. span { display: block }. |
399 | wrappingStyle->forceInline(); |
400 | // FIXME: Should this be included in forceInline? |
401 | wrappingStyle->style()->setProperty(CSSPropertyFloat, CSSValueNone); |
402 | |
403 | appendStyleNodeOpenTag(out, wrappingStyle->style(), text.document()); |
404 | } |
405 | |
406 | if (!shouldAnnotate() || parentIsTextarea) { |
407 | auto content = textContentRespectingRange(text); |
408 | appendCharactersReplacingEntities(out, content, 0, content.length(), entityMaskForText(text)); |
409 | } else { |
410 | const bool useRenderedText = !enclosingElementWithTag(firstPositionInNode(const_cast<Text*>(&text)), selectTag); |
411 | String content = useRenderedText ? renderedTextRespectingRange(text) : textContentRespectingRange(text); |
412 | StringBuilder buffer; |
413 | appendCharactersReplacingEntities(buffer, content, 0, content.length(), EntityMaskInPCDATA); |
414 | out.append(convertHTMLTextToInterchangeFormat(buffer.toString(), &text)); |
415 | } |
416 | |
417 | if (wrappingSpan) |
418 | out.append(styleNodeCloseTag()); |
419 | } |
420 | |
421 | String StyledMarkupAccumulator::renderedTextRespectingRange(const Text& text) |
422 | { |
423 | TextIteratorBehavior behavior = TextIteratorDefaultBehavior; |
424 | Position start = &text == m_start.containerNode() ? m_start : firstPositionInNode(const_cast<Text*>(&text)); |
425 | Position end; |
426 | if (&text == m_end.containerNode()) |
427 | end = m_end; |
428 | else { |
429 | end = lastPositionInNode(const_cast<Text*>(&text)); |
430 | if (!m_end.isNull()) |
431 | behavior = TextIteratorBehavesAsIfNodesFollowing; |
432 | } |
433 | |
434 | return plainText(Range::create(text.document(), start, end).ptr(), behavior); |
435 | } |
436 | |
437 | String StyledMarkupAccumulator::textContentRespectingRange(const Text& text) |
438 | { |
439 | if (m_start.isNull() && m_end.isNull()) |
440 | return text.data(); |
441 | |
442 | unsigned start = 0; |
443 | unsigned end = std::numeric_limits<unsigned>::max(); |
444 | if (&text == m_start.containerNode()) |
445 | start = m_start.offsetInContainerNode(); |
446 | if (&text == m_end.containerNode()) |
447 | end = m_end.offsetInContainerNode(); |
448 | ASSERT(start < end); |
449 | return text.data().substring(start, end - start); |
450 | } |
451 | |
452 | void StyledMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces) |
453 | { |
454 | #if ENABLE(ATTACHMENT_ELEMENT) |
455 | if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) |
456 | return; |
457 | |
458 | if (is<HTMLAttachmentElement>(element)) { |
459 | auto& attachment = downcast<HTMLAttachmentElement>(element); |
460 | appendAttribute(out, element, { webkitattachmentidAttr, attachment.uniqueIdentifier() }, namespaces); |
461 | if (auto* file = attachment.file()) { |
462 | // These attributes are only intended for File deserialization, and are removed from the generated attachment |
463 | // element after we've deserialized and set its backing File, in restoreAttachmentElementsInFragment. |
464 | appendAttribute(out, element, { webkitattachmentpathAttr, file->path() }, namespaces); |
465 | appendAttribute(out, element, { webkitattachmentbloburlAttr, file->url().string() }, namespaces); |
466 | } |
467 | } else if (is<HTMLImageElement>(element)) { |
468 | if (auto attachment = downcast<HTMLImageElement>(element).attachmentElement()) |
469 | appendAttribute(out, element, { webkitattachmentidAttr, attachment->uniqueIdentifier() }, namespaces); |
470 | } |
471 | #else |
472 | UNUSED_PARAM(out); |
473 | UNUSED_PARAM(element); |
474 | UNUSED_PARAM(namespaces); |
475 | #endif |
476 | } |
477 | |
478 | bool StyledMarkupAccumulator::shouldPreserveMSOListStyleForElement(const Element& element) |
479 | { |
480 | if (m_inMSOList) |
481 | return true; |
482 | if (m_shouldPreserveMSOList) { |
483 | auto style = element.getAttribute(styleAttr); |
484 | return style.startsWith("mso-list:" ) || style.contains(";mso-list:" ) || style.contains("\nmso-list:" ); |
485 | } |
486 | return false; |
487 | } |
488 | |
489 | void StyledMarkupAccumulator::appendStartTag(StringBuilder& out, const Element& element, bool addDisplayInline, RangeFullySelectsNode rangeFullySelectsNode) |
490 | { |
491 | const bool documentIsHTML = element.document().isHTMLDocument(); |
492 | const bool isSlotElement = is<HTMLSlotElement>(element); |
493 | if (UNLIKELY(isSlotElement)) |
494 | out.append("<span" ); |
495 | else |
496 | appendOpenTag(out, element, nullptr); |
497 | |
498 | appendCustomAttributes(out, element, nullptr); |
499 | |
500 | const bool shouldAnnotateOrForceInline = element.isHTMLElement() && (shouldAnnotate() || addDisplayInline); |
501 | bool shouldOverrideStyleAttr = (shouldAnnotateOrForceInline || shouldApplyWrappingStyle(element) || isSlotElement) && !shouldPreserveMSOListStyleForElement(element); |
502 | if (element.hasAttributes()) { |
503 | for (const Attribute& attribute : element.attributesIterator()) { |
504 | // We'll handle the style attribute separately, below. |
505 | if (attribute.name() == styleAttr && shouldOverrideStyleAttr) |
506 | continue; |
507 | if (element.isEventHandlerAttribute(attribute) || element.isJavaScriptURLAttribute(attribute)) |
508 | continue; |
509 | appendAttribute(out, element, attribute, 0); |
510 | } |
511 | } |
512 | |
513 | if (shouldOverrideStyleAttr) { |
514 | RefPtr<EditingStyle> newInlineStyle; |
515 | |
516 | if (shouldApplyWrappingStyle(element)) { |
517 | newInlineStyle = m_wrappingStyle->copy(); |
518 | newInlineStyle->removePropertiesInElementDefaultStyle(*const_cast<Element*>(&element)); |
519 | newInlineStyle->removeStyleConflictingWithStyleOfNode(*const_cast<Element*>(&element)); |
520 | } else |
521 | newInlineStyle = EditingStyle::create(); |
522 | |
523 | if (isSlotElement) |
524 | newInlineStyle->addDisplayContents(); |
525 | |
526 | if (is<StyledElement>(element) && downcast<StyledElement>(element).inlineStyle()) |
527 | newInlineStyle->overrideWithStyle(*downcast<StyledElement>(element).inlineStyle()); |
528 | |
529 | if (shouldAnnotateOrForceInline) { |
530 | if (shouldAnnotate()) |
531 | newInlineStyle->mergeStyleFromRulesForSerialization(downcast<HTMLElement>(*const_cast<Element*>(&element))); |
532 | |
533 | if (addDisplayInline) |
534 | newInlineStyle->forceInline(); |
535 | |
536 | if (m_needsPositionStyleConversion) { |
537 | m_needRelativeStyleWrapper |= newInlineStyle->convertPositionStyle(); |
538 | m_needClearingDiv |= newInlineStyle->isFloating(); |
539 | } |
540 | |
541 | // If the node is not fully selected by the range, then we don't want to keep styles that affect its relationship to the nodes around it |
542 | // only the ones that affect it and the nodes within it. |
543 | if (rangeFullySelectsNode == DoesNotFullySelectNode && newInlineStyle->style()) |
544 | newInlineStyle->style()->removeProperty(CSSPropertyFloat); |
545 | } |
546 | |
547 | if (!newInlineStyle->isEmpty()) { |
548 | out.appendLiteral(" style=\"" ); |
549 | appendAttributeValue(out, newInlineStyle->style()->asText(), documentIsHTML); |
550 | out.append('\"'); |
551 | } |
552 | } |
553 | |
554 | appendCloseTag(out, element); |
555 | } |
556 | |
557 | void StyledMarkupAccumulator::appendEndTag(StringBuilder& out, const Element& element) |
558 | { |
559 | if (UNLIKELY(is<HTMLSlotElement>(element))) |
560 | out.append("</span>" ); |
561 | else |
562 | MarkupAccumulator::appendEndTag(out, element); |
563 | } |
564 | |
565 | Node* StyledMarkupAccumulator::serializeNodes(const Position& start, const Position& end) |
566 | { |
567 | ASSERT(comparePositions(start, end) <= 0); |
568 | auto startNode = start.firstNode(); |
569 | Node* pastEnd = end.computeNodeAfterPosition(); |
570 | if (!pastEnd && end.containerNode()) |
571 | pastEnd = nextSkippingChildren(*end.containerNode()); |
572 | |
573 | if (!m_highestNodeToBeSerialized) { |
574 | Node* lastClosed = traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::DoNotEmitString); |
575 | m_highestNodeToBeSerialized = lastClosed; |
576 | } |
577 | |
578 | if (m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode()) |
579 | m_wrappingStyle = EditingStyle::wrappingStyleForSerialization(*m_highestNodeToBeSerialized->parentNode(), shouldAnnotate()); |
580 | |
581 | return traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::EmitString); |
582 | } |
583 | |
584 | Node* StyledMarkupAccumulator::traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode traversalMode) |
585 | { |
586 | const bool shouldEmit = traversalMode == NodeTraversalMode::EmitString; |
587 | |
588 | m_inMSOList = false; |
589 | |
590 | unsigned depth = 0; |
591 | auto enterNode = [&] (Node& node) { |
592 | if (UNLIKELY(m_shouldPreserveMSOList) && shouldEmit) { |
593 | if (appendNodeToPreserveMSOList(node)) |
594 | return false; |
595 | } |
596 | |
597 | bool isDisplayContents = is<Element>(node) && downcast<Element>(node).hasDisplayContents(); |
598 | if (!node.renderer() && !isDisplayContents && !enclosingElementWithTag(firstPositionInOrBeforeNode(&node), selectTag)) |
599 | return false; |
600 | |
601 | ++depth; |
602 | if (shouldEmit) |
603 | startAppendingNode(node); |
604 | |
605 | return true; |
606 | }; |
607 | |
608 | Node* lastClosed = nullptr; |
609 | auto exitNode = [&] (Node& node) { |
610 | bool closing = depth; |
611 | if (depth) |
612 | --depth; |
613 | if (shouldEmit) { |
614 | if (closing) |
615 | endAppendingNode(node); |
616 | else |
617 | wrapWithNode(node); |
618 | } |
619 | lastClosed = &node; |
620 | }; |
621 | |
622 | Node* lastNode = nullptr; |
623 | Node* next = nullptr; |
624 | for (auto* n = startNode; n != pastEnd; lastNode = n, n = next) { |
625 | |
626 | Vector<Node*, 8> exitedAncestors; |
627 | next = nullptr; |
628 | if (auto* child = firstChild(*n)) |
629 | next = child; |
630 | else if (auto* sibling = nextSibling(*n)) |
631 | next = sibling; |
632 | else { |
633 | for (auto* ancestor = parentNode(*n); ancestor; ancestor = parentNode(*ancestor)) { |
634 | exitedAncestors.append(ancestor); |
635 | if (auto* sibling = nextSibling(*ancestor)) { |
636 | next = sibling; |
637 | break; |
638 | } |
639 | } |
640 | } |
641 | ASSERT(next || !pastEnd); |
642 | |
643 | if (isBlock(n) && canHaveChildrenForEditing(*n) && next == pastEnd) { |
644 | // Don't write out empty block containers that aren't fully selected. |
645 | continue; |
646 | } |
647 | |
648 | if (!enterNode(*n)) { |
649 | next = nextSkippingChildren(*n); |
650 | // Don't skip over pastEnd. |
651 | if (pastEnd && isDescendantOf(*pastEnd, *n)) |
652 | next = pastEnd; |
653 | ASSERT(next || !pastEnd); |
654 | } else { |
655 | if (!hasChildNodes(*n)) |
656 | exitNode(*n); |
657 | } |
658 | |
659 | for (auto* ancestor : exitedAncestors) { |
660 | if (!depth && next == pastEnd) |
661 | break; |
662 | exitNode(*ancestor); |
663 | } |
664 | } |
665 | |
666 | ASSERT(lastNode || !depth); |
667 | if (depth) { |
668 | for (auto* ancestor = parentNode(pastEnd ? *pastEnd : *lastNode); ancestor && depth; ancestor = parentNode(*ancestor)) |
669 | exitNode(*ancestor); |
670 | } |
671 | |
672 | return lastClosed; |
673 | } |
674 | |
675 | bool StyledMarkupAccumulator::appendNodeToPreserveMSOList(Node& node) |
676 | { |
677 | if (is<Comment>(node)) { |
678 | auto& = downcast<Comment>(node); |
679 | if (!m_inMSOList && commentNode.data() == "[if !supportLists]" ) |
680 | m_inMSOList = true; |
681 | else if (m_inMSOList && commentNode.data() == "[endif]" ) |
682 | m_inMSOList = false; |
683 | else |
684 | return false; |
685 | startAppendingNode(commentNode); |
686 | return true; |
687 | } |
688 | if (is<HTMLStyleElement>(node)) { |
689 | auto* firstChild = node.firstChild(); |
690 | if (!is<Text>(firstChild)) |
691 | return false; |
692 | |
693 | auto& textChild = downcast<Text>(*firstChild); |
694 | auto& styleContent = textChild.data(); |
695 | |
696 | const auto msoStyleDefinitionsStart = styleContent.find("/* Style Definitions */" ); |
697 | const auto msoListDefinitionsStart = styleContent.find("/* List Definitions */" ); |
698 | const auto lastListItem = styleContent.reverseFind("\n@list" ); |
699 | if (msoListDefinitionsStart == notFound || lastListItem == notFound) |
700 | return false; |
701 | const auto start = msoStyleDefinitionsStart != notFound && msoStyleDefinitionsStart < msoListDefinitionsStart ? msoStyleDefinitionsStart : msoListDefinitionsStart; |
702 | |
703 | const auto msoListDefinitionsEnd = styleContent.find(";}\n" , lastListItem); |
704 | if (msoListDefinitionsEnd == notFound || start >= msoListDefinitionsEnd) |
705 | return false; |
706 | |
707 | appendString("<head><style class=\"" WebKitMSOListQuirksStyle "\">\n<!--\n" ); |
708 | appendStringView(StringView(textChild.data()).substring(start, msoListDefinitionsEnd - start + 3)); |
709 | appendString("\n-->\n</style></head>" ); |
710 | |
711 | return true; |
712 | } |
713 | return false; |
714 | } |
715 | |
716 | static Node* ancestorToRetainStructureAndAppearanceForBlock(Node* commonAncestorBlock) |
717 | { |
718 | if (!commonAncestorBlock) |
719 | return nullptr; |
720 | |
721 | if (commonAncestorBlock->hasTagName(tbodyTag) || commonAncestorBlock->hasTagName(trTag)) { |
722 | ContainerNode* table = commonAncestorBlock->parentNode(); |
723 | while (table && !is<HTMLTableElement>(*table)) |
724 | table = table->parentNode(); |
725 | |
726 | return table; |
727 | } |
728 | |
729 | if (isNonTableCellHTMLBlockElement(commonAncestorBlock)) |
730 | return commonAncestorBlock; |
731 | |
732 | return nullptr; |
733 | } |
734 | |
735 | static inline Node* ancestorToRetainStructureAndAppearance(Node* commonAncestor) |
736 | { |
737 | return ancestorToRetainStructureAndAppearanceForBlock(enclosingBlock(commonAncestor)); |
738 | } |
739 | |
740 | static bool propertyMissingOrEqualToNone(StyleProperties* style, CSSPropertyID propertyID) |
741 | { |
742 | if (!style) |
743 | return false; |
744 | RefPtr<CSSValue> value = style->getPropertyCSSValue(propertyID); |
745 | if (!value) |
746 | return true; |
747 | if (!is<CSSPrimitiveValue>(*value)) |
748 | return false; |
749 | return downcast<CSSPrimitiveValue>(*value).valueID() == CSSValueNone; |
750 | } |
751 | |
752 | static bool needInterchangeNewlineAfter(const VisiblePosition& v) |
753 | { |
754 | VisiblePosition next = v.next(); |
755 | Node* upstreamNode = next.deepEquivalent().upstream().deprecatedNode(); |
756 | Node* downstreamNode = v.deepEquivalent().downstream().deprecatedNode(); |
757 | // Add an interchange newline if a paragraph break is selected and a br won't already be added to the markup to represent it. |
758 | return isEndOfParagraph(v) && isStartOfParagraph(next) && !(upstreamNode->hasTagName(brTag) && upstreamNode == downstreamNode); |
759 | } |
760 | |
761 | static RefPtr<EditingStyle> styleFromMatchedRulesAndInlineDecl(Node& node) |
762 | { |
763 | if (!is<HTMLElement>(node)) |
764 | return nullptr; |
765 | |
766 | auto& element = downcast<HTMLElement>(node); |
767 | auto style = EditingStyle::create(element.inlineStyle()); |
768 | style->mergeStyleFromRules(element); |
769 | return style; |
770 | } |
771 | |
772 | static bool isElementPresentational(const Node* node) |
773 | { |
774 | return node->hasTagName(uTag) || node->hasTagName(sTag) || node->hasTagName(strikeTag) |
775 | || node->hasTagName(iTag) || node->hasTagName(emTag) || node->hasTagName(bTag) || node->hasTagName(strongTag); |
776 | } |
777 | |
778 | static Node* highestAncestorToWrapMarkup(const Position& start, const Position& end, Node& commonAncestor, AnnotateForInterchange annotate) |
779 | { |
780 | Node* specialCommonAncestor = nullptr; |
781 | if (annotate == AnnotateForInterchange::Yes) { |
782 | // Include ancestors that aren't completely inside the range but are required to retain |
783 | // the structure and appearance of the copied markup. |
784 | specialCommonAncestor = ancestorToRetainStructureAndAppearance(&commonAncestor); |
785 | |
786 | if (auto* parentListNode = enclosingNodeOfType(start, isListItem)) { |
787 | if (!editingIgnoresContent(*parentListNode) && VisibleSelection::selectionFromContentsOfNode(parentListNode) == VisibleSelection(start, end)) { |
788 | specialCommonAncestor = parentListNode->parentNode(); |
789 | while (specialCommonAncestor && !isListHTMLElement(specialCommonAncestor)) |
790 | specialCommonAncestor = specialCommonAncestor->parentNode(); |
791 | } |
792 | } |
793 | |
794 | // Retain the Mail quote level by including all ancestor mail block quotes. |
795 | if (Node* highestMailBlockquote = highestEnclosingNodeOfType(start, isMailBlockquote, CanCrossEditingBoundary)) |
796 | specialCommonAncestor = highestMailBlockquote; |
797 | } |
798 | |
799 | auto* checkAncestor = specialCommonAncestor ? specialCommonAncestor : &commonAncestor; |
800 | if (checkAncestor->renderer() && checkAncestor->renderer()->containingBlock()) { |
801 | Node* newSpecialCommonAncestor = highestEnclosingNodeOfType(firstPositionInNode(checkAncestor), &isElementPresentational, CanCrossEditingBoundary, checkAncestor->renderer()->containingBlock()->element()); |
802 | if (newSpecialCommonAncestor) |
803 | specialCommonAncestor = newSpecialCommonAncestor; |
804 | } |
805 | |
806 | // If a single tab is selected, commonAncestor will be a text node inside a tab span. |
807 | // If two or more tabs are selected, commonAncestor will be the tab span. |
808 | // In either case, if there is a specialCommonAncestor already, it will necessarily be above |
809 | // any tab span that needs to be included. |
810 | if (!specialCommonAncestor && isTabSpanTextNode(&commonAncestor)) |
811 | specialCommonAncestor = commonAncestor.parentNode(); |
812 | if (!specialCommonAncestor && isTabSpanNode(&commonAncestor)) |
813 | specialCommonAncestor = &commonAncestor; |
814 | |
815 | if (auto* enclosingAnchor = enclosingElementWithTag(firstPositionInNode(specialCommonAncestor ? specialCommonAncestor : &commonAncestor), aTag)) |
816 | specialCommonAncestor = enclosingAnchor; |
817 | |
818 | return specialCommonAncestor; |
819 | } |
820 | |
821 | static String serializePreservingVisualAppearanceInternal(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree, |
822 | AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, MSOListMode msoListMode) |
823 | { |
824 | static NeverDestroyed<const String> interchangeNewlineString(MAKE_STATIC_STRING_IMPL("<br class=\"" AppleInterchangeNewline "\">" )); |
825 | |
826 | if (!comparePositions(start, end)) |
827 | return emptyString(); |
828 | |
829 | RefPtr<Node> commonAncestor = commonShadowIncludingAncestor(start, end); |
830 | if (!commonAncestor) |
831 | return emptyString(); |
832 | |
833 | auto& document = *start.document(); |
834 | document.updateLayoutIgnorePendingStylesheets(); |
835 | |
836 | VisiblePosition visibleStart { start }; |
837 | VisiblePosition visibleEnd { end }; |
838 | |
839 | auto body = makeRefPtr(enclosingElementWithTag(firstPositionInNode(commonAncestor.get()), bodyTag)); |
840 | RefPtr<Element> fullySelectedRoot; |
841 | // FIXME: Do this for all fully selected blocks, not just the body. |
842 | if (body && VisiblePosition(firstPositionInNode(body.get())) == visibleStart && VisiblePosition(lastPositionInNode(body.get())) == visibleEnd) |
843 | fullySelectedRoot = body; |
844 | bool needsPositionStyleConversion = body && fullySelectedRoot == body && document.settings().shouldConvertPositionStyleOnCopy(); |
845 | |
846 | Node* specialCommonAncestor = highestAncestorToWrapMarkup(start, end, *commonAncestor, annotate); |
847 | |
848 | StyledMarkupAccumulator accumulator(start, end, nodes, urlsToResolve, serializeComposedTree, annotate, msoListMode, needsPositionStyleConversion, specialCommonAncestor); |
849 | |
850 | Position startAdjustedForInterchangeNewline = start; |
851 | if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleStart)) { |
852 | if (visibleStart == visibleEnd.previous()) |
853 | return interchangeNewlineString; |
854 | |
855 | accumulator.appendString(interchangeNewlineString); |
856 | startAdjustedForInterchangeNewline = visibleStart.next().deepEquivalent(); |
857 | |
858 | if (comparePositions(startAdjustedForInterchangeNewline, end) >= 0) |
859 | return interchangeNewlineString; |
860 | } |
861 | |
862 | Node* lastClosed = accumulator.serializeNodes(startAdjustedForInterchangeNewline, end); |
863 | |
864 | if (specialCommonAncestor && lastClosed) { |
865 | // Also include all of the ancestors of lastClosed up to this special ancestor. |
866 | for (ContainerNode* ancestor = accumulator.parentNode(*lastClosed); ancestor; ancestor = accumulator.parentNode(*ancestor)) { |
867 | if (ancestor == fullySelectedRoot && convertBlocksToInlines == ConvertBlocksToInlines::No) { |
868 | RefPtr<EditingStyle> fullySelectedRootStyle = styleFromMatchedRulesAndInlineDecl(*fullySelectedRoot); |
869 | |
870 | // Bring the background attribute over, but not as an attribute because a background attribute on a div |
871 | // appears to have no effect. |
872 | if ((!fullySelectedRootStyle || !fullySelectedRootStyle->style() || !fullySelectedRootStyle->style()->getPropertyCSSValue(CSSPropertyBackgroundImage)) |
873 | && fullySelectedRoot->hasAttributeWithoutSynchronization(backgroundAttr)) |
874 | fullySelectedRootStyle->style()->setProperty(CSSPropertyBackgroundImage, "url('" + fullySelectedRoot->getAttribute(backgroundAttr) + "')" ); |
875 | |
876 | if (fullySelectedRootStyle->style()) { |
877 | // Reset the CSS properties to avoid an assertion error in addStyleMarkup(). |
878 | // This assertion is caused at least when we select all text of a <body> element whose |
879 | // 'text-decoration' property is "inherit", and copy it. |
880 | if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyTextDecoration)) |
881 | fullySelectedRootStyle->style()->setProperty(CSSPropertyTextDecoration, CSSValueNone); |
882 | if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyWebkitTextDecorationsInEffect)) |
883 | fullySelectedRootStyle->style()->setProperty(CSSPropertyWebkitTextDecorationsInEffect, CSSValueNone); |
884 | accumulator.wrapWithStyleNode(fullySelectedRootStyle->style(), document, true); |
885 | } |
886 | } else { |
887 | // Since this node and all the other ancestors are not in the selection we want to set RangeFullySelectsNode to DoesNotFullySelectNode |
888 | // so that styles that affect the exterior of the node are not included. |
889 | accumulator.wrapWithNode(*ancestor, convertBlocksToInlines == ConvertBlocksToInlines::Yes, StyledMarkupAccumulator::DoesNotFullySelectNode); |
890 | } |
891 | if (nodes) |
892 | nodes->append(ancestor); |
893 | |
894 | if (ancestor == specialCommonAncestor) |
895 | break; |
896 | } |
897 | } |
898 | |
899 | if (accumulator.needRelativeStyleWrapper() && needsPositionStyleConversion) { |
900 | if (accumulator.needClearingDiv()) |
901 | accumulator.appendString("<div style=\"clear: both;\"></div>" ); |
902 | RefPtr<EditingStyle> positionRelativeStyle = styleFromMatchedRulesAndInlineDecl(*body); |
903 | positionRelativeStyle->style()->setProperty(CSSPropertyPosition, CSSValueRelative); |
904 | accumulator.wrapWithStyleNode(positionRelativeStyle->style(), document, true); |
905 | } |
906 | |
907 | // FIXME: The interchange newline should be placed in the block that it's in, not after all of the content, unconditionally. |
908 | if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous())) |
909 | accumulator.appendString(interchangeNewlineString); |
910 | |
911 | return accumulator.takeResults(); |
912 | } |
913 | |
914 | String serializePreservingVisualAppearance(const Range& range, Vector<Node*>* nodes, AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, ResolveURLs urlsToReslve) |
915 | { |
916 | return serializePreservingVisualAppearanceInternal(range.startPosition(), range.endPosition(), nodes, urlsToReslve, SerializeComposedTree::No, |
917 | annotate, convertBlocksToInlines, MSOListMode::DoNotPreserve); |
918 | } |
919 | |
920 | String serializePreservingVisualAppearance(const VisibleSelection& selection, ResolveURLs resolveURLs, SerializeComposedTree serializeComposedTree, Vector<Node*>* nodes) |
921 | { |
922 | return serializePreservingVisualAppearanceInternal(selection.start(), selection.end(), nodes, resolveURLs, serializeComposedTree, |
923 | AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, MSOListMode::DoNotPreserve); |
924 | } |
925 | |
926 | |
927 | static bool shouldPreserveMSOLists(const String& markup) |
928 | { |
929 | if (!markup.startsWith("<html xmlns:" )) |
930 | return false; |
931 | auto tagClose = markup.find('>'); |
932 | if (tagClose == notFound) |
933 | return false; |
934 | auto htmlTag = markup.substring(0, tagClose); |
935 | return htmlTag.contains("xmlns:o=\"urn:schemas-microsoft-com:office:office\"" ) |
936 | && htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"" ); |
937 | } |
938 | |
939 | String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup) |
940 | { |
941 | MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup) |
942 | ? MSOListMode::Preserve : MSOListMode::DoNotPreserve; |
943 | |
944 | auto bodyElement = makeRefPtr(document.body()); |
945 | ASSERT(bodyElement); |
946 | bodyElement->appendChild(fragment.get()); |
947 | |
948 | // SerializeComposedTree::No because there can't be a shadow tree in the pasted fragment. |
949 | auto result = serializePreservingVisualAppearanceInternal(firstPositionInNode(bodyElement.get()), lastPositionInNode(bodyElement.get()), nullptr, |
950 | ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, msoListMode); |
951 | |
952 | if (msoListMode == MSOListMode::Preserve) { |
953 | StringBuilder builder; |
954 | builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n" |
955 | "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n" |
956 | "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n" |
957 | "xmlns=\"http://www.w3.org/TR/REC-html40\">" ); |
958 | builder.append(result); |
959 | builder.appendLiteral("</html>" ); |
960 | return builder.toString(); |
961 | } |
962 | |
963 | return result; |
964 | } |
965 | |
966 | static void restoreAttachmentElementsInFragment(DocumentFragment& fragment) |
967 | { |
968 | #if ENABLE(ATTACHMENT_ELEMENT) |
969 | if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) |
970 | return; |
971 | |
972 | // When creating a fragment we must strip the webkit-attachment-path attribute after restoring the File object. |
973 | Vector<Ref<HTMLAttachmentElement>> attachments; |
974 | for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment)) |
975 | attachments.append(attachment); |
976 | |
977 | for (auto& attachment : attachments) { |
978 | attachment->setUniqueIdentifier(attachment->attributeWithoutSynchronization(webkitattachmentidAttr)); |
979 | |
980 | auto attachmentPath = attachment->attachmentPath(); |
981 | auto blobURL = attachment->blobURL(); |
982 | if (!attachmentPath.isEmpty()) |
983 | attachment->setFile(File::create(attachmentPath)); |
984 | else if (!blobURL.isEmpty()) |
985 | attachment->setFile(File::deserialize({ }, blobURL, attachment->attachmentType(), attachment->attachmentTitle())); |
986 | |
987 | // Remove temporary attributes that were previously added in StyledMarkupAccumulator::appendCustomAttributes. |
988 | attachment->removeAttribute(webkitattachmentidAttr); |
989 | attachment->removeAttribute(webkitattachmentpathAttr); |
990 | attachment->removeAttribute(webkitattachmentbloburlAttr); |
991 | } |
992 | |
993 | Vector<Ref<HTMLImageElement>> images; |
994 | for (auto& image : descendantsOfType<HTMLImageElement>(fragment)) |
995 | images.append(image); |
996 | |
997 | for (auto& image : images) { |
998 | auto attachmentIdentifier = image->attributeWithoutSynchronization(webkitattachmentidAttr); |
999 | if (attachmentIdentifier.isEmpty()) |
1000 | continue; |
1001 | |
1002 | auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, *fragment.ownerDocument()); |
1003 | attachment->setUniqueIdentifier(attachmentIdentifier); |
1004 | image->setAttachmentElement(WTFMove(attachment)); |
1005 | image->removeAttribute(webkitattachmentidAttr); |
1006 | } |
1007 | #else |
1008 | UNUSED_PARAM(fragment); |
1009 | #endif |
1010 | } |
1011 | |
1012 | Ref<DocumentFragment> createFragmentFromMarkup(Document& document, const String& markup, const String& baseURL, ParserContentPolicy parserContentPolicy) |
1013 | { |
1014 | // We use a fake body element here to trick the HTML parser into using the InBody insertion mode. |
1015 | auto fakeBody = HTMLBodyElement::create(document); |
1016 | auto fragment = DocumentFragment::create(document); |
1017 | |
1018 | fragment->parseHTML(markup, fakeBody.ptr(), parserContentPolicy); |
1019 | restoreAttachmentElementsInFragment(fragment); |
1020 | if (!baseURL.isEmpty() && baseURL != WTF::blankURL() && baseURL != document.baseURL()) |
1021 | completeURLs(fragment.ptr(), baseURL); |
1022 | |
1023 | return fragment; |
1024 | } |
1025 | |
1026 | String serializeFragment(const Node& node, SerializedNodes root, Vector<Node*>* nodes, ResolveURLs urlsToResolve, Vector<QualifiedName>* tagNamesToSkip, SerializationSyntax serializationSyntax) |
1027 | { |
1028 | MarkupAccumulator accumulator(nodes, urlsToResolve, serializationSyntax); |
1029 | return accumulator.serializeNodes(const_cast<Node&>(node), root, tagNamesToSkip); |
1030 | } |
1031 | |
1032 | static void fillContainerFromString(ContainerNode& paragraph, const String& string) |
1033 | { |
1034 | Document& document = paragraph.document(); |
1035 | |
1036 | if (string.isEmpty()) { |
1037 | paragraph.appendChild(createBlockPlaceholderElement(document)); |
1038 | return; |
1039 | } |
1040 | |
1041 | ASSERT(string.find('\n') == notFound); |
1042 | |
1043 | Vector<String> tabList = string.splitAllowingEmptyEntries('\t'); |
1044 | String tabText = emptyString(); |
1045 | bool first = true; |
1046 | size_t numEntries = tabList.size(); |
1047 | for (size_t i = 0; i < numEntries; ++i) { |
1048 | const String& s = tabList[i]; |
1049 | |
1050 | // append the non-tab textual part |
1051 | if (!s.isEmpty()) { |
1052 | if (!tabText.isEmpty()) { |
1053 | paragraph.appendChild(createTabSpanElement(document, tabText)); |
1054 | tabText = emptyString(); |
1055 | } |
1056 | Ref<Node> textNode = document.createTextNode(stringWithRebalancedWhitespace(s, first, i + 1 == numEntries)); |
1057 | paragraph.appendChild(textNode); |
1058 | } |
1059 | |
1060 | // there is a tab after every entry, except the last entry |
1061 | // (if the last character is a tab, the list gets an extra empty entry) |
1062 | if (i + 1 != numEntries) |
1063 | tabText.append('\t'); |
1064 | else if (!tabText.isEmpty()) |
1065 | paragraph.appendChild(createTabSpanElement(document, tabText)); |
1066 | |
1067 | first = false; |
1068 | } |
1069 | } |
1070 | |
1071 | bool isPlainTextMarkup(Node* node) |
1072 | { |
1073 | ASSERT(node); |
1074 | if (!is<HTMLDivElement>(*node)) |
1075 | return false; |
1076 | |
1077 | HTMLDivElement& element = downcast<HTMLDivElement>(*node); |
1078 | if (element.hasAttributes()) |
1079 | return false; |
1080 | |
1081 | Node* firstChild = element.firstChild(); |
1082 | if (!firstChild) |
1083 | return false; |
1084 | |
1085 | Node* secondChild = firstChild->nextSibling(); |
1086 | if (!secondChild) |
1087 | return firstChild->isTextNode() || firstChild->firstChild(); |
1088 | |
1089 | if (secondChild->nextSibling()) |
1090 | return false; |
1091 | |
1092 | return isTabSpanTextNode(firstChild->firstChild()) && secondChild->isTextNode(); |
1093 | } |
1094 | |
1095 | static bool contextPreservesNewline(const Range& context) |
1096 | { |
1097 | VisiblePosition position(context.startPosition()); |
1098 | Node* container = position.deepEquivalent().containerNode(); |
1099 | if (!container || !container->renderer()) |
1100 | return false; |
1101 | |
1102 | return container->renderer()->style().preserveNewline(); |
1103 | } |
1104 | |
1105 | Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text) |
1106 | { |
1107 | Document& document = context.ownerDocument(); |
1108 | Ref<DocumentFragment> fragment = document.createDocumentFragment(); |
1109 | |
1110 | if (text.isEmpty()) |
1111 | return fragment; |
1112 | |
1113 | String string = text; |
1114 | string.replace("\r\n" , "\n" ); |
1115 | string.replace('\r', '\n'); |
1116 | |
1117 | if (contextPreservesNewline(context)) { |
1118 | fragment->appendChild(document.createTextNode(string)); |
1119 | if (string.endsWith('\n')) { |
1120 | auto element = HTMLBRElement::create(document); |
1121 | element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline); |
1122 | fragment->appendChild(element); |
1123 | } |
1124 | return fragment; |
1125 | } |
1126 | |
1127 | // A string with no newlines gets added inline, rather than being put into a paragraph. |
1128 | if (string.find('\n') == notFound) { |
1129 | fillContainerFromString(fragment, string); |
1130 | return fragment; |
1131 | } |
1132 | |
1133 | // Break string into paragraphs. Extra line breaks turn into empty paragraphs. |
1134 | Node* blockNode = enclosingBlock(context.firstNode()); |
1135 | Element* block = downcast<Element>(blockNode); |
1136 | bool useClonesOfEnclosingBlock = blockNode |
1137 | && blockNode->isElementNode() |
1138 | && !block->hasTagName(bodyTag) |
1139 | && !block->hasTagName(htmlTag) |
1140 | && block != editableRootForPosition(context.startPosition()); |
1141 | bool useLineBreak = enclosingTextFormControl(context.startPosition()); |
1142 | |
1143 | Vector<String> list = string.splitAllowingEmptyEntries('\n'); |
1144 | size_t numLines = list.size(); |
1145 | for (size_t i = 0; i < numLines; ++i) { |
1146 | const String& s = list[i]; |
1147 | |
1148 | RefPtr<Element> element; |
1149 | if (s.isEmpty() && i + 1 == numLines) { |
1150 | // For last line, use the "magic BR" rather than a P. |
1151 | element = HTMLBRElement::create(document); |
1152 | element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline); |
1153 | } else if (useLineBreak) { |
1154 | element = HTMLBRElement::create(document); |
1155 | fillContainerFromString(fragment, s); |
1156 | } else { |
1157 | if (useClonesOfEnclosingBlock) |
1158 | element = block->cloneElementWithoutChildren(document); |
1159 | else |
1160 | element = createDefaultParagraphElement(document); |
1161 | fillContainerFromString(*element, s); |
1162 | } |
1163 | fragment->appendChild(*element); |
1164 | } |
1165 | return fragment; |
1166 | } |
1167 | |
1168 | String documentTypeString(const Document& document) |
1169 | { |
1170 | DocumentType* documentType = document.doctype(); |
1171 | if (!documentType) |
1172 | return emptyString(); |
1173 | return serializeFragment(*documentType, SerializedNodes::SubtreeIncludingNode); |
1174 | } |
1175 | |
1176 | String urlToMarkup(const URL& url, const String& title) |
1177 | { |
1178 | StringBuilder markup; |
1179 | markup.appendLiteral("<a href=\"" ); |
1180 | markup.append(url.string()); |
1181 | markup.appendLiteral("\">" ); |
1182 | MarkupAccumulator::appendCharactersReplacingEntities(markup, title, 0, title.length(), EntityMaskInPCDATA); |
1183 | markup.appendLiteral("</a>" ); |
1184 | return markup.toString(); |
1185 | } |
1186 | |
1187 | ExceptionOr<Ref<DocumentFragment>> createFragmentForInnerOuterHTML(Element& contextElement, const String& markup, ParserContentPolicy parserContentPolicy) |
1188 | { |
1189 | auto* document = &contextElement.document(); |
1190 | if (contextElement.hasTagName(templateTag)) |
1191 | document = &document->ensureTemplateDocument(); |
1192 | auto fragment = DocumentFragment::create(*document); |
1193 | |
1194 | if (document->isHTMLDocument()) { |
1195 | fragment->parseHTML(markup, &contextElement, parserContentPolicy); |
1196 | return fragment; |
1197 | } |
1198 | |
1199 | bool wasValid = fragment->parseXML(markup, &contextElement, parserContentPolicy); |
1200 | if (!wasValid) |
1201 | return Exception { SyntaxError }; |
1202 | return fragment; |
1203 | } |
1204 | |
1205 | RefPtr<DocumentFragment> createFragmentForTransformToFragment(Document& outputDoc, const String& sourceString, const String& sourceMIMEType) |
1206 | { |
1207 | RefPtr<DocumentFragment> fragment = outputDoc.createDocumentFragment(); |
1208 | |
1209 | if (sourceMIMEType == "text/html" ) { |
1210 | // As far as I can tell, there isn't a spec for how transformToFragment is supposed to work. |
1211 | // Based on the documentation I can find, it looks like we want to start parsing the fragment in the InBody insertion mode. |
1212 | // Unfortunately, that's an implementation detail of the parser. |
1213 | // We achieve that effect here by passing in a fake body element as context for the fragment. |
1214 | auto fakeBody = HTMLBodyElement::create(outputDoc); |
1215 | fragment->parseHTML(sourceString, fakeBody.ptr()); |
1216 | } else if (sourceMIMEType == "text/plain" ) |
1217 | fragment->parserAppendChild(Text::create(outputDoc, sourceString)); |
1218 | else { |
1219 | bool successfulParse = fragment->parseXML(sourceString, 0); |
1220 | if (!successfulParse) |
1221 | return nullptr; |
1222 | } |
1223 | |
1224 | // FIXME: Do we need to mess with URLs here? |
1225 | |
1226 | return fragment; |
1227 | } |
1228 | |
1229 | Ref<DocumentFragment> createFragmentForImageAndURL(Document& document, const String& url) |
1230 | { |
1231 | auto imageElement = HTMLImageElement::create(document); |
1232 | imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, url); |
1233 | |
1234 | auto fragment = document.createDocumentFragment(); |
1235 | fragment->appendChild(imageElement); |
1236 | |
1237 | return fragment; |
1238 | } |
1239 | |
1240 | static Vector<Ref<HTMLElement>> collectElementsToRemoveFromFragment(ContainerNode& container) |
1241 | { |
1242 | Vector<Ref<HTMLElement>> toRemove; |
1243 | for (auto& element : childrenOfType<HTMLElement>(container)) { |
1244 | if (is<HTMLHtmlElement>(element)) { |
1245 | toRemove.append(element); |
1246 | collectElementsToRemoveFromFragment(element); |
1247 | continue; |
1248 | } |
1249 | if (is<HTMLHeadElement>(element) || is<HTMLBodyElement>(element)) |
1250 | toRemove.append(element); |
1251 | } |
1252 | return toRemove; |
1253 | } |
1254 | |
1255 | static void removeElementFromFragmentPreservingChildren(DocumentFragment& fragment, HTMLElement& element) |
1256 | { |
1257 | RefPtr<Node> nextChild; |
1258 | for (RefPtr<Node> child = element.firstChild(); child; child = nextChild) { |
1259 | nextChild = child->nextSibling(); |
1260 | element.removeChild(*child); |
1261 | fragment.insertBefore(*child, &element); |
1262 | } |
1263 | fragment.removeChild(element); |
1264 | } |
1265 | |
1266 | ExceptionOr<Ref<DocumentFragment>> createContextualFragment(Element& element, const String& markup, ParserContentPolicy parserContentPolicy) |
1267 | { |
1268 | auto result = createFragmentForInnerOuterHTML(element, markup, parserContentPolicy); |
1269 | if (result.hasException()) |
1270 | return result.releaseException(); |
1271 | |
1272 | auto fragment = result.releaseReturnValue(); |
1273 | |
1274 | // We need to pop <html> and <body> elements and remove <head> to |
1275 | // accommodate folks passing complete HTML documents to make the |
1276 | // child of an element. |
1277 | auto toRemove = collectElementsToRemoveFromFragment(fragment); |
1278 | for (auto& element : toRemove) |
1279 | removeElementFromFragmentPreservingChildren(fragment, element); |
1280 | |
1281 | return fragment; |
1282 | } |
1283 | |
1284 | static inline bool hasOneChild(ContainerNode& node) |
1285 | { |
1286 | Node* firstChild = node.firstChild(); |
1287 | return firstChild && !firstChild->nextSibling(); |
1288 | } |
1289 | |
1290 | static inline bool hasOneTextChild(ContainerNode& node) |
1291 | { |
1292 | return hasOneChild(node) && node.firstChild()->isTextNode(); |
1293 | } |
1294 | |
1295 | static inline bool hasMutationEventListeners(const Document& document) |
1296 | { |
1297 | return document.hasListenerType(Document::DOMSUBTREEMODIFIED_LISTENER) |
1298 | || document.hasListenerType(Document::DOMNODEINSERTED_LISTENER) |
1299 | || document.hasListenerType(Document::DOMNODEREMOVED_LISTENER) |
1300 | || document.hasListenerType(Document::DOMNODEREMOVEDFROMDOCUMENT_LISTENER) |
1301 | || document.hasListenerType(Document::DOMCHARACTERDATAMODIFIED_LISTENER); |
1302 | } |
1303 | |
1304 | // We can use setData instead of replacing Text node as long as script can't observe the difference. |
1305 | static inline bool canUseSetDataOptimization(const Text& containerChild, const ChildListMutationScope& mutationScope) |
1306 | { |
1307 | bool authorScriptMayHaveReference = containerChild.refCount(); |
1308 | return !authorScriptMayHaveReference && !mutationScope.canObserve() && !hasMutationEventListeners(containerChild.document()); |
1309 | } |
1310 | |
1311 | ExceptionOr<void> replaceChildrenWithFragment(ContainerNode& container, Ref<DocumentFragment>&& fragment) |
1312 | { |
1313 | Ref<ContainerNode> containerNode(container); |
1314 | ChildListMutationScope mutation(containerNode); |
1315 | |
1316 | if (!fragment->firstChild()) { |
1317 | containerNode->removeChildren(); |
1318 | return { }; |
1319 | } |
1320 | |
1321 | auto* containerChild = containerNode->firstChild(); |
1322 | if (containerChild && !containerChild->nextSibling()) { |
1323 | if (is<Text>(*containerChild) && hasOneTextChild(fragment) && canUseSetDataOptimization(downcast<Text>(*containerChild), mutation)) { |
1324 | ASSERT(!fragment->firstChild()->refCount()); |
1325 | downcast<Text>(*containerChild).setData(downcast<Text>(*fragment->firstChild()).data()); |
1326 | return { }; |
1327 | } |
1328 | |
1329 | return containerNode->replaceChild(fragment, *containerChild); |
1330 | } |
1331 | |
1332 | containerNode->removeChildren(); |
1333 | return containerNode->appendChild(fragment); |
1334 | } |
1335 | |
1336 | } |
1337 | |