1/*
2 * Copyright (C) 2012 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "DOMPatchSupport.h"
33
34#include "Attribute.h"
35#include "DOMEditor.h"
36#include "Document.h"
37#include "DocumentFragment.h"
38#include "HTMLDocument.h"
39#include "HTMLDocumentParser.h"
40#include "HTMLElement.h"
41#include "HTMLHeadElement.h"
42#include "HTMLNames.h"
43#include "InspectorHistory.h"
44#include "Node.h"
45#include "XMLDocument.h"
46#include "XMLDocumentParser.h"
47#include <wtf/Deque.h>
48#include <wtf/HashTraits.h>
49#include <wtf/RefPtr.h>
50#include <wtf/SHA1.h>
51#include <wtf/text/Base64.h>
52#include <wtf/text/CString.h>
53
54namespace WebCore {
55
56using HTMLNames::bodyTag;
57using HTMLNames::headTag;
58using HTMLNames::htmlTag;
59
60struct DOMPatchSupport::Digest {
61 WTF_MAKE_STRUCT_FAST_ALLOCATED;
62
63 String sha1;
64 String attrsSHA1;
65 Node* node;
66 Vector<std::unique_ptr<Digest>> children;
67};
68
69DOMPatchSupport::DOMPatchSupport(DOMEditor& domEditor, Document& document)
70 : m_domEditor(domEditor)
71 , m_document(document)
72{
73}
74
75void DOMPatchSupport::patchDocument(const String& markup)
76{
77 RefPtr<Document> newDocument;
78 if (m_document.isHTMLDocument())
79 newDocument = HTMLDocument::create(nullptr, URL());
80 else if (m_document.isXHTMLDocument())
81 newDocument = XMLDocument::createXHTML(nullptr, URL());
82 else if (m_document.isSVGDocument())
83 newDocument = XMLDocument::create(nullptr, URL());
84
85 ASSERT(newDocument);
86 RefPtr<DocumentParser> parser;
87 if (newDocument->isHTMLDocument())
88 parser = HTMLDocumentParser::create(static_cast<HTMLDocument&>(*newDocument));
89 else
90 parser = XMLDocumentParser::create(*newDocument, nullptr);
91 parser->insert(markup); // Use insert() so that the parser will not yield.
92 parser->finish();
93 parser->detach();
94
95 if (!m_document.documentElement())
96 return;
97 if (!newDocument->documentElement())
98 return;
99
100 std::unique_ptr<Digest> oldInfo = createDigest(*m_document.documentElement(), nullptr);
101 std::unique_ptr<Digest> newInfo = createDigest(*newDocument->documentElement(), &m_unusedNodesMap);
102
103 if (innerPatchNode(*oldInfo, *newInfo).hasException()) {
104 // Fall back to rewrite.
105 m_document.write(nullptr, markup);
106 m_document.close();
107 }
108}
109
110ExceptionOr<Node*> DOMPatchSupport::patchNode(Node& node, const String& markup)
111{
112 // Don't parse <html> as a fragment.
113 if (node.isDocumentNode() || (node.parentNode() && node.parentNode()->isDocumentNode())) {
114 patchDocument(markup);
115 return nullptr;
116 }
117
118 Node* previousSibling = node.previousSibling();
119 // FIXME: This code should use one of createFragment* in markup.h
120 auto fragment = DocumentFragment::create(m_document);
121 if (m_document.isHTMLDocument())
122 fragment->parseHTML(markup, node.parentElement() ? node.parentElement() : m_document.documentElement());
123 else
124 fragment->parseXML(markup, node.parentElement() ? node.parentElement() : m_document.documentElement());
125
126 // Compose the old list.
127 auto* parentNode = node.parentNode();
128 Vector<std::unique_ptr<Digest>> oldList;
129 for (Node* child = parentNode->firstChild(); child; child = child->nextSibling())
130 oldList.append(createDigest(*child, nullptr));
131
132 // Compose the new list.
133 Vector<std::unique_ptr<Digest>> newList;
134 for (Node* child = parentNode->firstChild(); child != &node; child = child->nextSibling())
135 newList.append(createDigest(*child, nullptr));
136 for (Node* child = fragment->firstChild(); child; child = child->nextSibling()) {
137 if (child->hasTagName(headTag) && !child->firstChild() && !markup.containsIgnoringASCIICase("</head>"))
138 continue; // HTML5 parser inserts empty <head> tag whenever it parses <body>
139 if (child->hasTagName(bodyTag) && !child->firstChild() && !markup.containsIgnoringASCIICase("</body>"))
140 continue; // HTML5 parser inserts empty <body> tag whenever it parses </head>
141 newList.append(createDigest(*child, &m_unusedNodesMap));
142 }
143 for (Node* child = node.nextSibling(); child; child = child->nextSibling())
144 newList.append(createDigest(*child, nullptr));
145
146 if (innerPatchChildren(*parentNode, oldList, newList).hasException()) {
147 // Fall back to total replace.
148 auto result = m_domEditor.replaceChild(*parentNode, fragment.get(), node);
149 if (result.hasException())
150 return result.releaseException();
151 }
152 return previousSibling ? previousSibling->nextSibling() : parentNode->firstChild();
153}
154
155ExceptionOr<void> DOMPatchSupport::innerPatchNode(Digest& oldDigest, Digest& newDigest)
156{
157 if (oldDigest.sha1 == newDigest.sha1)
158 return { };
159
160 auto& oldNode = *oldDigest.node;
161 auto& newNode = *newDigest.node;
162
163 if (newNode.nodeType() != oldNode.nodeType() || newNode.nodeName() != oldNode.nodeName())
164 return m_domEditor.replaceChild(*oldNode.parentNode(), newNode, oldNode);
165
166 if (oldNode.nodeValue() != newNode.nodeValue()) {
167 auto result = m_domEditor.setNodeValue(oldNode, newNode.nodeValue());
168 if (result.hasException())
169 return result.releaseException();
170 }
171
172 if (!is<Element>(oldNode))
173 return { };
174
175 // Patch attributes
176 auto& oldElement = downcast<Element>(oldNode);
177 auto& newElement = downcast<Element>(newNode);
178 if (oldDigest.attrsSHA1 != newDigest.attrsSHA1) {
179 // FIXME: Create a function in Element for removing all properties. Take in account whether did/willModifyAttribute are important.
180 if (oldElement.hasAttributesWithoutUpdate()) {
181 while (oldElement.attributeCount()) {
182 auto result = m_domEditor.removeAttribute(oldElement, oldElement.attributeAt(0).localName());
183 if (result.hasException())
184 return result.releaseException();
185 }
186 }
187
188 // FIXME: Create a function in Element for copying properties. cloneDataFromElement() is close but not enough for this case.
189 if (newElement.hasAttributesWithoutUpdate()) {
190 for (auto& attribute : newElement.attributesIterator()) {
191 auto result = m_domEditor.setAttribute(oldElement, attribute.name().localName(), attribute.value());
192 if (result.hasException())
193 return result.releaseException();
194 }
195 }
196 }
197
198 auto result = innerPatchChildren(oldElement, oldDigest.children, newDigest.children);
199 m_unusedNodesMap.remove(newDigest.sha1);
200 return result;
201}
202
203std::pair<DOMPatchSupport::ResultMap, DOMPatchSupport::ResultMap>
204DOMPatchSupport::diff(const Vector<std::unique_ptr<Digest>>& oldList, const Vector<std::unique_ptr<Digest>>& newList)
205{
206 ResultMap newMap(newList.size());
207 ResultMap oldMap(oldList.size());
208
209 for (auto& result : oldMap) {
210 result.first = nullptr;
211 result.second = 0;
212 }
213
214 for (auto& result : newMap) {
215 result.first = nullptr;
216 result.second = 0;
217 }
218
219 // Trim head and tail.
220 for (size_t i = 0; i < oldList.size() && i < newList.size() && oldList[i]->sha1 == newList[i]->sha1; ++i) {
221 oldMap[i].first = oldList[i].get();
222 oldMap[i].second = i;
223 newMap[i].first = newList[i].get();
224 newMap[i].second = i;
225 }
226 for (size_t i = 0; i < oldList.size() && i < newList.size() && oldList[oldList.size() - i - 1]->sha1 == newList[newList.size() - i - 1]->sha1; ++i) {
227 size_t oldIndex = oldList.size() - i - 1;
228 size_t newIndex = newList.size() - i - 1;
229 oldMap[oldIndex].first = oldList[oldIndex].get();
230 oldMap[oldIndex].second = newIndex;
231 newMap[newIndex].first = newList[newIndex].get();
232 newMap[newIndex].second = oldIndex;
233 }
234
235 typedef HashMap<String, Vector<size_t>> DiffTable;
236 DiffTable newTable;
237 DiffTable oldTable;
238
239 for (size_t i = 0; i < newList.size(); ++i)
240 newTable.add(newList[i]->sha1, Vector<size_t>()).iterator->value.append(i);
241
242 for (size_t i = 0; i < oldList.size(); ++i)
243 oldTable.add(oldList[i]->sha1, Vector<size_t>()).iterator->value.append(i);
244
245 for (auto& newEntry : newTable) {
246 if (newEntry.value.size() != 1)
247 continue;
248
249 auto oldIt = oldTable.find(newEntry.key);
250 if (oldIt == oldTable.end() || oldIt->value.size() != 1)
251 continue;
252
253 newMap[newEntry.value[0]] = std::make_pair(newList[newEntry.value[0]].get(), oldIt->value[0]);
254 oldMap[oldIt->value[0]] = std::make_pair(oldList[oldIt->value[0]].get(), newEntry.value[0]);
255 }
256
257 for (size_t i = 0; newList.size() > 0 && i < newList.size() - 1; ++i) {
258 if (!newMap[i].first || newMap[i + 1].first)
259 continue;
260
261 size_t j = newMap[i].second + 1;
262 if (j < oldMap.size() && !oldMap[j].first && newList[i + 1]->sha1 == oldList[j]->sha1) {
263 newMap[i + 1] = std::make_pair(newList[i + 1].get(), j);
264 oldMap[j] = std::make_pair(oldList[j].get(), i + 1);
265 }
266 }
267
268 for (size_t i = newList.size() - 1; newList.size() > 0 && i > 0; --i) {
269 if (!newMap[i].first || newMap[i - 1].first || newMap[i].second <= 0)
270 continue;
271
272 size_t j = newMap[i].second - 1;
273 if (!oldMap[j].first && newList[i - 1]->sha1 == oldList[j]->sha1) {
274 newMap[i - 1] = std::make_pair(newList[i - 1].get(), j);
275 oldMap[j] = std::make_pair(oldList[j].get(), i - 1);
276 }
277 }
278
279#ifdef DEBUG_DOM_PATCH_SUPPORT
280 dumpMap(oldMap, "OLD");
281 dumpMap(newMap, "NEW");
282#endif
283
284 return std::make_pair(oldMap, newMap);
285}
286
287ExceptionOr<void> DOMPatchSupport::innerPatchChildren(ContainerNode& parentNode, const Vector<std::unique_ptr<Digest>>& oldList, const Vector<std::unique_ptr<Digest>>& newList)
288{
289 auto resultMaps = diff(oldList, newList);
290 ResultMap& oldMap = resultMaps.first;
291 ResultMap& newMap = resultMaps.second;
292
293 Digest* oldHead = nullptr;
294 Digest* oldBody = nullptr;
295
296 // 1. First strip everything except for the nodes that retain. Collect pending merges.
297 HashMap<Digest*, Digest*> merges;
298 HashSet<size_t, WTF::IntHash<size_t>, WTF::UnsignedWithZeroKeyHashTraits<size_t>> usedNewOrdinals;
299 for (size_t i = 0; i < oldList.size(); ++i) {
300 if (oldMap[i].first) {
301 if (!usedNewOrdinals.contains(oldMap[i].second)) {
302 usedNewOrdinals.add(oldMap[i].second);
303 continue;
304 }
305 oldMap[i].first = nullptr;
306 oldMap[i].second = 0;
307 }
308
309 // Always match <head> and <body> tags with each other - we can't remove them from the DOM
310 // upon patching.
311 if (oldList[i]->node->hasTagName(headTag)) {
312 oldHead = oldList[i].get();
313 continue;
314 }
315 if (oldList[i]->node->hasTagName(bodyTag)) {
316 oldBody = oldList[i].get();
317 continue;
318 }
319
320 // Check if this change is between stable nodes. If it is, consider it as "modified".
321 if (!m_unusedNodesMap.contains(oldList[i]->sha1) && (!i || oldMap[i - 1].first) && (i == oldMap.size() - 1 || oldMap[i + 1].first)) {
322 size_t anchorCandidate = i ? oldMap[i - 1].second + 1 : 0;
323 size_t anchorAfter = i == oldMap.size() - 1 ? anchorCandidate + 1 : oldMap[i + 1].second;
324 if (anchorAfter - anchorCandidate == 1 && anchorCandidate < newList.size())
325 merges.set(newList[anchorCandidate].get(), oldList[i].get());
326 else {
327 auto result = removeChildAndMoveToNew(*oldList[i]);
328 if (result.hasException())
329 return result.releaseException();
330 }
331 } else {
332 auto result = removeChildAndMoveToNew(*oldList[i]);
333 if (result.hasException())
334 return result.releaseException();
335 }
336 }
337
338 // Mark retained nodes as used, do not reuse node more than once.
339 HashSet<size_t, WTF::IntHash<size_t>, WTF::UnsignedWithZeroKeyHashTraits<size_t>> usedOldOrdinals;
340 for (size_t i = 0; i < newList.size(); ++i) {
341 if (!newMap[i].first)
342 continue;
343 size_t oldOrdinal = newMap[i].second;
344 if (usedOldOrdinals.contains(oldOrdinal)) {
345 // Do not map node more than once
346 newMap[i].first = nullptr;
347 newMap[i].second = 0;
348 continue;
349 }
350 usedOldOrdinals.add(oldOrdinal);
351 markNodeAsUsed(*newMap[i].first);
352 }
353
354 // Mark <head> and <body> nodes for merge.
355 if (oldHead || oldBody) {
356 for (size_t i = 0; i < newList.size(); ++i) {
357 if (oldHead && newList[i]->node->hasTagName(headTag))
358 merges.set(newList[i].get(), oldHead);
359 if (oldBody && newList[i]->node->hasTagName(bodyTag))
360 merges.set(newList[i].get(), oldBody);
361 }
362 }
363
364 // 2. Patch nodes marked for merge.
365 for (auto& merge : merges) {
366 auto result = innerPatchNode(*merge.value, *merge.key);
367 if (result.hasException())
368 return result.releaseException();
369 }
370
371 // 3. Insert missing nodes.
372 for (size_t i = 0; i < newMap.size(); ++i) {
373 if (newMap[i].first || merges.contains(newList[i].get()))
374 continue;
375 auto result = insertBeforeAndMarkAsUsed(parentNode, *newList[i], parentNode.traverseToChildAt(i));
376 if (result.hasException())
377 return result.releaseException();
378 }
379
380 // 4. Then put all nodes that retained into their slots (sort by new index).
381 for (size_t i = 0; i < oldMap.size(); ++i) {
382 if (!oldMap[i].first)
383 continue;
384 RefPtr<Node> node = oldMap[i].first->node;
385 auto* anchorNode = parentNode.traverseToChildAt(oldMap[i].second);
386 if (node == anchorNode)
387 continue;
388 if (node->hasTagName(bodyTag) || node->hasTagName(headTag))
389 continue; // Never move head or body, move the rest of the nodes around them.
390 auto result = m_domEditor.insertBefore(parentNode, node.releaseNonNull(), anchorNode);
391 if (result.hasException())
392 return result.releaseException();
393 }
394 return { };
395}
396
397static void addStringToSHA1(SHA1& sha1, const String& string)
398{
399 CString cString = string.utf8();
400 sha1.addBytes(reinterpret_cast<const uint8_t*>(cString.data()), cString.length());
401}
402
403std::unique_ptr<DOMPatchSupport::Digest> DOMPatchSupport::createDigest(Node& node, UnusedNodesMap* unusedNodesMap)
404{
405 auto digest = std::make_unique<Digest>();
406 digest->node = &node;
407 SHA1 sha1;
408
409 auto nodeType = node.nodeType();
410 sha1.addBytes(reinterpret_cast<const uint8_t*>(&nodeType), sizeof(nodeType));
411 addStringToSHA1(sha1, node.nodeName());
412 addStringToSHA1(sha1, node.nodeValue());
413
414 if (node.nodeType() == Node::ELEMENT_NODE) {
415 Node* child = node.firstChild();
416 while (child) {
417 std::unique_ptr<Digest> childInfo = createDigest(*child, unusedNodesMap);
418 addStringToSHA1(sha1, childInfo->sha1);
419 child = child->nextSibling();
420 digest->children.append(WTFMove(childInfo));
421 }
422 auto& element = downcast<Element>(node);
423
424 if (element.hasAttributesWithoutUpdate()) {
425 SHA1 attrsSHA1;
426 for (auto& attribute : element.attributesIterator()) {
427 addStringToSHA1(attrsSHA1, attribute.name().toString());
428 addStringToSHA1(attrsSHA1, attribute.value());
429 }
430 SHA1::Digest attrsHash;
431 attrsSHA1.computeHash(attrsHash);
432 digest->attrsSHA1 = base64Encode(attrsHash.data(), 10);
433 addStringToSHA1(sha1, digest->attrsSHA1);
434 }
435 }
436
437 SHA1::Digest hash;
438 sha1.computeHash(hash);
439 digest->sha1 = base64Encode(hash.data(), 10);
440 if (unusedNodesMap)
441 unusedNodesMap->add(digest->sha1, digest.get());
442
443 return digest;
444}
445
446ExceptionOr<void> DOMPatchSupport::insertBeforeAndMarkAsUsed(ContainerNode& parentNode, Digest& digest, Node* anchor)
447{
448 ASSERT(digest.node);
449 auto result = m_domEditor.insertBefore(parentNode, *digest.node, anchor);
450 markNodeAsUsed(digest);
451 return result;
452}
453
454ExceptionOr<void> DOMPatchSupport::removeChildAndMoveToNew(Digest& oldDigest)
455{
456 Ref<Node> oldNode = *oldDigest.node;
457 ASSERT(oldNode->parentNode());
458 auto result = m_domEditor.removeChild(*oldNode->parentNode(), oldNode);
459 if (result.hasException())
460 return result.releaseException();
461
462 // Diff works within levels. In order not to lose the node identity when user
463 // prepends his HTML with "<div>" (i.e. all nodes are shifted to the next nested level),
464 // prior to dropping the original node on the floor, check whether new DOM has a digest
465 // with matching sha1. If it does, replace it with the original DOM chunk. Chances are
466 // high that it will get merged back into the original DOM during the further patching.
467 auto it = m_unusedNodesMap.find(oldDigest.sha1);
468 if (it != m_unusedNodesMap.end()) {
469 auto& newDigest = *it->value;
470 auto& newNode = *newDigest.node;
471 auto result = m_domEditor.replaceChild(*newNode.parentNode(), oldNode.get(), newNode);
472 if (result.hasException())
473 return result.releaseException();
474 newDigest.node = oldNode.ptr();
475 markNodeAsUsed(newDigest);
476 return { };
477 }
478
479 for (auto& child : oldDigest.children) {
480 auto result = removeChildAndMoveToNew(*child);
481 if (result.hasException())
482 return result.releaseException();
483 }
484 return { };
485}
486
487void DOMPatchSupport::markNodeAsUsed(Digest& digest)
488{
489 Deque<Digest*> queue;
490 queue.append(&digest);
491 while (!queue.isEmpty()) {
492 auto& first = *queue.takeFirst();
493 m_unusedNodesMap.remove(first.sha1);
494 for (auto& child : first.children)
495 queue.append(child.get());
496 }
497}
498
499#ifdef DEBUG_DOM_PATCH_SUPPORT
500
501static String nodeName(Node* node)
502{
503 if (node->document().isXHTMLDocument())
504 return node->nodeName();
505 return node->nodeName().convertToASCIILowercase();
506}
507
508void DOMPatchSupport::dumpMap(const ResultMap& map, const String& name)
509{
510 fprintf(stderr, "\n\n");
511 for (size_t i = 0; i < map.size(); ++i)
512 fprintf(stderr, "%s[%lu]: %s (%p) - [%lu]\n", name.utf8().data(), i, map[i].first ? nodeName(map[i].first->m_node).utf8().data() : "", map[i].first, map[i].second);
513}
514
515#endif
516
517} // namespace WebCore
518