1/*
2 * Copyright (C) 2015 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "DFAMinimizer.h"
28
29#if ENABLE(CONTENT_EXTENSIONS)
30
31#include "DFA.h"
32#include "DFANode.h"
33#include "MutableRangeList.h"
34#include <wtf/HashMap.h>
35#include <wtf/Hasher.h>
36#include <wtf/Vector.h>
37
38namespace WebCore {
39namespace ContentExtensions {
40
41namespace {
42
43template<typename VectorType, typename Iterable, typename Function>
44static inline void iterateIntersections(const VectorType& singularTransitionsFirsts, const Iterable& iterableTransitionList, const Function& intersectionHandler)
45{
46 ASSERT(!singularTransitionsFirsts.isEmpty());
47 auto otherIterator = iterableTransitionList.begin();
48 auto otherEnd = iterableTransitionList.end();
49
50 if (otherIterator == otherEnd)
51 return;
52
53 unsigned singularTransitionsLength = singularTransitionsFirsts.size();
54 unsigned singularTransitionsFirstsIndex = 0;
55 for (; otherIterator != otherEnd; ++otherIterator) {
56 auto firstCharacter = otherIterator.first();
57 while (singularTransitionsFirstsIndex < singularTransitionsLength
58 && singularTransitionsFirsts[singularTransitionsFirstsIndex] != firstCharacter)
59 ++singularTransitionsFirstsIndex;
60
61 intersectionHandler(singularTransitionsFirstsIndex, otherIterator);
62 ++singularTransitionsFirstsIndex;
63
64 auto lastCharacter = otherIterator.last();
65 while (singularTransitionsFirstsIndex < singularTransitionsLength
66 && singularTransitionsFirsts[singularTransitionsFirstsIndex] <= lastCharacter) {
67 intersectionHandler(singularTransitionsFirstsIndex, otherIterator);
68 ++singularTransitionsFirstsIndex;
69 }
70 }
71}
72
73class Partition {
74public:
75 void initialize(unsigned size)
76 {
77 if (!size)
78 return;
79
80 m_sets.reserveInitialCapacity(size);
81 m_partitionedElements.resize(size);
82 m_elementPositionInPartitionedNodes.resize(size);
83 m_elementToSetMap.resize(size);
84
85 for (unsigned i = 0; i < size; ++i) {
86 m_partitionedElements[i] = i;
87 m_elementPositionInPartitionedNodes[i] = i;
88 m_elementToSetMap[i] = 0;
89 }
90 m_sets.uncheckedAppend(SetDescriptor { 0, size, 0 });
91 }
92
93 void reserveUninitializedCapacity(unsigned elementCount)
94 {
95 m_partitionedElements.resize(elementCount);
96 m_elementPositionInPartitionedNodes.resize(elementCount);
97 m_elementToSetMap.resize(elementCount);
98 }
99
100 void addSetUnchecked(unsigned start, unsigned size)
101 {
102 m_sets.append(SetDescriptor { start, size, 0 });
103 }
104
105 void setElementUnchecked(unsigned elementIndex, unsigned positionInPartition, unsigned setIndex)
106 {
107 ASSERT(setIndex < m_sets.size());
108 m_partitionedElements[positionInPartition] = elementIndex;
109 m_elementPositionInPartitionedNodes[elementIndex] = positionInPartition;
110 m_elementToSetMap[elementIndex] = setIndex;
111 }
112
113 unsigned startOffsetOfSet(unsigned setIndex) const
114 {
115 return m_sets[setIndex].start;
116 }
117
118 ALWAYS_INLINE void markElementInCurrentGeneration(unsigned elementIndex)
119 {
120 // Swap the node with the first unmarked node.
121 unsigned setIndex = m_elementToSetMap[elementIndex];
122 SetDescriptor& setDescriptor = m_sets[setIndex];
123
124 unsigned elementPositionInPartition = m_elementPositionInPartitionedNodes[elementIndex];
125 ASSERT(elementPositionInPartition >= setDescriptor.start);
126 ASSERT(elementPositionInPartition < setDescriptor.end());
127
128 unsigned firstUnmarkedElementPositionInPartition = setDescriptor.indexAfterMarkedElements();
129 ASSERT(firstUnmarkedElementPositionInPartition >= setDescriptor.start && firstUnmarkedElementPositionInPartition < setDescriptor.end());
130 ASSERT(firstUnmarkedElementPositionInPartition >= firstUnmarkedElementPositionInPartition);
131
132 // Swap the nodes in the set.
133 unsigned firstUnmarkedElement = m_partitionedElements[firstUnmarkedElementPositionInPartition];
134 m_partitionedElements[firstUnmarkedElementPositionInPartition] = elementIndex;
135 m_partitionedElements[elementPositionInPartition] = firstUnmarkedElement;
136
137 // Update their index.
138 m_elementPositionInPartitionedNodes[elementIndex] = firstUnmarkedElementPositionInPartition;
139 m_elementPositionInPartitionedNodes[firstUnmarkedElement] = elementPositionInPartition;
140
141 if (!setDescriptor.markedCount) {
142 ASSERT(!m_setsMarkedInCurrentGeneration.contains(setIndex));
143 m_setsMarkedInCurrentGeneration.append(setIndex);
144 }
145 ++setDescriptor.markedCount;
146 }
147
148 // The function passed as argument MUST not modify the partition.
149 template<typename Function>
150 void refineGeneration(const Function& function)
151 {
152 for (unsigned setIndex : m_setsMarkedInCurrentGeneration) {
153 SetDescriptor& setDescriptor = m_sets[setIndex];
154 if (setDescriptor.markedCount == setDescriptor.size) {
155 // Everything is marked, there is nothing to refine.
156 setDescriptor.markedCount = 0;
157 continue;
158 }
159
160 SetDescriptor newSet;
161 bool newSetIsMarkedSet = setDescriptor.markedCount * 2 <= setDescriptor.size;
162 if (newSetIsMarkedSet) {
163 // Less than half of the nodes have been marked.
164 newSet = { setDescriptor.start, setDescriptor.markedCount, 0 };
165 setDescriptor.start = setDescriptor.start + setDescriptor.markedCount;
166 } else
167 newSet = { setDescriptor.start + setDescriptor.markedCount, setDescriptor.size - setDescriptor.markedCount, 0 };
168 setDescriptor.size -= newSet.size;
169 setDescriptor.markedCount = 0;
170
171 unsigned newSetIndex = m_sets.size();
172 m_sets.append(newSet);
173
174 for (unsigned i = newSet.start; i < newSet.end(); ++i)
175 m_elementToSetMap[m_partitionedElements[i]] = newSetIndex;
176
177 function(newSetIndex);
178 }
179 m_setsMarkedInCurrentGeneration.clear();
180 }
181
182 // Call Function() on every node of a given subset.
183 template<typename Function>
184 void iterateSet(unsigned setIndex, const Function& function)
185 {
186 SetDescriptor& setDescriptor = m_sets[setIndex];
187 for (unsigned i = setDescriptor.start; i < setDescriptor.end(); ++i)
188 function(m_partitionedElements[i]);
189 }
190
191 // Index of the set containing the Node.
192 unsigned setIndex(unsigned elementIndex) const
193 {
194 return m_elementToSetMap[elementIndex];
195 }
196
197 // NodeIndex of the first element in the set.
198 unsigned firstElementInSet(unsigned setIndex) const
199 {
200 return m_partitionedElements[m_sets[setIndex].start];
201 }
202
203 unsigned size() const
204 {
205 return m_sets.size();
206 }
207
208private:
209 struct SetDescriptor {
210 unsigned start;
211 unsigned size;
212 unsigned markedCount;
213
214 unsigned indexAfterMarkedElements() const { return start + markedCount; }
215 unsigned end() const { return start + size; }
216 };
217
218 // List of sets.
219 Vector<SetDescriptor, 0, ContentExtensionsOverflowHandler> m_sets;
220
221 // All the element indices such that two elements of the same set never have a element of a different set between them.
222 Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_partitionedElements;
223
224 // Map elementIndex->position in the partitionedElements.
225 Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementPositionInPartitionedNodes;
226
227 // Map elementIndex->SetIndex.
228 Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementToSetMap;
229
230 // List of sets with any marked node. Each set can appear at most once.
231 // FIXME: find a good inline size for this.
232 Vector<unsigned, 128, ContentExtensionsOverflowHandler> m_setsMarkedInCurrentGeneration;
233};
234
235class FullGraphPartition {
236 typedef MutableRangeList<char, uint32_t, 128> SingularTransitionsMutableRangeList;
237public:
238 FullGraphPartition(const DFA& dfa)
239 {
240 m_nodePartition.initialize(dfa.nodes.size());
241
242 SingularTransitionsMutableRangeList singularTransitions;
243 CounterConverter counterConverter;
244 for (const DFANode& node : dfa.nodes) {
245 if (node.isKilled())
246 continue;
247 auto transitions = node.transitions(dfa);
248 singularTransitions.extend(transitions.begin(), transitions.end(), counterConverter);
249 }
250
251 // Count the number of transition for each singular range. This will give us the bucket size
252 // for the transition partition, where transitions are partitioned by "symbol".
253 unsigned rangeIndexAccumulator = 0;
254 for (const auto& transition : singularTransitions) {
255 m_transitionPartition.addSetUnchecked(rangeIndexAccumulator, transition.data);
256 rangeIndexAccumulator += transition.data;
257 }
258
259 // Count the number of incoming transitions per node.
260 m_flattenedTransitionsStartOffsetPerNode.resize(dfa.nodes.size());
261 memset(m_flattenedTransitionsStartOffsetPerNode.data(), 0, m_flattenedTransitionsStartOffsetPerNode.size() * sizeof(unsigned));
262
263 Vector<char, 0, ContentExtensionsOverflowHandler> singularTransitionsFirsts;
264 singularTransitionsFirsts.reserveInitialCapacity(singularTransitions.m_ranges.size());
265 for (const auto& transition : singularTransitions)
266 singularTransitionsFirsts.uncheckedAppend(transition.first);
267
268 for (const DFANode& node : dfa.nodes) {
269 if (node.isKilled())
270 continue;
271 auto transitions = node.transitions(dfa);
272 iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned, const DFANode::ConstRangeIterator& origin) {
273 uint32_t targetNodeIndex = origin.target();
274 ++m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex];
275 });
276 }
277
278 // Accumulate the offsets. This gives us the start position of each bucket.
279 unsigned transitionAccumulator = 0;
280 for (unsigned i = 0; i < m_flattenedTransitionsStartOffsetPerNode.size(); ++i) {
281 unsigned transitionsCountForNode = m_flattenedTransitionsStartOffsetPerNode[i];
282 m_flattenedTransitionsStartOffsetPerNode[i] = transitionAccumulator;
283 transitionAccumulator += transitionsCountForNode;
284 }
285 unsigned flattenedTransitionsSize = transitionAccumulator;
286 ASSERT_WITH_MESSAGE(flattenedTransitionsSize == rangeIndexAccumulator, "The number of transitions should be the same, regardless of how they are arranged in buckets.");
287
288 m_transitionPartition.reserveUninitializedCapacity(flattenedTransitionsSize);
289
290 // Next, let's fill the transition table and set up the size of each group at the same time.
291 m_flattenedTransitionsSizePerNode.resize(dfa.nodes.size());
292 for (unsigned& counter : m_flattenedTransitionsSizePerNode)
293 counter = 0;
294 m_flattenedTransitions.resize(flattenedTransitionsSize);
295
296 Vector<uint32_t> transitionPerRangeOffset(m_transitionPartition.size());
297 memset(transitionPerRangeOffset.data(), 0, transitionPerRangeOffset.size() * sizeof(uint32_t));
298
299 for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
300 const DFANode& node = dfa.nodes[i];
301 if (node.isKilled())
302 continue;
303
304 auto transitions = node.transitions(dfa);
305 iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned singularTransitonIndex, const DFANode::ConstRangeIterator& origin) {
306 uint32_t targetNodeIndex = origin.target();
307
308 unsigned start = m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex];
309 unsigned offset = m_flattenedTransitionsSizePerNode[targetNodeIndex];
310 unsigned positionInFlattenedTransitions = start + offset;
311 m_flattenedTransitions[positionInFlattenedTransitions] = Transition({ i });
312
313 uint32_t& inRangeOffset = transitionPerRangeOffset[singularTransitonIndex];
314 unsigned positionInTransitionPartition = m_transitionPartition.startOffsetOfSet(singularTransitonIndex) + inRangeOffset;
315 ++inRangeOffset;
316
317 m_transitionPartition.setElementUnchecked(positionInFlattenedTransitions, positionInTransitionPartition, singularTransitonIndex);
318
319 ++m_flattenedTransitionsSizePerNode[targetNodeIndex];
320 });
321 }
322 }
323
324 void markNode(unsigned nodeIndex)
325 {
326 m_nodePartition.markElementInCurrentGeneration(nodeIndex);
327 }
328
329 void refinePartitions()
330 {
331 m_nodePartition.refineGeneration([&](unsigned smallestSetIndex) {
332 m_nodePartition.iterateSet(smallestSetIndex, [&](unsigned nodeIndex) {
333 unsigned incomingTransitionsStartForNode = m_flattenedTransitionsStartOffsetPerNode[nodeIndex];
334 unsigned incomingTransitionsSizeForNode = m_flattenedTransitionsSizePerNode[nodeIndex];
335
336 for (unsigned i = 0; i < incomingTransitionsSizeForNode; ++i)
337 m_transitionPartition.markElementInCurrentGeneration(incomingTransitionsStartForNode + i);
338 });
339
340 // We only need to split the transitions, we handle the new sets through the main loop.
341 m_transitionPartition.refineGeneration([](unsigned) { });
342 });
343 }
344
345 void splitByUniqueTransitions()
346 {
347 for (; m_nextTransitionSetToProcess < m_transitionPartition.size(); ++m_nextTransitionSetToProcess) {
348 // We use the known splitters to refine the set.
349 m_transitionPartition.iterateSet(m_nextTransitionSetToProcess, [&](unsigned transitionIndex) {
350 unsigned sourceNodeIndex = m_flattenedTransitions[transitionIndex].source;
351 m_nodePartition.markElementInCurrentGeneration(sourceNodeIndex);
352 });
353
354 refinePartitions();
355 }
356 }
357
358 unsigned nodeReplacement(unsigned nodeIndex)
359 {
360 unsigned setIndex = m_nodePartition.setIndex(nodeIndex);
361 return m_nodePartition.firstElementInSet(setIndex);
362 }
363
364private:
365 struct Transition {
366 unsigned source;
367 };
368
369 struct CounterConverter {
370 uint32_t convert(uint32_t)
371 {
372 return 1;
373 }
374
375 void extend(uint32_t& destination, uint32_t)
376 {
377 ++destination;
378 }
379 };
380
381 Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsStartOffsetPerNode;
382 Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsSizePerNode;
383 Vector<Transition, 0, ContentExtensionsOverflowHandler> m_flattenedTransitions;
384
385 Partition m_nodePartition;
386 Partition m_transitionPartition;
387
388 unsigned m_nextTransitionSetToProcess { 0 };
389};
390
391struct ActionKey {
392 enum DeletedValueTag { DeletedValue };
393 explicit ActionKey(DeletedValueTag) { state = Deleted; }
394
395 enum EmptyValueTag { EmptyValue };
396 explicit ActionKey(EmptyValueTag) { state = Empty; }
397
398 explicit ActionKey(const DFA* dfa, uint32_t actionsStart, uint16_t actionsLength)
399 : dfa(dfa)
400 , actionsStart(actionsStart)
401 , actionsLength(actionsLength)
402 , state(Valid)
403 {
404 StringHasher hasher;
405 hasher.addCharactersAssumingAligned(reinterpret_cast<const UChar*>(&dfa->actions[actionsStart]), actionsLength * sizeof(uint64_t) / sizeof(UChar));
406 hash = hasher.hash();
407 }
408
409 bool isEmptyValue() const { return state == Empty; }
410 bool isDeletedValue() const { return state == Deleted; }
411
412 unsigned hash;
413
414 const DFA* dfa;
415 uint32_t actionsStart;
416 uint16_t actionsLength;
417
418 enum {
419 Valid,
420 Empty,
421 Deleted
422 } state;
423};
424
425struct ActionKeyHash {
426 static unsigned hash(const ActionKey& actionKey)
427 {
428 return actionKey.hash;
429 }
430
431 static bool equal(const ActionKey& a, const ActionKey& b)
432 {
433 if (a.state != b.state
434 || a.dfa != b.dfa
435 || a.actionsLength != b.actionsLength)
436 return false;
437 for (uint16_t i = 0; i < a.actionsLength; ++i) {
438 if (a.dfa->actions[a.actionsStart + i] != a.dfa->actions[b.actionsStart + i])
439 return false;
440 }
441 return true;
442 }
443 static const bool safeToCompareToEmptyOrDeleted = false;
444};
445
446struct ActionKeyHashTraits : public WTF::CustomHashTraits<ActionKey> {
447 static const bool emptyValueIsZero = true;
448};
449
450} // anonymous namespace.
451
452void DFAMinimizer::minimize(DFA& dfa)
453{
454 FullGraphPartition fullGraphPartition(dfa);
455
456 // Unlike traditional minimization final states can be differentiated by their action.
457 // Instead of creating a single set for the final state, we partition by actions from
458 // the start.
459 HashMap<ActionKey, Vector<unsigned>, ActionKeyHash, ActionKeyHashTraits> finalStates;
460 for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
461 const DFANode& node = dfa.nodes[i];
462 if (node.hasActions()) {
463 // FIXME: Sort the actions in the dfa to make nodes that have the same actions in different order equal.
464 auto addResult = finalStates.add(ActionKey(&dfa, node.actionsStart(), node.actionsLength()), Vector<unsigned>());
465 addResult.iterator->value.append(i);
466 }
467 }
468
469 for (const auto& slot : finalStates) {
470 for (unsigned finalStateIndex : slot.value)
471 fullGraphPartition.markNode(finalStateIndex);
472 fullGraphPartition.refinePartitions();
473 }
474
475 // Use every splitter to refine the node partitions.
476 fullGraphPartition.splitByUniqueTransitions();
477
478 Vector<unsigned> relocationVector;
479 relocationVector.reserveInitialCapacity(dfa.nodes.size());
480 for (unsigned i = 0; i < dfa.nodes.size(); ++i)
481 relocationVector.uncheckedAppend(i);
482
483 // Update all the transitions.
484 for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
485 unsigned replacement = fullGraphPartition.nodeReplacement(i);
486 if (i != replacement) {
487 relocationVector[i] = replacement;
488 dfa.nodes[i].kill(dfa);
489 }
490 }
491
492 dfa.root = relocationVector[dfa.root];
493 for (DFANode& node : dfa.nodes) {
494 if (node.isKilled())
495 continue;
496
497 for (auto& transition : node.transitions(dfa)) {
498 uint32_t target = transition.target();
499 uint32_t relocatedTarget = relocationVector[target];
500 if (target != relocatedTarget)
501 transition.resetTarget(relocatedTarget);
502 }
503 }
504}
505
506} // namespace ContentExtensions
507} // namespace WebCore
508
509#endif // ENABLE(CONTENT_EXTENSIONS)
510