1/*
2 * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "NFAToDFA.h"
28
29#if ENABLE(CONTENT_EXTENSIONS)
30
31#include "ContentExtensionsDebugging.h"
32#include "DFANode.h"
33#include "ImmutableNFA.h"
34#include "MutableRangeList.h"
35#include "NFA.h"
36#include <wtf/DataLog.h>
37#include <wtf/HashMap.h>
38#include <wtf/HashSet.h>
39
40namespace WebCore {
41
42namespace ContentExtensions {
43
44typedef MutableRange<signed char, NFANodeIndexSet> NFANodeRange;
45typedef MutableRangeList<signed char, NFANodeIndexSet> NFANodeRangeList;
46typedef MutableRangeList<signed char, NFANodeIndexSet, 128> PreallocatedNFANodeRangeList;
47typedef Vector<uint32_t, 0, ContentExtensionsOverflowHandler> UniqueNodeList;
48typedef Vector<UniqueNodeList, 0, ContentExtensionsOverflowHandler> NFANodeClosures;
49
50// FIXME: set a better initial size.
51// FIXME: include the hash inside NodeIdSet.
52typedef NFANodeIndexSet NodeIdSet;
53
54static inline void epsilonClosureExcludingSelf(NFA& nfa, unsigned nodeId, UniqueNodeList& output)
55{
56 NodeIdSet closure({ nodeId });
57 Vector<unsigned, 64, ContentExtensionsOverflowHandler> unprocessedNodes({ nodeId });
58
59 do {
60 unsigned unprocessedNodeId = unprocessedNodes.takeLast();
61 const auto& node = nfa.nodes[unprocessedNodeId];
62
63 for (uint32_t epsilonTargetIndex = node.epsilonTransitionTargetsStart; epsilonTargetIndex < node.epsilonTransitionTargetsEnd; ++epsilonTargetIndex) {
64 uint32_t targetNodeId = nfa.epsilonTransitionsTargets[epsilonTargetIndex];
65 auto addResult = closure.add(targetNodeId);
66 if (addResult.isNewEntry) {
67 unprocessedNodes.append(targetNodeId);
68 output.append(targetNodeId);
69 }
70 }
71 } while (!unprocessedNodes.isEmpty());
72
73 output.shrinkToFit();
74}
75
76static void resolveEpsilonClosures(NFA& nfa, NFANodeClosures& nfaNodeClosures)
77{
78 unsigned nfaGraphSize = nfa.nodes.size();
79 nfaNodeClosures.resize(nfaGraphSize);
80
81 for (unsigned nodeId = 0; nodeId < nfaGraphSize; ++nodeId)
82 epsilonClosureExcludingSelf(nfa, nodeId, nfaNodeClosures[nodeId]);
83
84 // Every nodes still point to that table, but we won't use it ever again.
85 // Clear it to get back the memory. That's not pretty but memory is important here, we have both
86 // graphs existing at the same time.
87 nfa.epsilonTransitionsTargets.clear();
88}
89
90static ALWAYS_INLINE void extendSetWithClosure(const NFANodeClosures& nfaNodeClosures, unsigned nodeId, NodeIdSet& set)
91{
92 ASSERT(set.contains(nodeId));
93 const UniqueNodeList& nodeClosure = nfaNodeClosures[nodeId];
94 if (!nodeClosure.isEmpty())
95 set.add(nodeClosure.begin(), nodeClosure.end());
96}
97
98struct UniqueNodeIdSetImpl {
99 unsigned* buffer()
100 {
101 return m_buffer;
102 }
103
104 const unsigned* buffer() const
105 {
106 return m_buffer;
107 }
108
109 unsigned m_size;
110 unsigned m_hash;
111 unsigned m_dfaNodeId;
112private:
113 unsigned m_buffer[1];
114};
115
116typedef Vector<UniqueNodeIdSetImpl*, 128, ContentExtensionsOverflowHandler> UniqueNodeQueue;
117
118class UniqueNodeIdSet {
119public:
120 UniqueNodeIdSet() { }
121 enum EmptyValueTag { EmptyValue };
122 enum DeletedValueTag { DeletedValue };
123
124 UniqueNodeIdSet(EmptyValueTag) { }
125 UniqueNodeIdSet(DeletedValueTag)
126 : m_uniqueNodeIdSetBuffer(reinterpret_cast<UniqueNodeIdSetImpl*>(-1))
127 {
128 }
129
130 UniqueNodeIdSet(const NodeIdSet& nodeIdSet, unsigned hash, unsigned dfaNodeId)
131 {
132 ASSERT(nodeIdSet.size());
133
134 unsigned size = nodeIdSet.size();
135 size_t byteSize = sizeof(UniqueNodeIdSetImpl) + (size - 1) * sizeof(unsigned);
136 m_uniqueNodeIdSetBuffer = static_cast<UniqueNodeIdSetImpl*>(fastMalloc(byteSize));
137
138 m_uniqueNodeIdSetBuffer->m_size = size;
139 m_uniqueNodeIdSetBuffer->m_hash = hash;
140 m_uniqueNodeIdSetBuffer->m_dfaNodeId = dfaNodeId;
141
142 unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
143 for (unsigned nodeId : nodeIdSet) {
144 *buffer = nodeId;
145 ++buffer;
146 }
147 }
148
149 UniqueNodeIdSet(UniqueNodeIdSet&& other)
150 : m_uniqueNodeIdSetBuffer(other.m_uniqueNodeIdSetBuffer)
151 {
152 other.m_uniqueNodeIdSetBuffer = nullptr;
153 }
154
155 UniqueNodeIdSet& operator=(UniqueNodeIdSet&& other)
156 {
157 m_uniqueNodeIdSetBuffer = other.m_uniqueNodeIdSetBuffer;
158 other.m_uniqueNodeIdSetBuffer = nullptr;
159 return *this;
160 }
161
162 ~UniqueNodeIdSet()
163 {
164 fastFree(m_uniqueNodeIdSetBuffer);
165 }
166
167 bool operator==(const UniqueNodeIdSet& other) const
168 {
169 return m_uniqueNodeIdSetBuffer == other.m_uniqueNodeIdSetBuffer;
170 }
171
172 bool operator==(const NodeIdSet& other) const
173 {
174 if (m_uniqueNodeIdSetBuffer->m_size != static_cast<unsigned>(other.size()))
175 return false;
176 unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
177 for (unsigned i = 0; i < m_uniqueNodeIdSetBuffer->m_size; ++i) {
178 if (!other.contains(buffer[i]))
179 return false;
180 }
181 return true;
182 }
183
184 UniqueNodeIdSetImpl* impl() const { return m_uniqueNodeIdSetBuffer; }
185
186 unsigned hash() const { return m_uniqueNodeIdSetBuffer->m_hash; }
187 bool isEmptyValue() const { return !m_uniqueNodeIdSetBuffer; }
188 bool isDeletedValue() const { return m_uniqueNodeIdSetBuffer == reinterpret_cast<UniqueNodeIdSetImpl*>(-1); }
189
190private:
191 UniqueNodeIdSetImpl* m_uniqueNodeIdSetBuffer = nullptr;
192};
193
194struct UniqueNodeIdSetHash {
195 static unsigned hash(const UniqueNodeIdSet& p)
196 {
197 return p.hash();
198 }
199
200 static bool equal(const UniqueNodeIdSet& a, const UniqueNodeIdSet& b)
201 {
202 return a == b;
203 }
204 // It would be fine to compare empty or deleted here, but not for the HashTranslator.
205 static const bool safeToCompareToEmptyOrDeleted = false;
206};
207
208struct UniqueNodeIdSetHashHashTraits : public WTF::CustomHashTraits<UniqueNodeIdSet> {
209 static const bool emptyValueIsZero = true;
210
211 // FIXME: Get a good size.
212 static const int minimumTableSize = 128;
213};
214
215typedef HashSet<std::unique_ptr<UniqueNodeIdSet>, UniqueNodeIdSetHash, UniqueNodeIdSetHashHashTraits> UniqueNodeIdSetTable;
216
217struct NodeIdSetToUniqueNodeIdSetSource {
218 NodeIdSetToUniqueNodeIdSetSource(DFA& dfa, const NFA& nfa, const NodeIdSet& nodeIdSet)
219 : dfa(dfa)
220 , nfa(nfa)
221 , nodeIdSet(nodeIdSet)
222 {
223 // The hashing operation must be independant of the nodeId.
224 unsigned hash = 4207445155;
225 for (unsigned nodeId : nodeIdSet)
226 hash += nodeId;
227 this->hash = DefaultHash<unsigned>::Hash::hash(hash);
228 }
229 DFA& dfa;
230 const NFA& nfa;
231 const NodeIdSet& nodeIdSet;
232 unsigned hash;
233};
234
235struct NodeIdSetToUniqueNodeIdSetTranslator {
236 static unsigned hash(const NodeIdSetToUniqueNodeIdSetSource& source)
237 {
238 return source.hash;
239 }
240
241 static inline bool equal(const UniqueNodeIdSet& a, const NodeIdSetToUniqueNodeIdSetSource& b)
242 {
243 return a == b.nodeIdSet;
244 }
245
246 static void translate(UniqueNodeIdSet& location, const NodeIdSetToUniqueNodeIdSetSource& source, unsigned hash)
247 {
248 DFANode newDFANode;
249
250 HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions;
251
252 for (unsigned nfaNodeId : source.nodeIdSet) {
253 const auto& nfaNode = source.nfa.nodes[nfaNodeId];
254 for (unsigned actionIndex = nfaNode.actionStart; actionIndex < nfaNode.actionEnd; ++actionIndex)
255 actions.add(source.nfa.actions[actionIndex]);
256 }
257
258 unsigned actionsStart = source.dfa.actions.size();
259 for (uint64_t action : actions)
260 source.dfa.actions.append(action);
261 unsigned actionsEnd = source.dfa.actions.size();
262 unsigned actionsLength = actionsEnd - actionsStart;
263 RELEASE_ASSERT_WITH_MESSAGE(actionsLength <= std::numeric_limits<uint16_t>::max(), "Too many actions for the current DFANode size.");
264 newDFANode.setActions(actionsStart, static_cast<uint16_t>(actionsLength));
265
266 unsigned dfaNodeId = source.dfa.nodes.size();
267 source.dfa.nodes.append(newDFANode);
268 new (NotNull, &location) UniqueNodeIdSet(source.nodeIdSet, hash, dfaNodeId);
269
270 ASSERT(location.impl());
271 }
272};
273
274struct DataConverterWithEpsilonClosure {
275 const NFANodeClosures& nfaNodeclosures;
276
277 template<typename Iterable>
278 NFANodeIndexSet convert(const Iterable& iterable)
279 {
280 NFANodeIndexSet result;
281 for (unsigned nodeId : iterable) {
282 result.add(nodeId);
283 const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId];
284 result.add(nodeClosure.begin(), nodeClosure.end());
285 }
286 return result;
287 }
288
289 template<typename Iterable>
290 void extend(NFANodeIndexSet& destination, const Iterable& iterable)
291 {
292 for (unsigned nodeId : iterable) {
293 auto addResult = destination.add(nodeId);
294 if (addResult.isNewEntry) {
295 const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId];
296 destination.add(nodeClosure.begin(), nodeClosure.end());
297 }
298 }
299 }
300};
301
302static inline void createCombinedTransition(PreallocatedNFANodeRangeList& combinedRangeList, const UniqueNodeIdSetImpl& sourceNodeSet, const NFA& immutableNFA, const NFANodeClosures& nfaNodeclosures)
303{
304 combinedRangeList.clear();
305
306 const unsigned* buffer = sourceNodeSet.buffer();
307
308 DataConverterWithEpsilonClosure converter { nfaNodeclosures };
309 for (unsigned i = 0; i < sourceNodeSet.m_size; ++i) {
310 unsigned nodeId = buffer[i];
311 auto transitions = immutableNFA.transitionsForNode(nodeId);
312 combinedRangeList.extend(transitions.begin(), transitions.end(), converter);
313 }
314}
315
316static ALWAYS_INLINE unsigned getOrCreateDFANode(const NodeIdSet& nfaNodeSet, const NFA& nfa, DFA& dfa, UniqueNodeIdSetTable& uniqueNodeIdSetTable, UniqueNodeQueue& unprocessedNodes)
317{
318 NodeIdSetToUniqueNodeIdSetSource nodeIdSetToUniqueNodeIdSetSource(dfa, nfa, nfaNodeSet);
319 auto uniqueNodeIdAddResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(nodeIdSetToUniqueNodeIdSetSource);
320 if (uniqueNodeIdAddResult.isNewEntry)
321 unprocessedNodes.append(uniqueNodeIdAddResult.iterator->impl());
322
323 return uniqueNodeIdAddResult.iterator->impl()->m_dfaNodeId;
324}
325
326DFA NFAToDFA::convert(NFA& nfa)
327{
328 NFANodeClosures nfaNodeClosures;
329 resolveEpsilonClosures(nfa, nfaNodeClosures);
330
331 DFA dfa;
332
333 NodeIdSet initialSet({ nfa.root() });
334 extendSetWithClosure(nfaNodeClosures, nfa.root(), initialSet);
335
336 UniqueNodeIdSetTable uniqueNodeIdSetTable;
337
338 NodeIdSetToUniqueNodeIdSetSource initialNodeIdSetToUniqueNodeIdSetSource(dfa, nfa, initialSet);
339 auto addResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(initialNodeIdSetToUniqueNodeIdSetSource);
340
341 UniqueNodeQueue unprocessedNodes;
342 unprocessedNodes.append(addResult.iterator->impl());
343
344 PreallocatedNFANodeRangeList combinedRangeList;
345 do {
346 UniqueNodeIdSetImpl* uniqueNodeIdSetImpl = unprocessedNodes.takeLast();
347 createCombinedTransition(combinedRangeList, *uniqueNodeIdSetImpl, nfa, nfaNodeClosures);
348
349 unsigned transitionsStart = dfa.transitionRanges.size();
350 for (const NFANodeRange& range : combinedRangeList) {
351 unsigned targetNodeId = getOrCreateDFANode(range.data, nfa, dfa, uniqueNodeIdSetTable, unprocessedNodes);
352 dfa.transitionRanges.append({ range.first, range.last });
353 dfa.transitionDestinations.append(targetNodeId);
354 }
355 unsigned transitionsEnd = dfa.transitionRanges.size();
356 unsigned transitionsLength = transitionsEnd - transitionsStart;
357
358 unsigned dfaNodeId = uniqueNodeIdSetImpl->m_dfaNodeId;
359 DFANode& dfaSourceNode = dfa.nodes[dfaNodeId];
360 dfaSourceNode.setTransitions(transitionsStart, static_cast<uint8_t>(transitionsLength));
361 } while (!unprocessedNodes.isEmpty());
362
363 dfa.shrinkToFit();
364 return dfa;
365}
366
367} // namespace ContentExtensions
368
369} // namespace WebCore
370
371#endif // ENABLE(CONTENT_EXTENSIONS)
372