1 | /* |
2 | * Copyright (C) 2010 Adam Barth. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | #include <wtf/Vector.h> |
29 | #include <wtf/text/WTFString.h> |
30 | |
31 | namespace WebCore { |
32 | |
33 | class UnicodeCodebook { |
34 | public: |
35 | static int codeWord(UChar c) { return c; } |
36 | enum { codeSize = 1 << 8 * sizeof(UChar) }; |
37 | }; |
38 | |
39 | class ASCIICodebook { |
40 | public: |
41 | static int codeWord(UChar c) { return c & (codeSize - 1); } |
42 | enum { codeSize = 1 << (8 * sizeof(char) - 1) }; |
43 | }; |
44 | |
45 | template<typename Codebook> |
46 | class SuffixTree { |
47 | WTF_MAKE_FAST_ALLOCATED; |
48 | WTF_MAKE_NONCOPYABLE(SuffixTree) |
49 | public: |
50 | SuffixTree(const String& text, unsigned depth) |
51 | : m_depth(depth) |
52 | , m_leaf(true) |
53 | { |
54 | build(text); |
55 | } |
56 | |
57 | bool mightContain(const String& query) |
58 | { |
59 | Node* current = &m_root; |
60 | int limit = std::min(m_depth, query.length()); |
61 | for (int i = 0; i < limit; ++i) { |
62 | auto it = current->find(Codebook::codeWord(query[i])); |
63 | if (it == current->end()) |
64 | return false; |
65 | current = it->node; |
66 | } |
67 | return true; |
68 | } |
69 | |
70 | private: |
71 | class Node { |
72 | WTF_MAKE_FAST_ALLOCATED; |
73 | public: |
74 | Node(bool isLeaf = false) |
75 | : m_isLeaf(isLeaf) |
76 | { |
77 | } |
78 | |
79 | ~Node() |
80 | { |
81 | for (auto& entry : m_children) { |
82 | auto* child = entry.node; |
83 | if (child && !child->m_isLeaf) |
84 | delete child; |
85 | } |
86 | } |
87 | |
88 | Node*& childAt(int codeWord); |
89 | |
90 | auto find(int codeWord) |
91 | { |
92 | return std::find_if(m_children.begin(), m_children.end(), [codeWord](auto& entry) { |
93 | return entry.codeWord == codeWord; |
94 | }); |
95 | } |
96 | |
97 | auto end() { return m_children.end(); } |
98 | |
99 | private: |
100 | struct ChildWithCodeWord { |
101 | int codeWord; |
102 | Node* node; |
103 | }; |
104 | |
105 | Vector<ChildWithCodeWord> m_children; |
106 | bool m_isLeaf; |
107 | }; |
108 | |
109 | void build(const String& text) |
110 | { |
111 | for (unsigned base = 0; base < text.length(); ++base) { |
112 | Node* current = &m_root; |
113 | unsigned limit = std::min(base + m_depth, text.length()); |
114 | for (unsigned offset = 0; base + offset < limit; ++offset) { |
115 | ASSERT(current != &m_leaf); |
116 | Node*& child = current->childAt(Codebook::codeWord(text[base + offset])); |
117 | if (!child) |
118 | child = base + offset + 1 == limit ? &m_leaf : new Node(); |
119 | current = child; |
120 | } |
121 | } |
122 | } |
123 | |
124 | Node m_root; |
125 | unsigned m_depth; |
126 | |
127 | // Instead of allocating a fresh empty leaf node for ever leaf in the tree |
128 | // (there can be a lot of these), we alias all the leaves to this "static" |
129 | // leaf node. |
130 | Node m_leaf; |
131 | }; |
132 | |
133 | template<typename Codebook> |
134 | inline auto SuffixTree<Codebook>::Node::childAt(int codeWord) -> Node*& |
135 | { |
136 | auto it = find(codeWord); |
137 | if (it != m_children.end()) |
138 | return it->node; |
139 | m_children.append(ChildWithCodeWord { codeWord, nullptr }); |
140 | return m_children.last().node; |
141 | } |
142 | |
143 | } // namespace WebCore |
144 | |