1/*
2 * Copyright (C) 2010 Adam Barth. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#pragma once
27
28#include <wtf/Vector.h>
29#include <wtf/text/WTFString.h>
30
31namespace WebCore {
32
33class UnicodeCodebook {
34public:
35 static int codeWord(UChar c) { return c; }
36 enum { codeSize = 1 << 8 * sizeof(UChar) };
37};
38
39class ASCIICodebook {
40public:
41 static int codeWord(UChar c) { return c & (codeSize - 1); }
42 enum { codeSize = 1 << (8 * sizeof(char) - 1) };
43};
44
45template<typename Codebook>
46class SuffixTree {
47 WTF_MAKE_FAST_ALLOCATED;
48 WTF_MAKE_NONCOPYABLE(SuffixTree)
49public:
50 SuffixTree(const String& text, unsigned depth)
51 : m_depth(depth)
52 , m_leaf(true)
53 {
54 build(text);
55 }
56
57 bool mightContain(const String& query)
58 {
59 Node* current = &m_root;
60 int limit = std::min(m_depth, query.length());
61 for (int i = 0; i < limit; ++i) {
62 auto it = current->find(Codebook::codeWord(query[i]));
63 if (it == current->end())
64 return false;
65 current = it->node;
66 }
67 return true;
68 }
69
70private:
71 class Node {
72 WTF_MAKE_FAST_ALLOCATED;
73 public:
74 Node(bool isLeaf = false)
75 : m_isLeaf(isLeaf)
76 {
77 }
78
79 ~Node()
80 {
81 for (auto& entry : m_children) {
82 auto* child = entry.node;
83 if (child && !child->m_isLeaf)
84 delete child;
85 }
86 }
87
88 Node*& childAt(int codeWord);
89
90 auto find(int codeWord)
91 {
92 return std::find_if(m_children.begin(), m_children.end(), [codeWord](auto& entry) {
93 return entry.codeWord == codeWord;
94 });
95 }
96
97 auto end() { return m_children.end(); }
98
99 private:
100 struct ChildWithCodeWord {
101 int codeWord;
102 Node* node;
103 };
104
105 Vector<ChildWithCodeWord> m_children;
106 bool m_isLeaf;
107 };
108
109 void build(const String& text)
110 {
111 for (unsigned base = 0; base < text.length(); ++base) {
112 Node* current = &m_root;
113 unsigned limit = std::min(base + m_depth, text.length());
114 for (unsigned offset = 0; base + offset < limit; ++offset) {
115 ASSERT(current != &m_leaf);
116 Node*& child = current->childAt(Codebook::codeWord(text[base + offset]));
117 if (!child)
118 child = base + offset + 1 == limit ? &m_leaf : new Node();
119 current = child;
120 }
121 }
122 }
123
124 Node m_root;
125 unsigned m_depth;
126
127 // Instead of allocating a fresh empty leaf node for ever leaf in the tree
128 // (there can be a lot of these), we alias all the leaves to this "static"
129 // leaf node.
130 Node m_leaf;
131};
132
133template<typename Codebook>
134inline auto SuffixTree<Codebook>::Node::childAt(int codeWord) -> Node*&
135{
136 auto it = find(codeWord);
137 if (it != m_children.end())
138 return it->node;
139 m_children.append(ChildWithCodeWord { codeWord, nullptr });
140 return m_children.last().node;
141}
142
143} // namespace WebCore
144