1 | /* |
2 | * Copyright (C) 2007 David Smith (catfish.man@gmail.com) |
3 | * Copyright (C) 2007, 2008, 2011-2014 Apple Inc. All rights reserved. |
4 | * |
5 | * This library is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU Library General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2 of the License, or (at your option) any later version. |
9 | * |
10 | * This library is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Library General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU Library General Public License |
16 | * along with this library; see the file COPYING.LIB. If not, write to |
17 | * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
18 | * Boston, MA 02111-1307, USA. |
19 | */ |
20 | |
21 | #include "config.h" |
22 | #include "SpaceSplitString.h" |
23 | |
24 | #include "HTMLParserIdioms.h" |
25 | #include <wtf/HashMap.h> |
26 | #include <wtf/NeverDestroyed.h> |
27 | #include <wtf/text/AtomicStringHash.h> |
28 | |
29 | namespace WebCore { |
30 | |
31 | COMPILE_ASSERT(!(sizeof(SpaceSplitStringData) % sizeof(uintptr_t)), SpaceSplitStringDataTailIsAlignedToWordSize); |
32 | |
33 | template <typename CharacterType, typename TokenProcessor> |
34 | static inline void tokenizeSpaceSplitString(TokenProcessor& tokenProcessor, const CharacterType* characters, unsigned length) |
35 | { |
36 | for (unsigned start = 0; ; ) { |
37 | while (start < length && isHTMLSpace(characters[start])) |
38 | ++start; |
39 | if (start >= length) |
40 | break; |
41 | unsigned end = start + 1; |
42 | while (end < length && isNotHTMLSpace(characters[end])) |
43 | ++end; |
44 | |
45 | if (!tokenProcessor.processToken(characters + start, end - start)) |
46 | return; |
47 | |
48 | start = end + 1; |
49 | } |
50 | } |
51 | |
52 | template<typename TokenProcessor> |
53 | static inline void tokenizeSpaceSplitString(TokenProcessor& tokenProcessor, const String& string) |
54 | { |
55 | ASSERT(!string.isNull()); |
56 | |
57 | const StringImpl& stringImpl = *string.impl(); |
58 | if (stringImpl.is8Bit()) |
59 | tokenizeSpaceSplitString(tokenProcessor, stringImpl.characters8(), stringImpl.length()); |
60 | else |
61 | tokenizeSpaceSplitString(tokenProcessor, stringImpl.characters16(), stringImpl.length()); |
62 | } |
63 | |
64 | bool SpaceSplitStringData::containsAll(SpaceSplitStringData& other) |
65 | { |
66 | if (this == &other) |
67 | return true; |
68 | |
69 | unsigned otherSize = other.m_size; |
70 | unsigned i = 0; |
71 | do { |
72 | if (!contains(other[i])) |
73 | return false; |
74 | ++i; |
75 | } while (i < otherSize); |
76 | return true; |
77 | } |
78 | |
79 | struct SpaceSplitStringTableKeyTraits : public HashTraits<AtomicString> |
80 | { |
81 | // The number 200 for typicalNumberOfSpaceSplitString was based on the typical number of unique class names |
82 | // on typical websites on August 2013. |
83 | static const unsigned typicalNumberOfSpaceSplitString = 200; |
84 | static const int minimumTableSize = WTF::HashTableCapacityForSize<typicalNumberOfSpaceSplitString>::value; |
85 | }; |
86 | |
87 | typedef HashMap<AtomicString, SpaceSplitStringData*, AtomicStringHash, SpaceSplitStringTableKeyTraits> SpaceSplitStringTable; |
88 | |
89 | static SpaceSplitStringTable& spaceSplitStringTable() |
90 | { |
91 | static NeverDestroyed<SpaceSplitStringTable> table; |
92 | return table; |
93 | } |
94 | |
95 | void SpaceSplitString::set(const AtomicString& inputString, bool shouldFoldCase) |
96 | { |
97 | if (inputString.isNull()) { |
98 | clear(); |
99 | return; |
100 | } |
101 | m_data = SpaceSplitStringData::create(shouldFoldCase ? inputString.convertToASCIILowercase() : inputString); |
102 | } |
103 | |
104 | class TokenIsEqualToCStringTokenProcessor { |
105 | public: |
106 | TokenIsEqualToCStringTokenProcessor(const char* referenceString, unsigned referenceStringLength) |
107 | : m_referenceString(referenceString) |
108 | , m_referenceStringLength(referenceStringLength) |
109 | , m_referenceStringWasFound(false) |
110 | { |
111 | } |
112 | |
113 | template <typename CharacterType> |
114 | bool processToken(const CharacterType* characters, unsigned length) |
115 | { |
116 | if (length == m_referenceStringLength && equal(characters, reinterpret_cast<const LChar*>(m_referenceString), length)) { |
117 | m_referenceStringWasFound = true; |
118 | return false; |
119 | } |
120 | return true; |
121 | } |
122 | |
123 | bool referenceStringWasFound() const { return m_referenceStringWasFound; } |
124 | |
125 | private: |
126 | const char* m_referenceString; |
127 | unsigned m_referenceStringLength; |
128 | bool m_referenceStringWasFound; |
129 | }; |
130 | |
131 | bool SpaceSplitString::spaceSplitStringContainsValue(const String& inputString, const char* value, unsigned valueLength, bool shouldFoldCase) |
132 | { |
133 | if (inputString.isNull()) |
134 | return false; |
135 | |
136 | TokenIsEqualToCStringTokenProcessor tokenProcessor(value, valueLength); |
137 | tokenizeSpaceSplitString(tokenProcessor, shouldFoldCase ? inputString.impl()->convertToASCIILowercase() : inputString); |
138 | return tokenProcessor.referenceStringWasFound(); |
139 | } |
140 | |
141 | class TokenCounter { |
142 | WTF_MAKE_NONCOPYABLE(TokenCounter); |
143 | public: |
144 | TokenCounter() : m_tokenCount(0) { } |
145 | |
146 | template<typename CharacterType> bool processToken(const CharacterType*, unsigned) |
147 | { |
148 | ++m_tokenCount; |
149 | return true; |
150 | } |
151 | |
152 | unsigned tokenCount() const { return m_tokenCount; } |
153 | |
154 | private: |
155 | unsigned m_tokenCount; |
156 | }; |
157 | |
158 | class TokenAtomicStringInitializer { |
159 | WTF_MAKE_NONCOPYABLE(TokenAtomicStringInitializer); |
160 | public: |
161 | TokenAtomicStringInitializer(AtomicString* memory) : m_memoryBucket(memory) { } |
162 | |
163 | template<typename CharacterType> bool processToken(const CharacterType* characters, unsigned length) |
164 | { |
165 | new (NotNull, m_memoryBucket) AtomicString(characters, length); |
166 | ++m_memoryBucket; |
167 | return true; |
168 | } |
169 | |
170 | const AtomicString* nextMemoryBucket() const { return m_memoryBucket; } |
171 | |
172 | private: |
173 | AtomicString* m_memoryBucket; |
174 | }; |
175 | |
176 | inline Ref<SpaceSplitStringData> SpaceSplitStringData::create(const AtomicString& keyString, unsigned tokenCount) |
177 | { |
178 | ASSERT(tokenCount); |
179 | |
180 | RELEASE_ASSERT(tokenCount < (std::numeric_limits<unsigned>::max() - sizeof(SpaceSplitStringData)) / sizeof(AtomicString)); |
181 | unsigned sizeToAllocate = sizeof(SpaceSplitStringData) + tokenCount * sizeof(AtomicString); |
182 | SpaceSplitStringData* spaceSplitStringData = static_cast<SpaceSplitStringData*>(fastMalloc(sizeToAllocate)); |
183 | |
184 | new (NotNull, spaceSplitStringData) SpaceSplitStringData(keyString, tokenCount); |
185 | AtomicString* tokenArrayStart = spaceSplitStringData->tokenArrayStart(); |
186 | TokenAtomicStringInitializer tokenInitializer(tokenArrayStart); |
187 | tokenizeSpaceSplitString(tokenInitializer, keyString); |
188 | ASSERT(static_cast<unsigned>(tokenInitializer.nextMemoryBucket() - tokenArrayStart) == tokenCount); |
189 | ASSERT(reinterpret_cast<const char*>(tokenInitializer.nextMemoryBucket()) == reinterpret_cast<const char*>(spaceSplitStringData) + sizeToAllocate); |
190 | |
191 | return adoptRef(*spaceSplitStringData); |
192 | } |
193 | |
194 | RefPtr<SpaceSplitStringData> SpaceSplitStringData::create(const AtomicString& keyString) |
195 | { |
196 | ASSERT(isMainThread()); |
197 | ASSERT(!keyString.isNull()); |
198 | |
199 | auto addResult = spaceSplitStringTable().add(keyString, nullptr); |
200 | if (!addResult.isNewEntry) |
201 | return addResult.iterator->value; |
202 | |
203 | // Nothing in the cache? Let's create a new SpaceSplitStringData if the input has something useful. |
204 | // 1) We find the number of strings in the input to know how much size we need to allocate. |
205 | TokenCounter tokenCounter; |
206 | tokenizeSpaceSplitString(tokenCounter, keyString); |
207 | unsigned tokenCount = tokenCounter.tokenCount(); |
208 | |
209 | if (!tokenCount) |
210 | return nullptr; |
211 | |
212 | RefPtr<SpaceSplitStringData> spaceSplitStringData = create(keyString, tokenCount); |
213 | addResult.iterator->value = spaceSplitStringData.get(); |
214 | return spaceSplitStringData; |
215 | } |
216 | |
217 | |
218 | void SpaceSplitStringData::destroy(SpaceSplitStringData* spaceSplitString) |
219 | { |
220 | ASSERT(isMainThread()); |
221 | |
222 | spaceSplitStringTable().remove(spaceSplitString->m_keyString); |
223 | |
224 | unsigned i = 0; |
225 | unsigned size = spaceSplitString->size(); |
226 | const AtomicString* data = spaceSplitString->tokenArrayStart(); |
227 | do { |
228 | data[i].~AtomicString(); |
229 | ++i; |
230 | } while (i < size); |
231 | |
232 | spaceSplitString->~SpaceSplitStringData(); |
233 | |
234 | fastFree(spaceSplitString); |
235 | } |
236 | |
237 | } // namespace WebCore |
238 | |