| 1 | /* |
| 2 | * Copyright (C) 2007 David Smith (catfish.man@gmail.com) |
| 3 | * Copyright (C) 2007, 2008, 2011-2014 Apple Inc. All rights reserved. |
| 4 | * |
| 5 | * This library is free software; you can redistribute it and/or |
| 6 | * modify it under the terms of the GNU Library General Public |
| 7 | * License as published by the Free Software Foundation; either |
| 8 | * version 2 of the License, or (at your option) any later version. |
| 9 | * |
| 10 | * This library is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | * Library General Public License for more details. |
| 14 | * |
| 15 | * You should have received a copy of the GNU Library General Public License |
| 16 | * along with this library; see the file COPYING.LIB. If not, write to |
| 17 | * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 18 | * Boston, MA 02111-1307, USA. |
| 19 | */ |
| 20 | |
| 21 | #include "config.h" |
| 22 | #include "SpaceSplitString.h" |
| 23 | |
| 24 | #include "HTMLParserIdioms.h" |
| 25 | #include <wtf/HashMap.h> |
| 26 | #include <wtf/NeverDestroyed.h> |
| 27 | #include <wtf/text/AtomicStringHash.h> |
| 28 | |
| 29 | namespace WebCore { |
| 30 | |
| 31 | COMPILE_ASSERT(!(sizeof(SpaceSplitStringData) % sizeof(uintptr_t)), SpaceSplitStringDataTailIsAlignedToWordSize); |
| 32 | |
| 33 | template <typename CharacterType, typename TokenProcessor> |
| 34 | static inline void tokenizeSpaceSplitString(TokenProcessor& tokenProcessor, const CharacterType* characters, unsigned length) |
| 35 | { |
| 36 | for (unsigned start = 0; ; ) { |
| 37 | while (start < length && isHTMLSpace(characters[start])) |
| 38 | ++start; |
| 39 | if (start >= length) |
| 40 | break; |
| 41 | unsigned end = start + 1; |
| 42 | while (end < length && isNotHTMLSpace(characters[end])) |
| 43 | ++end; |
| 44 | |
| 45 | if (!tokenProcessor.processToken(characters + start, end - start)) |
| 46 | return; |
| 47 | |
| 48 | start = end + 1; |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | template<typename TokenProcessor> |
| 53 | static inline void tokenizeSpaceSplitString(TokenProcessor& tokenProcessor, const String& string) |
| 54 | { |
| 55 | ASSERT(!string.isNull()); |
| 56 | |
| 57 | const StringImpl& stringImpl = *string.impl(); |
| 58 | if (stringImpl.is8Bit()) |
| 59 | tokenizeSpaceSplitString(tokenProcessor, stringImpl.characters8(), stringImpl.length()); |
| 60 | else |
| 61 | tokenizeSpaceSplitString(tokenProcessor, stringImpl.characters16(), stringImpl.length()); |
| 62 | } |
| 63 | |
| 64 | bool SpaceSplitStringData::containsAll(SpaceSplitStringData& other) |
| 65 | { |
| 66 | if (this == &other) |
| 67 | return true; |
| 68 | |
| 69 | unsigned otherSize = other.m_size; |
| 70 | unsigned i = 0; |
| 71 | do { |
| 72 | if (!contains(other[i])) |
| 73 | return false; |
| 74 | ++i; |
| 75 | } while (i < otherSize); |
| 76 | return true; |
| 77 | } |
| 78 | |
| 79 | struct SpaceSplitStringTableKeyTraits : public HashTraits<AtomicString> |
| 80 | { |
| 81 | // The number 200 for typicalNumberOfSpaceSplitString was based on the typical number of unique class names |
| 82 | // on typical websites on August 2013. |
| 83 | static const unsigned typicalNumberOfSpaceSplitString = 200; |
| 84 | static const int minimumTableSize = WTF::HashTableCapacityForSize<typicalNumberOfSpaceSplitString>::value; |
| 85 | }; |
| 86 | |
| 87 | typedef HashMap<AtomicString, SpaceSplitStringData*, AtomicStringHash, SpaceSplitStringTableKeyTraits> SpaceSplitStringTable; |
| 88 | |
| 89 | static SpaceSplitStringTable& spaceSplitStringTable() |
| 90 | { |
| 91 | static NeverDestroyed<SpaceSplitStringTable> table; |
| 92 | return table; |
| 93 | } |
| 94 | |
| 95 | void SpaceSplitString::set(const AtomicString& inputString, bool shouldFoldCase) |
| 96 | { |
| 97 | if (inputString.isNull()) { |
| 98 | clear(); |
| 99 | return; |
| 100 | } |
| 101 | m_data = SpaceSplitStringData::create(shouldFoldCase ? inputString.convertToASCIILowercase() : inputString); |
| 102 | } |
| 103 | |
| 104 | class TokenIsEqualToCStringTokenProcessor { |
| 105 | public: |
| 106 | TokenIsEqualToCStringTokenProcessor(const char* referenceString, unsigned referenceStringLength) |
| 107 | : m_referenceString(referenceString) |
| 108 | , m_referenceStringLength(referenceStringLength) |
| 109 | , m_referenceStringWasFound(false) |
| 110 | { |
| 111 | } |
| 112 | |
| 113 | template <typename CharacterType> |
| 114 | bool processToken(const CharacterType* characters, unsigned length) |
| 115 | { |
| 116 | if (length == m_referenceStringLength && equal(characters, reinterpret_cast<const LChar*>(m_referenceString), length)) { |
| 117 | m_referenceStringWasFound = true; |
| 118 | return false; |
| 119 | } |
| 120 | return true; |
| 121 | } |
| 122 | |
| 123 | bool referenceStringWasFound() const { return m_referenceStringWasFound; } |
| 124 | |
| 125 | private: |
| 126 | const char* m_referenceString; |
| 127 | unsigned m_referenceStringLength; |
| 128 | bool m_referenceStringWasFound; |
| 129 | }; |
| 130 | |
| 131 | bool SpaceSplitString::spaceSplitStringContainsValue(const String& inputString, const char* value, unsigned valueLength, bool shouldFoldCase) |
| 132 | { |
| 133 | if (inputString.isNull()) |
| 134 | return false; |
| 135 | |
| 136 | TokenIsEqualToCStringTokenProcessor tokenProcessor(value, valueLength); |
| 137 | tokenizeSpaceSplitString(tokenProcessor, shouldFoldCase ? inputString.impl()->convertToASCIILowercase() : inputString); |
| 138 | return tokenProcessor.referenceStringWasFound(); |
| 139 | } |
| 140 | |
| 141 | class TokenCounter { |
| 142 | WTF_MAKE_NONCOPYABLE(TokenCounter); |
| 143 | public: |
| 144 | TokenCounter() : m_tokenCount(0) { } |
| 145 | |
| 146 | template<typename CharacterType> bool processToken(const CharacterType*, unsigned) |
| 147 | { |
| 148 | ++m_tokenCount; |
| 149 | return true; |
| 150 | } |
| 151 | |
| 152 | unsigned tokenCount() const { return m_tokenCount; } |
| 153 | |
| 154 | private: |
| 155 | unsigned m_tokenCount; |
| 156 | }; |
| 157 | |
| 158 | class TokenAtomicStringInitializer { |
| 159 | WTF_MAKE_NONCOPYABLE(TokenAtomicStringInitializer); |
| 160 | public: |
| 161 | TokenAtomicStringInitializer(AtomicString* memory) : m_memoryBucket(memory) { } |
| 162 | |
| 163 | template<typename CharacterType> bool processToken(const CharacterType* characters, unsigned length) |
| 164 | { |
| 165 | new (NotNull, m_memoryBucket) AtomicString(characters, length); |
| 166 | ++m_memoryBucket; |
| 167 | return true; |
| 168 | } |
| 169 | |
| 170 | const AtomicString* nextMemoryBucket() const { return m_memoryBucket; } |
| 171 | |
| 172 | private: |
| 173 | AtomicString* m_memoryBucket; |
| 174 | }; |
| 175 | |
| 176 | inline Ref<SpaceSplitStringData> SpaceSplitStringData::create(const AtomicString& keyString, unsigned tokenCount) |
| 177 | { |
| 178 | ASSERT(tokenCount); |
| 179 | |
| 180 | RELEASE_ASSERT(tokenCount < (std::numeric_limits<unsigned>::max() - sizeof(SpaceSplitStringData)) / sizeof(AtomicString)); |
| 181 | unsigned sizeToAllocate = sizeof(SpaceSplitStringData) + tokenCount * sizeof(AtomicString); |
| 182 | SpaceSplitStringData* spaceSplitStringData = static_cast<SpaceSplitStringData*>(fastMalloc(sizeToAllocate)); |
| 183 | |
| 184 | new (NotNull, spaceSplitStringData) SpaceSplitStringData(keyString, tokenCount); |
| 185 | AtomicString* tokenArrayStart = spaceSplitStringData->tokenArrayStart(); |
| 186 | TokenAtomicStringInitializer tokenInitializer(tokenArrayStart); |
| 187 | tokenizeSpaceSplitString(tokenInitializer, keyString); |
| 188 | ASSERT(static_cast<unsigned>(tokenInitializer.nextMemoryBucket() - tokenArrayStart) == tokenCount); |
| 189 | ASSERT(reinterpret_cast<const char*>(tokenInitializer.nextMemoryBucket()) == reinterpret_cast<const char*>(spaceSplitStringData) + sizeToAllocate); |
| 190 | |
| 191 | return adoptRef(*spaceSplitStringData); |
| 192 | } |
| 193 | |
| 194 | RefPtr<SpaceSplitStringData> SpaceSplitStringData::create(const AtomicString& keyString) |
| 195 | { |
| 196 | ASSERT(isMainThread()); |
| 197 | ASSERT(!keyString.isNull()); |
| 198 | |
| 199 | auto addResult = spaceSplitStringTable().add(keyString, nullptr); |
| 200 | if (!addResult.isNewEntry) |
| 201 | return addResult.iterator->value; |
| 202 | |
| 203 | // Nothing in the cache? Let's create a new SpaceSplitStringData if the input has something useful. |
| 204 | // 1) We find the number of strings in the input to know how much size we need to allocate. |
| 205 | TokenCounter tokenCounter; |
| 206 | tokenizeSpaceSplitString(tokenCounter, keyString); |
| 207 | unsigned tokenCount = tokenCounter.tokenCount(); |
| 208 | |
| 209 | if (!tokenCount) |
| 210 | return nullptr; |
| 211 | |
| 212 | RefPtr<SpaceSplitStringData> spaceSplitStringData = create(keyString, tokenCount); |
| 213 | addResult.iterator->value = spaceSplitStringData.get(); |
| 214 | return spaceSplitStringData; |
| 215 | } |
| 216 | |
| 217 | |
| 218 | void SpaceSplitStringData::destroy(SpaceSplitStringData* spaceSplitString) |
| 219 | { |
| 220 | ASSERT(isMainThread()); |
| 221 | |
| 222 | spaceSplitStringTable().remove(spaceSplitString->m_keyString); |
| 223 | |
| 224 | unsigned i = 0; |
| 225 | unsigned size = spaceSplitString->size(); |
| 226 | const AtomicString* data = spaceSplitString->tokenArrayStart(); |
| 227 | do { |
| 228 | data[i].~AtomicString(); |
| 229 | ++i; |
| 230 | } while (i < size); |
| 231 | |
| 232 | spaceSplitString->~SpaceSplitStringData(); |
| 233 | |
| 234 | fastFree(spaceSplitString); |
| 235 | } |
| 236 | |
| 237 | } // namespace WebCore |
| 238 | |