| 1 | /* |
| 2 | * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
| 3 | * (C) 1999 Antti Koivisto (koivisto@kde.org) |
| 4 | * (C) 2001 Dirk Mueller (mueller@kde.org) |
| 5 | * (C) 2006 Alexey Proskuryakov (ap@webkit.org) |
| 6 | * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. |
| 7 | * |
| 8 | * This library is free software; you can redistribute it and/or |
| 9 | * modify it under the terms of the GNU Library General Public |
| 10 | * License as published by the Free Software Foundation; either |
| 11 | * version 2 of the License, or (at your option) any later version. |
| 12 | * |
| 13 | * This library is distributed in the hope that it will be useful, |
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | * Library General Public License for more details. |
| 17 | * |
| 18 | * You should have received a copy of the GNU Library General Public License |
| 19 | * along with this library; see the file COPYING.LIB. If not, write to |
| 20 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 21 | * Boston, MA 02110-1301, USA. |
| 22 | */ |
| 23 | |
| 24 | #include "config.h" |
| 25 | #include "SharedStringHash.h" |
| 26 | |
| 27 | #include <wtf/URL.h> |
| 28 | #include <wtf/text/AtomicString.h> |
| 29 | #include <wtf/text/StringHash.h> |
| 30 | #include <wtf/text/StringView.h> |
| 31 | |
| 32 | namespace WebCore { |
| 33 | |
| 34 | template <typename CharacterType> |
| 35 | static inline size_t findSlashDotDotSlash(const CharacterType* characters, size_t length, size_t position) |
| 36 | { |
| 37 | if (length < 4) |
| 38 | return notFound; |
| 39 | size_t loopLimit = length - 3; |
| 40 | for (size_t i = position; i < loopLimit; ++i) { |
| 41 | if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/') |
| 42 | return i; |
| 43 | } |
| 44 | return notFound; |
| 45 | } |
| 46 | |
| 47 | template <typename CharacterType> |
| 48 | static inline size_t findSlashSlash(const CharacterType* characters, size_t length, size_t position) |
| 49 | { |
| 50 | if (length < 2) |
| 51 | return notFound; |
| 52 | size_t loopLimit = length - 1; |
| 53 | for (size_t i = position; i < loopLimit; ++i) { |
| 54 | if (characters[i] == '/' && characters[i + 1] == '/') |
| 55 | return i; |
| 56 | } |
| 57 | return notFound; |
| 58 | } |
| 59 | |
| 60 | template <typename CharacterType> |
| 61 | static inline size_t findSlashDotSlash(const CharacterType* characters, size_t length, size_t position) |
| 62 | { |
| 63 | if (length < 3) |
| 64 | return notFound; |
| 65 | size_t loopLimit = length - 2; |
| 66 | for (size_t i = position; i < loopLimit; ++i) { |
| 67 | if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/') |
| 68 | return i; |
| 69 | } |
| 70 | return notFound; |
| 71 | } |
| 72 | |
| 73 | template <typename CharacterType> |
| 74 | static inline bool containsColonSlashSlash(const CharacterType* characters, unsigned length) |
| 75 | { |
| 76 | if (length < 3) |
| 77 | return false; |
| 78 | unsigned loopLimit = length - 2; |
| 79 | for (unsigned i = 0; i < loopLimit; ++i) { |
| 80 | if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/') |
| 81 | return true; |
| 82 | } |
| 83 | return false; |
| 84 | } |
| 85 | |
| 86 | template <typename CharacterType> |
| 87 | static inline void squeezeOutNullCharacters(Vector<CharacterType, 512>& string) |
| 88 | { |
| 89 | size_t size = string.size(); |
| 90 | size_t i = 0; |
| 91 | for (i = 0; i < size; ++i) { |
| 92 | if (!string[i]) |
| 93 | break; |
| 94 | } |
| 95 | if (i == size) |
| 96 | return; |
| 97 | size_t j = i; |
| 98 | for (++i; i < size; ++i) { |
| 99 | if (CharacterType character = string[i]) |
| 100 | string[j++] = character; |
| 101 | } |
| 102 | ASSERT(j < size); |
| 103 | string.shrink(j); |
| 104 | } |
| 105 | |
| 106 | template <typename CharacterType> |
| 107 | static void cleanSlashDotDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) |
| 108 | { |
| 109 | size_t slash = firstSlash; |
| 110 | do { |
| 111 | size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound; |
| 112 | // Don't remove the host, i.e. http://foo.org/../foo.html |
| 113 | if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) { |
| 114 | path[slash] = 0; |
| 115 | path[slash + 1] = 0; |
| 116 | path[slash + 2] = 0; |
| 117 | } else { |
| 118 | for (size_t i = previousSlash; i < slash + 3; ++i) |
| 119 | path[i] = 0; |
| 120 | } |
| 121 | slash += 3; |
| 122 | } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound); |
| 123 | squeezeOutNullCharacters(path); |
| 124 | } |
| 125 | |
| 126 | template <typename CharacterType> |
| 127 | static void mergeDoubleSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) |
| 128 | { |
| 129 | size_t refPos = find(path.data(), path.size(), '#'); |
| 130 | if (!refPos || refPos == notFound) |
| 131 | refPos = path.size(); |
| 132 | |
| 133 | size_t slash = firstSlash; |
| 134 | while (slash < refPos) { |
| 135 | if (!slash || path[slash - 1] != ':') |
| 136 | path[slash++] = 0; |
| 137 | else |
| 138 | slash += 2; |
| 139 | if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound) |
| 140 | break; |
| 141 | } |
| 142 | squeezeOutNullCharacters(path); |
| 143 | } |
| 144 | |
| 145 | template <typename CharacterType> |
| 146 | static void cleanSlashDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) |
| 147 | { |
| 148 | size_t slash = firstSlash; |
| 149 | do { |
| 150 | path[slash] = 0; |
| 151 | path[slash + 1] = 0; |
| 152 | slash += 2; |
| 153 | } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound); |
| 154 | squeezeOutNullCharacters(path); |
| 155 | } |
| 156 | |
| 157 | template <typename CharacterType> |
| 158 | static inline void cleanPath(Vector<CharacterType, 512>& path) |
| 159 | { |
| 160 | // FIXME: Should not do this in the query or anchor part of the URL. |
| 161 | size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0); |
| 162 | if (firstSlash != notFound) |
| 163 | cleanSlashDotDotSlashes(path, firstSlash); |
| 164 | |
| 165 | // FIXME: Should not do this in the query part. |
| 166 | firstSlash = findSlashSlash(path.data(), path.size(), 0); |
| 167 | if (firstSlash != notFound) |
| 168 | mergeDoubleSlashes(path, firstSlash); |
| 169 | |
| 170 | // FIXME: Should not do this in the query or anchor part. |
| 171 | firstSlash = findSlashDotSlash(path.data(), path.size(), 0); |
| 172 | if (firstSlash != notFound) |
| 173 | cleanSlashDotSlashes(path, firstSlash); |
| 174 | } |
| 175 | |
| 176 | template <typename CharacterType> |
| 177 | static inline bool matchLetter(CharacterType c, char lowercaseLetter) |
| 178 | { |
| 179 | return (c | 0x20) == lowercaseLetter; |
| 180 | } |
| 181 | |
| 182 | template <typename CharacterType> |
| 183 | static inline bool needsTrailingSlash(const CharacterType* characters, unsigned length) |
| 184 | { |
| 185 | if (length < 6) |
| 186 | return false; |
| 187 | if (!matchLetter(characters[0], 'h') || !matchLetter(characters[1], 't') || !matchLetter(characters[2], 't') || !matchLetter(characters[3], 'p')) |
| 188 | return false; |
| 189 | if (!(characters[4] == ':' || (matchLetter(characters[4], 's') && characters[5] == ':'))) |
| 190 | return false; |
| 191 | |
| 192 | unsigned pos = characters[4] == ':' ? 5 : 6; |
| 193 | |
| 194 | // Skip initial two slashes if present. |
| 195 | if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/') |
| 196 | pos += 2; |
| 197 | |
| 198 | // Find next slash. |
| 199 | while (pos < length && characters[pos] != '/') |
| 200 | ++pos; |
| 201 | |
| 202 | return pos == length; |
| 203 | } |
| 204 | |
| 205 | template <typename CharacterType> |
| 206 | static ALWAYS_INLINE SharedStringHash computeSharedStringHashInline(const CharacterType* url, unsigned length) |
| 207 | { |
| 208 | return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length)); |
| 209 | } |
| 210 | |
| 211 | SharedStringHash computeSharedStringHash(const String& url) |
| 212 | { |
| 213 | unsigned length = url.length(); |
| 214 | if (!length || url.is8Bit()) |
| 215 | return computeSharedStringHashInline(url.characters8(), length); |
| 216 | return computeSharedStringHashInline(url.characters16(), length); |
| 217 | } |
| 218 | |
| 219 | SharedStringHash computeSharedStringHash(const UChar* url, unsigned length) |
| 220 | { |
| 221 | return computeSharedStringHashInline(url, length); |
| 222 | } |
| 223 | |
| 224 | template <typename CharacterType> |
| 225 | static ALWAYS_INLINE void computeSharedStringHashInline(const URL& base, const CharacterType* characters, unsigned length, Vector<CharacterType, 512>& buffer) |
| 226 | { |
| 227 | if (!length) |
| 228 | return; |
| 229 | |
| 230 | // This is a poor man's completeURL. Faster with less memory allocation. |
| 231 | // FIXME: It's missing a lot of what completeURL does and a lot of what URL does. |
| 232 | // For example, it does not handle international domain names properly. |
| 233 | |
| 234 | // FIXME: It is wrong that we do not do further processing on strings that have "://" in them: |
| 235 | // 1) The "://" could be in the query or anchor. |
| 236 | // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it. |
| 237 | |
| 238 | // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does |
| 239 | // have a query or anchor. |
| 240 | |
| 241 | bool hasColonSlashSlash = containsColonSlashSlash(characters, length); |
| 242 | |
| 243 | if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) { |
| 244 | buffer.append(characters, length); |
| 245 | return; |
| 246 | } |
| 247 | |
| 248 | |
| 249 | if (hasColonSlashSlash) { |
| 250 | // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the |
| 251 | // end of the path, *before* the query or anchor. |
| 252 | buffer.append(characters, length); |
| 253 | buffer.append('/'); |
| 254 | return; |
| 255 | } |
| 256 | |
| 257 | if (!length) |
| 258 | append(buffer, base.string()); |
| 259 | else { |
| 260 | switch (characters[0]) { |
| 261 | case '/': |
| 262 | append(buffer, StringView(base.string()).substring(0, base.pathStart())); |
| 263 | break; |
| 264 | case '#': |
| 265 | append(buffer, StringView(base.string()).substring(0, base.pathEnd())); |
| 266 | break; |
| 267 | default: |
| 268 | append(buffer, StringView(base.string()).substring(0, base.pathAfterLastSlash())); |
| 269 | break; |
| 270 | } |
| 271 | } |
| 272 | buffer.append(characters, length); |
| 273 | cleanPath(buffer); |
| 274 | if (needsTrailingSlash(buffer.data(), buffer.size())) { |
| 275 | // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the |
| 276 | // end of the path, *before* the query or anchor. |
| 277 | buffer.append('/'); |
| 278 | } |
| 279 | |
| 280 | return; |
| 281 | } |
| 282 | |
| 283 | SharedStringHash computeVisitedLinkHash(const URL& base, const AtomicString& attributeURL) |
| 284 | { |
| 285 | if (attributeURL.isEmpty()) |
| 286 | return 0; |
| 287 | |
| 288 | if (!base.string().isEmpty() && base.string().is8Bit() && attributeURL.is8Bit()) { |
| 289 | Vector<LChar, 512> url; |
| 290 | computeSharedStringHashInline(base, attributeURL.characters8(), attributeURL.length(), url); |
| 291 | if (url.isEmpty()) |
| 292 | return 0; |
| 293 | |
| 294 | return computeSharedStringHashInline(url.data(), url.size()); |
| 295 | } |
| 296 | |
| 297 | Vector<UChar, 512> url; |
| 298 | auto upconvertedCharacters = StringView(attributeURL.string()).upconvertedCharacters(); |
| 299 | const UChar* characters = upconvertedCharacters; |
| 300 | computeSharedStringHashInline(base, characters, attributeURL.length(), url); |
| 301 | if (url.isEmpty()) |
| 302 | return 0; |
| 303 | |
| 304 | return computeSharedStringHashInline(url.data(), url.size()); |
| 305 | } |
| 306 | |
| 307 | } // namespace WebCore |
| 308 | |