1 | /* |
2 | * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
3 | * (C) 1999 Antti Koivisto (koivisto@kde.org) |
4 | * (C) 2001 Dirk Mueller (mueller@kde.org) |
5 | * (C) 2006 Alexey Proskuryakov (ap@webkit.org) |
6 | * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. |
7 | * |
8 | * This library is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Library General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2 of the License, or (at your option) any later version. |
12 | * |
13 | * This library is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Library General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Library General Public License |
19 | * along with this library; see the file COPYING.LIB. If not, write to |
20 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
21 | * Boston, MA 02110-1301, USA. |
22 | */ |
23 | |
24 | #include "config.h" |
25 | #include "SharedStringHash.h" |
26 | |
27 | #include <wtf/URL.h> |
28 | #include <wtf/text/AtomicString.h> |
29 | #include <wtf/text/StringHash.h> |
30 | #include <wtf/text/StringView.h> |
31 | |
32 | namespace WebCore { |
33 | |
34 | template <typename CharacterType> |
35 | static inline size_t findSlashDotDotSlash(const CharacterType* characters, size_t length, size_t position) |
36 | { |
37 | if (length < 4) |
38 | return notFound; |
39 | size_t loopLimit = length - 3; |
40 | for (size_t i = position; i < loopLimit; ++i) { |
41 | if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/') |
42 | return i; |
43 | } |
44 | return notFound; |
45 | } |
46 | |
47 | template <typename CharacterType> |
48 | static inline size_t findSlashSlash(const CharacterType* characters, size_t length, size_t position) |
49 | { |
50 | if (length < 2) |
51 | return notFound; |
52 | size_t loopLimit = length - 1; |
53 | for (size_t i = position; i < loopLimit; ++i) { |
54 | if (characters[i] == '/' && characters[i + 1] == '/') |
55 | return i; |
56 | } |
57 | return notFound; |
58 | } |
59 | |
60 | template <typename CharacterType> |
61 | static inline size_t findSlashDotSlash(const CharacterType* characters, size_t length, size_t position) |
62 | { |
63 | if (length < 3) |
64 | return notFound; |
65 | size_t loopLimit = length - 2; |
66 | for (size_t i = position; i < loopLimit; ++i) { |
67 | if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/') |
68 | return i; |
69 | } |
70 | return notFound; |
71 | } |
72 | |
73 | template <typename CharacterType> |
74 | static inline bool containsColonSlashSlash(const CharacterType* characters, unsigned length) |
75 | { |
76 | if (length < 3) |
77 | return false; |
78 | unsigned loopLimit = length - 2; |
79 | for (unsigned i = 0; i < loopLimit; ++i) { |
80 | if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/') |
81 | return true; |
82 | } |
83 | return false; |
84 | } |
85 | |
86 | template <typename CharacterType> |
87 | static inline void squeezeOutNullCharacters(Vector<CharacterType, 512>& string) |
88 | { |
89 | size_t size = string.size(); |
90 | size_t i = 0; |
91 | for (i = 0; i < size; ++i) { |
92 | if (!string[i]) |
93 | break; |
94 | } |
95 | if (i == size) |
96 | return; |
97 | size_t j = i; |
98 | for (++i; i < size; ++i) { |
99 | if (CharacterType character = string[i]) |
100 | string[j++] = character; |
101 | } |
102 | ASSERT(j < size); |
103 | string.shrink(j); |
104 | } |
105 | |
106 | template <typename CharacterType> |
107 | static void cleanSlashDotDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) |
108 | { |
109 | size_t slash = firstSlash; |
110 | do { |
111 | size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound; |
112 | // Don't remove the host, i.e. http://foo.org/../foo.html |
113 | if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) { |
114 | path[slash] = 0; |
115 | path[slash + 1] = 0; |
116 | path[slash + 2] = 0; |
117 | } else { |
118 | for (size_t i = previousSlash; i < slash + 3; ++i) |
119 | path[i] = 0; |
120 | } |
121 | slash += 3; |
122 | } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound); |
123 | squeezeOutNullCharacters(path); |
124 | } |
125 | |
126 | template <typename CharacterType> |
127 | static void mergeDoubleSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) |
128 | { |
129 | size_t refPos = find(path.data(), path.size(), '#'); |
130 | if (!refPos || refPos == notFound) |
131 | refPos = path.size(); |
132 | |
133 | size_t slash = firstSlash; |
134 | while (slash < refPos) { |
135 | if (!slash || path[slash - 1] != ':') |
136 | path[slash++] = 0; |
137 | else |
138 | slash += 2; |
139 | if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound) |
140 | break; |
141 | } |
142 | squeezeOutNullCharacters(path); |
143 | } |
144 | |
145 | template <typename CharacterType> |
146 | static void cleanSlashDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) |
147 | { |
148 | size_t slash = firstSlash; |
149 | do { |
150 | path[slash] = 0; |
151 | path[slash + 1] = 0; |
152 | slash += 2; |
153 | } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound); |
154 | squeezeOutNullCharacters(path); |
155 | } |
156 | |
157 | template <typename CharacterType> |
158 | static inline void cleanPath(Vector<CharacterType, 512>& path) |
159 | { |
160 | // FIXME: Should not do this in the query or anchor part of the URL. |
161 | size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0); |
162 | if (firstSlash != notFound) |
163 | cleanSlashDotDotSlashes(path, firstSlash); |
164 | |
165 | // FIXME: Should not do this in the query part. |
166 | firstSlash = findSlashSlash(path.data(), path.size(), 0); |
167 | if (firstSlash != notFound) |
168 | mergeDoubleSlashes(path, firstSlash); |
169 | |
170 | // FIXME: Should not do this in the query or anchor part. |
171 | firstSlash = findSlashDotSlash(path.data(), path.size(), 0); |
172 | if (firstSlash != notFound) |
173 | cleanSlashDotSlashes(path, firstSlash); |
174 | } |
175 | |
176 | template <typename CharacterType> |
177 | static inline bool matchLetter(CharacterType c, char lowercaseLetter) |
178 | { |
179 | return (c | 0x20) == lowercaseLetter; |
180 | } |
181 | |
182 | template <typename CharacterType> |
183 | static inline bool needsTrailingSlash(const CharacterType* characters, unsigned length) |
184 | { |
185 | if (length < 6) |
186 | return false; |
187 | if (!matchLetter(characters[0], 'h') || !matchLetter(characters[1], 't') || !matchLetter(characters[2], 't') || !matchLetter(characters[3], 'p')) |
188 | return false; |
189 | if (!(characters[4] == ':' || (matchLetter(characters[4], 's') && characters[5] == ':'))) |
190 | return false; |
191 | |
192 | unsigned pos = characters[4] == ':' ? 5 : 6; |
193 | |
194 | // Skip initial two slashes if present. |
195 | if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/') |
196 | pos += 2; |
197 | |
198 | // Find next slash. |
199 | while (pos < length && characters[pos] != '/') |
200 | ++pos; |
201 | |
202 | return pos == length; |
203 | } |
204 | |
205 | template <typename CharacterType> |
206 | static ALWAYS_INLINE SharedStringHash computeSharedStringHashInline(const CharacterType* url, unsigned length) |
207 | { |
208 | return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length)); |
209 | } |
210 | |
211 | SharedStringHash computeSharedStringHash(const String& url) |
212 | { |
213 | unsigned length = url.length(); |
214 | if (!length || url.is8Bit()) |
215 | return computeSharedStringHashInline(url.characters8(), length); |
216 | return computeSharedStringHashInline(url.characters16(), length); |
217 | } |
218 | |
219 | SharedStringHash computeSharedStringHash(const UChar* url, unsigned length) |
220 | { |
221 | return computeSharedStringHashInline(url, length); |
222 | } |
223 | |
224 | template <typename CharacterType> |
225 | static ALWAYS_INLINE void computeSharedStringHashInline(const URL& base, const CharacterType* characters, unsigned length, Vector<CharacterType, 512>& buffer) |
226 | { |
227 | if (!length) |
228 | return; |
229 | |
230 | // This is a poor man's completeURL. Faster with less memory allocation. |
231 | // FIXME: It's missing a lot of what completeURL does and a lot of what URL does. |
232 | // For example, it does not handle international domain names properly. |
233 | |
234 | // FIXME: It is wrong that we do not do further processing on strings that have "://" in them: |
235 | // 1) The "://" could be in the query or anchor. |
236 | // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it. |
237 | |
238 | // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does |
239 | // have a query or anchor. |
240 | |
241 | bool hasColonSlashSlash = containsColonSlashSlash(characters, length); |
242 | |
243 | if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) { |
244 | buffer.append(characters, length); |
245 | return; |
246 | } |
247 | |
248 | |
249 | if (hasColonSlashSlash) { |
250 | // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the |
251 | // end of the path, *before* the query or anchor. |
252 | buffer.append(characters, length); |
253 | buffer.append('/'); |
254 | return; |
255 | } |
256 | |
257 | if (!length) |
258 | append(buffer, base.string()); |
259 | else { |
260 | switch (characters[0]) { |
261 | case '/': |
262 | append(buffer, StringView(base.string()).substring(0, base.pathStart())); |
263 | break; |
264 | case '#': |
265 | append(buffer, StringView(base.string()).substring(0, base.pathEnd())); |
266 | break; |
267 | default: |
268 | append(buffer, StringView(base.string()).substring(0, base.pathAfterLastSlash())); |
269 | break; |
270 | } |
271 | } |
272 | buffer.append(characters, length); |
273 | cleanPath(buffer); |
274 | if (needsTrailingSlash(buffer.data(), buffer.size())) { |
275 | // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the |
276 | // end of the path, *before* the query or anchor. |
277 | buffer.append('/'); |
278 | } |
279 | |
280 | return; |
281 | } |
282 | |
283 | SharedStringHash computeVisitedLinkHash(const URL& base, const AtomicString& attributeURL) |
284 | { |
285 | if (attributeURL.isEmpty()) |
286 | return 0; |
287 | |
288 | if (!base.string().isEmpty() && base.string().is8Bit() && attributeURL.is8Bit()) { |
289 | Vector<LChar, 512> url; |
290 | computeSharedStringHashInline(base, attributeURL.characters8(), attributeURL.length(), url); |
291 | if (url.isEmpty()) |
292 | return 0; |
293 | |
294 | return computeSharedStringHashInline(url.data(), url.size()); |
295 | } |
296 | |
297 | Vector<UChar, 512> url; |
298 | auto upconvertedCharacters = StringView(attributeURL.string()).upconvertedCharacters(); |
299 | const UChar* characters = upconvertedCharacters; |
300 | computeSharedStringHashInline(base, characters, attributeURL.length(), url); |
301 | if (url.isEmpty()) |
302 | return 0; |
303 | |
304 | return computeSharedStringHashInline(url.data(), url.size()); |
305 | } |
306 | |
307 | } // namespace WebCore |
308 | |