1/*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller (mueller@kde.org)
5 * (C) 2006 Alexey Proskuryakov (ap@webkit.org)
6 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 */
23
24#include "config.h"
25#include "SharedStringHash.h"
26
27#include <wtf/URL.h>
28#include <wtf/text/AtomicString.h>
29#include <wtf/text/StringHash.h>
30#include <wtf/text/StringView.h>
31
32namespace WebCore {
33
34template <typename CharacterType>
35static inline size_t findSlashDotDotSlash(const CharacterType* characters, size_t length, size_t position)
36{
37 if (length < 4)
38 return notFound;
39 size_t loopLimit = length - 3;
40 for (size_t i = position; i < loopLimit; ++i) {
41 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
42 return i;
43 }
44 return notFound;
45}
46
47template <typename CharacterType>
48static inline size_t findSlashSlash(const CharacterType* characters, size_t length, size_t position)
49{
50 if (length < 2)
51 return notFound;
52 size_t loopLimit = length - 1;
53 for (size_t i = position; i < loopLimit; ++i) {
54 if (characters[i] == '/' && characters[i + 1] == '/')
55 return i;
56 }
57 return notFound;
58}
59
60template <typename CharacterType>
61static inline size_t findSlashDotSlash(const CharacterType* characters, size_t length, size_t position)
62{
63 if (length < 3)
64 return notFound;
65 size_t loopLimit = length - 2;
66 for (size_t i = position; i < loopLimit; ++i) {
67 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
68 return i;
69 }
70 return notFound;
71}
72
73template <typename CharacterType>
74static inline bool containsColonSlashSlash(const CharacterType* characters, unsigned length)
75{
76 if (length < 3)
77 return false;
78 unsigned loopLimit = length - 2;
79 for (unsigned i = 0; i < loopLimit; ++i) {
80 if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/')
81 return true;
82 }
83 return false;
84}
85
86template <typename CharacterType>
87static inline void squeezeOutNullCharacters(Vector<CharacterType, 512>& string)
88{
89 size_t size = string.size();
90 size_t i = 0;
91 for (i = 0; i < size; ++i) {
92 if (!string[i])
93 break;
94 }
95 if (i == size)
96 return;
97 size_t j = i;
98 for (++i; i < size; ++i) {
99 if (CharacterType character = string[i])
100 string[j++] = character;
101 }
102 ASSERT(j < size);
103 string.shrink(j);
104}
105
106template <typename CharacterType>
107static void cleanSlashDotDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
108{
109 size_t slash = firstSlash;
110 do {
111 size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound;
112 // Don't remove the host, i.e. http://foo.org/../foo.html
113 if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) {
114 path[slash] = 0;
115 path[slash + 1] = 0;
116 path[slash + 2] = 0;
117 } else {
118 for (size_t i = previousSlash; i < slash + 3; ++i)
119 path[i] = 0;
120 }
121 slash += 3;
122 } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound);
123 squeezeOutNullCharacters(path);
124}
125
126template <typename CharacterType>
127static void mergeDoubleSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
128{
129 size_t refPos = find(path.data(), path.size(), '#');
130 if (!refPos || refPos == notFound)
131 refPos = path.size();
132
133 size_t slash = firstSlash;
134 while (slash < refPos) {
135 if (!slash || path[slash - 1] != ':')
136 path[slash++] = 0;
137 else
138 slash += 2;
139 if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound)
140 break;
141 }
142 squeezeOutNullCharacters(path);
143}
144
145template <typename CharacterType>
146static void cleanSlashDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
147{
148 size_t slash = firstSlash;
149 do {
150 path[slash] = 0;
151 path[slash + 1] = 0;
152 slash += 2;
153 } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound);
154 squeezeOutNullCharacters(path);
155}
156
157template <typename CharacterType>
158static inline void cleanPath(Vector<CharacterType, 512>& path)
159{
160 // FIXME: Should not do this in the query or anchor part of the URL.
161 size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0);
162 if (firstSlash != notFound)
163 cleanSlashDotDotSlashes(path, firstSlash);
164
165 // FIXME: Should not do this in the query part.
166 firstSlash = findSlashSlash(path.data(), path.size(), 0);
167 if (firstSlash != notFound)
168 mergeDoubleSlashes(path, firstSlash);
169
170 // FIXME: Should not do this in the query or anchor part.
171 firstSlash = findSlashDotSlash(path.data(), path.size(), 0);
172 if (firstSlash != notFound)
173 cleanSlashDotSlashes(path, firstSlash);
174}
175
176template <typename CharacterType>
177static inline bool matchLetter(CharacterType c, char lowercaseLetter)
178{
179 return (c | 0x20) == lowercaseLetter;
180}
181
182template <typename CharacterType>
183static inline bool needsTrailingSlash(const CharacterType* characters, unsigned length)
184{
185 if (length < 6)
186 return false;
187 if (!matchLetter(characters[0], 'h') || !matchLetter(characters[1], 't') || !matchLetter(characters[2], 't') || !matchLetter(characters[3], 'p'))
188 return false;
189 if (!(characters[4] == ':' || (matchLetter(characters[4], 's') && characters[5] == ':')))
190 return false;
191
192 unsigned pos = characters[4] == ':' ? 5 : 6;
193
194 // Skip initial two slashes if present.
195 if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
196 pos += 2;
197
198 // Find next slash.
199 while (pos < length && characters[pos] != '/')
200 ++pos;
201
202 return pos == length;
203}
204
205template <typename CharacterType>
206static ALWAYS_INLINE SharedStringHash computeSharedStringHashInline(const CharacterType* url, unsigned length)
207{
208 return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length));
209}
210
211SharedStringHash computeSharedStringHash(const String& url)
212{
213 unsigned length = url.length();
214 if (!length || url.is8Bit())
215 return computeSharedStringHashInline(url.characters8(), length);
216 return computeSharedStringHashInline(url.characters16(), length);
217}
218
219SharedStringHash computeSharedStringHash(const UChar* url, unsigned length)
220{
221 return computeSharedStringHashInline(url, length);
222}
223
224template <typename CharacterType>
225static ALWAYS_INLINE void computeSharedStringHashInline(const URL& base, const CharacterType* characters, unsigned length, Vector<CharacterType, 512>& buffer)
226{
227 if (!length)
228 return;
229
230 // This is a poor man's completeURL. Faster with less memory allocation.
231 // FIXME: It's missing a lot of what completeURL does and a lot of what URL does.
232 // For example, it does not handle international domain names properly.
233
234 // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
235 // 1) The "://" could be in the query or anchor.
236 // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
237
238 // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
239 // have a query or anchor.
240
241 bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
242
243 if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
244 buffer.append(characters, length);
245 return;
246 }
247
248
249 if (hasColonSlashSlash) {
250 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
251 // end of the path, *before* the query or anchor.
252 buffer.append(characters, length);
253 buffer.append('/');
254 return;
255 }
256
257 if (!length)
258 append(buffer, base.string());
259 else {
260 switch (characters[0]) {
261 case '/':
262 append(buffer, StringView(base.string()).substring(0, base.pathStart()));
263 break;
264 case '#':
265 append(buffer, StringView(base.string()).substring(0, base.pathEnd()));
266 break;
267 default:
268 append(buffer, StringView(base.string()).substring(0, base.pathAfterLastSlash()));
269 break;
270 }
271 }
272 buffer.append(characters, length);
273 cleanPath(buffer);
274 if (needsTrailingSlash(buffer.data(), buffer.size())) {
275 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
276 // end of the path, *before* the query or anchor.
277 buffer.append('/');
278 }
279
280 return;
281}
282
283SharedStringHash computeVisitedLinkHash(const URL& base, const AtomicString& attributeURL)
284{
285 if (attributeURL.isEmpty())
286 return 0;
287
288 if (!base.string().isEmpty() && base.string().is8Bit() && attributeURL.is8Bit()) {
289 Vector<LChar, 512> url;
290 computeSharedStringHashInline(base, attributeURL.characters8(), attributeURL.length(), url);
291 if (url.isEmpty())
292 return 0;
293
294 return computeSharedStringHashInline(url.data(), url.size());
295 }
296
297 Vector<UChar, 512> url;
298 auto upconvertedCharacters = StringView(attributeURL.string()).upconvertedCharacters();
299 const UChar* characters = upconvertedCharacters;
300 computeSharedStringHashInline(base, characters, attributeURL.length(), url);
301 if (url.isEmpty())
302 return 0;
303
304 return computeSharedStringHashInline(url.data(), url.size());
305}
306
307} // namespace WebCore
308