SharedStringHash.cpp source code [webkit/Source/WebCore/platform/SharedStringHash.cpp]

1	/*
2	* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3	* (C) 1999 Antti Koivisto (koivisto@kde.org)
4	* (C) 2001 Dirk Mueller (mueller@kde.org)
5	* (C) 2006 Alexey Proskuryakov (ap@webkit.org)
6	* Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
7	*
8	* This library is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU Library General Public
10	* License as published by the Free Software Foundation; either
11	* version 2 of the License, or (at your option) any later version.
12	*
13	* This library is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	* Library General Public License for more details.
17	*
18	* You should have received a copy of the GNU Library General Public License
19	* along with this library; see the file COPYING.LIB. If not, write to
20	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21	* Boston, MA 02110-1301, USA.
22	*/
23
24	#include "config.h"
25	#include "SharedStringHash.h"
26
27	#include <wtf/URL.h>
28	#include <wtf/text/AtomicString.h>
29	#include <wtf/text/StringHash.h>
30	#include <wtf/text/StringView.h>
31
32	namespace WebCore {
33
34	template <typename CharacterType>
35	static inline size_t findSlashDotDotSlash(const CharacterType* characters, size_t length, size_t position)
36	{
37	if (length < `4`)
38	return notFound;
39	size_t loopLimit = length - `3`;
40	for (size_t i = position; i < loopLimit; ++i) {
41	if (characters[i] == `'/'` && characters[i + `1`] == `'.'` && characters[i + `2`] == `'.'` && characters[i + `3`] == `'/'`)
42	return i;
43	}
44	return notFound;
45	}
46
47	template <typename CharacterType>
48	static inline size_t findSlashSlash(const CharacterType* characters, size_t length, size_t position)
49	{
50	if (length < `2`)
51	return notFound;
52	size_t loopLimit = length - `1`;
53	for (size_t i = position; i < loopLimit; ++i) {
54	if (characters[i] == `'/'` && characters[i + `1`] == `'/'`)
55	return i;
56	}
57	return notFound;
58	}
59
60	template <typename CharacterType>
61	static inline size_t findSlashDotSlash(const CharacterType* characters, size_t length, size_t position)
62	{
63	if (length < `3`)
64	return notFound;
65	size_t loopLimit = length - `2`;
66	for (size_t i = position; i < loopLimit; ++i) {
67	if (characters[i] == `'/'` && characters[i + `1`] == `'.'` && characters[i + `2`] == `'/'`)
68	return i;
69	}
70	return notFound;
71	}
72
73	template <typename CharacterType>
74	static inline bool containsColonSlashSlash(const CharacterType* characters, unsigned length)
75	{
76	if (length < `3`)
77	return false;
78	unsigned loopLimit = length - `2`;
79	for (unsigned i = `0`; i < loopLimit; ++i) {
80	if (characters[i] == `':'` && characters[i + `1`] == `'/'` && characters[i + `2`] == `'/'`)
81	return true;
82	}
83	return false;
84	}
85
86	template <typename CharacterType>
87	static inline void squeezeOutNullCharacters(Vector<CharacterType, `512`>& string)
88	{
89	size_t size = string.size();
90	size_t i = `0`;
91	for (i = `0`; i < size; ++i) {
92	if (!string[i])
93	break;
94	}
95	if (i == size)
96	return;
97	size_t j = i;
98	for (++i; i < size; ++i) {
99	if (CharacterType character = string[i])
100	string[j++] = character;
101	}
102	ASSERT(j < size);
103	string.shrink(j);
104	}
105
106	template <typename CharacterType>
107	static void cleanSlashDotDotSlashes(Vector<CharacterType, `512`>& path, size_t firstSlash)
108	{
109	size_t slash = firstSlash;
110	do {
111	size_t previousSlash = slash ? reverseFind(path.data(), path.size(), `'/'`, slash - `1`) : notFound;
112	// Don't remove the host, i.e. http://foo.org/../foo.html
113	if (previousSlash == notFound \|\| (previousSlash > `3` && path[previousSlash - `2`] == `':'` && path[previousSlash - `1`] == `'/'`)) {
114	path[slash] = `0`;
115	path[slash + `1`] = `0`;
116	path[slash + `2`] = `0`;
117	} else {
118	for (size_t i = previousSlash; i < slash + `3`; ++i)
119	path[i] = `0`;
120	}
121	slash += `3`;
122	} while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound);
123	squeezeOutNullCharacters(path);
124	}
125
126	template <typename CharacterType>
127	static void mergeDoubleSlashes(Vector<CharacterType, `512`>& path, size_t firstSlash)
128	{
129	size_t refPos = find(path.data(), path.size(), `'#'`);
130	if (!refPos \|\| refPos == notFound)
131	refPos = path.size();
132
133	size_t slash = firstSlash;
134	while (slash < refPos) {
135	if (!slash \|\| path[slash - `1`] != `':'`)
136	path[slash++] = `0`;
137	else
138	slash += `2`;
139	if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound)
140	break;
141	}
142	squeezeOutNullCharacters(path);
143	}
144
145	template <typename CharacterType>
146	static void cleanSlashDotSlashes(Vector<CharacterType, `512`>& path, size_t firstSlash)
147	{
148	size_t slash = firstSlash;
149	do {
150	path[slash] = `0`;
151	path[slash + `1`] = `0`;
152	slash += `2`;
153	} while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound);
154	squeezeOutNullCharacters(path);
155	}
156
157	template <typename CharacterType>
158	static inline void cleanPath(Vector<CharacterType, `512`>& path)
159	{
160	// FIXME: Should not do this in the query or anchor part of the URL.
161	size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), `0`);
162	if (firstSlash != notFound)
163	cleanSlashDotDotSlashes(path, firstSlash);
164
165	// FIXME: Should not do this in the query part.
166	firstSlash = findSlashSlash(path.data(), path.size(), `0`);
167	if (firstSlash != notFound)
168	mergeDoubleSlashes(path, firstSlash);
169
170	// FIXME: Should not do this in the query or anchor part.
171	firstSlash = findSlashDotSlash(path.data(), path.size(), `0`);
172	if (firstSlash != notFound)
173	cleanSlashDotSlashes(path, firstSlash);
174	}
175
176	template <typename CharacterType>
177	static inline bool matchLetter(CharacterType c, char lowercaseLetter)
178	{
179	return (c \| `0x20`) == lowercaseLetter;
180	}
181
182	template <typename CharacterType>
183	static inline bool needsTrailingSlash(const CharacterType* characters, unsigned length)
184	{
185	if (length < `6`)
186	return false;
187	if (!matchLetter(characters[`0`], `'h'`) \|\| !matchLetter(characters[`1`], `'t'`) \|\| !matchLetter(characters[`2`], `'t'`) \|\| !matchLetter(characters[`3`], `'p'`))
188	return false;
189	if (!(characters[`4`] == `':'` \|\| (matchLetter(characters[`4`], `'s'`) && characters[`5`] == `':'`)))
190	return false;
191
192	unsigned pos = characters[`4`] == `':'` ? `5` : `6`;
193
194	// Skip initial two slashes if present.
195	if (pos + `1` < length && characters[pos] == `'/'` && characters[pos + `1`] == `'/'`)
196	pos += `2`;
197
198	// Find next slash.
199	while (pos < length && characters[pos] != `'/'`)
200	++pos;
201
202	return pos == length;
203	}
204
205	template <typename CharacterType>
206	static ALWAYS_INLINE SharedStringHash computeSharedStringHashInline(const CharacterType* url, unsigned length)
207	{
208	return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length));
209	}
210
211	SharedStringHash computeSharedStringHash(const String& url)
212	{
213	unsigned length = url.length();
214	if (!length \|\| url.is8Bit())
215	return computeSharedStringHashInline(url.characters8(), length);
216	return computeSharedStringHashInline(url.characters16(), length);
217	}
218
219	SharedStringHash computeSharedStringHash(const UChar* url, unsigned length)
220	{
221	return computeSharedStringHashInline(url, length);
222	}
223
224	template <typename CharacterType>
225	static ALWAYS_INLINE void computeSharedStringHashInline(const URL& base, const CharacterType* characters, unsigned length, Vector<CharacterType, `512`>& buffer)
226	{
227	if (!length)
228	return;
229
230	// This is a poor man's completeURL. Faster with less memory allocation.
231	// FIXME: It's missing a lot of what completeURL does and a lot of what URL does.
232	// For example, it does not handle international domain names properly.
233
234	// FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
235	// 1) The "://" could be in the query or anchor.
236	// 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
237
238	// FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
239	// have a query or anchor.
240
241	bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
242
243	if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
244	buffer.append(characters, length);
245	return;
246	}
247
248
249	if (hasColonSlashSlash) {
250	// FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
251	// end of the path, before* the query or anchor.*
252	buffer.append(characters, length);
253	buffer.append(`'/'`);
254	return;
255	}
256
257	if (!length)
258	append(buffer, base.string());
259	else {
260	switch (characters[`0`]) {
261	case `'/'`:
262	append(buffer, StringView (base.string()).substring(`0`, base.pathStart()));
263	break;
264	case `'#'`:
265	append(buffer, StringView (base.string()).substring(`0`, base.pathEnd()));
266	break;
267	default:
268	append(buffer, StringView (base.string()).substring(`0`, base.pathAfterLastSlash()));
269	break;
270	}
271	}
272	buffer.append(characters, length);
273	cleanPath(buffer);
274	if (needsTrailingSlash(buffer.data(), buffer.size())) {
275	// FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
276	// end of the path, before* the query or anchor.*
277	buffer.append(`'/'`);
278	}
279
280	return;
281	}
282
283	SharedStringHash computeVisitedLinkHash(const URL& base, const AtomicString& attributeURL)
284	{
285	if (attributeURL.isEmpty())
286	return `0`;
287
288	if (!base.string().isEmpty() && base.string().is8Bit() && attributeURL.is8Bit()) {
289	Vector<LChar, `512`> url;
290	computeSharedStringHashInline(base, attributeURL.characters8(), attributeURL.length(), url);
291	if (url.isEmpty())
292	return `0`;
293
294	return computeSharedStringHashInline(url.data(), url.size());
295	}
296
297	Vector<UChar, `512`> url;
298	auto upconvertedCharacters = StringView (attributeURL.string()).upconvertedCharacters();
299	const UChar* characters = upconvertedCharacters;
300	computeSharedStringHashInline(base, characters, attributeURL.length(), url);
301	if (url.isEmpty())
302	return `0`;
303
304	return computeSharedStringHashInline(url.data(), url.size());
305	}
306
307	} // namespace WebCore
308

Browse the source code of webkit/Source/WebCore/platform/SharedStringHash.cpp