URL.cpp source code [webkit/Source/WTF/wtf/URL.cpp]

1	/*
2	* Copyright (C) 2004-2019 Apple Inc. All rights reserved.
3	* Copyright (C) 2012 Research In Motion Limited. All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright
11	* notice, this list of conditions and the following disclaimer in the
12	* documentation and/or other materials provided with the distribution.
13	*
14	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	*/
26
27	#include "config.h"
28	#include <wtf/URL.h>
29
30	#include "URLParser.h"
31	#include <stdio.h>
32	#include <unicode/uidna.h>
33	#include <wtf/HashMap.h>
34	#include <wtf/NeverDestroyed.h>
35	#include <wtf/StdLibExtras.h>
36	#include <wtf/UUID.h>
37	#include <wtf/text/CString.h>
38	#include <wtf/text/StringBuilder.h>
39	#include <wtf/text/StringConcatenateNumbers.h>
40	#include <wtf/text/StringHash.h>
41	#include <wtf/text/TextStream.h>
42
43	namespace WTF {
44
45	typedef Vector<char, `512`> CharBuffer;
46	typedef Vector<UChar, `512`> UCharBuffer;
47
48	static const unsigned invalidPortNumber = `0xFFFF`;
49
50	// Copies the source to the destination, assuming all the source characters are
51	// ASCII. The destination buffer must be large enough. Null characters are allowed
52	// in the source string, and no attempt is made to null-terminate the result.
53	static void copyASCII(const String& string, char* dest)
54	{
55	if (string.isEmpty())
56	return;
57
58	if (string.is8Bit())
59	memcpy(dest, string.characters8(), string.length());
60	else {
61	const UChar* src = string.characters16();
62	size_t length = string.length();
63	for (size_t i = `0`; i < length; i++)
64	dest[i] = static_cast<char>(src[i]);
65	}
66	}
67
68	void URL::invalidate()
69	{
70	m_isValid = false;
71	m_protocolIsInHTTPFamily = false;
72	m_cannotBeABaseURL = false;
73	m_schemeEnd = `0`;
74	m_userStart = `0`;
75	m_userEnd = `0`;
76	m_passwordEnd = `0`;
77	m_hostEnd = `0`;
78	m_portLength = `0`;
79	m_pathEnd = `0`;
80	m_pathAfterLastSlash = `0`;
81	m_queryEnd = `0`;
82	}
83
84	URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding)
85	{
86	URLParser parser(relative, base, encoding);
87	*this = parser.result();
88	}
89
90	static bool shouldTrimFromURL(UChar c)
91	{
92	// Browsers ignore leading/trailing whitespace and control
93	// characters from URLs. Note that c is an unsigned* char here*
94	// so this comparison should only catch control characters.
95	return c <= `' '`;
96	}
97
98	URL URL::isolatedCopy() const
99	{
100	URL result = *this;
101	result.m_string = result.m_string.isolatedCopy();
102	return result;
103	}
104
105	String URL::lastPathComponent() const
106	{
107	if (!hasPath())
108	return String ();
109
110	unsigned end = m_pathEnd - `1`;
111	if (m_string [end] == `'/'`)
112	--end;
113
114	size_t start = m_string.reverseFind(`'/'`, end);
115	if (start < static_cast<unsigned>(m_hostEnd + m_portLength))
116	return String ();
117	++start;
118
119	return m_string.substring(start, end - start + `1`);
120	}
121
122	StringView URL::protocol() const
123	{
124	return StringView (m_string).substring(`0`, m_schemeEnd);
125	}
126
127	StringView URL::host() const
128	{
129	unsigned start = hostStart();
130	return StringView (m_string).substring(start, m_hostEnd - start);
131	}
132
133	Optional<uint16_t> URL::port() const
134	{
135	if (!m_portLength)
136	return WTF::nullopt;
137
138	bool ok = false;
139	unsigned number;
140	if (m_string.is8Bit())
141	number = charactersToUIntStrict(m_string.characters8() + m_hostEnd + `1`, m_portLength - `1`, &ok);
142	else
143	number = charactersToUIntStrict(m_string.characters16() + m_hostEnd + `1`, m_portLength - `1`, &ok);
144	if (!ok \|\| number > std::numeric_limits<uint16_t>::max())
145	return WTF::nullopt;
146	return number;
147	}
148
149	String URL::hostAndPort() const
150	{
151	if (auto port = this->port())
152	return makeString(host(), `':'`, static_cast<unsigned>(port.value()));
153	return host().toString();
154	}
155
156	String URL::protocolHostAndPort() const
157	{
158	String result = m_string.substring(`0`, m_hostEnd + m_portLength);
159
160	if (m_passwordEnd - m_userStart > `0`) {
161	const int allowForTrailingAtSign = `1`;
162	result.remove(m_userStart, m_passwordEnd - m_userStart + allowForTrailingAtSign);
163	}
164
165	return result;
166	}
167
168	static String decodeEscapeSequencesFromParsedURL(StringView input)
169	{
170	auto inputLength = input.length();
171	if (!inputLength)
172	return emptyString();
173	Vector<LChar> percentDecoded;
174	percentDecoded.reserveInitialCapacity(inputLength);
175	for (unsigned i = `0`; i < inputLength; ++i) {
176	if (input [i] == `'%'`
177	&& inputLength > `2`
178	&& i < inputLength - `2`
179	&& isASCIIHexDigit(input [i + `1`])
180	&& isASCIIHexDigit(input [i + `2`])) {
181	percentDecoded.uncheckedAppend(toASCIIHexValue(input [i + `1`], input [i + `2`]));
182	i += `2`;
183	} else
184	percentDecoded.uncheckedAppend(input [i]);
185	}
186	return String::fromUTF8(percentDecoded.data(), percentDecoded.size());
187	}
188
189	String URL::user() const
190	{
191	return decodeEscapeSequencesFromParsedURL(StringView (m_string).substring(m_userStart, m_userEnd - m_userStart));
192	}
193
194	String URL::pass() const
195	{
196	if (m_passwordEnd == m_userEnd)
197	return String ();
198
199	return decodeEscapeSequencesFromParsedURL(StringView (m_string).substring(m_userEnd + `1`, m_passwordEnd - m_userEnd - `1`));
200	}
201
202	String URL::encodedUser() const
203	{
204	return m_string.substring(m_userStart, m_userEnd - m_userStart);
205	}
206
207	String URL::encodedPass() const
208	{
209	if (m_passwordEnd == m_userEnd)
210	return String ();
211
212	return m_string.substring(m_userEnd + `1`, m_passwordEnd - m_userEnd - `1`);
213	}
214
215	String URL::fragmentIdentifier() const
216	{
217	if (!hasFragmentIdentifier())
218	return String ();
219
220	return m_string.substring(m_queryEnd + `1`);
221	}
222
223	bool URL::hasFragmentIdentifier() const
224	{
225	return m_isValid && m_string.length() != m_queryEnd;
226	}
227
228	String URL::baseAsString() const
229	{
230	return m_string.left(m_pathAfterLastSlash);
231	}
232
233	#if !USE(CF)
234
235	String URL::fileSystemPath() const
236	{
237	if (!isValid() \|\| !isLocalFile())
238	return String ();
239
240	return decodeEscapeSequencesFromParsedURL(StringView (path()));
241	}
242
243	#endif
244
245	#ifdef NDEBUG
246
247	static inline void assertProtocolIsGood(StringView)
248	{
249	}
250
251	#else
252
253	static void assertProtocolIsGood(StringView protocol)
254	{
255	// FIXME: We probably don't need this function any more.
256	// The isASCIIAlphaCaselessEqual function asserts that passed-in characters
257	// are ones it can handle; the older code did not and relied on these checks.
258	for (auto character : protocol.codeUnits()) {
259	ASSERT(isASCII(character));
260	ASSERT(character > `' '`);
261	ASSERT(!isASCIIUpper(character));
262	ASSERT(toASCIILowerUnchecked(character) == character);
263	}
264	}
265
266	#endif
267
268	static Lock defaultPortForProtocolMapForTestingLock;
269
270	using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>;
271	static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting()
272	{
273	static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap;
274	return defaultPortForProtocolMap;
275	}
276
277	static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting()
278	{
279	DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting();
280	if (!defaultPortForProtocolMap)
281	defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting;
282	return *defaultPortForProtocolMap;
283	}
284
285	void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol)
286	{
287	auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
288	ensureDefaultPortForProtocolMapForTesting().add(protocol, port);
289	}
290
291	void clearDefaultPortForProtocolMapForTesting()
292	{
293	auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
294	if (auto* map = defaultPortForProtocolMapForTesting())
295	map->clear();
296	}
297
298	Optional<uint16_t> defaultPortForProtocol(StringView protocol)
299	{
300	if (auto* overrideMap = defaultPortForProtocolMapForTesting()) {
301	auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
302	ASSERT(overrideMap); // No need to null check again here since overrideMap cannot become null after being non-null.
303	auto iterator = overrideMap->find(protocol.toStringWithoutCopying());
304	if (iterator != overrideMap->end())
305	return iterator ->value;
306	}
307	return URLParser::defaultPortForProtocol(protocol);
308	}
309
310	bool isDefaultPortForProtocol(uint16_t port, StringView protocol)
311	{
312	return defaultPortForProtocol(protocol) == port;
313	}
314
315	bool URL::protocolIs(const char* protocol) const
316	{
317	assertProtocolIsGood(StringView (reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
318
319	// JavaScript URLs are "valid" and should be executed even if URL decides they are invalid.
320	// The free function protocolIsJavaScript() should be used instead.
321	ASSERT(!equalLettersIgnoringASCIICase(StringView (protocol), "javascript"));
322
323	if (!m_isValid)
324	return false;
325
326	// Do the comparison without making a new string object.
327	for (unsigned i = `0`; i < m_schemeEnd; ++i) {
328	if (!protocol[i] \|\| !isASCIIAlphaCaselessEqual(m_string [i], protocol[i]))
329	return false;
330	}
331	return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument.
332	}
333
334	bool URL::protocolIs(StringView protocol) const
335	{
336	assertProtocolIsGood(protocol);
337
338	if (!m_isValid)
339	return false;
340
341	if (m_schemeEnd != protocol.length())
342	return false;
343
344	// Do the comparison without making a new string object.
345	for (unsigned i = `0`; i < m_schemeEnd; ++i) {
346	if (!isASCIIAlphaCaselessEqual(m_string [i], protocol [i]))
347	return false;
348	}
349	return true;
350	}
351
352	String URL::query() const
353	{
354	if (m_queryEnd == m_pathEnd)
355	return String ();
356
357	return m_string.substring(m_pathEnd + `1`, m_queryEnd - (m_pathEnd + `1`));
358	}
359
360	String URL::path() const
361	{
362	unsigned portEnd = m_hostEnd + m_portLength;
363	return m_string.substring(portEnd, m_pathEnd - portEnd);
364	}
365
366	bool URL::setProtocol(const String& s)
367	{
368	// Firefox and IE remove everything after the first ':'.
369	size_t separatorPosition = s.find(`':'`);
370	String newProtocol = s.substring(`0`, separatorPosition);
371	auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol);
372	if (!canonicalized)
373	return false;
374
375	if (!m_isValid) {
376	URLParser parser(makeString(*canonicalized, ":", m_string));
377	*this = parser.result();
378	return true;
379	}
380
381	URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd)));
382	*this = parser.result();
383	return true;
384	}
385
386	static bool isAllASCII(StringView string)
387	{
388	if (string.is8Bit())
389	return charactersAreAllASCII(string.characters8(), string.length());
390	return charactersAreAllASCII(string.characters16(), string.length());
391	}
392
393	// Appends the punycoded hostname identified by the given string and length to
394	// the output buffer. The result will not be null terminated.
395	// Return value of false means error in encoding.
396	static bool appendEncodedHostname(UCharBuffer& buffer, StringView string)
397	{
398	// Needs to be big enough to hold an IDN-encoded name.
399	// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
400	const unsigned hostnameBufferLength = `2048`;
401
402	if (string.length() > hostnameBufferLength \|\| isAllASCII(string)) {
403	append(buffer, string);
404	return true;
405	}
406
407	UChar hostnameBuffer[hostnameBufferLength];
408	UErrorCode error = U_ZERO_ERROR;
409	UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER;
410	int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(),
411	string.upconvertedCharacters(), string.length(), hostnameBuffer, hostnameBufferLength, &processingDetails, &error);
412
413	if (U_SUCCESS(error) && !processingDetails.errors) {
414	buffer.append(hostnameBuffer, numCharactersConverted);
415	return true;
416	}
417	return false;
418	}
419
420	unsigned URL::hostStart() const
421	{
422	return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + `1`;
423	}
424
425	void URL::setHost(const String& s)
426	{
427	if (!m_isValid)
428	return;
429
430	auto colonIndex = s.find(`':'`);
431	if (colonIndex != notFound)
432	return;
433
434	UCharBuffer encodedHostName;
435	if (!appendEncodedHostname(encodedHostName, s))
436	return;
437
438	bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + `1`);
439
440	StringBuilder builder;
441	builder.append(m_string.left(hostStart()));
442	if (slashSlashNeeded)
443	builder.appendLiteral("//");
444	builder.append(StringView (encodedHostName.data(), encodedHostName.size()));
445	builder.append(m_string.substring(m_hostEnd));
446
447	URLParser parser(builder.toString());
448	*this = parser.result();
449	}
450
451	void URL::removePort()
452	{
453	if (!m_portLength)
454	return;
455	URLParser parser(makeString(StringView (m_string).left(m_hostEnd), StringView (m_string).substring(m_hostEnd + m_portLength)));
456	*this = parser.result();
457	}
458
459	void URL::setPort(unsigned short i)
460	{
461	if (!m_isValid)
462	return;
463
464	bool colonNeeded = !m_portLength;
465	unsigned portStart = (colonNeeded ? m_hostEnd : m_hostEnd + `1`);
466
467	URLParser parser(makeString(StringView (m_string).left(portStart), (colonNeeded ? ":" : ""), static_cast<unsigned>(i), StringView (m_string).substring(m_hostEnd + m_portLength)));
468	*this = parser.result();
469	}
470
471	void URL::setHostAndPort(const String& hostAndPort)
472	{
473	if (!m_isValid)
474	return;
475
476	StringView hostName(hostAndPort);
477	StringView port;
478
479	auto colonIndex = hostName.find(`':'`);
480	if (colonIndex != notFound) {
481	port = hostName.substring(colonIndex + `1`);
482	bool ok;
483	int portInt = port.toIntStrict(ok);
484	if (!ok \|\| portInt < `0`)
485	return;
486	hostName = hostName.substring(`0`, colonIndex);
487	}
488
489	if (hostName.isEmpty())
490	return;
491
492	UCharBuffer encodedHostName;
493	if (!appendEncodedHostname(encodedHostName, hostName))
494	return;
495
496	bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + `1`);
497
498	StringBuilder builder;
499	builder.append(m_string.left(hostStart()));
500	if (slashSlashNeeded)
501	builder.appendLiteral("//");
502	builder.append(StringView (encodedHostName.data(), encodedHostName.size()));
503	if (!port.isEmpty()) {
504	builder.appendLiteral(":");
505	builder.append(port);
506	}
507	builder.append(StringView (m_string).substring(m_hostEnd + m_portLength));
508
509	URLParser parser(builder.toString());
510	*this = parser.result();
511	}
512
513	static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar))
514	{
515	auto encode = [shouldEncode] (const String& input) {
516	CString utf8 = input.utf8();
517	auto* data = utf8.data();
518	StringBuilder builder;
519	auto length = utf8.length();
520	for (unsigned j = `0`; j < length; j++) {
521	auto c = data[j];
522	if (shouldEncode(c)) {
523	builder.append(`'%'`);
524	builder.append(upperNibbleToASCIIHexDigit(c));
525	builder.append(lowerNibbleToASCIIHexDigit(c));
526	} else
527	builder.append(c);
528	}
529	return builder.toString();
530	};
531
532	for (size_t i = `0`; i < input.length(); ++i) {
533	if (UNLIKELY(shouldEncode(input[i])))
534	return encode (input);
535	}
536	return input;
537	}
538
539	void URL::setUser(const String& user)
540	{
541	if (!m_isValid)
542	return;
543
544	// FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations,
545	// and to avoid changing more than just the user login.
546
547	unsigned end = m_userEnd;
548	if (!user.isEmpty()) {
549	String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet);
550	if (m_userStart == static_cast<unsigned>(m_schemeEnd + `1`))
551	u = "//" + u;
552	// Add '@' if we didn't have one before.
553	if (end == m_hostEnd \|\| (end == m_passwordEnd && m_string [end] != `'@'`))
554	u.append(`'@'`);
555	URLParser parser(makeString(StringView (m_string).left(m_userStart), u, StringView (m_string).substring(end)));
556	*this = parser.result();
557	} else {
558	// Remove '@' if we now have neither user nor password.
559	if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string [end] == `'@'`)
560	end += `1`;
561	// We don't want to parse in the extremely common case where we are not going to make a change.
562	if (m_userStart != end) {
563	URLParser parser(makeString(StringView (m_string).left(m_userStart), StringView (m_string).substring(end)));
564	*this = parser.result();
565	}
566	}
567	}
568
569	void URL::setPass(const String& password)
570	{
571	if (!m_isValid)
572	return;
573
574	unsigned end = m_passwordEnd;
575	if (!password.isEmpty()) {
576	String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@";
577	if (m_userEnd == static_cast<unsigned>(m_schemeEnd + `1`))
578	p = "//" + p;
579	// Eat the existing '@' since we are going to add our own.
580	if (end != m_hostEnd && m_string [end] == `'@'`)
581	end += `1`;
582	URLParser parser(makeString(StringView (m_string).left(m_userEnd), p, StringView (m_string).substring(end)));
583	*this = parser.result();
584	} else {
585	// Remove '@' if we now have neither user nor password.
586	if (m_userStart == m_userEnd && end != m_hostEnd && m_string [end] == `'@'`)
587	end += `1`;
588	// We don't want to parse in the extremely common case where we are not going to make a change.
589	if (m_userEnd != end) {
590	URLParser parser(makeString(StringView (m_string).left(m_userEnd), StringView (m_string).substring(end)));
591	*this = parser.result();
592	}
593	}
594	}
595
596	void URL::setFragmentIdentifier(StringView identifier)
597	{
598	if (!m_isValid)
599	return;
600
601	// FIXME: Optimize the case where the identifier already happens to be equal to what was passed?
602	// FIXME: Is it correct to do this without encoding and escaping non-ASCII characters?
603	*this = URLParser { makeString(StringView { m_string }.substring(`0`, m_queryEnd), `'#'`, identifier) }.result();
604	}
605
606	void URL::removeFragmentIdentifier()
607	{
608	if (!m_isValid) {
609	ASSERT(!m_queryEnd);
610	return;
611	}
612	if (m_isValid && m_string.length() > m_queryEnd)
613	m_string = m_string.left(m_queryEnd);
614	}
615
616	void URL::removeQueryAndFragmentIdentifier()
617	{
618	if (!m_isValid)
619	return;
620
621	m_string = m_string.left(m_pathEnd);
622	m_queryEnd = m_pathEnd;
623	}
624
625	void URL::setQuery(const String& query)
626	{
627	if (!m_isValid)
628	return;
629
630	// FIXME: '#' and non-ASCII characters must be encoded and escaped.
631	// Usually, the query is encoded using document encoding, not UTF-8, but we don't have
632	// access to the document in this function.
633	// https://webkit.org/b/161176
634	if ((query.isEmpty() \|\| query [`0`] != `'?'`) && !query.isNull()) {
635	URLParser parser(makeString(StringView (m_string).left(m_pathEnd), "?", query, StringView (m_string).substring(m_queryEnd)));
636	*this = parser.result();
637	} else {
638	URLParser parser(makeString(StringView (m_string).left(m_pathEnd), query, StringView (m_string).substring(m_queryEnd)));
639	*this = parser.result();
640	}
641
642	}
643
644	void URL::setPath(const String& s)
645	{
646	if (!m_isValid)
647	return;
648
649	String path = s;
650	if (path.isEmpty() \|\| path [`0`] != `'/'`)
651	path = "/" + path;
652
653	auto questionMarkOrNumberSign = [] (UChar character) {
654	return character == `'?'` \|\| character == `'#'`;
655	};
656	URLParser parser(makeString(StringView (m_string).left(m_hostEnd + m_portLength), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView (m_string).substring(m_pathEnd)));
657	*this = parser.result();
658	}
659
660	bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b)
661	{
662	if (a.m_queryEnd != b.m_queryEnd)
663	return false;
664	unsigned queryLength = a.m_queryEnd;
665	for (unsigned i = `0`; i < queryLength; ++i)
666	if (a.string()[i] != b.string()[i])
667	return false;
668	return true;
669	}
670
671	bool equalIgnoringQueryAndFragment(const URL& a, const URL& b)
672	{
673	if (a.pathEnd() != b.pathEnd())
674	return false;
675	unsigned pathEnd = a.pathEnd();
676	for (unsigned i = `0`; i < pathEnd; ++i) {
677	if (a.string()[i] != b.string()[i])
678	return false;
679	}
680	return true;
681	}
682
683	bool protocolHostAndPortAreEqual(const URL& a, const URL& b)
684	{
685	if (a.m_schemeEnd != b.m_schemeEnd)
686	return false;
687
688	unsigned hostStartA = a.hostStart();
689	unsigned hostLengthA = a.m_hostEnd - hostStartA;
690	unsigned hostStartB = b.hostStart();
691	unsigned hostLengthB = b.m_hostEnd - b.hostStart();
692	if (hostLengthA != hostLengthB)
693	return false;
694
695	// Check the scheme
696	for (unsigned i = `0`; i < a.m_schemeEnd; ++i) {
697	if (a.string()[i] != b.string()[i])
698	return false;
699	}
700
701	// And the host
702	for (unsigned i = `0`; i < hostLengthA; ++i) {
703	if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
704	return false;
705	}
706
707	if (a.port() != b.port())
708	return false;
709
710	return true;
711	}
712
713	bool hostsAreEqual(const URL& a, const URL& b)
714	{
715	unsigned hostStartA = a.hostStart();
716	unsigned hostLengthA = a.m_hostEnd - hostStartA;
717	unsigned hostStartB = b.hostStart();
718	unsigned hostLengthB = b.m_hostEnd - hostStartB;
719	if (hostLengthA != hostLengthB)
720	return false;
721
722	for (unsigned i = `0`; i < hostLengthA; ++i) {
723	if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
724	return false;
725	}
726
727	return true;
728	}
729
730	bool URL::isMatchingDomain(const String& domain) const
731	{
732	if (isNull())
733	return false;
734
735	if (domain.isEmpty())
736	return true;
737
738	if (!protocolIsInHTTPFamily())
739	return false;
740
741	auto host = this->host();
742	if (!host.endsWith(domain))
743	return false;
744
745	return host.length() == domain.length() \|\| host [host.length() - domain.length() - `1`] == `'.'`;
746	}
747
748	String encodeWithURLEscapeSequences(const String& input)
749	{
750	return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet);
751	}
752
753	bool URL::isHierarchical() const
754	{
755	if (!m_isValid)
756	return false;
757	ASSERT(m_string[m_schemeEnd] == `':'`);
758	return m_string [m_schemeEnd + `1`] == `'/'`;
759	}
760
761	void URL::copyToBuffer(Vector<char, `512`>& buffer) const
762	{
763	// FIXME: This throws away the high bytes of all the characters in the string!
764	// That's fine for a valid URL, which is all ASCII, but not for invalid URLs.
765	buffer.resize(m_string.length());
766	copyASCII(m_string, buffer.data());
767	}
768
769	template<typename StringClass>
770	bool protocolIsInternal(const StringClass& url, const char* protocol)
771	{
772	// Do the comparison without making a new string object.
773	assertProtocolIsGood(StringView (reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
774	bool isLeading = true;
775	for (unsigned i = `0`, j = `0`; url[i]; ++i) {
776	// Skip leading whitespace and control characters.
777	if (isLeading && shouldTrimFromURL(url[i]))
778	continue;
779	isLeading = false;
780
781	// Skip any tabs and newlines.
782	if (url[i] == `'\t'` \|\| url[i] == `'\r'` \|\| url[i] == `'\n'`)
783	continue;
784
785	if (!protocol[j])
786	return url[i] == `':'`;
787	if (!isASCIIAlphaCaselessEqual(url[i], protocol[j]))
788	return false;
789
790	++j;
791	}
792
793	return false;
794	}
795
796	bool protocolIs(const String& url, const char* protocol)
797	{
798	return protocolIsInternal(url, protocol);
799	}
800
801	inline bool URL::protocolIs(const String& string, const char* protocol)
802	{
803	return WTF::protocolIsInternal(string, protocol);
804	}
805
806	#ifndef NDEBUG
807
808	void URL::print() const
809	{
810	printf("%s\n", m_string.utf8().data());
811	}
812
813	#endif
814
815	String URL::strippedForUseAsReferrer() const
816	{
817	URL referrer(*this);
818	referrer.setUser(String ());
819	referrer.setPass(String ());
820	referrer.removeFragmentIdentifier();
821	return referrer.string();
822	}
823
824	bool URL::isLocalFile() const
825	{
826	// Including feed here might be a bad idea since drag and drop uses this check
827	// and including feed would allow feeds to potentially let someone's blog
828	// read the contents of the clipboard on a drag, even without a drop.
829	// Likewise with using the FrameLoader::shouldTreatURLAsLocal() function.
830	return protocolIs("file");
831	}
832
833	bool protocolIsJavaScript(const String& url)
834	{
835	return protocolIsInternal(url, "javascript");
836	}
837
838	bool protocolIsJavaScript(StringView url)
839	{
840	return protocolIsInternal(url, "javascript");
841	}
842
843	bool protocolIsInHTTPFamily(const String& url)
844	{
845	auto length = url.length();
846	// Do the comparison without making a new string object.
847	return length >= `5`
848	&& isASCIIAlphaCaselessEqual(url [`0`], `'h'`)
849	&& isASCIIAlphaCaselessEqual(url [`1`], `'t'`)
850	&& isASCIIAlphaCaselessEqual(url [`2`], `'t'`)
851	&& isASCIIAlphaCaselessEqual(url [`3`], `'p'`)
852	&& (url [`4`] == `':'` \|\| (isASCIIAlphaCaselessEqual(url [`4`], `'s'`) && length >= `6` && url [`5`] == `':'`));
853	}
854
855	const URL& blankURL()
856	{
857	static NeverDestroyed<URL> staticBlankURL(URL (), "about:blank");
858	return staticBlankURL;
859	}
860
861	bool URL::protocolIsAbout() const
862	{
863	return protocolIs("about");
864	}
865
866	bool portAllowed(const URL& url)
867	{
868	Optional<uint16_t> port = url.port();
869
870	// Since most URLs don't have a port, return early for the "no port" case.
871	if (!port)
872	return true;
873
874	// This blocked port list matches the port blocking that Mozilla implements.
875	// See http://www.mozilla.org/projects/netlib/PortBanning.html for more information.
876	static const uint16_t blockedPortList[] = {
877	`1`, // tcpmux
878	`7`, // echo
879	`9`, // discard
880	`11`, // systat
881	`13`, // daytime
882	`15`, // netstat
883	`17`, // qotd
884	`19`, // chargen
885	`20`, // FTP-data
886	`21`, // FTP-control
887	`22`, // SSH
888	`23`, // telnet
889	`25`, // SMTP
890	`37`, // time
891	`42`, // name
892	`43`, // nicname
893	`53`, // domain
894	`77`, // priv-rjs
895	`79`, // finger
896	`87`, // ttylink
897	`95`, // supdup
898	`101`, // hostriame
899	`102`, // iso-tsap
900	`103`, // gppitnp
901	`104`, // acr-nema
902	`109`, // POP2
903	`110`, // POP3
904	`111`, // sunrpc
905	`113`, // auth
906	`115`, // SFTP
907	`117`, // uucp-path
908	`119`, // nntp
909	`123`, // NTP
910	`135`, // loc-srv / epmap
911	`139`, // netbios
912	`143`, // IMAP2
913	`179`, // BGP
914	`389`, // LDAP
915	`427`, // SLP (Also used by Apple Filing Protocol)
916	`465`, // SMTP+SSL
917	`512`, // print / exec
918	`513`, // login
919	`514`, // shell
920	`515`, // printer
921	`526`, // tempo
922	`530`, // courier
923	`531`, // Chat
924	`532`, // netnews
925	`540`, // UUCP
926	`548`, // afpovertcp [Apple addition]
927	`556`, // remotefs
928	`563`, // NNTP+SSL
929	`587`, // ESMTP
930	`601`, // syslog-conn
931	`636`, // LDAP+SSL
932	`993`, // IMAP+SSL
933	`995`, // POP3+SSL
934	`2049`, // NFS
935	`3659`, // apple-sasl / PasswordServer [Apple addition]
936	`4045`, // lockd
937	`4190`, // ManageSieve [Apple addition]
938	`6000`, // X11
939	`6665`, // Alternate IRC [Apple addition]
940	`6666`, // Alternate IRC [Apple addition]
941	`6667`, // Standard IRC [Apple addition]
942	`6668`, // Alternate IRC [Apple addition]
943	`6669`, // Alternate IRC [Apple addition]
944	`6679`, // Alternate IRC SSL [Apple addition]
945	`6697`, // IRC+SSL [Apple addition]
946	invalidPortNumber, // Used to block all invalid port numbers
947	};
948
949	// If the port is not in the blocked port list, allow it.
950	ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList)));
951	if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value()))
952	return true;
953
954	// Allow ports 21 and 22 for FTP URLs, as Mozilla does.
955	if ((port.value() == `21` \|\| port.value() == `22`) && url.protocolIs("ftp"))
956	return true;
957
958	// Allow any port number in a file URL, since the port number is ignored.
959	if (url.protocolIs("file"))
960	return true;
961
962	return false;
963	}
964
965	String mimeTypeFromDataURL(const String& url)
966	{
967	ASSERT(protocolIsInternal(url, "data"));
968
969	// FIXME: What's the right behavior when the URL has a comma first, but a semicolon later?
970	// Currently this code will break at the semicolon in that case. Not sure that's correct.
971	auto index = url.find(`';'`, `5`);
972	if (index == notFound)
973	index = url.find(`','`, `5`);
974	if (index == notFound) {
975	// FIXME: There was an old comment here that made it sound like this should be returning text/plain.
976	// But we have been returning empty string here for some time, so not changing its behavior at this time.
977	return emptyString();
978	}
979	if (index == `5`)
980	return "text/plain"_s;
981	ASSERT(index >= `5`);
982	return url.substring(`5`, index - `5`).convertToASCIILowercase();
983	}
984
985	String URL::stringCenterEllipsizedToLength(unsigned length) const
986	{
987	if (string().length() <= length)
988	return string();
989
990	return string().left(length / `2` - `1`) + "..." + string().right(length / `2` - `2`);
991	}
992
993	URL URL::fakeURLWithRelativePart(const String& relativePart)
994	{
995	return URL (URL (), "webkit-fake-url://" + createCanonicalUUIDString() + `'/'` + relativePart);
996	}
997
998	URL URL::fileURLWithFileSystemPath(const String& filePath)
999	{
1000	return URL (URL (), "file:///" + filePath);
1001	}
1002
1003	TextStream& operator<<(TextStream& ts, const URL& url)
1004	{
1005	ts << url.string();
1006	return ts;
1007	}
1008
1009	#if !PLATFORM(COCOA) && !USE(SOUP)
1010	static bool isIPv4Address(StringView string)
1011	{
1012	auto count = `0`;
1013
1014	for (const auto octet : string.splitAllowingEmptyEntries(`'.'`)) {
1015	if (count >= `4`)
1016	return false;
1017
1018	const auto length = octet.length();
1019	if (!length \|\| length > `3`)
1020	return false;
1021
1022	auto value = `0`;
1023	for (auto i = `0u`; i < length; ++i) {
1024	const auto digit = octet[i];
1025
1026	// Prohibit leading zeroes.
1027	if (digit > `'9'` \|\| digit < (!i && length > `1` ? `'1'` : `'0'`))
1028	return false;
1029
1030	value = `10` * value + (digit - `'0'`);
1031	}
1032
1033	if (value > `255`)
1034	return false;
1035
1036	count++;
1037	}
1038
1039	return (count == `4`);
1040	}
1041
1042	static bool isIPv6Address(StringView string)
1043	{
1044	enum SkipState { None, WillSkip, Skipping, Skipped, Final };
1045	auto skipState = None;
1046	auto count = `0`;
1047
1048	for (const auto hextet : string.splitAllowingEmptyEntries(`':'`)) {
1049	if (count >= `8` \|\| skipState == Final)
1050	return false;
1051
1052	const auto length = hextet.length();
1053	if (!length) {
1054	// :: may be used anywhere to skip 1 to 8 hextets, but only once.
1055	if (skipState == Skipped)
1056	return false;
1057
1058	if (skipState == None)
1059	skipState = !count ? WillSkip : Skipping;
1060	else if (skipState == WillSkip)
1061	skipState = Skipping;
1062	else
1063	skipState = Final;
1064	continue;
1065	}
1066
1067	if (skipState == WillSkip)
1068	return false;
1069
1070	if (skipState == Skipping)
1071	skipState = Skipped;
1072
1073	if (length > `4`) {
1074	// An IPv4 address may be used in place of the final two hextets.
1075	if ((skipState == None && count != `6`) \|\| (skipState == Skipped && count >= `6`) \|\| !isIPv4Address(hextet))
1076	return false;
1077
1078	skipState = Final;
1079	continue;
1080	}
1081
1082	for (const auto codeUnit : hextet.codeUnits()) {
1083	// IPv6 allows leading zeroes.
1084	if (!isASCIIHexDigit(codeUnit))
1085	return false;
1086	}
1087
1088	count++;
1089	}
1090
1091	return (count == `8` && skipState == None) \|\| skipState == Skipped \|\| skipState == Final;
1092	}
1093
1094	bool URL::hostIsIPAddress(StringView host)
1095	{
1096	if (host.find(`':'`) == notFound)
1097	return isIPv4Address(host);
1098
1099	return isIPv6Address(host);
1100	}
1101	#endif
1102
1103	} // namespace WTF
1104

Browse the source code of webkit/Source/WTF/wtf/URL.cpp