1/*
2 * Copyright (C) 2016 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "WTFStringUtilities.h"
28#include <WebCore/TextEncoding.h>
29#include <wtf/MainThread.h>
30#include <wtf/URLParser.h>
31#include <wtf/text/StringBuilder.h>
32
33using namespace WebCore;
34
35namespace TestWebKitAPI {
36
37class URLParserTextEncodingTest : public testing::Test {
38public:
39 void SetUp() final
40 {
41 WTF::initializeMainThread();
42 }
43};
44
45struct ExpectedParts {
46 String protocol;
47 String user;
48 String password;
49 String host;
50 unsigned short port;
51 String path;
52 String query;
53 String fragment;
54 String string;
55
56 bool isInvalid() const
57 {
58 return protocol.isEmpty()
59 && user.isEmpty()
60 && password.isEmpty()
61 && host.isEmpty()
62 && !port
63 && path.isEmpty()
64 && query.isEmpty()
65 && fragment.isEmpty();
66 }
67};
68
// Compares two string-like values (anything providing utf8()) by their UTF-8 form.
// EXPECT_STREQ records a non-fatal gtest failure showing both strings if their
// C-string contents differ; the return value is the result of comparing the two
// utf8() results with operator==.
template<typename T, typename U>
bool eq(T&& s1, U&& s2)
{
    const auto firstUTF8 = s1.utf8();
    const auto secondUTF8 = s2.utf8();
    EXPECT_STREQ(firstUTF8.data(), secondUTF8.data());
    return firstUTF8 == secondUTF8;
}
75
76static String insertTabAtLocation(const String& string, size_t location)
77{
78 ASSERT(location <= string.length());
79 return makeString(string.substring(0, location), "\t", string.substring(location));
80}
81
82static ExpectedParts invalidParts(const String& urlStringWithTab)
83{
84 return {"", "", "", "", 0, "" , "", "", urlStringWithTab};
85}
86
// Selects whether checkURL additionally repeats its checks with a tab inserted
// at each position of the input string.
enum class TestTabs {
    No,
    Yes
};

// Inserting a tab between the two halves of a surrogate pair changes the encoded
// value instead of being skipped by the URLParser, so inputs containing surrogate
// pairs pass this value to checkURL to turn the tab-insertion checks off.
constexpr TestTabs testTabsValueForSurrogatePairs = TestTabs::No;
91
92static void checkURL(const String& urlString, const TextEncoding* encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes)
93{
94 auto url = URL({ }, urlString, encoding);
95 EXPECT_TRUE(eq(parts.protocol, url.protocol()));
96 EXPECT_TRUE(eq(parts.user, url.user()));
97 EXPECT_TRUE(eq(parts.password, url.pass()));
98 EXPECT_TRUE(eq(parts.host, url.host()));
99 EXPECT_EQ(parts.port, url.port().valueOr(0));
100 EXPECT_TRUE(eq(parts.path, url.path()));
101 EXPECT_TRUE(eq(parts.query, url.query()));
102 EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
103 EXPECT_TRUE(eq(parts.string, url.string()));
104
105 if (testTabs == TestTabs::No)
106 return;
107
108 for (size_t i = 0; i < urlString.length(); ++i) {
109 String urlStringWithTab = insertTabAtLocation(urlString, i);
110 checkURL(urlStringWithTab, encoding,
111 parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
112 TestTabs::No);
113 }
114}
115
116static void checkURL(const String& urlString, const String& baseURLString, const TextEncoding* encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes)
117{
118 auto url = URL(URL({ }, baseURLString), urlString, encoding);
119 EXPECT_TRUE(eq(parts.protocol, url.protocol()));
120 EXPECT_TRUE(eq(parts.user, url.user()));
121 EXPECT_TRUE(eq(parts.password, url.pass()));
122 EXPECT_TRUE(eq(parts.host, url.host()));
123 EXPECT_EQ(parts.port, url.port().valueOr(0));
124 EXPECT_TRUE(eq(parts.path, url.path()));
125 EXPECT_TRUE(eq(parts.query, url.query()));
126 EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
127 EXPECT_TRUE(eq(parts.string, url.string()));
128
129 if (testTabs == TestTabs::No)
130 return;
131
132 for (size_t i = 0; i < urlString.length(); ++i) {
133 String urlStringWithTab = insertTabAtLocation(urlString, i);
134 checkURL(urlStringWithTab, baseURLString, encoding,
135 parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
136 TestTabs::No);
137 }
138}
139
140TEST_F(URLParserTextEncodingTest, QueryEncoding)
141{
142 checkURL(utf16String(u"http://host?ß😍#ß😍"), nullptr, {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D")}, testTabsValueForSurrogatePairs);
143
144 TextEncoding latin1(String("latin1"));
145 checkURL("http://host/?query with%20spaces", &latin1, {"http", "", "", "host", 0, "/", "query%20with%20spaces", "", "http://host/?query%20with%20spaces"});
146 checkURL("http://host/?query", &latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});
147 checkURL("http://host/?\tquery", &latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});
148 checkURL("http://host/?q\tuery", &latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});
149 checkURL("http://host/?query with SpAcEs#fragment", &latin1, {"http", "", "", "host", 0, "/", "query%20with%20SpAcEs", "fragment", "http://host/?query%20with%20SpAcEs#fragment"});
150 checkURL("http://host/?que\rry\t\r\n#fragment", &latin1, {"http", "", "", "host", 0, "/", "query", "fragment", "http://host/?query#fragment"});
151
152 TextEncoding unrecognized(String("unrecognized invalid encoding name"));
153 checkURL("http://host/?query", &unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"});
154 checkURL("http://host/?", &unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"});
155
156 TextEncoding iso88591(String("ISO-8859-1"));
157 String withUmlauts = utf16String<4>({0xDC, 0x430, 0x451, '\0'});
158 checkURL(makeString("ws://host/path?", withUmlauts), &iso88591, {"ws", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "ws://host/path?%C3%9C%D0%B0%D1%91"});
159 checkURL(makeString("wss://host/path?", withUmlauts), &iso88591, {"wss", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "wss://host/path?%C3%9C%D0%B0%D1%91"});
160 checkURL(makeString("asdf://host/path?", withUmlauts), &iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "asdf://host/path?%C3%9C%D0%B0%D1%91"});
161 checkURL(makeString("https://host/path?", withUmlauts), &iso88591, {"https", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "https://host/path?%DC%26%231072%3B%26%231105%3B"});
162 checkURL(makeString("gopher://host/path?", withUmlauts), &iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "gopher://host/path?%DC%26%231072%3B%26%231105%3B"});
163 checkURL(makeString("/path?", withUmlauts, "#fragment"), "ws://example.com/", &iso88591, {"ws", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "ws://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});
164 checkURL(makeString("/path?", withUmlauts, "#fragment"), "wss://example.com/", &iso88591, {"wss", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "wss://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});
165 checkURL(makeString("/path?", withUmlauts, "#fragment"), "asdf://example.com/", &iso88591, {"asdf", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});
166 checkURL(makeString("/path?", withUmlauts, "#fragment"), "https://example.com/", &iso88591, {"https", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "https://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"});
167 checkURL(makeString("/path?", withUmlauts, "#fragment"), "gopher://example.com/", &iso88591, {"gopher", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "gopher://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"});
168 checkURL(makeString("gopher://host/path?", withUmlauts, "#fragment"), "asdf://example.com/?doesntmatter", &iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "gopher://host/path?%DC%26%231072%3B%26%231105%3B#fragment"});
169 checkURL(makeString("asdf://host/path?", withUmlauts, "#fragment"), "http://example.com/?doesntmatter", &iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://host/path?%C3%9C%D0%B0%D1%91#fragment"});
170
171 checkURL("http://host/pa'th?qu'ery#fr'agment", nullptr, {"http", "", "", "host", 0, "/pa'th", "qu%27ery", "fr'agment", "http://host/pa'th?qu%27ery#fr'agment"});
172 checkURL("asdf://host/pa'th?qu'ery#fr'agment", nullptr, {"asdf", "", "", "host", 0, "/pa'th", "qu'ery", "fr'agment", "asdf://host/pa'th?qu'ery#fr'agment"});
173 // FIXME: Add more tests with other encodings and things like non-ascii characters, emoji and unmatched surrogate pairs.
174}
175
176} // namespace TestWebKitAPI
177