1 | /* |
2 | * Copyright (C) 2016 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
23 | * THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | #include "WTFStringUtilities.h" |
28 | #include <WebCore/TextEncoding.h> |
29 | #include <wtf/MainThread.h> |
30 | #include <wtf/URLParser.h> |
31 | #include <wtf/text/StringBuilder.h> |
32 | |
33 | using namespace WebCore; |
34 | |
35 | namespace TestWebKitAPI { |
36 | |
37 | class URLParserTextEncodingTest : public testing::Test { |
38 | public: |
39 | void SetUp() final |
40 | { |
41 | WTF::initializeMainThread(); |
42 | } |
43 | }; |
44 | |
45 | struct ExpectedParts { |
46 | String protocol; |
47 | String user; |
48 | String password; |
49 | String host; |
50 | unsigned short port; |
51 | String path; |
52 | String query; |
53 | String fragment; |
54 | String string; |
55 | |
56 | bool isInvalid() const |
57 | { |
58 | return protocol.isEmpty() |
59 | && user.isEmpty() |
60 | && password.isEmpty() |
61 | && host.isEmpty() |
62 | && !port |
63 | && path.isEmpty() |
64 | && query.isEmpty() |
65 | && fragment.isEmpty(); |
66 | } |
67 | }; |
68 | |
// Compares two string-like values (anything exposing utf8()) by their UTF-8 encodings.
// Records a gtest expectation so a mismatch prints both strings, and also returns
// whether the two encodings are equal so callers can wrap the call in EXPECT_TRUE.
template<typename T, typename U>
bool eq(T&& s1, U&& s2)
{
    const auto firstUTF8 = s1.utf8();
    const auto secondUTF8 = s2.utf8();
    EXPECT_STREQ(firstUTF8.data(), secondUTF8.data());
    return firstUTF8 == secondUTF8;
}
75 | |
76 | static String insertTabAtLocation(const String& string, size_t location) |
77 | { |
78 | ASSERT(location <= string.length()); |
79 | return makeString(string.substring(0, location), "\t" , string.substring(location)); |
80 | } |
81 | |
82 | static ExpectedParts invalidParts(const String& urlStringWithTab) |
83 | { |
84 | return {"" , "" , "" , "" , 0, "" , "" , "" , urlStringWithTab}; |
85 | } |
86 | |
// Selects whether checkURL also re-runs its expectations with a tab inserted at each
// position of the input string.
enum class TestTabs {
    No,
    Yes,
};

// A tab inserted between the two halves of a surrogate pair changes the encoded value
// instead of being skipped by the URLParser, so inputs containing surrogate pairs are
// checked without the tab-insertion pass.
constexpr TestTabs testTabsValueForSurrogatePairs = TestTabs::No;
91 | |
92 | static void checkURL(const String& urlString, const TextEncoding* encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes) |
93 | { |
94 | auto url = URL({ }, urlString, encoding); |
95 | EXPECT_TRUE(eq(parts.protocol, url.protocol())); |
96 | EXPECT_TRUE(eq(parts.user, url.user())); |
97 | EXPECT_TRUE(eq(parts.password, url.pass())); |
98 | EXPECT_TRUE(eq(parts.host, url.host())); |
99 | EXPECT_EQ(parts.port, url.port().valueOr(0)); |
100 | EXPECT_TRUE(eq(parts.path, url.path())); |
101 | EXPECT_TRUE(eq(parts.query, url.query())); |
102 | EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier())); |
103 | EXPECT_TRUE(eq(parts.string, url.string())); |
104 | |
105 | if (testTabs == TestTabs::No) |
106 | return; |
107 | |
108 | for (size_t i = 0; i < urlString.length(); ++i) { |
109 | String urlStringWithTab = insertTabAtLocation(urlString, i); |
110 | checkURL(urlStringWithTab, encoding, |
111 | parts.isInvalid() ? invalidParts(urlStringWithTab) : parts, |
112 | TestTabs::No); |
113 | } |
114 | } |
115 | |
116 | static void checkURL(const String& urlString, const String& baseURLString, const TextEncoding* encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes) |
117 | { |
118 | auto url = URL(URL({ }, baseURLString), urlString, encoding); |
119 | EXPECT_TRUE(eq(parts.protocol, url.protocol())); |
120 | EXPECT_TRUE(eq(parts.user, url.user())); |
121 | EXPECT_TRUE(eq(parts.password, url.pass())); |
122 | EXPECT_TRUE(eq(parts.host, url.host())); |
123 | EXPECT_EQ(parts.port, url.port().valueOr(0)); |
124 | EXPECT_TRUE(eq(parts.path, url.path())); |
125 | EXPECT_TRUE(eq(parts.query, url.query())); |
126 | EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier())); |
127 | EXPECT_TRUE(eq(parts.string, url.string())); |
128 | |
129 | if (testTabs == TestTabs::No) |
130 | return; |
131 | |
132 | for (size_t i = 0; i < urlString.length(); ++i) { |
133 | String urlStringWithTab = insertTabAtLocation(urlString, i); |
134 | checkURL(urlStringWithTab, baseURLString, encoding, |
135 | parts.isInvalid() ? invalidParts(urlStringWithTab) : parts, |
136 | TestTabs::No); |
137 | } |
138 | } |
139 | |
140 | TEST_F(URLParserTextEncodingTest, QueryEncoding) |
141 | { |
142 | checkURL(utf16String(u"http://host?ß😍#ß😍" ), nullptr, {"http" , "" , "" , "host" , 0, "/" , "%C3%9F%F0%9F%98%8D" , "%C3%9F%F0%9F%98%8D" , utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D" )}, testTabsValueForSurrogatePairs); |
143 | |
144 | TextEncoding latin1(String("latin1" )); |
145 | checkURL("http://host/?query with%20spaces" , &latin1, {"http" , "" , "" , "host" , 0, "/" , "query%20with%20spaces" , "" , "http://host/?query%20with%20spaces" }); |
146 | checkURL("http://host/?query" , &latin1, {"http" , "" , "" , "host" , 0, "/" , "query" , "" , "http://host/?query" }); |
147 | checkURL("http://host/?\tquery" , &latin1, {"http" , "" , "" , "host" , 0, "/" , "query" , "" , "http://host/?query" }); |
148 | checkURL("http://host/?q\tuery" , &latin1, {"http" , "" , "" , "host" , 0, "/" , "query" , "" , "http://host/?query" }); |
149 | checkURL("http://host/?query with SpAcEs#fragment" , &latin1, {"http" , "" , "" , "host" , 0, "/" , "query%20with%20SpAcEs" , "fragment" , "http://host/?query%20with%20SpAcEs#fragment" }); |
150 | checkURL("http://host/?que\rry\t\r\n#fragment" , &latin1, {"http" , "" , "" , "host" , 0, "/" , "query" , "fragment" , "http://host/?query#fragment" }); |
151 | |
152 | TextEncoding unrecognized(String("unrecognized invalid encoding name" )); |
153 | checkURL("http://host/?query" , &unrecognized, {"http" , "" , "" , "host" , 0, "/" , "" , "" , "http://host/?" }); |
154 | checkURL("http://host/?" , &unrecognized, {"http" , "" , "" , "host" , 0, "/" , "" , "" , "http://host/?" }); |
155 | |
156 | TextEncoding iso88591(String("ISO-8859-1" )); |
157 | String withUmlauts = utf16String<4>({0xDC, 0x430, 0x451, '\0'}); |
158 | checkURL(makeString("ws://host/path?" , withUmlauts), &iso88591, {"ws" , "" , "" , "host" , 0, "/path" , "%C3%9C%D0%B0%D1%91" , "" , "ws://host/path?%C3%9C%D0%B0%D1%91" }); |
159 | checkURL(makeString("wss://host/path?" , withUmlauts), &iso88591, {"wss" , "" , "" , "host" , 0, "/path" , "%C3%9C%D0%B0%D1%91" , "" , "wss://host/path?%C3%9C%D0%B0%D1%91" }); |
160 | checkURL(makeString("asdf://host/path?" , withUmlauts), &iso88591, {"asdf" , "" , "" , "host" , 0, "/path" , "%C3%9C%D0%B0%D1%91" , "" , "asdf://host/path?%C3%9C%D0%B0%D1%91" }); |
161 | checkURL(makeString("https://host/path?" , withUmlauts), &iso88591, {"https" , "" , "" , "host" , 0, "/path" , "%DC%26%231072%3B%26%231105%3B" , "" , "https://host/path?%DC%26%231072%3B%26%231105%3B" }); |
162 | checkURL(makeString("gopher://host/path?" , withUmlauts), &iso88591, {"gopher" , "" , "" , "host" , 0, "/path" , "%DC%26%231072%3B%26%231105%3B" , "" , "gopher://host/path?%DC%26%231072%3B%26%231105%3B" }); |
163 | checkURL(makeString("/path?" , withUmlauts, "#fragment" ), "ws://example.com/" , &iso88591, {"ws" , "" , "" , "example.com" , 0, "/path" , "%C3%9C%D0%B0%D1%91" , "fragment" , "ws://example.com/path?%C3%9C%D0%B0%D1%91#fragment" }); |
164 | checkURL(makeString("/path?" , withUmlauts, "#fragment" ), "wss://example.com/" , &iso88591, {"wss" , "" , "" , "example.com" , 0, "/path" , "%C3%9C%D0%B0%D1%91" , "fragment" , "wss://example.com/path?%C3%9C%D0%B0%D1%91#fragment" }); |
165 | checkURL(makeString("/path?" , withUmlauts, "#fragment" ), "asdf://example.com/" , &iso88591, {"asdf" , "" , "" , "example.com" , 0, "/path" , "%C3%9C%D0%B0%D1%91" , "fragment" , "asdf://example.com/path?%C3%9C%D0%B0%D1%91#fragment" }); |
166 | checkURL(makeString("/path?" , withUmlauts, "#fragment" ), "https://example.com/" , &iso88591, {"https" , "" , "" , "example.com" , 0, "/path" , "%DC%26%231072%3B%26%231105%3B" , "fragment" , "https://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment" }); |
167 | checkURL(makeString("/path?" , withUmlauts, "#fragment" ), "gopher://example.com/" , &iso88591, {"gopher" , "" , "" , "example.com" , 0, "/path" , "%DC%26%231072%3B%26%231105%3B" , "fragment" , "gopher://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment" }); |
168 | checkURL(makeString("gopher://host/path?" , withUmlauts, "#fragment" ), "asdf://example.com/?doesntmatter" , &iso88591, {"gopher" , "" , "" , "host" , 0, "/path" , "%DC%26%231072%3B%26%231105%3B" , "fragment" , "gopher://host/path?%DC%26%231072%3B%26%231105%3B#fragment" }); |
169 | checkURL(makeString("asdf://host/path?" , withUmlauts, "#fragment" ), "http://example.com/?doesntmatter" , &iso88591, {"asdf" , "" , "" , "host" , 0, "/path" , "%C3%9C%D0%B0%D1%91" , "fragment" , "asdf://host/path?%C3%9C%D0%B0%D1%91#fragment" }); |
170 | |
171 | checkURL("http://host/pa'th?qu'ery#fr'agment" , nullptr, {"http" , "" , "" , "host" , 0, "/pa'th" , "qu%27ery" , "fr'agment" , "http://host/pa'th?qu%27ery#fr'agment" }); |
172 | checkURL("asdf://host/pa'th?qu'ery#fr'agment" , nullptr, {"asdf" , "" , "" , "host" , 0, "/pa'th" , "qu'ery" , "fr'agment" , "asdf://host/pa'th?qu'ery#fr'agment" }); |
173 | // FIXME: Add more tests with other encodings and things like non-ascii characters, emoji and unmatched surrogate pairs. |
174 | } |
175 | |
176 | } // namespace TestWebKitAPI |
177 | |