1 | /* |
2 | * Copyright (C) 2008-2017 Apple Inc. All Rights Reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | #include "ManifestParser.h" |
28 | |
29 | #include "TextResourceDecoder.h" |
30 | #include <wtf/URL.h> |
31 | #include <wtf/text/StringView.h> |
32 | #include <wtf/unicode/CharacterNames.h> |
33 | |
34 | namespace WebCore { |
35 | |
// Section of the manifest that the parser is currently reading.
enum Mode {
    Explicit,        // Entered on a "CACHE:" line; also the mode the parser starts in.
    Fallback,        // Entered on a "FALLBACK:" line.
    OnlineWhitelist, // Entered on a "NETWORK:" line.
    Unknown          // Entered on any other line ending in ':'; its entries are skipped.
};
37 | |
38 | static String manifestPath(const URL& manifestURL) |
39 | { |
40 | String manifestPath = manifestURL.path(); |
41 | ASSERT(manifestPath[0] == '/'); |
42 | manifestPath = manifestPath.substring(0, manifestPath.reverseFind('/') + 1); |
43 | ASSERT(manifestPath[0] == manifestPath[manifestPath.length() - 1]); |
44 | return manifestPath; |
45 | } |
46 | |
47 | bool parseManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length, Manifest& manifest) |
48 | { |
49 | ASSERT(manifest.explicitURLs.isEmpty()); |
50 | ASSERT(manifest.onlineWhitelistedURLs.isEmpty()); |
51 | ASSERT(manifest.fallbackURLs.isEmpty()); |
52 | manifest.allowAllNetworkRequests = false; |
53 | |
54 | String manifestPath = WebCore::manifestPath(manifestURL); |
55 | |
56 | const char cacheManifestMIMEType[] = "text/cache-manifest" ; |
57 | bool allowFallbackNamespaceOutsideManfestPath = equalLettersIgnoringASCIICase(manifestMIMEType, cacheManifestMIMEType); |
58 | |
59 | Mode mode = Explicit; |
60 | |
61 | String manifestString = TextResourceDecoder::create(ASCIILiteral::fromLiteralUnsafe(cacheManifestMIMEType), "UTF-8" )->decodeAndFlush(data, length); |
62 | |
63 | // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder). |
64 | // Example: "CACHE MANIFEST #comment" is a valid signature. |
65 | // Example: "CACHE MANIFEST;V2" is not. |
66 | const char manifestSignature[] = "CACHE MANIFEST" ; |
67 | if (!manifestString.startsWith(manifestSignature)) |
68 | return false; |
69 | |
70 | StringView manifestAfterSignature = StringView(manifestString).substring(sizeof(manifestSignature) - 1); |
71 | auto upconvertedCharacters = manifestAfterSignature.upconvertedCharacters(); |
72 | const UChar* p = upconvertedCharacters; |
73 | const UChar* end = p + manifestAfterSignature.length(); |
74 | |
75 | if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') |
76 | return false; |
77 | |
78 | // Skip to the end of the line. |
79 | while (p < end && *p != '\r' && *p != '\n') |
80 | p++; |
81 | |
82 | while (1) { |
83 | // Skip whitespace |
84 | while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t')) |
85 | p++; |
86 | |
87 | if (p == end) |
88 | break; |
89 | |
90 | const UChar* lineStart = p; |
91 | |
92 | // Find the end of the line |
93 | while (p < end && *p != '\r' && *p != '\n') |
94 | p++; |
95 | |
96 | // Check if we have a comment |
97 | if (*lineStart == '#') |
98 | continue; |
99 | |
100 | // Get rid of trailing whitespace |
101 | const UChar* tmp = p - 1; |
102 | while (tmp > lineStart && (*tmp == ' ' || *tmp == '\t')) |
103 | tmp--; |
104 | |
105 | String line(lineStart, tmp - lineStart + 1); |
106 | |
107 | if (line == "CACHE:" ) |
108 | mode = Explicit; |
109 | else if (line == "FALLBACK:" ) |
110 | mode = Fallback; |
111 | else if (line == "NETWORK:" ) |
112 | mode = OnlineWhitelist; |
113 | else if (line.endsWith(':')) |
114 | mode = Unknown; |
115 | else if (mode == Unknown) |
116 | continue; |
117 | else if (mode == Explicit || mode == OnlineWhitelist) { |
118 | auto upconvertedLineCharacters = StringView(line).upconvertedCharacters(); |
119 | const UChar* p = upconvertedLineCharacters; |
120 | const UChar* lineEnd = p + line.length(); |
121 | |
122 | // Look for whitespace separating the URL from subsequent ignored tokens. |
123 | while (p < lineEnd && *p != '\t' && *p != ' ') |
124 | p++; |
125 | |
126 | if (mode == OnlineWhitelist && p - upconvertedLineCharacters == 1 && line[0] == '*') { |
127 | // Wildcard was found. |
128 | manifest.allowAllNetworkRequests = true; |
129 | continue; |
130 | } |
131 | |
132 | URL url(manifestURL, line.substring(0, p - upconvertedLineCharacters)); |
133 | |
134 | if (!url.isValid()) |
135 | continue; |
136 | |
137 | url.removeFragmentIdentifier(); |
138 | |
139 | if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol())) |
140 | continue; |
141 | |
142 | if (mode == Explicit && manifestURL.protocolIs("https" ) && !protocolHostAndPortAreEqual(manifestURL, url)) |
143 | continue; |
144 | |
145 | if (mode == Explicit) |
146 | manifest.explicitURLs.add(url.string()); |
147 | else |
148 | manifest.onlineWhitelistedURLs.append(url); |
149 | |
150 | } else if (mode == Fallback) { |
151 | auto upconvertedLineCharacters = StringView(line).upconvertedCharacters(); |
152 | const UChar* p = upconvertedLineCharacters; |
153 | const UChar* lineEnd = p + line.length(); |
154 | |
155 | // Look for whitespace separating the two URLs |
156 | while (p < lineEnd && *p != '\t' && *p != ' ') |
157 | p++; |
158 | |
159 | if (p == lineEnd) { |
160 | // There was no whitespace separating the URLs. |
161 | continue; |
162 | } |
163 | |
164 | URL namespaceURL(manifestURL, line.substring(0, p - upconvertedLineCharacters)); |
165 | if (!namespaceURL.isValid()) |
166 | continue; |
167 | namespaceURL.removeFragmentIdentifier(); |
168 | |
169 | if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL)) |
170 | continue; |
171 | |
172 | // Although <https://html.spec.whatwg.org/multipage/offline.html#parsing-cache-manifests> (07/06/2017) saids |
173 | // that we should always prefix match the manifest path we only do so if the manifest was served with a non- |
174 | // standard HTTP Content-Type header for web compatibility. |
175 | if (!allowFallbackNamespaceOutsideManfestPath && !namespaceURL.path().startsWith(manifestPath)) |
176 | continue; |
177 | |
178 | // Skip whitespace separating fallback namespace from URL. |
179 | while (p < lineEnd && (*p == '\t' || *p == ' ')) |
180 | p++; |
181 | |
182 | // Look for whitespace separating the URL from subsequent ignored tokens. |
183 | const UChar* fallbackStart = p; |
184 | while (p < lineEnd && *p != '\t' && *p != ' ') |
185 | p++; |
186 | |
187 | URL fallbackURL(manifestURL, String(fallbackStart, p - fallbackStart)); |
188 | if (!fallbackURL.isValid()) |
189 | continue; |
190 | fallbackURL.removeFragmentIdentifier(); |
191 | |
192 | if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL)) |
193 | continue; |
194 | |
195 | manifest.fallbackURLs.append(std::make_pair(namespaceURL, fallbackURL)); |
196 | } else |
197 | ASSERT_NOT_REACHED(); |
198 | } |
199 | |
200 | return true; |
201 | } |
202 | |
203 | } |
204 | |