1/*
2 * Copyright (C) 2008-2017 Apple Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "ManifestParser.h"
28
29#include "TextResourceDecoder.h"
30#include <wtf/URL.h>
31#include <wtf/text/StringView.h>
32#include <wtf/unicode/CharacterNames.h>
33
34namespace WebCore {
35
// Manifest section the parser is currently inside; it decides how each entry line is interpreted.
enum Mode {
    Explicit, // Entries following a "CACHE:" header.
    Fallback, // Entries following a "FALLBACK:" header.
    OnlineWhitelist, // Entries following a "NETWORK:" header.
    Unknown // Entries following any other header line ending in ':'.
};
37
38static String manifestPath(const URL& manifestURL)
39{
40 String manifestPath = manifestURL.path();
41 ASSERT(manifestPath[0] == '/');
42 manifestPath = manifestPath.substring(0, manifestPath.reverseFind('/') + 1);
43 ASSERT(manifestPath[0] == manifestPath[manifestPath.length() - 1]);
44 return manifestPath;
45}
46
47bool parseManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length, Manifest& manifest)
48{
49 ASSERT(manifest.explicitURLs.isEmpty());
50 ASSERT(manifest.onlineWhitelistedURLs.isEmpty());
51 ASSERT(manifest.fallbackURLs.isEmpty());
52 manifest.allowAllNetworkRequests = false;
53
54 String manifestPath = WebCore::manifestPath(manifestURL);
55
56 const char cacheManifestMIMEType[] = "text/cache-manifest";
57 bool allowFallbackNamespaceOutsideManfestPath = equalLettersIgnoringASCIICase(manifestMIMEType, cacheManifestMIMEType);
58
59 Mode mode = Explicit;
60
61 String manifestString = TextResourceDecoder::create(ASCIILiteral::fromLiteralUnsafe(cacheManifestMIMEType), "UTF-8")->decodeAndFlush(data, length);
62
63 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder).
64 // Example: "CACHE MANIFEST #comment" is a valid signature.
65 // Example: "CACHE MANIFEST;V2" is not.
66 const char manifestSignature[] = "CACHE MANIFEST";
67 if (!manifestString.startsWith(manifestSignature))
68 return false;
69
70 StringView manifestAfterSignature = StringView(manifestString).substring(sizeof(manifestSignature) - 1);
71 auto upconvertedCharacters = manifestAfterSignature.upconvertedCharacters();
72 const UChar* p = upconvertedCharacters;
73 const UChar* end = p + manifestAfterSignature.length();
74
75 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
76 return false;
77
78 // Skip to the end of the line.
79 while (p < end && *p != '\r' && *p != '\n')
80 p++;
81
82 while (1) {
83 // Skip whitespace
84 while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
85 p++;
86
87 if (p == end)
88 break;
89
90 const UChar* lineStart = p;
91
92 // Find the end of the line
93 while (p < end && *p != '\r' && *p != '\n')
94 p++;
95
96 // Check if we have a comment
97 if (*lineStart == '#')
98 continue;
99
100 // Get rid of trailing whitespace
101 const UChar* tmp = p - 1;
102 while (tmp > lineStart && (*tmp == ' ' || *tmp == '\t'))
103 tmp--;
104
105 String line(lineStart, tmp - lineStart + 1);
106
107 if (line == "CACHE:")
108 mode = Explicit;
109 else if (line == "FALLBACK:")
110 mode = Fallback;
111 else if (line == "NETWORK:")
112 mode = OnlineWhitelist;
113 else if (line.endsWith(':'))
114 mode = Unknown;
115 else if (mode == Unknown)
116 continue;
117 else if (mode == Explicit || mode == OnlineWhitelist) {
118 auto upconvertedLineCharacters = StringView(line).upconvertedCharacters();
119 const UChar* p = upconvertedLineCharacters;
120 const UChar* lineEnd = p + line.length();
121
122 // Look for whitespace separating the URL from subsequent ignored tokens.
123 while (p < lineEnd && *p != '\t' && *p != ' ')
124 p++;
125
126 if (mode == OnlineWhitelist && p - upconvertedLineCharacters == 1 && line[0] == '*') {
127 // Wildcard was found.
128 manifest.allowAllNetworkRequests = true;
129 continue;
130 }
131
132 URL url(manifestURL, line.substring(0, p - upconvertedLineCharacters));
133
134 if (!url.isValid())
135 continue;
136
137 url.removeFragmentIdentifier();
138
139 if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol()))
140 continue;
141
142 if (mode == Explicit && manifestURL.protocolIs("https") && !protocolHostAndPortAreEqual(manifestURL, url))
143 continue;
144
145 if (mode == Explicit)
146 manifest.explicitURLs.add(url.string());
147 else
148 manifest.onlineWhitelistedURLs.append(url);
149
150 } else if (mode == Fallback) {
151 auto upconvertedLineCharacters = StringView(line).upconvertedCharacters();
152 const UChar* p = upconvertedLineCharacters;
153 const UChar* lineEnd = p + line.length();
154
155 // Look for whitespace separating the two URLs
156 while (p < lineEnd && *p != '\t' && *p != ' ')
157 p++;
158
159 if (p == lineEnd) {
160 // There was no whitespace separating the URLs.
161 continue;
162 }
163
164 URL namespaceURL(manifestURL, line.substring(0, p - upconvertedLineCharacters));
165 if (!namespaceURL.isValid())
166 continue;
167 namespaceURL.removeFragmentIdentifier();
168
169 if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL))
170 continue;
171
172 // Although <https://html.spec.whatwg.org/multipage/offline.html#parsing-cache-manifests> (07/06/2017) saids
173 // that we should always prefix match the manifest path we only do so if the manifest was served with a non-
174 // standard HTTP Content-Type header for web compatibility.
175 if (!allowFallbackNamespaceOutsideManfestPath && !namespaceURL.path().startsWith(manifestPath))
176 continue;
177
178 // Skip whitespace separating fallback namespace from URL.
179 while (p < lineEnd && (*p == '\t' || *p == ' '))
180 p++;
181
182 // Look for whitespace separating the URL from subsequent ignored tokens.
183 const UChar* fallbackStart = p;
184 while (p < lineEnd && *p != '\t' && *p != ' ')
185 p++;
186
187 URL fallbackURL(manifestURL, String(fallbackStart, p - fallbackStart));
188 if (!fallbackURL.isValid())
189 continue;
190 fallbackURL.removeFragmentIdentifier();
191
192 if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL))
193 continue;
194
195 manifest.fallbackURLs.append(std::make_pair(namespaceURL, fallbackURL));
196 } else
197 ASSERT_NOT_REACHED();
198 }
199
200 return true;
201}
202
203}
204