1 | /* |
2 | * Copyright (C) 2011 Adam Barth. All Rights Reserved. |
3 | * Copyright (C) 2011 Daniel Bates (dbates@intudata.com). |
4 | * Copyright (C) 2017 Apple Inc. All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | */ |
27 | |
28 | #include "config.h" |
29 | #include "XSSAuditor.h" |
30 | |
31 | #include "DecodeEscapeSequences.h" |
32 | #include "Document.h" |
33 | #include "DocumentLoader.h" |
34 | #include "FormData.h" |
35 | #include "Frame.h" |
36 | #include "FrameLoader.h" |
37 | #include "HTMLDocumentParser.h" |
38 | #include "HTMLNames.h" |
39 | #include "HTMLParamElement.h" |
40 | #include "HTMLParserIdioms.h" |
41 | #include "SVGNames.h" |
42 | #include "Settings.h" |
43 | #include "TextResourceDecoder.h" |
44 | #include "XLinkNames.h" |
45 | #include <wtf/ASCIICType.h> |
46 | #include <wtf/MainThread.h> |
47 | #include <wtf/NeverDestroyed.h> |
48 | #include <wtf/text/StringConcatenateNumbers.h> |
49 | |
50 | namespace WebCore { |
51 | |
52 | using namespace HTMLNames; |
53 | |
54 | static bool isNonCanonicalCharacter(UChar c) |
55 | { |
56 | // We remove all non-ASCII characters, including non-printable ASCII characters. |
57 | // |
58 | // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. |
59 | // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the |
60 | // adverse effect that we remove any legitimate zeros from a string. |
61 | // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg, |
62 | // a//b becomes a/b. |
63 | // |
64 | // For instance: new String("http://localhost:8000") => new String("http:localhost:8"). |
65 | return (c == '\\' || c == '0' || c == '\0' || c == '/' || c >= 127); |
66 | } |
67 | |
68 | static bool isRequiredForInjection(UChar c) |
69 | { |
70 | return (c == '\'' || c == '"' || c == '<' || c == '>'); |
71 | } |
72 | |
73 | static bool isTerminatingCharacter(UChar c) |
74 | { |
75 | return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || c == '>' || c == ','); |
76 | } |
77 | |
78 | static bool isHTMLQuote(UChar c) |
79 | { |
80 | return (c == '"' || c == '\''); |
81 | } |
82 | |
83 | static bool isJSNewline(UChar c) |
84 | { |
85 | // Per ecma-262 section 7.3 Line Terminators. |
86 | return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); |
87 | } |
88 | |
89 | static bool (const String& string, size_t start) |
90 | { |
91 | return (start + 3 < string.length() && string[start] == '<' && string[start + 1] == '!' && string[start + 2] == '-' && string[start + 3] == '-'); |
92 | } |
93 | |
94 | static bool (const String& string, size_t start) |
95 | { |
96 | return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '/'); |
97 | } |
98 | |
99 | static bool (const String& string, size_t start) |
100 | { |
101 | return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '*'); |
102 | } |
103 | |
104 | static bool startsOpeningScriptTagAt(const String& string, size_t start) |
105 | { |
106 | return start + 6 < string.length() && string[start] == '<' |
107 | && WTF::toASCIILowerUnchecked(string[start + 1]) == 's' |
108 | && WTF::toASCIILowerUnchecked(string[start + 2]) == 'c' |
109 | && WTF::toASCIILowerUnchecked(string[start + 3]) == 'r' |
110 | && WTF::toASCIILowerUnchecked(string[start + 4]) == 'i' |
111 | && WTF::toASCIILowerUnchecked(string[start + 5]) == 'p' |
112 | && WTF::toASCIILowerUnchecked(string[start + 6]) == 't'; |
113 | } |
114 | |
115 | // If other files need this, we should move this to HTMLParserIdioms.h |
116 | template<size_t inlineCapacity> |
117 | bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, const QualifiedName& qname) |
118 | { |
119 | return equalIgnoringNullity(vector, qname.localName().impl()); |
120 | } |
121 | |
122 | static bool hasName(const HTMLToken& token, const QualifiedName& name) |
123 | { |
124 | return threadSafeMatch(token.name(), name); |
125 | } |
126 | |
127 | static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute) |
128 | { |
129 | // Notice that we're careful not to ref the StringImpl here because we might be on a background thread. |
130 | const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI ? "xlink:" + name.localName().string() : name.localName().string(); |
131 | |
132 | for (size_t i = 0; i < token.attributes().size(); ++i) { |
133 | if (equalIgnoringNullity(token.attributes().at(i).name, attrName)) { |
134 | indexOfMatchingAttribute = i; |
135 | return true; |
136 | } |
137 | } |
138 | return false; |
139 | } |
140 | |
141 | static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) |
142 | { |
143 | const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut. |
144 | if (name.size() < lengthOfShortestInlineEventHandlerName) |
145 | return false; |
146 | return name[0] == 'o' && name[1] == 'n'; |
147 | } |
148 | |
149 | static bool isDangerousHTTPEquiv(const String& value) |
150 | { |
151 | String equiv = value.stripWhiteSpace(); |
152 | return equalLettersIgnoringASCIICase(equiv, "refresh" ); |
153 | } |
154 | |
155 | static inline String decode16BitUnicodeEscapeSequences(const String& string) |
156 | { |
157 | // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit. |
158 | return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, UTF8Encoding()); |
159 | } |
160 | |
161 | static inline String decodeStandardURLEscapeSequences(const String& string, const TextEncoding& encoding) |
162 | { |
163 | // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in URL.h) to |
164 | // avoid platform-specific URL decoding differences (e.g. URLGoogle). |
165 | return decodeEscapeSequences<URLEscapeSequence>(string, encoding); |
166 | } |
167 | |
168 | static String fullyDecodeString(const String& string, const TextEncoding& encoding) |
169 | { |
170 | size_t oldWorkingStringLength; |
171 | String workingString = string; |
172 | do { |
173 | oldWorkingStringLength = workingString.length(); |
174 | workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding)); |
175 | } while (workingString.length() < oldWorkingStringLength); |
176 | workingString.replace('+', ' '); |
177 | return workingString; |
178 | } |
179 | |
180 | static void truncateForSrcLikeAttribute(String& decodedSnippet) |
181 | { |
182 | // In HTTP URLs, characters following the first ?, #, or third slash may come from |
183 | // the page itself and can be merely ignored by an attacker's server when a remote |
184 | // script or script-like resource is requested. In data URLs, the payload starts at |
185 | // the first comma, and the first /*, //, or <!-- may introduce a comment. Also |
186 | // data URLs may use the same string literal tricks as with script content itself. |
187 | // In either case, content following this may come from the page and may be ignored |
188 | // when the script is executed. Also, any of these characters may now be represented |
189 | // by the (enlarged) set of HTML5 entities. |
190 | // For simplicity, we don't differentiate based on URL scheme, and stop at the first |
191 | // & (since it might be part of an entity for any of the subsequent punctuation) |
192 | // the first # or ?, the third slash, or the first slash, <, ', or " once a comma |
193 | // is seen. |
194 | int slashCount = 0; |
195 | bool commaSeen = false; |
196 | for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) { |
197 | UChar currentChar = decodedSnippet[currentLength]; |
198 | if (currentChar == '&' |
199 | || currentChar == '?' |
200 | || currentChar == '#' |
201 | || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2)) |
202 | || (currentChar == '<' && commaSeen) |
203 | || (currentChar == '\'' && commaSeen) |
204 | || (currentChar == '"' && commaSeen)) { |
205 | decodedSnippet.truncate(currentLength); |
206 | return; |
207 | } |
208 | if (currentChar == ',') |
209 | commaSeen = true; |
210 | } |
211 | } |
212 | |
213 | static void truncateForScriptLikeAttribute(String& decodedSnippet) |
214 | { |
215 | // Beware of trailing characters which came from the page itself, not the |
216 | // injected vector. Excluding the terminating character covers common cases |
217 | // where the page immediately ends the attribute, but doesn't cover more |
218 | // complex cases where there is other page data following the injection. |
219 | // Generally, these won't parse as JavaScript, so the injected vector |
220 | // typically excludes them from consideration via a single-line comment or |
221 | // by enclosing them in a string literal terminated later by the page's own |
222 | // closing punctuation. Since the snippet has not been parsed, the vector |
223 | // may also try to introduce these via entities. As a result, we'd like to |
224 | // stop before the first "//", the first <!--, the first entity, or the first |
225 | // quote not immediately following the first equals sign (taking whitespace |
226 | // into consideration). To keep things simpler, we don't try to distinguish |
227 | // between entity-introducing ampersands vs. other uses, nor do we bother to |
228 | // check for a second slash for a comment, nor do we bother to check for |
229 | // !-- following a less-than sign. We stop instead on any ampersand |
230 | // slash, or less-than sign. |
231 | size_t position = 0; |
232 | if ((position = decodedSnippet.find('=')) != notFound |
233 | && (position = decodedSnippet.find(isNotHTMLSpace, position + 1)) != notFound |
234 | && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != notFound) { |
235 | decodedSnippet.truncate(position); |
236 | } |
237 | } |
238 | |
239 | static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute) |
240 | { |
241 | return threadSafeMatch(attribute.name, SVGNames::valuesAttr); |
242 | } |
243 | |
244 | static bool semicolonSeparatedValueContainsJavaScriptURL(StringView semicolonSeparatedValue) |
245 | { |
246 | for (auto value : semicolonSeparatedValue.split(';')) { |
247 | if (WTF::protocolIsJavaScript(value)) |
248 | return true; |
249 | } |
250 | return false; |
251 | } |
252 | |
253 | XSSAuditor::XSSAuditor() |
254 | : m_isEnabled(false) |
255 | , m_xssProtection(XSSProtectionDisposition::Enabled) |
256 | , m_didSendValidXSSProtectionHeader(false) |
257 | , m_state(Uninitialized) |
258 | , m_scriptTagNestingLevel(0) |
259 | , m_encoding(UTF8Encoding()) |
260 | { |
261 | // Although tempting to call init() at this point, the various objects |
262 | // we want to reference might not all have been constructed yet. |
263 | } |
264 | |
265 | void XSSAuditor::initForFragment() |
266 | { |
267 | ASSERT(isMainThread()); |
268 | ASSERT(m_state == Uninitialized); |
269 | m_state = Initialized; |
270 | // When parsing a fragment, we don't enable the XSS auditor because it's |
271 | // too much overhead. |
272 | ASSERT(!m_isEnabled); |
273 | } |
274 | |
275 | void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) |
276 | { |
277 | ASSERT(isMainThread()); |
278 | if (m_state == Initialized) |
279 | return; |
280 | ASSERT(m_state == Uninitialized); |
281 | m_state = Initialized; |
282 | |
283 | if (RefPtr<Frame> frame = document->frame()) |
284 | m_isEnabled = frame->settings().xssAuditorEnabled(); |
285 | |
286 | if (!m_isEnabled) |
287 | return; |
288 | |
289 | m_documentURL = document->url().isolatedCopy(); |
290 | |
291 | // In theory, the Document could have detached from the Frame after the |
292 | // XSSAuditor was constructed. |
293 | if (!document->frame()) { |
294 | m_isEnabled = false; |
295 | return; |
296 | } |
297 | |
298 | if (m_documentURL.isEmpty()) { |
299 | // The URL can be empty when opening a new browser window or calling window.open(""). |
300 | m_isEnabled = false; |
301 | return; |
302 | } |
303 | |
304 | if (m_documentURL.protocolIsData()) { |
305 | m_isEnabled = false; |
306 | return; |
307 | } |
308 | |
309 | if (document->decoder()) |
310 | m_encoding = document->decoder()->encoding(); |
311 | |
312 | m_decodedURL = canonicalize(m_documentURL.string(), TruncationStyle::None); |
313 | if (m_decodedURL.find(isRequiredForInjection) == notFound) |
314 | m_decodedURL = String(); |
315 | |
316 | if (RefPtr<DocumentLoader> documentLoader = document->frame()->loader().documentLoader()) { |
317 | String = documentLoader->response().httpHeaderField(HTTPHeaderName::XXSSProtection); |
318 | String errorDetails; |
319 | unsigned errorPosition = 0; |
320 | String parsedReportURL; |
321 | URL reportURL; |
322 | m_xssProtection = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, parsedReportURL); |
323 | m_didSendValidXSSProtectionHeader = !headerValue.isNull() && m_xssProtection != XSSProtectionDisposition::Invalid; |
324 | |
325 | if ((m_xssProtection == XSSProtectionDisposition::Enabled || m_xssProtection == XSSProtectionDisposition::BlockEnabled) && !parsedReportURL.isEmpty()) { |
326 | reportURL = document->completeURL(parsedReportURL); |
327 | if (MixedContentChecker::isMixedContent(document->securityOrigin(), reportURL)) { |
328 | errorDetails = "insecure reporting URL for secure page" ; |
329 | m_xssProtection = XSSProtectionDisposition::Invalid; |
330 | reportURL = URL(); |
331 | m_didSendValidXSSProtectionHeader = false; |
332 | } |
333 | } |
334 | if (m_xssProtection == XSSProtectionDisposition::Invalid) { |
335 | document->addConsoleMessage(MessageSource::Security, MessageLevel::Error, makeString("Error parsing header X-XSS-Protection: " , headerValue, ": " , errorDetails, " at character position " , errorPosition, ". The default protections will be applied." )); |
336 | m_xssProtection = XSSProtectionDisposition::Enabled; |
337 | } |
338 | |
339 | if (auditorDelegate) |
340 | auditorDelegate->setReportURL(reportURL.isolatedCopy()); |
341 | RefPtr<FormData> httpBody = documentLoader->originalRequest().httpBody(); |
342 | if (httpBody && !httpBody->isEmpty()) { |
343 | String httpBodyAsString = httpBody->flattenToString(); |
344 | if (!httpBodyAsString.isEmpty()) { |
345 | m_decodedHTTPBody = canonicalize(httpBodyAsString, TruncationStyle::None); |
346 | if (m_decodedHTTPBody.find(isRequiredForInjection) == notFound) |
347 | m_decodedHTTPBody = String(); |
348 | } |
349 | } |
350 | } |
351 | |
352 | if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty()) { |
353 | m_isEnabled = false; |
354 | return; |
355 | } |
356 | } |
357 | |
358 | std::unique_ptr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request) |
359 | { |
360 | ASSERT(m_state == Initialized); |
361 | if (!m_isEnabled || m_xssProtection == XSSProtectionDisposition::Disabled) |
362 | return nullptr; |
363 | |
364 | bool didBlockScript = false; |
365 | if (request.token.type() == HTMLToken::StartTag) |
366 | didBlockScript = filterStartToken(request); |
367 | else if (m_scriptTagNestingLevel) { |
368 | if (request.token.type() == HTMLToken::Character) |
369 | didBlockScript = filterCharacterToken(request); |
370 | else if (request.token.type() == HTMLToken::EndTag) |
371 | filterEndToken(request); |
372 | } |
373 | |
374 | if (!didBlockScript) |
375 | return nullptr; |
376 | |
377 | bool didBlockEntirePage = m_xssProtection == XSSProtectionDisposition::BlockEnabled; |
378 | return std::make_unique<XSSInfo>(m_documentURL, didBlockEntirePage, m_didSendValidXSSProtectionHeader); |
379 | } |
380 | |
381 | bool XSSAuditor::filterStartToken(const FilterTokenRequest& request) |
382 | { |
383 | bool didBlockScript = eraseDangerousAttributesIfInjected(request); |
384 | |
385 | if (hasName(request.token, scriptTag)) { |
386 | didBlockScript |= filterScriptToken(request); |
387 | ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel); |
388 | m_scriptTagNestingLevel++; |
389 | } else if (hasName(request.token, objectTag)) |
390 | didBlockScript |= filterObjectToken(request); |
391 | else if (hasName(request.token, paramTag)) |
392 | didBlockScript |= filterParamToken(request); |
393 | else if (hasName(request.token, embedTag)) |
394 | didBlockScript |= filterEmbedToken(request); |
395 | else if (hasName(request.token, appletTag)) |
396 | didBlockScript |= filterAppletToken(request); |
397 | else if (hasName(request.token, iframeTag) || hasName(request.token, frameTag)) |
398 | didBlockScript |= filterFrameToken(request); |
399 | else if (hasName(request.token, metaTag)) |
400 | didBlockScript |= filterMetaToken(request); |
401 | else if (hasName(request.token, baseTag)) |
402 | didBlockScript |= filterBaseToken(request); |
403 | else if (hasName(request.token, formTag)) |
404 | didBlockScript |= filterFormToken(request); |
405 | else if (hasName(request.token, inputTag)) |
406 | didBlockScript |= filterInputToken(request); |
407 | else if (hasName(request.token, buttonTag)) |
408 | didBlockScript |= filterButtonToken(request); |
409 | |
410 | return didBlockScript; |
411 | } |
412 | |
413 | void XSSAuditor::filterEndToken(const FilterTokenRequest& request) |
414 | { |
415 | ASSERT(m_scriptTagNestingLevel); |
416 | if (hasName(request.token, scriptTag)) { |
417 | m_scriptTagNestingLevel--; |
418 | ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel); |
419 | } |
420 | } |
421 | |
422 | bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request) |
423 | { |
424 | ASSERT(m_scriptTagNestingLevel); |
425 | if (m_wasScriptTagFoundInRequest && isContainedInRequest(canonicalizedSnippetForJavaScript(request))) { |
426 | request.token.clear(); |
427 | LChar space = ' '; |
428 | request.token.appendToCharacter(space); // Technically, character tokens can't be empty. |
429 | return true; |
430 | } |
431 | return false; |
432 | } |
433 | |
434 | bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) |
435 | { |
436 | ASSERT(request.token.type() == HTMLToken::StartTag); |
437 | ASSERT(hasName(request.token, scriptTag)); |
438 | |
439 | m_wasScriptTagFoundInRequest = isContainedInRequest(canonicalizedSnippetForTagName(request)); |
440 | |
441 | bool didBlockScript = false; |
442 | if (m_wasScriptTagFoundInRequest) { |
443 | didBlockScript |= eraseAttributeIfInjected(request, srcAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
444 | didBlockScript |= eraseAttributeIfInjected(request, SVGNames::hrefAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
445 | didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
446 | } |
447 | |
448 | return didBlockScript; |
449 | } |
450 | |
451 | bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request) |
452 | { |
453 | ASSERT(request.token.type() == HTMLToken::StartTag); |
454 | ASSERT(hasName(request.token, objectTag)); |
455 | |
456 | bool didBlockScript = false; |
457 | if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { |
458 | didBlockScript |= eraseAttributeIfInjected(request, dataAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
459 | didBlockScript |= eraseAttributeIfInjected(request, typeAttr); |
460 | didBlockScript |= eraseAttributeIfInjected(request, classidAttr); |
461 | } |
462 | return didBlockScript; |
463 | } |
464 | |
465 | bool XSSAuditor::filterParamToken(const FilterTokenRequest& request) |
466 | { |
467 | ASSERT(request.token.type() == HTMLToken::StartTag); |
468 | ASSERT(hasName(request.token, paramTag)); |
469 | |
470 | size_t indexOfNameAttribute; |
471 | if (!findAttributeWithName(request.token, nameAttr, indexOfNameAttribute)) |
472 | return false; |
473 | |
474 | const HTMLToken::Attribute& nameAttribute = request.token.attributes().at(indexOfNameAttribute); |
475 | if (!HTMLParamElement::isURLParameter(String(nameAttribute.value))) |
476 | return false; |
477 | |
478 | return eraseAttributeIfInjected(request, valueAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
479 | } |
480 | |
481 | bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request) |
482 | { |
483 | ASSERT(request.token.type() == HTMLToken::StartTag); |
484 | ASSERT(hasName(request.token, embedTag)); |
485 | |
486 | bool didBlockScript = false; |
487 | if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { |
488 | didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), TruncationStyle::SrcLikeAttribute); |
489 | didBlockScript |= eraseAttributeIfInjected(request, srcAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
490 | didBlockScript |= eraseAttributeIfInjected(request, typeAttr); |
491 | } |
492 | return didBlockScript; |
493 | } |
494 | |
495 | bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request) |
496 | { |
497 | ASSERT(request.token.type() == HTMLToken::StartTag); |
498 | ASSERT(hasName(request.token, appletTag)); |
499 | |
500 | bool didBlockScript = false; |
501 | if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { |
502 | didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), TruncationStyle::SrcLikeAttribute); |
503 | didBlockScript |= eraseAttributeIfInjected(request, objectAttr); |
504 | } |
505 | return didBlockScript; |
506 | } |
507 | |
508 | bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request) |
509 | { |
510 | ASSERT(request.token.type() == HTMLToken::StartTag); |
511 | ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag)); |
512 | |
513 | bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), TruncationStyle::ScriptLikeAttribute); |
514 | if (isContainedInRequest(canonicalizedSnippetForTagName(request))) |
515 | didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), TruncationStyle::SrcLikeAttribute); |
516 | |
517 | return didBlockScript; |
518 | } |
519 | |
520 | bool XSSAuditor::filterMetaToken(const FilterTokenRequest& request) |
521 | { |
522 | ASSERT(request.token.type() == HTMLToken::StartTag); |
523 | ASSERT(hasName(request.token, metaTag)); |
524 | |
525 | return eraseAttributeIfInjected(request, http_equivAttr); |
526 | } |
527 | |
528 | bool XSSAuditor::filterBaseToken(const FilterTokenRequest& request) |
529 | { |
530 | ASSERT(request.token.type() == HTMLToken::StartTag); |
531 | ASSERT(hasName(request.token, baseTag)); |
532 | |
533 | return eraseAttributeIfInjected(request, hrefAttr); |
534 | } |
535 | |
536 | bool XSSAuditor::filterFormToken(const FilterTokenRequest& request) |
537 | { |
538 | ASSERT(request.token.type() == HTMLToken::StartTag); |
539 | ASSERT(hasName(request.token, formTag)); |
540 | |
541 | return eraseAttributeIfInjected(request, actionAttr, WTF::blankURL().string()); |
542 | } |
543 | |
544 | bool XSSAuditor::filterInputToken(const FilterTokenRequest& request) |
545 | { |
546 | ASSERT(request.token.type() == HTMLToken::StartTag); |
547 | ASSERT(hasName(request.token, inputTag)); |
548 | |
549 | return eraseAttributeIfInjected(request, formactionAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
550 | } |
551 | |
552 | bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request) |
553 | { |
554 | ASSERT(request.token.type() == HTMLToken::StartTag); |
555 | ASSERT(hasName(request.token, buttonTag)); |
556 | |
557 | return eraseAttributeIfInjected(request, formactionAttr, WTF::blankURL().string(), TruncationStyle::SrcLikeAttribute); |
558 | } |
559 | |
560 | bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request) |
561 | { |
562 | static NeverDestroyed<String> safeJavaScriptURL(MAKE_STATIC_STRING_IMPL("javascript:void(0)" )); |
563 | |
564 | bool didBlockScript = false; |
565 | for (size_t i = 0; i < request.token.attributes().size(); ++i) { |
566 | const HTMLToken::Attribute& attribute = request.token.attributes().at(i); |
567 | bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.name); |
568 | // FIXME: It would be better if we didn't create a new String for every attribute in the document. |
569 | String strippedValue = stripLeadingAndTrailingHTMLSpaces(String(attribute.value)); |
570 | bool valueContainsJavaScriptURL = (!isInlineEventHandler && WTF::protocolIsJavaScript(strippedValue)) || (isSemicolonSeparatedAttribute(attribute) && semicolonSeparatedValueContainsJavaScriptURL(strippedValue)); |
571 | if (!isInlineEventHandler && !valueContainsJavaScriptURL) |
572 | continue; |
573 | if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), TruncationStyle::ScriptLikeAttribute))) |
574 | continue; |
575 | request.token.eraseValueOfAttribute(i); |
576 | if (valueContainsJavaScriptURL) |
577 | request.token.appendToAttributeValue(i, safeJavaScriptURL.get()); |
578 | didBlockScript = true; |
579 | } |
580 | return didBlockScript; |
581 | } |
582 | |
583 | bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, TruncationStyle truncationStyle) |
584 | { |
585 | size_t indexOfAttribute = 0; |
586 | if (!findAttributeWithName(request.token, attributeName, indexOfAttribute)) |
587 | return false; |
588 | |
589 | const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute); |
590 | if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), truncationStyle))) |
591 | return false; |
592 | |
593 | if (threadSafeMatch(attributeName, srcAttr)) { |
594 | if (isLikelySafeResource(String(attribute.value))) |
595 | return false; |
596 | } else if (threadSafeMatch(attributeName, http_equivAttr)) { |
597 | if (!isDangerousHTTPEquiv(String(attribute.value))) |
598 | return false; |
599 | } |
600 | |
601 | request.token.eraseValueOfAttribute(indexOfAttribute); |
602 | if (!replacementValue.isEmpty()) |
603 | request.token.appendToAttributeValue(indexOfAttribute, replacementValue); |
604 | return true; |
605 | } |
606 | |
607 | String XSSAuditor::canonicalizedSnippetForTagName(const FilterTokenRequest& request) |
608 | { |
609 | // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<"). |
610 | return canonicalize(request.sourceTracker.source(request.token).substring(0, request.token.name().size() + 1), TruncationStyle::None); |
611 | } |
612 | |
613 | String XSSAuditor::snippetFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute) |
614 | { |
615 | // The range doesn't include the character which terminates the value. So, |
616 | // for an input of |name="value"|, the snippet is |name="value|. For an |
617 | // unquoted input of |name=value |, the snippet is |name=value|. |
618 | // FIXME: We should grab one character before the name also. |
619 | return request.sourceTracker.source(request.token, attribute.startOffset, attribute.endOffset); |
620 | } |
621 | |
622 | String XSSAuditor::canonicalize(const String& snippet, TruncationStyle truncationStyle) |
623 | { |
624 | String decodedSnippet = fullyDecodeString(snippet, m_encoding); |
625 | if (truncationStyle != TruncationStyle::None) { |
626 | decodedSnippet.truncate(kMaximumFragmentLengthTarget); |
627 | if (truncationStyle == TruncationStyle::SrcLikeAttribute) |
628 | truncateForSrcLikeAttribute(decodedSnippet); |
629 | else if (truncationStyle == TruncationStyle::ScriptLikeAttribute) |
630 | truncateForScriptLikeAttribute(decodedSnippet); |
631 | } |
632 | return decodedSnippet.removeCharacters(&isNonCanonicalCharacter); |
633 | } |
634 | |
635 | String XSSAuditor::canonicalizedSnippetForJavaScript(const FilterTokenRequest& request) |
636 | { |
637 | String string = request.sourceTracker.source(request.token); |
638 | size_t startPosition = 0; |
639 | size_t endPosition = string.length(); |
640 | size_t foundPosition = notFound; |
641 | size_t lastNonSpacePosition = notFound; |
642 | |
643 | // Skip over initial comments to find start of code. |
644 | while (startPosition < endPosition) { |
645 | while (startPosition < endPosition && isHTMLSpace(string[startPosition])) |
646 | startPosition++; |
647 | |
648 | // Under SVG/XML rules, only HTML comment syntax matters and the parser returns |
649 | // these as a separate comment tokens. Having consumed whitespace, we need not look |
650 | // further for these. |
651 | if (request.shouldAllowCDATA) |
652 | break; |
653 | |
654 | // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML |
655 | // comment ends at the end of the line, not with -->. |
656 | if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) { |
657 | while (startPosition < endPosition && !isJSNewline(string[startPosition])) |
658 | startPosition++; |
659 | } else if (startsMultiLineCommentAt(string, startPosition)) { |
660 | if (startPosition + 2 < endPosition && (foundPosition = string.find("*/" , startPosition + 2)) != notFound) |
661 | startPosition = foundPosition + 2; |
662 | else |
663 | startPosition = endPosition; |
664 | } else |
665 | break; |
666 | } |
667 | |
668 | String result; |
669 | while (startPosition < endPosition && !result.length()) { |
670 | // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma, |
671 | // when we hit an opening <script> tag, or when we exceed the maximum length target. The comma rule |
672 | // covers a common parameter concatenation case performed by some web servers. |
673 | lastNonSpacePosition = notFound; |
674 | for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) { |
675 | if (!request.shouldAllowCDATA) { |
676 | if (startsSingleLineCommentAt(string, foundPosition) |
677 | || startsMultiLineCommentAt(string, foundPosition) |
678 | || startsHTMLCommentAt(string, foundPosition)) { |
679 | break; |
680 | } |
681 | } |
682 | if (string[foundPosition] == ',') |
683 | break; |
684 | |
685 | if (lastNonSpacePosition != notFound && startsOpeningScriptTagAt(string, foundPosition)) { |
686 | foundPosition = lastNonSpacePosition + 1; |
687 | break; |
688 | } |
689 | if (foundPosition > startPosition + kMaximumFragmentLengthTarget) { |
690 | // After hitting the length target, we can only stop at a point where we know we are |
691 | // not in the middle of a %-escape sequence. For the sake of simplicity, approximate |
692 | // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on |
693 | // whitespace only. We should have enough text in these cases to avoid false positives. |
694 | if (isHTMLSpace(string[foundPosition])) |
695 | break; |
696 | } |
697 | |
698 | if (!isHTMLSpace(string[foundPosition])) |
699 | lastNonSpacePosition = foundPosition; |
700 | } |
701 | |
702 | result = canonicalize(string.substring(startPosition, foundPosition - startPosition), TruncationStyle::None); |
703 | startPosition = foundPosition + 1; |
704 | } |
705 | return result; |
706 | } |
707 | |
708 | SuffixTree<ASCIICodebook>* XSSAuditor::decodedHTTPBodySuffixTree() |
709 | { |
710 | const unsigned minimumLengthForSuffixTree = 512; // FIXME: Tune this parameter. |
711 | const unsigned suffixTreeDepth = 5; |
712 | |
713 | if (!m_decodedHTTPBodySuffixTree && m_decodedHTTPBody.length() >= minimumLengthForSuffixTree) |
714 | m_decodedHTTPBodySuffixTree = std::make_unique<SuffixTree<ASCIICodebook>>(m_decodedHTTPBody, suffixTreeDepth); |
715 | return m_decodedHTTPBodySuffixTree.get(); |
716 | } |
717 | |
718 | bool XSSAuditor::isContainedInRequest(const String& decodedSnippet) |
719 | { |
720 | if (decodedSnippet.isEmpty()) |
721 | return false; |
722 | if (m_decodedURL.containsIgnoringASCIICase(decodedSnippet)) |
723 | return true; |
724 | auto* decodedHTTPBodySuffixTree = this->decodedHTTPBodySuffixTree(); |
725 | if (decodedHTTPBodySuffixTree && !decodedHTTPBodySuffixTree->mightContain(decodedSnippet)) |
726 | return false; |
727 | return m_decodedHTTPBody.containsIgnoringASCIICase(decodedSnippet); |
728 | } |
729 | |
730 | bool XSSAuditor::isLikelySafeResource(const String& url) |
731 | { |
732 | // Give empty URLs and about:blank a pass. Making a resourceURL from an |
733 | // empty string below will likely later fail the "no query args test" as |
734 | // it inherits the document's query args. |
735 | if (url.isEmpty() || url == WTF::blankURL().string()) |
736 | return true; |
737 | |
738 | // If the resource is loaded from the same host as the enclosing page, it's |
739 | // probably not an XSS attack, so we reduce false positives by allowing the |
740 | // request, ignoring scheme and port considerations. If the resource has a |
741 | // query string, we're more suspicious, however, because that's pretty rare |
742 | // and the attacker might be able to trick a server-side script into doing |
743 | // something dangerous with the query string. |
744 | if (m_documentURL.host().isEmpty()) |
745 | return false; |
746 | |
747 | URL resourceURL(m_documentURL, url); |
748 | return (m_documentURL.host() == resourceURL.host() && resourceURL.query().isEmpty()); |
749 | } |
750 | |
751 | } // namespace WebCore |
752 | |