1 | /* |
2 | * Copyright (C) 2008, 2014 Apple Inc. All Rights Reserved. |
3 | * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
4 | * Copyright (C) 2010 Google Inc. All Rights Reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | */ |
27 | |
28 | #include "config.h" |
29 | #include "HTMLPreloadScanner.h" |
30 | |
31 | #include "HTMLNames.h" |
32 | #include "HTMLParserIdioms.h" |
33 | #include "HTMLSrcsetParser.h" |
34 | #include "HTMLTokenizer.h" |
35 | #include "InputTypeNames.h" |
36 | #include "LinkLoader.h" |
37 | #include "LinkRelAttribute.h" |
38 | #include "Logging.h" |
39 | #include "MIMETypeRegistry.h" |
40 | #include "MediaList.h" |
41 | #include "MediaQueryEvaluator.h" |
42 | #include "MediaQueryParser.h" |
43 | #include "RenderView.h" |
44 | #include "RuntimeEnabledFeatures.h" |
45 | #include "SizesAttributeParser.h" |
46 | #include <wtf/MainThread.h> |
47 | |
48 | namespace WebCore { |
49 | |
50 | using namespace HTMLNames; |
51 | |
52 | TokenPreloadScanner::TagId TokenPreloadScanner::tagIdFor(const HTMLToken::DataVector& data) |
53 | { |
54 | AtomicString tagName(data); |
55 | if (tagName == imgTag) |
56 | return TagId::Img; |
57 | if (tagName == inputTag) |
58 | return TagId::Input; |
59 | if (tagName == linkTag) |
60 | return TagId::Link; |
61 | if (tagName == scriptTag) |
62 | return TagId::Script; |
63 | if (tagName == styleTag) |
64 | return TagId::Style; |
65 | if (tagName == baseTag) |
66 | return TagId::Base; |
67 | if (tagName == templateTag) |
68 | return TagId::Template; |
69 | if (tagName == metaTag) |
70 | return TagId::Meta; |
71 | if (tagName == pictureTag) |
72 | return TagId::Picture; |
73 | if (tagName == sourceTag) |
74 | return TagId::Source; |
75 | return TagId::Unknown; |
76 | } |
77 | |
78 | String TokenPreloadScanner::initiatorFor(TagId tagId) |
79 | { |
80 | switch (tagId) { |
81 | case TagId::Source: |
82 | case TagId::Img: |
83 | return "img"_s ; |
84 | case TagId::Input: |
85 | return "input"_s ; |
86 | case TagId::Link: |
87 | return "link"_s ; |
88 | case TagId::Script: |
89 | return "script"_s ; |
90 | case TagId::Unknown: |
91 | case TagId::Style: |
92 | case TagId::Base: |
93 | case TagId::Template: |
94 | case TagId::Meta: |
95 | case TagId::Picture: |
96 | ASSERT_NOT_REACHED(); |
97 | return "unknown"_s ; |
98 | } |
99 | ASSERT_NOT_REACHED(); |
100 | return "unknown"_s ; |
101 | } |
102 | |
103 | class TokenPreloadScanner::StartTagScanner { |
104 | public: |
105 | explicit StartTagScanner(TagId tagId, float deviceScaleFactor = 1.0) |
106 | : m_tagId(tagId) |
107 | , m_linkIsStyleSheet(false) |
108 | , m_linkIsPreload(false) |
109 | , m_metaIsViewport(false) |
110 | , m_metaIsDisabledAdaptations(false) |
111 | , m_inputIsImage(false) |
112 | , m_deviceScaleFactor(deviceScaleFactor) |
113 | { |
114 | } |
115 | |
116 | void processAttributes(const HTMLToken::AttributeList& attributes, Document& document, Vector<bool>& pictureState) |
117 | { |
118 | ASSERT(isMainThread()); |
119 | if (m_tagId >= TagId::Unknown) |
120 | return; |
121 | |
122 | for (auto& attribute : attributes) { |
123 | AtomicString attributeName(attribute.name); |
124 | String attributeValue = StringImpl::create8BitIfPossible(attribute.value); |
125 | processAttribute(attributeName, attributeValue, document, pictureState); |
126 | } |
127 | |
128 | if (m_tagId == TagId::Source && !pictureState.isEmpty() && !pictureState.last() && m_mediaMatched && m_typeMatched && !m_srcSetAttribute.isEmpty()) { |
129 | |
130 | auto sourceSize = SizesAttributeParser(m_sizesAttribute, document).length(); |
131 | ImageCandidate imageCandidate = bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcSetAttribute, sourceSize); |
132 | if (!imageCandidate.isEmpty()) { |
133 | pictureState.last() = true; |
134 | setUrlToLoad(imageCandidate.string.toString(), true); |
135 | } |
136 | } |
137 | |
138 | // Resolve between src and srcSet if we have them and the tag is img. |
139 | if (m_tagId == TagId::Img && !m_srcSetAttribute.isEmpty()) { |
140 | auto sourceSize = SizesAttributeParser(m_sizesAttribute, document).length(); |
141 | ImageCandidate imageCandidate = bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcSetAttribute, sourceSize); |
142 | setUrlToLoad(imageCandidate.string.toString(), true); |
143 | } |
144 | |
145 | if (m_metaIsViewport && !m_metaContent.isNull()) |
146 | document.processViewport(m_metaContent, ViewportArguments::ViewportMeta); |
147 | |
148 | if (m_metaIsDisabledAdaptations && !m_metaContent.isNull()) |
149 | document.processDisabledAdaptations(m_metaContent); |
150 | } |
151 | |
152 | std::unique_ptr<PreloadRequest> createPreloadRequest(const URL& predictedBaseURL) |
153 | { |
154 | if (!shouldPreload()) |
155 | return nullptr; |
156 | |
157 | auto type = resourceType(); |
158 | if (!type) |
159 | return nullptr; |
160 | |
161 | if (!LinkLoader::isSupportedType(type.value(), m_typeAttribute)) |
162 | return nullptr; |
163 | |
164 | auto request = std::make_unique<PreloadRequest>(initiatorFor(m_tagId), m_urlToLoad, predictedBaseURL, type.value(), m_mediaAttribute, m_moduleScript); |
165 | request->setCrossOriginMode(m_crossOriginMode); |
166 | request->setNonce(m_nonceAttribute); |
167 | |
168 | // According to the spec, the module tag ignores the "charset" attribute as the same to the worker's |
169 | // importScript. But WebKit supports the "charset" for importScript intentionally. So to be consistent, |
170 | // even for the module tags, we handle the "charset" attribute. |
171 | request->setCharset(charset()); |
172 | return request; |
173 | } |
174 | |
175 | static bool match(const AtomicString& name, const QualifiedName& qName) |
176 | { |
177 | ASSERT(isMainThread()); |
178 | return qName.localName() == name; |
179 | } |
180 | |
181 | private: |
182 | void processImageAndScriptAttribute(const AtomicString& attributeName, const String& attributeValue) |
183 | { |
184 | if (match(attributeName, srcAttr)) |
185 | setUrlToLoad(attributeValue); |
186 | else if (match(attributeName, crossoriginAttr)) |
187 | m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue); |
188 | else if (match(attributeName, charsetAttr)) |
189 | m_charset = attributeValue; |
190 | } |
191 | |
192 | void processAttribute(const AtomicString& attributeName, const String& attributeValue, Document& document, const Vector<bool>& pictureState) |
193 | { |
194 | bool inPicture = !pictureState.isEmpty(); |
195 | bool alreadyMatchedSource = inPicture && pictureState.last(); |
196 | |
197 | switch (m_tagId) { |
198 | case TagId::Img: |
199 | if (inPicture && alreadyMatchedSource) |
200 | break; |
201 | if (match(attributeName, srcsetAttr) && m_srcSetAttribute.isNull()) { |
202 | m_srcSetAttribute = attributeValue; |
203 | break; |
204 | } |
205 | if (match(attributeName, sizesAttr) && m_sizesAttribute.isNull()) { |
206 | m_sizesAttribute = attributeValue; |
207 | break; |
208 | } |
209 | processImageAndScriptAttribute(attributeName, attributeValue); |
210 | break; |
211 | case TagId::Source: |
212 | if (inPicture && alreadyMatchedSource) |
213 | break; |
214 | if (match(attributeName, srcsetAttr) && m_srcSetAttribute.isNull()) { |
215 | m_srcSetAttribute = attributeValue; |
216 | break; |
217 | } |
218 | if (match(attributeName, sizesAttr) && m_sizesAttribute.isNull()) { |
219 | m_sizesAttribute = attributeValue; |
220 | break; |
221 | } |
222 | if (match(attributeName, mediaAttr) && m_mediaAttribute.isNull()) { |
223 | m_mediaAttribute = attributeValue; |
224 | auto mediaSet = MediaQuerySet::create(attributeValue, MediaQueryParserContext(document)); |
225 | auto documentElement = makeRefPtr(document.documentElement()); |
226 | LOG(MediaQueries, "HTMLPreloadScanner %p processAttribute evaluating media queries" , this); |
227 | m_mediaMatched = MediaQueryEvaluator { document.printing() ? "print" : "screen" , document, documentElement ? documentElement->computedStyle() : nullptr }.evaluate(mediaSet.get()); |
228 | } |
229 | if (match(attributeName, typeAttr) && m_typeAttribute.isNull()) { |
230 | // when multiple type attributes present: first value wins, ignore subsequent (to match ImageElement parser and Blink behaviours) |
231 | m_typeAttribute = attributeValue; |
232 | m_typeMatched &= MIMETypeRegistry::isSupportedImageVideoOrSVGMIMEType(m_typeAttribute); |
233 | } |
234 | break; |
235 | case TagId::Script: |
236 | if (match(attributeName, typeAttr)) { |
237 | m_moduleScript = equalLettersIgnoringASCIICase(attributeValue, "module" ) ? PreloadRequest::ModuleScript::Yes : PreloadRequest::ModuleScript::No; |
238 | break; |
239 | } else if (match(attributeName, nonceAttr)) |
240 | m_nonceAttribute = attributeValue; |
241 | processImageAndScriptAttribute(attributeName, attributeValue); |
242 | break; |
243 | case TagId::Link: |
244 | if (match(attributeName, hrefAttr)) |
245 | setUrlToLoad(attributeValue); |
246 | else if (match(attributeName, relAttr)) { |
247 | LinkRelAttribute parsedAttribute { document, attributeValue }; |
248 | m_linkIsStyleSheet = relAttributeIsStyleSheet(parsedAttribute); |
249 | m_linkIsPreload = parsedAttribute.isLinkPreload; |
250 | } else if (match(attributeName, mediaAttr)) |
251 | m_mediaAttribute = attributeValue; |
252 | else if (match(attributeName, charsetAttr)) |
253 | m_charset = attributeValue; |
254 | else if (match(attributeName, crossoriginAttr)) |
255 | m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue); |
256 | else if (match(attributeName, nonceAttr)) |
257 | m_nonceAttribute = attributeValue; |
258 | else if (match(attributeName, asAttr)) |
259 | m_asAttribute = attributeValue; |
260 | else if (match(attributeName, typeAttr)) |
261 | m_typeAttribute = attributeValue; |
262 | break; |
263 | case TagId::Input: |
264 | if (match(attributeName, srcAttr)) |
265 | setUrlToLoad(attributeValue); |
266 | else if (match(attributeName, typeAttr)) |
267 | m_inputIsImage = equalLettersIgnoringASCIICase(attributeValue, "image" ); |
268 | break; |
269 | case TagId::Meta: |
270 | if (match(attributeName, contentAttr)) |
271 | m_metaContent = attributeValue; |
272 | else if (match(attributeName, nameAttr)) |
273 | m_metaIsViewport = equalLettersIgnoringASCIICase(attributeValue, "viewport" ); |
274 | else if (RuntimeEnabledFeatures::sharedFeatures().disabledAdaptationsMetaTagEnabled() && match(attributeName, nameAttr)) |
275 | m_metaIsDisabledAdaptations = equalLettersIgnoringASCIICase(attributeValue, "disabled-adaptations" ); |
276 | break; |
277 | case TagId::Base: |
278 | case TagId::Style: |
279 | case TagId::Template: |
280 | case TagId::Picture: |
281 | case TagId::Unknown: |
282 | break; |
283 | } |
284 | } |
285 | |
286 | static bool relAttributeIsStyleSheet(const LinkRelAttribute& parsedAttribute) |
287 | { |
288 | return parsedAttribute.isStyleSheet && !parsedAttribute.isAlternate && !parsedAttribute.iconType && !parsedAttribute.isDNSPrefetch; |
289 | } |
290 | |
291 | void setUrlToLoad(const String& value, bool allowReplacement = false) |
292 | { |
293 | // We only respect the first src/href, per HTML5: |
294 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state |
295 | if (!allowReplacement && !m_urlToLoad.isEmpty()) |
296 | return; |
297 | String url = stripLeadingAndTrailingHTMLSpaces(value); |
298 | if (url.isEmpty()) |
299 | return; |
300 | m_urlToLoad = url; |
301 | } |
302 | |
303 | const String& charset() const |
304 | { |
305 | return m_charset; |
306 | } |
307 | |
308 | Optional<CachedResource::Type> resourceType() const |
309 | { |
310 | switch (m_tagId) { |
311 | case TagId::Script: |
312 | return CachedResource::Type::Script; |
313 | case TagId::Img: |
314 | case TagId::Input: |
315 | case TagId::Source: |
316 | ASSERT(m_tagId != TagId::Input || m_inputIsImage); |
317 | return CachedResource::Type::ImageResource; |
318 | case TagId::Link: |
319 | if (m_linkIsStyleSheet) |
320 | return CachedResource::Type::CSSStyleSheet; |
321 | if (m_linkIsPreload) |
322 | return LinkLoader::resourceTypeFromAsAttribute(m_asAttribute); |
323 | break; |
324 | case TagId::Meta: |
325 | case TagId::Unknown: |
326 | case TagId::Style: |
327 | case TagId::Base: |
328 | case TagId::Template: |
329 | case TagId::Picture: |
330 | break; |
331 | } |
332 | ASSERT_NOT_REACHED(); |
333 | return CachedResource::Type::RawResource; |
334 | } |
335 | |
336 | bool shouldPreload() |
337 | { |
338 | if (m_urlToLoad.isEmpty()) |
339 | return false; |
340 | |
341 | if (protocolIs(m_urlToLoad, "data" ) || protocolIs(m_urlToLoad, "about" )) |
342 | return false; |
343 | |
344 | if (m_tagId == TagId::Link && !m_linkIsStyleSheet && !m_linkIsPreload) |
345 | return false; |
346 | |
347 | if (m_tagId == TagId::Input && !m_inputIsImage) |
348 | return false; |
349 | |
350 | return true; |
351 | } |
352 | |
353 | TagId m_tagId; |
354 | String m_urlToLoad; |
355 | String m_srcSetAttribute; |
356 | String m_sizesAttribute; |
357 | bool m_mediaMatched { true }; |
358 | bool m_typeMatched { true }; |
359 | String m_charset; |
360 | String m_crossOriginMode; |
361 | bool m_linkIsStyleSheet; |
362 | bool m_linkIsPreload; |
363 | String m_mediaAttribute; |
364 | String m_nonceAttribute; |
365 | String m_metaContent; |
366 | String m_asAttribute; |
367 | String m_typeAttribute; |
368 | bool m_metaIsViewport; |
369 | bool m_metaIsDisabledAdaptations; |
370 | bool m_inputIsImage; |
371 | float m_deviceScaleFactor; |
372 | PreloadRequest::ModuleScript m_moduleScript { PreloadRequest::ModuleScript::No }; |
373 | }; |
374 | |
375 | TokenPreloadScanner::TokenPreloadScanner(const URL& documentURL, float deviceScaleFactor) |
376 | : m_documentURL(documentURL) |
377 | , m_deviceScaleFactor(deviceScaleFactor) |
378 | { |
379 | } |
380 | |
381 | void TokenPreloadScanner::scan(const HTMLToken& token, Vector<std::unique_ptr<PreloadRequest>>& requests, Document& document) |
382 | { |
383 | switch (token.type()) { |
384 | case HTMLToken::Character: |
385 | if (!m_inStyle) |
386 | return; |
387 | m_cssScanner.scan(token.characters(), requests); |
388 | return; |
389 | |
390 | case HTMLToken::EndTag: { |
391 | TagId tagId = tagIdFor(token.name()); |
392 | if (tagId == TagId::Template) { |
393 | if (m_templateCount) |
394 | --m_templateCount; |
395 | return; |
396 | } |
397 | if (tagId == TagId::Style) { |
398 | if (m_inStyle) |
399 | m_cssScanner.reset(); |
400 | m_inStyle = false; |
401 | } else if (tagId == TagId::Picture && !m_pictureSourceState.isEmpty()) |
402 | m_pictureSourceState.removeLast(); |
403 | |
404 | return; |
405 | } |
406 | |
407 | case HTMLToken::StartTag: { |
408 | if (m_templateCount) |
409 | return; |
410 | TagId tagId = tagIdFor(token.name()); |
411 | if (tagId == TagId::Template) { |
412 | ++m_templateCount; |
413 | return; |
414 | } |
415 | if (tagId == TagId::Style) { |
416 | m_inStyle = true; |
417 | return; |
418 | } |
419 | if (tagId == TagId::Base) { |
420 | // The first <base> element is the one that wins. |
421 | if (!m_predictedBaseElementURL.isEmpty()) |
422 | return; |
423 | updatePredictedBaseURL(token); |
424 | return; |
425 | } |
426 | if (tagId == TagId::Picture) { |
427 | m_pictureSourceState.append(false); |
428 | return; |
429 | } |
430 | |
431 | StartTagScanner scanner(tagId, m_deviceScaleFactor); |
432 | scanner.processAttributes(token.attributes(), document, m_pictureSourceState); |
433 | if (auto request = scanner.createPreloadRequest(m_predictedBaseElementURL)) |
434 | requests.append(WTFMove(request)); |
435 | return; |
436 | } |
437 | |
438 | default: |
439 | return; |
440 | } |
441 | } |
442 | |
443 | void TokenPreloadScanner::updatePredictedBaseURL(const HTMLToken& token) |
444 | { |
445 | ASSERT(m_predictedBaseElementURL.isEmpty()); |
446 | if (auto* hrefAttribute = findAttribute(token.attributes(), hrefAttr->localName().string())) |
447 | m_predictedBaseElementURL = URL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(StringImpl::create8BitIfPossible(hrefAttribute->value))).isolatedCopy(); |
448 | } |
449 | |
450 | HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const URL& documentURL, float deviceScaleFactor) |
451 | : m_scanner(documentURL, deviceScaleFactor) |
452 | , m_tokenizer(options) |
453 | { |
454 | } |
455 | |
456 | void HTMLPreloadScanner::appendToEnd(const SegmentedString& source) |
457 | { |
458 | m_source.append(source); |
459 | } |
460 | |
461 | void HTMLPreloadScanner::scan(HTMLResourcePreloader& preloader, Document& document) |
462 | { |
463 | ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread. |
464 | |
465 | const URL& startingBaseElementURL = document.baseElementURL(); |
466 | |
467 | // When we start scanning, our best prediction of the baseElementURL is the real one! |
468 | if (!startingBaseElementURL.isEmpty()) |
469 | m_scanner.setPredictedBaseElementURL(startingBaseElementURL); |
470 | |
471 | PreloadRequestStream requests; |
472 | |
473 | while (auto token = m_tokenizer.nextToken(m_source)) { |
474 | if (token->type() == HTMLToken::StartTag) |
475 | m_tokenizer.updateStateFor(AtomicString(token->name())); |
476 | m_scanner.scan(*token, requests, document); |
477 | } |
478 | |
479 | preloader.preload(WTFMove(requests)); |
480 | } |
481 | |
482 | bool testPreloadScannerViewportSupport(Document* document) |
483 | { |
484 | ASSERT(document); |
485 | HTMLParserOptions options(*document); |
486 | HTMLPreloadScanner scanner(options, document->url()); |
487 | HTMLResourcePreloader preloader(*document); |
488 | scanner.appendToEnd(String("<meta name=viewport content='width=400'>" )); |
489 | scanner.scan(preloader, *document); |
490 | return (document->viewportArguments().width == 400); |
491 | } |
492 | |
493 | } |
494 | |