1/*
2 * Copyright (C) 2008, 2014 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "HTMLPreloadScanner.h"
30
31#include "HTMLNames.h"
32#include "HTMLParserIdioms.h"
33#include "HTMLSrcsetParser.h"
34#include "HTMLTokenizer.h"
35#include "InputTypeNames.h"
36#include "LinkLoader.h"
37#include "LinkRelAttribute.h"
38#include "Logging.h"
39#include "MIMETypeRegistry.h"
40#include "MediaList.h"
41#include "MediaQueryEvaluator.h"
42#include "MediaQueryParser.h"
43#include "RenderView.h"
44#include "RuntimeEnabledFeatures.h"
45#include "SizesAttributeParser.h"
46#include <wtf/MainThread.h>
47
48namespace WebCore {
49
50using namespace HTMLNames;
51
52TokenPreloadScanner::TagId TokenPreloadScanner::tagIdFor(const HTMLToken::DataVector& data)
53{
54 AtomicString tagName(data);
55 if (tagName == imgTag)
56 return TagId::Img;
57 if (tagName == inputTag)
58 return TagId::Input;
59 if (tagName == linkTag)
60 return TagId::Link;
61 if (tagName == scriptTag)
62 return TagId::Script;
63 if (tagName == styleTag)
64 return TagId::Style;
65 if (tagName == baseTag)
66 return TagId::Base;
67 if (tagName == templateTag)
68 return TagId::Template;
69 if (tagName == metaTag)
70 return TagId::Meta;
71 if (tagName == pictureTag)
72 return TagId::Picture;
73 if (tagName == sourceTag)
74 return TagId::Source;
75 return TagId::Unknown;
76}
77
78String TokenPreloadScanner::initiatorFor(TagId tagId)
79{
80 switch (tagId) {
81 case TagId::Source:
82 case TagId::Img:
83 return "img"_s;
84 case TagId::Input:
85 return "input"_s;
86 case TagId::Link:
87 return "link"_s;
88 case TagId::Script:
89 return "script"_s;
90 case TagId::Unknown:
91 case TagId::Style:
92 case TagId::Base:
93 case TagId::Template:
94 case TagId::Meta:
95 case TagId::Picture:
96 ASSERT_NOT_REACHED();
97 return "unknown"_s;
98 }
99 ASSERT_NOT_REACHED();
100 return "unknown"_s;
101}
102
103class TokenPreloadScanner::StartTagScanner {
104public:
105 explicit StartTagScanner(TagId tagId, float deviceScaleFactor = 1.0)
106 : m_tagId(tagId)
107 , m_linkIsStyleSheet(false)
108 , m_linkIsPreload(false)
109 , m_metaIsViewport(false)
110 , m_metaIsDisabledAdaptations(false)
111 , m_inputIsImage(false)
112 , m_deviceScaleFactor(deviceScaleFactor)
113 {
114 }
115
116 void processAttributes(const HTMLToken::AttributeList& attributes, Document& document, Vector<bool>& pictureState)
117 {
118 ASSERT(isMainThread());
119 if (m_tagId >= TagId::Unknown)
120 return;
121
122 for (auto& attribute : attributes) {
123 AtomicString attributeName(attribute.name);
124 String attributeValue = StringImpl::create8BitIfPossible(attribute.value);
125 processAttribute(attributeName, attributeValue, document, pictureState);
126 }
127
128 if (m_tagId == TagId::Source && !pictureState.isEmpty() && !pictureState.last() && m_mediaMatched && m_typeMatched && !m_srcSetAttribute.isEmpty()) {
129
130 auto sourceSize = SizesAttributeParser(m_sizesAttribute, document).length();
131 ImageCandidate imageCandidate = bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcSetAttribute, sourceSize);
132 if (!imageCandidate.isEmpty()) {
133 pictureState.last() = true;
134 setUrlToLoad(imageCandidate.string.toString(), true);
135 }
136 }
137
138 // Resolve between src and srcSet if we have them and the tag is img.
139 if (m_tagId == TagId::Img && !m_srcSetAttribute.isEmpty()) {
140 auto sourceSize = SizesAttributeParser(m_sizesAttribute, document).length();
141 ImageCandidate imageCandidate = bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcSetAttribute, sourceSize);
142 setUrlToLoad(imageCandidate.string.toString(), true);
143 }
144
145 if (m_metaIsViewport && !m_metaContent.isNull())
146 document.processViewport(m_metaContent, ViewportArguments::ViewportMeta);
147
148 if (m_metaIsDisabledAdaptations && !m_metaContent.isNull())
149 document.processDisabledAdaptations(m_metaContent);
150 }
151
152 std::unique_ptr<PreloadRequest> createPreloadRequest(const URL& predictedBaseURL)
153 {
154 if (!shouldPreload())
155 return nullptr;
156
157 auto type = resourceType();
158 if (!type)
159 return nullptr;
160
161 if (!LinkLoader::isSupportedType(type.value(), m_typeAttribute))
162 return nullptr;
163
164 auto request = std::make_unique<PreloadRequest>(initiatorFor(m_tagId), m_urlToLoad, predictedBaseURL, type.value(), m_mediaAttribute, m_moduleScript);
165 request->setCrossOriginMode(m_crossOriginMode);
166 request->setNonce(m_nonceAttribute);
167
168 // According to the spec, the module tag ignores the "charset" attribute as the same to the worker's
169 // importScript. But WebKit supports the "charset" for importScript intentionally. So to be consistent,
170 // even for the module tags, we handle the "charset" attribute.
171 request->setCharset(charset());
172 return request;
173 }
174
175 static bool match(const AtomicString& name, const QualifiedName& qName)
176 {
177 ASSERT(isMainThread());
178 return qName.localName() == name;
179 }
180
181private:
182 void processImageAndScriptAttribute(const AtomicString& attributeName, const String& attributeValue)
183 {
184 if (match(attributeName, srcAttr))
185 setUrlToLoad(attributeValue);
186 else if (match(attributeName, crossoriginAttr))
187 m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue);
188 else if (match(attributeName, charsetAttr))
189 m_charset = attributeValue;
190 }
191
192 void processAttribute(const AtomicString& attributeName, const String& attributeValue, Document& document, const Vector<bool>& pictureState)
193 {
194 bool inPicture = !pictureState.isEmpty();
195 bool alreadyMatchedSource = inPicture && pictureState.last();
196
197 switch (m_tagId) {
198 case TagId::Img:
199 if (inPicture && alreadyMatchedSource)
200 break;
201 if (match(attributeName, srcsetAttr) && m_srcSetAttribute.isNull()) {
202 m_srcSetAttribute = attributeValue;
203 break;
204 }
205 if (match(attributeName, sizesAttr) && m_sizesAttribute.isNull()) {
206 m_sizesAttribute = attributeValue;
207 break;
208 }
209 processImageAndScriptAttribute(attributeName, attributeValue);
210 break;
211 case TagId::Source:
212 if (inPicture && alreadyMatchedSource)
213 break;
214 if (match(attributeName, srcsetAttr) && m_srcSetAttribute.isNull()) {
215 m_srcSetAttribute = attributeValue;
216 break;
217 }
218 if (match(attributeName, sizesAttr) && m_sizesAttribute.isNull()) {
219 m_sizesAttribute = attributeValue;
220 break;
221 }
222 if (match(attributeName, mediaAttr) && m_mediaAttribute.isNull()) {
223 m_mediaAttribute = attributeValue;
224 auto mediaSet = MediaQuerySet::create(attributeValue, MediaQueryParserContext(document));
225 auto documentElement = makeRefPtr(document.documentElement());
226 LOG(MediaQueries, "HTMLPreloadScanner %p processAttribute evaluating media queries", this);
227 m_mediaMatched = MediaQueryEvaluator { document.printing() ? "print" : "screen", document, documentElement ? documentElement->computedStyle() : nullptr }.evaluate(mediaSet.get());
228 }
229 if (match(attributeName, typeAttr) && m_typeAttribute.isNull()) {
230 // when multiple type attributes present: first value wins, ignore subsequent (to match ImageElement parser and Blink behaviours)
231 m_typeAttribute = attributeValue;
232 m_typeMatched &= MIMETypeRegistry::isSupportedImageVideoOrSVGMIMEType(m_typeAttribute);
233 }
234 break;
235 case TagId::Script:
236 if (match(attributeName, typeAttr)) {
237 m_moduleScript = equalLettersIgnoringASCIICase(attributeValue, "module") ? PreloadRequest::ModuleScript::Yes : PreloadRequest::ModuleScript::No;
238 break;
239 } else if (match(attributeName, nonceAttr))
240 m_nonceAttribute = attributeValue;
241 processImageAndScriptAttribute(attributeName, attributeValue);
242 break;
243 case TagId::Link:
244 if (match(attributeName, hrefAttr))
245 setUrlToLoad(attributeValue);
246 else if (match(attributeName, relAttr)) {
247 LinkRelAttribute parsedAttribute { document, attributeValue };
248 m_linkIsStyleSheet = relAttributeIsStyleSheet(parsedAttribute);
249 m_linkIsPreload = parsedAttribute.isLinkPreload;
250 } else if (match(attributeName, mediaAttr))
251 m_mediaAttribute = attributeValue;
252 else if (match(attributeName, charsetAttr))
253 m_charset = attributeValue;
254 else if (match(attributeName, crossoriginAttr))
255 m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue);
256 else if (match(attributeName, nonceAttr))
257 m_nonceAttribute = attributeValue;
258 else if (match(attributeName, asAttr))
259 m_asAttribute = attributeValue;
260 else if (match(attributeName, typeAttr))
261 m_typeAttribute = attributeValue;
262 break;
263 case TagId::Input:
264 if (match(attributeName, srcAttr))
265 setUrlToLoad(attributeValue);
266 else if (match(attributeName, typeAttr))
267 m_inputIsImage = equalLettersIgnoringASCIICase(attributeValue, "image");
268 break;
269 case TagId::Meta:
270 if (match(attributeName, contentAttr))
271 m_metaContent = attributeValue;
272 else if (match(attributeName, nameAttr))
273 m_metaIsViewport = equalLettersIgnoringASCIICase(attributeValue, "viewport");
274 else if (RuntimeEnabledFeatures::sharedFeatures().disabledAdaptationsMetaTagEnabled() && match(attributeName, nameAttr))
275 m_metaIsDisabledAdaptations = equalLettersIgnoringASCIICase(attributeValue, "disabled-adaptations");
276 break;
277 case TagId::Base:
278 case TagId::Style:
279 case TagId::Template:
280 case TagId::Picture:
281 case TagId::Unknown:
282 break;
283 }
284 }
285
286 static bool relAttributeIsStyleSheet(const LinkRelAttribute& parsedAttribute)
287 {
288 return parsedAttribute.isStyleSheet && !parsedAttribute.isAlternate && !parsedAttribute.iconType && !parsedAttribute.isDNSPrefetch;
289 }
290
291 void setUrlToLoad(const String& value, bool allowReplacement = false)
292 {
293 // We only respect the first src/href, per HTML5:
294 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
295 if (!allowReplacement && !m_urlToLoad.isEmpty())
296 return;
297 String url = stripLeadingAndTrailingHTMLSpaces(value);
298 if (url.isEmpty())
299 return;
300 m_urlToLoad = url;
301 }
302
303 const String& charset() const
304 {
305 return m_charset;
306 }
307
308 Optional<CachedResource::Type> resourceType() const
309 {
310 switch (m_tagId) {
311 case TagId::Script:
312 return CachedResource::Type::Script;
313 case TagId::Img:
314 case TagId::Input:
315 case TagId::Source:
316 ASSERT(m_tagId != TagId::Input || m_inputIsImage);
317 return CachedResource::Type::ImageResource;
318 case TagId::Link:
319 if (m_linkIsStyleSheet)
320 return CachedResource::Type::CSSStyleSheet;
321 if (m_linkIsPreload)
322 return LinkLoader::resourceTypeFromAsAttribute(m_asAttribute);
323 break;
324 case TagId::Meta:
325 case TagId::Unknown:
326 case TagId::Style:
327 case TagId::Base:
328 case TagId::Template:
329 case TagId::Picture:
330 break;
331 }
332 ASSERT_NOT_REACHED();
333 return CachedResource::Type::RawResource;
334 }
335
336 bool shouldPreload()
337 {
338 if (m_urlToLoad.isEmpty())
339 return false;
340
341 if (protocolIs(m_urlToLoad, "data") || protocolIs(m_urlToLoad, "about"))
342 return false;
343
344 if (m_tagId == TagId::Link && !m_linkIsStyleSheet && !m_linkIsPreload)
345 return false;
346
347 if (m_tagId == TagId::Input && !m_inputIsImage)
348 return false;
349
350 return true;
351 }
352
353 TagId m_tagId;
354 String m_urlToLoad;
355 String m_srcSetAttribute;
356 String m_sizesAttribute;
357 bool m_mediaMatched { true };
358 bool m_typeMatched { true };
359 String m_charset;
360 String m_crossOriginMode;
361 bool m_linkIsStyleSheet;
362 bool m_linkIsPreload;
363 String m_mediaAttribute;
364 String m_nonceAttribute;
365 String m_metaContent;
366 String m_asAttribute;
367 String m_typeAttribute;
368 bool m_metaIsViewport;
369 bool m_metaIsDisabledAdaptations;
370 bool m_inputIsImage;
371 float m_deviceScaleFactor;
372 PreloadRequest::ModuleScript m_moduleScript { PreloadRequest::ModuleScript::No };
373};
374
375TokenPreloadScanner::TokenPreloadScanner(const URL& documentURL, float deviceScaleFactor)
376 : m_documentURL(documentURL)
377 , m_deviceScaleFactor(deviceScaleFactor)
378{
379}
380
381void TokenPreloadScanner::scan(const HTMLToken& token, Vector<std::unique_ptr<PreloadRequest>>& requests, Document& document)
382{
383 switch (token.type()) {
384 case HTMLToken::Character:
385 if (!m_inStyle)
386 return;
387 m_cssScanner.scan(token.characters(), requests);
388 return;
389
390 case HTMLToken::EndTag: {
391 TagId tagId = tagIdFor(token.name());
392 if (tagId == TagId::Template) {
393 if (m_templateCount)
394 --m_templateCount;
395 return;
396 }
397 if (tagId == TagId::Style) {
398 if (m_inStyle)
399 m_cssScanner.reset();
400 m_inStyle = false;
401 } else if (tagId == TagId::Picture && !m_pictureSourceState.isEmpty())
402 m_pictureSourceState.removeLast();
403
404 return;
405 }
406
407 case HTMLToken::StartTag: {
408 if (m_templateCount)
409 return;
410 TagId tagId = tagIdFor(token.name());
411 if (tagId == TagId::Template) {
412 ++m_templateCount;
413 return;
414 }
415 if (tagId == TagId::Style) {
416 m_inStyle = true;
417 return;
418 }
419 if (tagId == TagId::Base) {
420 // The first <base> element is the one that wins.
421 if (!m_predictedBaseElementURL.isEmpty())
422 return;
423 updatePredictedBaseURL(token);
424 return;
425 }
426 if (tagId == TagId::Picture) {
427 m_pictureSourceState.append(false);
428 return;
429 }
430
431 StartTagScanner scanner(tagId, m_deviceScaleFactor);
432 scanner.processAttributes(token.attributes(), document, m_pictureSourceState);
433 if (auto request = scanner.createPreloadRequest(m_predictedBaseElementURL))
434 requests.append(WTFMove(request));
435 return;
436 }
437
438 default:
439 return;
440 }
441}
442
443void TokenPreloadScanner::updatePredictedBaseURL(const HTMLToken& token)
444{
445 ASSERT(m_predictedBaseElementURL.isEmpty());
446 if (auto* hrefAttribute = findAttribute(token.attributes(), hrefAttr->localName().string()))
447 m_predictedBaseElementURL = URL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(StringImpl::create8BitIfPossible(hrefAttribute->value))).isolatedCopy();
448}
449
450HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const URL& documentURL, float deviceScaleFactor)
451 : m_scanner(documentURL, deviceScaleFactor)
452 , m_tokenizer(options)
453{
454}
455
456void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
457{
458 m_source.append(source);
459}
460
461void HTMLPreloadScanner::scan(HTMLResourcePreloader& preloader, Document& document)
462{
463 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
464
465 const URL& startingBaseElementURL = document.baseElementURL();
466
467 // When we start scanning, our best prediction of the baseElementURL is the real one!
468 if (!startingBaseElementURL.isEmpty())
469 m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
470
471 PreloadRequestStream requests;
472
473 while (auto token = m_tokenizer.nextToken(m_source)) {
474 if (token->type() == HTMLToken::StartTag)
475 m_tokenizer.updateStateFor(AtomicString(token->name()));
476 m_scanner.scan(*token, requests, document);
477 }
478
479 preloader.preload(WTFMove(requests));
480}
481
482bool testPreloadScannerViewportSupport(Document* document)
483{
484 ASSERT(document);
485 HTMLParserOptions options(*document);
486 HTMLPreloadScanner scanner(options, document->url());
487 HTMLResourcePreloader preloader(*document);
488 scanner.appendToEnd(String("<meta name=viewport content='width=400'>"));
489 scanner.scan(preloader, *document);
490 return (document->viewportArguments().width == 400);
491}
492
493}
494