1/*
2 * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "ContentExtensionCompiler.h"
28
29#if ENABLE(CONTENT_EXTENSIONS)
30
31#include "CombinedURLFilters.h"
32#include "CompiledContentExtension.h"
33#include "ContentExtensionActions.h"
34#include "ContentExtensionError.h"
35#include "ContentExtensionParser.h"
36#include "ContentExtensionRule.h"
37#include "ContentExtensionsDebugging.h"
38#include "DFABytecodeCompiler.h"
39#include "DFACombiner.h"
40#include "NFA.h"
41#include "NFAToDFA.h"
42#include "URLFilterParser.h"
43#include <wtf/DataLog.h>
44#include <wtf/text/CString.h>
45#include <wtf/text/StringBuilder.h>
46
47namespace WebCore {
48namespace ContentExtensions {
49
50static void serializeString(Vector<SerializedActionByte>& actions, const String& string)
51{
52 // Append Selector length (4 bytes).
53 uint32_t stringLength = string.length();
54 actions.grow(actions.size() + sizeof(uint32_t));
55 *reinterpret_cast<uint32_t*>(&actions[actions.size() - sizeof(uint32_t)]) = stringLength;
56 bool wideCharacters = !string.is8Bit();
57 actions.append(wideCharacters);
58 // Append Selector.
59 if (wideCharacters) {
60 uint32_t startIndex = actions.size();
61 actions.grow(actions.size() + sizeof(UChar) * stringLength);
62 for (uint32_t i = 0; i < stringLength; ++i)
63 *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = string[i];
64 } else {
65 for (uint32_t i = 0; i < stringLength; ++i)
66 actions.append(string[i]);
67 }
68}
69
70// css-display-none combining is special because we combine the string arguments with commas because we know they are css selectors.
71struct PendingDisplayNoneActions {
72 StringBuilder combinedSelectors;
73 Vector<uint32_t> clientLocations;
74};
75
76using PendingDisplayNoneActionsMap = HashMap<Trigger, PendingDisplayNoneActions, TriggerHash, TriggerHashTraits>;
77
78static void resolvePendingDisplayNoneActions(Vector<SerializedActionByte>& actions, Vector<uint32_t>& actionLocations, PendingDisplayNoneActionsMap& map)
79{
80 for (auto& pendingDisplayNoneActions : map.values()) {
81 uint32_t actionLocation = actions.size();
82 actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector));
83 serializeString(actions, pendingDisplayNoneActions.combinedSelectors.toString());
84 for (uint32_t clientLocation : pendingDisplayNoneActions.clientLocations)
85 actionLocations[clientLocation] = actionLocation;
86 }
87 map.clear();
88}
89
90static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions)
91{
92 ASSERT(!actions.size());
93
94 Vector<uint32_t> actionLocations;
95
96 using ActionLocation = uint32_t;
97 using ActionMap = HashMap<ResourceFlags, ActionLocation, DefaultHash<ResourceFlags>::Hash, WTF::UnsignedWithZeroKeyHashTraits<ResourceFlags>>;
98 using StringActionMap = HashMap<std::pair<String, ResourceFlags>, ActionLocation, DefaultHash<std::pair<String, ResourceFlags>>::Hash, PairHashTraits<HashTraits<String>, WTF::UnsignedWithZeroKeyHashTraits<ResourceFlags>>>;
99 ActionMap blockLoadActionsMap;
100 ActionMap blockCookiesActionsMap;
101 PendingDisplayNoneActionsMap cssDisplayNoneActionsMap;
102 ActionMap ignorePreviousRuleActionsMap;
103 ActionMap makeHTTPSActionsMap;
104 StringActionMap notifyActionsMap;
105
106 for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
107 const ContentExtensionRule& rule = ruleList[ruleIndex];
108 ActionType actionType = rule.action().type();
109
110 if (actionType == ActionType::IgnorePreviousRules) {
111 resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap);
112
113 blockLoadActionsMap.clear();
114 blockCookiesActionsMap.clear();
115 cssDisplayNoneActionsMap.clear();
116 makeHTTPSActionsMap.clear();
117 notifyActionsMap.clear();
118 } else
119 ignorePreviousRuleActionsMap.clear();
120
121 // Anything with condition is just pushed.
122 // We could try to merge conditions but that case is not common in practice.
123 if (!rule.trigger().conditions.isEmpty()) {
124 actionLocations.append(actions.size());
125
126 actions.append(static_cast<SerializedActionByte>(actionType));
127 if (hasStringArgument(actionType))
128 serializeString(actions, rule.action().stringArgument());
129 else
130 ASSERT(rule.action().stringArgument().isNull());
131 continue;
132 }
133
134 ResourceFlags flags = rule.trigger().flags;
135 unsigned actionLocation = std::numeric_limits<unsigned>::max();
136
137 auto findOrMakeActionLocation = [&] (ActionMap& map) {
138 const auto existingAction = map.find(flags);
139 if (existingAction == map.end()) {
140 actionLocation = actions.size();
141 actions.append(static_cast<SerializedActionByte>(actionType));
142 map.set(flags, actionLocation);
143 } else
144 actionLocation = existingAction->value;
145 };
146
147 auto findOrMakeStringActionLocation = [&] (StringActionMap& map) {
148 const String& argument = rule.action().stringArgument();
149 auto existingAction = map.find(std::make_pair(argument, flags));
150 if (existingAction == map.end()) {
151 actionLocation = actions.size();
152 actions.append(static_cast<SerializedActionByte>(actionType));
153 serializeString(actions, argument);
154 map.set(std::make_pair(argument, flags), actionLocation);
155 } else
156 actionLocation = existingAction->value;
157 };
158
159 switch (actionType) {
160 case ActionType::CSSDisplayNoneSelector: {
161 const auto addResult = cssDisplayNoneActionsMap.add(rule.trigger(), PendingDisplayNoneActions());
162 auto& pendingStringActions = addResult.iterator->value;
163 if (!pendingStringActions.combinedSelectors.isEmpty())
164 pendingStringActions.combinedSelectors.append(',');
165 pendingStringActions.combinedSelectors.append(rule.action().stringArgument());
166 pendingStringActions.clientLocations.append(actionLocations.size());
167
168 actionLocation = std::numeric_limits<unsigned>::max();
169 break;
170 }
171 case ActionType::IgnorePreviousRules:
172 findOrMakeActionLocation(ignorePreviousRuleActionsMap);
173 break;
174 case ActionType::BlockLoad:
175 findOrMakeActionLocation(blockLoadActionsMap);
176 break;
177 case ActionType::BlockCookies:
178 findOrMakeActionLocation(blockCookiesActionsMap);
179 break;
180 case ActionType::MakeHTTPS:
181 findOrMakeActionLocation(makeHTTPSActionsMap);
182 break;
183 case ActionType::Notify:
184 findOrMakeStringActionLocation(notifyActionsMap);
185 break;
186 }
187
188 actionLocations.append(actionLocation);
189 }
190 resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap);
191 return actionLocations;
192}
193
194typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> UniversalActionSet;
195
196static void addUniversalActionsToDFA(DFA& dfa, UniversalActionSet&& universalActions)
197{
198 if (universalActions.isEmpty())
199 return;
200
201 DFANode& root = dfa.nodes[dfa.root];
202 ASSERT(!root.actionsLength());
203 unsigned actionsStart = dfa.actions.size();
204 dfa.actions.reserveCapacity(dfa.actions.size() + universalActions.size());
205 for (uint64_t action : universalActions)
206 dfa.actions.uncheckedAppend(action);
207 unsigned actionsEnd = dfa.actions.size();
208
209 unsigned actionsLength = actionsEnd - actionsStart;
210 RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything");
211 root.setActions(actionsStart, static_cast<uint16_t>(actionsLength));
212}
213
214template<typename Functor>
215static void compileToBytecode(CombinedURLFilters&& filters, UniversalActionSet&& universalActions, Functor writeBytecodeToClient)
216{
217 // Smaller maxNFASizes risk high compiling and interpreting times from having too many DFAs,
218 // larger maxNFASizes use too much memory when compiling.
219 const unsigned maxNFASize = 75000;
220
221 bool firstNFASeen = false;
222
223 auto lowerDFAToBytecode = [&](DFA&& dfa)
224 {
225#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
226 dataLogF("DFA\n");
227 dfa.debugPrintDot();
228#endif
229 ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "All actions on the DFA root should come from regular expressions that match everything.");
230
231 if (!firstNFASeen) {
232 // Put all the universal actions on the first DFA.
233 addUniversalActionsToDFA(dfa, WTFMove(universalActions));
234 }
235
236 Vector<DFABytecode> bytecode;
237 DFABytecodeCompiler compiler(dfa, bytecode);
238 compiler.compile();
239 LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
240 writeBytecodeToClient(WTFMove(bytecode));
241 firstNFASeen = true;
242 };
243
244 const unsigned smallDFASize = 100;
245 DFACombiner smallDFACombiner;
246 filters.processNFAs(maxNFASize, [&](NFA&& nfa) {
247#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
248 dataLogF("NFA\n");
249 nfa.debugPrintDot();
250#endif
251 LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed());
252 DFA dfa = NFAToDFA::convert(nfa);
253 LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
254
255 if (dfa.graphSize() < smallDFASize)
256 smallDFACombiner.addDFA(WTFMove(dfa));
257 else {
258 dfa.minimize();
259 lowerDFAToBytecode(WTFMove(dfa));
260 }
261 });
262
263 smallDFACombiner.combineDFAs(smallDFASize, [&](DFA&& dfa) {
264 LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
265 lowerDFAToBytecode(WTFMove(dfa));
266 });
267
268 ASSERT(filters.isEmpty());
269
270 if (!firstNFASeen) {
271 // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
272 // create a dummy one and add any universal actions.
273
274 DFA dummyDFA = DFA::empty();
275 addUniversalActionsToDFA(dummyDFA, WTFMove(universalActions));
276
277 Vector<DFABytecode> bytecode;
278 DFABytecodeCompiler compiler(dummyDFA, bytecode);
279 compiler.compile();
280 LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
281 writeBytecodeToClient(WTFMove(bytecode));
282 }
283 LOG_LARGE_STRUCTURES(universalActions, universalActions.capacity() * sizeof(unsigned));
284}
285
286std::error_code compileRuleList(ContentExtensionCompilationClient& client, String&& ruleJSON, Vector<ContentExtensionRule>&& parsedRuleList)
287{
288#if !ASSERT_DISABLED
289 callOnMainThread([ruleJSON = ruleJSON.isolatedCopy(), parsedRuleList = parsedRuleList.isolatedCopy()] {
290 ASSERT(parseRuleList(ruleJSON).value() == parsedRuleList);
291 });
292#endif
293
294 bool domainConditionSeen = false;
295 bool topURLConditionSeen = false;
296 for (const auto& rule : parsedRuleList) {
297 switch (rule.trigger().conditionType) {
298 case Trigger::ConditionType::None:
299 break;
300 case Trigger::ConditionType::IfDomain:
301 case Trigger::ConditionType::UnlessDomain:
302 domainConditionSeen = true;
303 break;
304 case Trigger::ConditionType::IfTopURL:
305 case Trigger::ConditionType::UnlessTopURL:
306 topURLConditionSeen = true;
307 break;
308 }
309 }
310 if (topURLConditionSeen && domainConditionSeen)
311 return ContentExtensionError::JSONTopURLAndDomainConditions;
312
313#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
314 MonotonicTime patternPartitioningStart = MonotonicTime::now();
315#endif
316
317 client.writeSource(std::exchange(ruleJSON, String()));
318
319 Vector<SerializedActionByte> actions;
320 Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions);
321 LOG_LARGE_STRUCTURES(actions, actions.capacity() * sizeof(SerializedActionByte));
322 client.writeActions(WTFMove(actions), domainConditionSeen);
323
324 UniversalActionSet universalActionsWithoutConditions;
325 UniversalActionSet universalActionsWithConditions;
326 UniversalActionSet universalTopURLActions;
327
328 // FIXME: These don't all need to be in memory at the same time.
329 CombinedURLFilters filtersWithoutConditions;
330 CombinedURLFilters filtersWithConditions;
331 CombinedURLFilters topURLFilters;
332 URLFilterParser filtersWithoutConditionParser(filtersWithoutConditions);
333 URLFilterParser filtersWithConditionParser(filtersWithConditions);
334 URLFilterParser topURLFilterParser(topURLFilters);
335
336 for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) {
337 const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex];
338 const Trigger& trigger = contentExtensionRule.trigger();
339 ASSERT(trigger.urlFilter.length());
340
341 // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode.
342 ASSERT(!trigger.flags || ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32));
343 ASSERT(!(~ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32)));
344 uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]);
345 URLFilterParser::ParseStatus status = URLFilterParser::Ok;
346 if (trigger.conditions.isEmpty()) {
347 ASSERT(trigger.conditionType == Trigger::ConditionType::None);
348 status = filtersWithoutConditionParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
349 if (status == URLFilterParser::MatchesEverything) {
350 universalActionsWithoutConditions.add(actionLocationAndFlags);
351 status = URLFilterParser::Ok;
352 }
353 if (status != URLFilterParser::Ok) {
354 dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
355 return ContentExtensionError::JSONInvalidRegex;
356 }
357 } else {
358 switch (trigger.conditionType) {
359 case Trigger::ConditionType::IfDomain:
360 case Trigger::ConditionType::IfTopURL:
361 actionLocationAndFlags |= IfConditionFlag;
362 break;
363 case Trigger::ConditionType::None:
364 case Trigger::ConditionType::UnlessDomain:
365 case Trigger::ConditionType::UnlessTopURL:
366 ASSERT(!(actionLocationAndFlags & IfConditionFlag));
367 break;
368 }
369
370 status = filtersWithConditionParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
371 if (status == URLFilterParser::MatchesEverything) {
372 universalActionsWithConditions.add(actionLocationAndFlags);
373 status = URLFilterParser::Ok;
374 }
375 if (status != URLFilterParser::Ok) {
376 dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
377 return ContentExtensionError::JSONInvalidRegex;
378 }
379 for (const String& condition : trigger.conditions) {
380 if (domainConditionSeen) {
381 ASSERT(!topURLConditionSeen);
382 topURLFilters.addDomain(actionLocationAndFlags, condition);
383 } else {
384 ASSERT(topURLConditionSeen);
385 status = topURLFilterParser.addPattern(condition, trigger.topURLConditionIsCaseSensitive, actionLocationAndFlags);
386 if (status == URLFilterParser::MatchesEverything) {
387 universalTopURLActions.add(actionLocationAndFlags);
388 status = URLFilterParser::Ok;
389 }
390 if (status != URLFilterParser::Ok) {
391 dataLogF("Error while parsing %s: %s\n", condition.utf8().data(), URLFilterParser::statusString(status).utf8().data());
392 return ContentExtensionError::JSONInvalidRegex;
393 }
394 }
395 }
396 }
397 ASSERT(status == URLFilterParser::Ok);
398 }
399 LOG_LARGE_STRUCTURES(parsedRuleList, parsedRuleList.capacity() * sizeof(ContentExtensionRule)); // Doesn't include strings.
400 LOG_LARGE_STRUCTURES(actionLocations, actionLocations.capacity() * sizeof(unsigned));
401 parsedRuleList.clear();
402 actionLocations.clear();
403
404#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
405 MonotonicTime patternPartitioningEnd = MonotonicTime::now();
406 dataLogF(" Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart).seconds());
407#endif
408
409 LOG_LARGE_STRUCTURES(filtersWithoutConditions, filtersWithoutConditions.memoryUsed());
410 LOG_LARGE_STRUCTURES(filtersWithConditions, filtersWithConditions.memoryUsed());
411 LOG_LARGE_STRUCTURES(topURLFilters, topURLFilters.memoryUsed());
412
413#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
414 MonotonicTime totalNFAToByteCodeBuildTimeStart = MonotonicTime::now();
415#endif
416
417 compileToBytecode(WTFMove(filtersWithoutConditions), WTFMove(universalActionsWithoutConditions), [&](Vector<DFABytecode>&& bytecode) {
418 client.writeFiltersWithoutConditionsBytecode(WTFMove(bytecode));
419 });
420 compileToBytecode(WTFMove(filtersWithConditions), WTFMove(universalActionsWithConditions), [&](Vector<DFABytecode>&& bytecode) {
421 client.writeFiltersWithConditionsBytecode(WTFMove(bytecode));
422 });
423 compileToBytecode(WTFMove(topURLFilters), WTFMove(universalTopURLActions), [&](Vector<DFABytecode>&& bytecode) {
424 client.writeTopURLFiltersBytecode(WTFMove(bytecode));
425 });
426
427#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
428 MonotonicTime totalNFAToByteCodeBuildTimeEnd = MonotonicTime::now();
429 dataLogF(" Time spent building and compiling the DFAs: %f\n", (totalNFAToByteCodeBuildTimeEnd - totalNFAToByteCodeBuildTimeStart).seconds());
430#endif
431
432 client.finalize();
433
434 return { };
435}
436
437} // namespace ContentExtensions
438} // namespace WebCore
439
440#endif // ENABLE(CONTENT_EXTENSIONS)
441