1 | /* |
2 | * Copyright (C) 2015-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
23 | * THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | #include "ContentExtensionCompiler.h" |
28 | |
29 | #if ENABLE(CONTENT_EXTENSIONS) |
30 | |
31 | #include "CombinedURLFilters.h" |
32 | #include "CompiledContentExtension.h" |
33 | #include "ContentExtensionActions.h" |
34 | #include "ContentExtensionError.h" |
35 | #include "ContentExtensionParser.h" |
36 | #include "ContentExtensionRule.h" |
37 | #include "ContentExtensionsDebugging.h" |
38 | #include "DFABytecodeCompiler.h" |
39 | #include "DFACombiner.h" |
40 | #include "NFA.h" |
41 | #include "NFAToDFA.h" |
42 | #include "URLFilterParser.h" |
43 | #include <wtf/DataLog.h> |
44 | #include <wtf/text/CString.h> |
45 | #include <wtf/text/StringBuilder.h> |
46 | |
47 | namespace WebCore { |
48 | namespace ContentExtensions { |
49 | |
50 | static void serializeString(Vector<SerializedActionByte>& actions, const String& string) |
51 | { |
52 | // Append Selector length (4 bytes). |
53 | uint32_t stringLength = string.length(); |
54 | actions.grow(actions.size() + sizeof(uint32_t)); |
55 | *reinterpret_cast<uint32_t*>(&actions[actions.size() - sizeof(uint32_t)]) = stringLength; |
56 | bool wideCharacters = !string.is8Bit(); |
57 | actions.append(wideCharacters); |
58 | // Append Selector. |
59 | if (wideCharacters) { |
60 | uint32_t startIndex = actions.size(); |
61 | actions.grow(actions.size() + sizeof(UChar) * stringLength); |
62 | for (uint32_t i = 0; i < stringLength; ++i) |
63 | *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = string[i]; |
64 | } else { |
65 | for (uint32_t i = 0; i < stringLength; ++i) |
66 | actions.append(string[i]); |
67 | } |
68 | } |
69 | |
70 | // css-display-none combining is special because we combine the string arguments with commas because we know they are css selectors. |
71 | struct PendingDisplayNoneActions { |
72 | StringBuilder combinedSelectors; |
73 | Vector<uint32_t> clientLocations; |
74 | }; |
75 | |
76 | using PendingDisplayNoneActionsMap = HashMap<Trigger, PendingDisplayNoneActions, TriggerHash, TriggerHashTraits>; |
77 | |
78 | static void resolvePendingDisplayNoneActions(Vector<SerializedActionByte>& actions, Vector<uint32_t>& actionLocations, PendingDisplayNoneActionsMap& map) |
79 | { |
80 | for (auto& pendingDisplayNoneActions : map.values()) { |
81 | uint32_t actionLocation = actions.size(); |
82 | actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector)); |
83 | serializeString(actions, pendingDisplayNoneActions.combinedSelectors.toString()); |
84 | for (uint32_t clientLocation : pendingDisplayNoneActions.clientLocations) |
85 | actionLocations[clientLocation] = actionLocation; |
86 | } |
87 | map.clear(); |
88 | } |
89 | |
90 | static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions) |
91 | { |
92 | ASSERT(!actions.size()); |
93 | |
94 | Vector<uint32_t> actionLocations; |
95 | |
96 | using ActionLocation = uint32_t; |
97 | using ActionMap = HashMap<ResourceFlags, ActionLocation, DefaultHash<ResourceFlags>::Hash, WTF::UnsignedWithZeroKeyHashTraits<ResourceFlags>>; |
98 | using StringActionMap = HashMap<std::pair<String, ResourceFlags>, ActionLocation, DefaultHash<std::pair<String, ResourceFlags>>::Hash, PairHashTraits<HashTraits<String>, WTF::UnsignedWithZeroKeyHashTraits<ResourceFlags>>>; |
99 | ActionMap blockLoadActionsMap; |
100 | ActionMap blockCookiesActionsMap; |
101 | PendingDisplayNoneActionsMap cssDisplayNoneActionsMap; |
102 | ActionMap ignorePreviousRuleActionsMap; |
103 | ActionMap makeHTTPSActionsMap; |
104 | StringActionMap notifyActionsMap; |
105 | |
106 | for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) { |
107 | const ContentExtensionRule& rule = ruleList[ruleIndex]; |
108 | ActionType actionType = rule.action().type(); |
109 | |
110 | if (actionType == ActionType::IgnorePreviousRules) { |
111 | resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap); |
112 | |
113 | blockLoadActionsMap.clear(); |
114 | blockCookiesActionsMap.clear(); |
115 | cssDisplayNoneActionsMap.clear(); |
116 | makeHTTPSActionsMap.clear(); |
117 | notifyActionsMap.clear(); |
118 | } else |
119 | ignorePreviousRuleActionsMap.clear(); |
120 | |
121 | // Anything with condition is just pushed. |
122 | // We could try to merge conditions but that case is not common in practice. |
123 | if (!rule.trigger().conditions.isEmpty()) { |
124 | actionLocations.append(actions.size()); |
125 | |
126 | actions.append(static_cast<SerializedActionByte>(actionType)); |
127 | if (hasStringArgument(actionType)) |
128 | serializeString(actions, rule.action().stringArgument()); |
129 | else |
130 | ASSERT(rule.action().stringArgument().isNull()); |
131 | continue; |
132 | } |
133 | |
134 | ResourceFlags flags = rule.trigger().flags; |
135 | unsigned actionLocation = std::numeric_limits<unsigned>::max(); |
136 | |
137 | auto findOrMakeActionLocation = [&] (ActionMap& map) { |
138 | const auto existingAction = map.find(flags); |
139 | if (existingAction == map.end()) { |
140 | actionLocation = actions.size(); |
141 | actions.append(static_cast<SerializedActionByte>(actionType)); |
142 | map.set(flags, actionLocation); |
143 | } else |
144 | actionLocation = existingAction->value; |
145 | }; |
146 | |
147 | auto findOrMakeStringActionLocation = [&] (StringActionMap& map) { |
148 | const String& argument = rule.action().stringArgument(); |
149 | auto existingAction = map.find(std::make_pair(argument, flags)); |
150 | if (existingAction == map.end()) { |
151 | actionLocation = actions.size(); |
152 | actions.append(static_cast<SerializedActionByte>(actionType)); |
153 | serializeString(actions, argument); |
154 | map.set(std::make_pair(argument, flags), actionLocation); |
155 | } else |
156 | actionLocation = existingAction->value; |
157 | }; |
158 | |
159 | switch (actionType) { |
160 | case ActionType::CSSDisplayNoneSelector: { |
161 | const auto addResult = cssDisplayNoneActionsMap.add(rule.trigger(), PendingDisplayNoneActions()); |
162 | auto& pendingStringActions = addResult.iterator->value; |
163 | if (!pendingStringActions.combinedSelectors.isEmpty()) |
164 | pendingStringActions.combinedSelectors.append(','); |
165 | pendingStringActions.combinedSelectors.append(rule.action().stringArgument()); |
166 | pendingStringActions.clientLocations.append(actionLocations.size()); |
167 | |
168 | actionLocation = std::numeric_limits<unsigned>::max(); |
169 | break; |
170 | } |
171 | case ActionType::IgnorePreviousRules: |
172 | findOrMakeActionLocation(ignorePreviousRuleActionsMap); |
173 | break; |
174 | case ActionType::BlockLoad: |
175 | findOrMakeActionLocation(blockLoadActionsMap); |
176 | break; |
177 | case ActionType::BlockCookies: |
178 | findOrMakeActionLocation(blockCookiesActionsMap); |
179 | break; |
180 | case ActionType::MakeHTTPS: |
181 | findOrMakeActionLocation(makeHTTPSActionsMap); |
182 | break; |
183 | case ActionType::Notify: |
184 | findOrMakeStringActionLocation(notifyActionsMap); |
185 | break; |
186 | } |
187 | |
188 | actionLocations.append(actionLocation); |
189 | } |
190 | resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap); |
191 | return actionLocations; |
192 | } |
193 | |
194 | typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> UniversalActionSet; |
195 | |
196 | static void addUniversalActionsToDFA(DFA& dfa, UniversalActionSet&& universalActions) |
197 | { |
198 | if (universalActions.isEmpty()) |
199 | return; |
200 | |
201 | DFANode& root = dfa.nodes[dfa.root]; |
202 | ASSERT(!root.actionsLength()); |
203 | unsigned actionsStart = dfa.actions.size(); |
204 | dfa.actions.reserveCapacity(dfa.actions.size() + universalActions.size()); |
205 | for (uint64_t action : universalActions) |
206 | dfa.actions.uncheckedAppend(action); |
207 | unsigned actionsEnd = dfa.actions.size(); |
208 | |
209 | unsigned actionsLength = actionsEnd - actionsStart; |
210 | RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything" ); |
211 | root.setActions(actionsStart, static_cast<uint16_t>(actionsLength)); |
212 | } |
213 | |
214 | template<typename Functor> |
215 | static void compileToBytecode(CombinedURLFilters&& filters, UniversalActionSet&& universalActions, Functor writeBytecodeToClient) |
216 | { |
217 | // Smaller maxNFASizes risk high compiling and interpreting times from having too many DFAs, |
218 | // larger maxNFASizes use too much memory when compiling. |
219 | const unsigned maxNFASize = 75000; |
220 | |
221 | bool firstNFASeen = false; |
222 | |
223 | auto lowerDFAToBytecode = [&](DFA&& dfa) |
224 | { |
225 | #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING |
226 | dataLogF("DFA\n" ); |
227 | dfa.debugPrintDot(); |
228 | #endif |
229 | ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "All actions on the DFA root should come from regular expressions that match everything." ); |
230 | |
231 | if (!firstNFASeen) { |
232 | // Put all the universal actions on the first DFA. |
233 | addUniversalActionsToDFA(dfa, WTFMove(universalActions)); |
234 | } |
235 | |
236 | Vector<DFABytecode> bytecode; |
237 | DFABytecodeCompiler compiler(dfa, bytecode); |
238 | compiler.compile(); |
239 | LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t)); |
240 | writeBytecodeToClient(WTFMove(bytecode)); |
241 | firstNFASeen = true; |
242 | }; |
243 | |
244 | const unsigned smallDFASize = 100; |
245 | DFACombiner smallDFACombiner; |
246 | filters.processNFAs(maxNFASize, [&](NFA&& nfa) { |
247 | #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING |
248 | dataLogF("NFA\n" ); |
249 | nfa.debugPrintDot(); |
250 | #endif |
251 | LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed()); |
252 | DFA dfa = NFAToDFA::convert(nfa); |
253 | LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed()); |
254 | |
255 | if (dfa.graphSize() < smallDFASize) |
256 | smallDFACombiner.addDFA(WTFMove(dfa)); |
257 | else { |
258 | dfa.minimize(); |
259 | lowerDFAToBytecode(WTFMove(dfa)); |
260 | } |
261 | }); |
262 | |
263 | smallDFACombiner.combineDFAs(smallDFASize, [&](DFA&& dfa) { |
264 | LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed()); |
265 | lowerDFAToBytecode(WTFMove(dfa)); |
266 | }); |
267 | |
268 | ASSERT(filters.isEmpty()); |
269 | |
270 | if (!firstNFASeen) { |
271 | // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any |
272 | // create a dummy one and add any universal actions. |
273 | |
274 | DFA dummyDFA = DFA::empty(); |
275 | addUniversalActionsToDFA(dummyDFA, WTFMove(universalActions)); |
276 | |
277 | Vector<DFABytecode> bytecode; |
278 | DFABytecodeCompiler compiler(dummyDFA, bytecode); |
279 | compiler.compile(); |
280 | LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t)); |
281 | writeBytecodeToClient(WTFMove(bytecode)); |
282 | } |
283 | LOG_LARGE_STRUCTURES(universalActions, universalActions.capacity() * sizeof(unsigned)); |
284 | } |
285 | |
286 | std::error_code compileRuleList(ContentExtensionCompilationClient& client, String&& ruleJSON, Vector<ContentExtensionRule>&& parsedRuleList) |
287 | { |
288 | #if !ASSERT_DISABLED |
289 | callOnMainThread([ruleJSON = ruleJSON.isolatedCopy(), parsedRuleList = parsedRuleList.isolatedCopy()] { |
290 | ASSERT(parseRuleList(ruleJSON).value() == parsedRuleList); |
291 | }); |
292 | #endif |
293 | |
294 | bool domainConditionSeen = false; |
295 | bool topURLConditionSeen = false; |
296 | for (const auto& rule : parsedRuleList) { |
297 | switch (rule.trigger().conditionType) { |
298 | case Trigger::ConditionType::None: |
299 | break; |
300 | case Trigger::ConditionType::IfDomain: |
301 | case Trigger::ConditionType::UnlessDomain: |
302 | domainConditionSeen = true; |
303 | break; |
304 | case Trigger::ConditionType::IfTopURL: |
305 | case Trigger::ConditionType::UnlessTopURL: |
306 | topURLConditionSeen = true; |
307 | break; |
308 | } |
309 | } |
310 | if (topURLConditionSeen && domainConditionSeen) |
311 | return ContentExtensionError::JSONTopURLAndDomainConditions; |
312 | |
313 | #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING |
314 | MonotonicTime patternPartitioningStart = MonotonicTime::now(); |
315 | #endif |
316 | |
317 | client.writeSource(std::exchange(ruleJSON, String())); |
318 | |
319 | Vector<SerializedActionByte> actions; |
320 | Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions); |
321 | LOG_LARGE_STRUCTURES(actions, actions.capacity() * sizeof(SerializedActionByte)); |
322 | client.writeActions(WTFMove(actions), domainConditionSeen); |
323 | |
324 | UniversalActionSet universalActionsWithoutConditions; |
325 | UniversalActionSet universalActionsWithConditions; |
326 | UniversalActionSet universalTopURLActions; |
327 | |
328 | // FIXME: These don't all need to be in memory at the same time. |
329 | CombinedURLFilters filtersWithoutConditions; |
330 | CombinedURLFilters filtersWithConditions; |
331 | CombinedURLFilters topURLFilters; |
332 | URLFilterParser filtersWithoutConditionParser(filtersWithoutConditions); |
333 | URLFilterParser filtersWithConditionParser(filtersWithConditions); |
334 | URLFilterParser topURLFilterParser(topURLFilters); |
335 | |
336 | for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) { |
337 | const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex]; |
338 | const Trigger& trigger = contentExtensionRule.trigger(); |
339 | ASSERT(trigger.urlFilter.length()); |
340 | |
341 | // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode. |
342 | ASSERT(!trigger.flags || ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32)); |
343 | ASSERT(!(~ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32))); |
344 | uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]); |
345 | URLFilterParser::ParseStatus status = URLFilterParser::Ok; |
346 | if (trigger.conditions.isEmpty()) { |
347 | ASSERT(trigger.conditionType == Trigger::ConditionType::None); |
348 | status = filtersWithoutConditionParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags); |
349 | if (status == URLFilterParser::MatchesEverything) { |
350 | universalActionsWithoutConditions.add(actionLocationAndFlags); |
351 | status = URLFilterParser::Ok; |
352 | } |
353 | if (status != URLFilterParser::Ok) { |
354 | dataLogF("Error while parsing %s: %s\n" , trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data()); |
355 | return ContentExtensionError::JSONInvalidRegex; |
356 | } |
357 | } else { |
358 | switch (trigger.conditionType) { |
359 | case Trigger::ConditionType::IfDomain: |
360 | case Trigger::ConditionType::IfTopURL: |
361 | actionLocationAndFlags |= IfConditionFlag; |
362 | break; |
363 | case Trigger::ConditionType::None: |
364 | case Trigger::ConditionType::UnlessDomain: |
365 | case Trigger::ConditionType::UnlessTopURL: |
366 | ASSERT(!(actionLocationAndFlags & IfConditionFlag)); |
367 | break; |
368 | } |
369 | |
370 | status = filtersWithConditionParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags); |
371 | if (status == URLFilterParser::MatchesEverything) { |
372 | universalActionsWithConditions.add(actionLocationAndFlags); |
373 | status = URLFilterParser::Ok; |
374 | } |
375 | if (status != URLFilterParser::Ok) { |
376 | dataLogF("Error while parsing %s: %s\n" , trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data()); |
377 | return ContentExtensionError::JSONInvalidRegex; |
378 | } |
379 | for (const String& condition : trigger.conditions) { |
380 | if (domainConditionSeen) { |
381 | ASSERT(!topURLConditionSeen); |
382 | topURLFilters.addDomain(actionLocationAndFlags, condition); |
383 | } else { |
384 | ASSERT(topURLConditionSeen); |
385 | status = topURLFilterParser.addPattern(condition, trigger.topURLConditionIsCaseSensitive, actionLocationAndFlags); |
386 | if (status == URLFilterParser::MatchesEverything) { |
387 | universalTopURLActions.add(actionLocationAndFlags); |
388 | status = URLFilterParser::Ok; |
389 | } |
390 | if (status != URLFilterParser::Ok) { |
391 | dataLogF("Error while parsing %s: %s\n" , condition.utf8().data(), URLFilterParser::statusString(status).utf8().data()); |
392 | return ContentExtensionError::JSONInvalidRegex; |
393 | } |
394 | } |
395 | } |
396 | } |
397 | ASSERT(status == URLFilterParser::Ok); |
398 | } |
399 | LOG_LARGE_STRUCTURES(parsedRuleList, parsedRuleList.capacity() * sizeof(ContentExtensionRule)); // Doesn't include strings. |
400 | LOG_LARGE_STRUCTURES(actionLocations, actionLocations.capacity() * sizeof(unsigned)); |
401 | parsedRuleList.clear(); |
402 | actionLocations.clear(); |
403 | |
404 | #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING |
405 | MonotonicTime patternPartitioningEnd = MonotonicTime::now(); |
406 | dataLogF(" Time spent partitioning the rules into groups: %f\n" , (patternPartitioningEnd - patternPartitioningStart).seconds()); |
407 | #endif |
408 | |
409 | LOG_LARGE_STRUCTURES(filtersWithoutConditions, filtersWithoutConditions.memoryUsed()); |
410 | LOG_LARGE_STRUCTURES(filtersWithConditions, filtersWithConditions.memoryUsed()); |
411 | LOG_LARGE_STRUCTURES(topURLFilters, topURLFilters.memoryUsed()); |
412 | |
413 | #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING |
414 | MonotonicTime totalNFAToByteCodeBuildTimeStart = MonotonicTime::now(); |
415 | #endif |
416 | |
417 | compileToBytecode(WTFMove(filtersWithoutConditions), WTFMove(universalActionsWithoutConditions), [&](Vector<DFABytecode>&& bytecode) { |
418 | client.writeFiltersWithoutConditionsBytecode(WTFMove(bytecode)); |
419 | }); |
420 | compileToBytecode(WTFMove(filtersWithConditions), WTFMove(universalActionsWithConditions), [&](Vector<DFABytecode>&& bytecode) { |
421 | client.writeFiltersWithConditionsBytecode(WTFMove(bytecode)); |
422 | }); |
423 | compileToBytecode(WTFMove(topURLFilters), WTFMove(universalTopURLActions), [&](Vector<DFABytecode>&& bytecode) { |
424 | client.writeTopURLFiltersBytecode(WTFMove(bytecode)); |
425 | }); |
426 | |
427 | #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING |
428 | MonotonicTime totalNFAToByteCodeBuildTimeEnd = MonotonicTime::now(); |
429 | dataLogF(" Time spent building and compiling the DFAs: %f\n" , (totalNFAToByteCodeBuildTimeEnd - totalNFAToByteCodeBuildTimeStart).seconds()); |
430 | #endif |
431 | |
432 | client.finalize(); |
433 | |
434 | return { }; |
435 | } |
436 | |
437 | } // namespace ContentExtensions |
438 | } // namespace WebCore |
439 | |
440 | #endif // ENABLE(CONTENT_EXTENSIONS) |
441 | |