| 1 | /* |
| 2 | * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are |
| 6 | * met: |
| 7 | * |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * |
| 11 | * 2. Redistributions in binary form must reproduce the above |
| 12 | * copyright notice, this list of conditions and the following disclaimer |
| 13 | * in the documentation and/or other materials provided with the |
| 14 | * distribution. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. AND ITS CONTRIBUTORS |
| 17 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. |
| 20 | * OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "config.h" |
| 30 | #include "ContentSearchUtilities.h" |
| 31 | |
| 32 | #include "RegularExpression.h" |
| 33 | #include "Yarr.h" |
| 34 | #include "YarrFlags.h" |
| 35 | #include "YarrInterpreter.h" |
| 36 | #include <wtf/BumpPointerAllocator.h> |
| 37 | #include <wtf/StdLibExtras.h> |
| 38 | #include <wtf/text/StringBuilder.h> |
| 39 | #include <wtf/text/TextPosition.h> |
| 40 | |
| 41 | using namespace JSC::Yarr; |
| 42 | |
| 43 | namespace Inspector { |
| 44 | namespace ContentSearchUtilities { |
| 45 | |
| 46 | static const char regexSpecialCharacters[] = "[](){}+-*.,?\\^$|" ; |
| 47 | |
| 48 | static String createSearchRegexSource(const String& text) |
| 49 | { |
| 50 | StringBuilder result; |
| 51 | |
| 52 | for (unsigned i = 0; i < text.length(); i++) { |
| 53 | UChar character = text[i]; |
| 54 | if (isASCII(character) && strchr(regexSpecialCharacters, character)) |
| 55 | result.append('\\'); |
| 56 | result.append(character); |
| 57 | } |
| 58 | |
| 59 | return result.toString(); |
| 60 | } |
| 61 | |
// Key extractor handed to approximateBinarySearch() by textPositionFromOffset():
// yields the size_t value that |value| points at.
static inline size_t sizetExtractor(const size_t* value)
{
    return *value;
}
| 66 | |
| 67 | TextPosition textPositionFromOffset(size_t offset, const Vector<size_t>& lineEndings) |
| 68 | { |
| 69 | const size_t* foundNextStart = approximateBinarySearch<size_t, size_t>(lineEndings, lineEndings.size(), offset, sizetExtractor); |
| 70 | size_t lineIndex = foundNextStart - &lineEndings.at(0); |
| 71 | if (offset >= *foundNextStart) |
| 72 | ++lineIndex; |
| 73 | size_t lineStartOffset = lineIndex > 0 ? lineEndings.at(lineIndex - 1) : 0; |
| 74 | size_t column = offset - lineStartOffset; |
| 75 | return TextPosition(OrdinalNumber::fromZeroBasedInt(lineIndex), OrdinalNumber::fromZeroBasedInt(column)); |
| 76 | } |
| 77 | |
| 78 | static Vector<std::pair<size_t, String>> getRegularExpressionMatchesByLines(const RegularExpression& regex, const String& text) |
| 79 | { |
| 80 | Vector<std::pair<size_t, String>> result; |
| 81 | if (text.isEmpty()) |
| 82 | return result; |
| 83 | |
| 84 | auto endings = lineEndings(text); |
| 85 | size_t size = endings.size(); |
| 86 | size_t start = 0; |
| 87 | |
| 88 | for (size_t lineNumber = 0; lineNumber < size; ++lineNumber) { |
| 89 | size_t nextStart = endings[lineNumber]; |
| 90 | String line = text.substring(start, nextStart - start); |
| 91 | |
| 92 | int matchLength; |
| 93 | if (regex.match(line, 0, &matchLength) != -1) |
| 94 | result.append(std::pair<size_t, String>(lineNumber, line)); |
| 95 | |
| 96 | start = nextStart; |
| 97 | } |
| 98 | |
| 99 | return result; |
| 100 | } |
| 101 | |
| 102 | Vector<size_t> lineEndings(const String& text) |
| 103 | { |
| 104 | Vector<size_t> result; |
| 105 | |
| 106 | size_t start = 0; |
| 107 | while (start < text.length()) { |
| 108 | size_t nextStart = text.find('\n', start); |
| 109 | if (nextStart == notFound || nextStart == (text.length() - 1)) { |
| 110 | result.append(text.length()); |
| 111 | break; |
| 112 | } |
| 113 | |
| 114 | nextStart += 1; |
| 115 | result.append(nextStart); |
| 116 | start = nextStart; |
| 117 | } |
| 118 | |
| 119 | result.append(text.length()); |
| 120 | |
| 121 | return result; |
| 122 | } |
| 123 | |
| 124 | static Ref<Protocol::GenericTypes::SearchMatch> buildObjectForSearchMatch(size_t lineNumber, const String& lineContent) |
| 125 | { |
| 126 | return Protocol::GenericTypes::SearchMatch::create() |
| 127 | .setLineNumber(lineNumber) |
| 128 | .setLineContent(lineContent) |
| 129 | .release(); |
| 130 | } |
| 131 | |
| 132 | RegularExpression createSearchRegex(const String& query, bool caseSensitive, bool isRegex) |
| 133 | { |
| 134 | return RegularExpression { isRegex ? query : createSearchRegexSource(query), caseSensitive ? TextCaseSensitive : TextCaseInsensitive }; |
| 135 | } |
| 136 | |
| 137 | int countRegularExpressionMatches(const RegularExpression& regex, const String& content) |
| 138 | { |
| 139 | if (content.isEmpty()) |
| 140 | return 0; |
| 141 | |
| 142 | int result = 0; |
| 143 | int position; |
| 144 | unsigned start = 0; |
| 145 | int matchLength; |
| 146 | while ((position = regex.match(content, start, &matchLength)) != -1) { |
| 147 | if (start >= content.length()) |
| 148 | break; |
| 149 | if (matchLength > 0) |
| 150 | ++result; |
| 151 | start = position + 1; |
| 152 | } |
| 153 | return result; |
| 154 | } |
| 155 | |
| 156 | Ref<JSON::ArrayOf<Protocol::GenericTypes::SearchMatch>> searchInTextByLines(const String& text, const String& query, const bool caseSensitive, const bool isRegex) |
| 157 | { |
| 158 | auto result = JSON::ArrayOf<Protocol::GenericTypes::SearchMatch>::create(); |
| 159 | auto regex = ContentSearchUtilities::createSearchRegex(query, caseSensitive, isRegex); |
| 160 | for (const auto& match : getRegularExpressionMatchesByLines(regex, text)) |
| 161 | result->addItem(buildObjectForSearchMatch(match.first, match.second)); |
| 162 | return result; |
| 163 | } |
| 164 | |
| 165 | static String (const String& content, const String& patternString) |
| 166 | { |
| 167 | if (content.isEmpty()) |
| 168 | return String(); |
| 169 | |
| 170 | JSC::Yarr::ErrorCode error { JSC::Yarr::ErrorCode::NoError }; |
| 171 | YarrPattern pattern(patternString, JSC::Yarr::Flags::Multiline, error); |
| 172 | ASSERT(!hasError(error)); |
| 173 | BumpPointerAllocator regexAllocator; |
| 174 | auto bytecodePattern = byteCompile(pattern, ®exAllocator); |
| 175 | ASSERT(bytecodePattern); |
| 176 | |
| 177 | ASSERT(pattern.m_numSubpatterns == 1); |
| 178 | std::array<unsigned, 4> matches; |
| 179 | unsigned result = interpret(bytecodePattern.get(), content, 0, matches.data()); |
| 180 | if (result == offsetNoMatch) |
| 181 | return String(); |
| 182 | |
| 183 | ASSERT(matches[2] > 0 && matches[3] > 0); |
| 184 | return content.substring(matches[2], matches[3] - matches[2]); |
| 185 | } |
| 186 | |
| 187 | String findStylesheetSourceMapURL(const String& content) |
| 188 | { |
| 189 | // "/*# <name>=<value> */" and deprecated "/*@" |
| 190 | return findMagicComment(content, "/\\*[#@][\040\t]sourceMappingURL=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/"_s ); |
| 191 | } |
| 192 | |
| 193 | } // namespace ContentSearchUtilities |
| 194 | } // namespace Inspector |
| 195 | |