| 1 | /* |
| 2 | * Copyright (C) 2015-2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| 14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| 15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| 17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| 23 | * THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #pragma once |
| 27 | |
| 28 | #include <algorithm> |
| 29 | #include <unicode/uchar.h> |
| 30 | #include <wtf/ASCIICType.h> |
| 31 | #include <wtf/NotFound.h> |
| 32 | #include <wtf/UnalignedAccess.h> |
| 33 | |
| 34 | namespace WTF { |
| 35 | |
| 36 | using CodeUnitMatchFunction = bool (*)(UChar); |
| 37 | |
| 38 | template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, const CharacterTypeB*, unsigned length); |
| 39 | template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, unsigned lengthA, const CharacterTypeB*, unsigned lengthB); |
| 40 | |
| 41 | template<typename StringClassA, typename StringClassB> bool equalIgnoringASCIICaseCommon(const StringClassA&, const StringClassB&); |
| 42 | |
| 43 | template<typename CharacterType> bool equalLettersIgnoringASCIICase(const CharacterType*, const char* lowercaseLetters, unsigned length); |
| 44 | template<typename CharacterType, unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const CharacterType*, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]); |
| 45 | |
| 46 | template<typename StringClass, unsigned length> bool equalLettersIgnoringASCIICaseCommon(const StringClass&, const char (&lowercaseLetters)[length]); |
| 47 | |
| 48 | bool equalIgnoringASCIICase(const char*, const char*); |
| 49 | template<unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const char*, const char (&lowercaseLetters)[lowercaseLettersLength]); |
| 50 | |
| 51 | // Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. |
| 52 | #if (CPU(X86_64) || CPU(ARM64)) && !ASAN_ENABLED |
| 53 | ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) |
| 54 | { |
| 55 | unsigned dwordLength = length >> 3; |
| 56 | |
| 57 | const char* a = reinterpret_cast<const char*>(aLChar); |
| 58 | const char* b = reinterpret_cast<const char*>(bLChar); |
| 59 | |
| 60 | if (dwordLength) { |
| 61 | for (unsigned i = 0; i != dwordLength; ++i) { |
| 62 | if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b)) |
| 63 | return false; |
| 64 | |
| 65 | a += sizeof(uint64_t); |
| 66 | b += sizeof(uint64_t); |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | if (length & 4) { |
| 71 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
| 72 | return false; |
| 73 | |
| 74 | a += sizeof(uint32_t); |
| 75 | b += sizeof(uint32_t); |
| 76 | } |
| 77 | |
| 78 | if (length & 2) { |
| 79 | if (unalignedLoad<uint16_t>(a) != unalignedLoad<uint16_t>(b)) |
| 80 | return false; |
| 81 | |
| 82 | a += sizeof(uint16_t); |
| 83 | b += sizeof(uint16_t); |
| 84 | } |
| 85 | |
| 86 | if (length & 1 && (*reinterpret_cast<const LChar*>(a) != *reinterpret_cast<const LChar*>(b))) |
| 87 | return false; |
| 88 | |
| 89 | return true; |
| 90 | } |
| 91 | |
| 92 | ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) |
| 93 | { |
| 94 | unsigned dwordLength = length >> 2; |
| 95 | |
| 96 | const char* a = reinterpret_cast<const char*>(aUChar); |
| 97 | const char* b = reinterpret_cast<const char*>(bUChar); |
| 98 | |
| 99 | if (dwordLength) { |
| 100 | for (unsigned i = 0; i != dwordLength; ++i) { |
| 101 | if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b)) |
| 102 | return false; |
| 103 | |
| 104 | a += sizeof(uint64_t); |
| 105 | b += sizeof(uint64_t); |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | if (length & 2) { |
| 110 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
| 111 | return false; |
| 112 | |
| 113 | a += sizeof(uint32_t); |
| 114 | b += sizeof(uint32_t); |
| 115 | } |
| 116 | |
| 117 | if (length & 1 && (*reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b))) |
| 118 | return false; |
| 119 | |
| 120 | return true; |
| 121 | } |
| 122 | #elif CPU(X86) && !ASAN_ENABLED |
| 123 | ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) |
| 124 | { |
| 125 | const char* a = reinterpret_cast<const char*>(aLChar); |
| 126 | const char* b = reinterpret_cast<const char*>(bLChar); |
| 127 | |
| 128 | unsigned wordLength = length >> 2; |
| 129 | for (unsigned i = 0; i != wordLength; ++i) { |
| 130 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
| 131 | return false; |
| 132 | a += sizeof(uint32_t); |
| 133 | b += sizeof(uint32_t); |
| 134 | } |
| 135 | |
| 136 | length &= 3; |
| 137 | |
| 138 | if (length) { |
| 139 | const LChar* aRemainder = reinterpret_cast<const LChar*>(a); |
| 140 | const LChar* bRemainder = reinterpret_cast<const LChar*>(b); |
| 141 | |
| 142 | for (unsigned i = 0; i < length; ++i) { |
| 143 | if (aRemainder[i] != bRemainder[i]) |
| 144 | return false; |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | return true; |
| 149 | } |
| 150 | |
| 151 | ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) |
| 152 | { |
| 153 | const char* a = reinterpret_cast<const char*>(aUChar); |
| 154 | const char* b = reinterpret_cast<const char*>(bUChar); |
| 155 | |
| 156 | unsigned wordLength = length >> 1; |
| 157 | for (unsigned i = 0; i != wordLength; ++i) { |
| 158 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
| 159 | return false; |
| 160 | a += sizeof(uint32_t); |
| 161 | b += sizeof(uint32_t); |
| 162 | } |
| 163 | |
| 164 | if (length & 1 && *reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b)) |
| 165 | return false; |
| 166 | |
| 167 | return true; |
| 168 | } |
| 169 | #elif PLATFORM(IOS_FAMILY) && WTF_ARM_ARCH_AT_LEAST(7) && !ASAN_ENABLED |
| 170 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) |
| 171 | { |
| 172 | bool isEqual = false; |
| 173 | uint32_t aValue; |
| 174 | uint32_t bValue; |
| 175 | asm("subs %[length], #4\n" |
| 176 | "blo 2f\n" |
| 177 | |
| 178 | "0:\n" // Label 0 = Start of loop over 32 bits. |
| 179 | "ldr %[aValue], [%[a]], #4\n" |
| 180 | "ldr %[bValue], [%[b]], #4\n" |
| 181 | "cmp %[aValue], %[bValue]\n" |
| 182 | "bne 66f\n" |
| 183 | "subs %[length], #4\n" |
| 184 | "bhs 0b\n" |
| 185 | |
| 186 | // At this point, length can be: |
| 187 | // -0: 00000000000000000000000000000000 (0 bytes left) |
| 188 | // -1: 11111111111111111111111111111111 (3 bytes left) |
| 189 | // -2: 11111111111111111111111111111110 (2 bytes left) |
| 190 | // -3: 11111111111111111111111111111101 (1 byte left) |
| 191 | // -4: 11111111111111111111111111111100 (length was 0) |
| 192 | // The pointers are at the correct position. |
| 193 | "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. |
| 194 | "tst %[length], #2\n" |
| 195 | "beq 1f\n" |
| 196 | "ldrh %[aValue], [%[a]], #2\n" |
| 197 | "ldrh %[bValue], [%[b]], #2\n" |
| 198 | "cmp %[aValue], %[bValue]\n" |
| 199 | "bne 66f\n" |
| 200 | |
| 201 | "1:\n" // Label 1 = Check for a single character left. |
| 202 | "tst %[length], #1\n" |
| 203 | "beq 42f\n" |
| 204 | "ldrb %[aValue], [%[a]]\n" |
| 205 | "ldrb %[bValue], [%[b]]\n" |
| 206 | "cmp %[aValue], %[bValue]\n" |
| 207 | "bne 66f\n" |
| 208 | |
| 209 | "42:\n" // Label 42 = Success. |
| 210 | "mov %[isEqual], #1\n" |
| 211 | "66:\n" // Label 66 = End without changing isEqual to 1. |
| 212 | : [length]"+r" (length), [isEqual]"+r" (isEqual), [a]"+r" (a), [b]"+r" (b), [aValue]"+r" (aValue), [bValue]"+r" (bValue) |
| 213 | : |
| 214 | : |
| 215 | ); |
| 216 | return isEqual; |
| 217 | } |
| 218 | |
| 219 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) |
| 220 | { |
| 221 | bool isEqual = false; |
| 222 | uint32_t aValue; |
| 223 | uint32_t bValue; |
| 224 | asm("subs %[length], #2\n" |
| 225 | "blo 1f\n" |
| 226 | |
| 227 | "0:\n" // Label 0 = Start of loop over 32 bits. |
| 228 | "ldr %[aValue], [%[a]], #4\n" |
| 229 | "ldr %[bValue], [%[b]], #4\n" |
| 230 | "cmp %[aValue], %[bValue]\n" |
| 231 | "bne 66f\n" |
| 232 | "subs %[length], #2\n" |
| 233 | "bhs 0b\n" |
| 234 | |
| 235 | // At this point, length can be: |
| 236 | // -0: 00000000000000000000000000000000 (0 bytes left) |
| 237 | // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) |
| 238 | // -2: 11111111111111111111111111111110 (length was zero) |
| 239 | // The pointers are at the correct position. |
| 240 | "1:\n" // Label 1 = Check for a single character left. |
| 241 | "tst %[length], #1\n" |
| 242 | "beq 42f\n" |
| 243 | "ldrh %[aValue], [%[a]]\n" |
| 244 | "ldrh %[bValue], [%[b]]\n" |
| 245 | "cmp %[aValue], %[bValue]\n" |
| 246 | "bne 66f\n" |
| 247 | |
| 248 | "42:\n" // Label 42 = Success. |
| 249 | "mov %[isEqual], #1\n" |
| 250 | "66:\n" // Label 66 = End without changing isEqual to 1. |
| 251 | : [length]"+r" (length), [isEqual]"+r" (isEqual), [a]"+r" (a), [b]"+r" (b), [aValue]"+r" (aValue), [bValue]"+r" (bValue) |
| 252 | : |
| 253 | : |
| 254 | ); |
| 255 | return isEqual; |
| 256 | } |
| 257 | #elif !ASAN_ENABLED |
| 258 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } |
| 259 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } |
| 260 | #else |
| 261 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) |
| 262 | { |
| 263 | for (unsigned i = 0; i < length; ++i) { |
| 264 | if (a[i] != b[i]) |
| 265 | return false; |
| 266 | } |
| 267 | return true; |
| 268 | } |
| 269 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) |
| 270 | { |
| 271 | for (unsigned i = 0; i < length; ++i) { |
| 272 | if (a[i] != b[i]) |
| 273 | return false; |
| 274 | } |
| 275 | return true; |
| 276 | } |
| 277 | #endif |
| 278 | |
| 279 | ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) |
| 280 | { |
| 281 | for (unsigned i = 0; i < length; ++i) { |
| 282 | if (a[i] != b[i]) |
| 283 | return false; |
| 284 | } |
| 285 | return true; |
| 286 | } |
| 287 | |
| 288 | ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } |
| 289 | |
| 290 | template<typename StringClassA, typename StringClassB> |
| 291 | ALWAYS_INLINE bool equalCommon(const StringClassA& a, const StringClassB& b) |
| 292 | { |
| 293 | unsigned length = a.length(); |
| 294 | if (length != b.length()) |
| 295 | return false; |
| 296 | |
| 297 | if (a.is8Bit()) { |
| 298 | if (b.is8Bit()) |
| 299 | return equal(a.characters8(), b.characters8(), length); |
| 300 | |
| 301 | return equal(a.characters8(), b.characters16(), length); |
| 302 | } |
| 303 | |
| 304 | if (b.is8Bit()) |
| 305 | return equal(a.characters16(), b.characters8(), length); |
| 306 | |
| 307 | return equal(a.characters16(), b.characters16(), length); |
| 308 | } |
| 309 | |
| 310 | template<typename StringClassA, typename StringClassB> |
| 311 | ALWAYS_INLINE bool equalCommon(const StringClassA* a, const StringClassB* b) |
| 312 | { |
| 313 | if (a == b) |
| 314 | return true; |
| 315 | if (!a || !b) |
| 316 | return false; |
| 317 | return equal(*a, *b); |
| 318 | } |
| 319 | |
| 320 | template<typename StringClass, unsigned length> bool equal(const StringClass& a, const UChar (&codeUnits)[length]) |
| 321 | { |
| 322 | if (a.length() != length) |
| 323 | return false; |
| 324 | |
| 325 | if (a.is8Bit()) |
| 326 | return equal(a.characters8(), codeUnits, length); |
| 327 | |
| 328 | return equal(a.characters16(), codeUnits, length); |
| 329 | } |
| 330 | |
| 331 | template<typename CharacterTypeA, typename CharacterTypeB> |
| 332 | inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB* b, unsigned length) |
| 333 | { |
| 334 | for (unsigned i = 0; i < length; ++i) { |
| 335 | if (toASCIILower(a[i]) != toASCIILower(b[i])) |
| 336 | return false; |
| 337 | } |
| 338 | return true; |
| 339 | } |
| 340 | |
| 341 | template<typename CharacterTypeA, typename CharacterTypeB> inline bool equalIgnoringASCIICase(const CharacterTypeA* a, unsigned lengthA, const CharacterTypeB* b, unsigned lengthB) |
| 342 | { |
| 343 | return lengthA == lengthB && equalIgnoringASCIICase(a, b, lengthA); |
| 344 | } |
| 345 | |
| 346 | template<typename StringClassA, typename StringClassB> |
| 347 | bool equalIgnoringASCIICaseCommon(const StringClassA& a, const StringClassB& b) |
| 348 | { |
| 349 | unsigned length = a.length(); |
| 350 | if (length != b.length()) |
| 351 | return false; |
| 352 | |
| 353 | if (a.is8Bit()) { |
| 354 | if (b.is8Bit()) |
| 355 | return equalIgnoringASCIICase(a.characters8(), b.characters8(), length); |
| 356 | |
| 357 | return equalIgnoringASCIICase(a.characters8(), b.characters16(), length); |
| 358 | } |
| 359 | |
| 360 | if (b.is8Bit()) |
| 361 | return equalIgnoringASCIICase(a.characters16(), b.characters8(), length); |
| 362 | |
| 363 | return equalIgnoringASCIICase(a.characters16(), b.characters16(), length); |
| 364 | } |
| 365 | |
| 366 | template<typename StringClassA> bool equalIgnoringASCIICaseCommon(const StringClassA& a, const char* b) |
| 367 | { |
| 368 | unsigned length = a.length(); |
| 369 | if (length != strlen(b)) |
| 370 | return false; |
| 371 | |
| 372 | if (a.is8Bit()) |
| 373 | return equalIgnoringASCIICase(a.characters8(), b, length); |
| 374 | |
| 375 | return equalIgnoringASCIICase(a.characters16(), b, length); |
| 376 | } |
| 377 | |
| 378 | template<typename StringClassA, typename StringClassB> |
| 379 | bool startsWith(const StringClassA& reference, const StringClassB& prefix) |
| 380 | { |
| 381 | unsigned prefixLength = prefix.length(); |
| 382 | if (prefixLength > reference.length()) |
| 383 | return false; |
| 384 | |
| 385 | if (reference.is8Bit()) { |
| 386 | if (prefix.is8Bit()) |
| 387 | return equal(reference.characters8(), prefix.characters8(), prefixLength); |
| 388 | return equal(reference.characters8(), prefix.characters16(), prefixLength); |
| 389 | } |
| 390 | if (prefix.is8Bit()) |
| 391 | return equal(reference.characters16(), prefix.characters8(), prefixLength); |
| 392 | return equal(reference.characters16(), prefix.characters16(), prefixLength); |
| 393 | } |
| 394 | |
| 395 | template<typename StringClassA, typename StringClassB> |
| 396 | bool startsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& prefix) |
| 397 | { |
| 398 | unsigned prefixLength = prefix.length(); |
| 399 | if (prefixLength > reference.length()) |
| 400 | return false; |
| 401 | |
| 402 | if (reference.is8Bit()) { |
| 403 | if (prefix.is8Bit()) |
| 404 | return equalIgnoringASCIICase(reference.characters8(), prefix.characters8(), prefixLength); |
| 405 | return equalIgnoringASCIICase(reference.characters8(), prefix.characters16(), prefixLength); |
| 406 | } |
| 407 | if (prefix.is8Bit()) |
| 408 | return equalIgnoringASCIICase(reference.characters16(), prefix.characters8(), prefixLength); |
| 409 | return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(), prefixLength); |
| 410 | } |
| 411 | |
| 412 | template<typename StringClassA, typename StringClassB> |
| 413 | bool endsWith(const StringClassA& reference, const StringClassB& suffix) |
| 414 | { |
| 415 | unsigned suffixLength = suffix.length(); |
| 416 | unsigned referenceLength = reference.length(); |
| 417 | if (suffixLength > referenceLength) |
| 418 | return false; |
| 419 | |
| 420 | unsigned startOffset = referenceLength - suffixLength; |
| 421 | |
| 422 | if (reference.is8Bit()) { |
| 423 | if (suffix.is8Bit()) |
| 424 | return equal(reference.characters8() + startOffset, suffix.characters8(), suffixLength); |
| 425 | return equal(reference.characters8() + startOffset, suffix.characters16(), suffixLength); |
| 426 | } |
| 427 | if (suffix.is8Bit()) |
| 428 | return equal(reference.characters16() + startOffset, suffix.characters8(), suffixLength); |
| 429 | return equal(reference.characters16() + startOffset, suffix.characters16(), suffixLength); |
| 430 | } |
| 431 | |
| 432 | template<typename StringClassA, typename StringClassB> |
| 433 | bool endsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& suffix) |
| 434 | { |
| 435 | unsigned suffixLength = suffix.length(); |
| 436 | unsigned referenceLength = reference.length(); |
| 437 | if (suffixLength > referenceLength) |
| 438 | return false; |
| 439 | |
| 440 | unsigned startOffset = referenceLength - suffixLength; |
| 441 | |
| 442 | if (reference.is8Bit()) { |
| 443 | if (suffix.is8Bit()) |
| 444 | return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters8(), suffixLength); |
| 445 | return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters16(), suffixLength); |
| 446 | } |
| 447 | if (suffix.is8Bit()) |
| 448 | return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters8(), suffixLength); |
| 449 | return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters16(), suffixLength); |
| 450 | } |
| 451 | |
| 452 | template <typename SearchCharacterType, typename MatchCharacterType> |
| 453 | size_t findIgnoringASCIICase(const SearchCharacterType* source, const MatchCharacterType* matchCharacters, unsigned startOffset, unsigned searchLength, unsigned matchLength) |
| 454 | { |
| 455 | ASSERT(searchLength >= matchLength); |
| 456 | |
| 457 | const SearchCharacterType* startSearchedCharacters = source + startOffset; |
| 458 | |
| 459 | // delta is the number of additional times to test; delta == 0 means test only once. |
| 460 | unsigned delta = searchLength - matchLength; |
| 461 | |
| 462 | for (unsigned i = 0; i <= delta; ++i) { |
| 463 | if (equalIgnoringASCIICase(startSearchedCharacters + i, matchCharacters, matchLength)) |
| 464 | return startOffset + i; |
| 465 | } |
| 466 | return notFound; |
| 467 | } |
| 468 | |
| 469 | inline size_t findIgnoringASCIICaseWithoutLength(const char* source, const char* matchCharacters) |
| 470 | { |
| 471 | unsigned searchLength = strlen(source); |
| 472 | unsigned matchLength = strlen(matchCharacters); |
| 473 | |
| 474 | return matchLength < searchLength ? findIgnoringASCIICase(source, matchCharacters, 0, searchLength, matchLength) : notFound; |
| 475 | } |
| 476 | |
| 477 | template<typename StringClassA, typename StringClassB> |
| 478 | size_t findIgnoringASCIICase(const StringClassA& source, const StringClassB& stringToFind, unsigned startOffset) |
| 479 | { |
| 480 | unsigned sourceStringLength = source.length(); |
| 481 | unsigned matchLength = stringToFind.length(); |
| 482 | if (!matchLength) |
| 483 | return std::min(startOffset, sourceStringLength); |
| 484 | |
| 485 | // Check startOffset & matchLength are in range. |
| 486 | if (startOffset > sourceStringLength) |
| 487 | return notFound; |
| 488 | unsigned searchLength = sourceStringLength - startOffset; |
| 489 | if (matchLength > searchLength) |
| 490 | return notFound; |
| 491 | |
| 492 | if (source.is8Bit()) { |
| 493 | if (stringToFind.is8Bit()) |
| 494 | return findIgnoringASCIICase(source.characters8(), stringToFind.characters8(), startOffset, searchLength, matchLength); |
| 495 | return findIgnoringASCIICase(source.characters8(), stringToFind.characters16(), startOffset, searchLength, matchLength); |
| 496 | } |
| 497 | |
| 498 | if (stringToFind.is8Bit()) |
| 499 | return findIgnoringASCIICase(source.characters16(), stringToFind.characters8(), startOffset, searchLength, matchLength); |
| 500 | |
| 501 | return findIgnoringASCIICase(source.characters16(), stringToFind.characters16(), startOffset, searchLength, matchLength); |
| 502 | } |
| 503 | |
| 504 | template <typename SearchCharacterType, typename MatchCharacterType> |
| 505 | ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) |
| 506 | { |
| 507 | // Optimization: keep a running hash of the strings, |
| 508 | // only call equal() if the hashes match. |
| 509 | |
| 510 | // delta is the number of additional times to test; delta == 0 means test only once. |
| 511 | unsigned delta = searchLength - matchLength; |
| 512 | |
| 513 | unsigned searchHash = 0; |
| 514 | unsigned matchHash = 0; |
| 515 | |
| 516 | for (unsigned i = 0; i < matchLength; ++i) { |
| 517 | searchHash += searchCharacters[i]; |
| 518 | matchHash += matchCharacters[i]; |
| 519 | } |
| 520 | |
| 521 | unsigned i = 0; |
| 522 | // keep looping until we match |
| 523 | while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { |
| 524 | if (i == delta) |
| 525 | return notFound; |
| 526 | searchHash += searchCharacters[i + matchLength]; |
| 527 | searchHash -= searchCharacters[i]; |
| 528 | ++i; |
| 529 | } |
| 530 | return index + i; |
| 531 | } |
| 532 | |
| 533 | template<typename CharacterType> |
| 534 | inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) |
| 535 | { |
| 536 | while (index < length) { |
| 537 | if (characters[index] == matchCharacter) |
| 538 | return index; |
| 539 | ++index; |
| 540 | } |
| 541 | return notFound; |
| 542 | } |
| 543 | |
| 544 | ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) |
| 545 | { |
| 546 | return find(characters, length, static_cast<UChar>(matchCharacter), index); |
| 547 | } |
| 548 | |
| 549 | inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) |
| 550 | { |
| 551 | if (matchCharacter & ~0xFF) |
| 552 | return notFound; |
| 553 | return find(characters, length, static_cast<LChar>(matchCharacter), index); |
| 554 | } |
| 555 | |
| 556 | template<typename StringClass> |
| 557 | size_t findCommon(const StringClass& haystack, const StringClass& needle, unsigned start) |
| 558 | { |
| 559 | unsigned needleLength = needle.length(); |
| 560 | |
| 561 | if (needleLength == 1) { |
| 562 | if (haystack.is8Bit()) |
| 563 | return WTF::find(haystack.characters8(), haystack.length(), needle[0], start); |
| 564 | return WTF::find(haystack.characters16(), haystack.length(), needle[0], start); |
| 565 | } |
| 566 | |
| 567 | if (!needleLength) |
| 568 | return std::min(start, haystack.length()); |
| 569 | |
| 570 | if (start > haystack.length()) |
| 571 | return notFound; |
| 572 | unsigned searchLength = haystack.length() - start; |
| 573 | if (needleLength > searchLength) |
| 574 | return notFound; |
| 575 | |
| 576 | if (haystack.is8Bit()) { |
| 577 | if (needle.is8Bit()) |
| 578 | return findInner(haystack.characters8() + start, needle.characters8(), start, searchLength, needleLength); |
| 579 | return findInner(haystack.characters8() + start, needle.characters16(), start, searchLength, needleLength); |
| 580 | } |
| 581 | |
| 582 | if (needle.is8Bit()) |
| 583 | return findInner(haystack.characters16() + start, needle.characters8(), start, searchLength, needleLength); |
| 584 | |
| 585 | return findInner(haystack.characters16() + start, needle.characters16(), start, searchLength, needleLength); |
| 586 | } |
| 587 | |
| 588 | // This is marked inline since it's mostly used in non-inline functions for each string type. |
| 589 | // When used directly in code it's probably OK to be inline; maybe the loop will be unrolled. |
| 590 | template<typename CharacterType> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, const char* lowercaseLetters, unsigned length) |
| 591 | { |
| 592 | for (unsigned i = 0; i < length; ++i) { |
| 593 | if (!isASCIIAlphaCaselessEqual(characters[i], lowercaseLetters[i])) |
| 594 | return false; |
| 595 | } |
| 596 | return true; |
| 597 | } |
| 598 | |
| 599 | template<typename CharacterType, unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]) |
| 600 | { |
| 601 | ASSERT(strlen(lowercaseLetters) == lowercaseLettersLength - 1); |
| 602 | unsigned = lowercaseLettersLength - 1; |
| 603 | return charactersLength == lowercaseLettersStringLength && equalLettersIgnoringASCIICase(characters, lowercaseLetters, lowercaseLettersStringLength); |
| 604 | } |
| 605 | |
| 606 | template<typename StringClass> bool inline hasPrefixWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* lowercaseLetters, unsigned length) |
| 607 | { |
| 608 | #if !ASSERT_DISABLED |
| 609 | ASSERT(*lowercaseLetters); |
| 610 | for (const char* letter = lowercaseLetters; *letter; ++letter) |
| 611 | ASSERT(toASCIILowerUnchecked(*letter) == *letter); |
| 612 | #endif |
| 613 | ASSERT(string.length() >= length); |
| 614 | |
| 615 | if (string.is8Bit()) |
| 616 | return equalLettersIgnoringASCIICase(string.characters8(), lowercaseLetters, length); |
| 617 | return equalLettersIgnoringASCIICase(string.characters16(), lowercaseLetters, length); |
| 618 | } |
| 619 | |
| 620 | // This is intentionally not marked inline because it's used often and is not speed-critical enough to want it inlined everywhere. |
| 621 | template<typename StringClass> bool equalLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) |
| 622 | { |
| 623 | unsigned length = string.length(); |
| 624 | if (length != strlen(lowercaseLetters)) |
| 625 | return false; |
| 626 | return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, length); |
| 627 | } |
| 628 | |
| 629 | template<typename StringClass> bool startsWithLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) |
| 630 | { |
| 631 | size_t prefixLength = strlen(lowercaseLetters); |
| 632 | if (!prefixLength) |
| 633 | return true; |
| 634 | if (string.length() < prefixLength) |
| 635 | return false; |
| 636 | return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, prefixLength); |
| 637 | } |
| 638 | |
| 639 | template<typename StringClass, unsigned length> inline bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) |
| 640 | { |
| 641 | // Don't actually use the length; we are choosing code size over speed. |
| 642 | ASSERT(strlen(lowercaseLetters) == length - 1); |
| 643 | const char* pointer = lowercaseLetters; |
| 644 | return equalLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); |
| 645 | } |
| 646 | |
| 647 | template<typename StringClass, unsigned length> inline bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) |
| 648 | { |
| 649 | const char* pointer = lowercaseLetters; |
| 650 | return startsWithLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); |
| 651 | } |
| 652 | |
| 653 | inline bool equalIgnoringASCIICase(const char* a, const char* b) |
| 654 | { |
| 655 | auto length = strlen(a); |
| 656 | return length == strlen(b) && equalIgnoringASCIICase(a, b, length); |
| 657 | } |
| 658 | |
| 659 | template<unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const char* string, const char (&lowercaseLetters)[lowercaseLettersLength]) |
| 660 | { |
| 661 | auto length = strlen(lowercaseLetters); |
| 662 | return strlen(string) == length && equalLettersIgnoringASCIICase(string, lowercaseLetters, length); |
| 663 | } |
| 664 | |
| 665 | } |
| 666 | |
| 667 | using WTF::equalIgnoringASCIICase; |
| 668 | using WTF::equalLettersIgnoringASCIICase; |
| 669 | |