| 1 | /* |
| 2 | * Copyright (C) 2008, 2014 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in the |
| 12 | * documentation and/or other materials provided with the distribution. |
| 13 | * 3. Neither the name of Apple Inc. ("Apple") nor the names of |
| 14 | * its contributors may be used to endorse or promote products derived |
| 15 | * from this software without specific prior written permission. |
| 16 | * |
| 17 | * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
| 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 20 | * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
| 21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| 26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "config.h" |
| 30 | #include <wtf/unicode/Collator.h> |
| 31 | |
| 32 | // FIXME: Merge this with CollatorDefault.cpp into a single Collator.cpp source file. |
| 33 | |
| 34 | #if !UCONFIG_NO_COLLATION |
| 35 | |
| 36 | #include <mutex> |
| 37 | #include <unicode/ucol.h> |
| 38 | #include <wtf/Lock.h> |
| 39 | #include <wtf/text/StringView.h> |
| 40 | |
| 41 | #if OS(DARWIN) && USE(CF) |
| 42 | #include <CoreFoundation/CoreFoundation.h> |
| 43 | #include <wtf/RetainPtr.h> |
| 44 | #endif |
| 45 | |
| 46 | namespace WTF { |
| 47 | |
| 48 | static UCollator* cachedCollator; |
| 49 | static char* cachedCollatorLocale; |
| 50 | static bool cachedCollatorShouldSortLowercaseFirst; |
| 51 | |
| 52 | static Lock cachedCollatorMutex; |
| 53 | |
| 54 | #if !(OS(DARWIN) && USE(CF)) |
| 55 | |
// Without Darwin + CoreFoundation there is nothing to resolve: the caller's
// locale (which may be null) is handed back unchanged.
static inline const char* resolveDefaultLocale(const char* locale)
{
    const char* resolvedLocale = locale;
    return resolvedLocale;
}
| 60 | |
| 61 | #else |
| 62 | |
| 63 | static inline char* copyShortASCIIString(CFStringRef string) |
| 64 | { |
| 65 | // OK to have a fixed size buffer and to only handle ASCII since we only use this for locale names. |
| 66 | char buffer[256]; |
| 67 | if (!string || !CFStringGetCString(string, buffer, sizeof(buffer), kCFStringEncodingASCII)) |
| 68 | return strdup("" ); |
| 69 | return strdup(buffer); |
| 70 | } |
| 71 | |
| 72 | static char* copyDefaultLocale() |
| 73 | { |
| 74 | #if !PLATFORM(IOS_FAMILY) |
| 75 | return copyShortASCIIString(static_cast<CFStringRef>(CFLocaleGetValue(adoptCF(CFLocaleCopyCurrent()).get(), kCFLocaleCollatorIdentifier))); |
| 76 | #else |
| 77 | // FIXME: Documentation claims the code above would work on iOS 4.0 and later. After test that works, we should remove this and use that instead. |
| 78 | return copyShortASCIIString(adoptCF(static_cast<CFStringRef>(CFPreferencesCopyValue(CFSTR("AppleCollationOrder" ), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost))).get()); |
| 79 | #endif |
| 80 | } |
| 81 | |
| 82 | static inline const char* resolveDefaultLocale(const char* locale) |
| 83 | { |
| 84 | if (locale) |
| 85 | return locale; |
| 86 | // Since iOS and OS X don't set UNIX locale to match the user's selected locale, the ICU default locale is not the right one. |
| 87 | // So, instead of passing null to ICU, we pass the name of the user's selected locale. |
| 88 | static char* defaultLocale; |
| 89 | static std::once_flag initializeDefaultLocaleOnce; |
| 90 | std::call_once(initializeDefaultLocaleOnce, []{ |
| 91 | defaultLocale = copyDefaultLocale(); |
| 92 | }); |
| 93 | return defaultLocale; |
| 94 | } |
| 95 | |
| 96 | #endif |
| 97 | |
// Compares two locale names that may each be null.
// Two null locales match, a null never matches a non-null, and two non-null
// locales match when strcmp() reports them equal.
static inline bool localesMatch(const char* a, const char* b)
{
    // Same pointer (including both null) is trivially a match.
    if (a == b)
        return true;
    if (!a || !b)
        return false;
    return !strcmp(a, b);
}
| 103 | |
| 104 | Collator::Collator(const char* locale, bool shouldSortLowercaseFirst) |
| 105 | { |
| 106 | UErrorCode status = U_ZERO_ERROR; |
| 107 | |
| 108 | { |
| 109 | std::lock_guard<Lock> lock(cachedCollatorMutex); |
| 110 | if (cachedCollator && localesMatch(cachedCollatorLocale, locale) && cachedCollatorShouldSortLowercaseFirst == shouldSortLowercaseFirst) { |
| 111 | m_collator = cachedCollator; |
| 112 | m_locale = cachedCollatorLocale; |
| 113 | m_shouldSortLowercaseFirst = shouldSortLowercaseFirst; |
| 114 | cachedCollator = nullptr; |
| 115 | cachedCollatorLocale = nullptr; |
| 116 | return; |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | m_collator = ucol_open(resolveDefaultLocale(locale), &status); |
| 121 | if (U_FAILURE(status)) { |
| 122 | status = U_ZERO_ERROR; |
| 123 | m_collator = ucol_open("" , &status); // Fall back to Unicode Collation Algorithm. |
| 124 | } |
| 125 | ASSERT(U_SUCCESS(status)); |
| 126 | |
| 127 | ucol_setAttribute(m_collator, UCOL_CASE_FIRST, shouldSortLowercaseFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); |
| 128 | ASSERT(U_SUCCESS(status)); |
| 129 | |
| 130 | ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
| 131 | ASSERT(U_SUCCESS(status)); |
| 132 | |
| 133 | m_locale = locale ? fastStrDup(locale) : nullptr; |
| 134 | m_shouldSortLowercaseFirst = shouldSortLowercaseFirst; |
| 135 | } |
| 136 | |
| 137 | Collator::~Collator() |
| 138 | { |
| 139 | std::lock_guard<Lock> lock(cachedCollatorMutex); |
| 140 | if (cachedCollator) { |
| 141 | ucol_close(cachedCollator); |
| 142 | fastFree(cachedCollatorLocale); |
| 143 | } |
| 144 | cachedCollator = m_collator; |
| 145 | cachedCollatorLocale = m_locale; |
| 146 | cachedCollatorShouldSortLowercaseFirst = m_shouldSortLowercaseFirst; |
| 147 | } |
| 148 | |
| 149 | static int32_t getIndexLatin1(UCharIterator* iterator, UCharIteratorOrigin origin) |
| 150 | { |
| 151 | switch (origin) { |
| 152 | case UITER_START: |
| 153 | return iterator->start; |
| 154 | case UITER_CURRENT: |
| 155 | return iterator->index; |
| 156 | case UITER_LIMIT: |
| 157 | return iterator->limit; |
| 158 | case UITER_ZERO: |
| 159 | return 0; |
| 160 | case UITER_LENGTH: |
| 161 | return iterator->length; |
| 162 | } |
| 163 | ASSERT_NOT_REACHED(); |
| 164 | return U_SENTINEL; |
| 165 | } |
| 166 | |
| 167 | static int32_t moveLatin1(UCharIterator* iterator, int32_t delta, UCharIteratorOrigin origin) |
| 168 | { |
| 169 | return iterator->index = getIndexLatin1(iterator, origin) + delta; |
| 170 | } |
| 171 | |
| 172 | static UBool hasNextLatin1(UCharIterator* iterator) |
| 173 | { |
| 174 | return iterator->index < iterator->limit; |
| 175 | } |
| 176 | |
| 177 | static UBool hasPreviousLatin1(UCharIterator* iterator) |
| 178 | { |
| 179 | return iterator->index > iterator->start; |
| 180 | } |
| 181 | |
| 182 | static UChar32 currentLatin1(UCharIterator* iterator) |
| 183 | { |
| 184 | ASSERT(iterator->index >= iterator->start); |
| 185 | if (iterator->index >= iterator->limit) |
| 186 | return U_SENTINEL; |
| 187 | return static_cast<const LChar*>(iterator->context)[iterator->index]; |
| 188 | } |
| 189 | |
| 190 | static UChar32 nextLatin1(UCharIterator* iterator) |
| 191 | { |
| 192 | ASSERT(iterator->index >= iterator->start); |
| 193 | if (iterator->index >= iterator->limit) |
| 194 | return U_SENTINEL; |
| 195 | return static_cast<const LChar*>(iterator->context)[iterator->index++]; |
| 196 | } |
| 197 | |
| 198 | static UChar32 previousLatin1(UCharIterator* iterator) |
| 199 | { |
| 200 | if (iterator->index <= iterator->start) |
| 201 | return U_SENTINEL; |
| 202 | return static_cast<const LChar*>(iterator->context)[--iterator->index]; |
| 203 | } |
| 204 | |
| 205 | static uint32_t getStateLatin1(const UCharIterator* iterator) |
| 206 | { |
| 207 | return iterator->index; |
| 208 | } |
| 209 | |
| 210 | static void setStateLatin1(UCharIterator* iterator, uint32_t state, UErrorCode*) |
| 211 | { |
| 212 | iterator->index = state; |
| 213 | } |
| 214 | |
| 215 | static UCharIterator createLatin1Iterator(const LChar* characters, int length) |
| 216 | { |
| 217 | UCharIterator iterator; |
| 218 | iterator.context = characters; |
| 219 | iterator.length = length; |
| 220 | iterator.start = 0; |
| 221 | iterator.index = 0; |
| 222 | iterator.limit = length; |
| 223 | iterator.reservedField = 0; |
| 224 | iterator.getIndex = getIndexLatin1; |
| 225 | iterator.move = moveLatin1; |
| 226 | iterator.hasNext = hasNextLatin1; |
| 227 | iterator.hasPrevious = hasPreviousLatin1; |
| 228 | iterator.current = currentLatin1; |
| 229 | iterator.next = nextLatin1; |
| 230 | iterator.previous = previousLatin1; |
| 231 | iterator.reservedFn = nullptr; |
| 232 | iterator.getState = getStateLatin1; |
| 233 | iterator.setState = setStateLatin1; |
| 234 | return iterator; |
| 235 | } |
| 236 | |
| 237 | UCharIterator createIterator(StringView string) |
| 238 | { |
| 239 | if (string.is8Bit()) |
| 240 | return createLatin1Iterator(string.characters8(), string.length()); |
| 241 | UCharIterator iterator; |
| 242 | uiter_setString(&iterator, string.characters16(), string.length()); |
| 243 | return iterator; |
| 244 | } |
| 245 | |
| 246 | int Collator::collate(StringView a, StringView b) const |
| 247 | { |
| 248 | UCharIterator iteratorA = createIterator(a); |
| 249 | UCharIterator iteratorB = createIterator(b); |
| 250 | UErrorCode status = U_ZERO_ERROR; |
| 251 | int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status); |
| 252 | ASSERT(U_SUCCESS(status)); |
| 253 | return result; |
| 254 | } |
| 255 | |
| 256 | static UCharIterator createIteratorUTF8(const char* string) |
| 257 | { |
| 258 | UCharIterator iterator; |
| 259 | uiter_setUTF8(&iterator, string, strlen(string)); |
| 260 | return iterator; |
| 261 | } |
| 262 | |
| 263 | int Collator::collateUTF8(const char* a, const char* b) const |
| 264 | { |
| 265 | UCharIterator iteratorA = createIteratorUTF8(a); |
| 266 | UCharIterator iteratorB = createIteratorUTF8(b); |
| 267 | UErrorCode status = U_ZERO_ERROR; |
| 268 | int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status); |
| 269 | ASSERT(U_SUCCESS(status)); |
| 270 | return result; |
| 271 | } |
| 272 | |
| 273 | } // namespace WTF |
| 274 | |
| 275 | #endif |
| 276 | |