| 1 | /* |
| 2 | * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family) |
| 3 | * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com) |
| 4 | * Copyright (C) 2016-2017 Apple Inc. All Rights Reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * |
| 15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| 17 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| 19 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 20 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 21 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 22 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| 25 | * THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
| 28 | #include "config.h" |
| 29 | #include "IntlCollator.h" |
| 30 | |
| 31 | #if ENABLE(INTL) |
| 32 | |
| 33 | #include "CatchScope.h" |
| 34 | #include "Error.h" |
| 35 | #include "IntlCollatorConstructor.h" |
| 36 | #include "IntlObject.h" |
| 37 | #include "JSBoundFunction.h" |
| 38 | #include "JSCInlines.h" |
| 39 | #include "ObjectConstructor.h" |
| 40 | #include "SlotVisitorInlines.h" |
| 41 | #include "StructureInlines.h" |
| 42 | #include <unicode/ucol.h> |
| 43 | #include <wtf/unicode/Collator.h> |
| 44 | |
| 45 | namespace JSC { |
| 46 | |
| 47 | const ClassInfo IntlCollator::s_info = { "Object" , &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) }; |
| 48 | |
// Unicode extension keys that are relevant to collators. The index constants below
// must stay in sync with the positions of the keys in this array; they are the
// keyIndex values handled by sortLocaleData() and searchLocaleData().
static const char* const relevantCollatorExtensionKeys[3] = {
    "co", // see indexOfExtensionKeyCo
    "kn", // see indexOfExtensionKeyKn
    "kf"  // see indexOfExtensionKeyKf
};
static const size_t indexOfExtensionKeyCo = 0;
static const size_t indexOfExtensionKeyKn = 1;
static const size_t indexOfExtensionKeyKf = 2;
| 53 | |
| 54 | void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const |
| 55 | { |
| 56 | if (collator) |
| 57 | ucol_close(collator); |
| 58 | } |
| 59 | |
| 60 | IntlCollator* IntlCollator::create(VM& vm, Structure* structure) |
| 61 | { |
| 62 | IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure); |
| 63 | format->finishCreation(vm); |
| 64 | return format; |
| 65 | } |
| 66 | |
| 67 | Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) |
| 68 | { |
| 69 | return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); |
| 70 | } |
| 71 | |
| 72 | IntlCollator::IntlCollator(VM& vm, Structure* structure) |
| 73 | : JSDestructibleObject(vm, structure) |
| 74 | { |
| 75 | } |
| 76 | |
| 77 | void IntlCollator::finishCreation(VM& vm) |
| 78 | { |
| 79 | Base::finishCreation(vm); |
| 80 | ASSERT(inherits(vm, info())); |
| 81 | } |
| 82 | |
| 83 | void IntlCollator::destroy(JSCell* cell) |
| 84 | { |
| 85 | static_cast<IntlCollator*>(cell)->IntlCollator::~IntlCollator(); |
| 86 | } |
| 87 | |
| 88 | void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor) |
| 89 | { |
| 90 | IntlCollator* thisObject = jsCast<IntlCollator*>(cell); |
| 91 | ASSERT_GC_OBJECT_INHERITS(thisObject, info()); |
| 92 | |
| 93 | Base::visitChildren(thisObject, visitor); |
| 94 | |
| 95 | visitor.append(thisObject->m_boundCompare); |
| 96 | } |
| 97 | |
| 98 | static Vector<String> sortLocaleData(const String& locale, size_t keyIndex) |
| 99 | { |
| 100 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
| 101 | Vector<String> keyLocaleData; |
| 102 | switch (keyIndex) { |
| 103 | case indexOfExtensionKeyCo: { |
| 104 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
| 105 | keyLocaleData.append({ }); |
| 106 | |
| 107 | UErrorCode status = U_ZERO_ERROR; |
| 108 | UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation" , locale.utf8().data(), false, &status); |
| 109 | if (U_SUCCESS(status)) { |
| 110 | const char* collation; |
| 111 | while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) { |
| 112 | // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array." |
| 113 | if (!strcmp(collation, "standard" ) || !strcmp(collation, "search" )) |
| 114 | continue; |
| 115 | |
| 116 | // Map keyword values to BCP 47 equivalents. |
| 117 | if (!strcmp(collation, "dictionary" )) |
| 118 | collation = "dict" ; |
| 119 | else if (!strcmp(collation, "gb2312han" )) |
| 120 | collation = "gb2312" ; |
| 121 | else if (!strcmp(collation, "phonebook" )) |
| 122 | collation = "phonebk" ; |
| 123 | else if (!strcmp(collation, "traditional" )) |
| 124 | collation = "trad" ; |
| 125 | |
| 126 | keyLocaleData.append(collation); |
| 127 | } |
| 128 | uenum_close(enumeration); |
| 129 | } |
| 130 | break; |
| 131 | } |
| 132 | case indexOfExtensionKeyKn: |
| 133 | keyLocaleData.reserveInitialCapacity(2); |
| 134 | keyLocaleData.uncheckedAppend("false"_s ); |
| 135 | keyLocaleData.uncheckedAppend("true"_s ); |
| 136 | break; |
| 137 | case indexOfExtensionKeyKf: |
| 138 | keyLocaleData.reserveInitialCapacity(3); |
| 139 | keyLocaleData.uncheckedAppend("false"_s ); |
| 140 | keyLocaleData.uncheckedAppend("lower"_s ); |
| 141 | keyLocaleData.uncheckedAppend("upper"_s ); |
| 142 | break; |
| 143 | default: |
| 144 | ASSERT_NOT_REACHED(); |
| 145 | } |
| 146 | return keyLocaleData; |
| 147 | } |
| 148 | |
| 149 | static Vector<String> searchLocaleData(const String&, size_t keyIndex) |
| 150 | { |
| 151 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
| 152 | Vector<String> keyLocaleData; |
| 153 | switch (keyIndex) { |
| 154 | case indexOfExtensionKeyCo: |
| 155 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
| 156 | keyLocaleData.reserveInitialCapacity(1); |
| 157 | keyLocaleData.append({ }); |
| 158 | break; |
| 159 | case indexOfExtensionKeyKn: |
| 160 | keyLocaleData.reserveInitialCapacity(2); |
| 161 | keyLocaleData.uncheckedAppend("false"_s ); |
| 162 | keyLocaleData.uncheckedAppend("true"_s ); |
| 163 | break; |
| 164 | case indexOfExtensionKeyKf: |
| 165 | keyLocaleData.reserveInitialCapacity(3); |
| 166 | keyLocaleData.uncheckedAppend("false"_s ); |
| 167 | keyLocaleData.uncheckedAppend("lower"_s ); |
| 168 | keyLocaleData.uncheckedAppend("upper"_s ); |
| 169 | break; |
| 170 | default: |
| 171 | ASSERT_NOT_REACHED(); |
| 172 | } |
| 173 | return keyLocaleData; |
| 174 | } |
| 175 | |
| 176 | void IntlCollator::initializeCollator(ExecState& state, JSValue locales, JSValue optionsValue) |
| 177 | { |
| 178 | VM& vm = state.vm(); |
| 179 | auto scope = DECLARE_THROW_SCOPE(vm); |
| 180 | |
| 181 | // 10.1.1 InitializeCollator (collator, locales, options) (ECMA-402) |
| 182 | // https://tc39.github.io/ecma402/#sec-initializecollator |
| 183 | |
| 184 | auto requestedLocales = canonicalizeLocaleList(state, locales); |
| 185 | RETURN_IF_EXCEPTION(scope, void()); |
| 186 | |
| 187 | JSObject* options; |
| 188 | if (optionsValue.isUndefined()) |
| 189 | options = constructEmptyObject(&state, state.lexicalGlobalObject()->nullPrototypeObjectStructure()); |
| 190 | else { |
| 191 | options = optionsValue.toObject(&state); |
| 192 | RETURN_IF_EXCEPTION(scope, void()); |
| 193 | } |
| 194 | |
| 195 | String usageString = intlStringOption(state, options, vm.propertyNames->usage, { "sort" , "search" }, "usage must be either \"sort\" or \"search\"" , "sort" ); |
| 196 | RETURN_IF_EXCEPTION(scope, void()); |
| 197 | if (usageString == "sort" ) |
| 198 | m_usage = Usage::Sort; |
| 199 | else if (usageString == "search" ) |
| 200 | m_usage = Usage::Search; |
| 201 | else |
| 202 | ASSERT_NOT_REACHED(); |
| 203 | |
| 204 | auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData; |
| 205 | |
| 206 | HashMap<String, String> opt; |
| 207 | |
| 208 | String matcher = intlStringOption(state, options, vm.propertyNames->localeMatcher, { "lookup" , "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"" , "best fit" ); |
| 209 | RETURN_IF_EXCEPTION(scope, void()); |
| 210 | opt.add("localeMatcher"_s , matcher); |
| 211 | |
| 212 | { |
| 213 | String numericString; |
| 214 | bool usesFallback; |
| 215 | bool numeric = intlBooleanOption(state, options, vm.propertyNames->numeric, usesFallback); |
| 216 | RETURN_IF_EXCEPTION(scope, void()); |
| 217 | if (!usesFallback) |
| 218 | numericString = numeric ? "true"_s : "false"_s ; |
| 219 | if (!numericString.isNull()) |
| 220 | opt.add("kn"_s , numericString); |
| 221 | } |
| 222 | { |
| 223 | String caseFirst = intlStringOption(state, options, vm.propertyNames->caseFirst, { "upper" , "lower" , "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"" , nullptr); |
| 224 | RETURN_IF_EXCEPTION(scope, void()); |
| 225 | if (!caseFirst.isNull()) |
| 226 | opt.add("kf"_s , caseFirst); |
| 227 | } |
| 228 | |
| 229 | auto& availableLocales = state.jsCallee()->globalObject(vm)->intlCollatorAvailableLocales(); |
| 230 | auto result = resolveLocale(state, availableLocales, requestedLocales, opt, relevantCollatorExtensionKeys, WTF_ARRAY_LENGTH(relevantCollatorExtensionKeys), localeData); |
| 231 | |
| 232 | m_locale = result.get("locale"_s ); |
| 233 | if (m_locale.isEmpty()) { |
| 234 | throwTypeError(&state, scope, "failed to initialize Collator due to invalid locale"_s ); |
| 235 | return; |
| 236 | } |
| 237 | |
| 238 | const String& collation = result.get("co"_s ); |
| 239 | m_collation = collation.isNull() ? "default"_s : collation; |
| 240 | m_numeric = result.get("kn"_s ) == "true" ; |
| 241 | |
| 242 | const String& caseFirst = result.get("kf"_s ); |
| 243 | if (caseFirst == "lower" ) |
| 244 | m_caseFirst = CaseFirst::Lower; |
| 245 | else if (caseFirst == "upper" ) |
| 246 | m_caseFirst = CaseFirst::Upper; |
| 247 | else |
| 248 | m_caseFirst = CaseFirst::False; |
| 249 | |
| 250 | String sensitivityString = intlStringOption(state, options, vm.propertyNames->sensitivity, { "base" , "accent" , "case" , "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"" , nullptr); |
| 251 | RETURN_IF_EXCEPTION(scope, void()); |
| 252 | if (sensitivityString == "base" ) |
| 253 | m_sensitivity = Sensitivity::Base; |
| 254 | else if (sensitivityString == "accent" ) |
| 255 | m_sensitivity = Sensitivity::Accent; |
| 256 | else if (sensitivityString == "case" ) |
| 257 | m_sensitivity = Sensitivity::Case; |
| 258 | else |
| 259 | m_sensitivity = Sensitivity::Variant; |
| 260 | |
| 261 | bool usesFallback; |
| 262 | bool ignorePunctuation = intlBooleanOption(state, options, vm.propertyNames->ignorePunctuation, usesFallback); |
| 263 | if (usesFallback) |
| 264 | ignorePunctuation = false; |
| 265 | RETURN_IF_EXCEPTION(scope, void()); |
| 266 | m_ignorePunctuation = ignorePunctuation; |
| 267 | |
| 268 | m_initializedCollator = true; |
| 269 | } |
| 270 | |
| 271 | void IntlCollator::createCollator(ExecState& state) |
| 272 | { |
| 273 | VM& vm = state.vm(); |
| 274 | auto scope = DECLARE_CATCH_SCOPE(vm); |
| 275 | ASSERT(!m_collator); |
| 276 | |
| 277 | if (!m_initializedCollator) { |
| 278 | initializeCollator(state, jsUndefined(), jsUndefined()); |
| 279 | scope.assertNoException(); |
| 280 | } |
| 281 | |
| 282 | UErrorCode status = U_ZERO_ERROR; |
| 283 | auto collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(m_locale.utf8().data(), &status)); |
| 284 | if (U_FAILURE(status)) |
| 285 | return; |
| 286 | |
| 287 | UColAttributeValue strength = UCOL_PRIMARY; |
| 288 | UColAttributeValue caseLevel = UCOL_OFF; |
| 289 | UColAttributeValue caseFirst = UCOL_OFF; |
| 290 | switch (m_sensitivity) { |
| 291 | case Sensitivity::Base: |
| 292 | break; |
| 293 | case Sensitivity::Accent: |
| 294 | strength = UCOL_SECONDARY; |
| 295 | break; |
| 296 | case Sensitivity::Case: |
| 297 | caseLevel = UCOL_ON; |
| 298 | break; |
| 299 | case Sensitivity::Variant: |
| 300 | strength = UCOL_TERTIARY; |
| 301 | break; |
| 302 | } |
| 303 | switch (m_caseFirst) { |
| 304 | case CaseFirst::False: |
| 305 | break; |
| 306 | case CaseFirst::Lower: |
| 307 | caseFirst = UCOL_LOWER_FIRST; |
| 308 | break; |
| 309 | case CaseFirst::Upper: |
| 310 | caseFirst = UCOL_UPPER_FIRST; |
| 311 | break; |
| 312 | } |
| 313 | |
| 314 | ucol_setAttribute(collator.get(), UCOL_STRENGTH, strength, &status); |
| 315 | ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, caseLevel, &status); |
| 316 | ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, caseFirst, &status); |
| 317 | ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status); |
| 318 | |
| 319 | // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be |
| 320 | // ignored. There is currently no way to ignore only punctuation. |
| 321 | ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status); |
| 322 | |
| 323 | // "The method is required to return 0 when comparing Strings that are considered canonically |
| 324 | // equivalent by the Unicode standard." |
| 325 | ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
| 326 | if (U_FAILURE(status)) |
| 327 | return; |
| 328 | |
| 329 | m_collator = WTFMove(collator); |
| 330 | } |
| 331 | |
| 332 | JSValue IntlCollator::compareStrings(ExecState& state, StringView x, StringView y) |
| 333 | { |
| 334 | VM& vm = state.vm(); |
| 335 | auto scope = DECLARE_THROW_SCOPE(vm); |
| 336 | |
| 337 | // 10.3.4 CompareStrings abstract operation (ECMA-402 2.0) |
| 338 | if (!m_collator) { |
| 339 | createCollator(state); |
| 340 | if (!m_collator) |
| 341 | return throwException(&state, scope, createError(&state, "Failed to compare strings."_s )); |
| 342 | } |
| 343 | |
| 344 | UErrorCode status = U_ZERO_ERROR; |
| 345 | UCharIterator iteratorX = createIterator(x); |
| 346 | UCharIterator iteratorY = createIterator(y); |
| 347 | auto result = ucol_strcollIter(m_collator.get(), &iteratorX, &iteratorY, &status); |
| 348 | if (U_FAILURE(status)) |
| 349 | return throwException(&state, scope, createError(&state, "Failed to compare strings."_s )); |
| 350 | return jsNumber(result); |
| 351 | } |
| 352 | |
| 353 | ASCIILiteral IntlCollator::usageString(Usage usage) |
| 354 | { |
| 355 | switch (usage) { |
| 356 | case Usage::Sort: |
| 357 | return "sort"_s ; |
| 358 | case Usage::Search: |
| 359 | return "search"_s ; |
| 360 | } |
| 361 | ASSERT_NOT_REACHED(); |
| 362 | return ASCIILiteral::null(); |
| 363 | } |
| 364 | |
| 365 | ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity) |
| 366 | { |
| 367 | switch (sensitivity) { |
| 368 | case Sensitivity::Base: |
| 369 | return "base"_s ; |
| 370 | case Sensitivity::Accent: |
| 371 | return "accent"_s ; |
| 372 | case Sensitivity::Case: |
| 373 | return "case"_s ; |
| 374 | case Sensitivity::Variant: |
| 375 | return "variant"_s ; |
| 376 | } |
| 377 | ASSERT_NOT_REACHED(); |
| 378 | return ASCIILiteral::null(); |
| 379 | } |
| 380 | |
| 381 | ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst) |
| 382 | { |
| 383 | switch (caseFirst) { |
| 384 | case CaseFirst::False: |
| 385 | return "false"_s ; |
| 386 | case CaseFirst::Lower: |
| 387 | return "lower"_s ; |
| 388 | case CaseFirst::Upper: |
| 389 | return "upper"_s ; |
| 390 | } |
| 391 | ASSERT_NOT_REACHED(); |
| 392 | return ASCIILiteral::null(); |
| 393 | } |
| 394 | |
| 395 | JSObject* IntlCollator::resolvedOptions(ExecState& state) |
| 396 | { |
| 397 | VM& vm = state.vm(); |
| 398 | auto scope = DECLARE_THROW_SCOPE(vm); |
| 399 | |
| 400 | // 10.3.5 Intl.Collator.prototype.resolvedOptions() (ECMA-402 2.0) |
| 401 | // The function returns a new object whose properties and attributes are set as if |
| 402 | // constructed by an object literal assigning to each of the following properties the |
| 403 | // value of the corresponding internal slot of this Collator object (see 10.4): locale, |
| 404 | // usage, sensitivity, ignorePunctuation, collation, as well as those properties shown |
| 405 | // in Table 1 whose keys are included in the %Collator%[[relevantExtensionKeys]] |
| 406 | // internal slot of the standard built-in object that is the initial value of |
| 407 | // Intl.Collator. |
| 408 | |
| 409 | if (!m_initializedCollator) { |
| 410 | initializeCollator(state, jsUndefined(), jsUndefined()); |
| 411 | scope.assertNoException(); |
| 412 | } |
| 413 | |
| 414 | JSObject* options = constructEmptyObject(&state); |
| 415 | options->putDirect(vm, vm.propertyNames->locale, jsString(&state, m_locale)); |
| 416 | options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(&state, usageString(m_usage))); |
| 417 | options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(&state, sensitivityString(m_sensitivity))); |
| 418 | options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation)); |
| 419 | options->putDirect(vm, vm.propertyNames->collation, jsString(&state, m_collation)); |
| 420 | options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric)); |
| 421 | options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(&state, caseFirstString(m_caseFirst))); |
| 422 | return options; |
| 423 | } |
| 424 | |
| 425 | void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format) |
| 426 | { |
| 427 | m_boundCompare.set(vm, this, format); |
| 428 | } |
| 429 | |
| 430 | } // namespace JSC |
| 431 | |
| 432 | #endif // ENABLE(INTL) |
| 433 | |