| 1 | /* |
| 2 | * Copyright (C) 2014 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| 14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| 15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| 17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| 23 | * THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | #include <wtf/text/icu/UTextProviderLatin1.h> |
| 28 | |
| 29 | #include <wtf/text/StringImpl.h> |
| 30 | #include <wtf/text/icu/UTextProvider.h> |
| 31 | |
| 32 | namespace WTF { |
| 33 | |
| 34 | // Latin1 provider |
| 35 | |
| 36 | static UText* uTextLatin1Clone(UText*, const UText*, UBool, UErrorCode*); |
| 37 | static int64_t uTextLatin1NativeLength(UText*); |
| 38 | static UBool uTextLatin1Access(UText*, int64_t, UBool); |
| 39 | static int32_t uTextLatin1Extract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); |
| 40 | static int64_t uTextLatin1MapOffsetToNative(const UText*); |
| 41 | static int32_t uTextLatin1MapNativeIndexToUTF16(const UText*, int64_t); |
| 42 | static void uTextLatin1Close(UText*); |
| 43 | |
| 44 | static const struct UTextFuncs uTextLatin1Funcs = { |
| 45 | sizeof(UTextFuncs), |
| 46 | 0, |
| 47 | 0, |
| 48 | 0, |
| 49 | uTextLatin1Clone, |
| 50 | uTextLatin1NativeLength, |
| 51 | uTextLatin1Access, |
| 52 | uTextLatin1Extract, |
| 53 | nullptr, |
| 54 | nullptr, |
| 55 | uTextLatin1MapOffsetToNative, |
| 56 | uTextLatin1MapNativeIndexToUTF16, |
| 57 | uTextLatin1Close, |
| 58 | nullptr, |
| 59 | nullptr, |
| 60 | nullptr |
| 61 | }; |
| 62 | |
| 63 | static UText* uTextLatin1Clone(UText* destination, const UText* source, UBool deep, UErrorCode* status) |
| 64 | { |
| 65 | ASSERT_UNUSED(deep, !deep); |
| 66 | |
| 67 | if (U_FAILURE(*status)) |
| 68 | return 0; |
| 69 | |
| 70 | UText* result = utext_setup(destination, sizeof(UChar) * UTextWithBufferInlineCapacity, status); |
| 71 | if (U_FAILURE(*status)) |
| 72 | return destination; |
| 73 | |
| 74 | result->providerProperties = source->providerProperties; |
| 75 | |
| 76 | // Point at the same position, but with an empty buffer. |
| 77 | result->chunkNativeStart = source->chunkNativeStart; |
| 78 | result->chunkNativeLimit = source->chunkNativeStart; |
| 79 | result->nativeIndexingLimit = static_cast<int32_t>(source->chunkNativeStart); |
| 80 | result->chunkOffset = 0; |
| 81 | result->context = source->context; |
| 82 | result->a = source->a; |
| 83 | result->pFuncs = &uTextLatin1Funcs; |
| 84 | result->chunkContents = (UChar*)result->pExtra; |
| 85 | memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity); |
| 86 | |
| 87 | return result; |
| 88 | } |
| 89 | |
| 90 | static int64_t uTextLatin1NativeLength(UText* uText) |
| 91 | { |
| 92 | return uText->a; |
| 93 | } |
| 94 | |
| 95 | static UBool uTextLatin1Access(UText* uText, int64_t index, UBool forward) |
| 96 | { |
| 97 | int64_t length = uText->a; |
| 98 | |
| 99 | if (forward) { |
| 100 | if (index < uText->chunkNativeLimit && index >= uText->chunkNativeStart) { |
| 101 | // Already inside the buffer. Set the new offset. |
| 102 | uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); |
| 103 | return TRUE; |
| 104 | } |
| 105 | if (index >= length && uText->chunkNativeLimit == length) { |
| 106 | // Off the end of the buffer, but we can't get it. |
| 107 | uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); |
| 108 | return FALSE; |
| 109 | } |
| 110 | } else { |
| 111 | if (index <= uText->chunkNativeLimit && index > uText->chunkNativeStart) { |
| 112 | // Already inside the buffer. Set the new offset. |
| 113 | uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); |
| 114 | return TRUE; |
| 115 | } |
| 116 | if (!index && !uText->chunkNativeStart) { |
| 117 | // Already at the beginning; can't go any farther. |
| 118 | uText->chunkOffset = 0; |
| 119 | return FALSE; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | if (forward) { |
| 124 | uText->chunkNativeStart = index; |
| 125 | uText->chunkNativeLimit = uText->chunkNativeStart + UTextWithBufferInlineCapacity; |
| 126 | if (uText->chunkNativeLimit > length) |
| 127 | uText->chunkNativeLimit = length; |
| 128 | |
| 129 | uText->chunkOffset = 0; |
| 130 | } else { |
| 131 | uText->chunkNativeLimit = index; |
| 132 | if (uText->chunkNativeLimit > length) |
| 133 | uText->chunkNativeLimit = length; |
| 134 | |
| 135 | uText->chunkNativeStart = uText->chunkNativeLimit - UTextWithBufferInlineCapacity; |
| 136 | if (uText->chunkNativeStart < 0) |
| 137 | uText->chunkNativeStart = 0; |
| 138 | |
| 139 | uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); |
| 140 | } |
| 141 | uText->chunkLength = static_cast<int32_t>(uText->chunkNativeLimit - uText->chunkNativeStart); |
| 142 | |
| 143 | StringImpl::copyCharacters(const_cast<UChar*>(uText->chunkContents), static_cast<const LChar*>(uText->context) + uText->chunkNativeStart, static_cast<unsigned>(uText->chunkLength)); |
| 144 | |
| 145 | uText->nativeIndexingLimit = uText->chunkLength; |
| 146 | |
| 147 | return TRUE; |
| 148 | } |
| 149 | |
| 150 | static int32_t (UText* uText, int64_t start, int64_t limit, UChar* dest, int32_t destCapacity, UErrorCode* status) |
| 151 | { |
| 152 | int64_t length = uText->a; |
| 153 | if (U_FAILURE(*status)) |
| 154 | return 0; |
| 155 | |
| 156 | if (destCapacity < 0 || (!dest && destCapacity > 0)) { |
| 157 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 158 | return 0; |
| 159 | } |
| 160 | |
| 161 | if (start < 0 || start > limit || (limit - start) > INT32_MAX) { |
| 162 | *status = U_INDEX_OUTOFBOUNDS_ERROR; |
| 163 | return 0; |
| 164 | } |
| 165 | |
| 166 | if (start > length) |
| 167 | start = length; |
| 168 | if (limit > length) |
| 169 | limit = length; |
| 170 | |
| 171 | length = limit - start; |
| 172 | |
| 173 | if (!length) |
| 174 | return 0; |
| 175 | |
| 176 | if (destCapacity > 0 && !dest) { |
| 177 | int32_t trimmedLength = static_cast<int32_t>(length); |
| 178 | if (trimmedLength > destCapacity) |
| 179 | trimmedLength = destCapacity; |
| 180 | |
| 181 | StringImpl::copyCharacters(dest, static_cast<const LChar*>(uText->context) + start, static_cast<unsigned>(trimmedLength)); |
| 182 | } |
| 183 | |
| 184 | if (length < destCapacity) { |
| 185 | dest[length] = 0; |
| 186 | if (*status == U_STRING_NOT_TERMINATED_WARNING) |
| 187 | *status = U_ZERO_ERROR; |
| 188 | } else if (length == destCapacity) |
| 189 | *status = U_STRING_NOT_TERMINATED_WARNING; |
| 190 | else |
| 191 | *status = U_BUFFER_OVERFLOW_ERROR; |
| 192 | |
| 193 | return static_cast<int32_t>(length); |
| 194 | } |
| 195 | |
| 196 | static int64_t uTextLatin1MapOffsetToNative(const UText* uText) |
| 197 | { |
| 198 | return uText->chunkNativeStart + uText->chunkOffset; |
| 199 | } |
| 200 | |
| 201 | static int32_t uTextLatin1MapNativeIndexToUTF16(const UText* uText, int64_t nativeIndex) |
| 202 | { |
| 203 | ASSERT_UNUSED(uText, uText->chunkNativeStart >= nativeIndex); |
| 204 | ASSERT_UNUSED(uText, nativeIndex < uText->chunkNativeLimit); |
| 205 | return static_cast<int32_t>(nativeIndex); |
| 206 | } |
| 207 | |
| 208 | static void uTextLatin1Close(UText* uText) |
| 209 | { |
| 210 | uText->context = nullptr; |
| 211 | } |
| 212 | |
| 213 | UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status) |
| 214 | { |
| 215 | if (U_FAILURE(*status)) |
| 216 | return nullptr; |
| 217 | if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { |
| 218 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 219 | return nullptr; |
| 220 | } |
| 221 | UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status); |
| 222 | if (U_FAILURE(*status)) { |
| 223 | ASSERT(!text); |
| 224 | return nullptr; |
| 225 | } |
| 226 | |
| 227 | text->context = string; |
| 228 | text->a = length; |
| 229 | text->pFuncs = &uTextLatin1Funcs; |
| 230 | text->chunkContents = (UChar*)text->pExtra; |
| 231 | memset(const_cast<UChar*>(text->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity); |
| 232 | |
| 233 | return text; |
| 234 | } |
| 235 | |
| 236 | |
| 237 | // Latin1ContextAware provider |
| 238 | |
| 239 | static UText* uTextLatin1ContextAwareClone(UText*, const UText*, UBool, UErrorCode*); |
| 240 | static int64_t uTextLatin1ContextAwareNativeLength(UText*); |
| 241 | static UBool uTextLatin1ContextAwareAccess(UText*, int64_t, UBool); |
| 242 | static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); |
| 243 | static void uTextLatin1ContextAwareClose(UText*); |
| 244 | |
| 245 | static const struct UTextFuncs textLatin1ContextAwareFuncs = { |
| 246 | sizeof(UTextFuncs), |
| 247 | 0, |
| 248 | 0, |
| 249 | 0, |
| 250 | uTextLatin1ContextAwareClone, |
| 251 | uTextLatin1ContextAwareNativeLength, |
| 252 | uTextLatin1ContextAwareAccess, |
| 253 | uTextLatin1ContextAwareExtract, |
| 254 | nullptr, |
| 255 | nullptr, |
| 256 | nullptr, |
| 257 | nullptr, |
| 258 | uTextLatin1ContextAwareClose, |
| 259 | nullptr, |
| 260 | nullptr, |
| 261 | nullptr |
| 262 | }; |
| 263 | |
| 264 | static inline UTextProviderContext textLatin1ContextAwareGetCurrentContext(const UText* text) |
| 265 | { |
| 266 | if (!text->chunkContents) |
| 267 | return UTextProviderContext::NoContext; |
| 268 | return text->chunkContents == text->pExtra ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; |
| 269 | } |
| 270 | |
| 271 | static void textLatin1ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) |
| 272 | { |
| 273 | ASSERT(text->chunkContents == text->pExtra); |
| 274 | if (forward) { |
| 275 | ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength); |
| 276 | text->chunkNativeStart = nativeIndex; |
| 277 | text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar); |
| 278 | if (text->chunkNativeLimit > nativeLength) |
| 279 | text->chunkNativeLimit = nativeLength; |
| 280 | } else { |
| 281 | ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength); |
| 282 | text->chunkNativeLimit = nativeIndex; |
| 283 | text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar); |
| 284 | if (text->chunkNativeStart < text->b) |
| 285 | text->chunkNativeStart = text->b; |
| 286 | } |
| 287 | int64_t length = text->chunkNativeLimit - text->chunkNativeStart; |
| 288 | // Ensure chunk length is well defined if computed length exceeds int32_t range. |
| 289 | ASSERT(length < std::numeric_limits<int32_t>::max()); |
| 290 | text->chunkLength = length < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0; |
| 291 | text->nativeIndexingLimit = text->chunkLength; |
| 292 | text->chunkOffset = forward ? 0 : text->chunkLength; |
| 293 | StringImpl::copyCharacters(const_cast<UChar*>(text->chunkContents), static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b), static_cast<unsigned>(text->chunkLength)); |
| 294 | } |
| 295 | |
| 296 | static void textLatin1ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) |
| 297 | { |
| 298 | ASSERT(!text->chunkContents || text->chunkContents == text->q); |
| 299 | text->chunkContents = static_cast<const UChar*>(text->pExtra); |
| 300 | textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); |
| 301 | } |
| 302 | |
| 303 | static void textLatin1ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) |
| 304 | { |
| 305 | ASSERT(text->chunkContents == text->q); |
| 306 | ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b); |
| 307 | ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); |
| 308 | ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); |
| 309 | text->chunkNativeStart = 0; |
| 310 | text->chunkNativeLimit = text->b; |
| 311 | text->chunkLength = text->b; |
| 312 | text->nativeIndexingLimit = text->chunkLength; |
| 313 | int64_t offset = nativeIndex - text->chunkNativeStart; |
| 314 | // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length. |
| 315 | ASSERT(offset < std::numeric_limits<int32_t>::max()); |
| 316 | text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength); |
| 317 | } |
| 318 | |
| 319 | static void textLatin1ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) |
| 320 | { |
| 321 | ASSERT(!text->chunkContents || text->chunkContents == text->pExtra); |
| 322 | text->chunkContents = static_cast<const UChar*>(text->q); |
| 323 | textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); |
| 324 | } |
| 325 | |
| 326 | static UText* uTextLatin1ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status) |
| 327 | { |
| 328 | return uTextCloneImpl(destination, source, deep, status); |
| 329 | } |
| 330 | |
| 331 | static int64_t uTextLatin1ContextAwareNativeLength(UText* text) |
| 332 | { |
| 333 | return text->a + text->b; |
| 334 | } |
| 335 | |
| 336 | static UBool uTextLatin1ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward) |
| 337 | { |
| 338 | if (!text->context) |
| 339 | return FALSE; |
| 340 | int64_t nativeLength = uTextLatin1ContextAwareNativeLength(text); |
| 341 | UBool isAccessible; |
| 342 | if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible)) |
| 343 | return isAccessible; |
| 344 | nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength); |
| 345 | UTextProviderContext currentContext = textLatin1ContextAwareGetCurrentContext(text); |
| 346 | UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward); |
| 347 | ASSERT(newContext != UTextProviderContext::NoContext); |
| 348 | if (newContext == currentContext) { |
| 349 | if (currentContext == UTextProviderContext::PrimaryContext) |
| 350 | textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); |
| 351 | else |
| 352 | textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); |
| 353 | } else if (newContext == UTextProviderContext::PrimaryContext) |
| 354 | textLatin1ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward); |
| 355 | else { |
| 356 | ASSERT(newContext == UTextProviderContext::PriorContext); |
| 357 | textLatin1ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward); |
| 358 | } |
| 359 | return TRUE; |
| 360 | } |
| 361 | |
| 362 | static int32_t (UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode) |
| 363 | { |
| 364 | // In the present context, this text provider is used only with ICU functions |
| 365 | // that do not perform an extract operation. |
| 366 | ASSERT_NOT_REACHED(); |
| 367 | *errorCode = U_UNSUPPORTED_ERROR; |
| 368 | return 0; |
| 369 | } |
| 370 | |
| 371 | static void uTextLatin1ContextAwareClose(UText* text) |
| 372 | { |
| 373 | text->context = nullptr; |
| 374 | } |
| 375 | |
| 376 | UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status) |
| 377 | { |
| 378 | if (U_FAILURE(*status)) |
| 379 | return 0; |
| 380 | if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { |
| 381 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 382 | return 0; |
| 383 | } |
| 384 | UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status); |
| 385 | if (U_FAILURE(*status)) { |
| 386 | ASSERT(!text); |
| 387 | return 0; |
| 388 | } |
| 389 | |
| 390 | initializeContextAwareUTextProvider(text, &textLatin1ContextAwareFuncs, string, length, priorContext, priorContextLength); |
| 391 | return text; |
| 392 | } |
| 393 | |
| 394 | } // namespace WTF |
| 395 | |