StringImpl.cpp source code [webkit/Source/WTF/wtf/text/StringImpl.cpp]

1	/*
2	* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3	* (C) 1999 Antti Koivisto (koivisto@kde.org)
4	* (C) 2001 Dirk Mueller ( mueller@kde.org )
5	* Copyright (C) 2003-2018 Apple Inc. All rights reserved.
6	* Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7	*
8	* This library is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU Library General Public
10	* License as published by the Free Software Foundation; either
11	* version 2 of the License, or (at your option) any later version.
12	*
13	* This library is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	* Library General Public License for more details.
17	*
18	* You should have received a copy of the GNU Library General Public License
19	* along with this library; see the file COPYING.LIB. If not, write to
20	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21	* Boston, MA 02110-1301, USA.
22	*
23	*/
24
25	#include "config.h"
26	#include <wtf/text/StringImpl.h>
27
28	#include <wtf/ProcessID.h>
29	#include <wtf/StdLibExtras.h>
30	#include <wtf/text/AtomicString.h>
31	#include <wtf/text/CString.h>
32	#include <wtf/text/ExternalStringImpl.h>
33	#include <wtf/text/StringBuffer.h>
34	#include <wtf/text/StringHash.h>
35	#include <wtf/text/StringView.h>
36	#include <wtf/text/SymbolImpl.h>
37	#include <wtf/text/SymbolRegistry.h>
38	#include <wtf/unicode/CharacterNames.h>
39	#include <wtf/unicode/UTF8Conversion.h>
40
41	#if STRING_STATS
42	#include <unistd.h>
43	#include <wtf/DataLog.h>
44	#endif
45
46	namespace WTF {
47
48	using namespace Unicode;
49
50	static_assert(sizeof(StringImpl) == `2` * sizeof(int) + `2` * sizeof(void*), "StringImpl should stay small");
51
52	#if STRING_STATS
53	StringStats StringImpl::m_stringStats;
54
55	std::atomic<unsigned> StringStats::s_stringRemovesTillPrintStats(s_printStringStatsFrequency);
56
57	void StringStats::removeString(StringImpl& string)
58	{
59	unsigned length = string.length();
60	bool isSubString = string.isSubString();
61
62	--m_totalNumberStrings;
63
64	if (string.is8Bit()) {
65	--m_number8BitStrings;
66	if (!isSubString)
67	m_total8BitData -= length;
68	} else {
69	--m_number16BitStrings;
70	if (!isSubString)
71	m_total16BitData -= length;
72	}
73
74	if (!--s_stringRemovesTillPrintStats) {
75	s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
76	printStats();
77	}
78	}
79
80	void StringStats::printStats()
81	{
82	dataLogF("String stats for process id %d:\n", getCurrentProcessID());
83
84	unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
85	double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * `100`) / (double)m_totalNumberStrings : `0.0`;
86	double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : `0.0`;
87	dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings.load(), percent8Bit, m_total8BitData.load(), m_total8BitData.load(), average8bitLength);
88
89	double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * `100`) / (double)m_totalNumberStrings : `0.0`;
90	double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : `0.0`;
91	dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings.load(), percent16Bit, m_total16BitData.load(), m_total16BitData * `2`, average16bitLength);
92
93	double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : `0.0`;
94	unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * `2`;
95	dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings.load(), totalNumberCharacters, totalDataBytes, averageLength);
96	unsigned long long totalSavedBytes = m_total8BitData;
97	double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * `100`) / (double)(totalDataBytes + totalSavedBytes) : `0.0`;
98	dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
99
100	dataLogF("%8u StringImpl::ref calls\n", m_refCalls.load());
101	dataLogF("%8u StringImpl::deref calls\n", m_derefCalls.load());
102	}
103	#endif
104
105	StringImpl::StaticStringImpl StringImpl::s_atomicEmptyString("", StringImpl::StringAtomic);
106
107	StringImpl::~StringImpl()
108	{
109	ASSERT(!isStatic());
110
111	StringView::invalidate(*this);
112
113	STRING_STATS_REMOVE_STRING(*this);
114
115	if (isAtomic()) {
116	ASSERT(!isSymbol());
117	if (length())
118	AtomicStringImpl::remove(static_cast<AtomicStringImpl>(this*));
119	} else if (isSymbol()) {
120	auto& symbol = static_cast<SymbolImpl&>(*this);
121	auto* symbolRegistry = symbol.symbolRegistry();
122	if (symbolRegistry)
123	symbolRegistry->remove(*symbol.asRegisteredSymbolImpl());
124	}
125
126	BufferOwnership ownership = bufferOwnership();
127
128	if (ownership == BufferInternal)
129	return;
130	if (ownership == BufferOwned) {
131	// We use m_data8, but since it is a union with m_data16 this works either way.
132	ASSERT(m_data8);
133	fastFree(const_cast<LChar*>(m_data8));
134	return;
135	}
136	if (ownership == BufferExternal) {
137	auto* external = static_cast<ExternalStringImpl>(this*);
138	external->freeExternalBuffer(const_cast<LChar*>(m_data8), sizeInBytes());
139	external->m_free.~ExternalStringImplFreeFunction();
140	return;
141	}
142
143	ASSERT(ownership == BufferSubstring);
144	ASSERT(substringBuffer());
145	substringBuffer()->deref();
146	}
147
148	void StringImpl::destroy(StringImpl* stringImpl)
149	{
150	stringImpl->~StringImpl();
151	fastFree(stringImpl);
152	}
153
154	Ref<StringImpl> StringImpl::createFromLiteral(const char* characters, unsigned length)
155	{
156	ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty string");
157	ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(characters), length));
158	return adoptRef(*new StringImpl (reinterpret_cast<const LChar*>(characters), length, ConstructWithoutCopying));
159	}
160
161	Ref<StringImpl> StringImpl::createFromLiteral(const char* characters)
162	{
163	return createFromLiteral(characters, strlen(characters));
164	}
165
166	Ref<StringImpl> StringImpl::createWithoutCopying(const UChar* characters, unsigned length)
167	{
168	if (!length)
169	return *empty();
170	return adoptRef(*new StringImpl (characters, length, ConstructWithoutCopying));
171	}
172
173	Ref<StringImpl> StringImpl::createWithoutCopying(const LChar* characters, unsigned length)
174	{
175	if (!length)
176	return *empty();
177	return adoptRef(*new StringImpl (characters, length, ConstructWithoutCopying));
178	}
179
180	template<typename CharacterType> inline Ref<StringImpl> StringImpl::createUninitializedInternal(unsigned length, CharacterType*& data)
181	{
182	if (!length) {
183	data = `0`;
184	return *empty();
185	}
186	return createUninitializedInternalNonEmpty(length, data);
187	}
188
189	template<typename CharacterType> inline Ref<StringImpl> StringImpl::createUninitializedInternalNonEmpty(unsigned length, CharacterType*& data)
190	{
191	ASSERT(length);
192
193	// Allocate a single buffer large enough to contain the StringImpl
194	// struct as well as the data which it contains. This removes one
195	// heap allocation from this call.
196	if (length > maxInternalLength<CharacterType>())
197	CRASH();
198	StringImpl* string = static_cast<StringImpl*>(fastMalloc(allocationSize<CharacterType>(length)));
199
200	data = string->tailPointer<CharacterType>();
201	return constructInternal<CharacterType>(*string, length);
202	}
203
204	Ref<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
205	{
206	return createUninitializedInternal(length, data);
207	}
208
209	Ref<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
210	{
211	return createUninitializedInternal(length, data);
212	}
213
214	template<typename CharacterType> inline Expected<Ref<StringImpl>, UTF8ConversionError> StringImpl::reallocateInternal(Ref<StringImpl>&& originalString, unsigned length, CharacterType*& data)
215	{
216	ASSERT(originalString ->hasOneRef());
217	ASSERT(originalString ->bufferOwnership() == BufferInternal);
218
219	if (!length) {
220	data = `0`;
221	return Ref<StringImpl>(*empty());
222	}
223
224	// Same as createUninitialized() except here we use fastRealloc.
225	if (length > maxInternalLength<CharacterType>())
226	return makeUnexpected(UTF8ConversionError::OutOfMemory);
227
228	originalString ->~StringImpl();
229	StringImpl* string;
230	if (!tryFastRealloc(&originalString.leakRef(), allocationSize<CharacterType>(length)).getValue(string))
231	return makeUnexpected(UTF8ConversionError::OutOfMemory);
232
233	data = string->tailPointer<CharacterType>();
234	return constructInternal<CharacterType>(*string, length);
235	}
236
237	Ref<StringImpl> StringImpl::reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data)
238	{
239	auto expectedStringImpl = tryReallocate(WTFMove(originalString), length, data);
240	RELEASE_ASSERT(expectedStringImpl);
241	return WTFMove(expectedStringImpl.value());
242	}
243
244	Ref<StringImpl> StringImpl::reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data)
245	{
246	auto expectedStringImpl = tryReallocate(WTFMove(originalString), length, data);
247	RELEASE_ASSERT(expectedStringImpl);
248	return WTFMove(expectedStringImpl.value());
249	}
250
251	Expected<Ref<StringImpl>, UTF8ConversionError> StringImpl::tryReallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data)
252	{
253	ASSERT(originalString ->is8Bit());
254	return reallocateInternal(WTFMove(originalString), length, data);
255	}
256
257	Expected<Ref<StringImpl>, UTF8ConversionError> StringImpl::tryReallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data)
258	{
259	ASSERT(!originalString ->is8Bit());
260	return reallocateInternal(WTFMove(originalString), length, data);
261	}
262
263	template<typename CharacterType> inline Ref<StringImpl> StringImpl::createInternal(const CharacterType* characters, unsigned length)
264	{
265	if (!characters \|\| !length)
266	return *empty();
267	CharacterType* data;
268	auto string = createUninitializedInternalNonEmpty(length, data);
269	copyCharacters(data, characters, length);
270	return string;
271	}
272
273	Ref<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
274	{
275	return createInternal(characters, length);
276	}
277
278	Ref<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
279	{
280	return createInternal(characters, length);
281	}
282
283	Ref<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
284	{
285	if (!characters \|\| !length)
286	return *empty();
287
288	LChar* data;
289	auto string = createUninitializedInternalNonEmpty(length, data);
290
291	for (size_t i = `0`; i < length; ++i) {
292	if (!isLatin1(characters[i]))
293	return create(characters, length);
294	data[i] = static_cast<LChar>(characters[i]);
295	}
296
297	return string;
298	}
299
300	Ref<StringImpl> StringImpl::create8BitIfPossible(const UChar* string)
301	{
302	return StringImpl::create8BitIfPossible(string, lengthOfNullTerminatedString(string));
303	}
304
305	Ref<StringImpl> StringImpl::create(const LChar* string)
306	{
307	if (!string)
308	return *empty();
309	size_t length = strlen(reinterpret_cast<const char*>(string));
310	if (length > MaxLength)
311	CRASH();
312	return create(string, length);
313	}
314
315	Ref<StringImpl> StringImpl::substring(unsigned start, unsigned length)
316	{
317	if (start >= m_length)
318	return *empty();
319	unsigned maxLength = m_length - start;
320	if (length >= maxLength) {
321	if (!start)
322	return *this;
323	length = maxLength;
324	}
325	if (is8Bit())
326	return create(m_data8 + start, length);
327
328	return create(m_data16 + start, length);
329	}
330
331	UChar32 StringImpl::characterStartingAt(unsigned i)
332	{
333	if (is8Bit())
334	return m_data8[i];
335	if (U16_IS_SINGLE(m_data16[i]))
336	return m_data16[i];
337	if (i + `1` < m_length && U16_IS_LEAD(m_data16[i]) && U16_IS_TRAIL(m_data16[i + `1`]))
338	return U16_GET_SUPPLEMENTARY(m_data16[i], m_data16[i + `1`]);
339	return `0`;
340	}
341
342	Ref<StringImpl> StringImpl::convertToLowercaseWithoutLocale()
343	{
344	// Note: At one time this was a hot function in the Dromaeo benchmark, specifically the
345	// no-op code path that may return ourself if we find no upper case letters and no invalid
346	// ASCII letters.
347
348	// First scan the string for uppercase and non-ASCII characters:
349	if (is8Bit()) {
350	for (unsigned i = `0`; i < m_length; ++i) {
351	LChar character = m_data8[i];
352	if (UNLIKELY((character & ~`0x7F`) \|\| isASCIIUpper(character)))
353	return convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(i);
354	}
355
356	return *this;
357	}
358
359	bool noUpper = true;
360	unsigned ored = `0`;
361
362	for (unsigned i = `0`; i < m_length; ++i) {
363	UChar character = m_data16[i];
364	if (UNLIKELY(isASCIIUpper(character)))
365	noUpper = false;
366	ored \|= character;
367	}
368	// Nothing to do if the string is all ASCII with no uppercase.
369	if (noUpper && !(ored & ~`0x7F`))
370	return *this;
371
372	if (!(ored & ~`0x7F`)) {
373	UChar* data16;
374	auto newImpl = createUninitializedInternalNonEmpty(m_length, data16);
375	for (unsigned i = `0`; i < m_length; ++i)
376	data16[i] = toASCIILower(m_data16[i]);
377	return newImpl;
378	}
379
380	if (m_length > MaxLength)
381	CRASH();
382	int32_t length = m_length;
383
384	// Do a slower implementation for cases that include non-ASCII characters.
385	UChar* data16;
386	auto newImpl = createUninitializedInternalNonEmpty(m_length, data16);
387
388	UErrorCode status = U_ZERO_ERROR;
389	int32_t realLength = u_strToLower(data16, length, m_data16, m_length, "", &status);
390	if (U_SUCCESS(status) && realLength == length)
391	return newImpl;
392
393	newImpl = createUninitialized(realLength, data16);
394	status = U_ZERO_ERROR;
395	u_strToLower(data16, realLength, m_data16, m_length, "", &status);
396	if (U_FAILURE(status))
397	return *this;
398	return newImpl;
399	}
400
401	Ref<StringImpl> StringImpl::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex)
402	{
403	ASSERT(is8Bit());
404	LChar* data8;
405	auto newImpl = createUninitializedInternalNonEmpty(m_length, data8);
406
407	for (unsigned i = `0`; i < failingIndex; ++i) {
408	ASSERT(!(m_data8[i] & ~`0x7F`) && !isASCIIUpper(m_data8[i]));
409	data8[i] = m_data8[i];
410	}
411
412	for (unsigned i = failingIndex; i < m_length; ++i) {
413	LChar character = m_data8[i];
414	if (!(character & ~`0x7F`))
415	data8[i] = toASCIILower(character);
416	else {
417	ASSERT(isLatin1(u_tolower(character)));
418	data8[i] = static_cast<LChar>(u_tolower(character));
419	}
420	}
421
422	return newImpl;
423	}
424
425	Ref<StringImpl> StringImpl::convertToUppercaseWithoutLocale()
426	{
427	// This function could be optimized for no-op cases the way
428	// convertToLowercaseWithoutLocale() is, but in empirical testing,
429	// few actual calls to upper() are no-ops, so it wouldn't be worth
430	// the extra time for pre-scanning.
431
432	if (m_length > MaxLength)
433	CRASH();
434	int32_t length = m_length;
435
436	if (is8Bit()) {
437	LChar* data8;
438	auto newImpl = createUninitialized(m_length, data8);
439
440	// Do a faster loop for the case where all the characters are ASCII.
441	unsigned ored = `0`;
442	for (int i = `0`; i < length; ++i) {
443	LChar character = m_data8[i];
444	ored \|= character;
445	data8[i] = toASCIIUpper(character);
446	}
447	if (!(ored & ~`0x7F`))
448	return newImpl;
449
450	// Do a slower implementation for cases that include non-ASCII Latin-1 characters.
451	int numberSharpSCharacters = `0`;
452
453	// There are two special cases.
454	// 1. Some Latin-1 characters when converted to upper case are 16 bit characters.
455	// 2. Lower case sharp-S converts to "SS" (two characters)
456	for (int32_t i = `0`; i < length; ++i) {
457	LChar character = m_data8[i];
458	if (UNLIKELY(character == smallLetterSharpS))
459	++numberSharpSCharacters;
460	ASSERT(u_toupper(character) <= `0xFFFF`);
461	UChar upper = u_toupper(character);
462	if (UNLIKELY(!isLatin1(upper))) {
463	// Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
464	goto upconvert;
465	}
466	data8[i] = static_cast<LChar>(upper);
467	}
468
469	if (!numberSharpSCharacters)
470	return newImpl;
471
472	// We have numberSSCharacters sharp-s characters, but none of the other special characters.
473	newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
474
475	LChar* dest = data8;
476
477	for (int32_t i = `0`; i < length; ++i) {
478	LChar character = m_data8[i];
479	if (character == smallLetterSharpS) {
480	*dest++ = `'S'`;
481	*dest++ = `'S'`;
482	} else {
483	ASSERT(isLatin1(u_toupper(character)));
484	dest++ = static_cast*<LChar>(u_toupper(character));
485	}
486	}
487
488	return newImpl;
489	}
490
491	upconvert:
492	auto upconvertedCharacters = StringView (*this).upconvertedCharacters();
493	const UChar* source16 = upconvertedCharacters;
494
495	UChar* data16;
496	auto newImpl = createUninitialized(m_length, data16);
497
498	// Do a faster loop for the case where all the characters are ASCII.
499	unsigned ored = `0`;
500	for (int i = `0`; i < length; ++i) {
501	UChar character = source16[i];
502	ored \|= character;
503	data16[i] = toASCIIUpper(character);
504	}
505	if (!(ored & ~`0x7F`))
506	return newImpl;
507
508	// Do a slower implementation for cases that include non-ASCII characters.
509	UErrorCode status = U_ZERO_ERROR;
510	int32_t realLength = u_strToUpper(data16, length, source16, m_length, "", &status);
511	if (U_SUCCESS(status) && realLength == length)
512	return newImpl;
513	newImpl = createUninitialized(realLength, data16);
514	status = U_ZERO_ERROR;
515	u_strToUpper(data16, realLength, source16, m_length, "", &status);
516	if (U_FAILURE(status))
517	return *this;
518	return newImpl;
519	}
520
521	static inline bool needsTurkishCasingRules(const AtomicString& localeIdentifier)
522	{
523	// Either "tr" or "az" locale, with case sensitive comparison and allowing for an ignored subtag.
524	UChar first = localeIdentifier [`0`];
525	UChar second = localeIdentifier [`1`];
526	return ((isASCIIAlphaCaselessEqual(first, `'t'`) && isASCIIAlphaCaselessEqual(second, `'r'`))
527	\|\| (isASCIIAlphaCaselessEqual(first, `'a'`) && isASCIIAlphaCaselessEqual(second, `'z'`)))
528	&& (localeIdentifier.length() == `2` \|\| localeIdentifier [`2`] == `'-'`);
529	}
530
531	Ref<StringImpl> StringImpl::convertToLowercaseWithLocale(const AtomicString& localeIdentifier)
532	{
533	// Use the more-optimized code path most of the time.
534	// Assuming here that the only locale-specific lowercasing is the Turkish casing rules.
535	// FIXME: Could possibly optimize further by looking for the specific sequences
536	// that have locale-specific lowercasing. There are only three of them.
537	if (!needsTurkishCasingRules(localeIdentifier))
538	return convertToLowercaseWithoutLocale();
539
540	// FIXME: Could share more code with the main StringImpl::lower by factoring out
541	// this last part into a shared function that takes a locale string, since this is
542	// just like the end of that function.
543
544	if (m_length > MaxLength)
545	CRASH();
546	int length = m_length;
547
548	// Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
549	// allocating memory just to turn localeIdentifier into a C string, and we assume
550	// there is no difference between the uppercasing for "tr" and "az" locales.
551	auto upconvertedCharacters = StringView (*this).upconvertedCharacters();
552	const UChar* source16 = upconvertedCharacters;
553	UChar* data16;
554	auto newString = createUninitialized(length, data16);
555	UErrorCode status = U_ZERO_ERROR;
556	int realLength = u_strToLower(data16, length, source16, length, "tr", &status);
557	if (U_SUCCESS(status) && realLength == length)
558	return newString;
559	newString = createUninitialized(realLength, data16);
560	status = U_ZERO_ERROR;
561	u_strToLower(data16, realLength, source16, length, "tr", &status);
562	if (U_FAILURE(status))
563	return *this;
564	return newString;
565	}
566
567	Ref<StringImpl> StringImpl::convertToUppercaseWithLocale(const AtomicString& localeIdentifier)
568	{
569	// Use the more-optimized code path most of the time.
570	// Assuming here that the only locale-specific lowercasing is the Turkish casing rules,
571	// and that the only affected character is lowercase "i".
572	if (!needsTurkishCasingRules(localeIdentifier) \|\| find(`'i'`) == notFound)
573	return convertToUppercaseWithoutLocale();
574
575	if (m_length > MaxLength)
576	CRASH();
577	int length = m_length;
578
579	// Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
580	// allocating memory just to turn localeIdentifier into a C string, and we assume
581	// there is no difference between the uppercasing for "tr" and "az" locales.
582	auto upconvertedCharacters = StringView (*this).upconvertedCharacters();
583	const UChar* source16 = upconvertedCharacters;
584	UChar* data16;
585	auto newString = createUninitialized(length, data16);
586	UErrorCode status = U_ZERO_ERROR;
587	int realLength = u_strToUpper(data16, length, source16, length, "tr", &status);
588	if (U_SUCCESS(status) && realLength == length)
589	return newString;
590	newString = createUninitialized(realLength, data16);
591	status = U_ZERO_ERROR;
592	u_strToUpper(data16, realLength, source16, length, "tr", &status);
593	if (U_FAILURE(status))
594	return *this;
595	return newString;
596	}
597
598	Ref<StringImpl> StringImpl::foldCase()
599	{
600	if (is8Bit()) {
601	unsigned failingIndex;
602	for (unsigned i = `0`; i < m_length; ++i) {
603	auto character = m_data8[i];
604	if (UNLIKELY(!isASCII(character) \|\| isASCIIUpper(character))) {
605	failingIndex = i;
606	goto SlowPath;
607	}
608	}
609	// String was all ASCII and no uppercase, so just return as-is.
610	return *this;
611
612	SlowPath:
613	bool need16BitCharacters = false;
614	for (unsigned i = failingIndex; i < m_length; ++i) {
615	auto character = m_data8[i];
616	if (character == `0xB5` \|\| character == `0xDF`) {
617	need16BitCharacters = true;
618	break;
619	}
620	}
621
622	if (!need16BitCharacters) {
623	LChar* data8;
624	auto folded = createUninitializedInternalNonEmpty(m_length, data8);
625	copyCharacters(data8, m_data8, failingIndex);
626	for (unsigned i = failingIndex; i < m_length; ++i) {
627	auto character = m_data8[i];
628	if (isASCII(character))
629	data8[i] = toASCIILower(character);
630	else {
631	ASSERT(isLatin1(u_foldCase(character, U_FOLD_CASE_DEFAULT)));
632	data8[i] = static_cast<LChar>(u_foldCase(character, U_FOLD_CASE_DEFAULT));
633	}
634	}
635	return folded;
636	}
637	} else {
638	// FIXME: Unclear why we use goto in the 8-bit case, and a different approach in the 16-bit case.
639	bool noUpper = true;
640	unsigned ored = `0`;
641	for (unsigned i = `0`; i < m_length; ++i) {
642	UChar character = m_data16[i];
643	if (UNLIKELY(isASCIIUpper(character)))
644	noUpper = false;
645	ored \|= character;
646	}
647	if (!(ored & ~`0x7F`)) {
648	if (noUpper) {
649	// String was all ASCII and no uppercase, so just return as-is.
650	return *this;
651	}
652	UChar* data16;
653	auto folded = createUninitializedInternalNonEmpty(m_length, data16);
654	for (unsigned i = `0`; i < m_length; ++i)
655	data16[i] = toASCIILower(m_data16[i]);
656	return folded;
657	}
658	}
659
660	if (m_length > MaxLength)
661	CRASH();
662
663	auto upconvertedCharacters = StringView (*this).upconvertedCharacters();
664
665	UChar* data;
666	auto folded = createUninitializedInternalNonEmpty(m_length, data);
667	int32_t length = m_length;
668	UErrorCode status = U_ZERO_ERROR;
669	int32_t realLength = u_strFoldCase(data, length, upconvertedCharacters, length, U_FOLD_CASE_DEFAULT, &status);
670	if (U_SUCCESS(status) && realLength == length)
671	return folded;
672	ASSERT(realLength > length);
673	folded = createUninitializedInternalNonEmpty(realLength, data);
674	status = U_ZERO_ERROR;
675	u_strFoldCase(data, realLength, upconvertedCharacters, length, U_FOLD_CASE_DEFAULT, &status);
676	if (U_FAILURE(status))
677	return *this;
678	return folded;
679	}
680
681	template<StringImpl::CaseConvertType type, typename CharacterType>
682	ALWAYS_INLINE Ref<StringImpl> StringImpl::convertASCIICase(StringImpl& impl, const CharacterType* data, unsigned length)
683	{
684	unsigned failingIndex;
685	for (unsigned i = `0`; i < length; ++i) {
686	CharacterType character = data[i];
687	if (type == CaseConvertType::Lower ? UNLIKELY(isASCIIUpper(character)) : LIKELY(isASCIILower(character))) {
688	failingIndex = i;
689	goto SlowPath;
690	}
691	}
692	return impl;
693
694	SlowPath:
695	CharacterType* newData;
696	auto newImpl = createUninitializedInternalNonEmpty(length, newData);
697	copyCharacters(newData, data, failingIndex);
698	for (unsigned i = failingIndex; i < length; ++i)
699	newData[i] = type == CaseConvertType::Lower ? toASCIILower(data[i]) : toASCIIUpper(data[i]);
700	return newImpl;
701	}
702
703	Ref<StringImpl> StringImpl::convertToASCIILowercase()
704	{
705	if (is8Bit())
706	return convertASCIICase<CaseConvertType::Lower>(*this, m_data8, m_length);
707	return convertASCIICase<CaseConvertType::Lower>(*this, m_data16, m_length);
708	}
709
710	Ref<StringImpl> StringImpl::convertToASCIIUppercase()
711	{
712	if (is8Bit())
713	return convertASCIICase<CaseConvertType::Upper>(*this, m_data8, m_length);
714	return convertASCIICase<CaseConvertType::Upper>(*this, m_data16, m_length);
715	}
716
717	template<typename CodeUnitPredicate> inline Ref<StringImpl> StringImpl::stripMatchedCharacters(CodeUnitPredicate predicate)
718	{
719	if (!m_length)
720	return *this;
721
722	unsigned start = `0`;
723	unsigned end = m_length - `1`;
724
725	// skip white space from start
726	while (start <= end && predicate(is8Bit() ? m_data8[start] : m_data16[start]))
727	++start;
728
729	// only white space
730	if (start > end)
731	return *empty();
732
733	// skip white space from end
734	while (end && predicate(is8Bit() ? m_data8[end] : m_data16[end]))
735	--end;
736
737	if (!start && end == m_length - `1`)
738	return *this;
739	if (is8Bit())
740	return create(m_data8 + start, end + `1` - start);
741	return create(m_data16 + start, end + `1` - start);
742	}
743
744	Ref<StringImpl> StringImpl::stripWhiteSpace()
745	{
746	return stripMatchedCharacters(isSpaceOrNewline);
747	}
748
749	Ref<StringImpl> StringImpl::stripLeadingAndTrailingCharacters(CodeUnitMatchFunction predicate)
750	{
751	return stripMatchedCharacters(predicate);
752	}
753
754	template<typename CharacterType> ALWAYS_INLINE Ref<StringImpl> StringImpl::removeCharacters(const CharacterType* characters, CodeUnitMatchFunction findMatch)
755	{
756	auto* from = characters;
757	auto* fromEnd = from + m_length;
758
759	// Assume the common case will not remove any characters
760	while (from != fromEnd && !findMatch(*from))
761	++from;
762	if (from == fromEnd)
763	return *this;
764
765	StringBuffer<CharacterType> data(m_length);
766	auto* to = data.characters();
767	unsigned outc = from - characters;
768
769	if (outc)
770	copyCharacters(to, characters, outc);
771
772	do {
773	while (from != fromEnd && findMatch(*from))
774	++from;
775	while (from != fromEnd && !findMatch(*from))
776	to[outc++] = *from++;
777	} while (from != fromEnd);
778
779	data.shrink(outc);
780
781	return adopt(WTFMove(data));
782	}
783
784	Ref<StringImpl> StringImpl::removeCharacters(CodeUnitMatchFunction findMatch)
785	{
786	if (is8Bit())
787	return removeCharacters(characters8(), findMatch);
788	return removeCharacters(characters16(), findMatch);
789	}
790
791	template<typename CharacterType, class UCharPredicate> inline Ref<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate)
792	{
793	StringBuffer<CharacterType> data(m_length);
794
795	auto* from = characters<CharacterType>();
796	auto* fromEnd = from + m_length;
797	unsigned outc = `0`;
798	bool changedToSpace = false;
799
800	auto* to = data.characters();
801
802	while (true) {
803	while (from != fromEnd && predicate(*from)) {
804	if (*from != `' '`)
805	changedToSpace = true;
806	++from;
807	}
808	while (from != fromEnd && !predicate(*from))
809	to[outc++] = *from++;
810	if (from != fromEnd)
811	to[outc++] = `' '`;
812	else
813	break;
814	}
815
816	if (outc && to[outc - `1`] == `' '`)
817	--outc;
818
819	if (outc == m_length && !changedToSpace)
820	return *this;
821
822	data.shrink(outc);
823
824	return adopt(WTFMove(data));
825	}
826
827	Ref<StringImpl> StringImpl::simplifyWhiteSpace()
828	{
829	if (is8Bit())
830	return StringImpl::simplifyMatchedCharactersToSpace<LChar>(isSpaceOrNewline);
831	return StringImpl::simplifyMatchedCharactersToSpace<UChar>(isSpaceOrNewline);
832	}
833
834	Ref<StringImpl> StringImpl::simplifyWhiteSpace(CodeUnitMatchFunction isWhiteSpace)
835	{
836	if (is8Bit())
837	return StringImpl::simplifyMatchedCharactersToSpace<LChar>(isWhiteSpace);
838	return StringImpl::simplifyMatchedCharactersToSpace<UChar>(isWhiteSpace);
839	}
840
841	int StringImpl::toIntStrict(bool* ok, int base)
842	{
843	if (is8Bit())
844	return charactersToIntStrict(characters8(), m_length, ok, base);
845	return charactersToIntStrict(characters16(), m_length, ok, base);
846	}
847
848	unsigned StringImpl::toUIntStrict(bool* ok, int base)
849	{
850	if (is8Bit())
851	return charactersToUIntStrict(characters8(), m_length, ok, base);
852	return charactersToUIntStrict(characters16(), m_length, ok, base);
853	}
854
855	int64_t StringImpl::toInt64Strict(bool* ok, int base)
856	{
857	if (is8Bit())
858	return charactersToInt64Strict(characters8(), m_length, ok, base);
859	return charactersToInt64Strict(characters16(), m_length, ok, base);
860	}
861
862	uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
863	{
864	if (is8Bit())
865	return charactersToUInt64Strict(characters8(), m_length, ok, base);
866	return charactersToUInt64Strict(characters16(), m_length, ok, base);
867	}
868
869	intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
870	{
871	if (is8Bit())
872	return charactersToIntPtrStrict(characters8(), m_length, ok, base);
873	return charactersToIntPtrStrict(characters16(), m_length, ok, base);
874	}
875
876	int StringImpl::toInt(bool* ok)
877	{
878	if (is8Bit())
879	return charactersToInt(characters8(), m_length, ok);
880	return charactersToInt(characters16(), m_length, ok);
881	}
882
883	unsigned StringImpl::toUInt(bool* ok)
884	{
885	if (is8Bit())
886	return charactersToUInt(characters8(), m_length, ok);
887	return charactersToUInt(characters16(), m_length, ok);
888	}
889
890	int64_t StringImpl::toInt64(bool* ok)
891	{
892	if (is8Bit())
893	return charactersToInt64(characters8(), m_length, ok);
894	return charactersToInt64(characters16(), m_length, ok);
895	}
896
897	uint64_t StringImpl::toUInt64(bool* ok)
898	{
899	if (is8Bit())
900	return charactersToUInt64(characters8(), m_length, ok);
901	return charactersToUInt64(characters16(), m_length, ok);
902	}
903
904	intptr_t StringImpl::toIntPtr(bool* ok)
905	{
906	if (is8Bit())
907	return charactersToIntPtr(characters8(), m_length, ok);
908	return charactersToIntPtr(characters16(), m_length, ok);
909	}
910
911	double StringImpl::toDouble(bool* ok)
912	{
913	if (is8Bit())
914	return charactersToDouble(characters8(), m_length, ok);
915	return charactersToDouble(characters16(), m_length, ok);
916	}
917
918	float StringImpl::toFloat(bool* ok)
919	{
920	if (is8Bit())
921	return charactersToFloat(characters8(), m_length, ok);
922	return charactersToFloat(characters16(), m_length, ok);
923	}
924
925	size_t StringImpl::find(CodeUnitMatchFunction matchFunction, unsigned start)
926	{
927	if (is8Bit())
928	return WTF::find(characters8(), m_length, matchFunction, start);
929	return WTF::find(characters16(), m_length, matchFunction, start);
930	}
931
932	size_t StringImpl::find(const LChar* matchString, unsigned index)
933	{
934	// Check for null or empty string to match against
935	if (!matchString)
936	return notFound;
937	size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
938	if (matchStringLength > MaxLength)
939	CRASH();
940	unsigned matchLength = matchStringLength;
941	if (!matchLength)
942	return std::min(index, length());
943
944	// Optimization 1: fast case for strings of length 1.
945	if (matchLength == `1`) {
946	if (is8Bit())
947	return WTF::find(characters8(), length(), matchString[`0`], index);
948	return WTF::find(characters16(), length(), *matchString, index);
949	}
950
951	// Check index & matchLength are in range.
952	if (index > length())
953	return notFound;
954	unsigned searchLength = length() - index;
955	if (matchLength > searchLength)
956	return notFound;
957	// delta is the number of additional times to test; delta == 0 means test only once.
958	unsigned delta = searchLength - matchLength;
959
960	// Optimization 2: keep a running hash of the strings,
961	// only call equal if the hashes match.
962
963	if (is8Bit()) {
964	const LChar* searchCharacters = characters8() + index;
965
966	unsigned searchHash = `0`;
967	unsigned matchHash = `0`;
968	for (unsigned i = `0`; i < matchLength; ++i) {
969	searchHash += searchCharacters[i];
970	matchHash += matchString[i];
971	}
972
973	unsigned i = `0`;
974	while (searchHash != matchHash \|\| !equal(searchCharacters + i, matchString, matchLength)) {
975	if (i == delta)
976	return notFound;
977	searchHash += searchCharacters[i + matchLength];
978	searchHash -= searchCharacters[i];
979	++i;
980	}
981	return index + i;
982	}
983
984	const UChar* searchCharacters = characters16() + index;
985
986	unsigned searchHash = `0`;
987	unsigned matchHash = `0`;
988	for (unsigned i = `0`; i < matchLength; ++i) {
989	searchHash += searchCharacters[i];
990	matchHash += matchString[i];
991	}
992
993	unsigned i = `0`;
994	while (searchHash != matchHash \|\| !equal(searchCharacters + i, matchString, matchLength)) {
995	if (i == delta)
996	return notFound;
997	searchHash += searchCharacters[i + matchLength];
998	searchHash -= searchCharacters[i];
999	++i;
1000	}
1001	return index + i;
1002	}
1003
1004	size_t StringImpl::find(StringImpl* matchString)
1005	{
1006	// Check for null string to match against
1007	if (UNLIKELY(!matchString))
1008	return notFound;
1009	unsigned matchLength = matchString->length();
1010
1011	// Optimization 1: fast case for strings of length 1.
1012	if (matchLength == `1`) {
1013	if (is8Bit()) {
1014	if (matchString->is8Bit())
1015	return WTF::find(characters8(), length(), matchString->characters8()[`0`]);
1016	return WTF::find(characters8(), length(), matchString->characters16()[`0`]);
1017	}
1018	if (matchString->is8Bit())
1019	return WTF::find(characters16(), length(), matchString->characters8()[`0`]);
1020	return WTF::find(characters16(), length(), matchString->characters16()[`0`]);
1021	}
1022
1023	// Check matchLength is in range.
1024	if (matchLength > length())
1025	return notFound;
1026
1027	// Check for empty string to match against
1028	if (UNLIKELY(!matchLength))
1029	return `0`;
1030
1031	if (is8Bit()) {
1032	if (matchString->is8Bit())
1033	return findInner(characters8(), matchString->characters8(), `0`, length(), matchLength);
1034	return findInner(characters8(), matchString->characters16(), `0`, length(), matchLength);
1035	}
1036
1037	if (matchString->is8Bit())
1038	return findInner(characters16(), matchString->characters8(), `0`, length(), matchLength);
1039
1040	return findInner(characters16(), matchString->characters16(), `0`, length(), matchLength);
1041	}
1042
1043	size_t StringImpl::find(StringImpl* matchString, unsigned index)
1044	{
1045	// Check for null or empty string to match against
1046	if (UNLIKELY(!matchString))
1047	return notFound;
1048
1049	return findCommon(*this, *matchString, index);
1050	}
1051
1052	size_t StringImpl::findIgnoringASCIICase(const StringImpl& matchString) const
1053	{
1054	return ::WTF::findIgnoringASCIICase(*this, matchString, `0`);
1055	}
1056
1057	size_t StringImpl::findIgnoringASCIICase(const StringImpl& matchString, unsigned startOffset) const
1058	{
1059	return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
1060	}
1061
1062	size_t StringImpl::findIgnoringASCIICase(const StringImpl* matchString) const
1063	{
1064	if (!matchString)
1065	return notFound;
1066	return ::WTF::findIgnoringASCIICase(*this, *matchString, `0`);
1067	}
1068
1069	size_t StringImpl::findIgnoringASCIICase(const StringImpl* matchString, unsigned startOffset) const
1070	{
1071	if (!matchString)
1072	return notFound;
1073	return ::WTF::findIgnoringASCIICase(*this, *matchString, startOffset);
1074	}
1075
1076	size_t StringImpl::reverseFind(UChar character, unsigned index)
1077	{
1078	if (is8Bit())
1079	return WTF::reverseFind(characters8(), m_length, character, index);
1080	return WTF::reverseFind(characters16(), m_length, character, index);
1081	}
1082
1083	template <typename SearchCharacterType, typename MatchCharacterType>
1084	ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1085	{
1086	// Optimization: keep a running hash of the strings,
1087	// only call equal if the hashes match.
1088
1089	// delta is the number of additional times to test; delta == 0 means test only once.
1090	unsigned delta = std::min(index, length - matchLength);
1091
1092	unsigned searchHash = `0`;
1093	unsigned matchHash = `0`;
1094	for (unsigned i = `0`; i < matchLength; ++i) {
1095	searchHash += searchCharacters[delta + i];
1096	matchHash += matchCharacters[i];
1097	}
1098
1099	// keep looping until we match
1100	while (searchHash != matchHash \|\| !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1101	if (!delta)
1102	return notFound;
1103	--delta;
1104	searchHash -= searchCharacters[delta + matchLength];
1105	searchHash += searchCharacters[delta];
1106	}
1107	return delta;
1108	}
1109
1110	size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1111	{
1112	// Check for null or empty string to match against
1113	if (!matchString)
1114	return notFound;
1115	unsigned matchLength = matchString->length();
1116	unsigned ourLength = length();
1117	if (!matchLength)
1118	return std::min(index, ourLength);
1119
1120	// Optimization 1: fast case for strings of length 1.
1121	if (matchLength == `1`) {
1122	if (is8Bit())
1123	return WTF::reverseFind(characters8(), ourLength, (*matchString)[`0`], index);
1124	return WTF::reverseFind(characters16(), ourLength, (*matchString)[`0`], index);
1125	}
1126
1127	// Check index & matchLength are in range.
1128	if (matchLength > ourLength)
1129	return notFound;
1130
1131	if (is8Bit()) {
1132	if (matchString->is8Bit())
1133	return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1134	return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1135	}
1136
1137	if (matchString->is8Bit())
1138	return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1139
1140	return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1141	}
1142
1143	ALWAYS_INLINE static bool equalInner(const StringImpl& string, unsigned startOffset, const char* matchString, unsigned matchLength)
1144	{
1145	ASSERT(matchLength <= string.length());
1146	ASSERT(startOffset + matchLength <= string.length());
1147
1148	if (string.is8Bit())
1149	return equal(string.characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1150	return equal(string.characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1151	}
1152
1153	ALWAYS_INLINE static bool equalInner(const StringImpl& string, unsigned startOffset, const StringImpl& matchString)
1154	{
1155	if (startOffset > string.length())
1156	return false;
1157	if (matchString.length() > string.length())
1158	return false;
1159	if (matchString.length() + startOffset > string.length())
1160	return false;
1161
1162	if (string.is8Bit()) {
1163	if (matchString.is8Bit())
1164	return equal(string.characters8() + startOffset, matchString.characters8(), matchString.length());
1165	return equal(string.characters8() + startOffset, matchString.characters16(), matchString.length());
1166	}
1167	if (matchString.is8Bit())
1168	return equal(string.characters16() + startOffset, matchString.characters8(), matchString.length());
1169	return equal(string.characters16() + startOffset, matchString.characters16(), matchString.length());
1170	}
1171
1172	bool StringImpl::startsWith(const StringImpl* string) const
1173	{
1174	return string && ::WTF::startsWith(*this, *string);
1175	}
1176
1177	bool StringImpl::startsWith(const StringImpl& string) const
1178	{
1179	return ::WTF::startsWith(*this, string);
1180	}
1181
1182	bool StringImpl::startsWithIgnoringASCIICase(const StringImpl* prefix) const
1183	{
1184	return prefix && ::WTF::startsWithIgnoringASCIICase(*this, *prefix);
1185	}
1186
1187	bool StringImpl::startsWithIgnoringASCIICase(const StringImpl& prefix) const
1188	{
1189	return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
1190	}
1191
1192	bool StringImpl::startsWith(UChar character) const
1193	{
1194	return m_length && (*this)[`0`] == character;
1195	}
1196
1197	bool StringImpl::startsWith(const char* matchString, unsigned matchLength) const
1198	{
1199	return matchLength <= length() && equalInner(*this, `0`, matchString, matchLength);
1200	}
1201
1202	bool StringImpl::hasInfixStartingAt(const StringImpl& matchString, unsigned startOffset) const
1203	{
1204	return equalInner(*this, startOffset, matchString);
1205	}
1206
1207	bool StringImpl::endsWith(StringImpl* suffix)
1208	{
1209	return suffix && ::WTF::endsWith(*this, *suffix);
1210	}
1211
1212	bool StringImpl::endsWith(StringImpl& suffix)
1213	{
1214	return ::WTF::endsWith(*this, suffix);
1215	}
1216
1217	bool StringImpl::endsWithIgnoringASCIICase(const StringImpl* suffix) const
1218	{
1219	return suffix && ::WTF::endsWithIgnoringASCIICase(*this, *suffix);
1220	}
1221
1222	bool StringImpl::endsWithIgnoringASCIICase(const StringImpl& suffix) const
1223	{
1224	return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
1225	}
1226
1227	bool StringImpl::endsWith(UChar character) const
1228	{
1229	return m_length && (*this)[m_length - `1`] == character;
1230	}
1231
1232	bool StringImpl::endsWith(const char* matchString, unsigned matchLength) const
1233	{
1234	return matchLength <= length() && equalInner(*this, length() - matchLength, matchString, matchLength);
1235	}
1236
1237	bool StringImpl::hasInfixEndingAt(const StringImpl& matchString, unsigned endOffset) const
1238	{
1239	return endOffset >= matchString.length() && equalInner(*this, endOffset - matchString.length(), matchString);
1240	}
1241
1242	Ref<StringImpl> StringImpl::replace(UChar target, UChar replacement)
1243	{
1244	if (target == replacement)
1245	return *this;
1246	unsigned i;
1247	for (i = `0`; i != m_length; ++i) {
1248	UChar character = is8Bit() ? m_data8[i] : m_data16[i];
1249	if (character == target)
1250	break;
1251	}
1252	if (i == m_length)
1253	return *this;
1254
1255	if (is8Bit()) {
1256	if (!isLatin1(target)) {
1257	// Looking for a 16-bit character in an 8-bit string, so we're done.
1258	return *this;
1259	}
1260
1261	if (isLatin1(replacement)) {
1262	LChar* data;
1263	LChar oldChar = static_cast<LChar>(target);
1264	LChar newChar = static_cast<LChar>(replacement);
1265
1266	auto newImpl = createUninitializedInternalNonEmpty(m_length, data);
1267
1268	for (i = `0`; i != m_length; ++i) {
1269	LChar character = m_data8[i];
1270	if (character == oldChar)
1271	character = newChar;
1272	data[i] = character;
1273	}
1274	return newImpl;
1275	}
1276
1277	UChar* data;
1278	auto newImpl = createUninitializedInternalNonEmpty(m_length, data);
1279
1280	for (i = `0`; i != m_length; ++i) {
1281	UChar character = m_data8[i];
1282	if (character == target)
1283	character = replacement;
1284	data[i] = character;
1285	}
1286
1287	return newImpl;
1288	}
1289
1290	UChar* data;
1291	auto newImpl = createUninitializedInternalNonEmpty(m_length, data);
1292
1293	for (i = `0`; i != m_length; ++i) {
1294	UChar character = m_data16[i];
1295	if (character == target)
1296	character = replacement;
1297	data[i] = character;
1298	}
1299	return newImpl;
1300	}
1301
1302	Ref<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* string)
1303	{
1304	position = std::min(position, length());
1305	lengthToReplace = std::min(lengthToReplace, length() - position);
1306	unsigned lengthToInsert = string ? string->length() : `0`;
1307	if (!lengthToReplace && !lengthToInsert)
1308	return *this;
1309
1310	if ((length() - lengthToReplace) >= (MaxLength - lengthToInsert))
1311	CRASH();
1312
1313	if (is8Bit() && (!string \|\| string->is8Bit())) {
1314	LChar* data;
1315	auto newImpl = createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1316	copyCharacters(data, m_data8, position);
1317	if (string)
1318	copyCharacters(data + position, string->m_data8, lengthToInsert);
1319	copyCharacters(data + position + lengthToInsert, m_data8 + position + lengthToReplace, length() - position - lengthToReplace);
1320	return newImpl;
1321	}
1322	UChar* data;
1323	auto newImpl = createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1324	if (is8Bit())
1325	copyCharacters(data, m_data8, position);
1326	else
1327	copyCharacters(data, m_data16, position);
1328	if (string) {
1329	if (string->is8Bit())
1330	copyCharacters(data + position, string->m_data8, lengthToInsert);
1331	else
1332	copyCharacters(data + position, string->m_data16, lengthToInsert);
1333	}
1334	if (is8Bit())
1335	copyCharacters(data + position + lengthToInsert, m_data8 + position + lengthToReplace, length() - position - lengthToReplace);
1336	else
1337	copyCharacters(data + position + lengthToInsert, m_data16 + position + lengthToReplace, length() - position - lengthToReplace);
1338	return newImpl;
1339	}
1340
1341	Ref<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1342	{
1343	if (!replacement)
1344	return *this;
1345	if (replacement->is8Bit())
1346	return replace(pattern, replacement->m_data8, replacement->length());
1347	return replace(pattern, replacement->m_data16, replacement->length());
1348	}
1349
1350	Ref<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1351	{
1352	ASSERT(replacement);
1353
1354	size_t srcSegmentStart = `0`;
1355	unsigned matchCount = `0`;
1356
1357	// Count the matches.
1358	while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
1359	++matchCount;
1360	++srcSegmentStart;
1361	}
1362
1363	// If we have 0 matches then we don't have to do any more work.
1364	if (!matchCount)
1365	return *this;
1366
1367	if (repStrLength && matchCount > MaxLength / repStrLength)
1368	CRASH();
1369
1370	unsigned replaceSize = matchCount * repStrLength;
1371	unsigned newSize = m_length - matchCount;
1372	if (newSize >= (MaxLength - replaceSize))
1373	CRASH();
1374
1375	newSize += replaceSize;
1376
1377	// Construct the new data.
1378	size_t srcSegmentEnd;
1379	unsigned srcSegmentLength;
1380	srcSegmentStart = `0`;
1381	unsigned dstOffset = `0`;
1382
1383	if (is8Bit()) {
1384	LChar* data;
1385	auto newImpl = createUninitialized(newSize, data);
1386
1387	while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1388	srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1389	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1390	dstOffset += srcSegmentLength;
1391	copyCharacters(data + dstOffset, replacement, repStrLength);
1392	dstOffset += repStrLength;
1393	srcSegmentStart = srcSegmentEnd + `1`;
1394	}
1395
1396	srcSegmentLength = m_length - srcSegmentStart;
1397	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1398
1399	ASSERT(dstOffset + srcSegmentLength == newImpl.get().length());
1400
1401	return newImpl;
1402	}
1403
1404	UChar* data;
1405	auto newImpl = createUninitialized(newSize, data);
1406
1407	while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1408	srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1409	copyCharacters(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength);
1410
1411	dstOffset += srcSegmentLength;
1412	copyCharacters(data + dstOffset, replacement, repStrLength);
1413
1414	dstOffset += repStrLength;
1415	srcSegmentStart = srcSegmentEnd + `1`;
1416	}
1417
1418	srcSegmentLength = m_length - srcSegmentStart;
1419	copyCharacters(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength);
1420
1421	ASSERT(dstOffset + srcSegmentLength == newImpl.get().length());
1422
1423	return newImpl;
1424	}
1425
1426	Ref<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1427	{
1428	ASSERT(replacement);
1429
1430	size_t srcSegmentStart = `0`;
1431	unsigned matchCount = `0`;
1432
1433	// Count the matches.
1434	while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
1435	++matchCount;
1436	++srcSegmentStart;
1437	}
1438
1439	// If we have 0 matches then we don't have to do any more work.
1440	if (!matchCount)
1441	return *this;
1442
1443	if (repStrLength && matchCount > MaxLength / repStrLength)
1444	CRASH();
1445
1446	unsigned replaceSize = matchCount * repStrLength;
1447	unsigned newSize = m_length - matchCount;
1448	if (newSize >= (MaxLength - replaceSize))
1449	CRASH();
1450
1451	newSize += replaceSize;
1452
1453	// Construct the new data.
1454	size_t srcSegmentEnd;
1455	unsigned srcSegmentLength;
1456	srcSegmentStart = `0`;
1457	unsigned dstOffset = `0`;
1458
1459	if (is8Bit()) {
1460	UChar* data;
1461	auto newImpl = createUninitialized(newSize, data);
1462
1463	while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1464	srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1465	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1466
1467	dstOffset += srcSegmentLength;
1468	copyCharacters(data + dstOffset, replacement, repStrLength);
1469
1470	dstOffset += repStrLength;
1471	srcSegmentStart = srcSegmentEnd + `1`;
1472	}
1473
1474	srcSegmentLength = m_length - srcSegmentStart;
1475	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1476
1477	ASSERT(dstOffset + srcSegmentLength == newImpl.get().length());
1478
1479	return newImpl;
1480	}
1481
1482	UChar* data;
1483	auto newImpl = createUninitialized(newSize, data);
1484
1485	while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1486	srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1487	copyCharacters(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength);
1488
1489	dstOffset += srcSegmentLength;
1490	copyCharacters(data + dstOffset, replacement, repStrLength);
1491
1492	dstOffset += repStrLength;
1493	srcSegmentStart = srcSegmentEnd + `1`;
1494	}
1495
1496	srcSegmentLength = m_length - srcSegmentStart;
1497	copyCharacters(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength);
1498
1499	ASSERT(dstOffset + srcSegmentLength == newImpl.get().length());
1500
1501	return newImpl;
1502	}
1503
1504	Ref<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1505	{
1506	if (!pattern \|\| !replacement)
1507	return *this;
1508
1509	unsigned patternLength = pattern->length();
1510	if (!patternLength)
1511	return *this;
1512
1513	unsigned repStrLength = replacement->length();
1514	size_t srcSegmentStart = `0`;
1515	unsigned matchCount = `0`;
1516
1517	// Count the matches.
1518	while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
1519	++matchCount;
1520	srcSegmentStart += patternLength;
1521	}
1522
1523	// If we have 0 matches, we don't have to do any more work
1524	if (!matchCount)
1525	return *this;
1526
1527	unsigned newSize = m_length - matchCount * patternLength;
1528	if (repStrLength && matchCount > MaxLength / repStrLength)
1529	CRASH();
1530
1531	if (newSize > (MaxLength - matchCount * repStrLength))
1532	CRASH();
1533
1534	newSize += matchCount * repStrLength;
1535
1536
1537	// Construct the new data
1538	size_t srcSegmentEnd;
1539	unsigned srcSegmentLength;
1540	srcSegmentStart = `0`;
1541	unsigned dstOffset = `0`;
1542	bool srcIs8Bit = is8Bit();
1543	bool replacementIs8Bit = replacement->is8Bit();
1544
1545	// There are 4 cases:
1546	// 1. This and replacement are both 8 bit.
1547	// 2. This and replacement are both 16 bit.
1548	// 3. This is 8 bit and replacement is 16 bit.
1549	// 4. This is 16 bit and replacement is 8 bit.
1550	if (srcIs8Bit && replacementIs8Bit) {
1551	// Case 1
1552	LChar* data;
1553	auto newImpl = createUninitialized(newSize, data);
1554	while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1555	srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1556	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1557	dstOffset += srcSegmentLength;
1558	copyCharacters(data + dstOffset, replacement->m_data8, repStrLength);
1559	dstOffset += repStrLength;
1560	srcSegmentStart = srcSegmentEnd + patternLength;
1561	}
1562
1563	srcSegmentLength = m_length - srcSegmentStart;
1564	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1565
1566	ASSERT(dstOffset + srcSegmentLength == newImpl.get().length());
1567
1568	return newImpl;
1569	}
1570
1571	UChar* data;
1572	auto newImpl = createUninitialized(newSize, data);
1573	while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1574	srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1575	if (srcIs8Bit) {
1576	// Case 3.
1577	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1578	} else {
1579	// Case 2 & 4.
1580	copyCharacters(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength);
1581	}
1582	dstOffset += srcSegmentLength;
1583	if (replacementIs8Bit) {
1584	// Cases 2 & 3.
1585	copyCharacters(data + dstOffset, replacement->m_data8, repStrLength);
1586	} else {
1587	// Case 4
1588	copyCharacters(data + dstOffset, replacement->m_data16, repStrLength);
1589	}
1590	dstOffset += repStrLength;
1591	srcSegmentStart = srcSegmentEnd + patternLength;
1592	}
1593
1594	srcSegmentLength = m_length - srcSegmentStart;
1595	if (srcIs8Bit) {
1596	// Case 3.
1597	copyCharacters(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength);
1598	} else {
1599	// Cases 2 & 4.
1600	copyCharacters(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength);
1601	}
1602
1603	ASSERT(dstOffset + srcSegmentLength == newImpl.get().length());
1604
1605	return newImpl;
1606	}
1607
1608	bool equal(const StringImpl* a, const StringImpl* b)
1609	{
1610	return equalCommon(a, b);
1611	}
1612
1613	template<typename CharacterType> inline bool equalInternal(const StringImpl* a, const CharacterType* b, unsigned length)
1614	{
1615	if (!a)
1616	return !b;
1617	if (!b)
1618	return false;
1619
1620	if (a->length() != length)
1621	return false;
1622	if (a->is8Bit())
1623	return equal(a->characters8(), b, length);
1624	return equal(a->characters16(), b, length);
1625	}
1626
1627	bool equal(const StringImpl* a, const LChar* b, unsigned length)
1628	{
1629	return equalInternal(a, b, length);
1630	}
1631
1632	bool equal(const StringImpl* a, const UChar* b, unsigned length)
1633	{
1634	return equalInternal(a, b, length);
1635	}
1636
1637	bool equal(const StringImpl* a, const LChar* b)
1638	{
1639	if (!a)
1640	return !b;
1641	if (!b)
1642	return !a;
1643
1644	unsigned length = a->length();
1645
1646	if (a->is8Bit()) {
1647	const LChar* aPtr = a->characters8();
1648	for (unsigned i = `0`; i != length; ++i) {
1649	LChar bc = b[i];
1650	LChar ac = aPtr[i];
1651	if (!bc)
1652	return false;
1653	if (ac != bc)
1654	return false;
1655	}
1656
1657	return !b[length];
1658	}
1659
1660	const UChar* aPtr = a->characters16();
1661	for (unsigned i = `0`; i != length; ++i) {
1662	LChar bc = b[i];
1663	if (!bc)
1664	return false;
1665	if (aPtr[i] != bc)
1666	return false;
1667	}
1668
1669	return !b[length];
1670	}
1671
1672	bool equal(const StringImpl& a, const StringImpl& b)
1673	{
1674	return equalCommon(a, b);
1675	}
1676
1677	bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
1678	{
1679	if (!a && b && !b->length())
1680	return true;
1681	if (!b && a && !a->length())
1682	return true;
1683	return equal(a, b);
1684	}
1685
1686	bool equalIgnoringASCIICase(const StringImpl* a, const StringImpl* b)
1687	{
1688	return a == b \|\| (a && b && equalIgnoringASCIICase(a, b));
1689	}
1690
1691	bool equalIgnoringASCIICaseNonNull(const StringImpl* a, const StringImpl* b)
1692	{
1693	ASSERT(a);
1694	ASSERT(b);
1695	return equalIgnoringASCIICase(a, b);
1696	}
1697
1698	UCharDirection StringImpl::defaultWritingDirection(bool* hasStrongDirectionality)
1699	{
1700	for (unsigned i = `0`; i < m_length; ++i) {
1701	auto charDirection = u_charDirection(is8Bit() ? m_data8[i] : m_data16[i]);
1702	if (charDirection == U_LEFT_TO_RIGHT) {
1703	if (hasStrongDirectionality)
1704	hasStrongDirectionality = true*;
1705	return U_LEFT_TO_RIGHT;
1706	}
1707	if (charDirection == U_RIGHT_TO_LEFT \|\| charDirection == U_RIGHT_TO_LEFT_ARABIC) {
1708	if (hasStrongDirectionality)
1709	hasStrongDirectionality = true*;
1710	return U_RIGHT_TO_LEFT;
1711	}
1712	}
1713	if (hasStrongDirectionality)
1714	hasStrongDirectionality = false*;
1715	return U_LEFT_TO_RIGHT;
1716	}
1717
1718	Ref<StringImpl> StringImpl::adopt(StringBuffer<LChar>&& buffer)
1719	{
1720	unsigned length = buffer.length();
1721	if (!length)
1722	return *empty();
1723	return adoptRef(*new StringImpl (buffer.release(), length));
1724	}
1725
1726	Ref<StringImpl> StringImpl::adopt(StringBuffer<UChar>&& buffer)
1727	{
1728	unsigned length = buffer.length();
1729	if (!length)
1730	return *empty();
1731	return adoptRef(*new StringImpl (buffer.release(), length));
1732	}
1733
1734	size_t StringImpl::sizeInBytes() const
1735	{
1736	// FIXME: support substrings
1737	size_t size = length();
1738	if (!is8Bit())
1739	size *= `2`;
1740	return size + sizeof(*this);
1741	}
1742
1743	// Helper to write a three-byte UTF-8 code point into the buffer; caller must ensure room is available.
1744	static inline void putUTF8Triple(char*& buffer, UChar character)
1745	{
1746	ASSERT(character >= `0x0800`);
1747	buffer++ = static_cast<char*>(((character >> `12`) & `0x0F`) \| `0xE0`);
1748	buffer++ = static_cast<char*>(((character >> `6`) & `0x3F`) \| `0x80`);
1749	buffer++ = static_cast<char*>((character & `0x3F`) \| `0x80`);
1750	}
1751
1752	UTF8ConversionError StringImpl::utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode mode)
1753	{
1754	if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
1755	const UChar* charactersEnd = characters + length;
1756	char* bufferEnd = buffer + bufferSize;
1757	while (characters < charactersEnd) {
1758	// Use strict conversion to detect unpaired surrogates.
1759	auto result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd);
1760	ASSERT(result != TargetExhausted);
1761	// Conversion fails when there is an unpaired surrogate.
1762	// Put replacement character (U+FFFD) instead of the unpaired surrogate.
1763	if (result != ConversionOK) {
1764	ASSERT((`0xD800` <= characters && characters <= `0xDFFF`));
1765	// There should be room left, since one UChar hasn't been converted.
1766	ASSERT((buffer + `3`) <= bufferEnd);
1767	putUTF8Triple(buffer, replacementCharacter);
1768	++characters;
1769	}
1770	}
1771	} else {
1772	bool strict = mode == StrictConversion;
1773	const UChar* originalCharacters = characters;
1774	auto result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferSize, strict);
1775	ASSERT(result != TargetExhausted); // (length 3) should be sufficient for any conversion*
1776
1777	// Only produced from strict conversion.
1778	if (result == SourceIllegal) {
1779	ASSERT(strict);
1780	return UTF8ConversionError::IllegalSource;
1781	}
1782
1783	// Check for an unconverted high surrogate.
1784	if (result == SourceExhausted) {
1785	if (strict)
1786	return UTF8ConversionError::SourceExhausted;
1787	// This should be one unpaired high surrogate. Treat it the same
1788	// was as an unpaired high surrogate would have been handled in
1789	// the middle of a string with non-strict conversion - which is
1790	// to say, simply encode it to UTF-8.
1791	ASSERT_UNUSED(
1792	originalCharacters, (characters + `1`) == (originalCharacters + length));
1793	ASSERT((characters >= `0xD800`) && (characters <= `0xDBFF`));
1794	// There should be room left, since one UChar hasn't been converted.
1795	ASSERT((buffer + `3`) <= (buffer + bufferSize));
1796	putUTF8Triple(buffer, *characters);
1797	}
1798	}
1799
1800	return UTF8ConversionError::None;
1801	}
1802
1803	Expected<CString, UTF8ConversionError> StringImpl::utf8ForCharacters(const LChar* characters, unsigned length)
1804	{
1805	if (!length)
1806	return CString ("", `0`);
1807	if (length > MaxLength / `3`)
1808	return makeUnexpected(UTF8ConversionError::OutOfMemory);
1809	Vector<char, `1024`> bufferVector(length * `3`);
1810	char* buffer = bufferVector.data();
1811	const LChar* source = characters;
1812	bool success = convertLatin1ToUTF8(&source, source + length, &buffer, buffer + bufferVector.size());
1813	ASSERT_UNUSED(success, success); // (length 3) should be sufficient for any conversion*
1814	return CString (bufferVector.data(), buffer - bufferVector.data());
1815	}
1816
1817	Expected<CString, UTF8ConversionError> StringImpl::utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode mode)
1818	{
1819	if (!length)
1820	return CString ("", `0`);
1821	if (length > MaxLength / `3`)
1822	return makeUnexpected(UTF8ConversionError::OutOfMemory);
1823	Vector<char, `1024`> bufferVector(length * `3`);
1824	char* buffer = bufferVector.data();
1825	UTF8ConversionError error = utf8Impl(characters, length, buffer, bufferVector.size(), mode);
1826	if (error != UTF8ConversionError::None)
1827	return makeUnexpected(error);
1828	return CString (bufferVector.data(), buffer - bufferVector.data());
1829	}
1830
1831	Expected<CString, UTF8ConversionError> StringImpl::tryGetUtf8ForRange(unsigned offset, unsigned length, ConversionMode mode) const
1832	{
1833	ASSERT(offset <= this->length());
1834	ASSERT(offset + length <= this->length());
1835
1836	if (!length)
1837	return CString ("", `0`);
1838
1839	// Allocate a buffer big enough to hold all the characters
1840	// (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
1841	// Optimization ideas, if we find this function is hot:
1842	// We could speculatively create a CStringBuffer to contain 'length'*
1843	// characters, and resize if necessary (i.e. if the buffer contains
1844	// non-ascii characters). (Alternatively, scan the buffer first for
1845	// ascii characters, so we know this will be sufficient).
1846	// We could allocate a CStringBuffer with an appropriate size to*
1847	// have a good chance of being able to write the string into the
1848	// buffer without reallocing (say, 1.5 x length).
1849	if (length > MaxLength / `3`)
1850	return makeUnexpected(UTF8ConversionError::OutOfMemory);
1851	Vector<char, `1024`> bufferVector(length * `3`);
1852
1853	char* buffer = bufferVector.data();
1854
1855	if (is8Bit()) {
1856	const LChar* characters = this->characters8() + offset;
1857	auto success = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
1858	ASSERT_UNUSED(success, success); // (length 3) should be sufficient for any conversion*
1859	} else {
1860	UTF8ConversionError error = utf8Impl(this->characters16() + offset, length, buffer, bufferVector.size(), mode);
1861	if (error != UTF8ConversionError::None)
1862	return makeUnexpected(error);
1863	}
1864
1865	return CString (bufferVector.data(), buffer - bufferVector.data());
1866	}
1867
1868	Expected<CString, UTF8ConversionError> StringImpl::tryGetUtf8(ConversionMode mode) const
1869	{
1870	return tryGetUtf8ForRange(`0`, length(), mode);
1871	}
1872
1873	CString StringImpl::utf8(ConversionMode mode) const
1874	{
1875	auto expectedString = tryGetUtf8ForRange(`0`, length(), mode);
1876	RELEASE_ASSERT(expectedString);
1877	return expectedString.value();
1878	}
1879
1880	NEVER_INLINE unsigned StringImpl::hashSlowCase() const
1881	{
1882	if (is8Bit())
1883	setHash(StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length));
1884	else
1885	setHash(StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length));
1886	return existingHash();
1887	}
1888
1889	unsigned StringImpl::concurrentHash() const
1890	{
1891	unsigned hash;
1892	if (is8Bit())
1893	hash = StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length);
1894	else
1895	hash = StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length);
1896	ASSERT(((hash << s_flagCount) >> s_flagCount) == hash);
1897	return hash;
1898	}
1899
1900	bool equalIgnoringNullity(const UChar* a, size_t aLength, StringImpl* b)
1901	{
1902	if (!b)
1903	return !aLength;
1904	if (aLength != b->length())
1905	return false;
1906	if (b->is8Bit()) {
1907	const LChar* bCharacters = b->characters8();
1908	for (unsigned i = `0`; i < aLength; ++i) {
1909	if (a[i] != bCharacters[i])
1910	return false;
1911	}
1912	return true;
1913	}
1914	return !memcmp(a, b->characters16(), b->length() * sizeof(UChar));
1915	}
1916
1917	} // namespace WTF
1918

Browse the source code of webkit/Source/WTF/wtf/text/StringImpl.cpp