StringView.cpp source code [webkit/Source/WTF/wtf/text/StringView.cpp]

1	/*
2
3	Copyright (C) 2014-2019 Apple Inc. All rights reserved.
4
5	Redistribution and use in source and binary forms, with or without
6	modification, are permitted provided that the following conditions
7	are met:
8	1. Redistributions of source code must retain the above copyright
9	notice, this list of conditions and the following disclaimer.
10	2. Redistributions in binary form must reproduce the above copyright
11	notice, this list of conditions and the following disclaimer in the
12	documentation and/or other materials provided with the distribution.
13
14	THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
15	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17	DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21	ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
25	*/
26
27	#include "config.h"
28	#include <wtf/text/StringView.h>
29
30	#include <mutex>
31	#include <unicode/ubrk.h>
32	#include <unicode/unorm2.h>
33	#include <wtf/HashMap.h>
34	#include <wtf/Lock.h>
35	#include <wtf/NeverDestroyed.h>
36	#include <wtf/Optional.h>
37	#include <wtf/text/TextBreakIterator.h>
38
39	namespace WTF {
40
41	bool StringView::containsIgnoringASCIICase(const StringView& matchString) const
42	{
43	return findIgnoringASCIICase(matchString) != notFound;
44	}
45
46	bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
47	{
48	return findIgnoringASCIICase(matchString, startOffset) != notFound;
49	}
50
51	size_t StringView::findIgnoringASCIICase(const StringView& matchString) const
52	{
53	return ::WTF::findIgnoringASCIICase(*this, matchString, `0`);
54	}
55
56	size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
57	{
58	return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
59	}
60
61	bool StringView::startsWith(const StringView& prefix) const
62	{
63	return ::WTF::startsWith(*this, prefix);
64	}
65
66	bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const
67	{
68	return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
69	}
70
71	bool StringView::endsWith(const StringView& suffix) const
72	{
73	return ::WTF::endsWith(*this, suffix);
74	}
75
76	bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const
77	{
78	return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
79	}
80
81	Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const
82	{
83	if (isNull())
84	return CString ("", `0`);
85	if (is8Bit())
86	return StringImpl::utf8ForCharacters(characters8(), length());
87	return StringImpl::utf8ForCharacters(characters16(), length(), mode);
88	}
89
90	CString StringView::utf8(ConversionMode mode) const
91	{
92	auto expectedString = tryGetUtf8(mode);
93	RELEASE_ASSERT(expectedString);
94	return expectedString.value();
95	}
96
97	size_t StringView::find(StringView matchString, unsigned start) const
98	{
99	return findCommon(*this, matchString, start);
100	}
101
102	void StringView::SplitResult::Iterator::findNextSubstring()
103	{
104	for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) {
105	if (m_result.m_allowEmptyEntries \|\| separatorPosition > m_position) {
106	m_length = separatorPosition - m_position;
107	return;
108	}
109	}
110	m_length = m_result.m_string.length() - m_position;
111	if (!m_length && !m_result.m_allowEmptyEntries)
112	m_isDone = true;
113	}
114
115	auto StringView::SplitResult::Iterator::operator++() -> Iterator&
116	{
117	ASSERT(m_position <= m_result.m_string.length() && !m_isDone);
118	m_position += m_length;
119	if (m_position < m_result.m_string.length()) {
120	++m_position;
121	findNextSubstring();
122	} else if (!m_isDone)
123	m_isDone = true;
124	return *this;
125	}
126
127	class StringView::GraphemeClusters::Iterator::Impl {
128	WTF_MAKE_FAST_ALLOCATED;
129	public:
130	Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
131	: m_stringView(stringView)
132	, m_iterator (WTFMove(iterator))
133	, m_index(index)
134	, m_indexEnd(computeIndexEnd())
135	{
136	}
137
138	void operator++()
139	{
140	ASSERT(m_indexEnd > m_index);
141	m_index = m_indexEnd;
142	m_indexEnd = computeIndexEnd();
143	}
144
145	StringView operator() const*
146	{
147	if (m_stringView.is8Bit())
148	return StringView (m_stringView.characters8() + m_index, m_indexEnd - m_index);
149	return StringView (m_stringView.characters16() + m_index, m_indexEnd - m_index);
150	}
151
152	bool operator==(const Impl& other) const
153	{
154	ASSERT(&m_stringView == &other.m_stringView);
155	auto result = m_index == other.m_index;
156	ASSERT(!result \|\| m_indexEnd == other.m_indexEnd);
157	return result;
158	}
159
160	unsigned computeIndexEnd()
161	{
162	if (!m_iterator)
163	return `0`;
164	if (m_index == m_stringView.length())
165	return m_index;
166	return ubrk_following(m_iterator.value(), m_index);
167	}
168
169	private:
170	const StringView& m_stringView;
171	Optional<NonSharedCharacterBreakIterator> m_iterator;
172	unsigned m_index;
173	unsigned m_indexEnd;
174	};
175
176	StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index)
177	: m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator (stringView)), index))
178	{
179	}
180
181	StringView::GraphemeClusters::Iterator::~Iterator()
182	{
183	}
184
185	StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other)
186	: m_impl (WTFMove(other.m_impl))
187	{
188	}
189
190	auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator&
191	{
192	++(*m_impl);
193	return *this;
194	}
195
196	StringView StringView::GraphemeClusters::Iterator::operator() const*
197	{
198	return **m_impl;
199	}
200
201	bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const
202	{
203	return m_impl == (other.m_impl);
204	}
205
206	bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const
207	{
208	return !(*this == other);
209	}
210
211	enum class ASCIICase { Lower, Upper };
212
213	template<ASCIICase type, typename CharacterType>
214	String convertASCIICase(const CharacterType* input, unsigned length)
215	{
216	if (!input)
217	return { };
218
219	CharacterType* characters;
220	auto result = String::createUninitialized(length, characters);
221	for (unsigned i = `0`; i < length; ++i)
222	characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]);
223	return result;
224	}
225
226	String StringView::convertToASCIILowercase() const
227	{
228	if (m_is8Bit)
229	return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length);
230	return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length);
231	}
232
233	String StringView::convertToASCIIUppercase() const
234	{
235	if (m_is8Bit)
236	return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length);
237	return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length);
238	}
239
240	StringViewWithUnderlyingString normalizedNFC(StringView string)
241	{
242	// Latin-1 characters are unaffected by normalization.
243	if (string.is8Bit())
244	return { string, { } };
245
246	UErrorCode status = U_ZERO_ERROR;
247	const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
248	ASSERT(U_SUCCESS(status));
249
250	// No need to normalize if already normalized.
251	UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status);
252	if (checkResult)
253	return { string, { } };
254
255	unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, `0`, &status);
256	ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
257
258	UChar* characters;
259	String result = String::createUninitialized(normalizedLength, characters);
260
261	status = U_ZERO_ERROR;
262	unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status);
263	ASSERT(U_SUCCESS(status));
264
265	StringView view { result };
266	return { view, WTFMove(result) };
267	}
268
269	String normalizedNFC(const String& string)
270	{
271	auto result = normalizedNFC(StringView { string });
272	if (result.underlyingString.isNull())
273	return string;
274	return result.underlyingString;
275	}
276
277	#if CHECK_STRINGVIEW_LIFETIME
278
279	// Manage reference count manually so UnderlyingString does not need to be defined in the header.
280
281	struct StringView::UnderlyingString {
282	std::atomic_uint refCount { `1u` };
283	bool isValid { true };
284	const StringImpl& string;
285	explicit UnderlyingString(const StringImpl&);
286	};
287
288	StringView::UnderlyingString::UnderlyingString(const StringImpl& string)
289	: string(string)
290	{
291	}
292
293	static Lock underlyingStringsMutex;
294
295	static HashMap<const StringImpl, StringView::UnderlyingString>& underlyingStrings()
296	{
297	static NeverDestroyed<HashMap<const StringImpl, StringView::UnderlyingString>> map;
298	return map;
299	}
300
301	void StringView::invalidate(const StringImpl& stringToBeDestroyed)
302	{
303	UnderlyingString* underlyingString;
304	{
305	std::lock_guard<Lock> lock(underlyingStringsMutex);
306	underlyingString = underlyingStrings().take(&stringToBeDestroyed);
307	if (!underlyingString)
308	return;
309	}
310	ASSERT(underlyingString->isValid);
311	underlyingString->isValid = false;
312	}
313
314	bool StringView::underlyingStringIsValid() const
315	{
316	return !m_underlyingString \|\| m_underlyingString->isValid;
317	}
318
319	void StringView::adoptUnderlyingString(UnderlyingString* underlyingString)
320	{
321	if (m_underlyingString) {
322	std::lock_guard<Lock> lock(underlyingStringsMutex);
323	if (!--m_underlyingString->refCount) {
324	if (m_underlyingString->isValid) {
325	underlyingStrings().remove(&m_underlyingString->string);
326	}
327	delete m_underlyingString;
328	}
329	}
330	m_underlyingString = underlyingString;
331	}
332
333	void StringView::setUnderlyingString(const StringImpl* string)
334	{
335	UnderlyingString* underlyingString;
336	if (!string)
337	underlyingString = nullptr;
338	else {
339	std::lock_guard<Lock> lock(underlyingStringsMutex);
340	auto result = underlyingStrings().add(string, nullptr);
341	if (result.isNewEntry)
342	result.iterator ->value = new UnderlyingString (*string);
343	else
344	++result.iterator ->value->refCount;
345	underlyingString = result.iterator ->value;
346	}
347	adoptUnderlyingString(underlyingString);
348	}
349
350	void StringView::setUnderlyingString(const StringView& otherString)
351	{
352	UnderlyingString* underlyingString = otherString.m_underlyingString;
353	if (underlyingString)
354	++underlyingString->refCount;
355	adoptUnderlyingString(underlyingString);
356	}
357
358	#endif // CHECK_STRINGVIEW_LIFETIME
359
360	} // namespace WTF
361

Browse the source code of webkit/Source/WTF/wtf/text/StringView.cpp