SurrogatePairAwareTextIterator.cpp source code [webkit/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp]

1	/*
2	* Copyright (C) 2003-2019 Apple Inc. All rights reserved.
3	* Copyright (C) 2008 Holger Hans Peter Freyther
4	* Copyright (C) Research In Motion Limited 2011. All rights reserved.
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "SurrogatePairAwareTextIterator.h"
25
26	#include <unicode/unorm2.h>
27
28	namespace WebCore {
29
30	SurrogatePairAwareTextIterator::SurrogatePairAwareTextIterator(const UChar* characters, unsigned currentIndex, unsigned lastIndex, unsigned endIndex)
31	: m_characters(characters)
32	, m_currentIndex(currentIndex)
33	, m_lastIndex(lastIndex)
34	, m_endIndex(endIndex)
35	{
36	}
37
38	bool SurrogatePairAwareTextIterator::consumeSlowCase(UChar32& character, unsigned& clusterLength)
39	{
40	if (character <= `0x30FE`) {
41	// Deal with Hiragana and Katakana voiced and semi-voiced syllables.
42	// Normalize into composed form, and then look for glyph with base + combined mark.
43	// Check above for character range to minimize performance impact.
44	if (UChar32 normalized = normalizeVoicingMarks()) {
45	character = normalized;
46	clusterLength = `2`;
47	}
48	return true;
49	}
50
51	if (!U16_IS_SURROGATE(character))
52	return true;
53
54	// If we have a surrogate pair, make sure it starts with the high part.
55	if (!U16_IS_SURROGATE_LEAD(character))
56	return false;
57
58	// Do we have a surrogate pair? If so, determine the full Unicode (32 bit) code point before glyph lookup.
59	// Make sure we have another character and it's a low surrogate.
60	if (m_currentIndex + `1` >= m_endIndex)
61	return false;
62
63	UChar low = m_characters[`1`];
64	if (!U16_IS_TRAIL(low))
65	return false;
66
67	character = U16_GET_SUPPLEMENTARY(character, low);
68	clusterLength = `2`;
69	return true;
70	}
71
72	UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
73	{
74	// According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
75	static constexpr uint8_t hiraganaKatakanaVoicingMarksCombiningClass = `8`;
76
77	if (m_currentIndex + `1` >= m_endIndex)
78	return `0`;
79
80	if (u_getCombiningClass(m_characters[`1`]) == hiraganaKatakanaVoicingMarksCombiningClass) {
81	UErrorCode status = U_ZERO_ERROR;
82	const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
83	ASSERT(U_SUCCESS(status));
84	auto composedCharacter = unorm2_composePair(normalizer, m_characters[`0`], m_characters[`1`]);
85	if (composedCharacter > `0`)
86	return composedCharacter;
87	}
88
89	return `0`;
90	}
91
92	}
93

Browse the source code of webkit/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp