BreakLines.h source code [webkit/Source/WebCore/rendering/BreakLines.h]

1	/*
2	* Copyright (C) 2005, 2007, 2010, 2013, 2016 Apple Inc. All rights reserved.
3	* Copyright (C) 2011 Google Inc. All rights reserved.
4	*
5	* This library is free software; you can redistribute it and/or
6	* modify it under the terms of the GNU Library General Public
7	* License as published by the Free Software Foundation; either
8	* version 2 of the License, or (at your option) any later version.
9	*
10	* This library is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	* Library General Public License for more details.
14	*
15	* You should have received a copy of the GNU Library General Public License
16	* along with this library; see the file COPYING.LIB. If not, write to
17	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18	* Boston, MA 02110-1301, USA.
19	*
20	*/
21
22	#pragma once
23
24	#include <unicode/ubrk.h>
25	#include <wtf/ASCIICType.h>
26	#include <wtf/StdLibExtras.h>
27	#include <wtf/text/TextBreakIterator.h>
28	#include <wtf/unicode/CharacterNames.h>
29
30	namespace WebCore {
31
32	static const UChar lineBreakTableFirstCharacter = `'!'`;
33	static const UChar lineBreakTableLastCharacter = `127`;
34	static const unsigned lineBreakTableColumnCount = (lineBreakTableLastCharacter - lineBreakTableFirstCharacter) / `8` + `1`;
35
36	WEBCORE_EXPORT extern const unsigned char lineBreakTable[][lineBreakTableColumnCount];
37
// Compile-time switch selecting how the noBreakSpace character is classified
// by the break-finding helpers in this header.
enum class NonBreakingSpaceBehavior {
    // noBreakSpace is never reported as a breakable space.
    IgnoreNonBreakingSpace,
    // noBreakSpace is reported as a breakable space, like ' ', '\n' and '\t'.
    TreatNonBreakingSpaceAsBreak,
};
42
// Compile-time switch telling nextBreakablePosition() whether the ASCII
// lookup-table shortcut (shouldBreakAfter) may be used.
enum class CanUseShortcut {
    // Consult the ASCII table before falling back to the break iterator.
    Yes,
    // Always go through the break iterator.
    No,
};
47
48	template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
49	static inline bool isBreakableSpace(UChar character)
50	{
51	switch (character) {
52	case `' '`:
53	case `'\n'`:
54	case `'\t'`:
55	return true;
56	case noBreakSpace:
57	return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
58	default:
59	return false;
60	}
61	}
62
63	inline bool shouldBreakAfter(UChar lastCharacter, UChar character, UChar nextCharacter)
64	{
65	// Don't allow line breaking between '-' and a digit if the '-' may mean a minus sign in the context,
66	// while allow breaking in 'ABCD-1234' and '1234-5678' which may be in long URLs.
67	if (character == `'-'` && isASCIIDigit(nextCharacter))
68	return isASCIIAlphanumeric(lastCharacter);
69
70	// If both ch and nextCh are ASCII characters, use a lookup table for enhanced speed and for compatibility
71	// with other browsers (see comments for asciiLineBreakTable for details).
72	if (character >= lineBreakTableFirstCharacter && character <= lineBreakTableLastCharacter && nextCharacter >= lineBreakTableFirstCharacter && nextCharacter <= lineBreakTableLastCharacter) {
73	const unsigned char* tableRow = lineBreakTable[character - lineBreakTableFirstCharacter];
74	unsigned nextCharacterIndex = nextCharacter - lineBreakTableFirstCharacter;
75	return tableRow[nextCharacterIndex / `8`] & (`1` << (nextCharacterIndex % `8`));
76	}
77	// Otherwise defer to the Unicode algorithm by returning false.
78	return false;
79	}
80
81	template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
82	inline bool needsLineBreakIterator(UChar character)
83	{
84	if (nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak)
85	return character > lineBreakTableLastCharacter;
86	return character > lineBreakTableLastCharacter && character != noBreakSpace;
87	}
88
89	// When in non-loose mode, we can use the ASCII shortcut table.
90	template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior, CanUseShortcut canUseShortcut>
91	inline unsigned nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition)
92	{
93	Optional<unsigned> nextBreak;
94
95	CharacterType lastLastCharacter = startPosition > `1` ? string[startPosition - `2`] : static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());
96	CharacterType lastCharacter = startPosition > `0` ? string[startPosition - `1`] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
97	unsigned priorContextLength = lazyBreakIterator.priorContextLength();
98	for (unsigned i = startPosition; i < length; i++) {
99	CharacterType character = string[i];
100
101	if (isBreakableSpace<nonBreakingSpaceBehavior>(character) \|\| (canUseShortcut == CanUseShortcut::Yes && shouldBreakAfter(lastLastCharacter, lastCharacter, character)))
102	return i;
103
104	if (canUseShortcut == CanUseShortcut::No \|\| needsLineBreakIterator<nonBreakingSpaceBehavior>(character) \|\| needsLineBreakIterator<nonBreakingSpaceBehavior>(lastCharacter)) {
105	if (!nextBreak \|\| nextBreak.value() < i) {
106	// Don't break if positioned at start of primary context and there is no prior context.
107	if (i \|\| priorContextLength) {
108	UBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
109	if (breakIterator) {
110	int candidate = ubrk_following(breakIterator, i - `1` + priorContextLength);
111	if (candidate == UBRK_DONE)
112	nextBreak = WTF::nullopt;
113	else {
114	unsigned result = candidate;
115	ASSERT(result >= priorContextLength);
116	nextBreak = result - priorContextLength;
117	}
118	}
119	}
120	}
121	if (i == nextBreak && !isBreakableSpace<nonBreakingSpaceBehavior>(lastCharacter))
122	return i;
123	}
124
125	lastLastCharacter = lastCharacter;
126	lastCharacter = character;
127	}
128
129	return length;
130	}
131
132	template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
133	inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition)
134	{
135	for (unsigned i = startPosition; i < length; i++) {
136	if (isBreakableSpace<nonBreakingSpaceBehavior>(string[i]))
137	return i;
138	}
139	return length;
140	}
141
142	inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
143	{
144	auto stringView = lazyBreakIterator.stringView();
145	if (stringView.is8Bit())
146	return nextBreakablePositionKeepingAllWords<LChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak>(stringView.characters8(), stringView.length(), startPosition);
147	return nextBreakablePositionKeepingAllWords<UChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak>(stringView.characters16(), stringView.length(), startPosition);
148	}
149
150	inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, unsigned startPosition)
151	{
152	auto stringView = iterator.stringView();
153	if (stringView.is8Bit())
154	return nextBreakablePositionKeepingAllWords<LChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace>(stringView.characters8(), stringView.length(), startPosition);
155	return nextBreakablePositionKeepingAllWords<UChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace>(stringView.characters16(), stringView.length(), startPosition);
156	}
157
158	inline unsigned nextBreakablePosition(LazyLineBreakIterator& iterator, unsigned startPosition)
159	{
160	auto stringView = iterator.stringView();
161	if (stringView.is8Bit())
162	return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::Yes>(iterator, stringView.characters8(), stringView.length(), startPosition);
163	return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::Yes>(iterator, stringView.characters16(), stringView.length(), startPosition);
164	}
165
166	inline unsigned nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
167	{
168	auto stringView = lazyBreakIterator.stringView();
169	if (stringView.is8Bit())
170	return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::Yes>(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition);
171	return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::Yes>(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition);
172	}
173
174	inline unsigned nextBreakablePositionWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
175	{
176	auto stringView = lazyBreakIterator.stringView();
177	if (stringView.is8Bit())
178	return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::No>(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition);
179	return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::No>(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition);
180	}
181
182	inline unsigned nextBreakablePositionIgnoringNBSPWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
183	{
184	auto stringView = lazyBreakIterator.stringView();
185	if (stringView.is8Bit())
186	return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::No>(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition);
187	return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::No>(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition);
188	}
189
190	inline unsigned nextBreakablePositionBreakCharacter(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
191	{
192	auto stringView = lazyBreakIterator.stringView();
193	ASSERT(startPosition <= stringView.length());
194	// FIXME: Can/Should we implement this using a Shared Iterator (performance issue)
195	// https://bugs.webkit.org/show_bug.cgi?id=197876
196	NonSharedCharacterBreakIterator iterator(stringView);
197	Optional<unsigned> next = ubrk_following(iterator, startPosition);
198	return next.valueOr(stringView.length());
199	}
200
201	inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition, Optional<unsigned>& nextBreakable, bool breakNBSP, bool canUseShortcut, bool keepAllWords, bool breakAnywhere)
202	{
203	if (nextBreakable && nextBreakable.value() >= startPosition)
204	return startPosition == nextBreakable;
205
206	if (breakAnywhere)
207	nextBreakable = nextBreakablePositionBreakCharacter(lazyBreakIterator, startPosition);
208	else if (keepAllWords) {
209	if (breakNBSP)
210	nextBreakable = nextBreakablePositionKeepingAllWords(lazyBreakIterator, startPosition);
211	else
212	nextBreakable = nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, startPosition);
213	} else if (!canUseShortcut) {
214	if (breakNBSP)
215	nextBreakable = nextBreakablePositionWithoutShortcut(lazyBreakIterator, startPosition);
216	else
217	nextBreakable = nextBreakablePositionIgnoringNBSPWithoutShortcut(lazyBreakIterator, startPosition);
218	} else {
219	if (breakNBSP)
220	nextBreakable = nextBreakablePosition(lazyBreakIterator, startPosition);
221	else
222	nextBreakable = nextBreakablePositionIgnoringNBSP(lazyBreakIterator, startPosition);
223	}
224	return startPosition == nextBreakable;
225	}
226
227	} // namespace WebCore
228

Browse the source code of webkit/Source/WebCore/rendering/BreakLines.h