1/*
2 * Copyright (C) 2005, 2007, 2010, 2013, 2016 Apple Inc. All rights reserved.
3 * Copyright (C) 2011 Google Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#pragma once
23
24#include <unicode/ubrk.h>
25#include <wtf/ASCIICType.h>
26#include <wtf/StdLibExtras.h>
27#include <wtf/text/TextBreakIterator.h>
28#include <wtf/unicode/CharacterNames.h>
29
30namespace WebCore {
31
// Bounds (inclusive) of the character range covered by the ASCII line-break lookup table.
// Characters outside [lineBreakTableFirstCharacter, lineBreakTableLastCharacter] are not looked up
// in the table by shouldBreakAfter().
static const UChar lineBreakTableFirstCharacter = '!';
static const UChar lineBreakTableLastCharacter = 127;
// Number of bytes per table row: one bit per covered character, rounded up to whole bytes.
static const unsigned lineBreakTableColumnCount = (lineBreakTableLastCharacter - lineBreakTableFirstCharacter) / 8 + 1;

// Bit matrix consulted by shouldBreakAfter(): the row is selected by the current character and the bit
// within the row by the next character (both offset by lineBreakTableFirstCharacter). A set bit means a
// break is allowed between the two characters. The table itself is defined outside this header.
WEBCORE_EXPORT extern const unsigned char lineBreakTable[][lineBreakTableColumnCount];
37
// Selects how U+00A0 (noBreakSpace) is treated by the break-finding helpers in this header.
enum class NonBreakingSpaceBehavior {
    // noBreakSpace is not reported as a breakable space.
    IgnoreNonBreakingSpace,
    // noBreakSpace is reported as a breakable space, like ' ', '\n' and '\t'.
    TreatNonBreakingSpaceAsBreak,
};
42
// Template switch for nextBreakablePosition(): controls whether the ASCII lookup-table shortcut
// (shouldBreakAfter) may be used.
enum class CanUseShortcut {
    // The ASCII table is consulted; the break iterator is only used when a character requires it.
    Yes,
    // The ASCII table is never consulted; the break iterator is used for every character.
    No
};
47
48template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
49static inline bool isBreakableSpace(UChar character)
50{
51 switch (character) {
52 case ' ':
53 case '\n':
54 case '\t':
55 return true;
56 case noBreakSpace:
57 return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
58 default:
59 return false;
60 }
61}
62
63inline bool shouldBreakAfter(UChar lastCharacter, UChar character, UChar nextCharacter)
64{
65 // Don't allow line breaking between '-' and a digit if the '-' may mean a minus sign in the context,
66 // while allow breaking in 'ABCD-1234' and '1234-5678' which may be in long URLs.
67 if (character == '-' && isASCIIDigit(nextCharacter))
68 return isASCIIAlphanumeric(lastCharacter);
69
70 // If both ch and nextCh are ASCII characters, use a lookup table for enhanced speed and for compatibility
71 // with other browsers (see comments for asciiLineBreakTable for details).
72 if (character >= lineBreakTableFirstCharacter && character <= lineBreakTableLastCharacter && nextCharacter >= lineBreakTableFirstCharacter && nextCharacter <= lineBreakTableLastCharacter) {
73 const unsigned char* tableRow = lineBreakTable[character - lineBreakTableFirstCharacter];
74 unsigned nextCharacterIndex = nextCharacter - lineBreakTableFirstCharacter;
75 return tableRow[nextCharacterIndex / 8] & (1 << (nextCharacterIndex % 8));
76 }
77 // Otherwise defer to the Unicode algorithm by returning false.
78 return false;
79}
80
81template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
82inline bool needsLineBreakIterator(UChar character)
83{
84 if (nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak)
85 return character > lineBreakTableLastCharacter;
86 return character > lineBreakTableLastCharacter && character != noBreakSpace;
87}
88
89// When in non-loose mode, we can use the ASCII shortcut table.
90template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior, CanUseShortcut canUseShortcut>
91inline unsigned nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition)
92{
93 Optional<unsigned> nextBreak;
94
95 CharacterType lastLastCharacter = startPosition > 1 ? string[startPosition - 2] : static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());
96 CharacterType lastCharacter = startPosition > 0 ? string[startPosition - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
97 unsigned priorContextLength = lazyBreakIterator.priorContextLength();
98 for (unsigned i = startPosition; i < length; i++) {
99 CharacterType character = string[i];
100
101 if (isBreakableSpace<nonBreakingSpaceBehavior>(character) || (canUseShortcut == CanUseShortcut::Yes && shouldBreakAfter(lastLastCharacter, lastCharacter, character)))
102 return i;
103
104 if (canUseShortcut == CanUseShortcut::No || needsLineBreakIterator<nonBreakingSpaceBehavior>(character) || needsLineBreakIterator<nonBreakingSpaceBehavior>(lastCharacter)) {
105 if (!nextBreak || nextBreak.value() < i) {
106 // Don't break if positioned at start of primary context and there is no prior context.
107 if (i || priorContextLength) {
108 UBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
109 if (breakIterator) {
110 int candidate = ubrk_following(breakIterator, i - 1 + priorContextLength);
111 if (candidate == UBRK_DONE)
112 nextBreak = WTF::nullopt;
113 else {
114 unsigned result = candidate;
115 ASSERT(result >= priorContextLength);
116 nextBreak = result - priorContextLength;
117 }
118 }
119 }
120 }
121 if (i == nextBreak && !isBreakableSpace<nonBreakingSpaceBehavior>(lastCharacter))
122 return i;
123 }
124
125 lastLastCharacter = lastCharacter;
126 lastCharacter = character;
127 }
128
129 return length;
130}
131
132template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
133inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition)
134{
135 for (unsigned i = startPosition; i < length; i++) {
136 if (isBreakableSpace<nonBreakingSpaceBehavior>(string[i]))
137 return i;
138 }
139 return length;
140}
141
142inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
143{
144 auto stringView = lazyBreakIterator.stringView();
145 if (stringView.is8Bit())
146 return nextBreakablePositionKeepingAllWords<LChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak>(stringView.characters8(), stringView.length(), startPosition);
147 return nextBreakablePositionKeepingAllWords<UChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak>(stringView.characters16(), stringView.length(), startPosition);
148}
149
150inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, unsigned startPosition)
151{
152 auto stringView = iterator.stringView();
153 if (stringView.is8Bit())
154 return nextBreakablePositionKeepingAllWords<LChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace>(stringView.characters8(), stringView.length(), startPosition);
155 return nextBreakablePositionKeepingAllWords<UChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace>(stringView.characters16(), stringView.length(), startPosition);
156}
157
158inline unsigned nextBreakablePosition(LazyLineBreakIterator& iterator, unsigned startPosition)
159{
160 auto stringView = iterator.stringView();
161 if (stringView.is8Bit())
162 return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::Yes>(iterator, stringView.characters8(), stringView.length(), startPosition);
163 return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::Yes>(iterator, stringView.characters16(), stringView.length(), startPosition);
164}
165
166inline unsigned nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
167{
168 auto stringView = lazyBreakIterator.stringView();
169 if (stringView.is8Bit())
170 return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::Yes>(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition);
171 return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::Yes>(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition);
172}
173
174inline unsigned nextBreakablePositionWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
175{
176 auto stringView = lazyBreakIterator.stringView();
177 if (stringView.is8Bit())
178 return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::No>(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition);
179 return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak, CanUseShortcut::No>(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition);
180}
181
182inline unsigned nextBreakablePositionIgnoringNBSPWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
183{
184 auto stringView = lazyBreakIterator.stringView();
185 if (stringView.is8Bit())
186 return nextBreakablePosition<LChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::No>(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition);
187 return nextBreakablePosition<UChar, NonBreakingSpaceBehavior::IgnoreNonBreakingSpace, CanUseShortcut::No>(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition);
188}
189
190inline unsigned nextBreakablePositionBreakCharacter(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
191{
192 auto stringView = lazyBreakIterator.stringView();
193 ASSERT(startPosition <= stringView.length());
194 // FIXME: Can/Should we implement this using a Shared Iterator (performance issue)
195 // https://bugs.webkit.org/show_bug.cgi?id=197876
196 NonSharedCharacterBreakIterator iterator(stringView);
197 Optional<unsigned> next = ubrk_following(iterator, startPosition);
198 return next.valueOr(stringView.length());
199}
200
201inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition, Optional<unsigned>& nextBreakable, bool breakNBSP, bool canUseShortcut, bool keepAllWords, bool breakAnywhere)
202{
203 if (nextBreakable && nextBreakable.value() >= startPosition)
204 return startPosition == nextBreakable;
205
206 if (breakAnywhere)
207 nextBreakable = nextBreakablePositionBreakCharacter(lazyBreakIterator, startPosition);
208 else if (keepAllWords) {
209 if (breakNBSP)
210 nextBreakable = nextBreakablePositionKeepingAllWords(lazyBreakIterator, startPosition);
211 else
212 nextBreakable = nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, startPosition);
213 } else if (!canUseShortcut) {
214 if (breakNBSP)
215 nextBreakable = nextBreakablePositionWithoutShortcut(lazyBreakIterator, startPosition);
216 else
217 nextBreakable = nextBreakablePositionIgnoringNBSPWithoutShortcut(lazyBreakIterator, startPosition);
218 } else {
219 if (breakNBSP)
220 nextBreakable = nextBreakablePosition(lazyBreakIterator, startPosition);
221 else
222 nextBreakable = nextBreakablePositionIgnoringNBSP(lazyBreakIterator, startPosition);
223 }
224 return startPosition == nextBreakable;
225}
226
227} // namespace WebCore
228