1/*
2 * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2003-2017 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#pragma once
23
24#include "BidiContext.h"
25#include "BidiRunList.h"
26#include "WritingMode.h"
27#include <wtf/HashMap.h>
28#include <wtf/Noncopyable.h>
29#include <wtf/Vector.h>
30
31namespace WebCore {
32
33class RenderObject;
34
35template<typename Iterator> class WhitespaceCollapsingState {
36public:
37 void reset()
38 {
39 m_transitions.clear();
40 m_currentTransition = 0;
41 }
42
43 void startIgnoringSpaces(const Iterator& transition)
44 {
45 ASSERT(!(m_transitions.size() % 2));
46 m_transitions.append(transition);
47 }
48
49 void stopIgnoringSpaces(const Iterator& transition)
50 {
51 ASSERT(m_transitions.size() % 2);
52 m_transitions.append(transition);
53 }
54
55 // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or
56 // hard line breaks to ensure that they're not ignored.
57 void ensureLineBoxInsideIgnoredSpaces(RenderObject& renderer)
58 {
59 Iterator transition(0, &renderer, 0);
60 stopIgnoringSpaces(transition);
61 startIgnoringSpaces(transition);
62 }
63
64 void decrementTransitionAt(size_t index)
65 {
66 m_transitions[index].fastDecrement();
67 }
68
69 const Vector<Iterator>& transitions() { return m_transitions; }
70 size_t numTransitions() const { return m_transitions.size(); }
71 size_t currentTransition() const { return m_currentTransition; }
72 void setCurrentTransition(size_t currentTransition) { m_currentTransition = currentTransition; }
73 void incrementCurrentTransition() { ++m_currentTransition; }
74 void decrementNumTransitions() { m_transitions.shrink(m_transitions.size() - 1); }
75 bool betweenTransitions() const { return m_currentTransition % 2; }
76private:
77 Vector<Iterator> m_transitions;
78 size_t m_currentTransition { 0 };
79};
80
81// The BidiStatus at a given position (typically the end of a line) can
82// be cached and then used to restart bidi resolution at that position.
83struct BidiStatus {
84 BidiStatus() = default;
85
86 // Creates a BidiStatus representing a new paragraph root with a default direction.
87 // Uses TextDirection as it only has two possibilities instead of UCharDirection which has at least 19.
88 BidiStatus(TextDirection direction, bool isOverride)
89 : eor(direction == TextDirection::LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT)
90 , lastStrong(eor)
91 , last(eor)
92 , context(BidiContext::create(direction == TextDirection::LTR ? 0 : 1, eor, isOverride))
93 {
94 }
95
96 BidiStatus(UCharDirection eor, UCharDirection lastStrong, UCharDirection last, RefPtr<BidiContext>&& context)
97 : eor(eor)
98 , lastStrong(lastStrong)
99 , last(last)
100 , context(WTFMove(context))
101 {
102 }
103
104 UCharDirection eor { U_OTHER_NEUTRAL };
105 UCharDirection lastStrong { U_OTHER_NEUTRAL };
106 UCharDirection last { U_OTHER_NEUTRAL };
107 RefPtr<BidiContext> context;
108};
109
110struct BidiEmbedding {
111 BidiEmbedding(UCharDirection direction, BidiEmbeddingSource source)
112 : direction(direction)
113 , source(source)
114 {
115 }
116
117 UCharDirection direction;
118 BidiEmbeddingSource source;
119};
120
121inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
122{
123 return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
124}
125
126inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
127{
128 return !(status1 == status2);
129}
130
131struct BidiCharacterRun {
132 WTF_MAKE_FAST_ALLOCATED;
133public:
134 BidiCharacterRun(unsigned start, unsigned stop, BidiContext* context, UCharDirection direction)
135 : m_start(start)
136 , m_stop(stop)
137 , m_override(context->override())
138 {
139 if (direction == U_OTHER_NEUTRAL)
140 direction = context->dir();
141
142 m_level = context->level();
143
144 // add level of run (cases I1 & I2)
145 if (m_level % 2) {
146 if (direction == U_LEFT_TO_RIGHT || direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER)
147 m_level++;
148 } else {
149 if (direction == U_RIGHT_TO_LEFT)
150 m_level++;
151 else if (direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER)
152 m_level += 2;
153 }
154 }
155
156 ~BidiCharacterRun()
157 {
158 // Delete the linked list in a loop to prevent destructor recursion.
159 auto next = WTFMove(m_next);
160 while (next)
161 next = WTFMove(next->m_next);
162 }
163
164 unsigned start() const { return m_start; }
165 unsigned stop() const { return m_stop; }
166 unsigned char level() const { return m_level; }
167 bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
168 bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
169
170 BidiCharacterRun* next() const { return m_next.get(); }
171 std::unique_ptr<BidiCharacterRun> takeNext() { return WTFMove(m_next); }
172 void setNext(std::unique_ptr<BidiCharacterRun>&& next) { m_next = WTFMove(next); }
173
174private:
175 std::unique_ptr<BidiCharacterRun> m_next;
176
177public:
178 unsigned m_start;
179 unsigned m_stop;
180 unsigned char m_level;
181 bool m_override : 1;
182 bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit.
183};
184
// Selects whether a line is resolved normally or forced into a single visual direction.
enum VisualDirectionOverride {
    NoVisualOverride = 0,
    VisualLeftToRightOverride = 1,
    VisualRightToLeftOverride = 2
};
190
191// BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
192// http://unicode.org/reports/tr9
193template<typename Iterator, typename Run, typename DerivedClass> class BidiResolverBase {
194 WTF_MAKE_NONCOPYABLE(BidiResolverBase);
195public:
196 const Iterator& position() const { return m_current; }
197 void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
198 void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
199 {
200 m_current = position;
201 m_nestedIsolateCount = nestedIsolatedCount;
202 }
203
204 void increment() { static_cast<DerivedClass&>(*this).incrementInternal(); }
205
206 BidiContext* context() const { return m_status.context.get(); }
207 void setContext(RefPtr<BidiContext>&& context) { m_status.context = WTFMove(context); }
208
209 void setLastDir(UCharDirection lastDir) { m_status.last = lastDir; }
210 void setLastStrongDir(UCharDirection lastStrongDir) { m_status.lastStrong = lastStrongDir; }
211 void setEorDir(UCharDirection eorDir) { m_status.eor = eorDir; }
212
213 UCharDirection dir() const { return m_direction; }
214 void setDir(UCharDirection direction) { m_direction = direction; }
215
216 const BidiStatus& status() const { return m_status; }
217 void setStatus(BidiStatus status) { m_status = status; }
218
219 WhitespaceCollapsingState<Iterator>& whitespaceCollapsingState() { return m_whitespaceCollapsingState; }
220
221 // The current algorithm handles nested isolates one layer of nesting at a time.
222 // But when we layout each isolated span, we will walk into (and ignore) all
223 // child isolated spans.
224 void enterIsolate() { m_nestedIsolateCount++; }
225 void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
226 bool inIsolate() const { return m_nestedIsolateCount; }
227
228 void embed(UCharDirection, BidiEmbeddingSource);
229 bool commitExplicitEmbedding();
230
231 void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false);
232
233 BidiRunList<Run>& runs() { return m_runs; }
234
235 // FIXME: This used to be part of deleteRuns() but was a layering violation.
236 // It's unclear if this is still needed.
237 void markCurrentRunEmpty() { m_emptyRun = true; }
238
239 void setWhitespaceCollapsingTransitionForIsolatedRun(Run&, size_t);
240 unsigned whitespaceCollapsingTransitionForIsolatedRun(Run&);
241
242protected:
243 BidiResolverBase() = default;
244
245 // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
246 // pass in some sort of Traits object which knows how to create runs for appending.
247 void appendRun() { static_cast<DerivedClass&>(*this).appendRunInternal(); }
248 bool needsContinuePastEnd() const { return static_cast<const DerivedClass&>(*this).needsContinuePastEndInternal(); }
249
250 Iterator m_current;
251 // sor and eor are "start of run" and "end of run" respectively and correpond
252 // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
253 Iterator m_sor; // Points to the first character in the current run.
254 Iterator m_eor; // Points to the last character in the current run.
255 Iterator m_last;
256 BidiStatus m_status;
257 UCharDirection m_direction { U_OTHER_NEUTRAL };
258 Iterator endOfLine;
259 bool m_reachedEndOfLine { false };
260 Iterator m_lastBeforeET; // Before a U_EUROPEAN_NUMBER_TERMINATOR
261 bool m_emptyRun { true };
262
263 // FIXME: This should not belong to the resolver, but rather be passed
264 // into createBidiRunsForLine by the caller.
265 BidiRunList<Run> m_runs;
266
267 WhitespaceCollapsingState<Iterator> m_whitespaceCollapsingState;
268
269 unsigned m_nestedIsolateCount { 0 };
270 HashMap<Run*, unsigned> m_whitespaceCollapsingTransitionForIsolatedRun;
271
272private:
273 void raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to);
274 void lowerExplicitEmbeddingLevel(UCharDirection from);
275 void checkDirectionInLowerRaiseEmbeddingLevel();
276
277 void updateStatusLastFromCurrentDirection(UCharDirection);
278 void reorderRunsFromLevels();
279 void incrementInternal() { m_current.increment(); }
280 void appendRunInternal();
281 bool needsContinuePastEndInternal() const { return true; }
282
283 Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
284};
285
286template<typename Iterator, typename Run>
287class BidiResolver : public BidiResolverBase<Iterator, Run, BidiResolver<Iterator, Run>> {
288};
289
290template<typename Iterator, typename Run, typename IsolateRun>
291class BidiResolverWithIsolate : public BidiResolverBase<Iterator, Run, BidiResolverWithIsolate<Iterator, Run, IsolateRun>> {
292public:
293 ~BidiResolverWithIsolate();
294
295 void incrementInternal();
296 void appendRunInternal();
297 bool needsContinuePastEndInternal() const;
298 Vector<IsolateRun>& isolatedRuns() { return m_isolatedRuns; }
299
300private:
301 Vector<IsolateRun> m_isolatedRuns;
302};
303
304template<typename Iterator, typename Run, typename IsolateRun>
305inline BidiResolverWithIsolate<Iterator, Run, IsolateRun>::~BidiResolverWithIsolate()
306{
307 // The owner of this resolver should have handled the isolated runs.
308 ASSERT(m_isolatedRuns.isEmpty());
309}
310
311template<typename Iterator, typename Run, typename DerivedClass>
312void BidiResolverBase<Iterator, Run, DerivedClass>::appendRunInternal()
313{
314 if (!m_emptyRun && !m_eor.atEnd()) {
315 unsigned startOffset = m_sor.offset();
316 unsigned endOffset = m_eor.offset();
317
318 if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) {
319 m_reachedEndOfLine = true;
320 endOffset = endOfLine.offset();
321 }
322
323 if (endOffset >= startOffset)
324 m_runs.appendRun(std::make_unique<Run>(startOffset, endOffset + 1, context(), m_direction));
325
326 m_eor.increment();
327 m_sor = m_eor;
328 }
329
330 m_direction = U_OTHER_NEUTRAL;
331 m_status.eor = U_OTHER_NEUTRAL;
332}
333
334template<typename Iterator, typename Run, typename DerivedClass>
335void BidiResolverBase<Iterator, Run, DerivedClass>::embed(UCharDirection dir, BidiEmbeddingSource source)
336{
337 // Isolated spans compute base directionality during their own UBA run.
338 // Do not insert fake embed characters once we enter an isolated span.
339 ASSERT(!inIsolate());
340
341 ASSERT(dir == U_POP_DIRECTIONAL_FORMAT || dir == U_LEFT_TO_RIGHT_EMBEDDING || dir == U_LEFT_TO_RIGHT_OVERRIDE || dir == U_RIGHT_TO_LEFT_EMBEDDING || dir == U_RIGHT_TO_LEFT_OVERRIDE);
342 m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
343}
344
345template<typename Iterator, typename Run, typename DerivedClass>
346void BidiResolverBase<Iterator, Run, DerivedClass>::checkDirectionInLowerRaiseEmbeddingLevel()
347{
348 ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
349 ASSERT(m_status.last != U_DIR_NON_SPACING_MARK
350 && m_status.last != U_BOUNDARY_NEUTRAL
351 && m_status.last != U_RIGHT_TO_LEFT_EMBEDDING
352 && m_status.last != U_LEFT_TO_RIGHT_EMBEDDING
353 && m_status.last != U_RIGHT_TO_LEFT_OVERRIDE
354 && m_status.last != U_LEFT_TO_RIGHT_OVERRIDE
355 && m_status.last != U_POP_DIRECTIONAL_FORMAT);
356 if (m_direction == U_OTHER_NEUTRAL)
357 m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
358}
359
360template<typename Iterator, typename Run, typename DerivedClass>
361void BidiResolverBase<Iterator, Run, DerivedClass>::lowerExplicitEmbeddingLevel(UCharDirection from)
362{
363 if (!m_emptyRun && m_eor != m_last) {
364 checkDirectionInLowerRaiseEmbeddingLevel();
365 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
366 if (from == U_LEFT_TO_RIGHT) {
367 // bidi.sor ... bidi.eor ... bidi.last L
368 if (m_status.eor == U_EUROPEAN_NUMBER) {
369 if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
370 m_direction = U_EUROPEAN_NUMBER;
371 appendRun();
372 }
373 } else if (m_status.eor == U_ARABIC_NUMBER) {
374 m_direction = U_ARABIC_NUMBER;
375 appendRun();
376 } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
377 appendRun();
378 m_direction = U_LEFT_TO_RIGHT;
379 }
380 } else if (m_status.eor == U_EUROPEAN_NUMBER || m_status.eor == U_ARABIC_NUMBER || m_status.lastStrong == U_LEFT_TO_RIGHT) {
381 appendRun();
382 m_direction = U_RIGHT_TO_LEFT;
383 }
384 m_eor = m_last;
385 }
386
387 appendRun();
388 m_emptyRun = true;
389
390 // sor for the new run is determined by the higher level (rule X10)
391 setLastDir(from);
392 setLastStrongDir(from);
393 m_eor = Iterator();
394}
395
396template<typename Iterator, typename Run, typename DerivedClass>
397void BidiResolverBase<Iterator, Run, DerivedClass>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to)
398{
399 if (!m_emptyRun && m_eor != m_last) {
400 checkDirectionInLowerRaiseEmbeddingLevel();
401 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
402 if (to == U_LEFT_TO_RIGHT) {
403 // bidi.sor ... bidi.eor ... bidi.last L
404 if (m_status.eor == U_EUROPEAN_NUMBER) {
405 if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
406 m_direction = U_EUROPEAN_NUMBER;
407 appendRun();
408 }
409 } else if (m_status.eor == U_ARABIC_NUMBER) {
410 m_direction = U_ARABIC_NUMBER;
411 appendRun();
412 } else if (m_status.lastStrong != U_LEFT_TO_RIGHT && from == U_LEFT_TO_RIGHT) {
413 appendRun();
414 m_direction = U_LEFT_TO_RIGHT;
415 }
416 } else if (m_status.eor == U_ARABIC_NUMBER
417 || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong != U_LEFT_TO_RIGHT || from == U_RIGHT_TO_LEFT))
418 || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && from == U_RIGHT_TO_LEFT)) {
419 appendRun();
420 m_direction = U_RIGHT_TO_LEFT;
421 }
422 m_eor = m_last;
423 }
424
425 appendRun();
426 m_emptyRun = true;
427
428 setLastDir(to);
429 setLastStrongDir(to);
430 m_eor = Iterator();
431}
432
433template<typename Iterator, typename Run, typename DerivedClass>
434bool BidiResolverBase<Iterator, Run, DerivedClass>::commitExplicitEmbedding()
435{
436 // When we're "inIsolate()" we're resolving the parent context which
437 // ignores (skips over) the isolated content, including embedding levels.
438 // We should never accrue embedding levels while skipping over isolated content.
439 ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
440
441 auto fromLevel = context()->level();
442 RefPtr<BidiContext> toContext = context();
443
444 for (auto& embedding : m_currentExplicitEmbeddingSequence) {
445 if (embedding.direction == U_POP_DIRECTIONAL_FORMAT) {
446 if (auto* parentContext = toContext->parent())
447 toContext = parentContext;
448 } else {
449 UCharDirection direction = (embedding.direction == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT;
450 bool override = embedding.direction == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE;
451 unsigned char level = toContext->level();
452 if (direction == U_RIGHT_TO_LEFT)
453 level = nextGreaterOddLevel(level);
454 else
455 level = nextGreaterEvenLevel(level);
456 if (level < 61)
457 toContext = BidiContext::create(level, direction, override, embedding.source, toContext.get());
458 }
459 }
460
461 auto toLevel = toContext->level();
462
463 if (toLevel > fromLevel)
464 raiseExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT, toLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
465 else if (toLevel < fromLevel)
466 lowerExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
467
468 setContext(WTFMove(toContext));
469
470 m_currentExplicitEmbeddingSequence.clear();
471
472 return fromLevel != toLevel;
473}
474
475template<typename Iterator, typename Run, typename DerivedClass>
476inline void BidiResolverBase<Iterator, Run, DerivedClass>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent)
477{
478 switch (dirCurrent) {
479 case U_EUROPEAN_NUMBER_TERMINATOR:
480 if (m_status.last != U_EUROPEAN_NUMBER)
481 m_status.last = U_EUROPEAN_NUMBER_TERMINATOR;
482 break;
483 case U_EUROPEAN_NUMBER_SEPARATOR:
484 case U_COMMON_NUMBER_SEPARATOR:
485 case U_SEGMENT_SEPARATOR:
486 case U_WHITE_SPACE_NEUTRAL:
487 case U_OTHER_NEUTRAL:
488 switch (m_status.last) {
489 case U_LEFT_TO_RIGHT:
490 case U_RIGHT_TO_LEFT:
491 case U_RIGHT_TO_LEFT_ARABIC:
492 case U_EUROPEAN_NUMBER:
493 case U_ARABIC_NUMBER:
494 m_status.last = dirCurrent;
495 break;
496 default:
497 m_status.last = U_OTHER_NEUTRAL;
498 }
499 break;
500 case U_DIR_NON_SPACING_MARK:
501 case U_BOUNDARY_NEUTRAL:
502 case U_RIGHT_TO_LEFT_EMBEDDING:
503 case U_LEFT_TO_RIGHT_EMBEDDING:
504 case U_RIGHT_TO_LEFT_OVERRIDE:
505 case U_LEFT_TO_RIGHT_OVERRIDE:
506 case U_POP_DIRECTIONAL_FORMAT:
507 // ignore these
508 break;
509 case U_EUROPEAN_NUMBER:
510 FALLTHROUGH;
511 default:
512 m_status.last = dirCurrent;
513 }
514}
515
516template<typename Iterator, typename Run, typename DerivedClass>
517inline void BidiResolverBase<Iterator, Run, DerivedClass>::reorderRunsFromLevels()
518{
519 unsigned char levelLow = 128;
520 unsigned char levelHigh = 0;
521 for (Run* run = m_runs.firstRun(); run; run = run->next()) {
522 levelHigh = std::max(run->level(), levelHigh);
523 levelLow = std::min(run->level(), levelLow);
524 }
525
526 // This implements reordering of the line (L2 according to Bidi spec):
527 // http://unicode.org/reports/tr9/#L2
528 // L2. From the highest level found in the text to the lowest odd level on each line,
529 // reverse any contiguous sequence of characters that are at that level or higher.
530
531 // Reversing is only done up to the lowest odd level.
532 if (!(levelLow % 2))
533 levelLow++;
534
535 unsigned count = m_runs.runCount() - 1;
536
537 while (levelHigh >= levelLow) {
538 unsigned i = 0;
539 Run* run = m_runs.firstRun();
540 while (i < count) {
541 for (;i < count && run && run->level() < levelHigh; i++)
542 run = run->next();
543 unsigned start = i;
544 for (;i <= count && run && run->level() >= levelHigh; i++)
545 run = run->next();
546 unsigned end = i - 1;
547 m_runs.reverseRuns(start, end);
548 }
549 levelHigh--;
550 }
551}
552
// Resolves the text from the current position up to |end| into runs, appends them to
// m_runs and finally reorders them with reorderRunsFromLevels().
//
// override: when it is not NoVisualOverride, the whole range becomes a single run with the
//     forced direction and the characters' own directions are not examined.
// hardLineBreak: once the scan has passed |end|, makes the loop stop consulting characters
//     and replaces the context saved for the next call with one that has the
//     Unicode-originated embeddings removed.
//
// The scan can continue beyond |end| (pastEnd). The state captured in stateAtEnd at the
// moment |end| was reached is restored before the loop exits, and m_current is set to
// |end|, so that the lookahead does not leak into the resolver's state.
template<typename Iterator, typename Run, typename DerivedClass>
void BidiResolverBase<Iterator, Run, DerivedClass>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
{
    ASSERT(m_direction == U_OTHER_NEUTRAL);

    if (override != NoVisualOverride) {
        // Visual override: everything up to |end| goes into one run with the forced direction.
        m_emptyRun = false;
        m_sor = m_current;
        m_eor = Iterator();
        while (m_current != end && !m_current.atEnd()) {
            m_eor = m_current;
            increment();
        }
        m_direction = override == VisualLeftToRightOverride ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
        appendRun();
        m_runs.setLogicallyLastRun(m_runs.lastRun());
        if (override == VisualRightToLeftOverride && m_runs.runCount())
            m_runs.reverseRuns(0, m_runs.runCount() - 1);
        return;
    }

    m_emptyRun = true;

    m_eor = Iterator();

    m_last = m_current;
    bool pastEnd = false;
    // Snapshot of the resolver state taken when |end| is first reached (see the end of the loop).
    BidiResolverBase<Iterator, Run, DerivedClass> stateAtEnd;

    while (true) {
        UCharDirection dirCurrent;
        if (pastEnd && (hardLineBreak || m_current.atEnd())) {
            // Beyond the end of the line with no character to consult: use a context direction instead.
            BidiContext* c = context();
            if (hardLineBreak) {
                // A deviation from the Unicode Bidi Algorithm in order to match
                // WinIE and user expectations: hard line breaks reset bidi state
                // coming from unicode bidi control characters, but not those from
                // DOM nodes with specified directionality
                stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());

                dirCurrent = stateAtEnd.context()->dir();
                stateAtEnd.setEorDir(dirCurrent);
                stateAtEnd.setLastDir(dirCurrent);
                stateAtEnd.setLastStrongDir(dirCurrent);
            } else {
                // Walk up to the outermost context and use its direction.
                while (c->parent())
                    c = c->parent();
                dirCurrent = c->dir();
            }
        } else {
            dirCurrent = m_current.direction();
            // Inside an override context every character except the explicit formatting
            // codes takes the context's direction.
            if (context()->override()
                && dirCurrent != U_RIGHT_TO_LEFT_EMBEDDING
                && dirCurrent != U_LEFT_TO_RIGHT_EMBEDDING
                && dirCurrent != U_RIGHT_TO_LEFT_OVERRIDE
                && dirCurrent != U_LEFT_TO_RIGHT_OVERRIDE
                && dirCurrent != U_POP_DIRECTIONAL_FORMAT)
                dirCurrent = context()->dir();
            else if (dirCurrent == U_DIR_NON_SPACING_MARK)
                // A non-spacing mark takes over the last recorded direction.
                dirCurrent = m_status.last;
        }

#if PLATFORM(WIN)
        // Our Windows build hasn't updated its headers from ICU 6.1, which doesn't have these symbols.
        const UCharDirection U_FIRST_STRONG_ISOLATE = static_cast<UCharDirection>(19);
        const UCharDirection U_LEFT_TO_RIGHT_ISOLATE = static_cast<UCharDirection>(20);
        const UCharDirection U_RIGHT_TO_LEFT_ISOLATE = static_cast<UCharDirection>(21);
        const UCharDirection U_POP_DIRECTIONAL_ISOLATE = static_cast<UCharDirection>(22);
#endif
        // We ignore all character directionality while in unicode-bidi: isolate spans.
        // We'll handle ordering the isolated characters in a second pass.
        if (inIsolate() || dirCurrent == U_FIRST_STRONG_ISOLATE || dirCurrent == U_LEFT_TO_RIGHT_ISOLATE || dirCurrent == U_RIGHT_TO_LEFT_ISOLATE || dirCurrent == U_POP_DIRECTIONAL_ISOLATE)
            dirCurrent = U_OTHER_NEUTRAL;

        ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
        switch (dirCurrent) {

        // embedding and overrides (X1-X9 in the Bidi specs)
        case U_RIGHT_TO_LEFT_EMBEDDING:
        case U_LEFT_TO_RIGHT_EMBEDDING:
        case U_RIGHT_TO_LEFT_OVERRIDE:
        case U_LEFT_TO_RIGHT_OVERRIDE:
        case U_POP_DIRECTIONAL_FORMAT:
            embed(dirCurrent, FromUnicode);
            commitExplicitEmbedding();
            break;

        // strong types
        case U_LEFT_TO_RIGHT:
            switch(m_status.last) {
            case U_RIGHT_TO_LEFT:
            case U_RIGHT_TO_LEFT_ARABIC:
            case U_EUROPEAN_NUMBER:
            case U_ARABIC_NUMBER:
                // A European number that follows L text stays in the same run; anything else ends the run.
                if (m_status.last != U_EUROPEAN_NUMBER || m_status.lastStrong != U_LEFT_TO_RIGHT)
                    appendRun();
                break;
            case U_LEFT_TO_RIGHT:
                break;
            case U_EUROPEAN_NUMBER_SEPARATOR:
            case U_EUROPEAN_NUMBER_TERMINATOR:
            case U_COMMON_NUMBER_SEPARATOR:
            case U_BOUNDARY_NEUTRAL:
            case U_BLOCK_SEPARATOR:
            case U_SEGMENT_SEPARATOR:
            case U_WHITE_SPACE_NEUTRAL:
            case U_OTHER_NEUTRAL:
                if (m_status.eor == U_EUROPEAN_NUMBER) {
                    if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
                        // the numbers need to be on a higher embedding level, so let's close that run
                        m_direction = U_EUROPEAN_NUMBER;
                        appendRun();
                        if (context()->dir() != U_LEFT_TO_RIGHT) {
                            // the neutrals take the embedding direction, which is R
                            m_eor = m_last;
                            m_direction = U_RIGHT_TO_LEFT;
                            appendRun();
                        }
                    }
                } else if (m_status.eor == U_ARABIC_NUMBER) {
                    // Arabic numbers are always on a higher embedding level, so let's close that run
                    m_direction = U_ARABIC_NUMBER;
                    appendRun();
                    if (context()->dir() != U_LEFT_TO_RIGHT) {
                        // the neutrals take the embedding direction, which is R
                        m_eor = m_last;
                        m_direction = U_RIGHT_TO_LEFT;
                        appendRun();
                    }
                } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
                    // last stuff takes embedding dir
                    if (context()->dir() == U_RIGHT_TO_LEFT) {
                        m_eor = m_last;
                        m_direction = U_RIGHT_TO_LEFT;
                    }
                    appendRun();
                }
                break;
            default:
                break;
            }
            // The current character becomes the end of an L run.
            m_eor = m_current;
            m_status.eor = U_LEFT_TO_RIGHT;
            m_status.lastStrong = U_LEFT_TO_RIGHT;
            m_direction = U_LEFT_TO_RIGHT;
            break;
        case U_RIGHT_TO_LEFT_ARABIC:
        case U_RIGHT_TO_LEFT:
            switch (m_status.last) {
            case U_LEFT_TO_RIGHT:
            case U_EUROPEAN_NUMBER:
            case U_ARABIC_NUMBER:
                appendRun();
                FALLTHROUGH;
            case U_RIGHT_TO_LEFT:
            case U_RIGHT_TO_LEFT_ARABIC:
                break;
            case U_EUROPEAN_NUMBER_SEPARATOR:
            case U_EUROPEAN_NUMBER_TERMINATOR:
            case U_COMMON_NUMBER_SEPARATOR:
            case U_BOUNDARY_NEUTRAL:
            case U_BLOCK_SEPARATOR:
            case U_SEGMENT_SEPARATOR:
            case U_WHITE_SPACE_NEUTRAL:
            case U_OTHER_NEUTRAL:
                if (m_status.eor == U_EUROPEAN_NUMBER) {
                    // The pending run is extended through the neutrals only when both the last
                    // strong direction and the context are L.
                    if (m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_LEFT_TO_RIGHT)
                        m_eor = m_last;
                    appendRun();
                } else if (m_status.eor == U_ARABIC_NUMBER)
                    appendRun();
                else if (m_status.lastStrong == U_LEFT_TO_RIGHT) {
                    if (context()->dir() == U_LEFT_TO_RIGHT)
                        m_eor = m_last;
                    appendRun();
                }
                break;
            default:
                break;
            }
            // The current character becomes the end of an R run; lastStrong keeps the
            // distinction between U_RIGHT_TO_LEFT and U_RIGHT_TO_LEFT_ARABIC.
            m_eor = m_current;
            m_status.eor = U_RIGHT_TO_LEFT;
            m_status.lastStrong = dirCurrent;
            m_direction = U_RIGHT_TO_LEFT;
            break;

            // weak types:

        case U_EUROPEAN_NUMBER:
            if (m_status.lastStrong != U_RIGHT_TO_LEFT_ARABIC) {
                // if last strong was AL change EN to AN
                // (that case skips this block and falls through to U_ARABIC_NUMBER below)
                switch (m_status.last) {
                case U_EUROPEAN_NUMBER:
                case U_LEFT_TO_RIGHT:
                    break;
                case U_RIGHT_TO_LEFT:
                case U_RIGHT_TO_LEFT_ARABIC:
                case U_ARABIC_NUMBER:
                    m_eor = m_last;
                    appendRun();
                    m_direction = U_EUROPEAN_NUMBER;
                    break;
                case U_EUROPEAN_NUMBER_SEPARATOR:
                case U_COMMON_NUMBER_SEPARATOR:
                    // A single separator between two European numbers stays part of the number run.
                    if (m_status.eor == U_EUROPEAN_NUMBER)
                        break;
                    FALLTHROUGH;
                case U_EUROPEAN_NUMBER_TERMINATOR:
                case U_BOUNDARY_NEUTRAL:
                case U_BLOCK_SEPARATOR:
                case U_SEGMENT_SEPARATOR:
                case U_WHITE_SPACE_NEUTRAL:
                case U_OTHER_NEUTRAL:
                    if (m_status.eor == U_EUROPEAN_NUMBER) {
                        if (m_status.lastStrong == U_RIGHT_TO_LEFT) {
                            // ENs on both sides behave like Rs, so the neutrals should be R.
                            // Terminate the EN run.
                            appendRun();
                            // Make an R run.
                            m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
                            m_direction = U_RIGHT_TO_LEFT;
                            appendRun();
                            // Begin a new EN run.
                            m_direction = U_EUROPEAN_NUMBER;
                        }
                    } else if (m_status.eor == U_ARABIC_NUMBER) {
                        // Terminate the AN run.
                        appendRun();
                        if (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT) {
                            // Make an R run.
                            m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
                            m_direction = U_RIGHT_TO_LEFT;
                            appendRun();
                            // Begin a new EN run.
                            m_direction = U_EUROPEAN_NUMBER;
                        }
                    } else if (m_status.lastStrong == U_RIGHT_TO_LEFT) {
                        // Extend the R run to include the neutrals.
                        m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
                        m_direction = U_RIGHT_TO_LEFT;
                        appendRun();
                        // Begin a new EN run.
                        m_direction = U_EUROPEAN_NUMBER;
                    }
                    break;
                default:
                    break;
                }
                m_eor = m_current;
                m_status.eor = U_EUROPEAN_NUMBER;
                if (m_direction == U_OTHER_NEUTRAL)
                    m_direction = U_LEFT_TO_RIGHT;
                break;
            }
            FALLTHROUGH;
        case U_ARABIC_NUMBER:
            // Also reached for a European number whose last strong character was AL.
            dirCurrent = U_ARABIC_NUMBER;
            switch (m_status.last) {
            case U_LEFT_TO_RIGHT:
                if (context()->dir() == U_LEFT_TO_RIGHT)
                    appendRun();
                break;
            case U_ARABIC_NUMBER:
                break;
            case U_RIGHT_TO_LEFT:
            case U_RIGHT_TO_LEFT_ARABIC:
            case U_EUROPEAN_NUMBER:
                m_eor = m_last;
                appendRun();
                break;
            case U_COMMON_NUMBER_SEPARATOR:
                // A single common separator between two Arabic numbers stays part of the number run.
                if (m_status.eor == U_ARABIC_NUMBER)
                    break;
                FALLTHROUGH;
            case U_EUROPEAN_NUMBER_SEPARATOR:
            case U_EUROPEAN_NUMBER_TERMINATOR:
            case U_BOUNDARY_NEUTRAL:
            case U_BLOCK_SEPARATOR:
            case U_SEGMENT_SEPARATOR:
            case U_WHITE_SPACE_NEUTRAL:
            case U_OTHER_NEUTRAL:
                if (m_status.eor == U_ARABIC_NUMBER
                    || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT))
                    || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_RIGHT_TO_LEFT)) {
                    // Terminate the run before the neutrals.
                    appendRun();
                    // Begin an R run for the neutrals.
                    m_direction = U_RIGHT_TO_LEFT;
                } else if (m_direction == U_OTHER_NEUTRAL)
                    m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
                m_eor = m_last;
                appendRun();
                break;
            default:
                break;
            }
            m_eor = m_current;
            m_status.eor = U_ARABIC_NUMBER;
            if (m_direction == U_OTHER_NEUTRAL)
                m_direction = U_ARABIC_NUMBER;
            break;
        case U_EUROPEAN_NUMBER_SEPARATOR:
        case U_COMMON_NUMBER_SEPARATOR:
            break;
        case U_EUROPEAN_NUMBER_TERMINATOR:
            if (m_status.last == U_EUROPEAN_NUMBER) {
                // A terminator right after a European number is treated as part of the number.
                dirCurrent = U_EUROPEAN_NUMBER;
                m_eor = m_current;
                m_status.eor = dirCurrent;
            } else if (m_status.last != U_EUROPEAN_NUMBER_TERMINATOR)
                // First terminator of a sequence: remember the position before it.
                m_lastBeforeET = m_emptyRun ? m_eor : m_last;
            break;

        // boundary neutrals should be ignored
        case U_BOUNDARY_NEUTRAL:
            if (m_eor == m_last)
                m_eor = m_current;
            break;
        // neutrals
        case U_BLOCK_SEPARATOR:
            // FIXME: What do we do with newline and paragraph separators that come to here?
            break;
        case U_SEGMENT_SEPARATOR:
            // FIXME: Implement rule L1.
            break;
        case U_WHITE_SPACE_NEUTRAL:
            break;
        case U_OTHER_NEUTRAL:
            break;
        default:
            break;
        }

        // Past the end we stop as soon as the current character has become the end of a run,
        // or right away when the derived resolver does not need to look further.
        if (pastEnd && (m_eor == m_current || !needsContinuePastEnd())) {
            if (!m_reachedEndOfLine) {
                // No run has been clamped to the end of the line yet, so emit the final one here.
                m_eor = endOfLine;
                switch (m_status.eor) {
                case U_LEFT_TO_RIGHT:
                case U_RIGHT_TO_LEFT:
                case U_ARABIC_NUMBER:
                    m_direction = m_status.eor;
                    break;
                case U_EUROPEAN_NUMBER:
                    m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_EUROPEAN_NUMBER;
                    break;
                default:
                    ASSERT_NOT_REACHED();
                }
                appendRun();
            }
            // Restore the state captured when |end| was reached.
            m_current = end;
            m_status = stateAtEnd.m_status;
            m_sor = stateAtEnd.m_sor;
            m_eor = stateAtEnd.m_eor;
            m_last = stateAtEnd.m_last;
            m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
            m_lastBeforeET = stateAtEnd.m_lastBeforeET;
            m_emptyRun = stateAtEnd.m_emptyRun;
            m_direction = U_OTHER_NEUTRAL;
            break;
        }

        updateStatusLastFromCurrentDirection(dirCurrent);
        m_last = m_current;

        if (m_emptyRun) {
            // First character of a new run.
            m_sor = m_current;
            m_emptyRun = false;
        }

        increment();
        // Embeddings handled in the switch above were committed immediately, so anything still
        // queued here was presumably added while incrementing.
        if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
            bool committed = commitExplicitEmbedding();
            if (committed && pastEnd) {
                // The level changed beyond the end of the line: restore the saved state and stop.
                m_current = end;
                m_status = stateAtEnd.m_status;
                m_sor = stateAtEnd.m_sor;
                m_eor = stateAtEnd.m_eor;
                m_last = stateAtEnd.m_last;
                m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
                m_lastBeforeET = stateAtEnd.m_lastBeforeET;
                m_emptyRun = stateAtEnd.m_emptyRun;
                m_direction = U_OTHER_NEUTRAL;
                break;
            }
        }

        if (!pastEnd && (m_current == end || m_current.atEnd())) {
            // |end| (or the end of the content) has just been reached for the first time.
            if (m_emptyRun)
                break;
            // Capture the state here so that it can be restored once the lookahead is done.
            stateAtEnd.m_status = m_status;
            stateAtEnd.m_sor = m_sor;
            stateAtEnd.m_eor = m_eor;
            stateAtEnd.m_last = m_last;
            stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
            stateAtEnd.m_lastBeforeET = m_lastBeforeET;
            stateAtEnd.m_emptyRun = m_emptyRun;
            endOfLine = m_last;
            pastEnd = true;
        }
    }

    m_runs.setLogicallyLastRun(m_runs.lastRun());
    reorderRunsFromLevels();
    // Clear the end-of-line marker; appendRunInternal() only clamps while it is set.
    endOfLine = Iterator();
}
959
960template<typename Iterator, typename Run, typename DerivedClass>
961void BidiResolverBase<Iterator, Run, DerivedClass>::setWhitespaceCollapsingTransitionForIsolatedRun(Run& run, size_t transition)
962{
963 ASSERT(!m_whitespaceCollapsingTransitionForIsolatedRun.contains(&run));
964 m_whitespaceCollapsingTransitionForIsolatedRun.add(&run, transition);
965}
966
967template<typename Iterator, typename Run, typename DerivedClass>
968unsigned BidiResolverBase<Iterator, Run, DerivedClass>::whitespaceCollapsingTransitionForIsolatedRun(Run& run)
969{
970 return m_whitespaceCollapsingTransitionForIsolatedRun.take(&run);
971}
972
973} // namespace WebCore
974