| 1 | /* |
| 2 | * Copyright (C) 2004-2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #pragma once |
| 27 | |
| 28 | // FIXME: Move each iterator class into a separate header file. |
| 29 | |
| 30 | #include "FindOptions.h" |
| 31 | #include "Range.h" |
| 32 | #include "TextIteratorBehavior.h" |
| 33 | #include <wtf/Vector.h> |
| 34 | #include <wtf/text/StringView.h> |
| 35 | |
| 36 | namespace WebCore { |
| 37 | |
// Forward declarations. Within this header these types are only named through
// pointers, references, or std::unique_ptr, so their full definitions are not needed here.
class InlineTextBox;
class RenderText;
class RenderTextFragment;

namespace SimpleLineLayout {
class RunResolver;
} // namespace SimpleLineLayout
| 45 | |
| 46 | WEBCORE_EXPORT String plainText(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
| 47 | WEBCORE_EXPORT String plainTextReplacingNoBreakSpace(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
| 48 | |
| 49 | WEBCORE_EXPORT String plainText(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
| 50 | WEBCORE_EXPORT String plainTextReplacingNoBreakSpace(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
| 51 | WEBCORE_EXPORT String plainTextUsingBackwardsTextIteratorForTesting(const Range&); |
| 52 | |
| 53 | Ref<Range> findPlainText(const Range&, const String&, FindOptions); |
| 54 | WEBCORE_EXPORT Ref<Range> findClosestPlainText(const Range&, const String&, FindOptions, unsigned); |
| 55 | WEBCORE_EXPORT bool hasAnyPlainText(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior); |
| 56 | bool findPlainText(const String& document, const String&, FindOptions); // Lets us use the search algorithm on a string. |
| 57 | |
| 58 | // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. |
| 59 | bool isRendererReplacedElement(RenderObject*); |
| 60 | |
| 61 | class BitStack { |
| 62 | public: |
| 63 | BitStack(); |
| 64 | ~BitStack(); |
| 65 | |
| 66 | void push(bool); |
| 67 | void pop(); |
| 68 | |
| 69 | bool top() const; |
| 70 | unsigned size() const; |
| 71 | |
| 72 | private: |
| 73 | unsigned m_size; |
| 74 | Vector<unsigned, 1> m_words; |
| 75 | }; |
| 76 | |
| 77 | class TextIteratorCopyableText { |
| 78 | public: |
| 79 | TextIteratorCopyableText() |
| 80 | : m_singleCharacter(0) |
| 81 | , m_offset(0) |
| 82 | , m_length(0) |
| 83 | { |
| 84 | } |
| 85 | |
| 86 | StringView text() const { return m_singleCharacter ? StringView(&m_singleCharacter, 1) : StringView(m_string).substring(m_offset, m_length); } |
| 87 | void appendToStringBuilder(StringBuilder&) const; |
| 88 | |
| 89 | void reset(); |
| 90 | void set(String&&); |
| 91 | void set(String&&, unsigned offset, unsigned length); |
| 92 | void set(UChar); |
| 93 | |
| 94 | private: |
| 95 | UChar m_singleCharacter; |
| 96 | String m_string; |
| 97 | unsigned m_offset; |
| 98 | unsigned m_length; |
| 99 | }; |
| 100 | |
| 101 | // Iterates through the DOM range, returning all the text, and 0-length boundaries |
| 102 | // at points where replaced elements break up the text flow. The text is delivered in |
| 103 | // the chunks it's already stored in, to avoid copying any text. |
| 104 | |
| 105 | class TextIterator { |
| 106 | WTF_MAKE_FAST_ALLOCATED; |
| 107 | public: |
| 108 | WEBCORE_EXPORT explicit TextIterator(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior); |
| 109 | WEBCORE_EXPORT explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); |
| 110 | WEBCORE_EXPORT ~TextIterator(); |
| 111 | |
| 112 | bool atEnd() const { return !m_positionNode; } |
| 113 | WEBCORE_EXPORT void advance(); |
| 114 | |
| 115 | StringView text() const { ASSERT(!atEnd()); return m_text; } |
| 116 | WEBCORE_EXPORT Ref<Range> range() const; |
| 117 | WEBCORE_EXPORT Node* node() const; |
| 118 | |
| 119 | const TextIteratorCopyableText& copyableText() const { ASSERT(!atEnd()); return m_copyableText; } |
| 120 | void appendTextToStringBuilder(StringBuilder& builder) const { copyableText().appendToStringBuilder(builder); } |
| 121 | |
| 122 | WEBCORE_EXPORT static int rangeLength(const Range*, bool spacesForReplacedElements = false); |
| 123 | WEBCORE_EXPORT static RefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); |
| 124 | WEBCORE_EXPORT static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length); |
| 125 | WEBCORE_EXPORT static Ref<Range> subrange(Range& entireRange, int characterOffset, int characterCount); |
| 126 | |
| 127 | private: |
| 128 | void init(); |
| 129 | void exitNode(Node*); |
| 130 | bool shouldRepresentNodeOffsetZero(); |
| 131 | bool shouldEmitSpaceBeforeAndAfterNode(Node&); |
| 132 | void representNodeOffsetZero(); |
| 133 | bool handleTextNode(); |
| 134 | bool handleReplacedElement(); |
| 135 | bool handleNonTextNode(); |
| 136 | void handleTextBox(); |
| 137 | void handleTextNodeFirstLetter(RenderTextFragment&); |
| 138 | void emitCharacter(UChar, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); |
| 139 | void emitText(Text& textNode, RenderText&, int textStartOffset, int textEndOffset); |
| 140 | |
| 141 | Node* baseNodeForEmittingNewLine() const; |
| 142 | |
| 143 | const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior }; |
| 144 | |
| 145 | // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree. |
| 146 | Node* m_node { nullptr }; |
| 147 | int m_offset { 0 }; |
| 148 | bool m_handledNode { false }; |
| 149 | bool m_handledChildren { false }; |
| 150 | BitStack m_fullyClippedStack; |
| 151 | |
| 152 | // The range. |
| 153 | Node* m_startContainer { nullptr }; |
| 154 | int m_startOffset { 0 }; |
| 155 | Node* m_endContainer { nullptr }; |
| 156 | int m_endOffset { 0 }; |
| 157 | Node* m_pastEndNode { nullptr }; |
| 158 | |
| 159 | // The current text and its position, in the form to be returned from the iterator. |
| 160 | Node* m_positionNode { nullptr }; |
| 161 | mutable Node* m_positionOffsetBaseNode { nullptr }; |
| 162 | mutable int m_positionStartOffset { 0 }; |
| 163 | mutable int m_positionEndOffset { 0 }; |
| 164 | TextIteratorCopyableText m_copyableText; |
| 165 | StringView m_text; |
| 166 | |
| 167 | // Used when there is still some pending text from the current node; when these are false and null, we go back to normal iterating. |
| 168 | Node* m_nodeForAdditionalNewline { nullptr }; |
| 169 | InlineTextBox* m_textBox { nullptr }; |
| 170 | |
| 171 | // Used when iterating over :first-letter text to save pointer to remaining text box. |
| 172 | InlineTextBox* m_remainingTextBox { nullptr }; |
| 173 | |
| 174 | // Used to point to RenderText object for :first-letter. |
| 175 | RenderText* m_firstLetterText { nullptr }; |
| 176 | |
| 177 | // Used to do the whitespace collapsing logic. |
| 178 | Text* m_lastTextNode { nullptr }; |
| 179 | bool m_lastTextNodeEndedWithCollapsedSpace { false }; |
| 180 | UChar m_lastCharacter { 0 }; |
| 181 | |
| 182 | // Used to do simple line layout run logic. |
| 183 | bool m_nextRunNeedsWhitespace { false }; |
| 184 | unsigned m_accumulatedSimpleTextLengthInFlow { 0 }; |
| 185 | Text* m_previousSimpleTextNodeInFlow { nullptr }; |
| 186 | std::unique_ptr<SimpleLineLayout::RunResolver> m_flowRunResolverCache; |
| 187 | |
| 188 | // Used when text boxes are out of order (Hebrew/Arabic with embedded LTR text) |
| 189 | Vector<InlineTextBox*> m_sortedTextBoxes; |
| 190 | size_t m_sortedTextBoxesPosition { 0 }; |
| 191 | |
| 192 | // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content |
| 193 | bool m_hasEmitted { false }; |
| 194 | |
| 195 | // Used when deciding text fragment created by :first-letter should be looked into. |
| 196 | bool m_handledFirstLetter { false }; |
| 197 | }; |
| 198 | |
| 199 | // Iterates through the DOM range, returning all the text, and 0-length boundaries |
| 200 | // at points where replaced elements break up the text flow. The text comes back in |
| 201 | // chunks so as to optimize for performance of the iteration. |
| 202 | class SimplifiedBackwardsTextIterator { |
| 203 | public: |
| 204 | explicit SimplifiedBackwardsTextIterator(const Range&); |
| 205 | |
| 206 | bool atEnd() const { return !m_positionNode; } |
| 207 | void advance(); |
| 208 | |
| 209 | StringView text() const { ASSERT(!atEnd()); return m_text; } |
| 210 | WEBCORE_EXPORT Ref<Range> range() const; |
| 211 | Node* node() const { ASSERT(!atEnd()); return m_node; } |
| 212 | |
| 213 | private: |
| 214 | void exitNode(); |
| 215 | bool handleTextNode(); |
| 216 | RenderText* handleFirstLetter(int& startOffset, int& offsetInNode); |
| 217 | bool handleReplacedElement(); |
| 218 | bool handleNonTextNode(); |
| 219 | void emitCharacter(UChar, Node&, int startOffset, int endOffset); |
| 220 | bool advanceRespectingRange(Node*); |
| 221 | |
| 222 | const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior }; |
| 223 | |
| 224 | // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree. |
| 225 | Node* m_node { nullptr }; |
| 226 | int m_offset { 0 }; |
| 227 | bool m_handledNode { false }; |
| 228 | bool m_handledChildren { false }; |
| 229 | BitStack m_fullyClippedStack; |
| 230 | |
| 231 | // The range. |
| 232 | Node* m_startContainer { nullptr }; |
| 233 | int m_startOffset { 0 }; |
| 234 | Node* m_endContainer { nullptr }; |
| 235 | int m_endOffset { 0 }; |
| 236 | |
| 237 | // The current text and its position, in the form to be returned from the iterator. |
| 238 | Node* m_positionNode { nullptr }; |
| 239 | int m_positionStartOffset { 0 }; |
| 240 | int m_positionEndOffset { 0 }; |
| 241 | TextIteratorCopyableText m_copyableText; |
| 242 | StringView m_text; |
| 243 | |
| 244 | // Used to do the whitespace logic. |
| 245 | Text* m_lastTextNode { nullptr }; |
| 246 | UChar m_lastCharacter { 0 }; |
| 247 | |
| 248 | // Whether m_node has advanced beyond the iteration range (i.e. m_startContainer). |
| 249 | bool m_havePassedStartContainer { false }; |
| 250 | |
| 251 | // Should handle first-letter renderer in the next call to handleTextNode. |
| 252 | bool m_shouldHandleFirstLetter { false }; |
| 253 | }; |
| 254 | |
| 255 | // Builds on the text iterator, adding a character position so we can walk one |
| 256 | // character at a time, or faster, as needed. Useful for searching. |
| 257 | class CharacterIterator { |
| 258 | public: |
| 259 | explicit CharacterIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior); |
| 260 | WEBCORE_EXPORT explicit CharacterIterator(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior); |
| 261 | |
| 262 | bool atEnd() const { return m_underlyingIterator.atEnd(); } |
| 263 | WEBCORE_EXPORT void advance(int numCharacters); |
| 264 | |
| 265 | StringView text() const { return m_underlyingIterator.text().substring(m_runOffset); } |
| 266 | WEBCORE_EXPORT Ref<Range> range() const; |
| 267 | |
| 268 | bool atBreak() const { return m_atBreak; } |
| 269 | int characterOffset() const { return m_offset; } |
| 270 | |
| 271 | private: |
| 272 | TextIterator m_underlyingIterator; |
| 273 | |
| 274 | int m_offset { 0 }; |
| 275 | int m_runOffset { 0 }; |
| 276 | bool m_atBreak { true }; |
| 277 | }; |
| 278 | |
| 279 | class BackwardsCharacterIterator { |
| 280 | public: |
| 281 | explicit BackwardsCharacterIterator(const Range&); |
| 282 | |
| 283 | bool atEnd() const { return m_underlyingIterator.atEnd(); } |
| 284 | void advance(int numCharacters); |
| 285 | |
| 286 | Ref<Range> range() const; |
| 287 | |
| 288 | private: |
| 289 | SimplifiedBackwardsTextIterator m_underlyingIterator; |
| 290 | |
| 291 | int m_offset; |
| 292 | int m_runOffset; |
| 293 | bool m_atBreak; |
| 294 | }; |
| 295 | |
| 296 | // Similar to the TextIterator, except that the chunks of text returned are "well behaved", meaning |
| 297 | // they never split up a word. This is useful for spell checking and perhaps one day for searching as well. |
| 298 | class WordAwareIterator { |
| 299 | public: |
| 300 | explicit WordAwareIterator(const Range&); |
| 301 | |
| 302 | bool atEnd() const { return !m_didLookAhead && m_underlyingIterator.atEnd(); } |
| 303 | void advance(); |
| 304 | |
| 305 | StringView text() const; |
| 306 | |
| 307 | private: |
| 308 | TextIterator m_underlyingIterator; |
| 309 | |
| 310 | // Text from the previous chunk from the text iterator. |
| 311 | TextIteratorCopyableText m_previousText; |
| 312 | |
| 313 | // Many chunks from text iterator concatenated. |
| 314 | Vector<UChar> m_buffer; |
| 315 | |
| 316 | // Did we have to look ahead in the text iterator to confirm the current chunk? |
| 317 | bool m_didLookAhead; |
| 318 | }; |
| 319 | |
| 320 | } // namespace WebCore |
| 321 | |