1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#pragma once
27
28#include "SegmentedString.h"
29#include <wtf/text/OrdinalNumber.h>
30
31namespace WebCore {
32
// The InputStream is made up of a sequence of SegmentedStrings:
//
// [--current--][--next--][--next--] ... [--next--]
//            /\                         (also called m_last)
//            L_ current insertion point
//
// The current segmented string is stored in InputStream. Each of the
// afterInsertionPoint buffers is stored in an InsertionPointRecord on the
// stack.
//
// We remove characters from the "current" string in the InputStream.
// document.write() will add characters at the current insertion point,
// which appends them to the "current" string.
//
// m_last is a pointer to the last of the afterInsertionPoint strings.
// The network adds data at the end of the InputStream, which appends
// it to the "last" string.
50class HTMLInputStream {
51 WTF_MAKE_NONCOPYABLE(HTMLInputStream);
52public:
53 HTMLInputStream()
54 : m_last(&m_first)
55 {
56 }
57
58 void appendToEnd(SegmentedString&& string)
59 {
60 m_last->append(WTFMove(string));
61 }
62
63 void insertAtCurrentInsertionPoint(SegmentedString&& string)
64 {
65 m_first.append(WTFMove(string));
66 }
67
68 bool hasInsertionPoint() const
69 {
70 return &m_first != m_last;
71 }
72
73 void markEndOfFile()
74 {
75 m_last->append(String { &kEndOfFileMarker, 1 });
76 m_last->close();
77 }
78
79 void closeWithoutMarkingEndOfFile()
80 {
81 m_last->close();
82 }
83
84 bool haveSeenEndOfFile() const
85 {
86 return m_last->isClosed();
87 }
88
89 SegmentedString& current() { return m_first; }
90 const SegmentedString& current() const { return m_first; }
91
92 void splitInto(SegmentedString& next)
93 {
94 next = WTFMove(m_first);
95 if (m_last == &m_first) {
96 // We used to only have one SegmentedString in the InputStream
97 // but now we have two. That means m_first is no longer also
98 // the m_last string, |next| is now the last one.
99 m_last = &next;
100 }
101 }
102
103 void mergeFrom(SegmentedString& next)
104 {
105 m_first.append(next);
106 if (m_last == &next) {
107 // The string |next| used to be the last SegmentedString in
108 // the InputStream. Now that it's been merged into m_first,
109 // that makes m_first the last one.
110 m_last = &m_first;
111 }
112 if (next.isClosed()) {
113 // We also need to merge the "closed" state from next to
114 // m_first. Arguably, this work could be done in append().
115 m_first.close();
116 }
117 }
118
119private:
120 SegmentedString m_first;
121 SegmentedString* m_last;
122};
123
124class InsertionPointRecord {
125 WTF_MAKE_NONCOPYABLE(InsertionPointRecord);
126public:
127 explicit InsertionPointRecord(HTMLInputStream& inputStream)
128 : m_inputStream(&inputStream)
129 {
130 m_line = m_inputStream->current().currentLine();
131 m_column = m_inputStream->current().currentColumn();
132 m_inputStream->splitInto(m_next);
133 // We 'fork' current position and use it for the generated script part.
134 // This is a bit weird, because generated part does not have positions within an HTML document.
135 m_inputStream->current().setCurrentPosition(m_line, m_column, 0);
136 }
137
138 ~InsertionPointRecord()
139 {
140 // Some inserted text may have remained in input stream. E.g. if script has written "&amp" or "<table",
141 // it stays in buffer because it cannot be properly tokenized before we see next part.
142 int unparsedRemainderLength = m_inputStream->current().length();
143 m_inputStream->mergeFrom(m_next);
144 // We restore position for the character that goes right after unparsed remainder.
145 m_inputStream->current().setCurrentPosition(m_line, m_column, unparsedRemainderLength);
146 }
147
148private:
149 HTMLInputStream* m_inputStream;
150 SegmentedString m_next;
151 OrdinalNumber m_line;
152 OrdinalNumber m_column;
153};
154
155} // namespace WebCore
156