1 | /* |
2 | * Copyright (C) 2013-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | #include "SpeechSynthesis.h" |
28 | |
29 | #if ENABLE(SPEECH_SYNTHESIS) |
30 | |
31 | #include "EventNames.h" |
32 | #include "PlatformSpeechSynthesisVoice.h" |
33 | #include "PlatformSpeechSynthesizer.h" |
34 | #include "SpeechSynthesisEvent.h" |
35 | #include "SpeechSynthesisUtterance.h" |
36 | #include "UserGestureIndicator.h" |
37 | #include <wtf/NeverDestroyed.h> |
38 | |
39 | namespace WebCore { |
40 | |
41 | Ref<SpeechSynthesis> SpeechSynthesis::create(WeakPtr<SpeechSynthesisClient> client) |
42 | { |
43 | return adoptRef(*new SpeechSynthesis(client)); |
44 | } |
45 | |
46 | SpeechSynthesis::SpeechSynthesis(WeakPtr<SpeechSynthesisClient> client) |
47 | : m_currentSpeechUtterance(nullptr) |
48 | , m_isPaused(false) |
49 | #if PLATFORM(IOS_FAMILY) |
50 | , m_restrictions(RequireUserGestureForSpeechStartRestriction) |
51 | #endif |
52 | , m_speechSynthesisClient(client) |
53 | { |
54 | if (m_speechSynthesisClient) |
55 | m_speechSynthesisClient->setObserver(makeWeakPtr(this)); |
56 | } |
57 | |
58 | void SpeechSynthesis::setPlatformSynthesizer(std::unique_ptr<PlatformSpeechSynthesizer> synthesizer) |
59 | { |
60 | m_platformSpeechSynthesizer = WTFMove(synthesizer); |
61 | m_voiceList.clear(); |
62 | m_currentSpeechUtterance = nullptr; |
63 | m_utteranceQueue.clear(); |
64 | m_isPaused = false; |
65 | m_speechSynthesisClient = nullptr; |
66 | } |
67 | |
68 | void SpeechSynthesis::voicesDidChange() |
69 | { |
70 | m_voiceList.clear(); |
71 | } |
72 | |
73 | PlatformSpeechSynthesizer& SpeechSynthesis::ensurePlatformSpeechSynthesizer() |
74 | { |
75 | if (!m_platformSpeechSynthesizer) |
76 | m_platformSpeechSynthesizer = std::make_unique<PlatformSpeechSynthesizer>(this); |
77 | return *m_platformSpeechSynthesizer; |
78 | } |
79 | |
80 | const Vector<Ref<SpeechSynthesisVoice>>& SpeechSynthesis::getVoices() |
81 | { |
82 | if (!m_voiceList.isEmpty()) |
83 | return m_voiceList; |
84 | |
85 | // If the voiceList is empty, that's the cue to get the voices from the platform again. |
86 | for (auto& voice : m_speechSynthesisClient ? m_speechSynthesisClient->voiceList() : ensurePlatformSpeechSynthesizer().voiceList()) |
87 | m_voiceList.append(SpeechSynthesisVoice::create(*voice)); |
88 | |
89 | return m_voiceList; |
90 | } |
91 | |
92 | bool SpeechSynthesis::speaking() const |
93 | { |
94 | // If we have a current speech utterance, then that means we're assumed to be in a speaking state. |
95 | // This state is independent of whether the utterance happens to be paused. |
96 | return m_currentSpeechUtterance; |
97 | } |
98 | |
99 | bool SpeechSynthesis::pending() const |
100 | { |
101 | // This is true if there are any utterances that have not started. |
102 | // That means there will be more than one in the queue. |
103 | return m_utteranceQueue.size() > 1; |
104 | } |
105 | |
106 | bool SpeechSynthesis::paused() const |
107 | { |
108 | return m_isPaused; |
109 | } |
110 | |
111 | void SpeechSynthesis::startSpeakingImmediately(SpeechSynthesisUtterance& utterance) |
112 | { |
113 | ASSERT(!m_currentSpeechUtterance); |
114 | utterance.setStartTime(MonotonicTime::now()); |
115 | m_currentSpeechUtterance = &utterance; |
116 | m_isPaused = false; |
117 | |
118 | // Zero lengthed strings should immediately notify that the event is complete. |
119 | if (utterance.text().isEmpty()) { |
120 | handleSpeakingCompleted(utterance, false); |
121 | return; |
122 | } |
123 | |
124 | if (m_speechSynthesisClient) |
125 | m_speechSynthesisClient->speak(utterance.platformUtterance()); |
126 | else |
127 | ensurePlatformSpeechSynthesizer().speak(utterance.platformUtterance()); |
128 | } |
129 | |
130 | void SpeechSynthesis::speak(SpeechSynthesisUtterance& utterance) |
131 | { |
132 | // Like Audio, we should require that the user interact to start a speech synthesis session. |
133 | #if PLATFORM(IOS_FAMILY) |
134 | if (UserGestureIndicator::processingUserGesture()) |
135 | removeBehaviorRestriction(RequireUserGestureForSpeechStartRestriction); |
136 | else if (userGestureRequiredForSpeechStart()) |
137 | return; |
138 | #endif |
139 | |
140 | m_utteranceQueue.append(utterance); |
141 | |
142 | // If the queue was empty, speak this immediately and add it to the queue. |
143 | if (m_utteranceQueue.size() == 1) |
144 | startSpeakingImmediately(m_utteranceQueue.first()); |
145 | } |
146 | |
147 | void SpeechSynthesis::cancel() |
148 | { |
149 | // Remove all the items from the utterance queue. |
150 | // Hold on to the current utterance so the platform synthesizer can have a chance to clean up. |
151 | RefPtr<SpeechSynthesisUtterance> current = m_currentSpeechUtterance; |
152 | m_utteranceQueue.clear(); |
153 | if (m_speechSynthesisClient) |
154 | m_speechSynthesisClient->cancel(); |
155 | else if (m_platformSpeechSynthesizer) |
156 | m_platformSpeechSynthesizer->cancel(); |
157 | current = nullptr; |
158 | |
159 | // The platform should have called back immediately and cleared the current utterance. |
160 | ASSERT(!m_currentSpeechUtterance); |
161 | } |
162 | |
163 | void SpeechSynthesis::pause() |
164 | { |
165 | if (!m_isPaused) { |
166 | if (m_speechSynthesisClient) |
167 | m_speechSynthesisClient->pause(); |
168 | else if (m_platformSpeechSynthesizer) |
169 | m_platformSpeechSynthesizer->pause(); |
170 | } |
171 | } |
172 | |
173 | void SpeechSynthesis::resume() |
174 | { |
175 | if (m_currentSpeechUtterance) { |
176 | if (m_speechSynthesisClient) |
177 | m_speechSynthesisClient->resume(); |
178 | else if (m_platformSpeechSynthesizer) |
179 | m_platformSpeechSynthesizer->resume(); |
180 | } |
181 | } |
182 | |
183 | void SpeechSynthesis::fireEvent(const AtomicString& type, SpeechSynthesisUtterance& utterance, unsigned long charIndex, const String& name) |
184 | { |
185 | utterance.dispatchEvent(SpeechSynthesisEvent::create(type, charIndex, (MonotonicTime::now() - utterance.startTime()).seconds(), name)); |
186 | } |
187 | |
188 | void SpeechSynthesis::handleSpeakingCompleted(SpeechSynthesisUtterance& utterance, bool errorOccurred) |
189 | { |
190 | ASSERT(m_currentSpeechUtterance); |
191 | Ref<SpeechSynthesisUtterance> protect(utterance); |
192 | |
193 | m_currentSpeechUtterance = nullptr; |
194 | |
195 | fireEvent(errorOccurred ? eventNames().errorEvent : eventNames().endEvent, utterance, 0, String()); |
196 | |
197 | if (m_utteranceQueue.size()) { |
198 | Ref<SpeechSynthesisUtterance> firstUtterance = m_utteranceQueue.takeFirst(); |
199 | ASSERT(&utterance == firstUtterance.ptr()); |
200 | |
201 | // Start the next job if there is one pending. |
202 | if (!m_utteranceQueue.isEmpty()) |
203 | startSpeakingImmediately(m_utteranceQueue.first()); |
204 | } |
205 | } |
206 | |
207 | void SpeechSynthesis::boundaryEventOccurred(PlatformSpeechSynthesisUtterance& utterance, SpeechBoundary boundary, unsigned charIndex) |
208 | { |
209 | static NeverDestroyed<const String> wordBoundaryString(MAKE_STATIC_STRING_IMPL("word" )); |
210 | static NeverDestroyed<const String> sentenceBoundaryString(MAKE_STATIC_STRING_IMPL("sentence" )); |
211 | |
212 | ASSERT(utterance.client()); |
213 | |
214 | switch (boundary) { |
215 | case SpeechBoundary::SpeechWordBoundary: |
216 | fireEvent(eventNames().boundaryEvent, static_cast<SpeechSynthesisUtterance&>(*utterance.client()), charIndex, wordBoundaryString); |
217 | break; |
218 | case SpeechBoundary::SpeechSentenceBoundary: |
219 | fireEvent(eventNames().boundaryEvent, static_cast<SpeechSynthesisUtterance&>(*utterance.client()), charIndex, sentenceBoundaryString); |
220 | break; |
221 | default: |
222 | ASSERT_NOT_REACHED(); |
223 | } |
224 | } |
225 | |
226 | void SpeechSynthesis::didStartSpeaking() |
227 | { |
228 | didStartSpeaking(*m_currentSpeechUtterance->platformUtterance()); |
229 | } |
230 | |
231 | void SpeechSynthesis::didFinishSpeaking() |
232 | { |
233 | didFinishSpeaking(*m_currentSpeechUtterance->platformUtterance()); |
234 | } |
235 | |
236 | void SpeechSynthesis::didPauseSpeaking() |
237 | { |
238 | didPauseSpeaking(*m_currentSpeechUtterance->platformUtterance()); |
239 | } |
240 | |
241 | void SpeechSynthesis::didResumeSpeaking() |
242 | { |
243 | didResumeSpeaking(*m_currentSpeechUtterance->platformUtterance()); |
244 | } |
245 | |
246 | void SpeechSynthesis::speakingErrorOccurred() |
247 | { |
248 | speakingErrorOccurred(*m_currentSpeechUtterance->platformUtterance()); |
249 | } |
250 | |
251 | void SpeechSynthesis::boundaryEventOccurred(bool wordBoundary, unsigned charIndex) |
252 | { |
253 | boundaryEventOccurred(*m_currentSpeechUtterance->platformUtterance(), wordBoundary ? SpeechBoundary::SpeechWordBoundary : SpeechBoundary::SpeechSentenceBoundary, charIndex); |
254 | } |
255 | |
256 | void SpeechSynthesis::voicesChanged() |
257 | { |
258 | voicesDidChange(); |
259 | } |
260 | |
261 | void SpeechSynthesis::didStartSpeaking(PlatformSpeechSynthesisUtterance& utterance) |
262 | { |
263 | if (utterance.client()) |
264 | fireEvent(eventNames().startEvent, static_cast<SpeechSynthesisUtterance&>(*utterance.client()), 0, String()); |
265 | } |
266 | |
267 | void SpeechSynthesis::didPauseSpeaking(PlatformSpeechSynthesisUtterance& utterance) |
268 | { |
269 | m_isPaused = true; |
270 | if (utterance.client()) |
271 | fireEvent(eventNames().pauseEvent, static_cast<SpeechSynthesisUtterance&>(*utterance.client()), 0, String()); |
272 | } |
273 | |
274 | void SpeechSynthesis::didResumeSpeaking(PlatformSpeechSynthesisUtterance& utterance) |
275 | { |
276 | m_isPaused = false; |
277 | if (utterance.client()) |
278 | fireEvent(eventNames().resumeEvent, static_cast<SpeechSynthesisUtterance&>(*utterance.client()), 0, String()); |
279 | } |
280 | |
281 | void SpeechSynthesis::didFinishSpeaking(PlatformSpeechSynthesisUtterance& utterance) |
282 | { |
283 | if (utterance.client()) |
284 | handleSpeakingCompleted(static_cast<SpeechSynthesisUtterance&>(*utterance.client()), false); |
285 | } |
286 | |
287 | void SpeechSynthesis::speakingErrorOccurred(PlatformSpeechSynthesisUtterance& utterance) |
288 | { |
289 | if (utterance.client()) |
290 | handleSpeakingCompleted(static_cast<SpeechSynthesisUtterance&>(*utterance.client()), true); |
291 | } |
292 | |
293 | } // namespace WebCore |
294 | |
295 | #endif // ENABLE(SPEECH_SYNTHESIS) |
296 | |