Avi Drissman | 4e1b7bc3 | 2022-09-15 14:03:50 | [diff] [blame] | 1 | // Copyright 2019 The Chromium Authors |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "content/browser/speech/speech_synthesis_impl.h" |
| 6 | |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 7 | #include "content/browser/media/audio_stream_monitor.h" |
Morten Stenshorne | 3e34dcbb | 2021-12-27 21:45:48 | [diff] [blame] | 8 | #include "content/browser/renderer_host/render_frame_host_impl.h" |
Scott Violet | c653128 | 2020-06-25 02:19:02 | [diff] [blame] | 9 | #include "content/browser/speech/tts_utterance_impl.h" |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 10 | #include "content/browser/web_contents/web_contents_impl.h" |
Matt Falkenhagen | 7b71b9f1 | 2021-08-12 01:32:43 | [diff] [blame] | 11 | #include "content/public/browser/web_contents.h" |
Scott Violet | c653128 | 2020-06-25 02:19:02 | [diff] [blame] | 12 | |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 13 | namespace content { |
| 14 | namespace { |
| 15 | |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 16 | using AudibleCB = base::RepeatingCallback< |
| 17 | std::unique_ptr<AudioStreamMonitor::AudibleClientRegistration>()>; |
| 18 | |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 19 | // The lifetime of instances of this class is manually bound to the lifetime of |
Di Wu | 50d43c7 | 2025-07-28 03:29:49 | [diff] [blame] | 20 | // the associated TtsUtterance. |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 21 | class EventThunk : public UtteranceEventDelegate { |
| 22 | public: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 23 | EventThunk(mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client, |
| 24 | AudibleCB audible_cb) |
| 25 | : client_(std::move(client)), audible_cb_(std::move(audible_cb)) {} |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 26 | ~EventThunk() override = default; |
| 27 | |
| 28 | // UtteranceEventDelegate methods: |
| 29 | void OnTtsEvent(TtsUtterance* utterance, |
| 30 | TtsEventType event_type, |
| 31 | int char_index, |
| 32 | int char_length, |
| 33 | const std::string& error_message) override { |
| 34 | // These values are unsigned in the web speech API, so -1 cannot be used as |
| 35 | // a sentinel value. Use 0 instead to match web standards. |
| 36 | char_index = std::max(char_index, 0); |
| 37 | char_length = std::max(char_length, 0); |
| 38 | |
| 39 | switch (event_type) { |
| 40 | case TTS_EVENT_START: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 41 | audible_client_ = audible_cb_.Run(); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 42 | client_->OnStartedSpeaking(); |
| 43 | break; |
| 44 | case TTS_EVENT_END: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 45 | audible_client_.reset(); |
Julie Jeongeun Kim | f6a5e23 | 2022-11-04 03:27:01 | [diff] [blame] | 46 | client_->OnFinishedSpeaking( |
| 47 | blink::mojom::SpeechSynthesisErrorCode::kNoError); |
| 48 | break; |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 49 | case TTS_EVENT_INTERRUPTED: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 50 | audible_client_.reset(); |
Julie Jeongeun Kim | f6a5e23 | 2022-11-04 03:27:01 | [diff] [blame] | 51 | client_->OnFinishedSpeaking( |
| 52 | blink::mojom::SpeechSynthesisErrorCode::kInterrupted); |
| 53 | break; |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 54 | case TTS_EVENT_CANCELLED: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 55 | audible_client_.reset(); |
Julie Jeongeun Kim | f6a5e23 | 2022-11-04 03:27:01 | [diff] [blame] | 56 | client_->OnFinishedSpeaking( |
| 57 | blink::mojom::SpeechSynthesisErrorCode::kCancelled); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 58 | break; |
| 59 | case TTS_EVENT_WORD: |
| 60 | client_->OnEncounteredWordBoundary(char_index, char_length); |
| 61 | break; |
| 62 | case TTS_EVENT_SENTENCE: |
| 63 | client_->OnEncounteredSentenceBoundary(char_index, 0); |
| 64 | break; |
| 65 | case TTS_EVENT_MARKER: |
| 66 | // The web platform API does not support this event. |
| 67 | break; |
| 68 | case TTS_EVENT_ERROR: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 69 | audible_client_.reset(); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 70 | // The web platform API does not support error text. |
| 71 | client_->OnEncounteredSpeakingError(); |
| 72 | break; |
| 73 | case TTS_EVENT_PAUSE: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 74 | audible_client_.reset(); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 75 | client_->OnPausedSpeaking(); |
| 76 | break; |
| 77 | case TTS_EVENT_RESUME: |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 78 | audible_client_ = audible_cb_.Run(); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 79 | client_->OnResumedSpeaking(); |
| 80 | break; |
| 81 | } |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 82 | } |
| 83 | |
| 84 | private: |
| 85 | mojo::Remote<blink::mojom::SpeechSynthesisClient> client_; |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 86 | AudibleCB audible_cb_; |
| 87 | std::unique_ptr<AudioStreamMonitor::AudibleClientRegistration> |
| 88 | audible_client_; |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 89 | }; |
| 90 | |
| 91 | void SendVoiceListToObserver( |
| 92 | blink::mojom::SpeechSynthesisVoiceListObserver* observer, |
| 93 | const std::vector<VoiceData>& voices) { |
| 94 | std::vector<blink::mojom::SpeechSynthesisVoicePtr> out_voices; |
| 95 | out_voices.resize(voices.size()); |
| 96 | for (size_t i = 0; i < voices.size(); ++i) { |
| 97 | blink::mojom::SpeechSynthesisVoicePtr& out_voice = out_voices[i]; |
| 98 | out_voice = blink::mojom::SpeechSynthesisVoice::New(); |
| 99 | out_voice->voice_uri = voices[i].name; |
| 100 | out_voice->name = voices[i].name; |
| 101 | out_voice->lang = voices[i].lang; |
| 102 | out_voice->is_local_service = !voices[i].remote; |
| 103 | out_voice->is_default = (i == 0); |
| 104 | } |
| 105 | observer->OnSetVoiceList(std::move(out_voices)); |
| 106 | } |
| 107 | |
| 108 | } // namespace |
| 109 | |
Scott Violet | c653128 | 2020-06-25 02:19:02 | [diff] [blame] | 110 | SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context, |
Matt Falkenhagen | 7b71b9f1 | 2021-08-12 01:32:43 | [diff] [blame] | 111 | RenderFrameHostImpl* rfh) |
| 112 | : browser_context_(browser_context), |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 113 | web_contents_(WebContents::FromRenderFrameHost((rfh))), |
| 114 | frame_id_(rfh->GetGlobalId()) { |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 115 | DCHECK(browser_context_); |
Scott Violet | c653128 | 2020-06-25 02:19:02 | [diff] [blame] | 116 | DCHECK(web_contents_); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 117 | TtsController::GetInstance()->AddVoicesChangedDelegate(this); |
| 118 | } |
| 119 | |
| 120 | SpeechSynthesisImpl::~SpeechSynthesisImpl() { |
| 121 | TtsController::GetInstance()->RemoveVoicesChangedDelegate(this); |
| 122 | |
| 123 | // NOTE: Some EventThunk instances may outlive this class, and that's okay. |
| 124 | // They have their lifetime bound to their associated TtsUtterance instance, |
| 125 | // and the TtsController manages the lifetime of those. |
| 126 | } |
| 127 | |
| 128 | void SpeechSynthesisImpl::AddReceiver( |
| 129 | mojo::PendingReceiver<blink::mojom::SpeechSynthesis> receiver) { |
| 130 | receiver_set_.Add(this, std::move(receiver)); |
| 131 | } |
| 132 | |
| 133 | void SpeechSynthesisImpl::AddVoiceListObserver( |
| 134 | mojo::PendingRemote<blink::mojom::SpeechSynthesisVoiceListObserver> |
| 135 | pending_observer) { |
| 136 | mojo::Remote<blink::mojom::SpeechSynthesisVoiceListObserver> observer( |
| 137 | std::move(pending_observer)); |
| 138 | |
| 139 | std::vector<VoiceData> voices; |
Joel Riley | 3300c0e8 | 2021-06-24 05:22:15 | [diff] [blame] | 140 | TtsController::GetInstance()->GetVoices(browser_context_, GURL(), &voices); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 141 | SendVoiceListToObserver(observer.get(), voices); |
| 142 | |
| 143 | observer_set_.Add(std::move(observer)); |
| 144 | } |
| 145 | |
| 146 | void SpeechSynthesisImpl::Speak( |
| 147 | blink::mojom::SpeechSynthesisUtterancePtr utterance, |
| 148 | mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client) { |
evliu | b5d2726 | 2021-09-30 00:10:01 | [diff] [blame] | 149 | if (web_contents_->IsAudioMuted()) |
| 150 | return; |
| 151 | |
Scott Violet | c653128 | 2020-06-25 02:19:02 | [diff] [blame] | 152 | std::unique_ptr<TtsUtterance> tts_utterance = |
| 153 | std::make_unique<TtsUtteranceImpl>(browser_context_, web_contents_); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 154 | tts_utterance->SetText(utterance->text); |
| 155 | tts_utterance->SetLang(utterance->lang); |
| 156 | tts_utterance->SetVoiceName(utterance->voice); |
David Tseng | c06cdad | 2020-11-20 01:21:43 | [diff] [blame] | 157 | tts_utterance->SetShouldClearQueue(false); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 158 | tts_utterance->SetContinuousParameters(utterance->rate, utterance->pitch, |
| 159 | utterance->volume); |
| 160 | |
| 161 | // See comments on EventThunk about how lifetime of this instance is managed. |
Di Wu | 50d43c7 | 2025-07-28 03:29:49 | [diff] [blame] | 162 | tts_utterance->SetEventDelegate(std::make_unique<EventThunk>( |
Dale Curtis | 84888a1 | 2025-05-13 19:05:59 | [diff] [blame] | 163 | std::move(client), |
| 164 | base::BindRepeating( |
| 165 | &AudioStreamMonitor::RegisterAudibleClient, |
| 166 | base::Unretained(static_cast<WebContentsImpl*>(web_contents_) |
| 167 | ->audio_stream_monitor()), |
| 168 | frame_id_))); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 169 | |
Darin Fisher | 07d6ef85 | 2019-09-12 18:58:49 | [diff] [blame] | 170 | TtsController::GetInstance()->SpeakOrEnqueue(std::move(tts_utterance)); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 171 | } |
| 172 | |
| 173 | void SpeechSynthesisImpl::Pause() { |
| 174 | TtsController::GetInstance()->Pause(); |
| 175 | } |
| 176 | |
| 177 | void SpeechSynthesisImpl::Resume() { |
| 178 | TtsController::GetInstance()->Resume(); |
| 179 | } |
| 180 | |
| 181 | void SpeechSynthesisImpl::Cancel() { |
| 182 | TtsController::GetInstance()->Stop(); |
| 183 | } |
| 184 | |
| 185 | void SpeechSynthesisImpl::OnVoicesChanged() { |
| 186 | std::vector<VoiceData> voices; |
Joel Riley | 3300c0e8 | 2021-06-24 05:22:15 | [diff] [blame] | 187 | TtsController::GetInstance()->GetVoices(browser_context_, GURL(), &voices); |
Darin Fisher | a26aa7e | 2019-09-02 20:22:59 | [diff] [blame] | 188 | for (auto& observer : observer_set_) |
| 189 | SendVoiceListToObserver(observer.get(), voices); |
| 190 | } |
| 191 | |
| 192 | } // namespace content |