blob: 1f07dd1fc6d443b0c1b699c2ff0d0733ee706943 [file] [log] [blame]
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/speech/speech_synthesis_impl.h"
#include "content/browser/media/audio_stream_monitor.h"
#include "content/browser/renderer_host/render_frame_host_impl.h"
#include "content/browser/speech/tts_utterance_impl.h"
#include "content/browser/web_contents/web_contents_impl.h"
#include "content/public/browser/web_contents.h"
namespace content {
namespace {
using AudibleCB = base::RepeatingCallback<
    std::unique_ptr<AudioStreamMonitor::AudibleClientRegistration>()>;

// Forwards TtsController utterance events to the renderer-side
// SpeechSynthesisClient over mojo. The lifetime of instances of this class is
// manually bound to the lifetime of the associated TtsUtterance.
class EventThunk : public UtteranceEventDelegate {
 public:
  EventThunk(mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client,
             AudibleCB audible_cb)
      : client_(std::move(client)), audible_cb_(std::move(audible_cb)) {}
  ~EventThunk() override = default;

  // UtteranceEventDelegate:
  void OnTtsEvent(TtsUtterance* utterance,
                  TtsEventType event_type,
                  int char_index,
                  int char_length,
                  const std::string& error_message) override {
    // These values are unsigned in the web speech API, so the TTS layer's -1
    // sentinel cannot be forwarded. Clamp to 0 to match web standards.
    const int index = std::max(char_index, 0);
    const int length = std::max(char_length, 0);

    switch (event_type) {
      case TTS_EVENT_START:
        AcquireAudibleRegistration();
        client_->OnStartedSpeaking();
        break;
      case TTS_EVENT_END:
        ReleaseAudibleRegistration();
        client_->OnFinishedSpeaking(
            blink::mojom::SpeechSynthesisErrorCode::kNoError);
        break;
      case TTS_EVENT_INTERRUPTED:
        ReleaseAudibleRegistration();
        client_->OnFinishedSpeaking(
            blink::mojom::SpeechSynthesisErrorCode::kInterrupted);
        break;
      case TTS_EVENT_CANCELLED:
        ReleaseAudibleRegistration();
        client_->OnFinishedSpeaking(
            blink::mojom::SpeechSynthesisErrorCode::kCancelled);
        break;
      case TTS_EVENT_WORD:
        client_->OnEncounteredWordBoundary(index, length);
        break;
      case TTS_EVENT_SENTENCE:
        // Sentence boundaries are reported with a zero length.
        client_->OnEncounteredSentenceBoundary(index, 0);
        break;
      case TTS_EVENT_MARKER:
        // The web platform API does not support this event.
        break;
      case TTS_EVENT_ERROR:
        ReleaseAudibleRegistration();
        // The web platform API does not support error text.
        client_->OnEncounteredSpeakingError();
        break;
      case TTS_EVENT_PAUSE:
        ReleaseAudibleRegistration();
        client_->OnPausedSpeaking();
        break;
      case TTS_EVENT_RESUME:
        AcquireAudibleRegistration();
        client_->OnResumedSpeaking();
        break;
    }
  }

 private:
  // Registers the frame as an audible client while speech is in progress.
  void AcquireAudibleRegistration() { audible_client_ = audible_cb_.Run(); }

  // Drops the audible-client registration when speech stops or pauses.
  void ReleaseAudibleRegistration() { audible_client_.reset(); }

  mojo::Remote<blink::mojom::SpeechSynthesisClient> client_;
  AudibleCB audible_cb_;
  std::unique_ptr<AudioStreamMonitor::AudibleClientRegistration>
      audible_client_;
};
// Converts |voices| to their mojo representation and pushes the list to a
// single observer. The first voice in the list is flagged as the default.
void SendVoiceListToObserver(
    blink::mojom::SpeechSynthesisVoiceListObserver* observer,
    const std::vector<VoiceData>& voices) {
  std::vector<blink::mojom::SpeechSynthesisVoicePtr> mojo_voices;
  mojo_voices.reserve(voices.size());
  for (const VoiceData& voice : voices) {
    auto mojo_voice = blink::mojom::SpeechSynthesisVoice::New();
    // The voice name doubles as its URI.
    mojo_voice->voice_uri = voice.name;
    mojo_voice->name = voice.name;
    mojo_voice->lang = voice.lang;
    mojo_voice->is_local_service = !voice.remote;
    // True only for the first converted voice.
    mojo_voice->is_default = mojo_voices.empty();
    mojo_voices.push_back(std::move(mojo_voice));
  }
  observer->OnSetVoiceList(std::move(mojo_voices));
}
} // namespace
// Binds this synthesis endpoint to the frame's WebContents and subscribes to
// platform voice-list change notifications.
SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context,
                                         RenderFrameHostImpl* rfh)
    : browser_context_(browser_context),
      web_contents_(WebContents::FromRenderFrameHost((rfh))),
      frame_id_(rfh->GetGlobalId()) {
  DCHECK(browser_context_);
  DCHECK(web_contents_);
  // Makes OnVoicesChanged() fire whenever the available TTS voices change so
  // that connected observers can be re-notified.
  TtsController::GetInstance()->AddVoicesChangedDelegate(this);
}
SpeechSynthesisImpl::~SpeechSynthesisImpl() {
  // Unsubscribe before members are destroyed so the controller never calls
  // back into a partially-destructed object.
  TtsController::GetInstance()->RemoveVoicesChangedDelegate(this);
  // NOTE: Some EventThunk instances may outlive this class, and that's okay.
  // They have their lifetime bound to their associated TtsUtterance instance,
  // and the TtsController manages the lifetime of those.
}
// Binds an additional renderer-side pipe to this implementation; all bound
// receivers share this single instance via |receiver_set_|.
void SpeechSynthesisImpl::AddReceiver(
    mojo::PendingReceiver<blink::mojom::SpeechSynthesis> receiver) {
  receiver_set_.Add(this, std::move(receiver));
}
// Registers a voice-list observer and immediately sends it the current voice
// list, so new observers do not have to wait for a voices-changed event.
void SpeechSynthesisImpl::AddVoiceListObserver(
    mojo::PendingRemote<blink::mojom::SpeechSynthesisVoiceListObserver>
        pending_observer) {
  mojo::Remote<blink::mojom::SpeechSynthesisVoiceListObserver> remote(
      std::move(pending_observer));

  std::vector<VoiceData> current_voices;
  TtsController::GetInstance()->GetVoices(browser_context_, GURL(),
                                          &current_voices);
  SendVoiceListToObserver(remote.get(), current_voices);

  observer_set_.Add(std::move(remote));
}
// Builds a TtsUtterance from the renderer's request and hands it to the
// global TtsController; |client| receives progress events via EventThunk.
void SpeechSynthesisImpl::Speak(
    blink::mojom::SpeechSynthesisUtterancePtr utterance,
    mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client) {
  // NOTE(review): when the tab is muted the utterance is silently dropped and
  // |client| never receives a completion event — confirm this is intended.
  if (web_contents_->IsAudioMuted()) {
    return;
  }

  auto tts_utterance =
      std::make_unique<TtsUtteranceImpl>(browser_context_, web_contents_);
  tts_utterance->SetText(utterance->text);
  tts_utterance->SetLang(utterance->lang);
  tts_utterance->SetVoiceName(utterance->voice);
  tts_utterance->SetShouldClearQueue(false);
  tts_utterance->SetContinuousParameters(utterance->rate, utterance->pitch,
                                         utterance->volume);

  // The audible-registration callback marks this frame as playing audio for
  // the duration of the speech. base::Unretained is used on the monitor,
  // which is owned by |web_contents_|.
  AudioStreamMonitor* monitor =
      static_cast<WebContentsImpl*>(web_contents_)->audio_stream_monitor();
  // See comments on EventThunk about how lifetime of this instance is managed.
  tts_utterance->SetEventDelegate(std::make_unique<EventThunk>(
      std::move(client),
      base::BindRepeating(&AudioStreamMonitor::RegisterAudibleClient,
                          base::Unretained(monitor), frame_id_)));

  TtsController::GetInstance()->SpeakOrEnqueue(std::move(tts_utterance));
}
// Delegates to the global TtsController's pause.
void SpeechSynthesisImpl::Pause() {
  TtsController::GetInstance()->Pause();
}
// Delegates to the global TtsController's resume.
void SpeechSynthesisImpl::Resume() {
  TtsController::GetInstance()->Resume();
}
// Cancel maps to the controller's Stop, halting current and queued speech.
void SpeechSynthesisImpl::Cancel() {
  TtsController::GetInstance()->Stop();
}
// VoicesChangedDelegate: re-queries the platform voice list and fans it out
// to every connected observer.
void SpeechSynthesisImpl::OnVoicesChanged() {
  std::vector<VoiceData> current_voices;
  TtsController::GetInstance()->GetVoices(browser_context_, GURL(),
                                          &current_voices);
  for (const auto& observer : observer_set_) {
    SendVoiceListToObserver(observer.get(), current_voices);
  }
}
} // namespace content