Avi Drissman | 4e1b7bc3 | 2022-09-15 14:03:50 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors |
[email protected] | 3bc0b56 | 2011-08-24 23:51:04 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
[email protected] | c52b289 | 2012-03-07 11:01:02 | [diff] [blame] | 5 | #include "content/browser/speech/speech_recognition_manager_impl.h" |
[email protected] | 3bc0b56 | 2011-08-24 23:51:04 | [diff] [blame] | 6 | |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 7 | #include <algorithm> |
| 8 | #include <map> |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 9 | #include <optional> |
dcheng | 36b6aec9 | 2015-12-26 06:16:36 | [diff] [blame] | 10 | #include <utility> |
Guido Urdaneta | 971548c | 2018-12-13 17:25:22 | [diff] [blame] | 11 | #include <vector> |
dcheng | 36b6aec9 | 2015-12-26 06:16:36 | [diff] [blame] | 12 | |
Avi Drissman | adac2199 | 2023-01-11 23:46:39 | [diff] [blame] | 13 | #include "base/functional/bind.h" |
| 14 | #include "base/functional/callback.h" |
skyostil | 95082a6 | 2015-06-05 19:53:07 | [diff] [blame] | 15 | #include "base/location.h" |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 16 | #include "base/memory/ref_counted_delete_on_sequence.h" |
Evan Liu | 045091f | 2024-11-20 22:24:41 | [diff] [blame] | 17 | #include "base/metrics/histogram_functions.h" |
Patrick Monette | 643cdf6 | 2021-10-15 19:13:42 | [diff] [blame] | 18 | #include "base/task/sequenced_task_runner.h" |
| 19 | #include "base/task/single_thread_task_runner.h" |
avi | b734894 | 2015-12-25 20:57:10 | [diff] [blame] | 20 | #include "build/build_config.h" |
Evan Liu | b2b7f6d | 2024-08-05 22:55:48 | [diff] [blame] | 21 | #include "components/soda/soda_util.h" |
[email protected] | 52e456b9 | 2012-02-23 17:13:18 | [diff] [blame] | 22 | #include "content/browser/browser_main_loop.h" |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 23 | #include "content/browser/renderer_host/media/media_stream_manager.h" |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 24 | #include "content/browser/renderer_host/media/media_stream_ui_proxy.h" |
Yaowei Zhou | f7df39c | 2024-02-29 04:53:48 | [diff] [blame] | 25 | #include "content/browser/speech/network_speech_recognition_engine_impl.h" |
[email protected] | ce1adc34 | 2013-05-20 13:35:43 | [diff] [blame] | 26 | #include "content/browser/speech/speech_recognizer_impl.h" |
Eric Seckler | 8652dcd5 | 2018-09-20 10:42:28 | [diff] [blame] | 27 | #include "content/public/browser/browser_task_traits.h" |
Matt Menke | 7b2266e | 2018-06-07 19:32:09 | [diff] [blame] | 28 | #include "content/public/browser/browser_thread.h" |
[email protected] | 66cfec6 | 2012-02-24 17:57:51 | [diff] [blame] | 29 | #include "content/public/browser/content_browser_client.h" |
Sreeja Kamishetty | 0a0961f | 2021-10-11 16:23:53 | [diff] [blame] | 30 | #include "content/public/browser/document_user_data.h" |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 31 | #include "content/public/browser/render_frame_host.h" |
[email protected] | ce96786 | 2012-02-09 22:47:05 | [diff] [blame] | 32 | #include "content/public/browser/resource_context.h" |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 33 | #include "content/public/browser/speech_recognition_audio_forwarder_config.h" |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 34 | #include "content/public/browser/speech_recognition_event_listener.h" |
[email protected] | c52b289 | 2012-03-07 11:01:02 | [diff] [blame] | 35 | #include "content/public/browser/speech_recognition_manager_delegate.h" |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 36 | #include "content/public/browser/speech_recognition_session_config.h" |
| 37 | #include "content/public/browser/speech_recognition_session_context.h" |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 38 | #include "content/public/browser/web_contents.h" |
| 39 | #include "content/public/browser/web_contents_observer.h" |
Hans Wennborg | 5ffd139 | 2019-10-16 11:00:02 | [diff] [blame] | 40 | #include "content/public/common/content_client.h" |
olka | 251dd569 | 2016-04-27 15:50:17 | [diff] [blame] | 41 | #include "media/audio/audio_device_description.h" |
Evan Liu | d422a37 | 2025-07-09 23:38:16 | [diff] [blame] | 42 | #include "media/base/limits.h" |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 43 | #include "media/mojo/mojom/speech_recognition.mojom.h" |
| 44 | #include "media/mojo/mojom/speech_recognition_audio_forwarder.mojom.h" |
Evan Liu | d725228 | 2024-05-16 20:10:32 | [diff] [blame] | 45 | #include "media/mojo/mojom/speech_recognition_error.mojom.h" |
| 46 | #include "media/mojo/mojom/speech_recognition_result.mojom.h" |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 47 | #include "media/mojo/mojom/speech_recognizer.mojom.h" |
| 48 | #include "mojo/public/cpp/bindings/pending_receiver.h" |
guidou | c6dd446 | 2016-04-30 18:00:37 | [diff] [blame] | 49 | #include "url/gurl.h" |
| 50 | #include "url/origin.h" |
[email protected] | 3bc0b56 | 2011-08-24 23:51:04 | [diff] [blame] | 51 | |
Xiaohan Wang | 8d67feb | 2022-01-15 14:37:43 | [diff] [blame] | 52 | #if BUILDFLAG(IS_ANDROID) |
[email protected] | 6c59004 | 2013-06-14 12:23:26 | [diff] [blame] | 53 | #include "content/browser/speech/speech_recognizer_impl_android.h" |
Evan Liu | 4528959 | 2024-03-21 17:46:12 | [diff] [blame] | 54 | #elif !BUILDFLAG(IS_FUCHSIA) |
| 55 | #include "components/soda/constants.h" |
Evan Liu | 4528959 | 2024-03-21 17:46:12 | [diff] [blame] | 56 | #include "components/soda/soda_util.h" |
| 57 | #include "content/browser/speech/soda_speech_recognition_engine_impl.h" |
| 58 | #include "media/base/media_switches.h" |
Evan Liu | 4528959 | 2024-03-21 17:46:12 | [diff] [blame] | 59 | #endif // BUILDFLAG(IS_ANDROID) |
[email protected] | 6c59004 | 2013-06-14 12:23:26 | [diff] [blame] | 60 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 61 | namespace content { |
[email protected] | 66cfec6 | 2012-02-24 17:57:51 | [diff] [blame] | 62 | |
[email protected] | f3b279e | 2013-01-29 20:48:46 | [diff] [blame] | 63 | SpeechRecognitionManager* SpeechRecognitionManager::manager_for_tests_; |
| 64 | |
[email protected] | 28df14d | 2012-05-16 14:51:22 | [diff] [blame] | 65 | namespace { |
[email protected] | fcb8e021 | 2012-10-29 11:57:18 | [diff] [blame] | 66 | |
| 67 | SpeechRecognitionManagerImpl* g_speech_recognition_manager_impl; |
[email protected] | a5274917 | 2012-06-14 10:28:26 | [diff] [blame] | 68 | |
Evan Liu | 045091f | 2024-11-20 22:24:41 | [diff] [blame] | 69 | constexpr char kWebSpeechAudioOnDeviceAvailableHistogram[] = |
| 70 | "Accessibility.WebSpeech.OnDeviceAvailable"; |
| 71 | constexpr char kWebSpeechAudioUseOnDeviceHistogram[] = |
| 72 | "Accessibility.WebSpeech.UseOnDevice"; |
| 73 | constexpr char kWebSpeechAudioUseAudioForwarderHistogram[] = |
| 74 | "Accessibility.WebSpeech.UseAudioForwarder"; |
Evan Liu | 1353e8e | 2025-06-04 17:13:56 | [diff] [blame] | 75 | constexpr char kWebSpeechCanRenderFrameUseOnDeviceHistogram[] = |
| 76 | "Accessibility.WebSpeech.CanRenderFrameUseOnDevice"; |
| 77 | constexpr char kWebSpeechIsOnDeviceSpeechRecognitionInstalledHistogram[] = |
| 78 | "Accessibility.WebSpeech.IsOnDeviceSpeechRecognitionInstalled"; |
Evan Liu | 045091f | 2024-11-20 22:24:41 | [diff] [blame] | 79 | |
[email protected] | 28df14d | 2012-05-16 14:51:22 | [diff] [blame] | 80 | } // namespace |
| 81 | |
Guido Urdaneta | 971548c | 2018-12-13 17:25:22 | [diff] [blame] | 82 | int SpeechRecognitionManagerImpl::next_requester_id_ = 0; |
| 83 | |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 84 | class FrameSessionTracker |
Sreeja Kamishetty | 0a0961f | 2021-10-11 16:23:53 | [diff] [blame] | 85 | : public content::DocumentUserData<FrameSessionTracker> { |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 86 | public: |
| 87 | using FrameDeletedCallback = |
| 88 | base::RepeatingCallback<void(int /* session_id */)>; |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 89 | |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 90 | ~FrameSessionTracker() override { |
| 91 | DCHECK_CURRENTLY_ON(BrowserThread::UI); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 92 | |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 93 | for (auto session : sessions_) { |
| 94 | GetIOThreadTaskRunner({})->PostTask( |
| 95 | FROM_HERE, base::BindOnce(frame_deleted_callback_, session)); |
| 96 | } |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 97 | } |
| 98 | |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 99 | static void CreateObserverForSession(int render_process_id, |
| 100 | int render_frame_id, |
| 101 | int session_id, |
| 102 | FrameDeletedCallback callback) { |
| 103 | DCHECK_CURRENTLY_ON(BrowserThread::UI); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 104 | |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 105 | RenderFrameHost* render_frame_host = |
| 106 | RenderFrameHost::FromID(render_process_id, render_frame_id); |
| 107 | if (!render_frame_host) |
| 108 | return; |
| 109 | |
| 110 | FrameSessionTracker* tracker = |
| 111 | GetOrCreateForCurrentDocument(render_frame_host); |
| 112 | |
| 113 | // This will clobber any previously set callback but it will always |
| 114 | // be the same binding. |
| 115 | tracker->SetCallback(std::move(callback)); |
| 116 | tracker->AddSession(session_id); |
| 117 | } |
| 118 | |
| 119 | static void RemoveObserverForSession(int render_process_id, |
| 120 | int render_frame_id, |
| 121 | int session_id) { |
| 122 | DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 123 | |
| 124 | RenderFrameHost* render_frame_host = |
| 125 | RenderFrameHost::FromID(render_process_id, render_frame_id); |
| 126 | if (!render_frame_host) |
| 127 | return; |
| 128 | |
| 129 | FrameSessionTracker* tracker = GetForCurrentDocument(render_frame_host); |
| 130 | if (!tracker) |
| 131 | return; |
| 132 | tracker->RemoveSession(session_id); |
| 133 | } |
| 134 | |
| 135 | private: |
Lukasz Anforowicz | cfeb95c | 2021-10-01 19:33:35 | [diff] [blame] | 136 | explicit FrameSessionTracker(content::RenderFrameHost* rfh) |
Sreeja Kamishetty | 0a0961f | 2021-10-11 16:23:53 | [diff] [blame] | 137 | : DocumentUserData<FrameSessionTracker>(rfh) {} |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 138 | |
Sreeja Kamishetty | 0a0961f | 2021-10-11 16:23:53 | [diff] [blame] | 139 | friend class content::DocumentUserData<FrameSessionTracker>; |
| 140 | DOCUMENT_USER_DATA_KEY_DECL(); |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 141 | |
| 142 | void AddSession(int session_id) { sessions_.insert(session_id); } |
| 143 | |
| 144 | void RemoveSession(int session_id) { sessions_.erase(session_id); } |
| 145 | |
| 146 | void SetCallback(FrameDeletedCallback callback) { |
| 147 | frame_deleted_callback_ = std::move(callback); |
| 148 | } |
| 149 | |
| 150 | FrameDeletedCallback frame_deleted_callback_; |
| 151 | std::set<int> sessions_; |
| 152 | }; |
| 153 | |
Sreeja Kamishetty | 0a0961f | 2021-10-11 16:23:53 | [diff] [blame] | 154 | DOCUMENT_USER_DATA_KEY_IMPL(FrameSessionTracker); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 155 | |
[email protected] | fcb8e021 | 2012-10-29 11:57:18 | [diff] [blame] | 156 | SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { |
[email protected] | f3b279e | 2013-01-29 20:48:46 | [diff] [blame] | 157 | if (manager_for_tests_) |
| 158 | return manager_for_tests_; |
[email protected] | fcb8e021 | 2012-10-29 11:57:18 | [diff] [blame] | 159 | return SpeechRecognitionManagerImpl::GetInstance(); |
| 160 | } |
[email protected] | 3bc0b56 | 2011-08-24 23:51:04 | [diff] [blame] | 161 | |
[email protected] | 0a208bf | 2013-10-01 21:09:54 | [diff] [blame] | 162 | void SpeechRecognitionManager::SetManagerForTesting( |
[email protected] | f3b279e | 2013-01-29 20:48:46 | [diff] [blame] | 163 | SpeechRecognitionManager* manager) { |
| 164 | manager_for_tests_ = manager; |
| 165 | } |
| 166 | |
[email protected] | c52b289 | 2012-03-07 11:01:02 | [diff] [blame] | 167 | SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { |
[email protected] | 28df14d | 2012-05-16 14:51:22 | [diff] [blame] | 168 | return g_speech_recognition_manager_impl; |
[email protected] | 3bc0b56 | 2011-08-24 23:51:04 | [diff] [blame] | 169 | } |
| 170 | |
Evan Liu | 68919fc | 2025-03-06 20:38:18 | [diff] [blame] | 171 | bool SpeechRecognitionManagerImpl::IsOnDeviceSpeechRecognitionInstalled( |
Evan Liu | 4528959 | 2024-03-21 17:46:12 | [diff] [blame] | 172 | const SpeechRecognitionSessionConfig& config) { |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 173 | #if !BUILDFLAG(IS_FUCHSIA) && !BUILDFLAG(IS_ANDROID) |
Evan Liu | 68919fc | 2025-03-06 20:38:18 | [diff] [blame] | 174 | return speech::IsOnDeviceSpeechRecognitionAvailable(config.language) == |
| 175 | media::mojom::AvailabilityStatus::kAvailable; |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 176 | #else |
| 177 | return false; |
Evan Liu | 4528959 | 2024-03-21 17:46:12 | [diff] [blame] | 178 | #endif // !BUILDFLAG(IS_FUCHSIA) && !BUILDFLAG(IS_ANDROID) |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 179 | } |
Evan Liu | 4528959 | 2024-03-21 17:46:12 | [diff] [blame] | 180 | |
[email protected] | aa44546 | 2013-06-21 17:12:36 | [diff] [blame] | 181 | SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl( |
olka | ef762c9 | 2017-02-06 16:45:16 | [diff] [blame] | 182 | media::AudioSystem* audio_system, |
olka | 087876b | 2017-01-27 12:50:12 | [diff] [blame] | 183 | MediaStreamManager* media_stream_manager) |
olka | ef762c9 | 2017-02-06 16:45:16 | [diff] [blame] | 184 | : audio_system_(audio_system), |
| 185 | media_stream_manager_(media_stream_manager), |
olka | 087876b | 2017-01-27 12:50:12 | [diff] [blame] | 186 | delegate_(GetContentClient() |
| 187 | ->browser() |
| 188 | ->CreateSpeechRecognitionManagerDelegate()), |
Jeremy Roman | 3bca4bf | 2019-07-11 03:41:25 | [diff] [blame] | 189 | requester_id_(next_requester_id_++) { |
[email protected] | 28df14d | 2012-05-16 14:51:22 | [diff] [blame] | 190 | DCHECK(!g_speech_recognition_manager_impl); |
| 191 | g_speech_recognition_manager_impl = this; |
[email protected] | 66cfec6 | 2012-02-24 17:57:51 | [diff] [blame] | 192 | } |
| 193 | |
[email protected] | c52b289 | 2012-03-07 11:01:02 | [diff] [blame] | 194 | SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { |
Kevin Marshall | f788237 | 2017-06-06 00:14:34 | [diff] [blame] | 195 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | 28df14d | 2012-05-16 14:51:22 | [diff] [blame] | 196 | DCHECK(g_speech_recognition_manager_impl); |
Kevin Marshall | f788237 | 2017-06-06 00:14:34 | [diff] [blame] | 197 | |
Ivan Kotenkov | 2c0d2bb3 | 2017-11-01 15:41:28 | [diff] [blame] | 198 | g_speech_recognition_manager_impl = nullptr; |
[email protected] | 0944a729 | 2011-09-21 16:45:06 | [diff] [blame] | 199 | } |
| 200 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 201 | int SpeechRecognitionManagerImpl::CreateSession( |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 202 | const SpeechRecognitionSessionConfig& config) { |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 203 | return CreateSession(std::move(config), mojo::NullReceiver(), |
| 204 | mojo::NullRemote(), std::nullopt); |
| 205 | } |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 206 | |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 207 | int SpeechRecognitionManagerImpl::CreateSession( |
| 208 | const SpeechRecognitionSessionConfig& config, |
| 209 | mojo::PendingReceiver<media::mojom::SpeechRecognitionSession> |
| 210 | session_receiver, |
| 211 | mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient> |
| 212 | client_remote, |
| 213 | std::optional<SpeechRecognitionAudioForwarderConfig> |
| 214 | audio_forwarder_config) { |
Evan Liu | 7928492 | 2025-05-29 20:29:10 | [diff] [blame] | 215 | return CreateSession( |
| 216 | std::move(config), std::move(session_receiver), std::move(client_remote), |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 217 | audio_forwarder_config.has_value() |
| 218 | ? std::make_optional<SpeechRecognitionAudioForwarderConfig>( |
| 219 | audio_forwarder_config.value()) |
Evan Liu | 7928492 | 2025-05-29 20:29:10 | [diff] [blame] | 220 | : std::nullopt, |
| 221 | /*can_render_frame_use_on_device=*/ |
| 222 | false); // On-device speech recognition may only be used if the callsite |
| 223 | // explicitly checks if the render frame is permitted to use it. |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 224 | } |
| 225 | |
| 226 | void SpeechRecognitionManagerImpl::StartSession(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 227 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 228 | if (!SessionExists(session_id)) |
| 229 | return; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 230 | |
Evan Liu | 1c0164d | 2025-02-12 20:40:36 | [diff] [blame] | 231 | if (sessions_[session_id]->use_microphone) { |
| 232 | // If there is another session using the microphone, abort that. |
| 233 | if (microphone_session_id_ != kSessionIDInvalid && |
| 234 | microphone_session_id_ != session_id) { |
| 235 | AbortSession(microphone_session_id_); |
| 236 | } |
| 237 | |
| 238 | microphone_session_id_ = session_id; |
| 239 | |
| 240 | if (delegate_) { |
| 241 | delegate_->CheckRecognitionIsAllowed( |
| 242 | session_id, |
| 243 | base::BindOnce( |
| 244 | &SpeechRecognitionManagerImpl::RecognitionAllowedCallback, |
| 245 | weak_factory_.GetWeakPtr(), session_id)); |
| 246 | } |
| 247 | return; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 248 | } |
| 249 | |
Evan Liu | 1c0164d | 2025-02-12 20:40:36 | [diff] [blame] | 250 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
| 251 | FROM_HERE, |
| 252 | base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 253 | weak_factory_.GetWeakPtr(), session_id, EVENT_START)); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 254 | } |
| 255 | |
| 256 | void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id, |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 257 | bool ask_user, |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 258 | bool is_allowed) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 259 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 260 | |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 261 | auto iter = sessions_.find(session_id); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 262 | if (iter == sessions_.end()) |
| 263 | return; |
| 264 | |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 265 | Session* session = iter->second.get(); |
[email protected] | 6df4ab9 | 2013-10-02 19:22:28 | [diff] [blame] | 266 | |
| 267 | if (session->abort_requested) |
| 268 | return; |
| 269 | |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 270 | if (ask_user) { |
[email protected] | 6df4ab9 | 2013-10-02 19:22:28 | [diff] [blame] | 271 | SpeechRecognitionSessionContext& context = session->context; |
[email protected] | aa44546 | 2013-06-21 17:12:36 | [diff] [blame] | 272 | context.label = media_stream_manager_->MakeMediaAccessRequest( |
Bryant Chandler | abd520a | 2023-10-30 17:47:35 | [diff] [blame] | 273 | {context.render_process_id, context.render_frame_id}, requester_id_, |
Scott Violet | b166d20 | 2021-01-27 22:01:11 | [diff] [blame] | 274 | session_id, blink::StreamControls(true, false), context.security_origin, |
Mark Pilgrim | 3543420 | 2017-07-14 19:43:24 | [diff] [blame] | 275 | base::BindOnce( |
[email protected] | aa44546 | 2013-06-21 17:12:36 | [diff] [blame] | 276 | &SpeechRecognitionManagerImpl::MediaRequestPermissionCallback, |
| 277 | weak_factory_.GetWeakPtr(), session_id)); |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 278 | return; |
| 279 | } |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 280 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 281 | if (is_allowed) { |
Sean Maher | 5b9af51f | 2022-11-21 15:32:47 | [diff] [blame] | 282 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
[email protected] | dd32b127 | 2013-05-04 14:17:11 | [diff] [blame] | 283 | FROM_HERE, |
tzik | e2aca99 | 2017-09-05 08:50:54 | [diff] [blame] | 284 | base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 285 | weak_factory_.GetWeakPtr(), session_id, EVENT_START)); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 286 | } else { |
Adithya Srinivasan | c35bf396 | 2018-06-12 14:28:14 | [diff] [blame] | 287 | OnRecognitionError( |
Evan Liu | d725228 | 2024-05-16 20:10:32 | [diff] [blame] | 288 | session_id, media::mojom::SpeechRecognitionError( |
| 289 | media::mojom::SpeechRecognitionErrorCode::kNotAllowed, |
| 290 | media::mojom::SpeechAudioErrorDetails::kNone)); |
Sean Maher | 5b9af51f | 2022-11-21 15:32:47 | [diff] [blame] | 291 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
[email protected] | dd32b127 | 2013-05-04 14:17:11 | [diff] [blame] | 292 | FROM_HERE, |
tzik | e2aca99 | 2017-09-05 08:50:54 | [diff] [blame] | 293 | base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 294 | weak_factory_.GetWeakPtr(), session_id, EVENT_ABORT)); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 295 | } |
| 296 | } |
| 297 | |
[email protected] | 920cfb4 | 2012-11-21 17:26:08 | [diff] [blame] | 298 | void SpeechRecognitionManagerImpl::MediaRequestPermissionCallback( |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 299 | int session_id, |
Simon Hangl | 1131b4a | 2022-05-25 10:25:19 | [diff] [blame] | 300 | const blink::mojom::StreamDevicesSet& stream_devices_set, |
dcheng | 5971627 | 2016-04-09 05:19:08 | [diff] [blame] | 301 | std::unique_ptr<MediaStreamUIProxy> stream_ui) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 302 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | 920cfb4 | 2012-11-21 17:26:08 | [diff] [blame] | 303 | |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 304 | auto iter = sessions_.find(session_id); |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 305 | if (iter == sessions_.end()) |
| 306 | return; |
[email protected] | 920cfb4 | 2012-11-21 17:26:08 | [diff] [blame] | 307 | |
Simon Hangl | 1131b4a | 2022-05-25 10:25:19 | [diff] [blame] | 308 | // The SpeechRecognictionManager is not used with multiple streams |
Simon Hangl | f6f9345 | 2023-05-09 17:11:53 | [diff] [blame] | 309 | // which is only supported in combination with the getAllScreensMedia API. |
Simon Hangl | 4474b6eb | 2022-06-28 23:27:36 | [diff] [blame] | 310 | // The |stream_devices| vector can be empty e.g. if the permission |
| 311 | // was denied. |
| 312 | DCHECK_LE(stream_devices_set.stream_devices.size(), 1u); |
Simon Hangl | fd537997 | 2022-06-09 09:36:54 | [diff] [blame] | 313 | |
Simon Hangl | 574cecf | 2022-05-05 06:19:46 | [diff] [blame] | 314 | blink::MediaStreamDevices devices_list = |
Simon Hangl | fd537997 | 2022-06-09 09:36:54 | [diff] [blame] | 315 | blink::ToMediaStreamDevicesList(stream_devices_set); |
Simon Hangl | 574cecf | 2022-05-05 06:19:46 | [diff] [blame] | 316 | const bool is_allowed = !devices_list.empty(); |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 317 | if (is_allowed) { |
| 318 | // Copy the approved devices array to the context for UI indication. |
Simon Hangl | 574cecf | 2022-05-05 06:19:46 | [diff] [blame] | 319 | iter->second->context.devices = devices_list; |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 320 | |
| 321 | // Save the UI object. |
dcheng | 36b6aec9 | 2015-12-26 06:16:36 | [diff] [blame] | 322 | iter->second->ui = std::move(stream_ui); |
[email protected] | 920cfb4 | 2012-11-21 17:26:08 | [diff] [blame] | 323 | } |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 324 | |
| 325 | // Clear the label to indicate the request has been done. |
| 326 | iter->second->context.label.clear(); |
| 327 | |
| 328 | // Notify the recognition about the request result. |
| 329 | RecognitionAllowedCallback(iter->first, false, is_allowed); |
[email protected] | 920cfb4 | 2012-11-21 17:26:08 | [diff] [blame] | 330 | } |
| 331 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 332 | void SpeechRecognitionManagerImpl::AbortSession(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 333 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 334 | auto iter = sessions_.find(session_id); |
| 335 | if (iter == sessions_.end()) |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 336 | return; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 337 | |
Gabriel Charette | e7cdc5cd | 2020-05-27 23:35:05 | [diff] [blame] | 338 | GetUIThreadTaskRunner({})->PostTask( |
| 339 | FROM_HERE, |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 340 | base::BindOnce(&FrameSessionTracker::RemoveObserverForSession, |
Gabriel Charette | e7cdc5cd | 2020-05-27 23:35:05 | [diff] [blame] | 341 | iter->second->config.initial_context.render_process_id, |
| 342 | iter->second->config.initial_context.render_frame_id, |
| 343 | session_id)); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 344 | |
| 345 | AbortSessionImpl(session_id); |
| 346 | } |
| 347 | |
| 348 | void SpeechRecognitionManagerImpl::AbortSessionImpl(int session_id) { |
| 349 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| 350 | |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 351 | auto iter = sessions_.find(session_id); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 352 | if (iter == sessions_.end()) |
| 353 | return; |
| 354 | |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 355 | iter->second->ui.reset(); |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 356 | |
[email protected] | 6df4ab9 | 2013-10-02 19:22:28 | [diff] [blame] | 357 | if (iter->second->abort_requested) |
| 358 | return; |
| 359 | |
| 360 | iter->second->abort_requested = true; |
| 361 | |
Sean Maher | 5b9af51f | 2022-11-21 15:32:47 | [diff] [blame] | 362 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
[email protected] | dd32b127 | 2013-05-04 14:17:11 | [diff] [blame] | 363 | FROM_HERE, |
tzik | e2aca99 | 2017-09-05 08:50:54 | [diff] [blame] | 364 | base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 365 | weak_factory_.GetWeakPtr(), session_id, EVENT_ABORT)); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 366 | } |
| 367 | |
| 368 | void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 369 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 370 | |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 371 | auto iter = sessions_.find(session_id); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 372 | if (iter == sessions_.end()) |
| 373 | return; |
| 374 | |
Avi Drissman | 2c707cb | 2020-09-23 14:57:39 | [diff] [blame] | 375 | GetUIThreadTaskRunner({})->PostTask( |
| 376 | FROM_HERE, |
Dave Tapuska | 54d43989 | 2021-08-12 16:43:17 | [diff] [blame] | 377 | base::BindOnce(&FrameSessionTracker::RemoveObserverForSession, |
Avi Drissman | 2c707cb | 2020-09-23 14:57:39 | [diff] [blame] | 378 | iter->second->config.initial_context.render_process_id, |
| 379 | iter->second->config.initial_context.render_frame_id, |
| 380 | session_id)); |
| 381 | |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 382 | iter->second->ui.reset(); |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 383 | |
Sean Maher | 5b9af51f | 2022-11-21 15:32:47 | [diff] [blame] | 384 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
tzik | e2aca99 | 2017-09-05 08:50:54 | [diff] [blame] | 385 | FROM_HERE, base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 386 | weak_factory_.GetWeakPtr(), session_id, |
| 387 | EVENT_STOP_CAPTURE)); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 388 | } |
| 389 | |
Yiren Wang | 2399856 | 2025-01-28 21:31:05 | [diff] [blame] | 390 | void SpeechRecognitionManagerImpl::UpdateRecognitionContextForSession( |
| 391 | int session_id, |
| 392 | const media::SpeechRecognitionRecognitionContext& recognition_context) { |
| 393 | CHECK_CURRENTLY_ON(BrowserThread::IO); |
| 394 | auto iter = sessions_.find(session_id); |
| 395 | if (iter == sessions_.end()) { |
| 396 | return; |
| 397 | } |
| 398 | iter->second->recognition_context = recognition_context; |
| 399 | |
| 400 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
| 401 | FROM_HERE, base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 402 | weak_factory_.GetWeakPtr(), session_id, |
| 403 | EVENT_UPDATE_RECOGNITION_CONTEXT)); |
| 404 | } |
| 405 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 406 | // Here begins the SpeechRecognitionEventListener interface implementation, |
| 407 | // which will simply relay the events to the proper listener registered for the |
[email protected] | 25bed924 | 2014-05-02 22:05:41 | [diff] [blame] | 408 | // particular session and to the catch-all listener provided by the delegate |
| 409 | // (if any). |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 410 | |
| 411 | void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 412 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 413 | if (!SessionExists(session_id)) |
| 414 | return; |
| 415 | |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 416 | auto iter = sessions_.find(session_id); |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 417 | if (iter->second->ui) { |
| 418 | // Notify the UI that the devices are being used. |
Sergey Poromov | cbe6f7f | 2020-11-05 14:51:26 | [diff] [blame] | 419 | iter->second->ui->OnStarted( |
| 420 | base::OnceClosure(), MediaStreamUI::SourceCallback(), |
| 421 | MediaStreamUIProxy::WindowIdCallback(), /*label=*/std::string(), |
Scott Violet | b166d20 | 2021-01-27 22:01:11 | [diff] [blame] | 422 | /*screen_capture_ids=*/{}, MediaStreamUI::StateChangeCallback()); |
[email protected] | 920cfb4 | 2012-11-21 17:26:08 | [diff] [blame] | 423 | } |
[email protected] | 920cfb4 | 2012-11-21 17:26:08 | [diff] [blame] | 424 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 425 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 426 | delegate_listener->OnRecognitionStart(session_id); |
| 427 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 428 | listener->OnRecognitionStart(session_id); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 429 | } |
| 430 | |
| 431 | void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 432 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 433 | if (!SessionExists(session_id)) |
| 434 | return; |
| 435 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 436 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 437 | delegate_listener->OnAudioStart(session_id); |
| 438 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 439 | listener->OnAudioStart(session_id); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 440 | } |
| 441 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 442 | void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 443 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 444 | if (!SessionExists(session_id)) |
| 445 | return; |
| 446 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 447 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 448 | delegate_listener->OnSoundStart(session_id); |
| 449 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 450 | listener->OnSoundStart(session_id); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 451 | } |
| 452 | |
| 453 | void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 454 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 455 | if (!SessionExists(session_id)) |
| 456 | return; |
| 457 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 458 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 459 | delegate_listener->OnSoundEnd(session_id); |
| 460 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 461 | listener->OnSoundEnd(session_id); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 462 | } |
| 463 | |
| 464 | void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 465 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 466 | if (!SessionExists(session_id)) |
| 467 | return; |
| 468 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 469 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 470 | delegate_listener->OnAudioEnd(session_id); |
| 471 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 472 | listener->OnAudioEnd(session_id); |
Sean Maher | 5b9af51f | 2022-11-21 15:32:47 | [diff] [blame] | 473 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
tzik | e2aca99 | 2017-09-05 08:50:54 | [diff] [blame] | 474 | FROM_HERE, base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 475 | weak_factory_.GetWeakPtr(), session_id, |
| 476 | EVENT_AUDIO_ENDED)); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 477 | } |
| 478 | |
[email protected] | fc88c1e | 2012-12-04 09:54:36 | [diff] [blame] | 479 | void SpeechRecognitionManagerImpl::OnRecognitionResults( |
Adithya Srinivasan | e75e328 | 2018-06-01 15:09:00 | [diff] [blame] | 480 | int session_id, |
Evan Liu | d725228 | 2024-05-16 20:10:32 | [diff] [blame] | 481 | const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 482 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 483 | if (!SessionExists(session_id)) |
| 484 | return; |
| 485 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 486 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
[email protected] | fc88c1e | 2012-12-04 09:54:36 | [diff] [blame] | 487 | delegate_listener->OnRecognitionResults(session_id, results); |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 488 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
[email protected] | fc88c1e | 2012-12-04 09:54:36 | [diff] [blame] | 489 | listener->OnRecognitionResults(session_id, results); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 490 | } |
| 491 | |
| 492 | void SpeechRecognitionManagerImpl::OnRecognitionError( |
Adithya Srinivasan | ce749506 | 2018-05-28 16:12:40 | [diff] [blame] | 493 | int session_id, |
Evan Liu | d725228 | 2024-05-16 20:10:32 | [diff] [blame] | 494 | const media::mojom::SpeechRecognitionError& error) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 495 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 496 | if (!SessionExists(session_id)) |
| 497 | return; |
| 498 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 499 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 500 | delegate_listener->OnRecognitionError(session_id, error); |
| 501 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 502 | listener->OnRecognitionError(session_id, error); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 503 | } |
| 504 | |
| 505 | void SpeechRecognitionManagerImpl::OnAudioLevelsChange( |
| 506 | int session_id, float volume, float noise_volume) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 507 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 508 | if (!SessionExists(session_id)) |
| 509 | return; |
| 510 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 511 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 512 | delegate_listener->OnAudioLevelsChange(session_id, volume, noise_volume); |
| 513 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 514 | listener->OnAudioLevelsChange(session_id, volume, noise_volume); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 515 | } |
| 516 | |
// Creates a speech recognition session described by |config|.
//
// Steps, in order:
//   1. Allocates a fresh session id and records the availability/usage
//      histograms.
//   2. Validates the request. If validation fails, the error followed by an
//      "ended" notification is delivered to |client_remote| (or, when that
//      remote is not bound, to |config.event_listener|) and the freshly
//      allocated id is returned WITHOUT registering a session.
//   3. On non-Android, non-Fuchsia builds, when on-device recognition is used
//      together with an audio forwarder, the session is handed over to
//      |speech_recognition_context_| and 0 is returned because the browser
//      does not track that session.
//   4. Otherwise a recognizer is created, the session is stored in
//      |sessions_|, a FrameSessionTracker observer is requested on the UI
//      thread, and the session id is returned.
//
// |session_receiver| is consumed only in step 3. |audio_forwarder_config|,
// when set, replaces microphone capture (see |use_microphone| below).
int SpeechRecognitionManagerImpl::CreateSession(
    const SpeechRecognitionSessionConfig& config,
    mojo::PendingReceiver<media::mojom::SpeechRecognitionSession>
        session_receiver,
    mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient>
        client_remote,
    std::optional<SpeechRecognitionAudioForwarderConfig> audio_forwarder_config,
    bool can_render_frame_use_on_device) {
  DCHECK_CURRENTLY_ON(BrowserThread::IO);
  const int session_id = GetNextSessionID();
  DCHECK(!SessionExists(session_id));

  // These three histograms are recorded for every call, including calls that
  // end up being rejected below.
  base::UmaHistogramBoolean(kWebSpeechAudioOnDeviceAvailableHistogram,
                            IsOnDeviceSpeechRecognitionInstalled(config));
  base::UmaHistogramBoolean(kWebSpeechAudioUseOnDeviceHistogram,
                            UseOnDeviceSpeechRecognition(config));
  base::UmaHistogramBoolean(kWebSpeechAudioUseAudioForwarderHistogram,
                            audio_forwarder_config.has_value());

  // Initialize the error to be none. Each failing check below overwrites
  // |error|, so when several checks fail the last one determines the code
  // that is reported.
  media::mojom::SpeechRecognitionErrorCode error =
      media::mojom::SpeechRecognitionErrorCode::kNone;

  if (UseOnDeviceSpeechRecognition(config)) {
    base::UmaHistogramBoolean(kWebSpeechCanRenderFrameUseOnDeviceHistogram,
                              can_render_frame_use_on_device);
    // The caller decides whether the frame may use on-device recognition.
    if (!can_render_frame_use_on_device) {
      error = media::mojom::SpeechRecognitionErrorCode::kServiceNotAllowed;
    }

    bool is_on_device_speech_recognition_installed =
        IsOnDeviceSpeechRecognitionInstalled(config);
    base::UmaHistogramBoolean(
        kWebSpeechIsOnDeviceSpeechRecognitionInstalledHistogram,
        is_on_device_speech_recognition_installed);
    // Set the error if on-device speech recognition must be used but is not
    // available.
    if (!is_on_device_speech_recognition_installed) {
      error = media::mojom::SpeechRecognitionErrorCode::kLanguageNotSupported;
    }
  } else {
    // Set the error if on-device speech recognition is not used but
    // recognition context is set.
    if (config.recognition_context.has_value()) {
      error = media::mojom::SpeechRecognitionErrorCode::kPhrasesNotSupported;
    }
  }

  // Reject forwarded audio whose sample rate or channel count falls outside
  // the media::limits bounds (a non-positive channel count is also rejected).
  if (audio_forwarder_config.has_value() &&
      (audio_forwarder_config.value().sample_rate >
           media::limits::kMaxSampleRate ||
       audio_forwarder_config.value().sample_rate <
           media::limits::kMinSampleRate ||
       audio_forwarder_config.value().channel_count <= 0 ||
       audio_forwarder_config.value().channel_count >
           media::limits::kMaxChannels)) {
    error = media::mojom::SpeechRecognitionErrorCode::kAudioCapture;
  }

  // Throw the error and do not create the session if error is found.
  if (error != media::mojom::SpeechRecognitionErrorCode::kNone) {
    mojo::Remote<media::mojom::SpeechRecognitionSessionClient> client(
        std::move(client_remote));
    if (client.is_bound()) {
      client->ErrorOccurred(media::mojom::SpeechRecognitionError::New(
          error, media::mojom::SpeechAudioErrorDetails::kNone));
      client->Ended();
    } else if (config.event_listener) {
      // The client may have been moved into the event_listener such as what
      // SpeechRecognitionDispatcherHost does, so throw the error there.
      config.event_listener.get()->OnRecognitionError(
          session_id, media::mojom::SpeechRecognitionError(
                          error, media::mojom::SpeechAudioErrorDetails::kNone));
      config.event_listener.get()->OnRecognitionEnd(session_id);
    } else {
      // At least one client should have been informed of the error.
      NOTREACHED();
    }
    // Note: the id is returned even though no session was added to
    // |sessions_| for it.
    return session_id;
  }

  // Set-up the new session.
  auto session = std::make_unique<Session>();
  session->id = session_id;
  session->config = config;
  session->context = config.initial_context;
  // Audio comes from the microphone only when no forwarder was supplied.
  session->use_microphone = !audio_forwarder_config.has_value();

  // Everything up to the matching #else below applies to non-Android builds.
#if !BUILDFLAG(IS_ANDROID)
#if !BUILDFLAG(IS_FUCHSIA)
  if (UseOnDeviceSpeechRecognition(config) &&
      audio_forwarder_config.has_value()) {
    // Both values were already range-checked above; these CHECKs restate the
    // positivity requirement right before the values are used.
    CHECK_GT(audio_forwarder_config.value().channel_count, 0);
    CHECK_GT(audio_forwarder_config.value().sample_rate, 0);
    // The speech recognition service process will create and manage the speech
    // recognition session instead of the browser. Raw audio will be passed
    // directly to the speech recognition process and speech recognition events
    // will be returned directly to the renderer, bypassing the browser
    // entirely.
    if (!speech_recognition_context_.is_bound()) {
      // Bind the context remote on first use through the manager delegate.
      // A missing delegate is treated as fatal (CHECK below).
      raw_ptr<SpeechRecognitionManagerDelegate>
          speech_recognition_mgr_delegate =
              SpeechRecognitionManagerImpl::GetInstance()
                  ? SpeechRecognitionManagerImpl::GetInstance()->delegate()
                  : nullptr;

      CHECK(speech_recognition_mgr_delegate);
      mojo::PendingReceiver<media::mojom::SpeechRecognitionContext>
          speech_recognition_context_receiver =
              speech_recognition_context_.BindNewPipeAndPassReceiver();
      speech_recognition_mgr_delegate->BindSpeechRecognitionContext(
          std::move(speech_recognition_context_receiver), config.language);
    }

    media::mojom::SpeechRecognitionOptionsPtr options =
        media::mojom::SpeechRecognitionOptions::New();
    options->recognition_mode = media::mojom::SpeechRecognitionMode::kCaption;
    options->enable_formatting = false;
    options->recognizer_client_type =
        media::mojom::RecognizerClientType::kLiveCaption;
    options->skip_continuously_empty_audio = true;
    options->recognition_context = config.recognition_context;

    // Hands the session receiver, the client remote and the audio forwarder
    // over to the speech recognition context.
    speech_recognition_context_->BindWebSpeechRecognizer(
        std::move(session_receiver), std::move(client_remote),
        std::move(audio_forwarder_config.value().audio_forwarder),
        audio_forwarder_config.value().channel_count,
        audio_forwarder_config.value().sample_rate, std::move(options),
        config.continuous);

    // The session is managed by the speech recognition service directly thus
    // does not need to be associated with a session id in the browser.
    // |session| created above is discarded here without being registered.
    return 0;
  }
#endif  //! BUILDFLAG(IS_FUCHSIA)

  std::unique_ptr<SpeechRecognitionEngine> speech_recognition_engine;

#if !BUILDFLAG(IS_FUCHSIA)
  if (UseOnDeviceSpeechRecognition(config)) {
    // Try the on-device (SODA) engine first. If Initialize() fails the engine
    // is dropped and the network engine below is used instead.
    std::unique_ptr<SodaSpeechRecognitionEngineImpl>
        soda_speech_recognition_engine =
            std::make_unique<SodaSpeechRecognitionEngineImpl>(config);
    if (soda_speech_recognition_engine->Initialize()) {
      speech_recognition_engine = std::move(soda_speech_recognition_engine);
    }
  }
#endif  //! BUILDFLAG(IS_FUCHSIA)

  if (!speech_recognition_engine) {
    // A NetworkSpeechRecognitionEngineImpl (and corresponding Config) is
    // required only when using SpeechRecognizerImpl, which performs the audio
    // capture and endpointing in the browser. This is not the case of Android
    // where, not only the speech recognition, but also the audio capture and
    // endpointing activities are performed outside of the browser (delegated
    // via JNI to the Android API implementation).

    NetworkSpeechRecognitionEngineImpl::Config remote_engine_config;
    remote_engine_config.language = config.language;
    remote_engine_config.grammars = config.grammars;
    // Forwarded audio keeps its own sample rate; microphone capture uses the
    // recognizer's fixed rate.
    remote_engine_config.audio_sample_rate =
        audio_forwarder_config.has_value()
            ? audio_forwarder_config.value().sample_rate
            : SpeechRecognizerImpl::kAudioSampleRate;
    remote_engine_config.audio_num_bits_per_sample =
        SpeechRecognizerImpl::kNumBitsPerAudioSample;
    remote_engine_config.filter_profanities = config.filter_profanities;
    remote_engine_config.continuous = config.continuous;
    remote_engine_config.interim_results = config.interim_results;
    remote_engine_config.max_hypotheses = config.max_hypotheses;
    remote_engine_config.origin_url = config.origin.Serialize();
    remote_engine_config.auth_token = config.auth_token;
    remote_engine_config.auth_scope = config.auth_scope;
    remote_engine_config.preamble = config.preamble;

    std::unique_ptr<NetworkSpeechRecognitionEngineImpl> google_remote_engine =
        std::make_unique<NetworkSpeechRecognitionEngineImpl>(
            config.shared_url_loader_factory);
    google_remote_engine->SetConfig(remote_engine_config);
    speech_recognition_engine = std::move(google_remote_engine);
  }

  // The recognizer receives the engine chosen above and, if present, the
  // audio forwarder configuration.
  session->recognizer = new SpeechRecognizerImpl(
      this, audio_system_, session_id, config.continuous,
      config.interim_results, std::move(speech_recognition_engine),
      audio_forwarder_config.has_value()
          ? std::make_optional<SpeechRecognitionAudioForwarderConfig>(
                audio_forwarder_config.value())
          : std::nullopt);

#else
  // Android: recognition is handled by SpeechRecognizerImplAndroid.
  session->recognizer = new SpeechRecognizerImplAndroid(this, session_id);
#endif  //! BUILDFLAG(IS_ANDROID)

  // From here on the session is owned by |sessions_|.
  sessions_[session_id] = std::move(session);

  // Ask the UI thread to create an observer for the originating frame. The
  // observer is given AbortSessionImpl, bound through a weak pointer, as its
  // callback.
  GetUIThreadTaskRunner({})->PostTask(
      FROM_HERE,
      base::BindOnce(
          &FrameSessionTracker::CreateObserverForSession,
          config.initial_context.render_process_id,
          config.initial_context.render_frame_id, session_id,
          base::BindRepeating(&SpeechRecognitionManagerImpl::AbortSessionImpl,
                              weak_factory_.GetWeakPtr())));

  return session_id;
}
| 724 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 725 | void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 726 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 727 | if (!SessionExists(session_id)) |
| 728 | return; |
| 729 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 730 | if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener()) |
| 731 | delegate_listener->OnRecognitionEnd(session_id); |
| 732 | if (SpeechRecognitionEventListener* listener = GetListener(session_id)) |
| 733 | listener->OnRecognitionEnd(session_id); |
Sean Maher | 5b9af51f | 2022-11-21 15:32:47 | [diff] [blame] | 734 | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
tzik | e2aca99 | 2017-09-05 08:50:54 | [diff] [blame] | 735 | FROM_HERE, base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 736 | weak_factory_.GetWeakPtr(), session_id, |
| 737 | EVENT_RECOGNITION_ENDED)); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 738 | } |
| 739 | |
Lucas Furukawa Gadani | 0a70be9 | 2019-06-28 17:31:26 | [diff] [blame] | 740 | SpeechRecognitionSessionContext SpeechRecognitionManagerImpl::GetSessionContext( |
| 741 | int session_id) { |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 742 | return GetSession(session_id)->context; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 743 | } |
| 744 | |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 745 | bool SpeechRecognitionManagerImpl::UseOnDeviceSpeechRecognition( |
| 746 | const SpeechRecognitionSessionConfig& config) { |
| 747 | #if !BUILDFLAG(IS_FUCHSIA) && !BUILDFLAG(IS_ANDROID) |
Evan Liu | b2b7f6d | 2024-08-05 22:55:48 | [diff] [blame] | 748 | return config.on_device && |
Evan Liu | 68919fc | 2025-03-06 20:38:18 | [diff] [blame] | 749 | (speech::IsOnDeviceSpeechRecognitionAvailable(config.language) == |
| 750 | media::mojom::AvailabilityStatus::kAvailable || |
Evan Liu | b2b7f6d | 2024-08-05 22:55:48 | [diff] [blame] | 751 | !config.allow_cloud_fallback); |
Evan Liu | 881ab7a | 2024-08-01 21:54:51 | [diff] [blame] | 752 | #else |
| 753 | return false; |
| 754 | #endif |
| 755 | } |
| 756 | |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 757 | void SpeechRecognitionManagerImpl::AbortAllSessionsForRenderFrame( |
[email protected] | 07c45dd | 2012-07-06 12:10:34 | [diff] [blame] | 758 | int render_process_id, |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 759 | int render_frame_id) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 760 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 761 | |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 762 | for (const auto& session_pair : sessions_) { |
| 763 | Session* session = session_pair.second.get(); |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 764 | if (session->context.render_process_id == render_process_id && |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 765 | session->context.render_frame_id == render_frame_id) { |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 766 | AbortSession(session->id); |
[email protected] | 07c45dd | 2012-07-06 12:10:34 | [diff] [blame] | 767 | } |
| 768 | } |
| 769 | } |
| 770 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 771 | // ----------------------- Core FSM implementation --------------------------- |
| 772 | void SpeechRecognitionManagerImpl::DispatchEvent(int session_id, |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 773 | FSMEvent event) { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 774 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
[email protected] | d305111d | 2012-05-24 10:58:24 | [diff] [blame] | 775 | |
| 776 | // There are some corner cases in which the session might be deleted (due to |
| 777 | // an EndRecognition event) between a request (e.g. Abort) and its dispatch. |
| 778 | if (!SessionExists(session_id)) |
| 779 | return; |
| 780 | |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 781 | Session* session = GetSession(session_id); |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 782 | FSMState session_state = GetSessionState(session_id); |
| 783 | DCHECK_LE(session_state, SESSION_STATE_MAX_VALUE); |
| 784 | DCHECK_LE(event, EVENT_MAX_VALUE); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 785 | |
| 786 | // Event dispatching must be sequential, otherwise it will break all the rules |
| 787 | // and the assumptions of the finite state automata model. |
| 788 | DCHECK(!is_dispatching_event_); |
| 789 | is_dispatching_event_ = true; |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 790 | ExecuteTransitionAndGetNextState(session, session_state, event); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 791 | is_dispatching_event_ = false; |
| 792 | } |
| 793 | |
| 794 | // This FSM handles the evolution of each session, from the viewpoint of the |
| 795 | // interaction with the user (that may be either the browser end-user which |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 796 | // interacts with UI bubbles, or JS developer interacting with JS methods). |
[email protected] | c91bb26 | 2012-06-27 10:56:45 | [diff] [blame] | 797 | // All the events received by the SpeechRecognizer instances (one for each |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 798 | // session) are always routed to the SpeechRecognitionEventListener(s) |
| 799 | // regardless the choices taken in this FSM. |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 800 | void SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState( |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 801 | Session* session, FSMState session_state, FSMEvent event) { |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 802 | // Note: since we're not tracking the state of the recognizer object, rather |
| 803 | // we're directly retrieving it (through GetSessionState), we see its events |
| 804 | // (that are AUDIO_ENDED and RECOGNITION_ENDED) after its state evolution |
| 805 | // (e.g., when we receive the AUDIO_ENDED event, the recognizer has just |
| 806 | // completed the transition from CAPTURING_AUDIO to WAITING_FOR_RESULT, thus |
| 807 | // we perceive the AUDIO_ENDED event in WAITING_FOR_RESULT). |
| 808 | // This makes the code below a bit tricky but avoids a lot of code for |
| 809 | // tracking and reconstructing asynchronously the state of the recognizer. |
| 810 | switch (session_state) { |
| 811 | case SESSION_STATE_IDLE: |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 812 | switch (event) { |
| 813 | case EVENT_START: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 814 | return SessionStart(*session); |
Yiren Wang | 2399856 | 2025-01-28 21:31:05 | [diff] [blame] | 815 | case EVENT_UPDATE_RECOGNITION_CONTEXT: |
| 816 | return SessionUpdateRecognitionContext(*session); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 817 | case EVENT_ABORT: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 818 | return SessionAbort(*session); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 819 | case EVENT_RECOGNITION_ENDED: |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 820 | return SessionDelete(session); |
| 821 | case EVENT_STOP_CAPTURE: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 822 | return SessionStopAudioCapture(*session); |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 823 | case EVENT_AUDIO_ENDED: |
| 824 | return; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 825 | } |
| 826 | break; |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 827 | case SESSION_STATE_CAPTURING_AUDIO: |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 828 | switch (event) { |
Yiren Wang | 2399856 | 2025-01-28 21:31:05 | [diff] [blame] | 829 | case EVENT_UPDATE_RECOGNITION_CONTEXT: |
| 830 | return SessionUpdateRecognitionContext(*session); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 831 | case EVENT_STOP_CAPTURE: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 832 | return SessionStopAudioCapture(*session); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 833 | case EVENT_ABORT: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 834 | return SessionAbort(*session); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 835 | case EVENT_START: |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 836 | return; |
| 837 | case EVENT_AUDIO_ENDED: |
| 838 | case EVENT_RECOGNITION_ENDED: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 839 | return NotFeasible(*session, event); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 840 | } |
| 841 | break; |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 842 | case SESSION_STATE_WAITING_FOR_RESULT: |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 843 | switch (event) { |
Yiren Wang | 2399856 | 2025-01-28 21:31:05 | [diff] [blame] | 844 | case EVENT_UPDATE_RECOGNITION_CONTEXT: |
| 845 | return SessionUpdateRecognitionContext(*session); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 846 | case EVENT_ABORT: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 847 | return SessionAbort(*session); |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 848 | case EVENT_AUDIO_ENDED: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 849 | return ResetCapturingSessionId(*session); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 850 | case EVENT_START: |
| 851 | case EVENT_STOP_CAPTURE: |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 852 | return; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 853 | case EVENT_RECOGNITION_ENDED: |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 854 | return NotFeasible(*session, event); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 855 | } |
| 856 | break; |
| 857 | } |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 858 | return NotFeasible(*session, event); |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 859 | } |
| 860 | |
| 861 | SpeechRecognitionManagerImpl::FSMState |
| 862 | SpeechRecognitionManagerImpl::GetSessionState(int session_id) const { |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 863 | Session* session = GetSession(session_id); |
| 864 | if (!session->recognizer.get() || !session->recognizer->IsActive()) |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 865 | return SESSION_STATE_IDLE; |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 866 | if (session->recognizer->IsCapturingAudio()) |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 867 | return SESSION_STATE_CAPTURING_AUDIO; |
| 868 | return SESSION_STATE_WAITING_FOR_RESULT; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 869 | } |
| 870 | |
| 871 | // ----------- Contract for all the FSM evolution functions below ------------- |
| 872 | // - Are guaranteed to be executed in the IO thread; |
| 873 | // - Are guaranteed to be not reentrant (themselves and each other); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 874 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 875 | void SpeechRecognitionManagerImpl::SessionStart(const Session& session) { |
Guido Urdaneta | 73fa663 | 2019-01-14 18:46:26 | [diff] [blame] | 876 | const blink::MediaStreamDevices& devices = session.context.devices; |
[email protected] | 579d699 | 2013-06-22 13:40:20 | [diff] [blame] | 877 | std::string device_id; |
| 878 | if (devices.empty()) { |
| 879 | // From the ask_user=false path, use the default device. |
| 880 | // TODO(xians): Abort the session after we do not need to support this path |
| 881 | // anymore. |
olka | 251dd569 | 2016-04-27 15:50:17 | [diff] [blame] | 882 | device_id = media::AudioDeviceDescription::kDefaultDeviceId; |
[email protected] | 579d699 | 2013-06-22 13:40:20 | [diff] [blame] | 883 | } else { |
| 884 | // From the ask_user=true path, use the selected device. |
| 885 | DCHECK_EQ(1u, devices.size()); |
Antonio Gomes | c8b734b | 2019-06-05 18:22:16 | [diff] [blame] | 886 | DCHECK_EQ(blink::mojom::MediaStreamType::DEVICE_AUDIO_CAPTURE, |
| 887 | devices.front().type); |
[email protected] | 579d699 | 2013-06-22 13:40:20 | [diff] [blame] | 888 | device_id = devices.front().id; |
| 889 | } |
[email protected] | 76f9f04e | 2013-06-20 06:38:23 | [diff] [blame] | 890 | |
[email protected] | 579d699 | 2013-06-22 13:40:20 | [diff] [blame] | 891 | session.recognizer->StartRecognition(device_id); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 892 | } |
| 893 | |
Yiren Wang | 2399856 | 2025-01-28 21:31:05 | [diff] [blame] | 894 | void SpeechRecognitionManagerImpl::SessionUpdateRecognitionContext( |
| 895 | const Session& session) { |
| 896 | CHECK(session.recognizer.get()); |
| 897 | session.recognizer->UpdateRecognitionContext(session.recognition_context); |
| 898 | } |
| 899 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 900 | void SpeechRecognitionManagerImpl::SessionAbort(const Session& session) { |
Evan Liu | 1c0164d | 2025-02-12 20:40:36 | [diff] [blame] | 901 | if (microphone_session_id_ == session.id) { |
| 902 | microphone_session_id_ = kSessionIDInvalid; |
| 903 | } |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 904 | DCHECK(session.recognizer.get()); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 905 | session.recognizer->AbortRecognition(); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 906 | } |
| 907 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 908 | void SpeechRecognitionManagerImpl::SessionStopAudioCapture( |
| 909 | const Session& session) { |
[email protected] | 2af35c50 | 2012-09-13 20:14:43 | [diff] [blame] | 910 | DCHECK(session.recognizer.get()); |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 911 | session.recognizer->StopAudioCapture(); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 912 | } |
| 913 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 914 | void SpeechRecognitionManagerImpl::ResetCapturingSessionId( |
| 915 | const Session& session) { |
Evan Liu | 1c0164d | 2025-02-12 20:40:36 | [diff] [blame] | 916 | microphone_session_id_ = kSessionIDInvalid; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 917 | } |
| 918 | |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 919 | void SpeechRecognitionManagerImpl::SessionDelete(Session* session) { |
Ivan Kotenkov | 2c0d2bb3 | 2017-11-01 15:41:28 | [diff] [blame] | 920 | DCHECK(session->recognizer.get() == nullptr || |
| 921 | !session->recognizer->IsActive()); |
Evan Liu | 1c0164d | 2025-02-12 20:40:36 | [diff] [blame] | 922 | if (microphone_session_id_ == session->id) { |
| 923 | microphone_session_id_ = kSessionIDInvalid; |
| 924 | } |
[email protected] | 2542d88a | 2013-09-30 15:41:07 | [diff] [blame] | 925 | if (!session->context.label.empty()) |
| 926 | media_stream_manager_->CancelRequest(session->context.label); |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 927 | sessions_.erase(session->id); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 928 | } |
| 929 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 930 | void SpeechRecognitionManagerImpl::NotFeasible(const Session& session, |
| 931 | FSMEvent event) { |
Peter Boström | fc7ddc18 | 2024-10-31 19:37:21 | [diff] [blame] | 932 | NOTREACHED() << "Unfeasible event " << event << " in state " |
| 933 | << GetSessionState(session.id) << " for session " << session.id; |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 934 | } |
| 935 | |
| 936 | int SpeechRecognitionManagerImpl::GetNextSessionID() { |
| 937 | ++last_session_id_; |
| 938 | // Deal with wrapping of last_session_id_. (How civilized). |
| 939 | if (last_session_id_ <= 0) |
| 940 | last_session_id_ = 1; |
| 941 | return last_session_id_; |
| 942 | } |
| 943 | |
| 944 | bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const { |
| 945 | return sessions_.find(session_id) != sessions_.end(); |
| 946 | } |
| 947 | |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 948 | SpeechRecognitionManagerImpl::Session* |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 949 | SpeechRecognitionManagerImpl::GetSession(int session_id) const { |
mostynb | 042582e | 2015-03-16 22:13:40 | [diff] [blame] | 950 | DCHECK_CURRENTLY_ON(BrowserThread::IO); |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 951 | auto iter = sessions_.find(session_id); |
Daniel Cheng | 4d54f0a | 2025-05-26 22:59:12 | [diff] [blame] | 952 | CHECK(iter != sessions_.end()); |
Avi Drissman | e6d6550 | 2018-02-13 16:21:58 | [diff] [blame] | 953 | return iter->second.get(); |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 954 | } |
| 955 | |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 956 | SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener( |
| 957 | int session_id) const { |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 958 | Session* session = GetSession(session_id); |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 959 | if (session->config.event_listener) |
[email protected] | e2eb2f2 | 2014-02-20 14:36:00 | [diff] [blame] | 960 | return session->config.event_listener.get(); |
Ivan Kotenkov | 2c0d2bb3 | 2017-11-01 15:41:28 | [diff] [blame] | 961 | return nullptr; |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 962 | } |
| 963 | |
| 964 | SpeechRecognitionEventListener* |
| 965 | SpeechRecognitionManagerImpl::GetDelegateListener() const { |
Ivan Kotenkov | 2c0d2bb3 | 2017-11-01 15:41:28 | [diff] [blame] | 966 | return delegate_.get() ? delegate_->GetEventListener() : nullptr; |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 967 | } |
| 968 | |
| 969 | const SpeechRecognitionSessionConfig& |
Lucas Furukawa Gadani | 0a70be9 | 2019-06-28 17:31:26 | [diff] [blame] | 970 | SpeechRecognitionManagerImpl::GetSessionConfig(int session_id) { |
[email protected] | 2b94cfe | 2013-06-07 13:12:53 | [diff] [blame] | 971 | return GetSession(session_id)->config; |
[email protected] | 12f4fb9 | 2012-05-16 10:30:16 | [diff] [blame] | 972 | } |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 973 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 974 | SpeechRecognitionManagerImpl::Session::Session() |
Avi Drissman | 6ed86dc3 | 2018-03-03 00:29:24 | [diff] [blame] | 975 | : id(kSessionIDInvalid), abort_requested(false) {} |
[email protected] | 0944a729 | 2011-09-21 16:45:06 | [diff] [blame] | 976 | |
[email protected] | b450e90 | 2012-04-25 20:20:18 | [diff] [blame] | 977 | SpeechRecognitionManagerImpl::Session::~Session() { |
[email protected] | 84c13c03 | 2011-09-23 00:12:22 | [diff] [blame] | 978 | } |
| 979 | |
[email protected] | fcb8e021 | 2012-10-29 11:57:18 | [diff] [blame] | 980 | } // namespace content |