niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 1 | /* |
bjornv@webrtc.org | f4b77fd | 2012-01-25 12:40:00 +0000 | [diff] [blame] | 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #include "modules/audio_processing/voice_detection_impl.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 12 | |
Yves Gerey | 988cc08 | 2018-10-23 12:03:01 +0200 | [diff] [blame] | 13 | #include "api/audio/audio_frame.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 14 | #include "common_audio/vad/include/webrtc_vad.h" |
| 15 | #include "modules/audio_processing/audio_buffer.h" |
Yves Gerey | 988cc08 | 2018-10-23 12:03:01 +0200 | [diff] [blame] | 16 | #include "rtc_base/checks.h" |
Steve Anton | 10542f2 | 2019-01-11 09:11:00 -0800 | [diff] [blame^] | 17 | #include "rtc_base/constructor_magic.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 18 | |
| 19 | namespace webrtc { |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 20 | class VoiceDetectionImpl::Vad { |
| 21 | public: |
| 22 | Vad() { |
| 23 | state_ = WebRtcVad_Create(); |
| 24 | RTC_CHECK(state_); |
| 25 | int error = WebRtcVad_Init(state_); |
| 26 | RTC_DCHECK_EQ(0, error); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 27 | } |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 28 | ~Vad() { WebRtcVad_Free(state_); } |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 29 | VadInst* state() { return state_; } |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 30 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 31 | private: |
| 32 | VadInst* state_ = nullptr; |
| 33 | RTC_DISALLOW_COPY_AND_ASSIGN(Vad); |
| 34 | }; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 35 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 36 | VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) |
| 37 | : crit_(crit) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 38 | RTC_DCHECK(crit); |
| 39 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 40 | |
| 41 | VoiceDetectionImpl::~VoiceDetectionImpl() {} |
| 42 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 43 | void VoiceDetectionImpl::Initialize(int sample_rate_hz) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 44 | rtc::CritScope cs(crit_); |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 45 | sample_rate_hz_ = sample_rate_hz; |
kwiberg | 88788ad | 2016-02-19 07:04:49 -0800 | [diff] [blame] | 46 | std::unique_ptr<Vad> new_vad; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 47 | if (enabled_) { |
| 48 | new_vad.reset(new Vad()); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 49 | } |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 50 | vad_.swap(new_vad); |
| 51 | using_external_vad_ = false; |
| 52 | frame_size_samples_ = |
| 53 | static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; |
| 54 | set_likelihood(likelihood_); |
| 55 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 56 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 57 | void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { |
| 58 | rtc::CritScope cs(crit_); |
| 59 | if (!enabled_) { |
| 60 | return; |
| 61 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 62 | if (using_external_vad_) { |
| 63 | using_external_vad_ = false; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 64 | return; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 65 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 66 | |
kwiberg | af476c7 | 2016-11-28 15:21:39 -0800 | [diff] [blame] | 67 | RTC_DCHECK_GE(160, audio->num_frames_per_band()); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 68 | // TODO(ajm): concatenate data in frame buffer here. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 69 | int vad_ret = |
| 70 | WebRtcVad_Process(vad_->state(), sample_rate_hz_, |
| 71 | audio->mixed_low_pass_data(), frame_size_samples_); |
andrew@webrtc.org | ed083d4 | 2011-09-19 15:28:51 +0000 | [diff] [blame] | 72 | if (vad_ret == 0) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 73 | stream_has_voice_ = false; |
andrew@webrtc.org | ed083d4 | 2011-09-19 15:28:51 +0000 | [diff] [blame] | 74 | audio->set_activity(AudioFrame::kVadPassive); |
| 75 | } else if (vad_ret == 1) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 76 | stream_has_voice_ = true; |
andrew@webrtc.org | ed083d4 | 2011-09-19 15:28:51 +0000 | [diff] [blame] | 77 | audio->set_activity(AudioFrame::kVadActive); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 78 | } else { |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 79 | RTC_NOTREACHED(); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 80 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 81 | } |
| 82 | |
| 83 | int VoiceDetectionImpl::Enable(bool enable) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 84 | rtc::CritScope cs(crit_); |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 85 | if (enabled_ != enable) { |
| 86 | enabled_ = enable; |
| 87 | Initialize(sample_rate_hz_); |
| 88 | } |
| 89 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 90 | } |
| 91 | |
| 92 | bool VoiceDetectionImpl::is_enabled() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 93 | rtc::CritScope cs(crit_); |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 94 | return enabled_; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 95 | } |
| 96 | |
| 97 | int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 98 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 99 | using_external_vad_ = true; |
| 100 | stream_has_voice_ = has_voice; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 101 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 102 | } |
| 103 | |
| 104 | bool VoiceDetectionImpl::stream_has_voice() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 105 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 106 | // TODO(ajm): enable this assertion? |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 107 | // RTC_DCHECK(using_external_vad_ || is_component_enabled()); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 108 | return stream_has_voice_; |
| 109 | } |
| 110 | |
| 111 | int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 112 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 113 | likelihood_ = likelihood; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 114 | if (enabled_) { |
| 115 | int mode = 2; |
| 116 | switch (likelihood) { |
| 117 | case VoiceDetection::kVeryLowLikelihood: |
| 118 | mode = 3; |
| 119 | break; |
| 120 | case VoiceDetection::kLowLikelihood: |
| 121 | mode = 2; |
| 122 | break; |
| 123 | case VoiceDetection::kModerateLikelihood: |
| 124 | mode = 1; |
| 125 | break; |
| 126 | case VoiceDetection::kHighLikelihood: |
| 127 | mode = 0; |
| 128 | break; |
| 129 | default: |
| 130 | RTC_NOTREACHED(); |
| 131 | break; |
| 132 | } |
| 133 | int error = WebRtcVad_set_mode(vad_->state(), mode); |
| 134 | RTC_DCHECK_EQ(0, error); |
| 135 | } |
| 136 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 137 | } |
| 138 | |
| 139 | VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 140 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 141 | return likelihood_; |
| 142 | } |
| 143 | |
| 144 | int VoiceDetectionImpl::set_frame_size_ms(int size) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 145 | rtc::CritScope cs(crit_); |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 146 | RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 147 | frame_size_ms_ = size; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 148 | Initialize(sample_rate_hz_); |
| 149 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 150 | } |
| 151 | |
| 152 | int VoiceDetectionImpl::frame_size_ms() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 153 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 154 | return frame_size_ms_; |
| 155 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 156 | } // namespace webrtc |