/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #include "modules/audio_processing/voice_detection_impl.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 12 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 13 | #include "common_audio/vad/include/webrtc_vad.h" |
| 14 | #include "modules/audio_processing/audio_buffer.h" |
| 15 | #include "rtc_base/constructormagic.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 16 | |
| 17 | namespace webrtc { |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 18 | class VoiceDetectionImpl::Vad { |
| 19 | public: |
| 20 | Vad() { |
| 21 | state_ = WebRtcVad_Create(); |
| 22 | RTC_CHECK(state_); |
| 23 | int error = WebRtcVad_Init(state_); |
| 24 | RTC_DCHECK_EQ(0, error); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 25 | } |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 26 | ~Vad() { WebRtcVad_Free(state_); } |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 27 | VadInst* state() { return state_; } |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 28 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 29 | private: |
| 30 | VadInst* state_ = nullptr; |
| 31 | RTC_DISALLOW_COPY_AND_ASSIGN(Vad); |
| 32 | }; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 33 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 34 | VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) |
| 35 | : crit_(crit) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 36 | RTC_DCHECK(crit); |
| 37 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 38 | |
| 39 | VoiceDetectionImpl::~VoiceDetectionImpl() {} |
| 40 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 41 | void VoiceDetectionImpl::Initialize(int sample_rate_hz) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 42 | rtc::CritScope cs(crit_); |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 43 | sample_rate_hz_ = sample_rate_hz; |
kwiberg | 88788ad | 2016-02-19 07:04:49 -0800 | [diff] [blame] | 44 | std::unique_ptr<Vad> new_vad; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 45 | if (enabled_) { |
| 46 | new_vad.reset(new Vad()); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 47 | } |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 48 | vad_.swap(new_vad); |
| 49 | using_external_vad_ = false; |
| 50 | frame_size_samples_ = |
| 51 | static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; |
| 52 | set_likelihood(likelihood_); |
| 53 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 54 | |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 55 | void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { |
| 56 | rtc::CritScope cs(crit_); |
| 57 | if (!enabled_) { |
| 58 | return; |
| 59 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 60 | if (using_external_vad_) { |
| 61 | using_external_vad_ = false; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 62 | return; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 63 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 64 | |
kwiberg | af476c7 | 2016-11-28 15:21:39 -0800 | [diff] [blame] | 65 | RTC_DCHECK_GE(160, audio->num_frames_per_band()); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 66 | // TODO(ajm): concatenate data in frame buffer here. |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 67 | int vad_ret = |
| 68 | WebRtcVad_Process(vad_->state(), sample_rate_hz_, |
| 69 | audio->mixed_low_pass_data(), frame_size_samples_); |
andrew@webrtc.org | ed083d4 | 2011-09-19 15:28:51 +0000 | [diff] [blame] | 70 | if (vad_ret == 0) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 71 | stream_has_voice_ = false; |
andrew@webrtc.org | ed083d4 | 2011-09-19 15:28:51 +0000 | [diff] [blame] | 72 | audio->set_activity(AudioFrame::kVadPassive); |
| 73 | } else if (vad_ret == 1) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 74 | stream_has_voice_ = true; |
andrew@webrtc.org | ed083d4 | 2011-09-19 15:28:51 +0000 | [diff] [blame] | 75 | audio->set_activity(AudioFrame::kVadActive); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 76 | } else { |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 77 | RTC_NOTREACHED(); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 78 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 79 | } |
| 80 | |
| 81 | int VoiceDetectionImpl::Enable(bool enable) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 82 | rtc::CritScope cs(crit_); |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 83 | if (enabled_ != enable) { |
| 84 | enabled_ = enable; |
| 85 | Initialize(sample_rate_hz_); |
| 86 | } |
| 87 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 88 | } |
| 89 | |
| 90 | bool VoiceDetectionImpl::is_enabled() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 91 | rtc::CritScope cs(crit_); |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 92 | return enabled_; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 93 | } |
| 94 | |
| 95 | int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 96 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 97 | using_external_vad_ = true; |
| 98 | stream_has_voice_ = has_voice; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 99 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 100 | } |
| 101 | |
| 102 | bool VoiceDetectionImpl::stream_has_voice() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 103 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 104 | // TODO(ajm): enable this assertion? |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 105 | // RTC_DCHECK(using_external_vad_ || is_component_enabled()); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 106 | return stream_has_voice_; |
| 107 | } |
| 108 | |
| 109 | int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 110 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 111 | likelihood_ = likelihood; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 112 | if (enabled_) { |
| 113 | int mode = 2; |
| 114 | switch (likelihood) { |
| 115 | case VoiceDetection::kVeryLowLikelihood: |
| 116 | mode = 3; |
| 117 | break; |
| 118 | case VoiceDetection::kLowLikelihood: |
| 119 | mode = 2; |
| 120 | break; |
| 121 | case VoiceDetection::kModerateLikelihood: |
| 122 | mode = 1; |
| 123 | break; |
| 124 | case VoiceDetection::kHighLikelihood: |
| 125 | mode = 0; |
| 126 | break; |
| 127 | default: |
| 128 | RTC_NOTREACHED(); |
| 129 | break; |
| 130 | } |
| 131 | int error = WebRtcVad_set_mode(vad_->state(), mode); |
| 132 | RTC_DCHECK_EQ(0, error); |
| 133 | } |
| 134 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 135 | } |
| 136 | |
| 137 | VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 138 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 139 | return likelihood_; |
| 140 | } |
| 141 | |
| 142 | int VoiceDetectionImpl::set_frame_size_ms(int size) { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 143 | rtc::CritScope cs(crit_); |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame^] | 144 | RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 145 | frame_size_ms_ = size; |
solenberg | a29386c | 2015-12-16 03:31:12 -0800 | [diff] [blame] | 146 | Initialize(sample_rate_hz_); |
| 147 | return AudioProcessing::kNoError; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 148 | } |
| 149 | |
| 150 | int VoiceDetectionImpl::frame_size_ms() const { |
peah | df3efa8 | 2015-11-28 12:35:15 -0800 | [diff] [blame] | 151 | rtc::CritScope cs(crit_); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 152 | return frame_size_ms_; |
| 153 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 154 | } // namespace webrtc |