blob: 5ee0c7b9c24bb46d25aade4110ed1a55a2f368c5 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_processing/voice_detection_impl.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000012
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020013#include "common_audio/vad/include/webrtc_vad.h"
14#include "modules/audio_processing/audio_buffer.h"
15#include "rtc_base/constructormagic.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000016
17namespace webrtc {
solenberga29386c2015-12-16 03:31:12 -080018class VoiceDetectionImpl::Vad {
19 public:
20 Vad() {
21 state_ = WebRtcVad_Create();
22 RTC_CHECK(state_);
23 int error = WebRtcVad_Init(state_);
24 RTC_DCHECK_EQ(0, error);
niklase@google.com470e71d2011-07-07 08:21:25 +000025 }
solenberga29386c2015-12-16 03:31:12 -080026 ~Vad() {
27 WebRtcVad_Free(state_);
28 }
29 VadInst* state() { return state_; }
30 private:
31 VadInst* state_ = nullptr;
32 RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
33};
niklase@google.com470e71d2011-07-07 08:21:25 +000034
solenberga29386c2015-12-16 03:31:12 -080035VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
36 : crit_(crit) {
peahdf3efa82015-11-28 12:35:15 -080037 RTC_DCHECK(crit);
38}
niklase@google.com470e71d2011-07-07 08:21:25 +000039
40VoiceDetectionImpl::~VoiceDetectionImpl() {}
41
solenberga29386c2015-12-16 03:31:12 -080042void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
peahdf3efa82015-11-28 12:35:15 -080043 rtc::CritScope cs(crit_);
solenberga29386c2015-12-16 03:31:12 -080044 sample_rate_hz_ = sample_rate_hz;
kwiberg88788ad2016-02-19 07:04:49 -080045 std::unique_ptr<Vad> new_vad;
solenberga29386c2015-12-16 03:31:12 -080046 if (enabled_) {
47 new_vad.reset(new Vad());
niklase@google.com470e71d2011-07-07 08:21:25 +000048 }
solenberga29386c2015-12-16 03:31:12 -080049 vad_.swap(new_vad);
50 using_external_vad_ = false;
51 frame_size_samples_ =
52 static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
53 set_likelihood(likelihood_);
54}
niklase@google.com470e71d2011-07-07 08:21:25 +000055
solenberga29386c2015-12-16 03:31:12 -080056void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
57 rtc::CritScope cs(crit_);
58 if (!enabled_) {
59 return;
60 }
niklase@google.com470e71d2011-07-07 08:21:25 +000061 if (using_external_vad_) {
62 using_external_vad_ = false;
solenberga29386c2015-12-16 03:31:12 -080063 return;
niklase@google.com470e71d2011-07-07 08:21:25 +000064 }
niklase@google.com470e71d2011-07-07 08:21:25 +000065
kwibergaf476c72016-11-28 15:21:39 -080066 RTC_DCHECK_GE(160, audio->num_frames_per_band());
niklase@google.com470e71d2011-07-07 08:21:25 +000067 // TODO(ajm): concatenate data in frame buffer here.
solenberga29386c2015-12-16 03:31:12 -080068 int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
aluebs@webrtc.org2561d522014-07-17 08:27:39 +000069 audio->mixed_low_pass_data(),
andrew@webrtc.orged083d42011-09-19 15:28:51 +000070 frame_size_samples_);
71 if (vad_ret == 0) {
niklase@google.com470e71d2011-07-07 08:21:25 +000072 stream_has_voice_ = false;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000073 audio->set_activity(AudioFrame::kVadPassive);
74 } else if (vad_ret == 1) {
niklase@google.com470e71d2011-07-07 08:21:25 +000075 stream_has_voice_ = true;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000076 audio->set_activity(AudioFrame::kVadActive);
niklase@google.com470e71d2011-07-07 08:21:25 +000077 } else {
solenberga29386c2015-12-16 03:31:12 -080078 RTC_NOTREACHED();
niklase@google.com470e71d2011-07-07 08:21:25 +000079 }
niklase@google.com470e71d2011-07-07 08:21:25 +000080}
81
82int VoiceDetectionImpl::Enable(bool enable) {
peahdf3efa82015-11-28 12:35:15 -080083 rtc::CritScope cs(crit_);
solenberga29386c2015-12-16 03:31:12 -080084 if (enabled_ != enable) {
85 enabled_ = enable;
86 Initialize(sample_rate_hz_);
87 }
88 return AudioProcessing::kNoError;
niklase@google.com470e71d2011-07-07 08:21:25 +000089}
90
91bool VoiceDetectionImpl::is_enabled() const {
peahdf3efa82015-11-28 12:35:15 -080092 rtc::CritScope cs(crit_);
solenberga29386c2015-12-16 03:31:12 -080093 return enabled_;
niklase@google.com470e71d2011-07-07 08:21:25 +000094}
95
96int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
peahdf3efa82015-11-28 12:35:15 -080097 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +000098 using_external_vad_ = true;
99 stream_has_voice_ = has_voice;
solenberga29386c2015-12-16 03:31:12 -0800100 return AudioProcessing::kNoError;
niklase@google.com470e71d2011-07-07 08:21:25 +0000101}
102
103bool VoiceDetectionImpl::stream_has_voice() const {
peahdf3efa82015-11-28 12:35:15 -0800104 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000105 // TODO(ajm): enable this assertion?
kwiberg9e2be5f2016-09-14 05:23:22 -0700106 //RTC_DCHECK(using_external_vad_ || is_component_enabled());
niklase@google.com470e71d2011-07-07 08:21:25 +0000107 return stream_has_voice_;
108}
109
110int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
peahdf3efa82015-11-28 12:35:15 -0800111 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000112 likelihood_ = likelihood;
solenberga29386c2015-12-16 03:31:12 -0800113 if (enabled_) {
114 int mode = 2;
115 switch (likelihood) {
116 case VoiceDetection::kVeryLowLikelihood:
117 mode = 3;
118 break;
119 case VoiceDetection::kLowLikelihood:
120 mode = 2;
121 break;
122 case VoiceDetection::kModerateLikelihood:
123 mode = 1;
124 break;
125 case VoiceDetection::kHighLikelihood:
126 mode = 0;
127 break;
128 default:
129 RTC_NOTREACHED();
130 break;
131 }
132 int error = WebRtcVad_set_mode(vad_->state(), mode);
133 RTC_DCHECK_EQ(0, error);
134 }
135 return AudioProcessing::kNoError;
niklase@google.com470e71d2011-07-07 08:21:25 +0000136}
137
138VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
peahdf3efa82015-11-28 12:35:15 -0800139 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000140 return likelihood_;
141}
142
143int VoiceDetectionImpl::set_frame_size_ms(int size) {
peahdf3efa82015-11-28 12:35:15 -0800144 rtc::CritScope cs(crit_);
solenberga29386c2015-12-16 03:31:12 -0800145 RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
niklase@google.com470e71d2011-07-07 08:21:25 +0000146 frame_size_ms_ = size;
solenberga29386c2015-12-16 03:31:12 -0800147 Initialize(sample_rate_hz_);
148 return AudioProcessing::kNoError;
niklase@google.com470e71d2011-07-07 08:21:25 +0000149}
150
151int VoiceDetectionImpl::frame_size_ms() const {
peahdf3efa82015-11-28 12:35:15 -0800152 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000153 return frame_size_ms_;
154}
niklase@google.com470e71d2011-07-07 08:21:25 +0000155} // namespace webrtc