blob: 25c7269cb446aa149a3d7b4f4f2337f2cbb98b1c [file] [log] [blame]
niklase@google.com470e71d2011-07-07 08:21:25 +00001/*
bjornv@webrtc.orgf4b77fd2012-01-25 12:40:00 +00002 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
niklase@google.com470e71d2011-07-07 08:21:25 +00003 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000011#include "webrtc/modules/audio_processing/voice_detection_impl.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000012
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000013#include <assert.h>
niklase@google.com470e71d2011-07-07 08:21:25 +000014
peahdf3efa82015-11-28 12:35:15 -080015#include "webrtc/base/criticalsection.h"
16#include "webrtc/base/thread_checker.h"
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000017#include "webrtc/common_audio/vad/include/webrtc_vad.h"
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000018#include "webrtc/modules/audio_processing/audio_buffer.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000019
20namespace webrtc {
21
22typedef VadInst Handle;
23
24namespace {
bjornv@webrtc.orgf4b77fd2012-01-25 12:40:00 +000025int MapSetting(VoiceDetection::Likelihood likelihood) {
niklase@google.com470e71d2011-07-07 08:21:25 +000026 switch (likelihood) {
27 case VoiceDetection::kVeryLowLikelihood:
28 return 3;
niklase@google.com470e71d2011-07-07 08:21:25 +000029 case VoiceDetection::kLowLikelihood:
30 return 2;
niklase@google.com470e71d2011-07-07 08:21:25 +000031 case VoiceDetection::kModerateLikelihood:
32 return 1;
niklase@google.com470e71d2011-07-07 08:21:25 +000033 case VoiceDetection::kHighLikelihood:
34 return 0;
niklase@google.com470e71d2011-07-07 08:21:25 +000035 }
andrew@webrtc.org648af742012-02-08 01:57:29 +000036 assert(false);
mflodman@webrtc.orgec31bc12012-02-06 12:42:45 +000037 return -1;
niklase@google.com470e71d2011-07-07 08:21:25 +000038}
39} // namespace
40
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000041VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
peahdf3efa82015-11-28 12:35:15 -080042 rtc::CriticalSection* crit)
43 : ProcessingComponent(),
44 apm_(apm),
45 crit_(crit),
46 stream_has_voice_(false),
47 using_external_vad_(false),
48 likelihood_(kLowLikelihood),
49 frame_size_ms_(10),
50 frame_size_samples_(0) {
51 RTC_DCHECK(apm);
52 RTC_DCHECK(crit);
53}
niklase@google.com470e71d2011-07-07 08:21:25 +000054
55VoiceDetectionImpl::~VoiceDetectionImpl() {}
56
57int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
peahdf3efa82015-11-28 12:35:15 -080058 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +000059 if (!is_component_enabled()) {
60 return apm_->kNoError;
61 }
62
63 if (using_external_vad_) {
64 using_external_vad_ = false;
65 return apm_->kNoError;
66 }
aluebs@webrtc.orgd35a5c32015-02-10 22:52:15 +000067 assert(audio->num_frames_per_band() <= 160);
niklase@google.com470e71d2011-07-07 08:21:25 +000068
niklase@google.com470e71d2011-07-07 08:21:25 +000069 // TODO(ajm): concatenate data in frame buffer here.
70
andrew@webrtc.orged083d42011-09-19 15:28:51 +000071 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
andrew@webrtc.orgddbb8a22014-04-22 21:00:04 +000072 apm_->proc_split_sample_rate_hz(),
aluebs@webrtc.org2561d522014-07-17 08:27:39 +000073 audio->mixed_low_pass_data(),
andrew@webrtc.orged083d42011-09-19 15:28:51 +000074 frame_size_samples_);
75 if (vad_ret == 0) {
niklase@google.com470e71d2011-07-07 08:21:25 +000076 stream_has_voice_ = false;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000077 audio->set_activity(AudioFrame::kVadPassive);
78 } else if (vad_ret == 1) {
niklase@google.com470e71d2011-07-07 08:21:25 +000079 stream_has_voice_ = true;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000080 audio->set_activity(AudioFrame::kVadActive);
niklase@google.com470e71d2011-07-07 08:21:25 +000081 } else {
82 return apm_->kUnspecifiedError;
83 }
84
85 return apm_->kNoError;
86}
87
88int VoiceDetectionImpl::Enable(bool enable) {
peahdf3efa82015-11-28 12:35:15 -080089 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +000090 return EnableComponent(enable);
91}
92
93bool VoiceDetectionImpl::is_enabled() const {
peahdf3efa82015-11-28 12:35:15 -080094 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +000095 return is_component_enabled();
96}
97
98int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
peahdf3efa82015-11-28 12:35:15 -080099 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000100 using_external_vad_ = true;
101 stream_has_voice_ = has_voice;
102 return apm_->kNoError;
103}
104
105bool VoiceDetectionImpl::stream_has_voice() const {
peahdf3efa82015-11-28 12:35:15 -0800106 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000107 // TODO(ajm): enable this assertion?
108 //assert(using_external_vad_ || is_component_enabled());
109 return stream_has_voice_;
110}
111
112int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
peahdf3efa82015-11-28 12:35:15 -0800113 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000114 if (MapSetting(likelihood) == -1) {
115 return apm_->kBadParameterError;
116 }
117
118 likelihood_ = likelihood;
119 return Configure();
120}
121
122VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
peahdf3efa82015-11-28 12:35:15 -0800123 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000124 return likelihood_;
125}
126
127int VoiceDetectionImpl::set_frame_size_ms(int size) {
peahdf3efa82015-11-28 12:35:15 -0800128 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000129 assert(size == 10); // TODO(ajm): remove when supported.
130 if (size != 10 &&
131 size != 20 &&
132 size != 30) {
133 return apm_->kBadParameterError;
134 }
135
136 frame_size_ms_ = size;
137
138 return Initialize();
139}
140
141int VoiceDetectionImpl::frame_size_ms() const {
peahdf3efa82015-11-28 12:35:15 -0800142 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000143 return frame_size_ms_;
144}
145
146int VoiceDetectionImpl::Initialize() {
147 int err = ProcessingComponent::Initialize();
peahdf3efa82015-11-28 12:35:15 -0800148
149 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000150 if (err != apm_->kNoError || !is_component_enabled()) {
151 return err;
152 }
153
154 using_external_vad_ = false;
Peter Kastingdce40cf2015-08-24 14:52:23 -0700155 frame_size_samples_ = static_cast<size_t>(
156 frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000);
niklase@google.com470e71d2011-07-07 08:21:25 +0000157 // TODO(ajm): intialize frame buffer here.
158
159 return apm_->kNoError;
160}
161
niklase@google.com470e71d2011-07-07 08:21:25 +0000162void* VoiceDetectionImpl::CreateHandle() const {
Bjorn Volckerde4703c2015-05-27 07:22:58 +0200163 return WebRtcVad_Create();
niklase@google.com470e71d2011-07-07 08:21:25 +0000164}
165
bjornv@webrtc.org5964fe02014-04-22 06:52:28 +0000166void VoiceDetectionImpl::DestroyHandle(void* handle) const {
bjornv@webrtc.org2a796722014-04-22 04:45:35 +0000167 WebRtcVad_Free(static_cast<Handle*>(handle));
niklase@google.com470e71d2011-07-07 08:21:25 +0000168}
169
170int VoiceDetectionImpl::InitializeHandle(void* handle) const {
171 return WebRtcVad_Init(static_cast<Handle*>(handle));
172}
173
174int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
peahdf3efa82015-11-28 12:35:15 -0800175 rtc::CritScope cs(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000176 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
177 MapSetting(likelihood_));
178}
179
180int VoiceDetectionImpl::num_handles_required() const {
181 return 1;
182}
183
184int VoiceDetectionImpl::GetHandleError(void* handle) const {
185 // The VAD has no get_error() function.
186 assert(handle != NULL);
187 return apm_->kUnspecifiedError;
188}
189} // namespace webrtc