blob: 1b1dd8b80d1ddfd989670e882d821db575c5877b [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000011#include "webrtc/modules/audio_processing/voice_detection_impl.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000012
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000013#include <assert.h>
niklase@google.com470e71d2011-07-07 08:21:25 +000014
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000015#include "webrtc/common_audio/vad/include/webrtc_vad.h"
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000016#include "webrtc/modules/audio_processing/audio_buffer.h"
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000017#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000018
19namespace webrtc {
20
21typedef VadInst Handle;
22
23namespace {
bjornv@webrtc.orgf4b77fd2012-01-25 12:40:00 +000024int MapSetting(VoiceDetection::Likelihood likelihood) {
niklase@google.com470e71d2011-07-07 08:21:25 +000025 switch (likelihood) {
26 case VoiceDetection::kVeryLowLikelihood:
27 return 3;
niklase@google.com470e71d2011-07-07 08:21:25 +000028 case VoiceDetection::kLowLikelihood:
29 return 2;
niklase@google.com470e71d2011-07-07 08:21:25 +000030 case VoiceDetection::kModerateLikelihood:
31 return 1;
niklase@google.com470e71d2011-07-07 08:21:25 +000032 case VoiceDetection::kHighLikelihood:
33 return 0;
niklase@google.com470e71d2011-07-07 08:21:25 +000034 }
andrew@webrtc.org648af742012-02-08 01:57:29 +000035 assert(false);
mflodman@webrtc.orgec31bc12012-02-06 12:42:45 +000036 return -1;
niklase@google.com470e71d2011-07-07 08:21:25 +000037}
38} // namespace
39
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000040VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
41 CriticalSectionWrapper* crit)
42 : ProcessingComponent(),
niklase@google.com470e71d2011-07-07 08:21:25 +000043 apm_(apm),
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000044 crit_(crit),
niklase@google.com470e71d2011-07-07 08:21:25 +000045 stream_has_voice_(false),
46 using_external_vad_(false),
47 likelihood_(kLowLikelihood),
48 frame_size_ms_(10),
49 frame_size_samples_(0) {}
50
51VoiceDetectionImpl::~VoiceDetectionImpl() {}
52
53int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
54 if (!is_component_enabled()) {
55 return apm_->kNoError;
56 }
57
58 if (using_external_vad_) {
59 using_external_vad_ = false;
60 return apm_->kNoError;
61 }
62 assert(audio->samples_per_split_channel() <= 160);
63
pbos@webrtc.orgb7192b82013-04-10 07:50:54 +000064 int16_t* mixed_data = audio->low_pass_split_data(0);
niklase@google.com470e71d2011-07-07 08:21:25 +000065 if (audio->num_channels() > 1) {
66 audio->CopyAndMixLowPass(1);
67 mixed_data = audio->mixed_low_pass_data(0);
68 }
69
70 // TODO(ajm): concatenate data in frame buffer here.
71
andrew@webrtc.orged083d42011-09-19 15:28:51 +000072 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
73 apm_->split_sample_rate_hz(),
74 mixed_data,
75 frame_size_samples_);
76 if (vad_ret == 0) {
niklase@google.com470e71d2011-07-07 08:21:25 +000077 stream_has_voice_ = false;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000078 audio->set_activity(AudioFrame::kVadPassive);
79 } else if (vad_ret == 1) {
niklase@google.com470e71d2011-07-07 08:21:25 +000080 stream_has_voice_ = true;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000081 audio->set_activity(AudioFrame::kVadActive);
niklase@google.com470e71d2011-07-07 08:21:25 +000082 } else {
83 return apm_->kUnspecifiedError;
84 }
85
86 return apm_->kNoError;
87}
88
89int VoiceDetectionImpl::Enable(bool enable) {
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000090 CriticalSectionScoped crit_scoped(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +000091 return EnableComponent(enable);
92}
93
94bool VoiceDetectionImpl::is_enabled() const {
95 return is_component_enabled();
96}
97
98int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
99 using_external_vad_ = true;
100 stream_has_voice_ = has_voice;
101 return apm_->kNoError;
102}
103
104bool VoiceDetectionImpl::stream_has_voice() const {
105 // TODO(ajm): enable this assertion?
106 //assert(using_external_vad_ || is_component_enabled());
107 return stream_has_voice_;
108}
109
110int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
andrew@webrtc.org56e4a052014-02-27 22:23:17 +0000111 CriticalSectionScoped crit_scoped(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000112 if (MapSetting(likelihood) == -1) {
113 return apm_->kBadParameterError;
114 }
115
116 likelihood_ = likelihood;
117 return Configure();
118}
119
120VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
121 return likelihood_;
122}
123
124int VoiceDetectionImpl::set_frame_size_ms(int size) {
andrew@webrtc.org56e4a052014-02-27 22:23:17 +0000125 CriticalSectionScoped crit_scoped(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000126 assert(size == 10); // TODO(ajm): remove when supported.
127 if (size != 10 &&
128 size != 20 &&
129 size != 30) {
130 return apm_->kBadParameterError;
131 }
132
133 frame_size_ms_ = size;
134
135 return Initialize();
136}
137
138int VoiceDetectionImpl::frame_size_ms() const {
139 return frame_size_ms_;
140}
141
142int VoiceDetectionImpl::Initialize() {
143 int err = ProcessingComponent::Initialize();
144 if (err != apm_->kNoError || !is_component_enabled()) {
145 return err;
146 }
147
148 using_external_vad_ = false;
149 frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
150 // TODO(ajm): intialize frame buffer here.
151
152 return apm_->kNoError;
153}
154
niklase@google.com470e71d2011-07-07 08:21:25 +0000155void* VoiceDetectionImpl::CreateHandle() const {
156 Handle* handle = NULL;
157 if (WebRtcVad_Create(&handle) != apm_->kNoError) {
158 handle = NULL;
159 } else {
160 assert(handle != NULL);
161 }
162
163 return handle;
164}
165
166int VoiceDetectionImpl::DestroyHandle(void* handle) const {
167 return WebRtcVad_Free(static_cast<Handle*>(handle));
168}
169
170int VoiceDetectionImpl::InitializeHandle(void* handle) const {
171 return WebRtcVad_Init(static_cast<Handle*>(handle));
172}
173
174int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
175 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
176 MapSetting(likelihood_));
177}
178
179int VoiceDetectionImpl::num_handles_required() const {
180 return 1;
181}
182
183int VoiceDetectionImpl::GetHandleError(void* handle) const {
184 // The VAD has no get_error() function.
185 assert(handle != NULL);
186 return apm_->kUnspecifiedError;
187}
188} // namespace webrtc