blob: 49aac2e6742c71a62019a75d831a6b90caa1735c [file] [log] [blame]
/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "voice_detection_impl.h"
12
13#include <cassert>
14
15#include "critical_section_wrapper.h"
16#include "webrtc_vad.h"
17
18#include "audio_processing_impl.h"
19#include "audio_buffer.h"
20
21namespace webrtc {
22
23typedef VadInst Handle;
24
25namespace {
26WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
27 switch (likelihood) {
28 case VoiceDetection::kVeryLowLikelihood:
29 return 3;
30 break;
31 case VoiceDetection::kLowLikelihood:
32 return 2;
33 break;
34 case VoiceDetection::kModerateLikelihood:
35 return 1;
36 break;
37 case VoiceDetection::kHighLikelihood:
38 return 0;
39 break;
40 default:
41 return -1;
42 }
43}
44} // namespace
45
46
47VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
48 : ProcessingComponent(apm),
49 apm_(apm),
50 stream_has_voice_(false),
51 using_external_vad_(false),
52 likelihood_(kLowLikelihood),
53 frame_size_ms_(10),
54 frame_size_samples_(0) {}
55
56VoiceDetectionImpl::~VoiceDetectionImpl() {}
57
58int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
59 if (!is_component_enabled()) {
60 return apm_->kNoError;
61 }
62
63 if (using_external_vad_) {
64 using_external_vad_ = false;
65 return apm_->kNoError;
66 }
67 assert(audio->samples_per_split_channel() <= 160);
68
69 WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
70 if (audio->num_channels() > 1) {
71 audio->CopyAndMixLowPass(1);
72 mixed_data = audio->mixed_low_pass_data(0);
73 }
74
75 // TODO(ajm): concatenate data in frame buffer here.
76
andrew@webrtc.orged083d42011-09-19 15:28:51 +000077 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
78 apm_->split_sample_rate_hz(),
79 mixed_data,
80 frame_size_samples_);
81 if (vad_ret == 0) {
niklase@google.com470e71d2011-07-07 08:21:25 +000082 stream_has_voice_ = false;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000083 audio->set_activity(AudioFrame::kVadPassive);
84 } else if (vad_ret == 1) {
niklase@google.com470e71d2011-07-07 08:21:25 +000085 stream_has_voice_ = true;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000086 audio->set_activity(AudioFrame::kVadActive);
niklase@google.com470e71d2011-07-07 08:21:25 +000087 } else {
88 return apm_->kUnspecifiedError;
89 }
90
91 return apm_->kNoError;
92}
93
94int VoiceDetectionImpl::Enable(bool enable) {
95 CriticalSectionScoped crit_scoped(*apm_->crit());
96 return EnableComponent(enable);
97}
98
99bool VoiceDetectionImpl::is_enabled() const {
100 return is_component_enabled();
101}
102
103int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
104 using_external_vad_ = true;
105 stream_has_voice_ = has_voice;
106 return apm_->kNoError;
107}
108
109bool VoiceDetectionImpl::stream_has_voice() const {
110 // TODO(ajm): enable this assertion?
111 //assert(using_external_vad_ || is_component_enabled());
112 return stream_has_voice_;
113}
114
115int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
116 CriticalSectionScoped crit_scoped(*apm_->crit());
117 if (MapSetting(likelihood) == -1) {
118 return apm_->kBadParameterError;
119 }
120
121 likelihood_ = likelihood;
122 return Configure();
123}
124
125VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
126 return likelihood_;
127}
128
129int VoiceDetectionImpl::set_frame_size_ms(int size) {
130 CriticalSectionScoped crit_scoped(*apm_->crit());
131 assert(size == 10); // TODO(ajm): remove when supported.
132 if (size != 10 &&
133 size != 20 &&
134 size != 30) {
135 return apm_->kBadParameterError;
136 }
137
138 frame_size_ms_ = size;
139
140 return Initialize();
141}
142
143int VoiceDetectionImpl::frame_size_ms() const {
144 return frame_size_ms_;
145}
146
147int VoiceDetectionImpl::Initialize() {
148 int err = ProcessingComponent::Initialize();
149 if (err != apm_->kNoError || !is_component_enabled()) {
150 return err;
151 }
152
153 using_external_vad_ = false;
154 frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
155 // TODO(ajm): intialize frame buffer here.
156
157 return apm_->kNoError;
158}
159
160int VoiceDetectionImpl::get_version(char* version,
161 int version_len_bytes) const {
162 if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
163 return apm_->kBadParameterError;
164 }
165
166 return apm_->kNoError;
167}
168
169void* VoiceDetectionImpl::CreateHandle() const {
170 Handle* handle = NULL;
171 if (WebRtcVad_Create(&handle) != apm_->kNoError) {
172 handle = NULL;
173 } else {
174 assert(handle != NULL);
175 }
176
177 return handle;
178}
179
180int VoiceDetectionImpl::DestroyHandle(void* handle) const {
181 return WebRtcVad_Free(static_cast<Handle*>(handle));
182}
183
184int VoiceDetectionImpl::InitializeHandle(void* handle) const {
185 return WebRtcVad_Init(static_cast<Handle*>(handle));
186}
187
188int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
189 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
190 MapSetting(likelihood_));
191}
192
193int VoiceDetectionImpl::num_handles_required() const {
194 return 1;
195}
196
197int VoiceDetectionImpl::GetHandleError(void* handle) const {
198 // The VAD has no get_error() function.
199 assert(handle != NULL);
200 return apm_->kUnspecifiedError;
201}
202} // namespace webrtc