blob: 8a505efdbd24f0486b9caf26763d9dba3d26cbb2 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "voice_detection_impl.h"
12
13#include <cassert>
14
15#include "critical_section_wrapper.h"
16#include "webrtc_vad.h"
17
18#include "audio_processing_impl.h"
19#include "audio_buffer.h"
20
21namespace webrtc {
22
23typedef VadInst Handle;
24
25namespace {
bjornv@webrtc.orgf4b77fd2012-01-25 12:40:00 +000026int MapSetting(VoiceDetection::Likelihood likelihood) {
niklase@google.com470e71d2011-07-07 08:21:25 +000027 switch (likelihood) {
28 case VoiceDetection::kVeryLowLikelihood:
29 return 3;
niklase@google.com470e71d2011-07-07 08:21:25 +000030 case VoiceDetection::kLowLikelihood:
31 return 2;
niklase@google.com470e71d2011-07-07 08:21:25 +000032 case VoiceDetection::kModerateLikelihood:
33 return 1;
niklase@google.com470e71d2011-07-07 08:21:25 +000034 case VoiceDetection::kHighLikelihood:
35 return 0;
niklase@google.com470e71d2011-07-07 08:21:25 +000036 }
andrew@webrtc.org648af742012-02-08 01:57:29 +000037 assert(false);
mflodman@webrtc.orgec31bc12012-02-06 12:42:45 +000038 return -1;
niklase@google.com470e71d2011-07-07 08:21:25 +000039}
40} // namespace
41
niklase@google.com470e71d2011-07-07 08:21:25 +000042VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
43 : ProcessingComponent(apm),
44 apm_(apm),
45 stream_has_voice_(false),
46 using_external_vad_(false),
47 likelihood_(kLowLikelihood),
48 frame_size_ms_(10),
49 frame_size_samples_(0) {}
50
51VoiceDetectionImpl::~VoiceDetectionImpl() {}
52
53int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
54 if (!is_component_enabled()) {
55 return apm_->kNoError;
56 }
57
58 if (using_external_vad_) {
59 using_external_vad_ = false;
60 return apm_->kNoError;
61 }
62 assert(audio->samples_per_split_channel() <= 160);
63
pbos@webrtc.orgb7192b82013-04-10 07:50:54 +000064 int16_t* mixed_data = audio->low_pass_split_data(0);
niklase@google.com470e71d2011-07-07 08:21:25 +000065 if (audio->num_channels() > 1) {
66 audio->CopyAndMixLowPass(1);
67 mixed_data = audio->mixed_low_pass_data(0);
68 }
69
70 // TODO(ajm): concatenate data in frame buffer here.
71
andrew@webrtc.orged083d42011-09-19 15:28:51 +000072 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
73 apm_->split_sample_rate_hz(),
74 mixed_data,
75 frame_size_samples_);
76 if (vad_ret == 0) {
niklase@google.com470e71d2011-07-07 08:21:25 +000077 stream_has_voice_ = false;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000078 audio->set_activity(AudioFrame::kVadPassive);
79 } else if (vad_ret == 1) {
niklase@google.com470e71d2011-07-07 08:21:25 +000080 stream_has_voice_ = true;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000081 audio->set_activity(AudioFrame::kVadActive);
niklase@google.com470e71d2011-07-07 08:21:25 +000082 } else {
83 return apm_->kUnspecifiedError;
84 }
85
86 return apm_->kNoError;
87}
88
89int VoiceDetectionImpl::Enable(bool enable) {
andrew@webrtc.org40654032012-01-30 20:51:15 +000090 CriticalSectionScoped crit_scoped(apm_->crit());
niklase@google.com470e71d2011-07-07 08:21:25 +000091 return EnableComponent(enable);
92}
93
94bool VoiceDetectionImpl::is_enabled() const {
95 return is_component_enabled();
96}
97
98int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
99 using_external_vad_ = true;
100 stream_has_voice_ = has_voice;
101 return apm_->kNoError;
102}
103
104bool VoiceDetectionImpl::stream_has_voice() const {
105 // TODO(ajm): enable this assertion?
106 //assert(using_external_vad_ || is_component_enabled());
107 return stream_has_voice_;
108}
109
110int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
andrew@webrtc.org40654032012-01-30 20:51:15 +0000111 CriticalSectionScoped crit_scoped(apm_->crit());
niklase@google.com470e71d2011-07-07 08:21:25 +0000112 if (MapSetting(likelihood) == -1) {
113 return apm_->kBadParameterError;
114 }
115
116 likelihood_ = likelihood;
117 return Configure();
118}
119
120VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
121 return likelihood_;
122}
123
124int VoiceDetectionImpl::set_frame_size_ms(int size) {
andrew@webrtc.org40654032012-01-30 20:51:15 +0000125 CriticalSectionScoped crit_scoped(apm_->crit());
niklase@google.com470e71d2011-07-07 08:21:25 +0000126 assert(size == 10); // TODO(ajm): remove when supported.
127 if (size != 10 &&
128 size != 20 &&
129 size != 30) {
130 return apm_->kBadParameterError;
131 }
132
133 frame_size_ms_ = size;
134
135 return Initialize();
136}
137
138int VoiceDetectionImpl::frame_size_ms() const {
139 return frame_size_ms_;
140}
141
142int VoiceDetectionImpl::Initialize() {
143 int err = ProcessingComponent::Initialize();
144 if (err != apm_->kNoError || !is_component_enabled()) {
145 return err;
146 }
147
148 using_external_vad_ = false;
149 frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
150 // TODO(ajm): intialize frame buffer here.
151
152 return apm_->kNoError;
153}
154
niklase@google.com470e71d2011-07-07 08:21:25 +0000155void* VoiceDetectionImpl::CreateHandle() const {
156 Handle* handle = NULL;
157 if (WebRtcVad_Create(&handle) != apm_->kNoError) {
158 handle = NULL;
159 } else {
160 assert(handle != NULL);
161 }
162
163 return handle;
164}
165
166int VoiceDetectionImpl::DestroyHandle(void* handle) const {
167 return WebRtcVad_Free(static_cast<Handle*>(handle));
168}
169
170int VoiceDetectionImpl::InitializeHandle(void* handle) const {
171 return WebRtcVad_Init(static_cast<Handle*>(handle));
172}
173
174int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
175 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
176 MapSetting(likelihood_));
177}
178
179int VoiceDetectionImpl::num_handles_required() const {
180 return 1;
181}
182
183int VoiceDetectionImpl::GetHandleError(void* handle) const {
184 // The VAD has no get_error() function.
185 assert(handle != NULL);
186 return apm_->kUnspecifiedError;
187}
188} // namespace webrtc