/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000011#include "webrtc/modules/audio_processing/voice_detection_impl.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000012
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000013#include <assert.h>
niklase@google.com470e71d2011-07-07 08:21:25 +000014
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000015#include "webrtc/common_audio/vad/include/webrtc_vad.h"
pbos@webrtc.org7fad4b82013-05-28 08:11:59 +000016#include "webrtc/modules/audio_processing/audio_buffer.h"
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000017#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000018
19namespace webrtc {
20
21typedef VadInst Handle;
22
23namespace {
bjornv@webrtc.orgf4b77fd2012-01-25 12:40:00 +000024int MapSetting(VoiceDetection::Likelihood likelihood) {
niklase@google.com470e71d2011-07-07 08:21:25 +000025 switch (likelihood) {
26 case VoiceDetection::kVeryLowLikelihood:
27 return 3;
niklase@google.com470e71d2011-07-07 08:21:25 +000028 case VoiceDetection::kLowLikelihood:
29 return 2;
niklase@google.com470e71d2011-07-07 08:21:25 +000030 case VoiceDetection::kModerateLikelihood:
31 return 1;
niklase@google.com470e71d2011-07-07 08:21:25 +000032 case VoiceDetection::kHighLikelihood:
33 return 0;
niklase@google.com470e71d2011-07-07 08:21:25 +000034 }
andrew@webrtc.org648af742012-02-08 01:57:29 +000035 assert(false);
mflodman@webrtc.orgec31bc12012-02-06 12:42:45 +000036 return -1;
niklase@google.com470e71d2011-07-07 08:21:25 +000037}
38} // namespace
39
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000040VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
41 CriticalSectionWrapper* crit)
42 : ProcessingComponent(),
niklase@google.com470e71d2011-07-07 08:21:25 +000043 apm_(apm),
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000044 crit_(crit),
niklase@google.com470e71d2011-07-07 08:21:25 +000045 stream_has_voice_(false),
46 using_external_vad_(false),
47 likelihood_(kLowLikelihood),
48 frame_size_ms_(10),
49 frame_size_samples_(0) {}
50
51VoiceDetectionImpl::~VoiceDetectionImpl() {}
52
53int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
54 if (!is_component_enabled()) {
55 return apm_->kNoError;
56 }
57
58 if (using_external_vad_) {
59 using_external_vad_ = false;
60 return apm_->kNoError;
61 }
aluebs@webrtc.orgd35a5c32015-02-10 22:52:15 +000062 assert(audio->num_frames_per_band() <= 160);
niklase@google.com470e71d2011-07-07 08:21:25 +000063
niklase@google.com470e71d2011-07-07 08:21:25 +000064 // TODO(ajm): concatenate data in frame buffer here.
65
andrew@webrtc.orged083d42011-09-19 15:28:51 +000066 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
andrew@webrtc.orgddbb8a22014-04-22 21:00:04 +000067 apm_->proc_split_sample_rate_hz(),
aluebs@webrtc.org2561d522014-07-17 08:27:39 +000068 audio->mixed_low_pass_data(),
andrew@webrtc.orged083d42011-09-19 15:28:51 +000069 frame_size_samples_);
70 if (vad_ret == 0) {
niklase@google.com470e71d2011-07-07 08:21:25 +000071 stream_has_voice_ = false;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000072 audio->set_activity(AudioFrame::kVadPassive);
73 } else if (vad_ret == 1) {
niklase@google.com470e71d2011-07-07 08:21:25 +000074 stream_has_voice_ = true;
andrew@webrtc.orged083d42011-09-19 15:28:51 +000075 audio->set_activity(AudioFrame::kVadActive);
niklase@google.com470e71d2011-07-07 08:21:25 +000076 } else {
77 return apm_->kUnspecifiedError;
78 }
79
80 return apm_->kNoError;
81}
82
83int VoiceDetectionImpl::Enable(bool enable) {
andrew@webrtc.org56e4a052014-02-27 22:23:17 +000084 CriticalSectionScoped crit_scoped(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +000085 return EnableComponent(enable);
86}
87
88bool VoiceDetectionImpl::is_enabled() const {
89 return is_component_enabled();
90}
91
92int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
93 using_external_vad_ = true;
94 stream_has_voice_ = has_voice;
95 return apm_->kNoError;
96}
97
98bool VoiceDetectionImpl::stream_has_voice() const {
99 // TODO(ajm): enable this assertion?
100 //assert(using_external_vad_ || is_component_enabled());
101 return stream_has_voice_;
102}
103
104int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
andrew@webrtc.org56e4a052014-02-27 22:23:17 +0000105 CriticalSectionScoped crit_scoped(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000106 if (MapSetting(likelihood) == -1) {
107 return apm_->kBadParameterError;
108 }
109
110 likelihood_ = likelihood;
111 return Configure();
112}
113
114VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
115 return likelihood_;
116}
117
118int VoiceDetectionImpl::set_frame_size_ms(int size) {
andrew@webrtc.org56e4a052014-02-27 22:23:17 +0000119 CriticalSectionScoped crit_scoped(crit_);
niklase@google.com470e71d2011-07-07 08:21:25 +0000120 assert(size == 10); // TODO(ajm): remove when supported.
121 if (size != 10 &&
122 size != 20 &&
123 size != 30) {
124 return apm_->kBadParameterError;
125 }
126
127 frame_size_ms_ = size;
128
129 return Initialize();
130}
131
132int VoiceDetectionImpl::frame_size_ms() const {
133 return frame_size_ms_;
134}
135
136int VoiceDetectionImpl::Initialize() {
137 int err = ProcessingComponent::Initialize();
138 if (err != apm_->kNoError || !is_component_enabled()) {
139 return err;
140 }
141
142 using_external_vad_ = false;
andrew@webrtc.orgddbb8a22014-04-22 21:00:04 +0000143 frame_size_samples_ = frame_size_ms_ *
144 apm_->proc_split_sample_rate_hz() / 1000;
niklase@google.com470e71d2011-07-07 08:21:25 +0000145 // TODO(ajm): intialize frame buffer here.
146
147 return apm_->kNoError;
148}
149
niklase@google.com470e71d2011-07-07 08:21:25 +0000150void* VoiceDetectionImpl::CreateHandle() const {
Bjorn Volckerde4703c2015-05-27 07:22:58 +0200151 return WebRtcVad_Create();
niklase@google.com470e71d2011-07-07 08:21:25 +0000152}
153
bjornv@webrtc.org5964fe02014-04-22 06:52:28 +0000154void VoiceDetectionImpl::DestroyHandle(void* handle) const {
bjornv@webrtc.org2a796722014-04-22 04:45:35 +0000155 WebRtcVad_Free(static_cast<Handle*>(handle));
niklase@google.com470e71d2011-07-07 08:21:25 +0000156}
157
158int VoiceDetectionImpl::InitializeHandle(void* handle) const {
159 return WebRtcVad_Init(static_cast<Handle*>(handle));
160}
161
162int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
163 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
164 MapSetting(likelihood_));
165}
166
167int VoiceDetectionImpl::num_handles_required() const {
168 return 1;
169}
170
171int VoiceDetectionImpl::GetHandleError(void* handle) const {
172 // The VAD has no get_error() function.
173 assert(handle != NULL);
174 return apm_->kUnspecifiedError;
175}
176} // namespace webrtc