/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "voice_detection_impl.h"

#include <cassert>

#include "critical_section_wrapper.h"
#include "webrtc_vad.h"

#include "audio_processing_impl.h"
#include "audio_buffer.h"

namespace webrtc {

typedef VadInst Handle;

namespace {
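// Maps a VoiceDetection likelihood setting to a WebRtcVad operating mode:
// kVeryLowLikelihood selects the most aggressive mode (3) and
// kHighLikelihood the least aggressive (0). Returns -1 for an
// unrecognized setting.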
WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
  switch (likelihood) {
    case VoiceDetection::kVeryLowLikelihood:
      return 3;
    case VoiceDetection::kLowLikelihood:
      return 2;
    case VoiceDetection::kModerateLikelihood:
      return 1;
    case VoiceDetection::kHighLikelihood:
      return 0;
    default:
      return -1;
  }
}
}  // namespace


VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
  : ProcessingComponent(apm),
    apm_(apm),
    stream_has_voice_(false),
    using_external_vad_(false),
    likelihood_(kLowLikelihood),
    frame_size_ms_(10),
    frame_size_samples_(0) {}

VoiceDetectionImpl::~VoiceDetectionImpl() {}

int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
  if (!is_component_enabled()) {
    return apm_->kNoError;
  }

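  // If the application supplied the result via set_stream_has_voice(), skip
  // the internal VAD for this frame and consume the external flag.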
  if (using_external_vad_) {
    using_external_vad_ = false;
    return apm_->kNoError;
  }
  assert(audio->samples_per_split_channel() <= 160);

  WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
  if (audio->num_channels() > 1) {
    audio->CopyAndMixLowPass(1);
    mixed_data = audio->mixed_low_pass_data(0);
  }

  // TODO(ajm): concatenate data in frame buffer here.

  int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
                                  apm_->split_sample_rate_hz(),
                                  mixed_data,
                                  frame_size_samples_);
  if (vad_ret == 0) {
    stream_has_voice_ = false;
    audio->set_activity(AudioFrame::kVadPassive);
  } else if (vad_ret == 1) {
    stream_has_voice_ = true;
    audio->set_activity(AudioFrame::kVadActive);
  } else {
    return apm_->kUnspecifiedError;
  }

  return apm_->kNoError;
}

int VoiceDetectionImpl::Enable(bool enable) {
  CriticalSectionScoped crit_scoped(*apm_->crit());
  return EnableComponent(enable);
}

bool VoiceDetectionImpl::is_enabled() const {
  return is_component_enabled();
}

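// Lets the application inject an externally computed VAD decision; the
// internal VAD is then bypassed on the next ProcessCaptureAudio() call.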
int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
  using_external_vad_ = true;
  stream_has_voice_ = has_voice;
  return apm_->kNoError;
}

bool VoiceDetectionImpl::stream_has_voice() const {
  // TODO(ajm): enable this assertion?
  //assert(using_external_vad_ || is_component_enabled());
  return stream_has_voice_;
}

int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
  CriticalSectionScoped crit_scoped(*apm_->crit());
  if (MapSetting(likelihood) == -1) {
    return apm_->kBadParameterError;
  }

  likelihood_ = likelihood;
  return Configure();
}

VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
  return likelihood_;
}

int VoiceDetectionImpl::set_frame_size_ms(int size) {
  CriticalSectionScoped crit_scoped(*apm_->crit());
  assert(size == 10);  // TODO(ajm): remove when supported.
  if (size != 10 &&
      size != 20 &&
      size != 30) {
    return apm_->kBadParameterError;
  }

  frame_size_ms_ = size;

  return Initialize();
}

int VoiceDetectionImpl::frame_size_ms() const {
  return frame_size_ms_;
}

int VoiceDetectionImpl::Initialize() {
  int err = ProcessingComponent::Initialize();
  if (err != apm_->kNoError || !is_component_enabled()) {
    return err;
  }

  using_external_vad_ = false;
  // Samples per frame at the split (analysis) sample rate.
  frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
  // TODO(ajm): initialize frame buffer here.

  return apm_->kNoError;
}

int VoiceDetectionImpl::get_version(char* version,
                                    int version_len_bytes) const {
  if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
    return apm_->kBadParameterError;
  }

  return apm_->kNoError;
}

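// ProcessingComponent hooks that manage the underlying WebRtcVad instance.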
void* VoiceDetectionImpl::CreateHandle() const {
  Handle* handle = NULL;
  if (WebRtcVad_Create(&handle) != apm_->kNoError) {
    handle = NULL;
  } else {
    assert(handle != NULL);
  }

  return handle;
}

int VoiceDetectionImpl::DestroyHandle(void* handle) const {
  return WebRtcVad_Free(static_cast<Handle*>(handle));
}

int VoiceDetectionImpl::InitializeHandle(void* handle) const {
  return WebRtcVad_Init(static_cast<Handle*>(handle));
}

int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
  return WebRtcVad_set_mode(static_cast<Handle*>(handle),
                            MapSetting(likelihood_));
}

int VoiceDetectionImpl::num_handles_required() const {
  return 1;
}

int VoiceDetectionImpl::GetHandleError(void* handle) const {
  // The VAD has no get_error() function.
  assert(handle != NULL);
  return apm_->kUnspecifiedError;
}
}  // namespace webrtc