Blame - modules/audio_processing/agc2/vad_with_level.cc - webrtc.googlesource.com/src

blob: 52970dfe677c3d5e71efc2fd2e87ab4ec4a35203 [file] [log] [blame]

Alex Loiko	db6af36	2018-06-20 14:14:18 +0200	[diff] [blame]	1	/*
				2	* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#include "modules/audio_processing/agc2/vad_with_level.h"
				12
				13	#include <algorithm>
Yves Gerey	988cc08	2018-10-23 12:03:01 +0200	[diff] [blame^]	14	#include <array>
				15	#include <cmath>
Alex Loiko	db6af36	2018-06-20 14:14:18 +0200	[diff] [blame]	16
Yves Gerey	988cc08	2018-10-23 12:03:01 +0200	[diff] [blame^]	17	#include "api/array_view.h"
Alex Loiko	db6af36	2018-06-20 14:14:18 +0200	[diff] [blame]	18	#include "common_audio/include/audio_util.h"
				19	#include "modules/audio_processing/agc2/rnn_vad/common.h"
Alex Loiko	db6af36	2018-06-20 14:14:18 +0200	[diff] [blame]	20
				21	namespace webrtc {
				22
				23	namespace {
				24	float ProcessForPeak(AudioFrameView<const float> frame) {
				25	float current_max = 0;
				26	for (const auto& x : frame.channel(0)) {
				27	current_max = std::max(std::fabs(x), current_max);
				28	}
				29	return current_max;
				30	}
				31
				32	float ProcessForRms(AudioFrameView<const float> frame) {
				33	float rms = 0;
				34	for (const auto& x : frame.channel(0)) {
				35	rms += x * x;
				36	}
				37	return sqrt(rms / frame.samples_per_channel());
				38	}
				39	} // namespace
				40
				// Constructor and destructor are explicitly defaulted here, out of line in
				// this translation unit; neither adds any logic of its own.
				41	VadWithLevel::VadWithLevel() = default;
				42	VadWithLevel::~VadWithLevel() = default;
				43
				44	VadWithLevel::LevelAndProbability VadWithLevel::AnalyzeFrame(
				45	AudioFrameView<const float> frame) {
				46	SetSampleRate(static_cast<int>(frame.samples_per_channel() * 100));
				47	std::array<float, rnn_vad::kFrameSize10ms24kHz> work_frame;
				48	// Feed the 1st channel to the resampler.
				49	resampler_.Resample(frame.channel(0).data(), frame.samples_per_channel(),
				50	work_frame.data(), rnn_vad::kFrameSize10ms24kHz);
				51
				52	std::array<float, rnn_vad::kFeatureVectorSize> feature_vector;
				53
				54	const bool is_silence = features_extractor_.CheckSilenceComputeFeatures(
				55	work_frame, feature_vector);
				56	const float vad_probability =
				57	rnn_vad_.ComputeVadProbability(feature_vector, is_silence);
				58	return LevelAndProbability(vad_probability,
				59	FloatS16ToDbfs(ProcessForRms(frame)),
				60	FloatS16ToDbfs(ProcessForPeak(frame)));
				61	}
				62
				63	void VadWithLevel::SetSampleRate(int sample_rate_hz) {
				64	// The source number of channels in 1, because we always use the 1st
				65	// channel.
				66	resampler_.InitializeIfNeeded(sample_rate_hz, rnn_vad::kSampleRate24kHz,
				67	1 /* num_channels */);
				68	}
				69
				70	} // namespace webrtc