Blame - modules/audio_processing/vad/vad_audio_proc.h - webrtc.googlesource.com/src

blob: cbdd7071296cd2f7751849e27d9b226e7ca850a0 [file] [log] [blame]

pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	11	#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
				12	#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	13
Yves Gerey	988cc08	2018-10-23 12:03:01 +0200	[diff] [blame]	14	#include <stddef.h>
				15	#include <stdint.h>
Jonas Olsson	a4d8737	2019-07-05 19:08:33 +0200	[diff] [blame]	16
kwiberg	dabf07f	2016-02-17 07:59:48 -0800	[diff] [blame]	17	#include <memory>
				18
Yves Gerey	988cc08	2018-10-23 12:03:01 +0200	[diff] [blame]	19	#include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	20
				21	namespace webrtc {
				22
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	23	class PoleZeroFilter;
				24
aluebs	ecf6b81	2015-06-25 12:28:48 -0700	[diff] [blame]	25	class VadAudioProc {
					// Declares a feature-extraction entry point (ExtractFeatures) together with
					// the fixed-size buffers and compile-time constants that size them.
					// NOTE(review): presumably the audio front-end of the voice activity
					// detector; confirm against the implementation file.
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	26	public:
				27	// Forward declare iSAC structs.
				28	struct PitchAnalysisStruct;
				29	struct PreFiltBankstr;
				30
aluebs	ecf6b81	2015-06-25 12:28:48 -0700	[diff] [blame]	31	VadAudioProc();
				32	~VadAudioProc();
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	33
					// Takes `length` 16-bit samples starting at `audio_frame`.
					// `audio_features` is a non-const pointer, presumably the output.
					// NOTE(review): the int return is presumably kNoError (0) on success;
					// confirm the error values in the implementation.
				34	int ExtractFeatures(const int16_t* audio_frame,
Peter Kasting	dce40cf	2015-08-24 14:52:23 -0700	[diff] [blame]	35	size_t length,
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	36	AudioFeatures* audio_features);
				37
					// DFT size; kIpLength and kWLength below are both derived from it.
Peter Kasting	662d7f1	2022-05-04 12:57:00 -0700	[diff] [blame]	38	static constexpr size_t kDftSize = 512;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	39
				40	private:
					// Private helpers. Each takes an output pointer plus the element count of
					// that output. NOTE(review): exact semantics live in the .cc; confirm there.
Peter Kasting	dce40cf	2015-08-24 14:52:23 -0700	[diff] [blame]	41	void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
				42	void SubframeCorrelation(double* corr,
				43	size_t length_corr,
				44	size_t subframe_index);
				45	void GetLpcPolynomials(double* lpc, size_t length_lpc);
				46	void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
				47	void Rms(double* rms, size_t length_rms);
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	48	void ResetBuffer();
				49
				50	// To compute the spectral peaks we perform LPC analysis to get the spectral
				51	// envelope. For every 30 ms we compute 3 spectral peaks, therefore 3 LPC
				52	// analyses. LPC is computed over 15 ms of windowed audio. For every 10 ms
				53	// sub-frame we need 5 ms of past signal to create the input of LPC analysis.
					// kSampleRateHz / 200 is the number of samples in 5 ms.
Peter Kasting	662d7f1	2022-05-04 12:57:00 -0700	[diff] [blame]	54	static constexpr size_t kNumPastSignalSamples =
				55	static_cast<size_t>(kSampleRateHz / 200);
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	56
				57	// TODO(turajs): consider defining this at a higher level (maybe as an enum)
				58	// so that all the code recognizes it as "no-error."
Peter Kasting	662d7f1	2022-05-04 12:57:00 -0700	[diff] [blame]	59	static constexpr int kNoError = 0;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	60
					// Number of 10 ms sub-frames per processed frame, and the number of samples
					// in one such sub-frame (kSampleRateHz / 100).
Peter Kasting	662d7f1	2022-05-04 12:57:00 -0700	[diff] [blame]	61	static constexpr size_t kNum10msSubframes = 3;
				62	static constexpr size_t kNumSubframeSamples =
				63	static_cast<size_t>(kSampleRateHz / 100);
				64	// Samples in 30 ms @ given sampling rate.
				65	static constexpr size_t kNumSamplesToProcess =
				66	size_t{kNum10msSubframes} * kNumSubframeSamples;
					// Past-signal samples plus the samples to process; sizes audio_buffer_.
				67	static constexpr size_t kBufferLength =
				68	size_t{kNumPastSignalSamples} + kNumSamplesToProcess;
					// Both are half of kDftSize; they size ip_ and w_fft_ respectively.
				69	static constexpr size_t kIpLength = kDftSize >> 1;
				70	static constexpr size_t kWLength = kDftSize >> 1;
				71	static constexpr size_t kLpcOrder = 16;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	72
					// NOTE(review): presumably work areas for the FFT routine used by the
					// implementation; confirm in the .cc.
Peter Kasting	dce40cf	2015-08-24 14:52:23 -0700	[diff] [blame]	73	size_t ip_[kIpLength];
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	74	float w_fft_[kWLength];
				75
				76	// A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
				77	float audio_buffer_[kBufferLength];
					// NOTE(review): presumably the count of valid samples currently held in
					// audio_buffer_; confirm in the .cc.
Peter Kasting	dce40cf	2015-08-24 14:52:23 -0700	[diff] [blame]	78	size_t num_buffer_samples_;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	79
				80	double log_old_gain_;
				81	double old_lag_;
				82
					// Owned through pointers to types that are only forward-declared in this
					// header, so their full definitions are needed only where these members
					// are created and destroyed.
kwiberg	dabf07f	2016-02-17 07:59:48 -0800	[diff] [blame]	83	std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
				84	std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
				85	std::unique_ptr<PoleZeroFilter> high_pass_filter_;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	86	};
				87
				88	} // namespace webrtc
				89
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	90	#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_