blob: cbdd7071296cd2f7751849e27d9b226e7ca850a0 [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
12#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
pbos@webrtc.org788acd12014-12-15 09:41:24 +000013
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stddef.h>
15#include <stdint.h>
Jonas Olssona4d87372019-07-05 19:08:33 +020016
kwibergdabf07f2016-02-17 07:59:48 -080017#include <memory>
18
Yves Gerey988cc082018-10-23 12:03:01 +020019#include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
pbos@webrtc.org788acd12014-12-15 09:41:24 +000020
21namespace webrtc {
22
// Forward declaration only; the full definition is not needed in this header
// because the type is held exclusively through a std::unique_ptr member.
class PoleZeroFilter;
24
aluebsecf6b812015-06-25 12:28:48 -070025class VadAudioProc {
pbos@webrtc.org788acd12014-12-15 09:41:24 +000026 public:
27 // Forward declare iSAC structs.
28 struct PitchAnalysisStruct;
29 struct PreFiltBankstr;
30
aluebsecf6b812015-06-25 12:28:48 -070031 VadAudioProc();
32 ~VadAudioProc();
pbos@webrtc.org788acd12014-12-15 09:41:24 +000033
34 int ExtractFeatures(const int16_t* audio_frame,
Peter Kastingdce40cf2015-08-24 14:52:23 -070035 size_t length,
pbos@webrtc.org788acd12014-12-15 09:41:24 +000036 AudioFeatures* audio_features);
37
Peter Kasting662d7f12022-05-04 12:57:00 -070038 static constexpr size_t kDftSize = 512;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000039
40 private:
Peter Kastingdce40cf2015-08-24 14:52:23 -070041 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
42 void SubframeCorrelation(double* corr,
43 size_t length_corr,
44 size_t subframe_index);
45 void GetLpcPolynomials(double* lpc, size_t length_lpc);
46 void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
47 void Rms(double* rms, size_t length_rms);
pbos@webrtc.org788acd12014-12-15 09:41:24 +000048 void ResetBuffer();
49
50 // To compute spectral peak we perform LPC analysis to get spectral envelope.
51 // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
52 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
53 // we need 5 ms of past signal to create the input of LPC analysis.
Peter Kasting662d7f12022-05-04 12:57:00 -070054 static constexpr size_t kNumPastSignalSamples =
55 static_cast<size_t>(kSampleRateHz / 200);
pbos@webrtc.org788acd12014-12-15 09:41:24 +000056
57 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
58 // all the code recognize it as "no-error."
Peter Kasting662d7f12022-05-04 12:57:00 -070059 static constexpr int kNoError = 0;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000060
Peter Kasting662d7f12022-05-04 12:57:00 -070061 static constexpr size_t kNum10msSubframes = 3;
62 static constexpr size_t kNumSubframeSamples =
63 static_cast<size_t>(kSampleRateHz / 100);
64 // Samples in 30 ms @ given sampling rate.
65 static constexpr size_t kNumSamplesToProcess =
66 size_t{kNum10msSubframes} * kNumSubframeSamples;
67 static constexpr size_t kBufferLength =
68 size_t{kNumPastSignalSamples} + kNumSamplesToProcess;
69 static constexpr size_t kIpLength = kDftSize >> 1;
70 static constexpr size_t kWLength = kDftSize >> 1;
71 static constexpr size_t kLpcOrder = 16;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000072
Peter Kastingdce40cf2015-08-24 14:52:23 -070073 size_t ip_[kIpLength];
pbos@webrtc.org788acd12014-12-15 09:41:24 +000074 float w_fft_[kWLength];
75
76 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ).
77 float audio_buffer_[kBufferLength];
Peter Kastingdce40cf2015-08-24 14:52:23 -070078 size_t num_buffer_samples_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000079
80 double log_old_gain_;
81 double old_lag_;
82
kwibergdabf07f2016-02-17 07:59:48 -080083 std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
84 std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
85 std::unique_ptr<PoleZeroFilter> high_pass_filter_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000086};
87
88} // namespace webrtc
89
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020090#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_