/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
aluebsecf6b812015-06-25 12:28:48 -070011#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
12#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
pbos@webrtc.org788acd12014-12-15 09:41:24 +000013
kwibergdabf07f2016-02-17 07:59:48 -080014#include <memory>
15
aluebsecf6b812015-06-25 12:28:48 -070016#include "webrtc/modules/audio_processing/vad/common.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000017#include "webrtc/typedefs.h"
18
19namespace webrtc {
20
21class AudioFrame;
22class PoleZeroFilter;
23
aluebsecf6b812015-06-25 12:28:48 -070024class VadAudioProc {
pbos@webrtc.org788acd12014-12-15 09:41:24 +000025 public:
26 // Forward declare iSAC structs.
27 struct PitchAnalysisStruct;
28 struct PreFiltBankstr;
29
aluebsecf6b812015-06-25 12:28:48 -070030 VadAudioProc();
31 ~VadAudioProc();
pbos@webrtc.org788acd12014-12-15 09:41:24 +000032
33 int ExtractFeatures(const int16_t* audio_frame,
Peter Kastingdce40cf2015-08-24 14:52:23 -070034 size_t length,
pbos@webrtc.org788acd12014-12-15 09:41:24 +000035 AudioFeatures* audio_features);
36
Peter Kastingdce40cf2015-08-24 14:52:23 -070037 static const size_t kDftSize = 512;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000038
39 private:
Peter Kastingdce40cf2015-08-24 14:52:23 -070040 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
41 void SubframeCorrelation(double* corr,
42 size_t length_corr,
43 size_t subframe_index);
44 void GetLpcPolynomials(double* lpc, size_t length_lpc);
45 void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
46 void Rms(double* rms, size_t length_rms);
pbos@webrtc.org788acd12014-12-15 09:41:24 +000047 void ResetBuffer();
48
49 // To compute spectral peak we perform LPC analysis to get spectral envelope.
50 // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
51 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
52 // we need 5 ms of past signal to create the input of LPC analysis.
kwiberg9e2be5f2016-09-14 05:23:22 -070053 enum : size_t {
54 kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
55 };
pbos@webrtc.org788acd12014-12-15 09:41:24 +000056
57 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
58 // all the code recognize it as "no-error."
kwiberg9e2be5f2016-09-14 05:23:22 -070059 enum : int { kNoError = 0 };
pbos@webrtc.org788acd12014-12-15 09:41:24 +000060
kwiberg9e2be5f2016-09-14 05:23:22 -070061 enum : size_t { kNum10msSubframes = 3 };
62 enum : size_t {
63 kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
64 };
65 enum : size_t {
66 // Samples in 30 ms @ given sampling rate.
67 kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
68 };
69 enum : size_t {
70 kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
71 };
72 enum : size_t { kIpLength = kDftSize >> 1 };
73 enum : size_t { kWLength = kDftSize >> 1 };
74 enum : size_t { kLpcOrder = 16 };
pbos@webrtc.org788acd12014-12-15 09:41:24 +000075
Peter Kastingdce40cf2015-08-24 14:52:23 -070076 size_t ip_[kIpLength];
pbos@webrtc.org788acd12014-12-15 09:41:24 +000077 float w_fft_[kWLength];
78
79 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ).
80 float audio_buffer_[kBufferLength];
Peter Kastingdce40cf2015-08-24 14:52:23 -070081 size_t num_buffer_samples_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000082
83 double log_old_gain_;
84 double old_lag_;
85
kwibergdabf07f2016-02-17 07:59:48 -080086 std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
87 std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
88 std::unique_ptr<PoleZeroFilter> high_pass_filter_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000089};
90
91} // namespace webrtc
92
aluebsecf6b812015-06-25 12:28:48 -070093#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_