blob: b0ad868d4b97c4a6b5360ece1363aa056be0516b [file] [log] [blame]
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Alex Loikodb6af362018-06-20 14:14:18 +020011#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
12#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
Alex Loiko2bac8962018-03-27 13:38:36 +020013
Alex Loikodb6af362018-06-20 14:14:18 +020014#include "common_audio/resampler/include/push_resampler.h"
15#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
16#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
Alex Loiko2bac8962018-03-27 13:38:36 +020017#include "modules/audio_processing/include/audio_frame_view.h"
18
19namespace webrtc {
20class VadWithLevel {
21 public:
22 struct LevelAndProbability {
23 constexpr LevelAndProbability(float prob, float rms, float peak)
24 : speech_probability(prob),
25 speech_rms_dbfs(rms),
26 speech_peak_dbfs(peak) {}
27 LevelAndProbability() = default;
28 float speech_probability = 0;
29 float speech_rms_dbfs = 0; // Root mean square in decibels to full-scale.
30 float speech_peak_dbfs = 0;
31 };
32
Alex Loikodb6af362018-06-20 14:14:18 +020033 VadWithLevel();
34 ~VadWithLevel();
35
36 LevelAndProbability AnalyzeFrame(AudioFrameView<const float> frame);
37
38 private:
39 void SetSampleRate(int sample_rate_hz);
40
41 rnn_vad::RnnBasedVad rnn_vad_;
42 rnn_vad::FeaturesExtractor features_extractor_;
43 PushResampler<float> resampler_;
Alex Loiko2bac8962018-03-27 13:38:36 +020044};
45
46} // namespace webrtc
47
Alex Loikodb6af362018-06-20 14:14:18 +020048#endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_