blob: 67a00ced6c1060966991db4b8ec83d59cda0a825 [file] [log] [blame]
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Alex Loikodb6af362018-06-20 14:14:18 +020011#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
12#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
Alex Loiko2bac8962018-03-27 13:38:36 +020013
14#include "api/array_view.h"
Alex Loikodb6af362018-06-20 14:14:18 +020015#include "common_audio/resampler/include/push_resampler.h"
16#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
17#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
Alex Loiko2bac8962018-03-27 13:38:36 +020018#include "modules/audio_processing/include/audio_frame_view.h"
19
20namespace webrtc {
21class VadWithLevel {
22 public:
23 struct LevelAndProbability {
24 constexpr LevelAndProbability(float prob, float rms, float peak)
25 : speech_probability(prob),
26 speech_rms_dbfs(rms),
27 speech_peak_dbfs(peak) {}
28 LevelAndProbability() = default;
29 float speech_probability = 0;
30 float speech_rms_dbfs = 0; // Root mean square in decibels to full-scale.
31 float speech_peak_dbfs = 0;
32 };
33
Alex Loikodb6af362018-06-20 14:14:18 +020034 VadWithLevel();
35 ~VadWithLevel();
36
37 LevelAndProbability AnalyzeFrame(AudioFrameView<const float> frame);
38
39 private:
40 void SetSampleRate(int sample_rate_hz);
41
42 rnn_vad::RnnBasedVad rnn_vad_;
43 rnn_vad::FeaturesExtractor features_extractor_;
44 PushResampler<float> resampler_;
Alex Loiko2bac8962018-03-27 13:38:36 +020045};
46
47} // namespace webrtc
48
Alex Loikodb6af362018-06-20 14:14:18 +020049#endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_