blob: 8d2ae45762ed6dd52d0cacb19c21d76a71a7427a [file] [log] [blame]
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Alex Loikodb6af362018-06-20 14:14:18 +020011#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
12#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
Alex Loiko2bac8962018-03-27 13:38:36 +020013
Alessio Bazzica530781d2020-09-25 13:24:36 +020014#include <memory>
15
Alessio Bazzica253f8362020-11-27 16:02:38 +010016#include "modules/audio_processing/agc2/cpu_features.h"
Alex Loiko2bac8962018-03-27 13:38:36 +020017#include "modules/audio_processing/include/audio_frame_view.h"
18
19namespace webrtc {
Alessio Bazzica530781d2020-09-25 13:24:36 +020020
21// Class to analyze voice activity and audio levels.
22class VadLevelAnalyzer {
Alex Loiko2bac8962018-03-27 13:38:36 +020023 public:
Alessio Bazzica530781d2020-09-25 13:24:36 +020024 struct Result {
25 float speech_probability; // Range: [0, 1].
26 float rms_dbfs; // Root mean square power (dBFS).
27 float peak_dbfs; // Peak power (dBFS).
Alex Loiko2bac8962018-03-27 13:38:36 +020028 };
29
Alessio Bazzica530781d2020-09-25 13:24:36 +020030 // Voice Activity Detector (VAD) interface.
31 class VoiceActivityDetector {
32 public:
33 virtual ~VoiceActivityDetector() = default;
Alessio Bazzica841d74e2021-03-31 15:04:03 +020034 // Resets the internal state.
35 virtual void Reset() = 0;
Alessio Bazzica530781d2020-09-25 13:24:36 +020036 // Analyzes an audio frame and returns the speech probability.
37 virtual float ComputeProbability(AudioFrameView<const float> frame) = 0;
38 };
Alex Loikodb6af362018-06-20 14:14:18 +020039
Alessio Bazzica841d74e2021-03-31 15:04:03 +020040 // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
41 // `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the
Alessio Bazzica980c4602021-04-14 19:09:17 +020042 // duration of two frames. Uses `cpu_features` to instantiate the default VAD.
Alessio Bazzica841d74e2021-03-31 15:04:03 +020043 VadLevelAnalyzer(int vad_reset_period_ms,
Alessio Bazzica253f8362020-11-27 16:02:38 +010044 const AvailableCpuFeatures& cpu_features);
Alessio Bazzica530781d2020-09-25 13:24:36 +020045 // Ctor. Uses a custom `vad`.
Alessio Bazzica841d74e2021-03-31 15:04:03 +020046 VadLevelAnalyzer(int vad_reset_period_ms,
Alessio Bazzicac1ece012020-09-25 14:31:17 +020047 std::unique_ptr<VoiceActivityDetector> vad);
Alessio Bazzica841d74e2021-03-31 15:04:03 +020048
Alessio Bazzica530781d2020-09-25 13:24:36 +020049 VadLevelAnalyzer(const VadLevelAnalyzer&) = delete;
50 VadLevelAnalyzer& operator=(const VadLevelAnalyzer&) = delete;
51 ~VadLevelAnalyzer();
52
53 // Computes the speech probability and the level for `frame`.
54 Result AnalyzeFrame(AudioFrameView<const float> frame);
Alex Loikodb6af362018-06-20 14:14:18 +020055
56 private:
Alessio Bazzica530781d2020-09-25 13:24:36 +020057 std::unique_ptr<VoiceActivityDetector> vad_;
Alessio Bazzica841d74e2021-03-31 15:04:03 +020058 const int vad_reset_period_frames_;
Alessio Bazzica841d74e2021-03-31 15:04:03 +020059 int time_to_vad_reset_;
Alex Loiko2bac8962018-03-27 13:38:36 +020060};
61
62} // namespace webrtc
63
Alex Loikodb6af362018-06-20 14:14:18 +020064#endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_