/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
Alex Loiko | db6af36 | 2018-06-20 14:14:18 +0200 | [diff] [blame] | 11 | #ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_ |
| 12 | #define MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_ |
Alex Loiko | 2bac896 | 2018-03-27 13:38:36 +0200 | [diff] [blame] | 13 | |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 14 | #include <memory> |
| 15 | |
Alessio Bazzica | 253f836 | 2020-11-27 16:02:38 +0100 | [diff] [blame] | 16 | #include "modules/audio_processing/agc2/cpu_features.h" |
Alex Loiko | 2bac896 | 2018-03-27 13:38:36 +0200 | [diff] [blame] | 17 | #include "modules/audio_processing/include/audio_frame_view.h" |
| 18 | |
| 19 | namespace webrtc { |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 20 | |
| 21 | // Class to analyze voice activity and audio levels. |
| 22 | class VadLevelAnalyzer { |
Alex Loiko | 2bac896 | 2018-03-27 13:38:36 +0200 | [diff] [blame] | 23 | public: |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 24 | struct Result { |
| 25 | float speech_probability; // Range: [0, 1]. |
| 26 | float rms_dbfs; // Root mean square power (dBFS). |
| 27 | float peak_dbfs; // Peak power (dBFS). |
Alex Loiko | 2bac896 | 2018-03-27 13:38:36 +0200 | [diff] [blame] | 28 | }; |
| 29 | |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 30 | // Voice Activity Detector (VAD) interface. |
| 31 | class VoiceActivityDetector { |
| 32 | public: |
| 33 | virtual ~VoiceActivityDetector() = default; |
Alessio Bazzica | 841d74e | 2021-03-31 15:04:03 +0200 | [diff] [blame] | 34 | // Resets the internal state. |
| 35 | virtual void Reset() = 0; |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 36 | // Analyzes an audio frame and returns the speech probability. |
| 37 | virtual float ComputeProbability(AudioFrameView<const float> frame) = 0; |
| 38 | }; |
Alex Loiko | db6af36 | 2018-06-20 14:14:18 +0200 | [diff] [blame] | 39 | |
Alessio Bazzica | 841d74e | 2021-03-31 15:04:03 +0200 | [diff] [blame] | 40 | // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call |
| 41 | // `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 42 | // duration of two frames. Uses `cpu_features` to instantiate the default VAD. |
Alessio Bazzica | 841d74e | 2021-03-31 15:04:03 +0200 | [diff] [blame] | 43 | VadLevelAnalyzer(int vad_reset_period_ms, |
Alessio Bazzica | 253f836 | 2020-11-27 16:02:38 +0100 | [diff] [blame] | 44 | const AvailableCpuFeatures& cpu_features); |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 45 | // Ctor. Uses a custom `vad`. |
Alessio Bazzica | 841d74e | 2021-03-31 15:04:03 +0200 | [diff] [blame] | 46 | VadLevelAnalyzer(int vad_reset_period_ms, |
Alessio Bazzica | c1ece01 | 2020-09-25 14:31:17 +0200 | [diff] [blame] | 47 | std::unique_ptr<VoiceActivityDetector> vad); |
Alessio Bazzica | 841d74e | 2021-03-31 15:04:03 +0200 | [diff] [blame] | 48 | |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 49 | VadLevelAnalyzer(const VadLevelAnalyzer&) = delete; |
| 50 | VadLevelAnalyzer& operator=(const VadLevelAnalyzer&) = delete; |
| 51 | ~VadLevelAnalyzer(); |
| 52 | |
| 53 | // Computes the speech probability and the level for `frame`. |
| 54 | Result AnalyzeFrame(AudioFrameView<const float> frame); |
Alex Loiko | db6af36 | 2018-06-20 14:14:18 +0200 | [diff] [blame] | 55 | |
| 56 | private: |
Alessio Bazzica | 530781d | 2020-09-25 13:24:36 +0200 | [diff] [blame] | 57 | std::unique_ptr<VoiceActivityDetector> vad_; |
Alessio Bazzica | 841d74e | 2021-03-31 15:04:03 +0200 | [diff] [blame] | 58 | const int vad_reset_period_frames_; |
Alessio Bazzica | 841d74e | 2021-03-31 15:04:03 +0200 | [diff] [blame] | 59 | int time_to_vad_reset_; |
Alex Loiko | 2bac896 | 2018-03-27 13:38:36 +0200 | [diff] [blame] | 60 | }; |
| 61 | |
| 62 | } // namespace webrtc |
| 63 | |
Alex Loiko | db6af36 | 2018-06-20 14:14:18 +0200 | [diff] [blame] | 64 | #endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_ |