/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
| 11 | #include "modules/audio_processing/agc2/saturation_protector.h" |
| 12 | |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 13 | #include <memory> |
| 14 | |
| 15 | #include "modules/audio_processing/agc2/agc2_common.h" |
| 16 | #include "modules/audio_processing/agc2/saturation_protector_buffer.h" |
Alex Loiko | 1e48e80 | 2018-03-28 09:45:29 +0200 | [diff] [blame] | 17 | #include "modules/audio_processing/logging/apm_data_dumper.h" |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 18 | #include "rtc_base/checks.h" |
Alex Loiko | 1e48e80 | 2018-03-28 09:45:29 +0200 | [diff] [blame] | 19 | #include "rtc_base/numerics/safe_minmax.h" |
| 20 | |
| 21 | namespace webrtc { |
Alex Loiko | 9917c4a | 2018-04-04 14:16:10 +0200 | [diff] [blame] | 22 | namespace { |
Alessio Bazzica | 10f6ead | 2020-09-22 14:44:06 +0200 | [diff] [blame] | 23 | |
// Time, in ms, that must be exceeded since the last ring buffer push before
// the running max peak is pushed into the peak delay buffer.
constexpr int kPeakEnveloperSuperFrameLengthMs{400};
// Lower and upper bounds, in dB, to which the estimated headroom is clamped.
constexpr float kMinMarginDb{12.0f};
constexpr float kMaxMarginDb{25.0f};
// Smoothing coefficients of the headroom update. `kAttack` is used when the
// observed peak-to-speech-level difference is above the current headroom,
// `kDecay` otherwise.
constexpr float kAttack{0.9988493699365052f};
constexpr float kDecay{0.9997697679981565f};
Alessio Bazzica | 10f6ead | 2020-09-22 14:44:06 +0200 | [diff] [blame] | 29 | |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 30 | // Saturation protector state. Defined outside of `SaturationProtectorImpl` to |
| 31 | // implement check-point and restore ops. |
| 32 | struct SaturationProtectorState { |
| 33 | bool operator==(const SaturationProtectorState& s) const { |
| 34 | return headroom_db == s.headroom_db && |
| 35 | peak_delay_buffer == s.peak_delay_buffer && |
| 36 | max_peaks_dbfs == s.max_peaks_dbfs && |
| 37 | time_since_push_ms == s.time_since_push_ms; |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 38 | } |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 39 | inline bool operator!=(const SaturationProtectorState& s) const { |
| 40 | return !(*this == s); |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 41 | } |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 42 | |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 43 | float headroom_db; |
| 44 | SaturationProtectorBuffer peak_delay_buffer; |
| 45 | float max_peaks_dbfs; |
| 46 | int time_since_push_ms; // Time since the last ring buffer push operation. |
| 47 | }; |
Alessio Bazzica | 10f6ead | 2020-09-22 14:44:06 +0200 | [diff] [blame] | 48 | |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 49 | // Resets the saturation protector state. |
| 50 | void ResetSaturationProtectorState(float initial_headroom_db, |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 51 | SaturationProtectorState& state) { |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 52 | state.headroom_db = initial_headroom_db; |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 53 | state.peak_delay_buffer.Reset(); |
| 54 | state.max_peaks_dbfs = kMinLevelDbfs; |
| 55 | state.time_since_push_ms = 0; |
Alessio Bazzica | 736ff83 | 2020-09-23 09:04:48 +0200 | [diff] [blame] | 56 | } |
| 57 | |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 58 | // Updates `state` by analyzing the estimated speech level `speech_level_dbfs` |
| 59 | // and the peak level `peak_dbfs` for an observed frame. `state` must not be |
| 60 | // modified without calling this function. |
| 61 | void UpdateSaturationProtectorState(float peak_dbfs, |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 62 | float speech_level_dbfs, |
| 63 | SaturationProtectorState& state) { |
Alessio Bazzica | 736ff83 | 2020-09-23 09:04:48 +0200 | [diff] [blame] | 64 | // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms. |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 65 | state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs); |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 66 | state.time_since_push_ms += kFrameDurationMs; |
| 67 | if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) { |
| 68 | // Push `max_peaks_dbfs` back into the ring buffer. |
| 69 | state.peak_delay_buffer.PushBack(state.max_peaks_dbfs); |
Alessio Bazzica | 736ff83 | 2020-09-23 09:04:48 +0200 | [diff] [blame] | 70 | // Reset. |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 71 | state.max_peaks_dbfs = kMinLevelDbfs; |
| 72 | state.time_since_push_ms = 0; |
Alessio Bazzica | 736ff83 | 2020-09-23 09:04:48 +0200 | [diff] [blame] | 73 | } |
| 74 | |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 75 | // Update the headroom by comparing the estimated speech level and the delayed |
| 76 | // max speech peak. |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 77 | const float delayed_peak_dbfs = |
| 78 | state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs); |
| 79 | const float difference_db = delayed_peak_dbfs - speech_level_dbfs; |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 80 | if (difference_db > state.headroom_db) { |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 81 | // Attack. |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 82 | state.headroom_db = |
| 83 | state.headroom_db * kAttack + difference_db * (1.0f - kAttack); |
Alessio Bazzica | 736ff83 | 2020-09-23 09:04:48 +0200 | [diff] [blame] | 84 | } else { |
Alessio Bazzica | 56f63c3 | 2020-09-29 11:56:38 +0200 | [diff] [blame] | 85 | // Decay. |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 86 | state.headroom_db = |
| 87 | state.headroom_db * kDecay + difference_db * (1.0f - kDecay); |
Alessio Bazzica | 736ff83 | 2020-09-23 09:04:48 +0200 | [diff] [blame] | 88 | } |
| 89 | |
Alessio Bazzica | 980c460 | 2021-04-14 19:09:17 +0200 | [diff] [blame] | 90 | state.headroom_db = |
| 91 | rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb); |
| 92 | } |
| 93 | |
| 94 | // Saturation protector which recommends a headroom based on the recent peaks. |
| 95 | class SaturationProtectorImpl : public SaturationProtector { |
| 96 | public: |
| 97 | explicit SaturationProtectorImpl(float initial_headroom_db, |
| 98 | float extra_headroom_db, |
| 99 | int adjacent_speech_frames_threshold, |
| 100 | ApmDataDumper* apm_data_dumper) |
| 101 | : apm_data_dumper_(apm_data_dumper), |
| 102 | initial_headroom_db_(initial_headroom_db), |
| 103 | extra_headroom_db_(extra_headroom_db), |
| 104 | adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) { |
| 105 | Reset(); |
| 106 | } |
| 107 | SaturationProtectorImpl(const SaturationProtectorImpl&) = delete; |
| 108 | SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete; |
| 109 | ~SaturationProtectorImpl() = default; |
| 110 | |
| 111 | float HeadroomDb() override { return headroom_db_; } |
| 112 | |
| 113 | void Analyze(float speech_probability, |
| 114 | float peak_dbfs, |
| 115 | float speech_level_dbfs) override { |
| 116 | if (speech_probability < kVadConfidenceThreshold) { |
| 117 | // Not a speech frame. |
| 118 | if (adjacent_speech_frames_threshold_ > 1) { |
| 119 | // When two or more adjacent speech frames are required in order to |
| 120 | // update the state, we need to decide whether to discard or confirm the |
| 121 | // updates based on the speech sequence length. |
| 122 | if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { |
| 123 | // First non-speech frame after a long enough sequence of speech |
| 124 | // frames. Update the reliable state. |
| 125 | reliable_state_ = preliminary_state_; |
| 126 | } else if (num_adjacent_speech_frames_ > 0) { |
| 127 | // First non-speech frame after a too short sequence of speech frames. |
| 128 | // Reset to the last reliable state. |
| 129 | preliminary_state_ = reliable_state_; |
| 130 | } |
| 131 | } |
| 132 | num_adjacent_speech_frames_ = 0; |
| 133 | } else { |
| 134 | // Speech frame observed. |
| 135 | num_adjacent_speech_frames_++; |
| 136 | |
| 137 | // Update preliminary level estimate. |
| 138 | UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs, |
| 139 | preliminary_state_); |
| 140 | |
| 141 | if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { |
| 142 | // `preliminary_state_` is now reliable. Update the headroom. |
| 143 | headroom_db_ = preliminary_state_.headroom_db + extra_headroom_db_; |
| 144 | } |
| 145 | } |
| 146 | DumpDebugData(); |
| 147 | } |
| 148 | |
| 149 | void Reset() override { |
| 150 | num_adjacent_speech_frames_ = 0; |
| 151 | headroom_db_ = initial_headroom_db_ + extra_headroom_db_; |
| 152 | ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_); |
| 153 | ResetSaturationProtectorState(initial_headroom_db_, reliable_state_); |
| 154 | } |
| 155 | |
| 156 | private: |
| 157 | void DumpDebugData() { |
| 158 | apm_data_dumper_->DumpRaw( |
| 159 | "agc2_saturation_protector_preliminary_max_peak_dbfs", |
| 160 | preliminary_state_.max_peaks_dbfs); |
| 161 | apm_data_dumper_->DumpRaw( |
| 162 | "agc2_saturation_protector_reliable_max_peak_dbfs", |
| 163 | reliable_state_.max_peaks_dbfs); |
| 164 | } |
| 165 | |
| 166 | ApmDataDumper* const apm_data_dumper_; |
| 167 | const float initial_headroom_db_; |
| 168 | const float extra_headroom_db_; |
| 169 | const int adjacent_speech_frames_threshold_; |
| 170 | int num_adjacent_speech_frames_; |
| 171 | float headroom_db_; |
| 172 | SaturationProtectorState preliminary_state_; |
| 173 | SaturationProtectorState reliable_state_; |
| 174 | }; |
| 175 | |
| 176 | } // namespace |
| 177 | |
| 178 | std::unique_ptr<SaturationProtector> CreateSaturationProtector( |
| 179 | float initial_headroom_db, |
| 180 | float extra_headroom_db, |
| 181 | int adjacent_speech_frames_threshold, |
| 182 | ApmDataDumper* apm_data_dumper) { |
| 183 | return std::make_unique<SaturationProtectorImpl>( |
| 184 | initial_headroom_db, extra_headroom_db, adjacent_speech_frames_threshold, |
| 185 | apm_data_dumper); |
Alex Loiko | 9917c4a | 2018-04-04 14:16:10 +0200 | [diff] [blame] | 186 | } |
| 187 | |
Alex Loiko | 1e48e80 | 2018-03-28 09:45:29 +0200 | [diff] [blame] | 188 | } // namespace webrtc |