blob: d6f21ef891bcbc3c1f4a8a8951ab81a101d8e73d [file] [log] [blame]
/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
#include "modules/audio_processing/agc2/saturation_protector.h"

#include <algorithm>
#include <memory>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
20
21namespace webrtc {
Alex Loiko9917c4a2018-04-04 14:16:10 +020022namespace {
Alessio Bazzica10f6ead2020-09-22 14:44:06 +020023
// Length of the window over which the maximum peak is tracked before it is
// pushed into the peak delay buffer.
constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
// Lower and upper bounds applied to the estimated headroom.
constexpr float kMinMarginDb = 12.0f;
constexpr float kMaxMarginDb = 25.0f;
// Smoothing coefficients used when the headroom estimate moves towards a
// larger (`kAttack`) or a smaller/equal (`kDecay`) peak-to-speech-level
// difference. The closer to 1, the slower the headroom follows the difference.
constexpr float kAttack = 0.9988493699365052f;
constexpr float kDecay = 0.9997697679981565f;
Alessio Bazzica10f6ead2020-09-22 14:44:06 +020029
Alessio Bazzica980c4602021-04-14 19:09:17 +020030// Saturation protector state. Defined outside of `SaturationProtectorImpl` to
31// implement check-point and restore ops.
32struct SaturationProtectorState {
33 bool operator==(const SaturationProtectorState& s) const {
34 return headroom_db == s.headroom_db &&
35 peak_delay_buffer == s.peak_delay_buffer &&
36 max_peaks_dbfs == s.max_peaks_dbfs &&
37 time_since_push_ms == s.time_since_push_ms;
Alessio Bazzica56f63c32020-09-29 11:56:38 +020038 }
Alessio Bazzica980c4602021-04-14 19:09:17 +020039 inline bool operator!=(const SaturationProtectorState& s) const {
40 return !(*this == s);
Alessio Bazzica56f63c32020-09-29 11:56:38 +020041 }
Alessio Bazzica56f63c32020-09-29 11:56:38 +020042
Alessio Bazzica980c4602021-04-14 19:09:17 +020043 float headroom_db;
44 SaturationProtectorBuffer peak_delay_buffer;
45 float max_peaks_dbfs;
46 int time_since_push_ms; // Time since the last ring buffer push operation.
47};
Alessio Bazzica10f6ead2020-09-22 14:44:06 +020048
Alessio Bazzica980c4602021-04-14 19:09:17 +020049// Resets the saturation protector state.
50void ResetSaturationProtectorState(float initial_headroom_db,
Alessio Bazzica56f63c32020-09-29 11:56:38 +020051 SaturationProtectorState& state) {
Alessio Bazzica980c4602021-04-14 19:09:17 +020052 state.headroom_db = initial_headroom_db;
Alessio Bazzica56f63c32020-09-29 11:56:38 +020053 state.peak_delay_buffer.Reset();
54 state.max_peaks_dbfs = kMinLevelDbfs;
55 state.time_since_push_ms = 0;
Alessio Bazzica736ff832020-09-23 09:04:48 +020056}
57
Alessio Bazzica980c4602021-04-14 19:09:17 +020058// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
59// and the peak level `peak_dbfs` for an observed frame. `state` must not be
60// modified without calling this function.
61void UpdateSaturationProtectorState(float peak_dbfs,
Alessio Bazzica56f63c32020-09-29 11:56:38 +020062 float speech_level_dbfs,
63 SaturationProtectorState& state) {
Alessio Bazzica736ff832020-09-23 09:04:48 +020064 // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
Alessio Bazzica980c4602021-04-14 19:09:17 +020065 state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
Alessio Bazzica56f63c32020-09-29 11:56:38 +020066 state.time_since_push_ms += kFrameDurationMs;
67 if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
68 // Push `max_peaks_dbfs` back into the ring buffer.
69 state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
Alessio Bazzica736ff832020-09-23 09:04:48 +020070 // Reset.
Alessio Bazzica56f63c32020-09-29 11:56:38 +020071 state.max_peaks_dbfs = kMinLevelDbfs;
72 state.time_since_push_ms = 0;
Alessio Bazzica736ff832020-09-23 09:04:48 +020073 }
74
Alessio Bazzica980c4602021-04-14 19:09:17 +020075 // Update the headroom by comparing the estimated speech level and the delayed
76 // max speech peak.
Alessio Bazzica56f63c32020-09-29 11:56:38 +020077 const float delayed_peak_dbfs =
78 state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
79 const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
Alessio Bazzica980c4602021-04-14 19:09:17 +020080 if (difference_db > state.headroom_db) {
Alessio Bazzica56f63c32020-09-29 11:56:38 +020081 // Attack.
Alessio Bazzica980c4602021-04-14 19:09:17 +020082 state.headroom_db =
83 state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
Alessio Bazzica736ff832020-09-23 09:04:48 +020084 } else {
Alessio Bazzica56f63c32020-09-29 11:56:38 +020085 // Decay.
Alessio Bazzica980c4602021-04-14 19:09:17 +020086 state.headroom_db =
87 state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
Alessio Bazzica736ff832020-09-23 09:04:48 +020088 }
89
Alessio Bazzica980c4602021-04-14 19:09:17 +020090 state.headroom_db =
91 rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
92}
93
94// Saturation protector which recommends a headroom based on the recent peaks.
95class SaturationProtectorImpl : public SaturationProtector {
96 public:
97 explicit SaturationProtectorImpl(float initial_headroom_db,
98 float extra_headroom_db,
99 int adjacent_speech_frames_threshold,
100 ApmDataDumper* apm_data_dumper)
101 : apm_data_dumper_(apm_data_dumper),
102 initial_headroom_db_(initial_headroom_db),
103 extra_headroom_db_(extra_headroom_db),
104 adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
105 Reset();
106 }
107 SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
108 SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
109 ~SaturationProtectorImpl() = default;
110
111 float HeadroomDb() override { return headroom_db_; }
112
113 void Analyze(float speech_probability,
114 float peak_dbfs,
115 float speech_level_dbfs) override {
116 if (speech_probability < kVadConfidenceThreshold) {
117 // Not a speech frame.
118 if (adjacent_speech_frames_threshold_ > 1) {
119 // When two or more adjacent speech frames are required in order to
120 // update the state, we need to decide whether to discard or confirm the
121 // updates based on the speech sequence length.
122 if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
123 // First non-speech frame after a long enough sequence of speech
124 // frames. Update the reliable state.
125 reliable_state_ = preliminary_state_;
126 } else if (num_adjacent_speech_frames_ > 0) {
127 // First non-speech frame after a too short sequence of speech frames.
128 // Reset to the last reliable state.
129 preliminary_state_ = reliable_state_;
130 }
131 }
132 num_adjacent_speech_frames_ = 0;
133 } else {
134 // Speech frame observed.
135 num_adjacent_speech_frames_++;
136
137 // Update preliminary level estimate.
138 UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
139 preliminary_state_);
140
141 if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
142 // `preliminary_state_` is now reliable. Update the headroom.
143 headroom_db_ = preliminary_state_.headroom_db + extra_headroom_db_;
144 }
145 }
146 DumpDebugData();
147 }
148
149 void Reset() override {
150 num_adjacent_speech_frames_ = 0;
151 headroom_db_ = initial_headroom_db_ + extra_headroom_db_;
152 ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
153 ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
154 }
155
156 private:
157 void DumpDebugData() {
158 apm_data_dumper_->DumpRaw(
159 "agc2_saturation_protector_preliminary_max_peak_dbfs",
160 preliminary_state_.max_peaks_dbfs);
161 apm_data_dumper_->DumpRaw(
162 "agc2_saturation_protector_reliable_max_peak_dbfs",
163 reliable_state_.max_peaks_dbfs);
164 }
165
166 ApmDataDumper* const apm_data_dumper_;
167 const float initial_headroom_db_;
168 const float extra_headroom_db_;
169 const int adjacent_speech_frames_threshold_;
170 int num_adjacent_speech_frames_;
171 float headroom_db_;
172 SaturationProtectorState preliminary_state_;
173 SaturationProtectorState reliable_state_;
174};
175
176} // namespace
177
178std::unique_ptr<SaturationProtector> CreateSaturationProtector(
179 float initial_headroom_db,
180 float extra_headroom_db,
181 int adjacent_speech_frames_threshold,
182 ApmDataDumper* apm_data_dumper) {
183 return std::make_unique<SaturationProtectorImpl>(
184 initial_headroom_db, extra_headroom_db, adjacent_speech_frames_threshold,
185 apm_data_dumper);
Alex Loiko9917c4a2018-04-04 14:16:10 +0200186}
187
Alex Loiko1e48e802018-03-28 09:45:29 +0200188} // namespace webrtc