blob: 58b3a2769cdaa72ff4045c7c23fd006237140198 [file] [log] [blame]
/*
 * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "modules/audio_processing/agc/clipping_predictor.h"
12
13#include <algorithm>
14#include <memory>
15
16#include "common_audio/include/audio_util.h"
17#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
18#include "modules/audio_processing/agc/gain_map_internal.h"
19#include "rtc_base/checks.h"
20#include "rtc_base/logging.h"
21#include "rtc_base/numerics/safe_minmax.h"
22
23namespace webrtc {
24namespace {
25
// Upper bound on the magnitude of the gain reduction requested by the adaptive
// clipped level step estimation: the estimated gain change is clamped to
// [-kClippingPredictorMaxGainChange, 0] before it is mapped to a mic level.
// NOTE(review): presumably expressed in dB, like the `kGainMap` entries it is
// compared against - confirm.
constexpr int kClippingPredictorMaxGainChange = 15;
27
Hanna Silen4b3a0612021-06-02 23:03:24 +020028// Estimates the new level from the gain error; a copy of the function
29// `LevelFromGainError` in agc_manager_direct.cc.
30int LevelFromGainError(int gain_error,
31 int level,
32 int min_mic_level,
33 int max_mic_level) {
34 RTC_DCHECK_GE(level, 0);
35 RTC_DCHECK_LE(level, max_mic_level);
36 if (gain_error == 0) {
37 return level;
38 }
39 int new_level = level;
40 if (gain_error > 0) {
41 while (kGainMap[new_level] - kGainMap[level] < gain_error &&
42 new_level < max_mic_level) {
43 ++new_level;
44 }
45 } else {
46 while (kGainMap[new_level] - kGainMap[level] > gain_error &&
47 new_level > min_mic_level) {
48 --new_level;
49 }
50 }
51 return new_level;
52}
53
54float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
55 const float crest_factor =
56 FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
57 return crest_factor;
58}
59
60// Crest factor-based clipping prediction and clipped level step estimation.
61class ClippingEventPredictor : public ClippingPredictor {
62 public:
63 // ClippingEventPredictor with `num_channels` channels (limited to values
64 // higher than zero); window size `window_length` and reference window size
65 // `reference_window_length` (both referring to the number of frames in the
66 // respective sliding windows and limited to values higher than zero);
67 // reference window delay `reference_window_delay` (delay in frames, limited
68 // to values zero and higher with an additional requirement of
69 // `window_length` < `reference_window_length` + reference_window_delay`);
70 // and an estimation peak threshold `clipping_threshold` and a crest factor
71 // drop threshold `crest_factor_margin` (both in dB).
72 ClippingEventPredictor(int num_channels,
73 int window_length,
74 int reference_window_length,
75 int reference_window_delay,
76 float clipping_threshold,
77 float crest_factor_margin)
78 : window_length_(window_length),
79 reference_window_length_(reference_window_length),
80 reference_window_delay_(reference_window_delay),
81 clipping_threshold_(clipping_threshold),
82 crest_factor_margin_(crest_factor_margin) {
83 RTC_DCHECK_GT(num_channels, 0);
84 RTC_DCHECK_GT(window_length, 0);
85 RTC_DCHECK_GT(reference_window_length, 0);
86 RTC_DCHECK_GE(reference_window_delay, 0);
87 RTC_DCHECK_GT(reference_window_length + reference_window_delay,
88 window_length);
89 const int buffer_length = GetMinFramesProcessed();
90 RTC_DCHECK_GT(buffer_length, 0);
91 for (int i = 0; i < num_channels; ++i) {
92 ch_buffers_.push_back(
93 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
94 }
95 }
96
97 ClippingEventPredictor(const ClippingEventPredictor&) = delete;
98 ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
99 ~ClippingEventPredictor() {}
100
101 void Reset() {
102 const int num_channels = ch_buffers_.size();
103 for (int i = 0; i < num_channels; ++i) {
104 ch_buffers_[i]->Reset();
105 }
106 }
107
108 // Analyzes a frame of audio and stores the framewise metrics in
109 // `ch_buffers_`.
Alessio Bazzicab237a872021-06-11 12:37:54 +0200110 void Analyze(const AudioFrameView<const float>& frame) {
Hanna Silen4b3a0612021-06-02 23:03:24 +0200111 const int num_channels = frame.num_channels();
112 RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
113 const int samples_per_channel = frame.samples_per_channel();
114 RTC_DCHECK_GT(samples_per_channel, 0);
115 for (int channel = 0; channel < num_channels; ++channel) {
116 float sum_squares = 0.0f;
117 float peak = 0.0f;
118 for (const auto& sample : frame.channel(channel)) {
119 sum_squares += sample * sample;
120 peak = std::max(std::fabs(sample), peak);
121 }
122 ch_buffers_[channel]->Push(
123 {sum_squares / static_cast<float>(samples_per_channel), peak});
124 }
125 }
126
127 // Estimates the analog gain adjustment for channel `channel` using a
128 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
129 // estimate for the clipped level step equal to `default_clipped_level_step_`
130 // if at least `GetMinFramesProcessed()` frames have been processed since the
131 // last reset and a clipping event is predicted. `level`, `min_mic_level`, and
132 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
133 absl::optional<int> EstimateClippedLevelStep(int channel,
134 int level,
135 int default_step,
136 int min_mic_level,
137 int max_mic_level) const {
138 RTC_CHECK_GE(channel, 0);
139 RTC_CHECK_LT(channel, ch_buffers_.size());
140 RTC_DCHECK_GE(level, 0);
141 RTC_DCHECK_LE(level, 255);
142 RTC_DCHECK_GT(default_step, 0);
143 RTC_DCHECK_LE(default_step, 255);
144 RTC_DCHECK_GE(min_mic_level, 0);
145 RTC_DCHECK_LE(min_mic_level, 255);
146 RTC_DCHECK_GE(max_mic_level, 0);
147 RTC_DCHECK_LE(max_mic_level, 255);
148 if (level <= min_mic_level) {
149 return absl::nullopt;
150 }
151 if (PredictClippingEvent(channel)) {
152 const int new_level =
153 rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
154 const int step = level - new_level;
155 if (step > 0) {
156 return step;
157 }
158 }
159 return absl::nullopt;
160 }
161
162 private:
163 int GetMinFramesProcessed() const {
164 return reference_window_delay_ + reference_window_length_;
165 }
166
167 // Predicts clipping events based on the processed audio frames. Returns
168 // true if a clipping event is likely.
169 bool PredictClippingEvent(int channel) const {
170 const auto metrics =
171 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
172 if (!metrics.has_value() ||
173 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
174 return false;
175 }
176 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
177 reference_window_delay_, reference_window_length_);
178 if (!reference_metrics.has_value()) {
179 return false;
180 }
181 const float crest_factor = ComputeCrestFactor(metrics.value());
182 const float reference_crest_factor =
183 ComputeCrestFactor(reference_metrics.value());
184 if (crest_factor < reference_crest_factor - crest_factor_margin_) {
185 return true;
186 }
187 return false;
188 }
189
190 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
191 const int window_length_;
192 const int reference_window_length_;
193 const int reference_window_delay_;
194 const float clipping_threshold_;
195 const float crest_factor_margin_;
196};
197
198// Performs crest factor-based clipping peak prediction.
199class ClippingPeakPredictor : public ClippingPredictor {
200 public:
201 // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
202 // higher than zero); window size `window_length` and reference window size
203 // `reference_window_length` (both referring to the number of frames in the
204 // respective sliding windows and limited to values higher than zero);
205 // reference window delay `reference_window_delay` (delay in frames, limited
206 // to values zero and higher with an additional requirement of
207 // `window_length` < `reference_window_length` + reference_window_delay`);
208 // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
209 // clipped level step estimation is used if `adaptive_step_estimation` is
210 // true.
211 explicit ClippingPeakPredictor(int num_channels,
212 int window_length,
213 int reference_window_length,
214 int reference_window_delay,
215 int clipping_threshold,
216 bool adaptive_step_estimation)
217 : window_length_(window_length),
218 reference_window_length_(reference_window_length),
219 reference_window_delay_(reference_window_delay),
220 clipping_threshold_(clipping_threshold),
221 adaptive_step_estimation_(adaptive_step_estimation) {
222 RTC_DCHECK_GT(num_channels, 0);
223 RTC_DCHECK_GT(window_length, 0);
224 RTC_DCHECK_GT(reference_window_length, 0);
225 RTC_DCHECK_GE(reference_window_delay, 0);
226 RTC_DCHECK_GT(reference_window_length + reference_window_delay,
227 window_length);
228 const int buffer_length = GetMinFramesProcessed();
229 RTC_DCHECK_GT(buffer_length, 0);
230 for (int i = 0; i < num_channels; ++i) {
231 ch_buffers_.push_back(
232 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
233 }
234 }
235
236 ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
237 ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
238 ~ClippingPeakPredictor() {}
239
240 void Reset() {
241 const int num_channels = ch_buffers_.size();
242 for (int i = 0; i < num_channels; ++i) {
243 ch_buffers_[i]->Reset();
244 }
245 }
246
247 // Analyzes a frame of audio and stores the framewise metrics in
248 // `ch_buffers_`.
Alessio Bazzicab237a872021-06-11 12:37:54 +0200249 void Analyze(const AudioFrameView<const float>& frame) {
Hanna Silen4b3a0612021-06-02 23:03:24 +0200250 const int num_channels = frame.num_channels();
251 RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
252 const int samples_per_channel = frame.samples_per_channel();
253 RTC_DCHECK_GT(samples_per_channel, 0);
254 for (int channel = 0; channel < num_channels; ++channel) {
255 float sum_squares = 0.0f;
256 float peak = 0.0f;
257 for (const auto& sample : frame.channel(channel)) {
258 sum_squares += sample * sample;
259 peak = std::max(std::fabs(sample), peak);
260 }
261 ch_buffers_[channel]->Push(
262 {sum_squares / static_cast<float>(samples_per_channel), peak});
263 }
264 }
265
266 // Estimates the analog gain adjustment for channel `channel` using a
267 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
268 // estimate for the clipped level step (equal to
269 // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
270 // least `GetMinFramesProcessed()` frames have been processed since the last
271 // reset and a clipping event is predicted. `level`, `min_mic_level`, and
272 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
273 absl::optional<int> EstimateClippedLevelStep(int channel,
274 int level,
275 int default_step,
276 int min_mic_level,
277 int max_mic_level) const {
278 RTC_DCHECK_GE(channel, 0);
279 RTC_DCHECK_LT(channel, ch_buffers_.size());
280 RTC_DCHECK_GE(level, 0);
281 RTC_DCHECK_LE(level, 255);
282 RTC_DCHECK_GT(default_step, 0);
283 RTC_DCHECK_LE(default_step, 255);
284 RTC_DCHECK_GE(min_mic_level, 0);
285 RTC_DCHECK_LE(min_mic_level, 255);
286 RTC_DCHECK_GE(max_mic_level, 0);
287 RTC_DCHECK_LE(max_mic_level, 255);
288 if (level <= min_mic_level) {
289 return absl::nullopt;
290 }
291 absl::optional<float> estimate_db = EstimatePeakValue(channel);
292 if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
293 int step = 0;
294 if (!adaptive_step_estimation_) {
295 step = default_step;
296 } else {
297 const int estimated_gain_change =
298 rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
299 -kClippingPredictorMaxGainChange, 0);
300 step =
301 std::max(level - LevelFromGainError(estimated_gain_change, level,
302 min_mic_level, max_mic_level),
303 default_step);
304 }
305 const int new_level =
306 rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
307 if (level > new_level) {
308 return level - new_level;
309 }
310 }
311 return absl::nullopt;
312 }
313
314 private:
315 int GetMinFramesProcessed() {
316 return reference_window_delay_ + reference_window_length_;
317 }
318
319 // Predicts clipping sample peaks based on the processed audio frames.
320 // Returns the estimated peak value if clipping is predicted. Otherwise
321 // returns absl::nullopt.
322 absl::optional<float> EstimatePeakValue(int channel) const {
323 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
324 reference_window_delay_, reference_window_length_);
325 if (!reference_metrics.has_value()) {
326 return absl::nullopt;
327 }
328 const auto metrics =
329 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
330 if (!metrics.has_value() ||
331 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
332 return absl::nullopt;
333 }
334 const float reference_crest_factor =
335 ComputeCrestFactor(reference_metrics.value());
336 const float& mean_squares = metrics.value().average;
337 const float projected_peak =
338 reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
339 return projected_peak;
340 }
341
342 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
343 const int window_length_;
344 const int reference_window_length_;
345 const int reference_window_delay_;
346 const int clipping_threshold_;
347 const bool adaptive_step_estimation_;
348};
349
350} // namespace
351
Alessio Bazzicab237a872021-06-11 12:37:54 +0200352std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
Hanna Silen4b3a0612021-06-02 23:03:24 +0200353 int num_channels,
Alessio Bazzicab237a872021-06-11 12:37:54 +0200354 const AudioProcessing::Config::GainController1::AnalogGainController::
355 ClippingPredictor& config) {
356 if (!config.enabled) {
357 RTC_LOG(LS_INFO) << "[agc] Clipping prediction disabled.";
358 return nullptr;
359 }
360 RTC_LOG(LS_INFO) << "[agc] Clipping prediction enabled.";
361 using ClippingPredictorMode = AudioProcessing::Config::GainController1::
362 AnalogGainController::ClippingPredictor::Mode;
363 switch (config.mode) {
364 case ClippingPredictorMode::kClippingEventPrediction:
365 return std::make_unique<ClippingEventPredictor>(
366 num_channels, config.window_length, config.reference_window_length,
367 config.reference_window_delay, config.clipping_threshold,
368 config.crest_factor_margin);
369 case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
370 return std::make_unique<ClippingPeakPredictor>(
371 num_channels, config.window_length, config.reference_window_length,
372 config.reference_window_delay, config.clipping_threshold,
373 /*adaptive_step_estimation=*/true);
374 case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
375 return std::make_unique<ClippingPeakPredictor>(
376 num_channels, config.window_length, config.reference_window_length,
377 config.reference_window_delay, config.clipping_threshold,
378 /*adaptive_step_estimation=*/false);
379 }
Artem Titovd3251962021-11-15 16:57:07 +0100380 RTC_DCHECK_NOTREACHED();
Hanna Silen4b3a0612021-06-02 23:03:24 +0200381}
382
383} // namespace webrtc