blob: deb95f633e6cac6c9787c4aa64ac1527d2b70403 [file] [log] [blame]
Hanna Silen4b3a0612021-06-02 23:03:24 +02001/*
2 * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "modules/audio_processing/agc/clipping_predictor.h"
12
13#include <algorithm>
14#include <memory>
15
16#include "common_audio/include/audio_util.h"
17#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
18#include "modules/audio_processing/agc/gain_map_internal.h"
19#include "rtc_base/checks.h"
20#include "rtc_base/logging.h"
21#include "rtc_base/numerics/safe_minmax.h"
22
23namespace webrtc {
24namespace {
25
// Largest magnitude of the estimated gain change used by the adaptive clipped
// level step estimation; the estimate is clamped to
// [-kClippingPredictorMaxGainChange, 0].
constexpr int kClippingPredictorMaxGainChange{15};
27
28using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
29 AnalogGainController::ClippingPredictor;
30
31// Estimates the new level from the gain error; a copy of the function
32// `LevelFromGainError` in agc_manager_direct.cc.
33int LevelFromGainError(int gain_error,
34 int level,
35 int min_mic_level,
36 int max_mic_level) {
37 RTC_DCHECK_GE(level, 0);
38 RTC_DCHECK_LE(level, max_mic_level);
39 if (gain_error == 0) {
40 return level;
41 }
42 int new_level = level;
43 if (gain_error > 0) {
44 while (kGainMap[new_level] - kGainMap[level] < gain_error &&
45 new_level < max_mic_level) {
46 ++new_level;
47 }
48 } else {
49 while (kGainMap[new_level] - kGainMap[level] > gain_error &&
50 new_level > min_mic_level) {
51 --new_level;
52 }
53 }
54 return new_level;
55}
56
57float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
58 const float crest_factor =
59 FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
60 return crest_factor;
61}
62
63// Crest factor-based clipping prediction and clipped level step estimation.
64class ClippingEventPredictor : public ClippingPredictor {
65 public:
66 // ClippingEventPredictor with `num_channels` channels (limited to values
67 // higher than zero); window size `window_length` and reference window size
68 // `reference_window_length` (both referring to the number of frames in the
69 // respective sliding windows and limited to values higher than zero);
70 // reference window delay `reference_window_delay` (delay in frames, limited
71 // to values zero and higher with an additional requirement of
72 // `window_length` < `reference_window_length` + reference_window_delay`);
73 // and an estimation peak threshold `clipping_threshold` and a crest factor
74 // drop threshold `crest_factor_margin` (both in dB).
75 ClippingEventPredictor(int num_channels,
76 int window_length,
77 int reference_window_length,
78 int reference_window_delay,
79 float clipping_threshold,
80 float crest_factor_margin)
81 : window_length_(window_length),
82 reference_window_length_(reference_window_length),
83 reference_window_delay_(reference_window_delay),
84 clipping_threshold_(clipping_threshold),
85 crest_factor_margin_(crest_factor_margin) {
86 RTC_DCHECK_GT(num_channels, 0);
87 RTC_DCHECK_GT(window_length, 0);
88 RTC_DCHECK_GT(reference_window_length, 0);
89 RTC_DCHECK_GE(reference_window_delay, 0);
90 RTC_DCHECK_GT(reference_window_length + reference_window_delay,
91 window_length);
92 const int buffer_length = GetMinFramesProcessed();
93 RTC_DCHECK_GT(buffer_length, 0);
94 for (int i = 0; i < num_channels; ++i) {
95 ch_buffers_.push_back(
96 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
97 }
98 }
99
100 ClippingEventPredictor(const ClippingEventPredictor&) = delete;
101 ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
102 ~ClippingEventPredictor() {}
103
104 void Reset() {
105 const int num_channels = ch_buffers_.size();
106 for (int i = 0; i < num_channels; ++i) {
107 ch_buffers_[i]->Reset();
108 }
109 }
110
111 // Analyzes a frame of audio and stores the framewise metrics in
112 // `ch_buffers_`.
113 void Process(const AudioFrameView<const float>& frame) {
114 const int num_channels = frame.num_channels();
115 RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
116 const int samples_per_channel = frame.samples_per_channel();
117 RTC_DCHECK_GT(samples_per_channel, 0);
118 for (int channel = 0; channel < num_channels; ++channel) {
119 float sum_squares = 0.0f;
120 float peak = 0.0f;
121 for (const auto& sample : frame.channel(channel)) {
122 sum_squares += sample * sample;
123 peak = std::max(std::fabs(sample), peak);
124 }
125 ch_buffers_[channel]->Push(
126 {sum_squares / static_cast<float>(samples_per_channel), peak});
127 }
128 }
129
130 // Estimates the analog gain adjustment for channel `channel` using a
131 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
132 // estimate for the clipped level step equal to `default_clipped_level_step_`
133 // if at least `GetMinFramesProcessed()` frames have been processed since the
134 // last reset and a clipping event is predicted. `level`, `min_mic_level`, and
135 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
136 absl::optional<int> EstimateClippedLevelStep(int channel,
137 int level,
138 int default_step,
139 int min_mic_level,
140 int max_mic_level) const {
141 RTC_CHECK_GE(channel, 0);
142 RTC_CHECK_LT(channel, ch_buffers_.size());
143 RTC_DCHECK_GE(level, 0);
144 RTC_DCHECK_LE(level, 255);
145 RTC_DCHECK_GT(default_step, 0);
146 RTC_DCHECK_LE(default_step, 255);
147 RTC_DCHECK_GE(min_mic_level, 0);
148 RTC_DCHECK_LE(min_mic_level, 255);
149 RTC_DCHECK_GE(max_mic_level, 0);
150 RTC_DCHECK_LE(max_mic_level, 255);
151 if (level <= min_mic_level) {
152 return absl::nullopt;
153 }
154 if (PredictClippingEvent(channel)) {
155 const int new_level =
156 rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
157 const int step = level - new_level;
158 if (step > 0) {
159 return step;
160 }
161 }
162 return absl::nullopt;
163 }
164
165 private:
166 int GetMinFramesProcessed() const {
167 return reference_window_delay_ + reference_window_length_;
168 }
169
170 // Predicts clipping events based on the processed audio frames. Returns
171 // true if a clipping event is likely.
172 bool PredictClippingEvent(int channel) const {
173 const auto metrics =
174 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
175 if (!metrics.has_value() ||
176 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
177 return false;
178 }
179 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
180 reference_window_delay_, reference_window_length_);
181 if (!reference_metrics.has_value()) {
182 return false;
183 }
184 const float crest_factor = ComputeCrestFactor(metrics.value());
185 const float reference_crest_factor =
186 ComputeCrestFactor(reference_metrics.value());
187 if (crest_factor < reference_crest_factor - crest_factor_margin_) {
188 return true;
189 }
190 return false;
191 }
192
193 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
194 const int window_length_;
195 const int reference_window_length_;
196 const int reference_window_delay_;
197 const float clipping_threshold_;
198 const float crest_factor_margin_;
199};
200
201// Performs crest factor-based clipping peak prediction.
202class ClippingPeakPredictor : public ClippingPredictor {
203 public:
204 // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
205 // higher than zero); window size `window_length` and reference window size
206 // `reference_window_length` (both referring to the number of frames in the
207 // respective sliding windows and limited to values higher than zero);
208 // reference window delay `reference_window_delay` (delay in frames, limited
209 // to values zero and higher with an additional requirement of
210 // `window_length` < `reference_window_length` + reference_window_delay`);
211 // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
212 // clipped level step estimation is used if `adaptive_step_estimation` is
213 // true.
214 explicit ClippingPeakPredictor(int num_channels,
215 int window_length,
216 int reference_window_length,
217 int reference_window_delay,
218 int clipping_threshold,
219 bool adaptive_step_estimation)
220 : window_length_(window_length),
221 reference_window_length_(reference_window_length),
222 reference_window_delay_(reference_window_delay),
223 clipping_threshold_(clipping_threshold),
224 adaptive_step_estimation_(adaptive_step_estimation) {
225 RTC_DCHECK_GT(num_channels, 0);
226 RTC_DCHECK_GT(window_length, 0);
227 RTC_DCHECK_GT(reference_window_length, 0);
228 RTC_DCHECK_GE(reference_window_delay, 0);
229 RTC_DCHECK_GT(reference_window_length + reference_window_delay,
230 window_length);
231 const int buffer_length = GetMinFramesProcessed();
232 RTC_DCHECK_GT(buffer_length, 0);
233 for (int i = 0; i < num_channels; ++i) {
234 ch_buffers_.push_back(
235 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
236 }
237 }
238
239 ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
240 ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
241 ~ClippingPeakPredictor() {}
242
243 void Reset() {
244 const int num_channels = ch_buffers_.size();
245 for (int i = 0; i < num_channels; ++i) {
246 ch_buffers_[i]->Reset();
247 }
248 }
249
250 // Analyzes a frame of audio and stores the framewise metrics in
251 // `ch_buffers_`.
252 void Process(const AudioFrameView<const float>& frame) {
253 const int num_channels = frame.num_channels();
254 RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
255 const int samples_per_channel = frame.samples_per_channel();
256 RTC_DCHECK_GT(samples_per_channel, 0);
257 for (int channel = 0; channel < num_channels; ++channel) {
258 float sum_squares = 0.0f;
259 float peak = 0.0f;
260 for (const auto& sample : frame.channel(channel)) {
261 sum_squares += sample * sample;
262 peak = std::max(std::fabs(sample), peak);
263 }
264 ch_buffers_[channel]->Push(
265 {sum_squares / static_cast<float>(samples_per_channel), peak});
266 }
267 }
268
269 // Estimates the analog gain adjustment for channel `channel` using a
270 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
271 // estimate for the clipped level step (equal to
272 // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
273 // least `GetMinFramesProcessed()` frames have been processed since the last
274 // reset and a clipping event is predicted. `level`, `min_mic_level`, and
275 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
276 absl::optional<int> EstimateClippedLevelStep(int channel,
277 int level,
278 int default_step,
279 int min_mic_level,
280 int max_mic_level) const {
281 RTC_DCHECK_GE(channel, 0);
282 RTC_DCHECK_LT(channel, ch_buffers_.size());
283 RTC_DCHECK_GE(level, 0);
284 RTC_DCHECK_LE(level, 255);
285 RTC_DCHECK_GT(default_step, 0);
286 RTC_DCHECK_LE(default_step, 255);
287 RTC_DCHECK_GE(min_mic_level, 0);
288 RTC_DCHECK_LE(min_mic_level, 255);
289 RTC_DCHECK_GE(max_mic_level, 0);
290 RTC_DCHECK_LE(max_mic_level, 255);
291 if (level <= min_mic_level) {
292 return absl::nullopt;
293 }
294 absl::optional<float> estimate_db = EstimatePeakValue(channel);
295 if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
296 int step = 0;
297 if (!adaptive_step_estimation_) {
298 step = default_step;
299 } else {
300 const int estimated_gain_change =
301 rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
302 -kClippingPredictorMaxGainChange, 0);
303 step =
304 std::max(level - LevelFromGainError(estimated_gain_change, level,
305 min_mic_level, max_mic_level),
306 default_step);
307 }
308 const int new_level =
309 rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
310 if (level > new_level) {
311 return level - new_level;
312 }
313 }
314 return absl::nullopt;
315 }
316
317 private:
318 int GetMinFramesProcessed() {
319 return reference_window_delay_ + reference_window_length_;
320 }
321
322 // Predicts clipping sample peaks based on the processed audio frames.
323 // Returns the estimated peak value if clipping is predicted. Otherwise
324 // returns absl::nullopt.
325 absl::optional<float> EstimatePeakValue(int channel) const {
326 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
327 reference_window_delay_, reference_window_length_);
328 if (!reference_metrics.has_value()) {
329 return absl::nullopt;
330 }
331 const auto metrics =
332 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
333 if (!metrics.has_value() ||
334 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
335 return absl::nullopt;
336 }
337 const float reference_crest_factor =
338 ComputeCrestFactor(reference_metrics.value());
339 const float& mean_squares = metrics.value().average;
340 const float projected_peak =
341 reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
342 return projected_peak;
343 }
344
345 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
346 const int window_length_;
347 const int reference_window_length_;
348 const int reference_window_delay_;
349 const int clipping_threshold_;
350 const bool adaptive_step_estimation_;
351};
352
353} // namespace
354
355std::unique_ptr<ClippingPredictor> CreateClippingEventPredictor(
356 int num_channels,
357 const ClippingPredictorConfig& config) {
358 return std::make_unique<ClippingEventPredictor>(
359 num_channels, config.window_length, config.reference_window_length,
360 config.reference_window_delay, config.clipping_threshold,
361 config.crest_factor_margin);
362}
363
364std::unique_ptr<ClippingPredictor> CreateFixedStepClippingPeakPredictor(
365 int num_channels,
366 const ClippingPredictorConfig& config) {
367 return std::make_unique<ClippingPeakPredictor>(
368 num_channels, config.window_length, config.reference_window_length,
369 config.reference_window_delay, config.clipping_threshold,
370 /*adaptive_step_estimation=*/false);
371}
372
373std::unique_ptr<ClippingPredictor> CreateAdaptiveStepClippingPeakPredictor(
374 int num_channels,
375 const ClippingPredictorConfig& config) {
376 return std::make_unique<ClippingPeakPredictor>(
377 num_channels, config.window_length, config.reference_window_length,
378 config.reference_window_delay, config.clipping_threshold,
379 /*adaptive_step_estimation=*/true);
380}
381
382} // namespace webrtc