Hanna Silen | 4b3a061 | 2021-06-02 23:03:24 +0200 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "modules/audio_processing/agc/clipping_predictor.h" |
| 12 | |
| 13 | #include <algorithm> |
| 14 | #include <memory> |
| 15 | |
| 16 | #include "common_audio/include/audio_util.h" |
| 17 | #include "modules/audio_processing/agc/clipping_predictor_level_buffer.h" |
| 18 | #include "modules/audio_processing/agc/gain_map_internal.h" |
| 19 | #include "rtc_base/checks.h" |
| 20 | #include "rtc_base/logging.h" |
| 21 | #include "rtc_base/numerics/safe_minmax.h" |
| 22 | |
| 23 | namespace webrtc { |
| 24 | namespace { |
| 25 | |
// Largest magnitude of the gain change used by the adaptive clipped level step
// estimation: the estimated gain change is clamped to
// [-kClippingPredictorMaxGainChange, 0] before it is mapped to a mic level.
// NOTE(review): presumably expressed in dB, like the peak estimate it is
// derived from - confirm against `kGainMap`.
constexpr int kClippingPredictorMaxGainChange = 15;

// Short alias for the clipping predictor configuration nested in
// `AudioProcessing::Config`.
using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
    AnalogGainController::ClippingPredictor;
| 30 | |
| 31 | // Estimates the new level from the gain error; a copy of the function |
| 32 | // `LevelFromGainError` in agc_manager_direct.cc. |
| 33 | int LevelFromGainError(int gain_error, |
| 34 | int level, |
| 35 | int min_mic_level, |
| 36 | int max_mic_level) { |
| 37 | RTC_DCHECK_GE(level, 0); |
| 38 | RTC_DCHECK_LE(level, max_mic_level); |
| 39 | if (gain_error == 0) { |
| 40 | return level; |
| 41 | } |
| 42 | int new_level = level; |
| 43 | if (gain_error > 0) { |
| 44 | while (kGainMap[new_level] - kGainMap[level] < gain_error && |
| 45 | new_level < max_mic_level) { |
| 46 | ++new_level; |
| 47 | } |
| 48 | } else { |
| 49 | while (kGainMap[new_level] - kGainMap[level] > gain_error && |
| 50 | new_level > min_mic_level) { |
| 51 | --new_level; |
| 52 | } |
| 53 | } |
| 54 | return new_level; |
| 55 | } |
| 56 | |
| 57 | float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) { |
| 58 | const float crest_factor = |
| 59 | FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average)); |
| 60 | return crest_factor; |
| 61 | } |
| 62 | |
| 63 | // Crest factor-based clipping prediction and clipped level step estimation. |
| 64 | class ClippingEventPredictor : public ClippingPredictor { |
| 65 | public: |
| 66 | // ClippingEventPredictor with `num_channels` channels (limited to values |
| 67 | // higher than zero); window size `window_length` and reference window size |
| 68 | // `reference_window_length` (both referring to the number of frames in the |
| 69 | // respective sliding windows and limited to values higher than zero); |
| 70 | // reference window delay `reference_window_delay` (delay in frames, limited |
| 71 | // to values zero and higher with an additional requirement of |
| 72 | // `window_length` < `reference_window_length` + reference_window_delay`); |
| 73 | // and an estimation peak threshold `clipping_threshold` and a crest factor |
| 74 | // drop threshold `crest_factor_margin` (both in dB). |
| 75 | ClippingEventPredictor(int num_channels, |
| 76 | int window_length, |
| 77 | int reference_window_length, |
| 78 | int reference_window_delay, |
| 79 | float clipping_threshold, |
| 80 | float crest_factor_margin) |
| 81 | : window_length_(window_length), |
| 82 | reference_window_length_(reference_window_length), |
| 83 | reference_window_delay_(reference_window_delay), |
| 84 | clipping_threshold_(clipping_threshold), |
| 85 | crest_factor_margin_(crest_factor_margin) { |
| 86 | RTC_DCHECK_GT(num_channels, 0); |
| 87 | RTC_DCHECK_GT(window_length, 0); |
| 88 | RTC_DCHECK_GT(reference_window_length, 0); |
| 89 | RTC_DCHECK_GE(reference_window_delay, 0); |
| 90 | RTC_DCHECK_GT(reference_window_length + reference_window_delay, |
| 91 | window_length); |
| 92 | const int buffer_length = GetMinFramesProcessed(); |
| 93 | RTC_DCHECK_GT(buffer_length, 0); |
| 94 | for (int i = 0; i < num_channels; ++i) { |
| 95 | ch_buffers_.push_back( |
| 96 | std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | ClippingEventPredictor(const ClippingEventPredictor&) = delete; |
| 101 | ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete; |
| 102 | ~ClippingEventPredictor() {} |
| 103 | |
| 104 | void Reset() { |
| 105 | const int num_channels = ch_buffers_.size(); |
| 106 | for (int i = 0; i < num_channels; ++i) { |
| 107 | ch_buffers_[i]->Reset(); |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | // Analyzes a frame of audio and stores the framewise metrics in |
| 112 | // `ch_buffers_`. |
| 113 | void Process(const AudioFrameView<const float>& frame) { |
| 114 | const int num_channels = frame.num_channels(); |
| 115 | RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); |
| 116 | const int samples_per_channel = frame.samples_per_channel(); |
| 117 | RTC_DCHECK_GT(samples_per_channel, 0); |
| 118 | for (int channel = 0; channel < num_channels; ++channel) { |
| 119 | float sum_squares = 0.0f; |
| 120 | float peak = 0.0f; |
| 121 | for (const auto& sample : frame.channel(channel)) { |
| 122 | sum_squares += sample * sample; |
| 123 | peak = std::max(std::fabs(sample), peak); |
| 124 | } |
| 125 | ch_buffers_[channel]->Push( |
| 126 | {sum_squares / static_cast<float>(samples_per_channel), peak}); |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | // Estimates the analog gain adjustment for channel `channel` using a |
| 131 | // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an |
| 132 | // estimate for the clipped level step equal to `default_clipped_level_step_` |
| 133 | // if at least `GetMinFramesProcessed()` frames have been processed since the |
| 134 | // last reset and a clipping event is predicted. `level`, `min_mic_level`, and |
| 135 | // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. |
| 136 | absl::optional<int> EstimateClippedLevelStep(int channel, |
| 137 | int level, |
| 138 | int default_step, |
| 139 | int min_mic_level, |
| 140 | int max_mic_level) const { |
| 141 | RTC_CHECK_GE(channel, 0); |
| 142 | RTC_CHECK_LT(channel, ch_buffers_.size()); |
| 143 | RTC_DCHECK_GE(level, 0); |
| 144 | RTC_DCHECK_LE(level, 255); |
| 145 | RTC_DCHECK_GT(default_step, 0); |
| 146 | RTC_DCHECK_LE(default_step, 255); |
| 147 | RTC_DCHECK_GE(min_mic_level, 0); |
| 148 | RTC_DCHECK_LE(min_mic_level, 255); |
| 149 | RTC_DCHECK_GE(max_mic_level, 0); |
| 150 | RTC_DCHECK_LE(max_mic_level, 255); |
| 151 | if (level <= min_mic_level) { |
| 152 | return absl::nullopt; |
| 153 | } |
| 154 | if (PredictClippingEvent(channel)) { |
| 155 | const int new_level = |
| 156 | rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level); |
| 157 | const int step = level - new_level; |
| 158 | if (step > 0) { |
| 159 | return step; |
| 160 | } |
| 161 | } |
| 162 | return absl::nullopt; |
| 163 | } |
| 164 | |
| 165 | private: |
| 166 | int GetMinFramesProcessed() const { |
| 167 | return reference_window_delay_ + reference_window_length_; |
| 168 | } |
| 169 | |
| 170 | // Predicts clipping events based on the processed audio frames. Returns |
| 171 | // true if a clipping event is likely. |
| 172 | bool PredictClippingEvent(int channel) const { |
| 173 | const auto metrics = |
| 174 | ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); |
| 175 | if (!metrics.has_value() || |
| 176 | !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { |
| 177 | return false; |
| 178 | } |
| 179 | const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( |
| 180 | reference_window_delay_, reference_window_length_); |
| 181 | if (!reference_metrics.has_value()) { |
| 182 | return false; |
| 183 | } |
| 184 | const float crest_factor = ComputeCrestFactor(metrics.value()); |
| 185 | const float reference_crest_factor = |
| 186 | ComputeCrestFactor(reference_metrics.value()); |
| 187 | if (crest_factor < reference_crest_factor - crest_factor_margin_) { |
| 188 | return true; |
| 189 | } |
| 190 | return false; |
| 191 | } |
| 192 | |
| 193 | std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; |
| 194 | const int window_length_; |
| 195 | const int reference_window_length_; |
| 196 | const int reference_window_delay_; |
| 197 | const float clipping_threshold_; |
| 198 | const float crest_factor_margin_; |
| 199 | }; |
| 200 | |
| 201 | // Performs crest factor-based clipping peak prediction. |
| 202 | class ClippingPeakPredictor : public ClippingPredictor { |
| 203 | public: |
| 204 | // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values |
| 205 | // higher than zero); window size `window_length` and reference window size |
| 206 | // `reference_window_length` (both referring to the number of frames in the |
| 207 | // respective sliding windows and limited to values higher than zero); |
| 208 | // reference window delay `reference_window_delay` (delay in frames, limited |
| 209 | // to values zero and higher with an additional requirement of |
| 210 | // `window_length` < `reference_window_length` + reference_window_delay`); |
| 211 | // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive |
| 212 | // clipped level step estimation is used if `adaptive_step_estimation` is |
| 213 | // true. |
| 214 | explicit ClippingPeakPredictor(int num_channels, |
| 215 | int window_length, |
| 216 | int reference_window_length, |
| 217 | int reference_window_delay, |
| 218 | int clipping_threshold, |
| 219 | bool adaptive_step_estimation) |
| 220 | : window_length_(window_length), |
| 221 | reference_window_length_(reference_window_length), |
| 222 | reference_window_delay_(reference_window_delay), |
| 223 | clipping_threshold_(clipping_threshold), |
| 224 | adaptive_step_estimation_(adaptive_step_estimation) { |
| 225 | RTC_DCHECK_GT(num_channels, 0); |
| 226 | RTC_DCHECK_GT(window_length, 0); |
| 227 | RTC_DCHECK_GT(reference_window_length, 0); |
| 228 | RTC_DCHECK_GE(reference_window_delay, 0); |
| 229 | RTC_DCHECK_GT(reference_window_length + reference_window_delay, |
| 230 | window_length); |
| 231 | const int buffer_length = GetMinFramesProcessed(); |
| 232 | RTC_DCHECK_GT(buffer_length, 0); |
| 233 | for (int i = 0; i < num_channels; ++i) { |
| 234 | ch_buffers_.push_back( |
| 235 | std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | ClippingPeakPredictor(const ClippingPeakPredictor&) = delete; |
| 240 | ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete; |
| 241 | ~ClippingPeakPredictor() {} |
| 242 | |
| 243 | void Reset() { |
| 244 | const int num_channels = ch_buffers_.size(); |
| 245 | for (int i = 0; i < num_channels; ++i) { |
| 246 | ch_buffers_[i]->Reset(); |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | // Analyzes a frame of audio and stores the framewise metrics in |
| 251 | // `ch_buffers_`. |
| 252 | void Process(const AudioFrameView<const float>& frame) { |
| 253 | const int num_channels = frame.num_channels(); |
| 254 | RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); |
| 255 | const int samples_per_channel = frame.samples_per_channel(); |
| 256 | RTC_DCHECK_GT(samples_per_channel, 0); |
| 257 | for (int channel = 0; channel < num_channels; ++channel) { |
| 258 | float sum_squares = 0.0f; |
| 259 | float peak = 0.0f; |
| 260 | for (const auto& sample : frame.channel(channel)) { |
| 261 | sum_squares += sample * sample; |
| 262 | peak = std::max(std::fabs(sample), peak); |
| 263 | } |
| 264 | ch_buffers_[channel]->Push( |
| 265 | {sum_squares / static_cast<float>(samples_per_channel), peak}); |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | // Estimates the analog gain adjustment for channel `channel` using a |
| 270 | // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an |
| 271 | // estimate for the clipped level step (equal to |
| 272 | // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at |
| 273 | // least `GetMinFramesProcessed()` frames have been processed since the last |
| 274 | // reset and a clipping event is predicted. `level`, `min_mic_level`, and |
| 275 | // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. |
| 276 | absl::optional<int> EstimateClippedLevelStep(int channel, |
| 277 | int level, |
| 278 | int default_step, |
| 279 | int min_mic_level, |
| 280 | int max_mic_level) const { |
| 281 | RTC_DCHECK_GE(channel, 0); |
| 282 | RTC_DCHECK_LT(channel, ch_buffers_.size()); |
| 283 | RTC_DCHECK_GE(level, 0); |
| 284 | RTC_DCHECK_LE(level, 255); |
| 285 | RTC_DCHECK_GT(default_step, 0); |
| 286 | RTC_DCHECK_LE(default_step, 255); |
| 287 | RTC_DCHECK_GE(min_mic_level, 0); |
| 288 | RTC_DCHECK_LE(min_mic_level, 255); |
| 289 | RTC_DCHECK_GE(max_mic_level, 0); |
| 290 | RTC_DCHECK_LE(max_mic_level, 255); |
| 291 | if (level <= min_mic_level) { |
| 292 | return absl::nullopt; |
| 293 | } |
| 294 | absl::optional<float> estimate_db = EstimatePeakValue(channel); |
| 295 | if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) { |
| 296 | int step = 0; |
| 297 | if (!adaptive_step_estimation_) { |
| 298 | step = default_step; |
| 299 | } else { |
| 300 | const int estimated_gain_change = |
| 301 | rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())), |
| 302 | -kClippingPredictorMaxGainChange, 0); |
| 303 | step = |
| 304 | std::max(level - LevelFromGainError(estimated_gain_change, level, |
| 305 | min_mic_level, max_mic_level), |
| 306 | default_step); |
| 307 | } |
| 308 | const int new_level = |
| 309 | rtc::SafeClamp(level - step, min_mic_level, max_mic_level); |
| 310 | if (level > new_level) { |
| 311 | return level - new_level; |
| 312 | } |
| 313 | } |
| 314 | return absl::nullopt; |
| 315 | } |
| 316 | |
| 317 | private: |
| 318 | int GetMinFramesProcessed() { |
| 319 | return reference_window_delay_ + reference_window_length_; |
| 320 | } |
| 321 | |
| 322 | // Predicts clipping sample peaks based on the processed audio frames. |
| 323 | // Returns the estimated peak value if clipping is predicted. Otherwise |
| 324 | // returns absl::nullopt. |
| 325 | absl::optional<float> EstimatePeakValue(int channel) const { |
| 326 | const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( |
| 327 | reference_window_delay_, reference_window_length_); |
| 328 | if (!reference_metrics.has_value()) { |
| 329 | return absl::nullopt; |
| 330 | } |
| 331 | const auto metrics = |
| 332 | ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); |
| 333 | if (!metrics.has_value() || |
| 334 | !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { |
| 335 | return absl::nullopt; |
| 336 | } |
| 337 | const float reference_crest_factor = |
| 338 | ComputeCrestFactor(reference_metrics.value()); |
| 339 | const float& mean_squares = metrics.value().average; |
| 340 | const float projected_peak = |
| 341 | reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares)); |
| 342 | return projected_peak; |
| 343 | } |
| 344 | |
| 345 | std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; |
| 346 | const int window_length_; |
| 347 | const int reference_window_length_; |
| 348 | const int reference_window_delay_; |
| 349 | const int clipping_threshold_; |
| 350 | const bool adaptive_step_estimation_; |
| 351 | }; |
| 352 | |
| 353 | } // namespace |
| 354 | |
| 355 | std::unique_ptr<ClippingPredictor> CreateClippingEventPredictor( |
| 356 | int num_channels, |
| 357 | const ClippingPredictorConfig& config) { |
| 358 | return std::make_unique<ClippingEventPredictor>( |
| 359 | num_channels, config.window_length, config.reference_window_length, |
| 360 | config.reference_window_delay, config.clipping_threshold, |
| 361 | config.crest_factor_margin); |
| 362 | } |
| 363 | |
| 364 | std::unique_ptr<ClippingPredictor> CreateFixedStepClippingPeakPredictor( |
| 365 | int num_channels, |
| 366 | const ClippingPredictorConfig& config) { |
| 367 | return std::make_unique<ClippingPeakPredictor>( |
| 368 | num_channels, config.window_length, config.reference_window_length, |
| 369 | config.reference_window_delay, config.clipping_threshold, |
| 370 | /*adaptive_step_estimation=*/false); |
| 371 | } |
| 372 | |
| 373 | std::unique_ptr<ClippingPredictor> CreateAdaptiveStepClippingPeakPredictor( |
| 374 | int num_channels, |
| 375 | const ClippingPredictorConfig& config) { |
| 376 | return std::make_unique<ClippingPeakPredictor>( |
| 377 | num_channels, config.window_length, config.reference_window_length, |
| 378 | config.reference_window_delay, config.clipping_threshold, |
| 379 | /*adaptive_step_estimation=*/true); |
| 380 | } |
| 381 | |
| 382 | } // namespace webrtc |