blob: 58859d837b4fd698a941de193244ebafa7526eff [file] [log] [blame]
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
#include "modules/audio_processing/aec3/aec_state.h"

#include <math.h>

#include <algorithm>
#include <array>
#include <iterator>
#include <numeric>
#include <vector>

#include "api/array_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomicops.h"
#include "rtc_base/checks.h"
peah522d71b2017-02-23 05:16:26 -080022
23namespace webrtc {
24namespace {
25
peah86afe9d2017-04-06 15:45:32 -070026// Computes delay of the adaptive filter.
Per Åhgren40659c32017-10-17 12:56:21 +020027int EstimateFilterDelay(
peah522d71b2017-02-23 05:16:26 -080028 const std::vector<std::array<float, kFftLengthBy2Plus1>>&
peah86afe9d2017-04-06 15:45:32 -070029 adaptive_filter_frequency_response) {
30 const auto& H2 = adaptive_filter_frequency_response;
peah522d71b2017-02-23 05:16:26 -080031 constexpr size_t kUpperBin = kFftLengthBy2 - 5;
Per Åhgren40659c32017-10-17 12:56:21 +020032 RTC_DCHECK_GE(kAdaptiveFilterLength, H2.size());
33 std::array<int, kAdaptiveFilterLength> delays;
34 delays.fill(0);
peah522d71b2017-02-23 05:16:26 -080035 for (size_t k = 1; k < kUpperBin; ++k) {
peah86afe9d2017-04-06 15:45:32 -070036 // Find the maximum of H2[j].
Per Åhgren40659c32017-10-17 12:56:21 +020037 size_t peak = 0;
peah522d71b2017-02-23 05:16:26 -080038 for (size_t j = 0; j < H2.size(); ++j) {
39 if (H2[j][k] > H2[peak][k]) {
40 peak = j;
41 }
42 }
Per Åhgren40659c32017-10-17 12:56:21 +020043 ++delays[peak];
peah522d71b2017-02-23 05:16:26 -080044 }
peah522d71b2017-02-23 05:16:26 -080045
Per Åhgren40659c32017-10-17 12:56:21 +020046 return std::distance(delays.begin(),
47 std::max_element(delays.begin(), delays.end()));
peah522d71b2017-02-23 05:16:26 -080048}
49
peah522d71b2017-02-23 05:16:26 -080050} // namespace
51
52int AecState::instance_count_ = 0;
53
Gustaf Ullbergbd83b912017-10-18 12:32:42 +020054AecState::AecState(const EchoCanceller3Config& config)
peah522d71b2017-02-23 05:16:26 -080055 : data_dumper_(
56 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
Gustaf Ullbergbd83b912017-10-18 12:32:42 +020057 erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
peah8cee56f2017-08-24 22:36:53 -070058 config_(config),
Per Åhgren7ddd4632017-10-25 02:59:45 +020059 reverb_decay_(config_.ep_strength.default_len) {
60 max_render_.fill(0.f);
61}
peah522d71b2017-02-23 05:16:26 -080062
63AecState::~AecState() = default;
64
peah86afe9d2017-04-06 15:45:32 -070065void AecState::HandleEchoPathChange(
66 const EchoPathVariability& echo_path_variability) {
Per Åhgren8ba58612017-12-01 23:01:44 +010067 const auto full_reset = [&]() {
Per Åhgren7ddd4632017-10-25 02:59:45 +020068 blocks_since_last_saturation_ = kUnknownDelayRenderWindowSize + 1;
peah86afe9d2017-04-06 15:45:32 -070069 usable_linear_estimate_ = false;
70 echo_leakage_detected_ = false;
71 capture_signal_saturation_ = false;
72 echo_saturation_ = false;
Per Åhgren7ddd4632017-10-25 02:59:45 +020073 max_render_.fill(0.f);
Per Åhgren8ba58612017-12-01 23:01:44 +010074 force_zero_gain_counter_ = 0;
75 blocks_with_filter_adaptation_ = 0;
76 blocks_with_strong_render_ = 0;
77 initial_state_ = true;
78 capture_block_counter_ = 0;
79 linear_echo_estimate_ = false;
80 sufficient_filter_updates_ = false;
81 render_received_ = false;
82 force_zero_gain_ = true;
83 };
peah6d822ad2017-04-10 13:52:14 -070084
Per Åhgren8ba58612017-12-01 23:01:44 +010085 // TODO(peah): Refine the reset scheme according to the type of gain and
86 // delay adjustment.
87 if (echo_path_variability.gain_change) {
88 full_reset();
89 }
90
91 if (echo_path_variability.delay_change !=
92 EchoPathVariability::DelayAdjustment::kBufferReadjustment) {
93 full_reset();
94 } else if (echo_path_variability.delay_change !=
95 EchoPathVariability::DelayAdjustment::kBufferFlush) {
96 full_reset();
97
98 } else if (echo_path_variability.delay_change !=
99 EchoPathVariability::DelayAdjustment::kDelayReset) {
100 full_reset();
101 } else if (echo_path_variability.delay_change !=
102 EchoPathVariability::DelayAdjustment::kNewDetectedDelay) {
103 full_reset();
104 } else if (echo_path_variability.gain_change) {
105 capture_block_counter_ = kNumBlocksPerSecond;
peah86afe9d2017-04-06 15:45:32 -0700106 }
107}
108
peah522d71b2017-02-23 05:16:26 -0800109void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
peah86afe9d2017-04-06 15:45:32 -0700110 adaptive_filter_frequency_response,
peah29103572017-07-11 02:54:02 -0700111 const std::array<float, kAdaptiveFilterTimeDomainLength>&
112 adaptive_filter_impulse_response,
Per Åhgren1b4059e2017-10-15 20:19:21 +0200113 bool converged_filter,
peah522d71b2017-02-23 05:16:26 -0800114 const rtc::Optional<size_t>& external_delay_samples,
peah86afe9d2017-04-06 15:45:32 -0700115 const RenderBuffer& render_buffer,
peah522d71b2017-02-23 05:16:26 -0800116 const std::array<float, kFftLengthBy2Plus1>& E2_main,
peah522d71b2017-02-23 05:16:26 -0800117 const std::array<float, kFftLengthBy2Plus1>& Y2,
118 rtc::ArrayView<const float> x,
peah29103572017-07-11 02:54:02 -0700119 const std::array<float, kBlockSize>& s,
peah522d71b2017-02-23 05:16:26 -0800120 bool echo_leakage_detected) {
peah86afe9d2017-04-06 15:45:32 -0700121 // Store input parameters.
122 echo_leakage_detected_ = echo_leakage_detected;
123
124 // Update counters.
Per Åhgren1b4059e2017-10-15 20:19:21 +0200125 ++capture_block_counter_;
peah86afe9d2017-04-06 15:45:32 -0700126
peah6d822ad2017-04-10 13:52:14 -0700127 // Force zero echo suppression gain after an echo path change to allow at
128 // least some render data to be collected in order to avoid an initial echo
129 // burst.
Per Åhgren1b4059e2017-10-15 20:19:21 +0200130 force_zero_gain_ = (++force_zero_gain_counter_) < kNumBlocksPerSecond / 5;
peah6d822ad2017-04-10 13:52:14 -0700131
peah86afe9d2017-04-06 15:45:32 -0700132 // Estimate delays.
Oskar Sundbomaa8b67d2017-11-17 14:34:48 +0100133 filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
peah522d71b2017-02-23 05:16:26 -0800134 external_delay_ =
135 external_delay_samples
136 ? rtc::Optional<size_t>(*external_delay_samples / kBlockSize)
Oskar Sundbomaa8b67d2017-11-17 14:34:48 +0100137 : rtc::nullopt;
peah522d71b2017-02-23 05:16:26 -0800138
peah86afe9d2017-04-06 15:45:32 -0700139 // Update the ERL and ERLE measures.
Per Åhgren40659c32017-10-17 12:56:21 +0200140 if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
peah86afe9d2017-04-06 15:45:32 -0700141 const auto& X2 = render_buffer.Spectrum(*filter_delay_);
peah522d71b2017-02-23 05:16:26 -0800142 erle_estimator_.Update(X2, Y2, E2_main);
143 erl_estimator_.Update(X2, Y2);
peah522d71b2017-02-23 05:16:26 -0800144 }
peah86afe9d2017-04-06 15:45:32 -0700145
Per Åhgren1b4059e2017-10-15 20:19:21 +0200146 // Update the echo audibility evaluator.
147 echo_audibility_.Update(x, s, converged_filter);
148
Per Åhgren1b4059e2017-10-15 20:19:21 +0200149
Gustaf Ullbergbd83b912017-10-18 12:32:42 +0200150 if (config_.ep_strength.echo_can_saturate) {
Per Åhgren7ddd4632017-10-25 02:59:45 +0200151 // Detect and flag echo saturation.
152 RTC_DCHECK_LT(0, x.size());
153 // Store the render values in a circular buffer.
154 max_render_index_ = (max_render_index_ + 1) % max_render_.size();
155 auto x_max_result = std::minmax_element(x.begin(), x.end());
156 max_render_[max_render_index_] =
157 std::max(fabs(*x_max_result.first), fabs(*x_max_result.second));
Per Åhgren1b4059e2017-10-15 20:19:21 +0200158
Per Åhgren7ddd4632017-10-25 02:59:45 +0200159 bool saturated_echo = false;
160 // Check for whether a saturated frame potentially could consist of
161 // saturated echo.
162 if (SaturatedCapture()) {
163 if (converged_filter) {
164 RTC_DCHECK(filter_delay_);
165 const size_t index =
166 (max_render_index_ + max_render_.size() - *filter_delay_) %
167 max_render_.size();
168 saturated_echo = max_render_[index] > 200.f;
169 } else {
170 saturated_echo =
171 *std::max_element(max_render_.begin(), max_render_.end()) > 200.f;
172 }
173 }
174
175 // Set flag for potential presence of saturated echo
Per Åhgren1b4059e2017-10-15 20:19:21 +0200176 blocks_since_last_saturation_ =
177 saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
Per Åhgren7ddd4632017-10-25 02:59:45 +0200178 if (converged_filter) {
179 echo_saturation_ =
180 blocks_since_last_saturation_ < kAdaptiveFilterLength + 1;
181 } else {
182 echo_saturation_ =
183 blocks_since_last_saturation_ < kUnknownDelayRenderWindowSize + 1;
184 }
Per Åhgren1b4059e2017-10-15 20:19:21 +0200185
Per Åhgren7ddd4632017-10-25 02:59:45 +0200186 // Set flag for whether the echo path is generally strong enough to saturate
187 // the echo.
188 if (converged_filter) {
189 // Base detection on predicted echo sample.
190 auto s_max_result = std::minmax_element(s.begin(), s.end());
191 const float s_max_abs =
192 std::max(fabs(*s_max_result.first), fabs(*s_max_result.second));
193
194 const bool saturated_echo_sample =
195 s_max_abs >= 10000.f && SaturatedCapture();
196 saturating_echo_path_counter_ = saturated_echo_sample
197 ? 10 * kNumBlocksPerSecond
198 : saturating_echo_path_counter_ - 1;
199 } else {
200 // Base detection on detected potentially echo.
201 saturating_echo_path_counter_ = saturated_echo
202 ? 10 * kNumBlocksPerSecond
203 : saturating_echo_path_counter_ - 1;
204 }
205 saturating_echo_path_counter_ = std::max(0, saturating_echo_path_counter_);
206 saturating_echo_path_ = saturating_echo_path_counter_ > 0;
Per Åhgren1b4059e2017-10-15 20:19:21 +0200207 } else {
208 echo_saturation_ = false;
Per Åhgren7ddd4632017-10-25 02:59:45 +0200209 saturating_echo_path_ = false;
210 saturating_echo_path_counter_ = 0;
Per Åhgren1b4059e2017-10-15 20:19:21 +0200211 }
peah86afe9d2017-04-06 15:45:32 -0700212
Per Åhgren7ddd4632017-10-25 02:59:45 +0200213 // Compute render energies.
Per Åhgren1b4059e2017-10-15 20:19:21 +0200214 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
215 const bool active_render_block =
Gustaf Ullbergbd83b912017-10-18 12:32:42 +0200216 x_energy > (config_.render_levels.active_render_limit *
217 config_.render_levels.active_render_limit) *
Per Åhgren1b4059e2017-10-15 20:19:21 +0200218 kFftLengthBy2;
Per Åhgren7ddd4632017-10-25 02:59:45 +0200219 const bool strong_render_block = x_energy > 1000 * 1000 * kFftLengthBy2;
220
Per Åhgren1b4059e2017-10-15 20:19:21 +0200221 if (active_render_block) {
222 render_received_ = true;
223 }
Per Åhgren7ddd4632017-10-25 02:59:45 +0200224
225 // Update counters.
Per Åhgren1b4059e2017-10-15 20:19:21 +0200226 blocks_with_filter_adaptation_ +=
227 (active_render_block && (!SaturatedCapture()) ? 1 : 0);
228
Per Åhgren7ddd4632017-10-25 02:59:45 +0200229 blocks_with_strong_render_ +=
230 (strong_render_block && (!SaturatedCapture()) ? 1 : 0);
231
232 // After an amount of active render samples for which an echo should have been
233 // detected in the capture signal if the ERL was not infinite, flag that a
234 // transparent mode should be entered.
235 if (SaturatingEchoPath()) {
236 transparent_mode_ = !converged_filter &&
237 (!render_received_ || blocks_with_strong_render_ >=
238 15 * kNumBlocksPerSecond);
239 } else {
240 transparent_mode_ = !converged_filter &&
241 (!render_received_ ||
242 blocks_with_strong_render_ >= 5 * kNumBlocksPerSecond);
243 }
244
245 // Update flag for whether the adaptation is in the initial state.
246 if (SaturatingEchoPath()) {
247 initial_state_ = capture_block_counter_ < 6 * kNumBlocksPerSecond;
248 } else {
249 initial_state_ = capture_block_counter_ < 3 * kNumBlocksPerSecond;
250 }
251
252 // Detect whether the linear filter is usable.
253 if (SaturatingEchoPath()) {
254 usable_linear_estimate_ =
255 (!echo_saturation_) &&
256 (converged_filter && SufficientFilterUpdates()) &&
257 capture_block_counter_ >= 5 * kNumBlocksPerSecond && external_delay_;
258 } else {
259 usable_linear_estimate_ =
260 (!echo_saturation_) &&
261 (converged_filter || SufficientFilterUpdates()) &&
262 capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
263 }
264
265 // Flag whether the linear echo estimate should be used.
266 linear_echo_estimate_ = usable_linear_estimate_ && !TransparentMode();
267
268 // Flag whether a sufficient number of filter updates has been done for the
269 // filter to perform well.
270 if (SaturatingEchoPath()) {
271 sufficient_filter_updates_ =
272 blocks_with_filter_adaptation_ >= 2 * kEchoPathChangeConvergenceBlocks;
273 } else {
274 sufficient_filter_updates_ =
275 blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
276 }
peah29103572017-07-11 02:54:02 -0700277
278 // Update the room reverb estimate.
279 UpdateReverb(adaptive_filter_impulse_response);
280}
281
282void AecState::UpdateReverb(
283 const std::array<float, kAdaptiveFilterTimeDomainLength>&
284 impulse_response) {
285 if ((!(filter_delay_ && usable_linear_estimate_)) ||
286 (*filter_delay_ > kAdaptiveFilterLength - 4)) {
287 return;
288 }
289
290 // Form the data to match against by squaring the impulse response
291 // coefficients.
292 std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;
293 std::transform(impulse_response.begin(), impulse_response.end(),
294 matching_data.begin(), [](float a) { return a * a; });
295
296 // Avoid matching against noise in the model by subtracting an estimate of the
297 // model noise power.
298 constexpr size_t kTailLength = 64;
299 constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;
300 const float tail_power = *std::max_element(matching_data.begin() + tail_index,
301 matching_data.end());
302 std::for_each(matching_data.begin(), matching_data.begin() + tail_index,
303 [tail_power](float& a) { a = std::max(0.f, a - tail_power); });
304
305 // Identify the peak index of the impulse response.
306 const size_t peak_index = *std::max_element(
307 matching_data.begin(), matching_data.begin() + tail_index);
308
309 if (peak_index + 128 < tail_index) {
310 size_t start_index = peak_index + 64;
311 // Compute the matching residual error for the current candidate to match.
312 float residual_sqr_sum = 0.f;
313 float d_k = reverb_decay_to_test_;
314 for (size_t k = start_index; k < tail_index; ++k) {
315 if (matching_data[start_index + 1] == 0.f) {
316 break;
317 }
318
319 float residual = matching_data[k] - matching_data[peak_index] * d_k;
320 residual_sqr_sum += residual * residual;
321 d_k *= reverb_decay_to_test_;
322 }
323
324 // If needed, update the best candidate for the reverb decay.
325 if (reverb_decay_candidate_residual_ < 0.f ||
326 residual_sqr_sum < reverb_decay_candidate_residual_) {
327 reverb_decay_candidate_residual_ = residual_sqr_sum;
328 reverb_decay_candidate_ = reverb_decay_to_test_;
329 }
330 }
331
332 // Compute the next reverb candidate to evaluate such that all candidates will
333 // be evaluated within one second.
334 reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond);
335
336 // If all reverb candidates have been evaluated, choose the best one as the
337 // reverb decay.
338 if (reverb_decay_to_test_ >= 0.9965f) {
339 if (reverb_decay_candidate_residual_ < 0.f) {
340 // Transform the decay to be in the unit of blocks.
341 reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2);
342
343 // Limit the estimated reverb_decay_ to the maximum one needed in practice
344 // to minimize the impact of incorrect estimates.
Gustaf Ullbergbd83b912017-10-18 12:32:42 +0200345 reverb_decay_ = std::min(config_.ep_strength.default_len, reverb_decay_);
peah29103572017-07-11 02:54:02 -0700346 }
347 reverb_decay_to_test_ = 0.9f;
348 reverb_decay_candidate_residual_ = -1.f;
349 }
350
351 // For noisy impulse responses, assume a fixed tail length.
352 if (tail_power > 0.0005f) {
Gustaf Ullbergbd83b912017-10-18 12:32:42 +0200353 reverb_decay_ = config_.ep_strength.default_len;
peah29103572017-07-11 02:54:02 -0700354 }
355 data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_);
356 data_dumper_->DumpRaw("aec3_tail_power", tail_power);
357}
358
359void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
Per Åhgren1b4059e2017-10-15 20:19:21 +0200360 const std::array<float, kBlockSize>& s,
361 bool converged_filter) {
peah29103572017-07-11 02:54:02 -0700362 auto result_x = std::minmax_element(x.begin(), x.end());
363 auto result_s = std::minmax_element(s.begin(), s.end());
364 const float x_abs =
Raphael Kubo da Costa07438142017-10-16 17:00:02 +0200365 std::max(fabsf(*result_x.first), fabsf(*result_x.second));
peah29103572017-07-11 02:54:02 -0700366 const float s_abs =
Raphael Kubo da Costa07438142017-10-16 17:00:02 +0200367 std::max(fabsf(*result_s.first), fabsf(*result_s.second));
peah29103572017-07-11 02:54:02 -0700368
Per Åhgren1b4059e2017-10-15 20:19:21 +0200369 if (converged_filter) {
370 if (x_abs < 20.f) {
371 ++low_farend_counter_;
372 } else {
373 low_farend_counter_ = 0;
374 }
peah29103572017-07-11 02:54:02 -0700375 } else {
Per Åhgren1b4059e2017-10-15 20:19:21 +0200376 if (x_abs < 100.f) {
377 ++low_farend_counter_;
378 } else {
379 low_farend_counter_ = 0;
380 }
peah29103572017-07-11 02:54:02 -0700381 }
382
383 // The echo is deemed as not audible if the echo estimate is on the level of
384 // the quantization noise in the FFTs and the nearend level is sufficiently
385 // strong to mask that by ensuring that the playout and AGC gains do not boost
386 // any residual echo that is below the quantization noise level. Furthermore,
387 // cases where the render signal is very close to zero are also identified as
388 // not producing audible echo.
Per Åhgren1b4059e2017-10-15 20:19:21 +0200389 inaudible_echo_ = (max_nearend_ > 500 && s_abs < 30.f) ||
390 (!converged_filter && x_abs < 500);
peah29103572017-07-11 02:54:02 -0700391 inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20;
392}
393
394void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) {
395 const float e_max = *std::max_element(e.begin(), e.end());
396 const float e_min = *std::min_element(e.begin(), e.end());
Raphael Kubo da Costa07438142017-10-16 17:00:02 +0200397 const float e_abs = std::max(fabsf(e_max), fabsf(e_min));
peah29103572017-07-11 02:54:02 -0700398
399 if (max_nearend_ < e_abs) {
400 max_nearend_ = e_abs;
401 max_nearend_counter_ = 0;
402 } else {
403 if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) {
404 max_nearend_ *= 0.995f;
405 }
406 }
peah522d71b2017-02-23 05:16:26 -0800407}
408
409} // namespace webrtc