Balancing the transparency in AEC3 between saturating and low echo paths
This CL balances the NLP tradeoff in AEC3 to properly handle the cases
when the echo path is so strong that it saturates the echo and when it
is so weak that the echo is very low compared to nearend.
Bug: webrtc:8411, webrtc:8412, chromium:775653
Change-Id: I5aff74dfadd51cac1ce71b1cb935d68a5be6918d
Reviewed-on: https://webrtc-review.googlesource.com/14120
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20418}
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index 2f91cfb..9b0f774 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -56,23 +56,29 @@
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
config_(config),
- reverb_decay_(config_.ep_strength.default_len) {}
+ reverb_decay_(config_.ep_strength.default_len) {
+ max_render_.fill(0.f);
+}
AecState::~AecState() = default;
void AecState::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) {
if (echo_path_variability.AudioPathChanged()) {
- blocks_since_last_saturation_ = 0;
+ blocks_since_last_saturation_ = kUnknownDelayRenderWindowSize + 1;
usable_linear_estimate_ = false;
echo_leakage_detected_ = false;
capture_signal_saturation_ = false;
echo_saturation_ = false;
- previous_max_sample_ = 0.f;
+ max_render_.fill(0.f);
if (echo_path_variability.delay_change) {
force_zero_gain_counter_ = 0;
blocks_with_filter_adaptation_ = 0;
+ blocks_with_strong_render_ = 0;
+ initial_state_ = true;
+ linear_echo_estimate_ = false;
+ sufficient_filter_updates_ = false;
render_received_ = false;
force_zero_gain_ = true;
capture_block_counter_ = 0;
@@ -124,50 +130,134 @@
// Update the echo audibility evaluator.
echo_audibility_.Update(x, s, converged_filter);
- // Detect and flag echo saturation.
- // TODO(peah): Add the delay in this computation to ensure that the render and
- // capture signals are properly aligned.
- RTC_DCHECK_LT(0, x.size());
- const float max_sample = fabs(*std::max_element(
- x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
if (config_.ep_strength.echo_can_saturate) {
- const bool saturated_echo =
- (previous_max_sample_ > 200.f) && SaturatedCapture();
+ // Detect and flag echo saturation.
+ RTC_DCHECK_LT(0, x.size());
+ // Store the render values in a circular buffer.
+ max_render_index_ = (max_render_index_ + 1) % max_render_.size();
+ auto x_max_result = std::minmax_element(x.begin(), x.end());
+ max_render_[max_render_index_] =
+ std::max(fabs(*x_max_result.first), fabs(*x_max_result.second));
- // Counts the blocks since saturation.
- constexpr size_t kSaturationLeakageBlocks = 20;
+ bool saturated_echo = false;
+ // Check whether a saturated capture frame could potentially consist of
+ // saturated echo.
+ if (SaturatedCapture()) {
+ if (converged_filter) {
+ RTC_DCHECK(filter_delay_);
+ const size_t index =
+ (max_render_index_ + max_render_.size() - *filter_delay_) %
+ max_render_.size();
+ saturated_echo = max_render_[index] > 200.f;
+ } else {
+ saturated_echo =
+ *std::max_element(max_render_.begin(), max_render_.end()) > 200.f;
+ }
+ }
+
+ // Set flag for potential presence of saturated echo
blocks_since_last_saturation_ =
saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
+ if (converged_filter) {
+ echo_saturation_ =
+ blocks_since_last_saturation_ < kAdaptiveFilterLength + 1;
+ } else {
+ echo_saturation_ =
+ blocks_since_last_saturation_ < kUnknownDelayRenderWindowSize + 1;
+ }
- echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+ // Set flag for whether the echo path is generally strong enough to saturate
+ // the echo.
+ if (converged_filter) {
+ // Base detection on predicted echo sample.
+ auto s_max_result = std::minmax_element(s.begin(), s.end());
+ const float s_max_abs =
+ std::max(fabs(*s_max_result.first), fabs(*s_max_result.second));
+
+ const bool saturated_echo_sample =
+ s_max_abs >= 10000.f && SaturatedCapture();
+ saturating_echo_path_counter_ = saturated_echo_sample
+ ? 10 * kNumBlocksPerSecond
+ : saturating_echo_path_counter_ - 1;
+ } else {
+ // Base detection on the potentially saturated echo detected above.
+ saturating_echo_path_counter_ = saturated_echo
+ ? 10 * kNumBlocksPerSecond
+ : saturating_echo_path_counter_ - 1;
+ }
+ saturating_echo_path_counter_ = std::max(0, saturating_echo_path_counter_);
+ saturating_echo_path_ = saturating_echo_path_counter_ > 0;
} else {
echo_saturation_ = false;
+ saturating_echo_path_ = false;
+ saturating_echo_path_counter_ = 0;
}
- previous_max_sample_ = max_sample;
- // Flag whether the linear filter estimate is usable.
- usable_linear_estimate_ =
- (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) &&
- capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
-
- // After an amount of active render samples for which an echo should have been
- // detected in the capture signal if the ERL was not infinite, flag that a
- // transparent mode should be entered.
+ // Compute render energies.
const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
const bool active_render_block =
x_energy > (config_.render_levels.active_render_limit *
config_.render_levels.active_render_limit) *
kFftLengthBy2;
+ const bool strong_render_block = x_energy > 1000 * 1000 * kFftLengthBy2;
+
if (active_render_block) {
render_received_ = true;
}
+
+ // Update counters.
blocks_with_filter_adaptation_ +=
(active_render_block && (!SaturatedCapture()) ? 1 : 0);
- transparent_mode_ = !converged_filter &&
- (!render_received_ || blocks_with_filter_adaptation_ >=
- 5 * kNumBlocksPerSecond);
+ blocks_with_strong_render_ +=
+ (strong_render_block && (!SaturatedCapture()) ? 1 : 0);
+
+ // After an amount of active render samples for which an echo should have been
+ // detected in the capture signal if the ERL was not infinite, flag that a
+ // transparent mode should be entered.
+ if (SaturatingEchoPath()) {
+ transparent_mode_ = !converged_filter &&
+ (!render_received_ || blocks_with_strong_render_ >=
+ 15 * kNumBlocksPerSecond);
+ } else {
+ transparent_mode_ = !converged_filter &&
+ (!render_received_ ||
+ blocks_with_strong_render_ >= 5 * kNumBlocksPerSecond);
+ }
+
+ // Update flag for whether the adaptation is in the initial state.
+ if (SaturatingEchoPath()) {
+ initial_state_ = capture_block_counter_ < 6 * kNumBlocksPerSecond;
+ } else {
+ initial_state_ = capture_block_counter_ < 3 * kNumBlocksPerSecond;
+ }
+
+ // Detect whether the linear filter is usable.
+ if (SaturatingEchoPath()) {
+ usable_linear_estimate_ =
+ (!echo_saturation_) &&
+ (converged_filter && SufficientFilterUpdates()) &&
+ capture_block_counter_ >= 5 * kNumBlocksPerSecond && external_delay_;
+ } else {
+ usable_linear_estimate_ =
+ (!echo_saturation_) &&
+ (converged_filter || SufficientFilterUpdates()) &&
+ capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
+ }
+
+ // Flag whether the linear echo estimate should be used.
+ linear_echo_estimate_ = usable_linear_estimate_ && !TransparentMode();
+
+ // Flag whether a sufficient number of filter updates has been done for the
+ // filter to perform well.
+ if (SaturatingEchoPath()) {
+ sufficient_filter_updates_ =
+ blocks_with_filter_adaptation_ >= 2 * kEchoPathChangeConvergenceBlocks;
+ } else {
+ sufficient_filter_updates_ =
+ blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
+ }
// Update the room reverb estimate.
UpdateReverb(adaptive_filter_impulse_response);
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index 3feddd9..9c87133 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -67,6 +67,9 @@
// Returns whether the echo signal is saturated.
bool SaturatedEcho() const { return echo_saturation_; }
+ // Returns whether the echo path can saturate.
+ bool SaturatingEchoPath() const { return saturating_echo_path_; }
+
// Updates the capture signal saturation.
void UpdateCaptureSaturation(bool capture_signal_saturation) {
capture_signal_saturation_ = capture_signal_saturation;
@@ -93,20 +96,14 @@
}
// Returns whether the linear filter should have been able to adapt properly.
- bool SufficientFilterUpdates() const {
- return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
- }
+ bool SufficientFilterUpdates() const { return sufficient_filter_updates_; }
// Returns whether the echo subtractor can be used to determine the residual
// echo.
- bool LinearEchoEstimate() const {
- return UsableLinearEstimate() && !TransparentMode();
- }
+ bool LinearEchoEstimate() const { return linear_echo_estimate_; }
// Returns whether the AEC is in an initial state.
- bool InitialState() const {
- return capture_block_counter_ < 3 * kNumBlocksPerSecond;
- }
+ bool InitialState() const { return initial_state_; }
// Updates the aec state.
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
@@ -147,12 +144,14 @@
ErleEstimator erle_estimator_;
size_t capture_block_counter_ = 0;
size_t blocks_with_filter_adaptation_ = 0;
+ size_t blocks_with_strong_render_ = 0;
bool usable_linear_estimate_ = false;
bool echo_leakage_detected_ = false;
bool capture_signal_saturation_ = false;
bool echo_saturation_ = false;
bool transparent_mode_ = false;
- float previous_max_sample_ = 0.f;
+ std::array<float, kAdaptiveFilterLength> max_render_;
+ size_t max_render_index_ = 0;
bool force_zero_gain_ = false;
bool render_received_ = false;
size_t force_zero_gain_counter_ = 0;
@@ -165,6 +164,11 @@
EchoAudibility echo_audibility_;
const EchoCanceller3Config config_;
float reverb_decay_;
+ bool saturating_echo_path_ = false;
+ int saturating_echo_path_counter_ = 0;
+ bool initial_state_ = true;
+ bool linear_echo_estimate_ = false;
+ bool sufficient_filter_updates_ = false;
RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
};
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 9914bc8..72c629b 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -189,10 +189,9 @@
cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
// A choose and apply echo suppression gain.
- suppression_gain_.GetGain(
- E2, R2, cng_.NoiseSpectrum(), render_signal_analyzer_,
- aec_state_.SaturatedEcho(), x, aec_state_.ForcedZeroGain(),
- aec_state_.LinearEchoEstimate(), &high_bands_gain, &G);
+ suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),
+ render_signal_analyzer_, aec_state_, x,
+ &high_bands_gain, &G);
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
high_bands_gain, y);
diff --git a/modules/audio_processing/aec3/matched_filter.cc b/modules/audio_processing/aec3/matched_filter.cc
index 5dc671e..c70e1a9 100644
--- a/modules/audio_processing/aec3/matched_filter.cc
+++ b/modules/audio_processing/aec3/matched_filter.cc
@@ -368,7 +368,7 @@
[](float a, float b) -> bool { return a * a < b * b; }));
// Update the lag estimates for the matched filter.
- const float kMatchingFilterThreshold = 0.2f;
+ const float kMatchingFilterThreshold = 0.1f;
lag_estimates_[n] = LagEstimate(
error_sum_anchor - error_sum,
(lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) &&
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 7b306ba..013892e 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -108,54 +108,29 @@
R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
}
} else {
- const rtc::Optional<size_t> delay =
- aec_state.ExternalDelay()
- ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
- : aec_state.ExternalDelay())
- : rtc::Optional<size_t>();
-
// Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2;
- if (aec_state.ExternalDelay() && aec_state.FilterDelay()) {
- RTC_DCHECK(delay);
- const int delay_use = static_cast<int>(*delay);
-
- // Computes the spectral power over the blocks surrounding the delay.
- constexpr int kKnownDelayRenderWindowSize = 5;
- static_assert(
- kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
- "Requirement to ensure that the render buffer is overrun");
- EchoGeneratingPower(
- render_buffer, std::max(0, delay_use - 1),
- std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
- } else {
- // Computes the spectral power over the latest blocks.
- EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
- &X2);
- }
+ EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
+ &X2);
// Subtract the stationary noise power to avoid stationary noise causing
// excessive echo suppression.
- std::transform(
- X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
- [](float a, float b) { return std::max(0.f, a - 10.f * b); });
+ if (!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath())) {
+ std::transform(
+ X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
+ [](float a, float b) { return std::max(0.f, a - 10.f * b); });
+ }
NonLinearEstimate(
- aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(),
+ aec_state.SufficientFilterUpdates(),
+ aec_state.SaturatedEcho() && aec_state.SaturatingEchoPath(),
config_.ep_strength.bounded_erl, aec_state.TransparentMode(),
aec_state.InitialState(), X2, Y2, R2);
-
- if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
- aec_state.SaturatedEcho()) {
- AddEchoReverb(*R2, aec_state.SaturatedEcho(),
- std::min(static_cast<size_t>(kAdaptiveFilterLength),
- delay.value_or(kAdaptiveFilterLength)),
- aec_state.ReverbDecay(), R2);
- }
}
// If the echo is deemed inaudible, set the residual echo to zero.
- if (aec_state.InaudibleEcho()) {
+ if (aec_state.InaudibleEcho() &&
+ (!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath()))) {
R2->fill(0.f);
R2_old_.fill(0.f);
R2_hold_counter_.fill(0.f);
@@ -204,7 +179,7 @@
// Set echo path gains.
if (saturated_echo) {
// If the echo could be saturated, use a very conservative gain.
- echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f;
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 1000.f;
} else if (sufficient_filter_updates && !bounded_erl) {
// If the filter should have been able to converge, and no assumption is
// possible on the ERL, use a low gain.
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index c1909f3..f1a9b80 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -59,12 +59,14 @@
void Subtractor::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) {
+ use_shadow_filter_frequency_response_ = false;
if (echo_path_variability.delay_change) {
main_filter_.HandleEchoPathChange();
shadow_filter_.HandleEchoPathChange();
G_main_.HandleEchoPathChange();
G_shadow_.HandleEchoPathChange();
converged_filter_ = false;
+ converged_filter_counter_ = 0;
}
}
@@ -91,16 +93,29 @@
shadow_filter_.Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
- if (!converged_filter_) {
- const auto sum_of_squares = [](float a, float b) { return a + b * b; };
- const float e2_main =
- std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
- const float e2_shadow =
- std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
- const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
+ // Determine which frequency response should be used.
+ const auto sum_of_squares = [](float a, float b) { return a + b * b; };
+ const float e2_main =
+ std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
+ const float e2_shadow =
+ std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
+ const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
- if (y2 > kBlockSize * 50.f * 50.f) {
- converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2);
+ if (e2_main < e2_shadow && e2_main < 0.1 * y2) {
+ use_shadow_filter_frequency_response_ = false;
+ } else if (e2_shadow < e2_main && e2_shadow < 0.01 * y2) {
+ use_shadow_filter_frequency_response_ = true;
+ }
+
+ // Flag whether the filter has at some point converged.
+ // TODO(peah): Consider using a timeout for this.
+ if (!converged_filter_) {
+ if (y2 > kBlockSize * 100.f * 100.f) {
+ if (e2_main < 0.3 * y2) {
+ converged_filter_ = (++converged_filter_counter_) > 10;
+ } else {
+ converged_filter_counter_ = 0;
+ }
}
}
diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h
index 680bf45..11c090f 100644
--- a/modules/audio_processing/aec3/subtractor.h
+++ b/modules/audio_processing/aec3/subtractor.h
@@ -48,6 +48,9 @@
// Returns the block-wise frequency response for the main adaptive filter.
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
FilterFrequencyResponse() const {
+ if (use_shadow_filter_frequency_response_) {
+ return shadow_filter_.FilterFrequencyResponse();
+ }
return main_filter_.FilterFrequencyResponse();
}
@@ -68,7 +71,8 @@
MainFilterUpdateGain G_main_;
ShadowFilterUpdateGain G_shadow_;
bool converged_filter_ = false;
-
+ size_t converged_filter_counter_ = 0;
+ bool use_shadow_filter_frequency_response_ = false;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor);
};
diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc
index 90e0ab8..df2f46a 100644
--- a/modules/audio_processing/aec3/suppression_gain.cc
+++ b/modules/audio_processing/aec3/suppression_gain.cc
@@ -126,7 +126,14 @@
float min_decreasing;
auto& param = config.gain_updates;
- if (!linear_echo_estimate) {
+ if (no_saturation_counter <= 10) {
+ max_increasing = param.saturation.max_inc;
+ max_decreasing = param.saturation.max_dec;
+ rate_increasing = param.saturation.rate_inc;
+ rate_decreasing = param.saturation.rate_dec;
+ min_increasing = param.saturation.min_inc;
+ min_decreasing = param.saturation.min_dec;
+ } else if (!linear_echo_estimate) {
max_increasing = param.nonlinear.max_inc;
max_decreasing = param.nonlinear.max_dec;
rate_increasing = param.nonlinear.rate_inc;
@@ -140,20 +147,13 @@
rate_decreasing = param.low_noise.rate_dec;
min_increasing = param.low_noise.min_inc;
min_decreasing = param.low_noise.min_dec;
- } else if (no_saturation_counter > 10) {
+ } else {
max_increasing = param.normal.max_inc;
max_decreasing = param.normal.max_dec;
rate_increasing = param.normal.rate_inc;
rate_decreasing = param.normal.rate_dec;
min_increasing = param.normal.min_inc;
min_decreasing = param.normal.min_dec;
- } else {
- max_increasing = param.saturation.max_inc;
- max_decreasing = param.saturation.max_dec;
- rate_increasing = param.saturation.rate_inc;
- rate_decreasing = param.saturation.rate_dec;
- min_increasing = param.saturation.min_inc;
- min_decreasing = param.saturation.min_dec;
}
for (size_t k = 0; k < new_gain.size(); ++k) {
@@ -176,6 +176,7 @@
const EchoCanceller3Config& config,
bool low_noise_render,
bool saturated_echo,
+ bool saturating_echo_path,
bool linear_echo_estimate,
const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo,
@@ -185,21 +186,29 @@
const std::array<float, kFftLengthBy2Plus1>& one_by_echo,
std::array<float, kFftLengthBy2Plus1>* gain) {
float nearend_masking_margin = 0.f;
- if (linear_echo_estimate) {
- nearend_masking_margin =
- low_noise_render
- ? config.gain_mask.m9
- : (saturated_echo ? config.gain_mask.m2 : config.gain_mask.m3);
+ if (saturated_echo) {
+ nearend_masking_margin = config.gain_mask.m2;
} else {
- nearend_masking_margin = config.gain_mask.m7;
+ if (linear_echo_estimate) {
+ nearend_masking_margin =
+ low_noise_render ? config.gain_mask.m9 : config.gain_mask.m3;
+ } else {
+ nearend_masking_margin = config.gain_mask.m7;
+ }
}
+
RTC_DCHECK_LE(0.f, nearend_masking_margin);
RTC_DCHECK_GT(1.f, nearend_masking_margin);
const float one_by_one_minus_nearend_masking_margin =
1.f / (1.0f - nearend_masking_margin);
- const float masker_margin =
- linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8;
+ float masker_margin;
+ if (saturated_echo || saturating_echo_path) {
+ masker_margin = 0.0001f;
+ } else {
+ masker_margin =
+ linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8;
+ }
for (size_t k = 0; k < gain->size(); ++k) {
const float unity_gain_masker = std::max(nearend[k], masker[k]);
@@ -276,6 +285,7 @@
bool low_noise_render,
const rtc::Optional<int>& narrow_peak_band,
bool saturated_echo,
+ bool saturating_echo_path,
bool linear_echo_estimate,
const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo,
@@ -296,7 +306,7 @@
const float min_echo_power =
low_noise_render ? config_.echo_audibility.low_render_limit
: config_.echo_audibility.normal_render_limit;
- if (no_saturation_counter_ > 10) {
+ if (!saturating_echo_path) {
for (size_t k = 0; k < nearend.size(); ++k) {
const float denom = std::min(nearend[k], echo[k]);
min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f;
@@ -309,10 +319,12 @@
// Compute the maximum gain by limiting the gain increase from the previous
// gain.
std::array<float, kFftLengthBy2Plus1> max_gain;
+ const float first_increase = saturated_echo || saturating_echo_path
+ ? 0.00001f
+ : config_.gain_updates.floor_first_increase;
for (size_t k = 0; k < gain->size(); ++k) {
- max_gain[k] = std::min(std::max(last_gain_[k] * gain_increase_[k],
- config_.gain_updates.floor_first_increase),
- 1.f);
+ max_gain[k] = std::min(
+ std::max(last_gain_[k] * gain_increase_[k], first_increase), 1.f);
}
// Iteratively compute the gain required to attenuate the echo to a non
@@ -321,9 +333,9 @@
for (int k = 0; k < 2; ++k) {
std::array<float, kFftLengthBy2Plus1> masker;
MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker);
- GainToNoAudibleEcho(config_, low_noise_render, saturated_echo,
- linear_echo_estimate, nearend, echo, masker, min_gain,
- max_gain, one_by_echo, gain);
+ GainToNoAudibleEcho(config_, low_noise_render, no_saturation_counter_ > 10,
+ saturating_echo_path, linear_echo_estimate, nearend,
+ echo, masker, min_gain, max_gain, one_by_echo, gain);
AdjustForExternalFilters(gain);
if (narrow_peak_band) {
NarrowBandAttenuation(*narrow_peak_band, gain);
@@ -366,15 +378,18 @@
const std::array<float, kFftLengthBy2Plus1>& echo,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
const RenderSignalAnalyzer& render_signal_analyzer,
- bool saturated_echo,
+ const AecState& aec_state,
const std::vector<std::vector<float>>& render,
- bool force_zero_gain,
- bool linear_echo_estimate,
float* high_bands_gain,
std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
RTC_DCHECK(high_bands_gain);
RTC_DCHECK(low_band_gain);
+ const bool saturated_echo = aec_state.SaturatedEcho();
+ const bool saturating_echo_path = aec_state.SaturatingEchoPath();
+ const bool force_zero_gain = aec_state.ForcedZeroGain();
+ const bool linear_echo_estimate = aec_state.LinearEchoEstimate();
+
if (force_zero_gain) {
last_gain_.fill(0.f);
std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin());
@@ -390,8 +405,8 @@
const rtc::Optional<int> narrow_peak_band =
render_signal_analyzer.NarrowPeakBand();
LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo,
- linear_echo_estimate, nearend, echo, comfort_noise,
- low_band_gain);
+ saturating_echo_path, linear_echo_estimate, nearend, echo,
+ comfort_noise, low_band_gain);
// Compute the gain for the upper bands.
*high_bands_gain =
diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h
index 45d4ee7..6f21f71 100644
--- a/modules/audio_processing/aec3/suppression_gain.h
+++ b/modules/audio_processing/aec3/suppression_gain.h
@@ -15,6 +15,7 @@
#include <vector>
#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec_state.h"
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "rtc_base/constructormagic.h"
@@ -29,10 +30,8 @@
const std::array<float, kFftLengthBy2Plus1>& echo,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
const RenderSignalAnalyzer& render_signal_analyzer,
- bool saturated_echo,
+ const AecState& aec_state,
const std::vector<std::vector<float>>& render,
- bool force_zero_gain,
- bool linear_echo_estimate,
float* high_bands_gain,
std::array<float, kFftLengthBy2Plus1>* low_band_gain);
@@ -40,6 +39,7 @@
void LowerBandGain(bool stationary_with_low_power,
const rtc::Optional<int>& narrow_peak_band,
bool saturated_echo,
+ bool saturating_echo_path,
bool linear_echo_estimate,
const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo,
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index f0e13af..5774079 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -10,6 +10,10 @@
#include "modules/audio_processing/aec3/suppression_gain.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/subtractor.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
#include "test/gtest.h"
@@ -29,11 +33,12 @@
R2.fill(0.f);
N2.fill(0.f);
float high_bands_gain;
+ AecState aec_state(EchoCanceller3Config{});
EXPECT_DEATH(SuppressionGain(EchoCanceller3Config{}, DetectOptimization())
- .GetGain(E2, R2, N2, RenderSignalAnalyzer(), false,
+ .GetGain(E2, R2, N2, RenderSignalAnalyzer(), aec_state,
std::vector<std::vector<float>>(
3, std::vector<float>(kBlockSize, 0.f)),
- false, true, &high_bands_gain, nullptr),
+ &high_bands_gain, nullptr),
"");
}
@@ -46,17 +51,53 @@
RenderSignalAnalyzer analyzer;
float high_bands_gain;
std::array<float, kFftLengthBy2Plus1> E2;
+ std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> N2;
std::array<float, kFftLengthBy2Plus1> g;
+ std::array<float, kBlockSize> s;
std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f));
+ AecState aec_state(EchoCanceller3Config{});
+ ApmDataDumper data_dumper(42);
+ Subtractor subtractor(&data_dumper, DetectOptimization());
+ RenderBuffer render_buffer(
+ DetectOptimization(), 1,
+ std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength),
+ std::vector<size_t>(1, kAdaptiveFilterLength));
+
+ // Verify the functionality for forcing a zero gain.
+ E2.fill(1000000000.f);
+ R2.fill(10000000000000.f);
+ N2.fill(0.f);
+ s.fill(10.f);
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
+ render_buffer, E2, Y2, x[0], s, false);
+ suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain,
+ &g);
+ std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
+ EXPECT_FLOAT_EQ(0.f, high_bands_gain);
// Ensure that a strong noise is detected to mask any echoes.
E2.fill(10.f);
+ Y2.fill(10.f);
R2.fill(0.1f);
N2.fill(100.f);
- for (int k = 0; k < 10; ++k) {
- suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
+ // Ensure that the gain is no longer forced to zero.
+ for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
+ render_buffer, E2, Y2, x[0], s, false);
+ }
+
+ for (int k = 0; k < 100; ++k) {
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
+ render_buffer, E2, Y2, x[0], s, false);
+ suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
@@ -64,10 +105,15 @@
// Ensure that a strong nearend is detected to mask any echoes.
E2.fill(100.f);
+ Y2.fill(100.f);
R2.fill(0.1f);
N2.fill(0.f);
- for (int k = 0; k < 10; ++k) {
- suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
+ for (int k = 0; k < 100; ++k) {
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
+ render_buffer, E2, Y2, x[0], s, false);
+ suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
@@ -78,17 +124,12 @@
R2.fill(10000000000000.f);
N2.fill(0.f);
for (int k = 0; k < 10; ++k) {
- suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
+ suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(0.f, a, 0.001); });
- // Verify the functionality for forcing a zero gain.
- suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, true, true,
- &high_bands_gain, &g);
- std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
- EXPECT_FLOAT_EQ(0.f, high_bands_gain);
}
} // namespace aec3
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 28bd7ad..dc6d9a1 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -1188,7 +1188,7 @@
GainChanges low_noise = {3.f, 3.f, 1.5f, 1.5f, 1.5f, 1.5f};
GainChanges normal = {2.f, 2.f, 1.5f, 1.5f, 1.2f, 1.2f};
- GainChanges saturation = {1.2f, 1.2f, 1.5f, 1.5f, 1.f, 1.f};
+ GainChanges saturation = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f};
GainChanges nonlinear = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f};
float floor_first_increase = 0.0001f;