Transparency improvements for AEC3 during call start and after resets
This CL changes the AEC3 behavior to be more transparent when there
is uncertainty about the amount of echo in the microphone signal.
Bug: webrtc:8398, chromium:774868
Change-Id: I88e681f8decd892f44397b753df371a1c4b90af0
Reviewed-on: https://webrtc-review.googlesource.com/10801
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20319}
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index 9ff3c09..40f64fd 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -365,8 +365,9 @@
filter.Adapt(render_buffer, G);
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(filter.FilterFrequencyResponse(),
- filter.FilterImpulseResponse(), rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x[0], s, false);
+ filter.FilterImpulseResponse(), true,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
+ x[0], s, false);
}
// Verify that the filter is able to perform well.
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
diff --git a/modules/audio_processing/aec3/aec3_common.h b/modules/audio_processing/aec3/aec3_common.h
index 2442f90..031e9b1 100644
--- a/modules/audio_processing/aec3/aec3_common.h
+++ b/modules/audio_processing/aec3/aec3_common.h
@@ -39,7 +39,7 @@
constexpr size_t kFftLength = 2 * kFftLengthBy2;
constexpr int kAdaptiveFilterLength = 12;
-constexpr int kResidualEchoPowerRenderWindowSize = 30;
+constexpr int kUnknownDelayRenderWindowSize = 30;
constexpr int kAdaptiveFilterTimeDomainLength =
kAdaptiveFilterLength * kFftLengthBy2;
@@ -69,6 +69,8 @@
static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks,
"Requirement to ensure buffer overflow detection");
+constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
+
// TODO(peah): Integrate this with how it is done inside audio_processing_impl.
constexpr size_t NumBandsForRate(int sample_rate_hz) {
return static_cast<size_t>(sample_rate_hz == 8000 ? 1
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index 6ea54fc..01a5fc5 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -68,9 +68,6 @@
return rtc::Optional<size_t>();
}
-constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;
-constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;
-
} // namespace
int AecState::instance_count_ = 0;
@@ -81,7 +78,6 @@
erle_estimator_(config.param.erle.min,
config.param.erle.max_l,
config.param.erle.max_h),
- echo_path_change_counter_(kEchoPathChangeCounterInitial),
config_(config),
reverb_decay_(config_.param.ep_strength.default_len) {}
@@ -102,10 +98,10 @@
blocks_with_filter_adaptation_ = 0;
render_received_ = false;
force_zero_gain_ = true;
- echo_path_change_counter_ = kEchoPathChangeCounterMax;
+ capture_block_counter_ = 0;
}
if (echo_path_variability.gain_change) {
- echo_path_change_counter_ = kEchoPathChangeCounterInitial;
+ capture_block_counter_ = kNumBlocksPerSecond;
}
}
}
@@ -114,6 +110,7 @@
adaptive_filter_frequency_response,
const std::array<float, kAdaptiveFilterTimeDomainLength>&
adaptive_filter_impulse_response,
+ bool converged_filter,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
@@ -121,31 +118,16 @@
rtc::ArrayView<const float> x,
const std::array<float, kBlockSize>& s,
bool echo_leakage_detected) {
- // Update the echo audibility evaluator.
- echo_audibility_.Update(x, s);
-
// Store input parameters.
echo_leakage_detected_ = echo_leakage_detected;
// Update counters.
- const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
-
- const bool active_render_block =
- x_energy > (config_.param.render_levels.active_render_limit *
- config_.param.render_levels.active_render_limit) *
- kFftLengthBy2;
- if (active_render_block) {
- render_received_ = true;
- }
- blocks_with_filter_adaptation_ +=
- (active_render_block && (!SaturatedCapture()) ? 1 : 0);
- --echo_path_change_counter_;
+ ++capture_block_counter_;
// Force zero echo suppression gain after an echo path change to allow at
// least some render data to be collected in order to avoid an initial echo
// burst.
- constexpr size_t kZeroGainBlocksAfterChange = kNumBlocksPerSecond / 5;
- force_zero_gain_ = (++force_zero_gain_counter_) < kZeroGainBlocksAfterChange;
+ force_zero_gain_ = (++force_zero_gain_counter_) < kNumBlocksPerSecond / 5;
// Estimate delays.
filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
@@ -155,43 +137,60 @@
: rtc::Optional<size_t>();
// Update the ERL and ERLE measures.
- if (filter_delay_ && echo_path_change_counter_ <= 0) {
+ if (filter_delay_ && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
const auto& X2 = render_buffer.Spectrum(*filter_delay_);
erle_estimator_.Update(X2, Y2, E2_main);
erl_estimator_.Update(X2, Y2);
}
+ // Update the echo audibility evaluator.
+ echo_audibility_.Update(x, s, converged_filter);
+
// Detect and flag echo saturation.
// TODO(peah): Add the delay in this computation to ensure that the render and
// capture signals are properly aligned.
RTC_DCHECK_LT(0, x.size());
const float max_sample = fabs(*std::max_element(
x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
- const bool saturated_echo =
- previous_max_sample_ * 100 > 1600 && SaturatedCapture();
+
+ if (config_.param.ep_strength.echo_can_saturate) {
+ const bool saturated_echo =
+ (previous_max_sample_ > 200.f) && SaturatedCapture();
+
+ // Counts the blocks since saturation.
+ constexpr size_t kSaturationLeakageBlocks = 20;
+ blocks_since_last_saturation_ =
+ saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
+
+ echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+ } else {
+ echo_saturation_ = false;
+ }
previous_max_sample_ = max_sample;
- // Counts the blocks since saturation.
- constexpr size_t kSaturationLeakageBlocks = 20;
- blocks_since_last_saturation_ =
- saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
- echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
-
// Flag whether the linear filter estimate is usable.
- constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
usable_linear_estimate_ =
- (!echo_saturation_) &&
- (!render_received_ ||
- blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&
- filter_delay_ && echo_path_change_counter_ <= 0 && external_delay_;
+ (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) &&
+ filter_delay_ && capture_block_counter_ >= 2 * kNumBlocksPerSecond &&
+ external_delay_;
// After an amount of active render samples for which an echo should have been
// detected in the capture signal if the ERL was not infinite, flag that a
- // headset is used.
- constexpr size_t kHeadSetDetectionBlocks = 5 * kNumBlocksPerSecond;
- headset_detected_ = !external_delay_ && !filter_delay_ &&
+ // transparent mode should be entered.
+ const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+ const bool active_render_block =
+ x_energy > (config_.param.render_levels.active_render_limit *
+ config_.param.render_levels.active_render_limit) *
+ kFftLengthBy2;
+ if (active_render_block) {
+ render_received_ = true;
+ }
+ blocks_with_filter_adaptation_ +=
+ (active_render_block && (!SaturatedCapture()) ? 1 : 0);
+
+ transparent_mode_ = !converged_filter &&
(!render_received_ || blocks_with_filter_adaptation_ >=
- kHeadSetDetectionBlocks);
+ 5 * kNumBlocksPerSecond);
// Update the room reverb estimate.
UpdateReverb(adaptive_filter_impulse_response);
@@ -276,7 +275,8 @@
}
void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
- const std::array<float, kBlockSize>& s) {
+ const std::array<float, kBlockSize>& s,
+ bool converged_filter) {
auto result_x = std::minmax_element(x.begin(), x.end());
auto result_s = std::minmax_element(s.begin(), s.end());
const float x_abs =
@@ -284,10 +284,18 @@
const float s_abs =
std::max(std::abs(*result_s.first), std::abs(*result_s.second));
- if (x_abs < 5.f) {
- ++low_farend_counter_;
+ if (converged_filter) {
+ if (x_abs < 20.f) {
+ ++low_farend_counter_;
+ } else {
+ low_farend_counter_ = 0;
+ }
} else {
- low_farend_counter_ = 0;
+ if (x_abs < 100.f) {
+ ++low_farend_counter_;
+ } else {
+ low_farend_counter_ = 0;
+ }
}
// The echo is deemed as not audible if the echo estimate is on the level of
@@ -296,7 +304,8 @@
// any residual echo that is below the quantization noise level. Furthermore,
// cases where the render signal is very close to zero are also identified as
// not producing audible echo.
- inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f;
+ inaudible_echo_ = (max_nearend_ > 500 && s_abs < 30.f) ||
+ (!converged_filter && x_abs < 500);
inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20;
}
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index 53899e5..358c74d 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -72,8 +72,8 @@
capture_signal_saturation_ = capture_signal_saturation;
}
- // Returns whether a probable headset setup has been detected.
- bool HeadsetDetected() const { return headset_detected_; }
+ // Returns whether the transparent mode is active.
+ bool TransparentMode() const { return transparent_mode_; }
// Takes appropriate action at an echo path change.
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
@@ -92,10 +92,20 @@
echo_audibility_.UpdateWithOutput(e);
}
+ // Returns whether the linear filter should have been able to adapt properly.
+ bool SufficientFilterUpdates() const {
+ return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
+ }
+
// Returns whether the echo subtractor can be used to determine the residual
// echo.
bool LinearEchoEstimate() const {
- return UsableLinearEstimate() && !HeadsetDetected();
+ return UsableLinearEstimate() && !TransparentMode();
+ }
+
+ // Returns whether the AEC is in an initial state.
+ bool InitialState() const {
+ return capture_block_counter_ < 3 * kNumBlocksPerSecond;
}
// Updates the aec state.
@@ -103,6 +113,7 @@
adaptive_filter_frequency_response,
const std::array<float, kAdaptiveFilterTimeDomainLength>&
adaptive_filter_impulse_response,
+ bool converged_filter,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
@@ -115,7 +126,8 @@
class EchoAudibility {
public:
void Update(rtc::ArrayView<const float> x,
- const std::array<float, kBlockSize>& s);
+ const std::array<float, kBlockSize>& s,
+ bool converged_filter);
void UpdateWithOutput(rtc::ArrayView<const float> e);
bool InaudibleEcho() const { return inaudible_echo_; }
@@ -133,13 +145,13 @@
std::unique_ptr<ApmDataDumper> data_dumper_;
ErlEstimator erl_estimator_;
ErleEstimator erle_estimator_;
- int echo_path_change_counter_;
+ size_t capture_block_counter_ = 0;
size_t blocks_with_filter_adaptation_ = 0;
bool usable_linear_estimate_ = false;
bool echo_leakage_detected_ = false;
bool capture_signal_saturation_ = false;
bool echo_saturation_ = false;
- bool headset_detected_ = false;
+ bool transparent_mode_ = false;
float previous_max_sample_ = 0.f;
bool force_zero_gain_ = false;
bool render_received_ = false;
diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc
index 8413413..9a84ef6 100644
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -43,7 +43,7 @@
// Verify that linear AEC usability is false when the filter is diverged and
// there is no external delay reported.
- state.Update(diverged_filter_frequency_response, impulse_response,
+ state.Update(diverged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.UsableLinearEstimate());
@@ -51,7 +51,7 @@
// Verify that linear AEC usability is true when the filter is converged
std::fill(x[0].begin(), x[0].end(), 101.f);
for (int k = 0; k < 3000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -60,7 +60,7 @@
// Verify that linear AEC usability becomes false after an echo path change is
// reported
state.HandleEchoPathChange(EchoPathVariability(true, false));
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.UsableLinearEstimate());
@@ -68,25 +68,25 @@
// Verify that the active render detection works as intended.
std::fill(x[0].begin(), x[0].end(), 101.f);
state.HandleEchoPathChange(EchoPathVariability(true, true));
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.ActiveRender());
for (int k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
EXPECT_TRUE(state.ActiveRender());
// Verify that echo leakage is properly reported.
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.EchoLeakageDetected());
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
true);
EXPECT_TRUE(state.EchoLeakageDetected());
@@ -103,7 +103,7 @@
Y2.fill(10.f * 10000.f * 10000.f);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -120,7 +120,7 @@
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(10.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -141,7 +141,7 @@
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(5.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -184,8 +184,9 @@
// Verify that a non-significant filter delay is identified correctly.
state.HandleEchoPathChange(echo_path_variability);
- state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x, s, false);
+ state.Update(frequency_response, impulse_response, true,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+ false);
EXPECT_FALSE(state.FilterDelay());
}
@@ -217,8 +218,9 @@
frequency_response[k].fill(100.f);
frequency_response[k][0] = 0.f;
state.HandleEchoPathChange(echo_path_variability);
- state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x, s, false);
+ state.Update(frequency_response, impulse_response, true,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+ false);
EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
if (k != (kFilterLength - 1)) {
EXPECT_EQ(k, state.FilterDelay());
@@ -251,7 +253,7 @@
for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
state.HandleEchoPathChange(EchoPathVariability(false, false));
- state.Update(frequency_response, impulse_response,
+ state.Update(frequency_response, impulse_response, true,
rtc::Optional<size_t>(k * kBlockSize + 5), render_buffer,
E2_main, Y2, x, s, false);
EXPECT_TRUE(state.ExternalDelay());
@@ -261,8 +263,9 @@
// Verify that the externally reported delay is properly unset when it is no
// longer present.
state.HandleEchoPathChange(EchoPathVariability(false, false));
- state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x, s, false);
+ state.Update(frequency_response, impulse_response, true,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+ false);
EXPECT_FALSE(state.ExternalDelay());
}
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index cb7e05b..341c89a 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -172,11 +172,12 @@
// Update the AEC state information.
aec_state_.Update(subtractor_.FilterFrequencyResponse(),
subtractor_.FilterImpulseResponse(),
- echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
- subtractor_output.s_main, echo_leakage_detected_);
+ subtractor_.ConvergedFilter(), echo_path_delay_samples,
+ render_buffer, E2_main, Y2, x0, subtractor_output.s_main,
+ echo_leakage_detected_);
// Choose the linear output.
- output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
+ output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0);
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
LowestBandRate(sample_rate_hz_), 1);
data_dumper_->DumpRaw("aec3_output_linear", y0);
diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 6d0423f..e3f968c 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -135,7 +135,7 @@
// Update the delay.
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(main_filter.FilterFrequencyResponse(),
- main_filter.FilterImpulseResponse(),
+ main_filter.FilterImpulseResponse(), true,
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
s, false);
}
diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc
index 785b837..e173aa1 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -106,7 +106,7 @@
fft_buffer_(
optimization_,
num_bands,
- std::max(kResidualEchoPowerRenderWindowSize, kAdaptiveFilterLength),
+ std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength),
std::vector<size_t>(1, kAdaptiveFilterLength)),
api_call_jitter_buffer_(num_bands),
zero_block_(num_bands, std::vector<float>(kBlockSize, 0.f)) {
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index c5b0161..04251a4 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -74,9 +74,6 @@
}
}
-// Assume a minimum echo path gain of -33 dB for headsets.
-constexpr float kHeadsetEchoPathGain = 0.0005f;
-
} // namespace
ResidualEchoEstimator::ResidualEchoEstimator(
@@ -95,24 +92,29 @@
std::array<float, kFftLengthBy2Plus1>* R2) {
RTC_DCHECK(R2);
- const rtc::Optional<size_t> delay =
- aec_state.ExternalDelay()
- ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
- : aec_state.ExternalDelay())
- : rtc::Optional<size_t>();
-
// Estimate the power of the stationary noise in the render signal.
RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
// Estimate the residual echo power.
-
if (aec_state.LinearEchoEstimate()) {
RTC_DCHECK(aec_state.FilterDelay());
const int filter_delay = *aec_state.FilterDelay();
LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
aec_state.ReverbDecay(), R2);
+
+ // If the echo is saturated, estimate the echo power as the maximum echo
+ // power with a leakage factor.
+ if (aec_state.SaturatedEcho()) {
+ R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
+ }
} else {
+ const rtc::Optional<size_t> delay =
+ aec_state.ExternalDelay()
+ ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
+ : aec_state.ExternalDelay())
+ : rtc::Optional<size_t>();
+
// Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2;
if (aec_state.ExternalDelay() && aec_state.FilterDelay()) {
@@ -120,14 +122,17 @@
const int delay_use = static_cast<int>(*delay);
// Computes the spectral power over the blocks surrounding the delay.
- RTC_DCHECK_LT(delay_use, kResidualEchoPowerRenderWindowSize);
+ constexpr int kKnownDelayRenderWindowSize = 5;
+ static_assert(
+ kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
+ "Requirement to ensure that the render buffer is not overrun");
EchoGeneratingPower(
render_buffer, std::max(0, delay_use - 1),
- std::min(kResidualEchoPowerRenderWindowSize - 1, delay_use + 1), &X2);
+ std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
} else {
// Computes the spectral power over the latest blocks.
- EchoGeneratingPower(render_buffer, 0,
- kResidualEchoPowerRenderWindowSize - 1, &X2);
+ EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
+ &X2);
}
// Subtract the stationary noise power to avoid stationary noise causing
@@ -136,23 +141,25 @@
X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
[](float a, float b) { return std::max(0.f, a - 10.f * b); });
- NonLinearEstimate(aec_state.HeadsetDetected(), X2, Y2, R2);
- AddEchoReverb(*R2, aec_state.SaturatedEcho(),
- std::min(static_cast<size_t>(kAdaptiveFilterLength),
- delay.value_or(kAdaptiveFilterLength)),
- aec_state.ReverbDecay(), R2);
+ NonLinearEstimate(
+ aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(),
+ config_.param.ep_strength.bounded_erl, aec_state.TransparentMode(),
+ aec_state.InitialState(), X2, Y2, R2);
+
+ if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
+ aec_state.SaturatedEcho()) {
+ AddEchoReverb(*R2, aec_state.SaturatedEcho(),
+ std::min(static_cast<size_t>(kAdaptiveFilterLength),
+ delay.value_or(kAdaptiveFilterLength)),
+ aec_state.ReverbDecay(), R2);
+ }
}
// If the echo is deemed inaudible, set the residual echo to zero.
- if (aec_state.InaudibleEcho() &&
- (aec_state.ExternalDelay() || aec_state.HeadsetDetected())) {
+ if (aec_state.InaudibleEcho()) {
R2->fill(0.f);
- }
-
- // If the echo is saturated, estimate the echo power as the maximum echo power
- // with a leakage factor.
- if (aec_state.SaturatedEcho()) {
- R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
+ R2_old_.fill(0.f);
+ R2_hold_counter_.fill(0.f);
}
std::copy(R2->begin(), R2->end(), R2_old_.begin());
@@ -183,17 +190,39 @@
}
void ResidualEchoEstimator::NonLinearEstimate(
- bool headset_detected,
+ bool sufficient_filter_updates,
+ bool saturated_echo,
+ bool bounded_erl,
+ bool transparent_mode,
+ bool initial_state,
const std::array<float, kFftLengthBy2Plus1>& X2,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2) {
- // Choose gains.
- const float echo_path_gain_lf =
- headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.lf;
- const float echo_path_gain_mf =
- headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.mf;
- const float echo_path_gain_hf =
- headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.hf;
+ float echo_path_gain_lf;
+ float echo_path_gain_mf;
+ float echo_path_gain_hf;
+
+ // Set echo path gains.
+ if (saturated_echo) {
+ // If the echo could be saturated, use a very conservative gain.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f;
+ } else if (sufficient_filter_updates && !bounded_erl) {
+ // If the filter should have been able to converge, and no assumption is
+ // possible on the ERL, use a low gain.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+ } else if ((sufficient_filter_updates && bounded_erl) || transparent_mode) {
+ // If the filter should have been able to converge, and it is known that
+ // the ERL is bounded, use a very low gain.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.001f;
+ } else if (!initial_state) {
+ // If the AEC is no longer in an initial state, assume a weak echo path.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+ } else {
+ // In the initial state, use conservative gains.
+ echo_path_gain_lf = config_.param.ep_strength.lf;
+ echo_path_gain_mf = config_.param.ep_strength.mf;
+ echo_path_gain_hf = config_.param.ep_strength.hf;
+ }
// Compute preliminary residual echo.
std::transform(
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index d766f12..d2eada3 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -49,7 +49,11 @@
// Estimates the residual echo power based on the estimate of the echo path
// gain.
- void NonLinearEstimate(bool headset_detected,
+ void NonLinearEstimate(bool sufficient_filter_updates,
+ bool saturated_echo,
+ bool bounded_erl,
+ bool transparent_mode,
+ bool initial_state,
const std::array<float, kFftLengthBy2Plus1>& X2,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2);
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index 46b726d..a44a548 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -83,8 +83,8 @@
render_buffer.Insert(x);
aec_state.HandleEchoPathChange(echo_path_variability);
- aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main,
- Y2, x[0], s, false);
+ aec_state.Update(H2, h, true, rtc::Optional<size_t>(2), render_buffer,
+ E2_main, Y2, x[0], s, false);
estimator.Estimate(aec_state, render_buffer, S2_linear, Y2, &R2);
}
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index c64e5a4..c1909f3 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -11,6 +11,7 @@
#include "modules/audio_processing/aec3/subtractor.h"
#include <algorithm>
+#include <numeric>
#include "api/array_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
@@ -63,6 +64,7 @@
shadow_filter_.HandleEchoPathChange();
G_main_.HandleEchoPathChange();
G_shadow_.HandleEchoPathChange();
+ converged_filter_ = false;
}
}
@@ -89,6 +91,19 @@
shadow_filter_.Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
+ if (!converged_filter_) {  // Once set, the flag is not re-evaluated here.
+ const auto sum_of_squares = [](float a, float b) { return a + b * b; };
+ const float e2_main =
+ std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
+ const float e2_shadow =
+ std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
+ const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
+
+ if (y2 > kBlockSize * 50.f * 50.f) {  // Skip blocks with weak capture.
+ converged_filter_ = (e2_main < 0.3 * y2 || e2_shadow < 0.1 * y2);
+ }
+ }
+
// Compute spectra for future use.
E_main.Spectrum(optimization_, &output->E2_main);
E_shadow.Spectrum(optimization_, &output->E2_shadow);
diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h
index e761554..680bf45 100644
--- a/modules/audio_processing/aec3/subtractor.h
+++ b/modules/audio_processing/aec3/subtractor.h
@@ -57,6 +57,8 @@
return main_filter_.FilterImpulseResponse();
}
+ bool ConvergedFilter() const { return converged_filter_; }
+
private:
const Aec3Fft fft_;
ApmDataDumper* data_dumper_;
@@ -65,6 +67,7 @@
AdaptiveFirFilter shadow_filter_;
MainFilterUpdateGain G_main_;
ShadowFilterUpdateGain G_shadow_;
+ bool converged_filter_ = false;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor);
};
diff --git a/modules/audio_processing/aec3/subtractor_unittest.cc b/modules/audio_processing/aec3/subtractor_unittest.cc
index ea28c4e..0450b6c 100644
--- a/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -69,6 +69,7 @@
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(),
rtc::Optional<size_t>(delay_samples / kBlockSize),
render_buffer, E2_main, Y2, x[0], output.s_main, false);
}
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 380e630..7276712 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -285,9 +285,11 @@
struct EpStrength {
float lf = 10.f;
- float mf = 100.f;
- float hf = 200.f;
+ float mf = 10.f;
+ float hf = 10.f;
float default_len = 0.f;
+ bool echo_can_saturate = true;
+ bool bounded_erl = false;
} ep_strength;
struct Mask {
@@ -305,7 +307,6 @@
struct EchoAudibility {
float low_render_limit = 4 * 64.f;
float normal_render_limit = 64.f;
- float active_render_limit = 100.f;
} echo_audibility;
struct RenderLevels {