AEC3: Add multichannel configuration and multichannel detection
The features have two safety fallbacks:
- multichannel config has a killswitch WebRTC-Aec3SetupSpecificDefaultConfigDefaultsKillSwitch
- stereo detection has a killswitch WebRTC-Aec3StereoContentDetectionKillSwitch
Both features are enabled by default in the AEC3 config.
Tested: Bitexact on a large number of aecdumps.
Bug: chromium:1295710
Change-Id: I340cdc9140dacd4ca22d0911eb9f732b6cf8b226
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258129
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36482}
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 1fd4036..efbc5b7 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -236,6 +236,11 @@
float floor_first_increase = 0.00001f;
bool conservative_hf_suppression = false;
} suppressor;
+
+ struct MultiChannel {
+ bool detect_stereo_content = true;
+ float stereo_detection_threshold = 0.0f;
+ } multi_channel;
};
} // namespace webrtc
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index 71966c1..aa490b0 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -415,6 +415,13 @@
ReadParam(section, "conservative_hf_suppression",
&cfg.suppressor.conservative_hf_suppression);
}
+
+ if (rtc::GetValueFromJsonObject(aec3_root, "multi_channel", &section)) {
+ ReadParam(section, "detect_stereo_content",
+ &cfg.multi_channel.detect_stereo_content);
+ ReadParam(section, "stereo_detection_threshold",
+ &cfg.multi_channel.stereo_detection_threshold);
+ }
}
EchoCanceller3Config Aec3ConfigFromJsonString(absl::string_view json_string) {
@@ -574,7 +581,8 @@
ost << "\"erle_onset_compensation_in_dominant_nearend\": "
<< (config.ep_strength.erle_onset_compensation_in_dominant_nearend
? "true"
- : "false") << ",";
+ : "false")
+ << ",";
ost << "\"use_conservative_tail_frequency_response\": "
<< (config.ep_strength.use_conservative_tail_frequency_response
? "true"
@@ -736,7 +744,15 @@
<< ",";
ost << "\"conservative_hf_suppression\": "
<< config.suppressor.conservative_hf_suppression;
+ ost << "},";
+
+ ost << "\"multi_channel\": {";
+ ost << "\"detect_stereo_content\": "
+ << (config.multi_channel.detect_stereo_content ? "true" : "false") << ",";
+ ost << "\"stereo_detection_threshold\": "
+ << config.multi_channel.stereo_detection_threshold;
ost << "}";
+
ost << "}";
ost << "}";
diff --git a/api/audio/echo_canceller3_factory.cc b/api/audio/echo_canceller3_factory.cc
index d65a726..284b117 100644
--- a/api/audio/echo_canceller3_factory.cc
+++ b/api/audio/echo_canceller3_factory.cc
@@ -25,7 +25,8 @@
int num_render_channels,
int num_capture_channels) {
return std::make_unique<EchoCanceller3>(
- config_, sample_rate_hz, num_render_channels, num_capture_channels);
+ config_, /*multichannel_config=*/absl::nullopt, sample_rate_hz,
+ num_render_channels, num_capture_channels);
}
} // namespace webrtc
diff --git a/api/audio/test/echo_canceller3_config_json_unittest.cc b/api/audio/test/echo_canceller3_config_json_unittest.cc
index bb28b4f..5610aeb 100644
--- a/api/audio/test/echo_canceller3_config_json_unittest.cc
+++ b/api/audio/test/echo_canceller3_config_json_unittest.cc
@@ -31,6 +31,10 @@
cfg.suppressor.subband_nearend_detection.subband1 = {4, 5};
cfg.suppressor.subband_nearend_detection.nearend_threshold = 2.f;
cfg.suppressor.subband_nearend_detection.snr_threshold = 100.f;
+ cfg.multi_channel.detect_stereo_content =
+ !cfg.multi_channel.detect_stereo_content;
+ cfg.multi_channel.stereo_detection_threshold =
+ cfg.multi_channel.stereo_detection_threshold + 1.0f;
std::string json_string = Aec3ConfigToJsonString(cfg);
EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string);
@@ -75,5 +79,9 @@
cfg_transformed.suppressor.subband_nearend_detection.nearend_threshold);
EXPECT_EQ(cfg.suppressor.subband_nearend_detection.snr_threshold,
cfg_transformed.suppressor.subband_nearend_detection.snr_threshold);
+ EXPECT_EQ(cfg.multi_channel.detect_stereo_content,
+ cfg_transformed.multi_channel.detect_stereo_content);
+ EXPECT_EQ(cfg.multi_channel.stereo_detection_threshold,
+ cfg_transformed.multi_channel.stereo_detection_threshold);
}
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn
index 128e8f3..928afef 100644
--- a/modules/audio_processing/aec3/BUILD.gn
+++ b/modules/audio_processing/aec3/BUILD.gn
@@ -37,6 +37,8 @@
"coarse_filter_update_gain.h",
"comfort_noise_generator.cc",
"comfort_noise_generator.h",
+ "config_selector.cc",
+ "config_selector.h",
"decimator.cc",
"decimator.h",
"delay_estimate.h",
@@ -72,6 +74,8 @@
"matched_filter_lag_aggregator.h",
"moving_average.cc",
"moving_average.h",
+ "multi_channel_content_detector.cc",
+ "multi_channel_content_detector.h",
"nearend_detector.h",
"refined_filter_update_gain.cc",
"refined_filter_update_gain.h",
@@ -338,6 +342,7 @@
"clockdrift_detector_unittest.cc",
"coarse_filter_update_gain_unittest.cc",
"comfort_noise_generator_unittest.cc",
+ "config_selector_unittest.cc",
"decimator_unittest.cc",
"echo_canceller3_unittest.cc",
"echo_path_delay_estimator_unittest.cc",
@@ -352,6 +357,7 @@
"matched_filter_lag_aggregator_unittest.cc",
"matched_filter_unittest.cc",
"moving_average_unittest.cc",
+ "multi_channel_content_detector_unittest.cc",
"refined_filter_update_gain_unittest.cc",
"render_buffer_unittest.cc",
"render_delay_buffer_unittest.cc",
diff --git a/modules/audio_processing/aec3/config_selector.cc b/modules/audio_processing/aec3/config_selector.cc
new file mode 100644
index 0000000..9a37da6
--- /dev/null
+++ b/modules/audio_processing/aec3/config_selector.cc
@@ -0,0 +1,69 @@
+
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/config_selector.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Validates that the mono and the multichannel configs have compatible fields.
+bool CompatibleConfigs(const EchoCanceller3Config& mono_config,
+ const EchoCanceller3Config& multichannel_config) {
+ if (mono_config.delay.fixed_capture_delay_samples !=
+ multichannel_config.delay.fixed_capture_delay_samples) {
+ return false;
+ }
+
+ if (mono_config.filter.export_linear_aec_output !=
+ multichannel_config.filter.export_linear_aec_output) {
+ return false;
+ }
+
+ if (mono_config.filter.high_pass_filter_echo_reference !=
+ multichannel_config.filter.high_pass_filter_echo_reference) {
+ return false;
+ }
+
+ if (mono_config.multi_channel.detect_stereo_content !=
+ multichannel_config.multi_channel.detect_stereo_content) {
+ return false;
+ }
+ return true;
+}
+
+} // namespace
+
+ConfigSelector::ConfigSelector(
+ const EchoCanceller3Config& config,
+ const absl::optional<EchoCanceller3Config>& multichannel_config,
+ int num_render_input_channels)
+ : config_(config), multichannel_config_(multichannel_config) {
+ if (multichannel_config_.has_value()) {
+ RTC_DCHECK(CompatibleConfigs(config_, *multichannel_config_));
+ }
+
+ Update(!config_.multi_channel.detect_stereo_content &&
+ num_render_input_channels > 1);
+
+ RTC_DCHECK(active_config_);
+}
+
+void ConfigSelector::Update(bool multichannel_content) {
+ if (multichannel_content && multichannel_config_.has_value()) {
+ active_config_ = &(*multichannel_config_);
+ } else {
+ active_config_ = &config_;
+ }
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/aec3/config_selector.h b/modules/audio_processing/aec3/config_selector.h
new file mode 100644
index 0000000..3b3f94e
--- /dev/null
+++ b/modules/audio_processing/aec3/config_selector.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+
+namespace webrtc {
+
+// Selects the config to use.
+class ConfigSelector {
+ public:
+ ConfigSelector(
+ const EchoCanceller3Config& config,
+ const absl::optional<EchoCanceller3Config>& multichannel_config,
+ int num_render_input_channels);
+
+ // Updates the config selection based on the detection of multichannel
+ // content.
+ void Update(bool multichannel_content);
+
+ const EchoCanceller3Config& active_config() const { return *active_config_; }
+
+ private:
+ const EchoCanceller3Config config_;
+ const absl::optional<EchoCanceller3Config> multichannel_config_;
+ const EchoCanceller3Config* active_config_ = nullptr;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
diff --git a/modules/audio_processing/aec3/config_selector_unittest.cc b/modules/audio_processing/aec3/config_selector_unittest.cc
new file mode 100644
index 0000000..1826bfc
--- /dev/null
+++ b/modules/audio_processing/aec3/config_selector_unittest.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/config_selector.h"
+
+#include <tuple>
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+class ConfigSelectorChannelsAndContentDetection
+ : public ::testing::Test,
+ public ::testing::WithParamInterface<std::tuple<int, bool>> {};
+
+INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters,
+ ConfigSelectorChannelsAndContentDetection,
+ ::testing::Combine(::testing::Values(1, 2, 8),
+ ::testing::Values(false, true)));
+
+class ConfigSelectorChannels : public ::testing::Test,
+ public ::testing::WithParamInterface<int> {};
+
+INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters,
+ ConfigSelectorChannels,
+ ::testing::Values(1, 2, 8));
+
+TEST_P(ConfigSelectorChannelsAndContentDetection,
+ MonoConfigIsSelectedWhenNoMultiChannelConfigPresent) {
+ const auto [num_channels, detect_stereo_content] = GetParam();
+ EchoCanceller3Config config;
+ config.multi_channel.detect_stereo_content = detect_stereo_content;
+ absl::optional<EchoCanceller3Config> multichannel_config;
+
+ config.delay.default_delay = config.delay.default_delay + 1;
+ const size_t custom_delay_value_in_config = config.delay.default_delay;
+
+ ConfigSelector cs(config, multichannel_config,
+ /*num_render_input_channels=*/num_channels);
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_config);
+
+ cs.Update(/*multichannel_content=*/false);
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_config);
+
+ cs.Update(/*multichannel_content=*/true);
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_config);
+}
+
+TEST_P(ConfigSelectorChannelsAndContentDetection,
+ CorrectInitialConfigIsSelected) {
+ const auto [num_channels, detect_stereo_content] = GetParam();
+ EchoCanceller3Config config;
+ config.multi_channel.detect_stereo_content = detect_stereo_content;
+ absl::optional<EchoCanceller3Config> multichannel_config = config;
+
+ config.delay.default_delay += 1;
+ const size_t custom_delay_value_in_config = config.delay.default_delay;
+ multichannel_config->delay.default_delay += 2;
+ const size_t custom_delay_value_in_multichannel_config =
+ multichannel_config->delay.default_delay;
+
+ ConfigSelector cs(config, multichannel_config,
+ /*num_render_input_channels=*/num_channels);
+
+ if (num_channels == 1 || detect_stereo_content) {
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_config);
+ } else {
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_multichannel_config);
+ }
+}
+
+TEST_P(ConfigSelectorChannels, CorrectConfigUpdateBehavior) {
+ const int num_channels = GetParam();
+ EchoCanceller3Config config;
+ config.multi_channel.detect_stereo_content = true;
+ absl::optional<EchoCanceller3Config> multichannel_config = config;
+
+ config.delay.default_delay += 1;
+ const size_t custom_delay_value_in_config = config.delay.default_delay;
+ multichannel_config->delay.default_delay += 2;
+ const size_t custom_delay_value_in_multichannel_config =
+ multichannel_config->delay.default_delay;
+
+ ConfigSelector cs(config, multichannel_config,
+ /*num_render_input_channels=*/num_channels);
+
+ cs.Update(/*multichannel_content=*/false);
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_config);
+
+ if (num_channels == 1) {
+ cs.Update(/*multichannel_content=*/false);
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_config);
+ } else {
+ cs.Update(/*multichannel_content=*/true);
+ EXPECT_EQ(cs.active_config().delay.default_delay,
+ custom_delay_value_in_multichannel_config);
+ }
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index fcc941b..ef58314 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -101,13 +101,24 @@
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
RTC_DCHECK_GE(1, sub_frame_index);
RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
- RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
- for (size_t band = 0; band < frame->size(); ++band) {
- for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
- (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
- &(*frame)[band][channel][sub_frame_index * kSubFrameLength],
+ if ((*frame)[0].size() > (*sub_frame_view)[0].size()) {
+ RTC_DCHECK_EQ((*sub_frame_view)[0].size(), 1);
+ // Downmix the audio to mono (should only be done when the audio contains
+ // fake-stereo or fake-multichannel).
+ for (size_t band = 0; band < frame->size(); ++band) {
+ (*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView<float>(
+ &(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength],
kSubFrameLength);
}
+ } else {
+ RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
+ for (size_t band = 0; band < frame->size(); ++band) {
+ for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
+ (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
+ &(*frame)[band][channel][sub_frame_index * kSubFrameLength],
+ kSubFrameLength);
+ }
+ }
}
}
@@ -221,6 +232,10 @@
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
EchoCanceller3Config adjusted_cfg = config;
+ if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) {
+ adjusted_cfg.multi_channel.detect_stereo_content = false;
+ }
+
if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
adjusted_cfg.suppressor.high_bands_suppression
.anti_howling_activation_threshold = 25.f;
@@ -667,68 +682,71 @@
int EchoCanceller3::instance_count_ = 0;
-EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
- int sample_rate_hz,
- size_t num_render_channels,
- size_t num_capture_channels)
+EchoCanceller3::EchoCanceller3(
+ const EchoCanceller3Config& config,
+ const absl::optional<EchoCanceller3Config>& multichannel_config,
+ int sample_rate_hz,
+ size_t num_render_channels,
+ size_t num_capture_channels)
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
config_(AdjustConfig(config)),
sample_rate_hz_(sample_rate_hz),
num_bands_(NumBandsForRate(sample_rate_hz_)),
- num_render_channels_(num_render_channels),
+ num_render_input_channels_(num_render_channels),
num_capture_channels_(num_capture_channels),
+ config_selector_(AdjustConfig(config),
+ multichannel_config,
+ num_render_input_channels_),
+ multichannel_content_detector_(
+ config_selector_.active_config().multi_channel.detect_stereo_content,
+ num_render_input_channels_,
+ config_selector_.active_config()
+ .multi_channel.stereo_detection_threshold),
output_framer_(num_bands_, num_capture_channels_),
capture_blocker_(num_bands_, num_capture_channels_),
- render_blocker_(num_bands_, num_render_channels_),
render_transfer_queue_(
kRenderTransferQueueSizeFrames,
std::vector<std::vector<std::vector<float>>>(
num_bands_,
std::vector<std::vector<float>>(
- num_render_channels_,
+ num_render_input_channels_,
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
Aec3RenderQueueItemVerifier(num_bands_,
- num_render_channels_,
+ num_render_input_channels_,
AudioBuffer::kSplitBandSize)),
render_queue_output_frame_(
num_bands_,
std::vector<std::vector<float>>(
- num_render_channels_,
+ num_render_input_channels_,
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
render_block_(
num_bands_,
- std::vector<std::vector<float>>(num_render_channels_,
+ std::vector<std::vector<float>>(num_render_input_channels_,
std::vector<float>(kBlockSize, 0.f))),
capture_block_(
num_bands_,
std::vector<std::vector<float>>(num_capture_channels_,
std::vector<float>(kBlockSize, 0.f))),
- render_sub_frame_view_(
- num_bands_,
- std::vector<rtc::ArrayView<float>>(num_render_channels_)),
capture_sub_frame_view_(
num_bands_,
std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
- block_processor_.reset(BlockProcessor::Create(
- config_, sample_rate_hz_, num_render_channels_, num_capture_channels_));
-
- if (config_.delay.fixed_capture_delay_samples > 0) {
+ if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
block_delay_buffer_.reset(new BlockDelayBuffer(
num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
config_.delay.fixed_capture_delay_samples));
}
- render_writer_.reset(new RenderWriter(data_dumper_.get(), config_,
- &render_transfer_queue_, num_bands_,
- num_render_channels_));
+ render_writer_.reset(new RenderWriter(
+ data_dumper_.get(), config_selector_.active_config(),
+ &render_transfer_queue_, num_bands_, num_render_input_channels_));
RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
RTC_DCHECK_GE(kMaxNumBands, num_bands_);
- if (config_.filter.export_linear_aec_output) {
+ if (config_selector_.active_config().filter.export_linear_aec_output) {
linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_));
linear_output_block_ =
std::make_unique<std::vector<std::vector<std::vector<float>>>>(
@@ -739,17 +757,49 @@
1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
}
+ Initialize();
+
RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
- << " Hz, num render channels: " << num_render_channels_
+ << " Hz, num render channels: " << num_render_input_channels_
<< ", num capture channels: " << num_capture_channels_;
}
EchoCanceller3::~EchoCanceller3() = default;
+void EchoCanceller3::Initialize() {
+ RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+
+ num_render_channels_to_aec_ =
+ multichannel_content_detector_.IsMultiChannelContentDetected()
+ ? num_render_input_channels_
+ : 1;
+
+ config_selector_.Update(
+ multichannel_content_detector_.IsMultiChannelContentDetected());
+
+ for (std::vector<std::vector<float>>& block_band : render_block_) {
+ block_band.resize(num_render_channels_to_aec_);
+ for (std::vector<float>& block_channel : block_band) {
+ block_channel.resize(kBlockSize, 0.0f);
+ }
+ }
+
+ render_blocker_.reset(
+ new FrameBlocker(num_bands_, num_render_channels_to_aec_));
+
+ block_processor_.reset(BlockProcessor::Create(
+ config_selector_.active_config(), sample_rate_hz_,
+ num_render_channels_to_aec_, num_capture_channels_));
+
+ render_sub_frame_view_ = std::vector<std::vector<rtc::ArrayView<float>>>(
+ num_bands_,
+ std::vector<rtc::ArrayView<float>>(num_render_channels_to_aec_));
+}
+
void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
- RTC_DCHECK_EQ(render.num_channels(), num_render_channels_);
+ RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_);
data_dumper_->DumpRaw("aec3_call_order",
static_cast<int>(EchoCanceller3ApiCall::kRender));
@@ -797,7 +847,7 @@
api_call_metrics_.ReportCaptureCall();
// Optionally delay the capture signal.
- if (config_.delay.fixed_capture_delay_samples > 0) {
+ if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
RTC_DCHECK(block_delay_buffer_);
block_delay_buffer_->DelaySignal(capture);
}
@@ -853,22 +903,18 @@
return true;
}
-EchoCanceller3Config EchoCanceller3::CreateDefaultConfig(
- size_t num_render_channels,
- size_t num_capture_channels) {
+EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() {
EchoCanceller3Config cfg;
- if (num_render_channels > 1) {
- // Use shorter and more rapidly adapting coarse filter to compensate for
- // thge increased number of total filter parameters to adapt.
- cfg.filter.coarse.length_blocks = 11;
- cfg.filter.coarse.rate = 0.95f;
- cfg.filter.coarse_initial.length_blocks = 11;
- cfg.filter.coarse_initial.rate = 0.95f;
+ // Use shorter and more rapidly adapting coarse filter to compensate for
+ // the increased number of total filter parameters to adapt.
+ cfg.filter.coarse.length_blocks = 11;
+ cfg.filter.coarse.rate = 0.95f;
+ cfg.filter.coarse_initial.length_blocks = 11;
+ cfg.filter.coarse_initial.rate = 0.95f;
- // Use more concervative suppressor behavior for non-nearend speech.
- cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
- cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
- }
+ // Use more conservative suppressor behavior for non-nearend speech.
+ cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
+ cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
return cfg;
}
@@ -887,16 +933,23 @@
// Report render call in the metrics.
api_call_metrics_.ReportRenderCall();
- BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_,
- block_processor_.get(), &render_block_,
- &render_sub_frame_view_);
+ if (multichannel_content_detector_.UpdateDetection(
+ render_queue_output_frame_)) {
+ // Reinitialize the AEC when proper stereo is detected.
+ Initialize();
+ }
- BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_,
- block_processor_.get(), &render_block_,
- &render_sub_frame_view_);
+ // Buffer frame content.
+ BufferRenderFrameContent(&render_queue_output_frame_, 0,
+ render_blocker_.get(), block_processor_.get(),
+ &render_block_, &render_sub_frame_view_);
- BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(),
- &render_block_);
+ BufferRenderFrameContent(&render_queue_output_frame_, 1,
+ render_blocker_.get(), block_processor_.get(),
+ &render_block_, &render_sub_frame_view_);
+
+ BufferRemainingRenderFrameContent(render_blocker_.get(),
+ block_processor_.get(), &render_block_);
frame_to_buffer =
render_transfer_queue_.Remove(&render_queue_output_frame_);
diff --git a/modules/audio_processing/aec3/echo_canceller3.h b/modules/audio_processing/aec3/echo_canceller3.h
index 44e0f38..ba5895f 100644
--- a/modules/audio_processing/aec3/echo_canceller3.h
+++ b/modules/audio_processing/aec3/echo_canceller3.h
@@ -16,6 +16,7 @@
#include <memory>
#include <vector>
+#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
@@ -23,7 +24,9 @@
#include "modules/audio_processing/aec3/block_delay_buffer.h"
#include "modules/audio_processing/aec3/block_framer.h"
#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/config_selector.h"
#include "modules/audio_processing/aec3/frame_blocker.h"
+#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
@@ -84,10 +87,12 @@
// AnalyzeRender call which can be called concurrently with the other methods.
class EchoCanceller3 : public EchoControl {
public:
- EchoCanceller3(const EchoCanceller3Config& config,
- int sample_rate_hz,
- size_t num_render_channels,
- size_t num_capture_channels);
+ EchoCanceller3(
+ const EchoCanceller3Config& config,
+ const absl::optional<EchoCanceller3Config>& multichannel_config,
+ int sample_rate_hz,
+ size_t num_render_channels,
+ size_t num_capture_channels);
~EchoCanceller3() override;
@@ -130,20 +135,37 @@
block_processor_->UpdateEchoLeakageStatus(leakage_detected);
}
- // Produces a default configuration that is suitable for a certain combination
- // of render and capture channels.
- static EchoCanceller3Config CreateDefaultConfig(size_t num_render_channels,
- size_t num_capture_channels);
+ // Produces a default configuration for multichannel.
+ static EchoCanceller3Config CreateDefaultMultichannelConfig();
private:
- class RenderWriter;
friend class EchoCanceller3Tester;
- FRIEND_TEST_ALL_PREFIXES(EchoCanceller3Metrics, EchoReturnLossEnhancement);
+ FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
+ FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+ DetectionOfProperStereoUsingThreshold);
+ FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+ StereoContentDetectionForMonoSignals);
- // Replaces the internal block processor with a custom one for testing.
+ class RenderWriter;
+
+ // (Re-)Initializes the selected subset of the EchoCanceller3 fields, at
+ // creation as well as during reconfiguration.
+ void Initialize();
+
+ // Only for testing. Replaces the internal block processor.
void SetBlockProcessorForTesting(
std::unique_ptr<BlockProcessor> block_processor);
+ // Only for testing. Returns whether stereo processing is active.
+ bool StereoRenderProcessingActiveForTesting() const {
+ return multichannel_content_detector_.IsMultiChannelContentDetected();
+ }
+
+ // Only for testing.
+ const EchoCanceller3Config& GetActiveConfigForTesting() const {
+ return config_selector_.active_config();
+ }
+
// Empties the render SwapQueue.
void EmptyRenderQueue();
@@ -166,13 +188,17 @@
const EchoCanceller3Config config_;
const int sample_rate_hz_;
const int num_bands_;
- const size_t num_render_channels_;
+ const size_t num_render_input_channels_;
+ size_t num_render_channels_to_aec_;
const size_t num_capture_channels_;
+ ConfigSelector config_selector_;
+ MultiChannelContentDetector multichannel_content_detector_;
std::unique_ptr<BlockFramer> linear_output_framer_
RTC_GUARDED_BY(capture_race_checker_);
BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
- FrameBlocker render_blocker_ RTC_GUARDED_BY(capture_race_checker_);
+ std::unique_ptr<FrameBlocker> render_blocker_
+ RTC_GUARDED_BY(capture_race_checker_);
SwapQueue<std::vector<std::vector<std::vector<float>>>,
Aec3RenderQueueItemVerifier>
render_transfer_queue_;
diff --git a/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
index ce0fa50..81cefb6 100644
--- a/modules/audio_processing/aec3/echo_canceller3_unittest.cc
+++ b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
@@ -190,6 +190,32 @@
return ss.Release();
}
+void RunAecInStereo(AudioBuffer& buffer,
+ EchoCanceller3& aec3,
+ float channel_0_value,
+ float channel_1_value) {
+ rtc::ArrayView<float> data_channel_0(&buffer.channels()[0][0],
+ buffer.num_frames());
+ std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value);
+ rtc::ArrayView<float> data_channel_1(&buffer.channels()[1][0],
+ buffer.num_frames());
+ std::fill(data_channel_1.begin(), data_channel_1.end(), channel_1_value);
+ aec3.AnalyzeRender(&buffer);
+ aec3.AnalyzeCapture(&buffer);
+ aec3.ProcessCapture(&buffer, /*level_change=*/false);
+}
+
+void RunAecInSMono(AudioBuffer& buffer,
+ EchoCanceller3& aec3,
+ float channel_0_value) {
+ rtc::ArrayView<float> data_channel_0(&buffer.channels()[0][0],
+ buffer.num_frames());
+ std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value);
+ aec3.AnalyzeRender(&buffer);
+ aec3.AnalyzeCapture(&buffer);
+ aec3.ProcessCapture(&buffer, /*level_change=*/false);
+}
+
} // namespace
class EchoCanceller3Tester {
@@ -220,7 +246,9 @@
// and that the processor data is properly passed to the EchoCanceller3
// output.
void RunCaptureTransportVerificationTest() {
- EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+ 1, 1);
aec3.SetBlockProcessorForTesting(
std::make_unique<CaptureTransportVerificationProcessor>(num_bands_));
@@ -244,7 +272,9 @@
// Test method for testing that the render data is properly received by the
// block processor.
void RunRenderTransportVerificationTest() {
- EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+ 1, 1);
aec3.SetBlockProcessorForTesting(
std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
@@ -313,7 +343,9 @@
break;
}
- EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+ 1, 1);
aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
@@ -393,7 +425,9 @@
} break;
}
- EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+ 1, 1);
aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
@@ -479,7 +513,9 @@
} break;
}
- EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+ 1, 1);
aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
++frame_index) {
@@ -518,7 +554,8 @@
// capture and render API calls.
void RunRenderSwapQueueVerificationTest() {
const EchoCanceller3Config config;
- EchoCanceller3 aec3(config, sample_rate_hz_, 1, 1);
+ EchoCanceller3 aec3(config, /*multichannel_config=*/absl::nullopt,
+ sample_rate_hz_, 1, 1);
aec3.SetBlockProcessorForTesting(
std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
@@ -566,7 +603,9 @@
// This test verifies that a buffer overrun in the render swapqueue is
// properly reported.
void RunRenderPipelineSwapQueueOverrunReturnValueTest() {
- EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+ 1, 1);
constexpr size_t kRenderTransferQueueSize = 30;
for (size_t k = 0; k < 2; ++k) {
@@ -591,7 +630,9 @@
// Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
// way that the number of bands for the rates are different.
const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
- EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt,
+ aec3_sample_rate_hz, 1, 1);
PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0], 0);
EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), "");
@@ -604,7 +645,9 @@
// Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
// way that the number of bands for the rates are different.
const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
- EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, 1, 1);
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt,
+ aec3_sample_rate_hz, 1, 1);
PopulateInputFrame(frame_length_, num_bands_, 0,
&capture_buffer_.split_bands_f(0)[0], 100);
EXPECT_DEATH(aec3.ProcessCapture(&capture_buffer_, false), "");
@@ -887,6 +930,147 @@
EXPECT_FLOAT_EQ(adjusted_config.ep_strength.nearend_len, 0.8);
}
+// Verifies that stereo render processing, and with it the multichannel
+// config, only becomes active once the two render channel values passed to
+// RunAecInStereo differ (stereo detection threshold set to 0).
+TEST(EchoCanceller3, DetectionOfProperStereo) {
+ constexpr int kSampleRateHz = 16000;
+ constexpr int kNumChannels = 2;
+ AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+ /*input_num_channels=*/kNumChannels,
+ /*buffer_rate=*/kSampleRateHz,
+ /*buffer_num_channels=*/kNumChannels,
+ /*output_rate=*/kSampleRateHz,
+ /*output_num_channels=*/kNumChannels);
+
+ // The two configs are given different coarse filter lengths so that the
+ // assertions below can tell which config is currently active.
+ constexpr size_t kNumBlocksForMonoConfig = 1;
+ constexpr size_t kNumBlocksForSurroundConfig = 2;
+ EchoCanceller3Config mono_config;
+ absl::optional<EchoCanceller3Config> multichannel_config;
+
+ mono_config.multi_channel.detect_stereo_content = true;
+ mono_config.multi_channel.stereo_detection_threshold = 0.0f;
+ // Copy first, then diverge: both configs share the detection settings.
+ multichannel_config = mono_config;
+ mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+ multichannel_config->filter.coarse_initial.length_blocks =
+ kNumBlocksForSurroundConfig;
+
+ EchoCanceller3 aec3(mono_config, multichannel_config,
+ /*sample_rate_hz=*/kSampleRateHz,
+ /*num_render_channels=*/kNumChannels,
+ /*num_capture_input_channels=*/kNumChannels);
+
+ // Before any audio is processed the mono config is expected to be active.
+ EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForMonoConfig);
+
+ // Same value for both channel arguments: no switch to stereo is expected.
+ // NOTE(review): RunAecInStereo is defined outside this chunk; presumably it
+ // feeds frames filled with the two given per-channel values - confirm.
+ RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
+ EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForMonoConfig);
+
+ // Different values for the two channel arguments: the multichannel config
+ // is expected to take over.
+ RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+ EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForSurroundConfig);
+}
+
+// Verifies that, with a non-zero stereo detection threshold, stereo render
+// processing and the multichannel config stay inactive for a channel value
+// difference below the threshold and become active for one well above it.
+TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
+ constexpr int kSampleRateHz = 16000;
+ constexpr int kNumChannels = 2;
+ AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+ /*input_num_channels=*/kNumChannels,
+ /*buffer_rate=*/kSampleRateHz,
+ /*buffer_num_channels=*/kNumChannels,
+ /*output_rate=*/kSampleRateHz,
+ /*output_num_channels=*/kNumChannels);
+
+ // The two configs are given different coarse filter lengths so that the
+ // assertions below can tell which config is currently active.
+ constexpr size_t kNumBlocksForMonoConfig = 1;
+ constexpr size_t kNumBlocksForSurroundConfig = 2;
+ EchoCanceller3Config mono_config;
+ absl::optional<EchoCanceller3Config> multichannel_config;
+
+ constexpr float kStereoDetectionThreshold = 2.0f;
+ mono_config.multi_channel.detect_stereo_content = true;
+ mono_config.multi_channel.stereo_detection_threshold =
+ kStereoDetectionThreshold;
+ // Copy first, then diverge: both configs share the detection settings.
+ multichannel_config = mono_config;
+ mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+ multichannel_config->filter.coarse_initial.length_blocks =
+ kNumBlocksForSurroundConfig;
+
+ EchoCanceller3 aec3(mono_config, multichannel_config,
+ /*sample_rate_hz=*/kSampleRateHz,
+ /*num_render_channels=*/kNumChannels,
+ /*num_capture_input_channels=*/kNumChannels);
+
+ // Before any audio is processed the mono config is expected to be active.
+ EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForMonoConfig);
+
+ // Channel arguments differ by 1.0, i.e. less than the threshold of 2.0: no
+ // switch to stereo is expected.
+ // NOTE(review): RunAecInStereo is defined outside this chunk; presumably it
+ // feeds frames filled with the two given per-channel values - confirm.
+ RunAecInStereo(buffer, aec3, 100.0f,
+ 100.0f + kStereoDetectionThreshold - 1.0f);
+ EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForMonoConfig);
+
+ // Channel arguments differ by 12.0, well above the threshold: the
+ // multichannel config is expected to take over.
+ RunAecInStereo(buffer, aec3, 100.0f,
+ 100.0f + kStereoDetectionThreshold + 10.0f);
+ EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+ EXPECT_EQ(
+ aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+ kNumBlocksForSurroundConfig);
+}
+
+// Verifies that a mono render/capture setup never activates stereo render
+// processing or the multichannel config, both with stereo content detection
+// disabled and enabled.
+TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
+  constexpr int kSampleRateHz = 16000;
+
+  // The two configs are given different coarse filter lengths so that the
+  // assertions below can tell which config is currently active.
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> multichannel_config;
+
+  for (bool detect_stereo_content : {false, true}) {
+    mono_config.multi_channel.detect_stereo_content = detect_stereo_content;
+    // Copy first, then diverge: both configs share the detection settings.
+    multichannel_config = mono_config;
+    mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+    multichannel_config->filter.coarse_initial.length_blocks =
+        kNumBlocksForSurroundConfig;
+
+    AudioBuffer mono_buffer(/*input_rate=*/kSampleRateHz,
+                            /*input_num_channels=*/1,
+                            /*buffer_rate=*/kSampleRateHz,
+                            /*buffer_num_channels=*/1,
+                            /*output_rate=*/kSampleRateHz,
+                            /*output_num_channels=*/1);
+
+    EchoCanceller3 aec3(mono_config, multichannel_config,
+                        /*sample_rate_hz=*/kSampleRateHz,
+                        /*num_render_channels=*/1,
+                        /*num_capture_input_channels=*/1);
+
+    // Before any audio is processed the mono config is expected to be active.
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+
+    // Processing mono audio must leave the mono config in place.
+    RunAecInSMono(mono_buffer, aec3, 100.0f);
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+  }
+}
+
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
TEST(EchoCanceller3InputCheckDeathTest, WrongCaptureNumBandsCheckVerification) {
@@ -899,9 +1083,11 @@
// Verifiers that the verification for null input to the capture processing api
// call works.
TEST(EchoCanceller3InputCheckDeathTest, NullCaptureProcessingParameter) {
- EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 16000, 1, 1)
- .ProcessCapture(nullptr, false),
- "");
+ EXPECT_DEATH(
+ EchoCanceller3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, 16000, 1, 1)
+ .ProcessCapture(nullptr, false),
+ "");
}
// Verifies the check for correct sample rate.
@@ -909,7 +1095,10 @@
// tests on test bots has been fixed.
TEST(EchoCanceller3InputCheckDeathTest, DISABLED_WrongSampleRate) {
ApmDataDumper data_dumper(0);
- EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 8001, 1, 1), "");
+ EXPECT_DEATH(
+ EchoCanceller3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, 8001, 1, 1),
+ "");
}
#endif
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.cc b/modules/audio_processing/aec3/multi_channel_content_detector.cc
new file mode 100644
index 0000000..62b3ae0
--- /dev/null
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.cc
@@ -0,0 +1,64 @@
+
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
+
+#include <cmath>
+
+namespace webrtc {
+
+namespace {
+
+// Compares the first two channels (left and right) of the render `frame`,
+// indexed as frame[band][channel][sample], to determine whether the signal is
+// a proper stereo signal. To allow for differences introduced by hardware
+// drivers, the channels are only considered different when the absolute
+// difference of at least one sample pair strictly exceeds
+// `detection_threshold`.
+//
+// Returns false for an empty frame and for frames with fewer than two
+// channels, since there is nothing to compare in either case.
+bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
+                    float detection_threshold) {
+  // Guard the frame[0] access below: indexing an empty vector is undefined.
+  if (frame.empty() || frame[0].size() < 2) {
+    return false;
+  }
+
+  for (const auto& band : frame) {
+    const std::vector<float>& left = band[0];
+    const std::vector<float>& right = band[1];
+    for (size_t k = 0; k < left.size(); ++k) {
+      // A single sample pair above the threshold is enough for detection.
+      if (std::fabs(left[k] - right[k]) > detection_threshold) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+} // namespace
+
+// Stores the detection settings and sets the initial detection state. When
+// stereo detection is disabled, render input with more than one channel is
+// treated as proper multichannel content from the start; otherwise the state
+// starts out as "not detected".
+MultiChannelContentDetector::MultiChannelContentDetector(
+    bool detect_stereo_content,
+    int num_render_input_channels,
+    float detection_threshold)
+    : detect_stereo_content_(detect_stereo_content),
+      detection_threshold_(detection_threshold),
+      proper_multichannel_content_detected_(
+          num_render_input_channels > 1 && !detect_stereo_content) {}
+
+// Runs stereo detection on `frame` unless detection is disabled or proper
+// multichannel content has already been detected. Returns true only when
+// this call changed the detection state.
+bool MultiChannelContentDetector::UpdateDetection(
+    const std::vector<std::vector<std::vector<float>>>& frame) {
+  // Nothing can change when detection is off or the state is already set.
+  if (!detect_stereo_content_ || proper_multichannel_content_detected_) {
+    return false;
+  }
+
+  proper_multichannel_content_detected_ =
+      IsProperStereo(frame, detection_threshold_);
+  // The state was false on entry, so it changed exactly when it is true now.
+  return proper_multichannel_content_detected_;
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.h b/modules/audio_processing/aec3/multi_channel_content_detector.h
new file mode 100644
index 0000000..119cd1f
--- /dev/null
+++ b/modules/audio_processing/aec3/multi_channel_content_detector.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+namespace webrtc {
+
+// Analyzes audio content to determine whether the contained audio is proper
+// multichannel, or only upmixed mono. To allow for differences introduced by
+// hardware drivers, a threshold `detection_threshold` is used for the
+// detection. Once proper multichannel content has been detected, the
+// detection state is not reset by later calls to UpdateDetection().
+class MultiChannelContentDetector {
+ public:
+ // `detect_stereo_content`: when false, no analysis is performed and the
+ // content is reported as multichannel whenever `num_render_input_channels`
+ // is greater than 1.
+ // `detection_threshold`: the absolute difference between corresponding
+ // samples of the first two channels must exceed this value for the content
+ // to count as proper stereo.
+ MultiChannelContentDetector(bool detect_stereo_content,
+ int num_render_input_channels,
+ float detection_threshold);
+
+ // Compares the left and right channels in the render `frame` to determine
+ // whether the signal is a proper multichannel signal. Returns true if the
+ // detection state changed as a result of this call, false otherwise.
+ bool UpdateDetection(
+ const std::vector<std::vector<std::vector<float>>>& frame);
+
+ // Returns the current detection state.
+ bool IsMultiChannelContentDetected() const {
+ return proper_multichannel_content_detected_;
+ }
+
+ private:
+ const bool detect_stereo_content_;
+ const float detection_threshold_;
+ bool proper_multichannel_content_detected_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
diff --git a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
new file mode 100644
index 0000000..ae2a933
--- /dev/null
+++ b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+// Number of samples per channel in the frames used by these tests.
+constexpr size_t kNumSamplesPerChannel = 160;
+
+// Returns a single-band, two-channel frame, indexed as
+// frame[band][channel][sample], in which every sample of the first channel
+// equals `left` and every sample of the second channel equals `right`.
+std::vector<std::vector<std::vector<float>>> MakeStereoFrame(float left,
+                                                             float right) {
+  std::vector<std::vector<std::vector<float>>> frame(1);
+  frame[0].emplace_back(kNumSamplesPerChannel, left);
+  frame[0].emplace_back(kNumSamplesPerChannel, right);
+  return frame;
+}
+
+}  // namespace
+
+// A single render channel is never reported as multichannel content.
+TEST(MultiChannelContentDetector, HandlingOfMono) {
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
+                                 /*num_render_input_channels=*/1,
+                                 /*detection_threshold=*/0.0f);
+  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+}
+
+// Disabling detection does not turn a single channel into multichannel.
+TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/false,
+                                 /*num_render_input_channels=*/1,
+                                 /*detection_threshold=*/0.0f);
+  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+}
+
+// With detection disabled and two render channels, multichannel content is
+// reported from construction and updates never signal a change.
+TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/false,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/0.0f);
+  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+
+  const auto frame = MakeStereoFrame(100.0f, 101.0f);
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+// With detection enabled, nothing is reported before any frame is analyzed.
+TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/0.0f);
+  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+}
+
+// Two identical channels are not detected as proper stereo.
+TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/0.0f);
+  const auto frame = MakeStereoFrame(100.0f, 100.0f);
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+// Differing channels are detected on the first update; later updates report
+// no further change.
+TEST(MultiChannelContentDetector, DetectionWhenStereo) {
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/0.0f);
+  const auto frame = MakeStereoFrame(100.0f, 101.0f);
+  EXPECT_TRUE(mc.UpdateDetection(frame));
+  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+// Detection also triggers when differing channels only appear after frames
+// with identical channels.
+TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/0.0f);
+
+  const auto fake_stereo_frame = MakeStereoFrame(100.0f, 100.0f);
+  EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+
+  const auto proper_stereo_frame = MakeStereoFrame(100.0f, 101.0f);
+
+  EXPECT_TRUE(mc.UpdateDetection(proper_stereo_frame));
+  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(proper_stereo_frame));
+}
+
+// A channel difference equal to the threshold does not trigger detection,
+// because the difference has to exceed the threshold.
+TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
+  constexpr float kThreshold = 1.0f;
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/kThreshold);
+  const auto frame = MakeStereoFrame(100.0f, 100.0f + kThreshold);
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+  EXPECT_FALSE(mc.IsMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+// A channel difference slightly above the threshold triggers detection.
+TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
+  constexpr float kThreshold = 1.0f;
+  MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/kThreshold);
+  const auto frame = MakeStereoFrame(100.0f, 100.0f + kThreshold + 0.1f);
+
+  EXPECT_TRUE(mc.UpdateDetection(frame));
+  EXPECT_TRUE(mc.IsMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 9a1aaee..20e826d 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -1726,14 +1726,14 @@
proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels());
RTC_DCHECK(submodules_.echo_controller);
} else {
- EchoCanceller3Config config =
- use_setup_specific_default_aec3_config_
- ? EchoCanceller3::CreateDefaultConfig(num_reverse_channels(),
- num_proc_channels())
- : EchoCanceller3Config();
+ EchoCanceller3Config config;
+ absl::optional<EchoCanceller3Config> multichannel_config;
+ if (use_setup_specific_default_aec3_config_) {
+ multichannel_config = EchoCanceller3::CreateDefaultMultichannelConfig();
+ }
submodules_.echo_controller = std::make_unique<EchoCanceller3>(
- config, proc_sample_rate_hz(), num_reverse_channels(),
- num_proc_channels());
+ config, multichannel_config, proc_sample_rate_hz(),
+ num_reverse_channels(), num_proc_channels());
}
// Setup the storage for returning the linear AEC output.
diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn
index 72fa6d7..487594d 100644
--- a/test/fuzzers/BUILD.gn
+++ b/test/fuzzers/BUILD.gn
@@ -560,6 +560,7 @@
"../../modules/audio_processing:audio_buffer",
"../../modules/audio_processing/aec3",
]
+ absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
webrtc_fuzzer_test("comfort_noise_decoder_fuzzer") {
diff --git a/test/fuzzers/aec3_fuzzer.cc b/test/fuzzers/aec3_fuzzer.cc
index a9b4a9e..a12ca30 100644
--- a/test/fuzzers/aec3_fuzzer.cc
+++ b/test/fuzzers/aec3_fuzzer.cc
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "absl/types/optional.h"
#include "modules/audio_processing/aec3/echo_canceller3.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/include/audio_processing.h"
@@ -51,7 +52,8 @@
const size_t num_capture_channels =
1 + fuzz_data.ReadOrDefaultValue<uint8_t>(0) % (kMaxNumChannels - 1);
- EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz,
+ EchoCanceller3 aec3(EchoCanceller3Config(),
+ /*multichannel_config=*/absl::nullopt, sample_rate_hz,
num_render_channels, num_capture_channels);
AudioBuffer capture_audio(sample_rate_hz, num_capture_channels,