AGC2: adding level estimation option (RMS or peak-based).
This CL makes it possible to choose the level estimator used by the
adaptive digital gain controller of AGC2. The options are RMS (the default
and currently used estimator) and peak-based (already computed, but not
used until now).
Besides adding the new AGC2 config param for the level estimator, this CL
also refactors the config class by making it more structured.
Bug: webrtc:7494
Change-Id: I20eb558ca50f13536aa7bdea08d21de3b630f8bc
Reviewed-on: https://webrtc-review.googlesource.com/c/110144
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25620}
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index a244c84..91ed7fd 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -429,6 +429,7 @@
"agc2:biquad_filter_unittests",
"agc2:fixed_digital_unittests",
"agc2:noise_estimator_unittests",
+ "agc2:rnn_vad_with_level_unittests",
"agc2:test_utils",
"agc2/rnn_vad:unittests",
"test/conversational_speech:unittest",
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index 18f2d78..22fd9db 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -27,6 +27,7 @@
":gain_applier",
":noise_level_estimator",
":rnn_vad_with_level",
+ "..:api",
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
@@ -58,6 +59,7 @@
":gain_applier",
":noise_level_estimator",
":rnn_vad_with_level",
+ "..:api",
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
@@ -257,6 +259,18 @@
]
}
+rtc_source_set("rnn_vad_with_level_unittests") {
+ testonly = true
+ sources = [
+ "vad_with_level_unittest.cc",
+ ]
+ deps = [
+ ":rnn_vad_with_level",
+ "..:audio_frame_view",
+ "../../../rtc_base:rtc_base_tests_utils",
+ ]
+}
+
rtc_source_set("test_utils") {
testonly = true
visibility = [
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 795b8b5..a5d3608 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@@ -26,8 +26,12 @@
}
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
- float extra_saturation_margin_db)
- : speech_level_estimator_(apm_data_dumper, extra_saturation_margin_db),
+ const AudioProcessing::Config::GainController2& config)
+ : speech_level_estimator_(
+ apm_data_dumper,
+ config.adaptive_digital.level_estimator,
+ config.adaptive_digital.use_saturation_protector,
+ config.adaptive_digital.extra_saturation_margin_db),
gain_applier_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
@@ -44,9 +48,9 @@
signal_with_levels.vad_result.speech_probability);
apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs",
signal_with_levels.vad_result.speech_rms_dbfs);
-
apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
signal_with_levels.vad_result.speech_peak_dbfs);
+
speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result);
signal_with_levels.input_level_dbfs =
@@ -68,7 +72,6 @@
// The gain applier applies the gain.
gain_applier_.Process(signal_with_levels);
- ;
}
void AdaptiveAgc::Reset() {
diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h
index 6c0917a..16c0082 100644
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h
@@ -16,6 +16,7 @@
#include "modules/audio_processing/agc2/noise_level_estimator.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
class ApmDataDumper;
@@ -23,7 +24,8 @@
class AdaptiveAgc {
public:
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
- AdaptiveAgc(ApmDataDumper* apm_data_dumper, float extra_saturation_margin_db);
+ AdaptiveAgc(ApmDataDumper* apm_data_dumper,
+ const AudioProcessing::Config::GainController2& config);
~AdaptiveAgc();
void Process(AudioFrameView<float> float_frame, float last_audio_level);
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
index 138faec..8640324 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
@@ -19,13 +19,20 @@
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper)
- : saturation_protector_(apm_data_dumper),
+ : level_estimator_(
+ AudioProcessing::Config::GainController2::LevelEstimator::kRms),
+ use_saturation_protector_(true),
+ saturation_protector_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper) {}
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
+ AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
+ bool use_saturation_protector,
float extra_saturation_margin_db)
- : saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
+ : level_estimator_(level_estimator),
+ use_saturation_protector_(use_saturation_protector),
+ saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
apm_data_dumper_(apm_data_dumper) {}
void AdaptiveModeLevelEstimator::UpdateEstimation(
@@ -49,20 +56,38 @@
const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
+ // Read speech level estimation.
+ float speech_level_dbfs = 0.f;
+ using LevelEstimatorType =
+ AudioProcessing::Config::GainController2::LevelEstimator;
+ switch (level_estimator_) {
+ case LevelEstimatorType::kRms:
+ speech_level_dbfs = vad_data.speech_rms_dbfs;
+ break;
+ case LevelEstimatorType::kPeak:
+ speech_level_dbfs = vad_data.speech_peak_dbfs;
+ break;
+ }
+
+ // Update speech level estimation.
estimate_numerator_ = estimate_numerator_ * leak_factor +
- vad_data.speech_rms_dbfs * vad_data.speech_probability;
+ speech_level_dbfs * vad_data.speech_probability;
estimate_denominator_ =
estimate_denominator_ * leak_factor + vad_data.speech_probability;
-
last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
- saturation_protector_.UpdateMargin(vad_data, last_estimate_with_offset_dbfs_);
- DebugDumpEstimate();
+ if (use_saturation_protector_) {
+ saturation_protector_.UpdateMargin(vad_data,
+ last_estimate_with_offset_dbfs_);
+ DebugDumpEstimate();
+ }
}
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
return rtc::SafeClamp<float>(
- last_estimate_with_offset_dbfs_ + saturation_protector_.LastMargin(),
+ last_estimate_with_offset_dbfs_ +
+ (use_saturation_protector_ ? saturation_protector_.LastMargin()
+ : 0.f),
-90.f, 30.f);
}
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
index f887268..63b9de2 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
@@ -16,6 +16,7 @@
#include "modules/audio_processing/agc2/agc2_common.h" // kFullBufferSizeMs...
#include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
+#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
class ApmDataDumper;
@@ -23,8 +24,11 @@
class AdaptiveModeLevelEstimator {
public:
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
- AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper,
- float extra_saturation_margin_db);
+ AdaptiveModeLevelEstimator(
+ ApmDataDumper* apm_data_dumper,
+ AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
+ bool use_saturation_protector,
+ float extra_saturation_margin_db);
void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
float LatestLevelEstimate() const;
void Reset();
@@ -35,6 +39,9 @@
private:
void DebugDumpEstimate();
+ const AudioProcessing::Config::GainController2::LevelEstimator
+ level_estimator_;
+ const bool use_saturation_protector_;
size_t buffer_size_ms_ = 0;
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
float estimate_numerator_ = 0.f;
diff --git a/modules/audio_processing/agc2/vad_with_level_unittest.cc b/modules/audio_processing/agc2/vad_with_level_unittest.cc
new file mode 100644
index 0000000..f9aee62
--- /dev/null
+++ b/modules/audio_processing/agc2/vad_with_level_unittest.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/vad_with_level.h"
+
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace test {
+
+TEST(AutomaticGainController2VadWithLevelEstimator,
+ PeakLevelGreaterThanRmsLevel) {
+ constexpr size_t kSampleRateHz = 8000;
+
+ // 10 ms input frame, constant except for one peak value.
+ // Handcrafted so that the average is lower than the peak value.
+ std::array<float, kSampleRateHz / 100> frame;
+ frame.fill(1000.f);
+ frame[10] = 2000.f;
+ float* const channel0 = frame.data();
+ AudioFrameView<float> frame_view(&channel0, 1, frame.size());
+
+ // Compute audio frame levels (the VAD result is ignored).
+ VadWithLevel vad_with_level;
+ auto levels_and_vad_prob = vad_with_level.AnalyzeFrame(frame_view);
+
+ // Compare peak and RMS levels.
+ EXPECT_LT(levels_and_vad_prob.speech_rms_dbfs,
+ levels_and_vad_prob.speech_peak_dbfs);
+}
+
+} // namespace test
+} // namespace webrtc
diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index 06ad8f5..9b8cf45 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc
@@ -65,30 +65,54 @@
RTC_DCHECK(Validate(config))
<< " the invalid config was " << ToString(config);
- if (config.fixed_gain_db != config_.fixed_gain_db) {
+ // Compare against the previously applied config: |config_| must only be
+ // overwritten after this check, otherwise the limiter is never reset.
+ if (config.fixed_digital.gain_db != config_.fixed_digital.gain_db) {
// Reset the limiter to quickly react on abrupt level changes caused by
// large changes of the fixed gain.
limiter_.Reset();
}
config_ = config;
- gain_applier_.SetGainFactor(DbToRatio(config_.fixed_gain_db));
- adaptive_digital_mode_ = config_.adaptive_digital_mode;
- adaptive_agc_.reset(
- new AdaptiveAgc(data_dumper_.get(), config_.extra_saturation_margin_db));
+ gain_applier_.SetGainFactor(DbToRatio(config_.fixed_digital.gain_db));
+ adaptive_digital_mode_ = config_.adaptive_digital.enabled;
+ adaptive_agc_.reset(new AdaptiveAgc(data_dumper_.get(), config_));
}
bool GainController2::Validate(
const AudioProcessing::Config::GainController2& config) {
- return config.fixed_gain_db >= 0.f && config.fixed_gain_db < 50.f &&
- config.extra_saturation_margin_db >= 0.f &&
- config.extra_saturation_margin_db <= 100.f;
+ return config.fixed_digital.gain_db >= 0.f &&
+ config.fixed_digital.gain_db < 50.f &&
+ config.adaptive_digital.extra_saturation_margin_db >= 0.f &&
+ config.adaptive_digital.extra_saturation_margin_db <= 100.f;
}
std::string GainController2::ToString(
const AudioProcessing::Config::GainController2& config) {
rtc::StringBuilder ss;
- ss << "{enabled: " << (config.enabled ? "true" : "false") << ", "
- << "fixed_gain_dB: " << config.fixed_gain_db << "}";
+ std::string adaptive_digital_level_estimator;
+ using LevelEstimatorType =
+ AudioProcessing::Config::GainController2::LevelEstimator;
+ switch (config.adaptive_digital.level_estimator) {
+ case LevelEstimatorType::kRms:
+ adaptive_digital_level_estimator = "RMS";
+ break;
+ case LevelEstimatorType::kPeak:
+ adaptive_digital_level_estimator = "peak";
+ break;
+ }
+ // clang-format off
+ // clang formatting doesn't respect custom nested style.
+ ss << "{"
+ << "enabled: " << (config.enabled ? "true" : "false") << ", "
+ << "fixed_digital: {gain_db: " << config.fixed_digital.gain_db << "}, "
+ << "adaptive_digital: {"
+ << "enabled: "
+ << (config.adaptive_digital.enabled ? "true" : "false") << ", "
+ << "level_estimator: " << adaptive_digital_level_estimator << ", "
+ << "extra_saturation_margin_db:"
+ << config.adaptive_digital.extra_saturation_margin_db << "}"
+ << "}";
+ // clang-format on
return ss.Release();
}
diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc
index f469bc7..94837f5 100644
--- a/modules/audio_processing/gain_controller2_unittest.cc
+++ b/modules/audio_processing/gain_controller2_unittest.cc
@@ -52,8 +52,8 @@
AudioProcessing::Config::GainController2 CreateAgc2FixedDigitalModeConfig(
float fixed_gain_db) {
AudioProcessing::Config::GainController2 config;
- config.adaptive_digital_mode = false;
- config.fixed_gain_db = fixed_gain_db;
+ config.adaptive_digital.enabled = false;
+ config.fixed_digital.gain_db = fixed_gain_db;
// TODO(alessiob): Check why ASSERT_TRUE() below does not compile.
EXPECT_TRUE(GainController2::Validate(config));
return config;
@@ -113,29 +113,26 @@
gain_controller2->ApplyConfig(config);
// Check that attenuation is not allowed.
- config.fixed_gain_db = -5.f;
+ config.fixed_digital.gain_db = -5.f;
EXPECT_FALSE(GainController2::Validate(config));
// Check that valid configurations are applied.
for (const float& fixed_gain_db : {0.f, 5.f, 10.f, 40.f}) {
- config.fixed_gain_db = fixed_gain_db;
+ config.fixed_digital.gain_db = fixed_gain_db;
EXPECT_TRUE(GainController2::Validate(config));
gain_controller2->ApplyConfig(config);
}
}
TEST(GainController2, ToString) {
- // Tests GainController2::ToString().
+ // Tests GainController2::ToString(). Only test the enabled property.
AudioProcessing::Config::GainController2 config;
- config.fixed_gain_db = 5.f;
config.enabled = false;
- EXPECT_EQ("{enabled: false, fixed_gain_dB: 5}",
- GainController2::ToString(config));
+ EXPECT_EQ("{enabled: false", GainController2::ToString(config).substr(0, 15));
config.enabled = true;
- EXPECT_EQ("{enabled: true, fixed_gain_dB: 5}",
- GainController2::ToString(config));
+ EXPECT_EQ("{enabled: true", GainController2::ToString(config).substr(0, 14));
}
TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) {
@@ -263,8 +260,8 @@
// Check that samples are not amplified as much when extra margin is
// high. They should not be amplified at all, but only after convergence. GC2
// starts with a gain, and it takes time until it's down to 0 dB.
- config.extra_saturation_margin_db = 50.f;
- config.fixed_gain_db = 0.f;
+ config.fixed_digital.gain_db = 0.f;
+ config.adaptive_digital.extra_saturation_margin_db = 50.f;
gain_controller2.ApplyConfig(config);
EXPECT_LT(GainAfterProcessingFile(&gain_controller2), 2.f);
@@ -276,8 +273,8 @@
AudioProcessing::Config::GainController2 config;
// Check that some gain is applied if there is no margin.
- config.extra_saturation_margin_db = 0.f;
- config.fixed_gain_db = 0.f;
+ config.fixed_digital.gain_db = 0.f;
+ config.adaptive_digital.extra_saturation_margin_db = 0.f;
gain_controller2.ApplyConfig(config);
EXPECT_GT(GainAfterProcessingFile(&gain_controller2), 2.f);
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index b105ef1..b04629b 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -270,10 +270,23 @@
// first applies a fixed gain. The adaptive digital AGC can be turned off by
// setting |adaptive_digital_mode=false|.
struct GainController2 {
+ enum LevelEstimator { kRms, kPeak };
bool enabled = false;
- bool adaptive_digital_mode = true;
- float extra_saturation_margin_db = 2.f;
+ struct {
+ float gain_db = 0.f;
+ } fixed_digital;
+ struct {
+ bool enabled = true;
+ LevelEstimator level_estimator = kRms;
+ bool use_saturation_protector = true;
+ float extra_saturation_margin_db = 2.f;
+ } adaptive_digital;
+ // Deprecated.
+ // TODO(webrtc:7494): Switch to fixed_digital.gain_db and remove.
float fixed_gain_db = 0.f;
+ // Deprecated. Default stays true, like adaptive_digital.enabled.
+ // TODO(webrtc:7494): Switch to adaptive_digital.enabled and remove.
+ bool adaptive_digital_mode = true;
} gain_controller2;
// Explicit copy assignment implementation to avoid issues with memory
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index b0d4f04..e829606 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -351,9 +351,10 @@
}
if (settings_.use_agc2) {
apm_config.gain_controller2.enabled = *settings_.use_agc2;
- apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db;
+ apm_config.gain_controller2.fixed_digital.gain_db =
+ settings_.agc2_fixed_gain_db;
if (settings_.agc2_use_adaptive_gain) {
- apm_config.gain_controller2.adaptive_digital_mode =
+ apm_config.gain_controller2.adaptive_digital.enabled =
*settings_.agc2_use_adaptive_gain;
}
}