AGC2 adaptive digital controller config clean-up
- Remove dry-run option
- Hard-code `adjacent_speech_frames_threshold` and `vad_reset_period_ms` as
  the constants `kAdjacentSpeechFramesThreshold` and `kVadResetPeriodMs`
- Expose `initial_gain_db` via field trial
Tested: adaptive digital controller bit-exactness verified
Bug: webrtc:7494
Change-Id: I6166611f91320b6c37de3f8e553c06c2ed95b772
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/287222
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Hanna Silen <silen@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38862}
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index 79a0255..3e6b201 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -139,6 +139,7 @@
"../../rtc_base:stringutils",
"../../system_wrappers:field_trial",
"agc2:adaptive_digital_gain_controller",
+ "agc2:common",
"agc2:cpu_features",
"agc2:fixed_digital",
"agc2:gain_applier",
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
index b8a99da..9a504c9 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
@@ -100,25 +100,12 @@
max_gain_increase_db);
}
-// Copies the (multichannel) audio samples from `src` into `dst`.
-void CopyAudio(AudioFrameView<const float> src,
- std::vector<std::vector<float>>& dst) {
- RTC_DCHECK_GT(src.num_channels(), 0);
- RTC_DCHECK_GT(src.samples_per_channel(), 0);
- RTC_DCHECK_EQ(dst.size(), src.num_channels());
- for (int c = 0; c < src.num_channels(); ++c) {
- rtc::ArrayView<const float> channel_view = src.channel(c);
- RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
- RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
- std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
- }
-}
-
} // namespace
AdaptiveDigitalGainController::AdaptiveDigitalGainController(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+ int adjacent_speech_frames_threshold,
int sample_rate_hz,
int num_channels)
: apm_data_dumper_(apm_data_dumper),
@@ -126,41 +113,16 @@
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
config_(config),
+ adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
kFrameDurationMs / 1000.0f),
calls_since_last_gain_log_(0),
- frames_to_gain_increase_allowed_(
- config_.adjacent_speech_frames_threshold),
+ frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
last_gain_db_(config_.initial_gain_db) {
RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
- Initialize(sample_rate_hz, num_channels);
-}
-
-void AdaptiveDigitalGainController::Initialize(int sample_rate_hz,
- int num_channels) {
- if (!config_.dry_run) {
- return;
- }
- RTC_DCHECK_GT(sample_rate_hz, 0);
- RTC_DCHECK_GT(num_channels, 0);
- int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
- bool sample_rate_changed =
- dry_run_frame_.empty() || // Handle initialization.
- dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
- bool num_channels_changed =
- dry_run_channels_.size() != static_cast<size_t>(num_channels);
- if (sample_rate_changed || num_channels_changed) {
- // Resize the multichannel audio vector and update the channel pointers.
- dry_run_frame_.resize(num_channels);
- dry_run_channels_.resize(num_channels);
- for (int c = 0; c < num_channels; ++c) {
- dry_run_frame_[c].resize(frame_size);
- dry_run_channels_[c] = dry_run_frame_[c].data();
- }
- }
}
void AdaptiveDigitalGainController::Process(const FrameInfo& info,
@@ -187,7 +149,7 @@
// observed.
bool first_confident_speech_frame = false;
if (info.speech_probability < kVadConfidenceThreshold) {
- frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold;
+ frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
} else if (frames_to_gain_increase_allowed_ > 0) {
frames_to_gain_increase_allowed_--;
first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
@@ -203,7 +165,7 @@
// No gain increase happened while waiting for a long enough speech
// sequence. Therefore, temporarily allow a faster gain increase.
RTC_DCHECK(gain_increase_allowed);
- max_gain_increase_db *= config_.adjacent_speech_frames_threshold;
+ max_gain_increase_db *= adjacent_speech_frames_threshold_;
}
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
@@ -223,18 +185,7 @@
DbToRatio(last_gain_db_ + gain_change_this_frame_db));
}
- // Modify `frame` only if not running in "dry run" mode.
- if (!config_.dry_run) {
- gain_applier_.ApplyGain(frame);
- } else {
- // Copy `frame` so that `ApplyGain()` is called (on a copy).
- CopyAudio(frame, dry_run_frame_);
- RTC_DCHECK(!dry_run_channels_.empty());
- AudioFrameView<float> frame_copy(&dry_run_channels_[0],
- frame.num_channels(),
- frame.samples_per_channel());
- gain_applier_.ApplyGain(frame_copy);
- }
+ gain_applier_.ApplyGain(frame);
// Remember that the gain has changed for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
index 05b2ef9..ce0dc8f 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
@@ -39,6 +39,7 @@
AdaptiveDigitalGainController(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+ int adjacent_speech_frames_threshold,
int sample_rate_hz,
int num_channels);
AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete;
@@ -56,14 +57,12 @@
GainApplier gain_applier_;
const AudioProcessing::Config::GainController2::AdaptiveDigital config_;
+ const int adjacent_speech_frames_threshold_;
const float max_gain_change_db_per_10ms_;
int calls_since_last_gain_log_;
int frames_to_gain_increase_allowed_;
float last_gain_db_;
-
- std::vector<std::vector<float>> dry_run_frame_;
- std::vector<float*> dry_run_channels_;
};
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc
index 832be1e..b16cd1d 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc
@@ -51,14 +51,16 @@
// Helper to create initialized `AdaptiveDigitalGainController` objects.
struct GainApplierHelper {
GainApplierHelper(const AdaptiveDigitalConfig& config,
+ int adjacent_speech_frames_threshold,
int sample_rate_hz,
int num_channels)
: apm_data_dumper(0),
- gain_applier(
- std::make_unique<AdaptiveDigitalGainController>(&apm_data_dumper,
- config,
- sample_rate_hz,
- num_channels)) {}
+ gain_applier(std::make_unique<AdaptiveDigitalGainController>(
+ &apm_data_dumper,
+ config,
+ adjacent_speech_frames_threshold,
+ sample_rate_hz,
+ num_channels)) {}
ApmDataDumper apm_data_dumper;
std::unique_ptr<AdaptiveDigitalGainController> gain_applier;
};
@@ -81,7 +83,8 @@
TEST(GainController2AdaptiveDigitalGainControllerTest,
GainApplierShouldNotCrash) {
- GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
+ GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+ /*sample_rate_hz=*/48000, kStereo);
// Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
@@ -96,7 +99,8 @@
kDefaultConfig.max_gain_change_db_per_second)) +
kNumExtraFrames;
- GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
+ GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+ /*sample_rate_hz=*/8000, kMono);
AdaptiveDigitalGainController::FrameInfo info =
GetFrameInfoToNotAdapt(kDefaultConfig);
info.speech_level_dbfs = -60.0f;
@@ -111,7 +115,8 @@
}
TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) {
- GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
+ GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+ /*sample_rate_hz=*/8000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr float kMaxGainChangeDbPerFrame =
@@ -152,7 +157,8 @@
}
TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) {
- GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
+ GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+ /*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
@@ -178,7 +184,8 @@
}
TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) {
- GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
+ GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+ /*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
@@ -210,7 +217,8 @@
TEST(GainController2AdaptiveDigitalGainControllerTest,
CanHandlePositiveSpeechLevels) {
- GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
+ GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+ /*sample_rate_hz=*/48000, kStereo);
// Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
@@ -221,7 +229,8 @@
}
TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) {
- GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
+ GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+ /*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
@@ -260,17 +269,16 @@
TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
DoNotIncreaseGainWithTooFewSpeechFrames) {
- AdaptiveDigitalConfig config;
- config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
- GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);
+ GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(),
+ /*sample_rate_hz=*/48000, kMono);
// Lower the speech level so that the target gain will be increased.
AdaptiveDigitalGainController::FrameInfo info =
- GetFrameInfoToNotAdapt(config);
+ GetFrameInfoToNotAdapt(kDefaultConfig);
info.speech_level_dbfs -= 12.0f;
float prev_gain = 0.0f;
- for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
+ for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(info, audio.float_frame_view());
@@ -284,17 +292,16 @@
TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
IncreaseGainWithEnoughSpeechFrames) {
- AdaptiveDigitalConfig config;
- config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
- GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);
+ GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(),
+ /*sample_rate_hz=*/48000, kMono);
// Lower the speech level so that the target gain will be increased.
AdaptiveDigitalGainController::FrameInfo info =
- GetFrameInfoToNotAdapt(config);
+ GetFrameInfoToNotAdapt(kDefaultConfig);
info.speech_level_dbfs -= 12.0f;
float prev_gain = 0.0f;
- for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
+ for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(info, audio.float_frame_view());
@@ -309,77 +316,10 @@
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
}
-INSTANTIATE_TEST_SUITE_P(GainController2,
- AdaptiveDigitalGainControllerParametrizedTest,
- ::testing::Values(1, 7, 31));
-
-// Checks that the input is never modified when running in dry run mode.
-TEST(GainController2AdaptiveDigitalGainControllerTest,
- DryRunDoesNotChangeInput) {
- AdaptiveDigitalConfig config;
- config.dry_run = true;
- GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
-
- // Simulate an input signal with log speech level.
- AdaptiveDigitalGainController::FrameInfo info =
- GetFrameInfoToNotAdapt(config);
- info.speech_level_dbfs = -60.0f;
- const int num_frames_to_adapt =
- static_cast<int>(
- config.max_gain_db /
- GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
- kNumExtraFrames;
- constexpr float kPcmSamples = 123.456f;
- // Run the gain applier and check that the PCM samples are not modified.
- for (int i = 0; i < num_frames_to_adapt; ++i) {
- SCOPED_TRACE(i);
- VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
- helper.gain_applier->Process(info, fake_audio.float_frame_view());
- EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
- }
-}
-
-// Checks that no sample is modified before and after the sample rate changes.
-TEST(GainController2AdaptiveDigitalGainControllerTest,
- DryRunHandlesSampleRateChange) {
- AdaptiveDigitalConfig config;
- config.dry_run = true;
- GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
-
- AdaptiveDigitalGainController::FrameInfo info =
- GetFrameInfoToNotAdapt(config);
- info.speech_level_dbfs = -60.0f;
- constexpr float kPcmSamples = 123.456f;
- VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
- helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
- EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
- helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
- VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
- helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
- EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
-}
-
-// Checks that no sample is modified before and after the number of channels
-// changes.
-TEST(GainController2AdaptiveDigitalGainControllerTest,
- DryRunHandlesNumChannelsChange) {
- AdaptiveDigitalConfig config;
- config.dry_run = true;
- GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
-
- AdaptiveDigitalGainController::FrameInfo info =
- GetFrameInfoToNotAdapt(config);
- info.speech_level_dbfs = -60.0f;
- constexpr float kPcmSamples = 123.456f;
- VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
- helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
- EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
- VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
- helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
- helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
- EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
- EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
-}
+INSTANTIATE_TEST_SUITE_P(
+ GainController2,
+ AdaptiveDigitalGainControllerParametrizedTest,
+ ::testing::Values(1, 7, 31, kAdjacentSpeechFramesThreshold));
} // namespace
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 4af8552..4597bcd 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -29,11 +29,16 @@
// At what limiter levels should we start decreasing the adaptive digital gain.
constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
-// This is the threshold for speech. Speech frames are used for updating the
-// speech level, measuring the amount of speech, and decide when to allow target
-// gain changes.
+// Number of milliseconds to wait to periodically reset the VAD.
+constexpr int kVadResetPeriodMs = 1500;
+
+// Speech probability threshold to detect speech activity.
constexpr float kVadConfidenceThreshold = 0.95f;
+// Minimum number of adjacent speech frames having a sufficiently high speech
+// probability to reliably detect speech activity.
+constexpr int kAdjacentSpeechFramesThreshold = 12;
+
// Number of milliseconds of speech frames to observe to make the estimator
// confident.
constexpr float kLevelEstimatorTimeToConfidenceMs = 400;
diff --git a/modules/audio_processing/agc2/speech_level_estimator.cc b/modules/audio_processing/agc2/speech_level_estimator.cc
index 9462555..7bf3252 100644
--- a/modules/audio_processing/agc2/speech_level_estimator.cc
+++ b/modules/audio_processing/agc2/speech_level_estimator.cc
@@ -46,11 +46,11 @@
SpeechLevelEstimator::SpeechLevelEstimator(
ApmDataDumper* apm_data_dumper,
- const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
+ const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+ int adjacent_speech_frames_threshold)
: apm_data_dumper_(apm_data_dumper),
initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
- adjacent_speech_frames_threshold_(
- config.adjacent_speech_frames_threshold),
+ adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
level_dbfs_(initial_speech_level_dbfs_),
// TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume
// controller temporal dependency removed.
diff --git a/modules/audio_processing/agc2/speech_level_estimator.h b/modules/audio_processing/agc2/speech_level_estimator.h
index 5cb2b43..4d9f106 100644
--- a/modules/audio_processing/agc2/speech_level_estimator.h
+++ b/modules/audio_processing/agc2/speech_level_estimator.h
@@ -28,7 +28,8 @@
public:
SpeechLevelEstimator(
ApmDataDumper* apm_data_dumper,
- const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
+ const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+ int adjacent_speech_frames_threshold);
SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;
diff --git a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
index 2fec7f7..e1c5f85 100644
--- a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
+++ b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
@@ -42,13 +42,6 @@
}
}
-constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig(
- int adjacent_speech_frames_threshold) {
- AdaptiveDigitalConfig config;
- config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold;
- return config;
-}
-
constexpr float kNoSpeechProbability = 0.0f;
constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
constexpr float kMaxSpeechProbability = 1.0f;
@@ -59,7 +52,8 @@
: data_dumper(0),
estimator(std::make_unique<SpeechLevelEstimator>(
&data_dumper,
- GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
+ AdaptiveDigitalConfig{},
+ adjacent_speech_frames_threshold)),
initial_speech_level_dbfs(estimator->level_dbfs()),
level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {
diff --git a/modules/audio_processing/agc2/vad_wrapper.cc b/modules/audio_processing/agc2/vad_wrapper.cc
index 91448f8..af6325d 100644
--- a/modules/audio_processing/agc2/vad_wrapper.cc
+++ b/modules/audio_processing/agc2/vad_wrapper.cc
@@ -53,6 +53,13 @@
} // namespace
VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
+ const AvailableCpuFeatures& cpu_features,
+ int sample_rate_hz)
+ : VoiceActivityDetectorWrapper(kVadResetPeriodMs,
+ cpu_features,
+ sample_rate_hz) {}
+
+VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
int vad_reset_period_ms,
const AvailableCpuFeatures& cpu_features,
int sample_rate_hz)
diff --git a/modules/audio_processing/agc2/vad_wrapper.h b/modules/audio_processing/agc2/vad_wrapper.h
index 6df0ead..459c471 100644
--- a/modules/audio_processing/agc2/vad_wrapper.h
+++ b/modules/audio_processing/agc2/vad_wrapper.h
@@ -40,6 +40,10 @@
virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
};
+ // Ctor. Uses `cpu_features` to instantiate the default VAD.
+ VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features,
+ int sample_rate_hz);
+
// Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
// `MonoVad::Reset()`; it must be equal to or greater than the duration of two
// frames. Uses `cpu_features` to instantiate the default VAD.
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 3200ea4..18d4ad9 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -378,6 +378,9 @@
FieldTrialConstrained<double> max_gain_db(
"max_gain_db", kDefaultAdaptiveDigitalConfig.max_gain_db, 0,
absl::nullopt);
+ FieldTrialConstrained<double> initial_gain_db(
+ "initial_gain_db", kDefaultAdaptiveDigitalConfig.initial_gain_db, 0,
+ absl::nullopt);
FieldTrialConstrained<double> max_gain_change_db_per_second(
"max_gain_change_db_per_second",
kDefaultAdaptiveDigitalConfig.max_gain_change_db_per_second, 0,
@@ -392,46 +395,51 @@
const std::string field_trial_name =
field_trial::FindFullName(kFieldTrialName);
- ParseFieldTrial({&enabled, &clipped_level_min, &clipped_level_step,
- &clipped_ratio_threshold, &clipped_wait_frames,
- &enable_clipping_predictor, &target_range_max_dbfs,
- &target_range_min_dbfs, &update_input_volume_wait_frames,
- &speech_probability_threshold, &speech_ratio_threshold,
- &headroom_db, &max_gain_db, &max_gain_change_db_per_second,
- &max_output_noise_level_dbfs},
- field_trial_name);
+ ParseFieldTrial(
+ {&enabled, &clipped_level_min, &clipped_level_step,
+ &clipped_ratio_threshold, &clipped_wait_frames,
+ &enable_clipping_predictor, &target_range_max_dbfs,
+ &target_range_min_dbfs, &update_input_volume_wait_frames,
+ &speech_probability_threshold, &speech_ratio_threshold, &headroom_db,
+ &max_gain_db, &initial_gain_db, &max_gain_change_db_per_second,
+ &max_output_noise_level_dbfs},
+ field_trial_name);
// Checked already by `IsEnabled()` before parsing, therefore always true.
RTC_DCHECK(enabled);
return AudioProcessingImpl::GainController2ConfigOverride{
- InputVolumeController::Config{
- .clipped_level_min = static_cast<int>(clipped_level_min.Get()),
- .clipped_level_step = static_cast<int>(clipped_level_step.Get()),
- .clipped_ratio_threshold =
- static_cast<float>(clipped_ratio_threshold.Get()),
- .clipped_wait_frames = static_cast<int>(clipped_wait_frames.Get()),
- .enable_clipping_predictor =
- static_cast<bool>(enable_clipping_predictor.Get()),
- .target_range_max_dbfs =
- static_cast<int>(target_range_max_dbfs.Get()),
- .target_range_min_dbfs =
- static_cast<int>(target_range_min_dbfs.Get()),
- .update_input_volume_wait_frames =
- static_cast<int>(update_input_volume_wait_frames.Get()),
- .speech_probability_threshold =
- static_cast<float>(speech_probability_threshold.Get()),
- .speech_ratio_threshold =
- static_cast<float>(speech_ratio_threshold.Get()),
- },
- AudioProcessingImpl::GainController2ConfigOverride::AdaptiveDigitalConfig{
- .headroom_db = static_cast<float>(headroom_db.Get()),
- .max_gain_db = static_cast<float>(max_gain_db.Get()),
- .max_gain_change_db_per_second =
- static_cast<float>(max_gain_change_db_per_second.Get()),
- .max_output_noise_level_dbfs =
- static_cast<float>(max_output_noise_level_dbfs.Get()),
- },
+ .input_volume_controller_config =
+ {
+ .clipped_level_min = static_cast<int>(clipped_level_min.Get()),
+ .clipped_level_step = static_cast<int>(clipped_level_step.Get()),
+ .clipped_ratio_threshold =
+ static_cast<float>(clipped_ratio_threshold.Get()),
+ .clipped_wait_frames =
+ static_cast<int>(clipped_wait_frames.Get()),
+ .enable_clipping_predictor =
+ static_cast<bool>(enable_clipping_predictor.Get()),
+ .target_range_max_dbfs =
+ static_cast<int>(target_range_max_dbfs.Get()),
+ .target_range_min_dbfs =
+ static_cast<int>(target_range_min_dbfs.Get()),
+ .update_input_volume_wait_frames =
+ static_cast<int>(update_input_volume_wait_frames.Get()),
+ .speech_probability_threshold =
+ static_cast<float>(speech_probability_threshold.Get()),
+ .speech_ratio_threshold =
+ static_cast<float>(speech_ratio_threshold.Get()),
+ },
+ .adaptive_digital_config =
+ {
+ .headroom_db = static_cast<float>(headroom_db.Get()),
+ .max_gain_db = static_cast<float>(max_gain_db.Get()),
+ .initial_gain_db = static_cast<float>(initial_gain_db.Get()),
+ .max_gain_change_db_per_second =
+ static_cast<float>(max_gain_change_db_per_second.Get()),
+ .max_output_noise_level_dbfs =
+ static_cast<float>(max_output_noise_level_dbfs.Get()),
+ },
};
}
@@ -489,21 +497,10 @@
adjusted_config.gain_controller1.analog_gain_controller.enabled = false;
adjusted_config.gain_controller2.enabled = true;
- adjusted_config.gain_controller2.adaptive_digital.enabled = true;
adjusted_config.gain_controller2.input_volume_controller.enabled = true;
-
- auto& adjusted_adaptive_digital = // Alias.
- adjusted_config.gain_controller2.adaptive_digital;
- const auto& adaptive_digital_override = // Alias.
+ adjusted_config.gain_controller2.adaptive_digital =
gain_controller2_config_override->adaptive_digital_config;
- adjusted_adaptive_digital.headroom_db =
- adaptive_digital_override.headroom_db;
- adjusted_adaptive_digital.max_gain_db =
- adaptive_digital_override.max_gain_db;
- adjusted_adaptive_digital.max_gain_change_db_per_second =
- adaptive_digital_override.max_gain_change_db_per_second;
- adjusted_adaptive_digital.max_output_noise_level_dbfs =
- adaptive_digital_override.max_output_noise_level_dbfs;
+ adjusted_config.gain_controller2.adaptive_digital.enabled = true;
}
}
@@ -2373,7 +2370,6 @@
// TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
submodules_.voice_activity_detector =
std::make_unique<VoiceActivityDetectorWrapper>(
- config_.gain_controller2.adaptive_digital.vad_reset_period_ms,
submodules_.gain_controller2->GetCpuFeatures(),
proc_fullband_sample_rate_hz());
}
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 189ed03..0f74c30 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -142,12 +142,8 @@
// removed.
struct GainController2ConfigOverride {
InputVolumeController::Config input_volume_controller_config;
- struct AdaptiveDigitalConfig {
- float headroom_db;
- float max_gain_db;
- float max_gain_change_db_per_second;
- float max_output_noise_level_dbfs;
- } adaptive_digital_config;
+ AudioProcessing::Config::GainController2::AdaptiveDigital
+ adaptive_digital_config;
};
protected:
diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc
index b394e93..10c11a2 100644
--- a/modules/audio_processing/audio_processing_impl_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -1235,6 +1235,70 @@
EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135);
}
+class GainController2FieldTrialParametrizedTest
+ : public ::testing::TestWithParam<AudioProcessing::Config> {};
+
+TEST_P(GainController2FieldTrialParametrizedTest,
+ CheckAgc2AdaptiveDigitalOverridesApplied) {
+ webrtc::test::ScopedFieldTrials field_trials(
+ "WebRTC-Audio-GainController2/"
+ "Enabled,"
+ "enable_clipping_predictor:true,"
+ "clipped_level_min:20,"
+ "clipped_level_step:30,"
+ "clipped_ratio_threshold:0.4,"
+ "clipped_wait_frames:50,"
+ "target_range_max_dbfs:-6,"
+ "target_range_min_dbfs:-70,"
+ "update_input_volume_wait_frames:80,"
+ "speech_probability_threshold:0.9,"
+ "speech_ratio_threshold:1.0,"
+ "headroom_db:10,"
+ "max_gain_db:20,"
+ "initial_gain_db:7,"
+ "max_gain_change_db_per_second:5,"
+ "max_output_noise_level_dbfs:-40/");
+
+ auto adjusted_config =
+ AudioProcessingBuilder().SetConfig(GetParam()).Create()->GetConfig();
+
+ EXPECT_FALSE(adjusted_config.gain_controller1.enabled);
+ EXPECT_TRUE(adjusted_config.gain_controller2.enabled);
+ EXPECT_TRUE(adjusted_config.gain_controller2.adaptive_digital.enabled);
+ EXPECT_TRUE(adjusted_config.gain_controller2.input_volume_controller.enabled);
+
+ EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.headroom_db, 10);
+ EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.max_gain_db, 20);
+ EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.initial_gain_db,
+ 7);
+ EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital
+ .max_gain_change_db_per_second,
+ 5);
+ EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital
+ .max_output_noise_level_dbfs,
+ -40);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ AudioProcessingImplTest,
+ GainController2FieldTrialParametrizedTest,
+ ::testing::Values(
+ // Full AGC1.
+ AudioProcessing::Config{
+ .gain_controller1 =
+ {.enabled = true,
+ .analog_gain_controller = {.enabled = true,
+ .enable_digital_adaptive = true}},
+ .gain_controller2 = {.enabled = false}},
+ // Hybrid AGC.
+ AudioProcessing::Config{
+ .gain_controller1 =
+ {.enabled = true,
+ .analog_gain_controller = {.enabled = true,
+ .enable_digital_adaptive = false}},
+ .gain_controller2 = {.enabled = true,
+ .adaptive_digital = {.enabled = true}}}));
+
TEST(AudioProcessingImplGainController2FieldTrialTest,
ConfigAdjustedWhenExperimentEnabledAndAgc1AnalogEnabled) {
constexpr AudioProcessing::Config::GainController2::AdaptiveDigital
@@ -1254,6 +1318,7 @@
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
+ "initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");
@@ -1318,6 +1383,7 @@
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
+ "initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");
@@ -1382,6 +1448,7 @@
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
+ "initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");
@@ -1434,6 +1501,7 @@
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
+ "initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index bbb7f46..e320e71 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -3062,10 +3062,6 @@
b_adaptive.enabled = a_adaptive.enabled;
EXPECT_EQ(a, b);
- Toggle(a_adaptive.dry_run);
- b_adaptive.dry_run = a_adaptive.dry_run;
- EXPECT_EQ(a, b);
-
a_adaptive.headroom_db += 1.0f;
b_adaptive.headroom_db = a_adaptive.headroom_db;
EXPECT_EQ(a, b);
@@ -3078,15 +3074,6 @@
b_adaptive.initial_gain_db = a_adaptive.initial_gain_db;
EXPECT_EQ(a, b);
- a_adaptive.vad_reset_period_ms++;
- b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms;
- EXPECT_EQ(a, b);
-
- a_adaptive.adjacent_speech_frames_threshold++;
- b_adaptive.adjacent_speech_frames_threshold =
- a_adaptive.adjacent_speech_frames_threshold;
- EXPECT_EQ(a, b);
-
a_adaptive.max_gain_change_db_per_second += 1.0f;
b_adaptive.max_gain_change_db_per_second =
a_adaptive.max_gain_change_db_per_second;
@@ -3119,10 +3106,6 @@
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
- Toggle(a_adaptive.dry_run);
- EXPECT_NE(a, b);
- a_adaptive = b_adaptive;
-
a_adaptive.headroom_db += 1.0f;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
@@ -3135,14 +3118,6 @@
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
- a_adaptive.vad_reset_period_ms++;
- EXPECT_NE(a, b);
- a_adaptive = b_adaptive;
-
- a_adaptive.adjacent_speech_frames_threshold++;
- EXPECT_NE(a, b);
- a_adaptive = b_adaptive;
-
a_adaptive.max_gain_change_db_per_second += 1.0f;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index d25ce7a..ea36dd2 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc
@@ -14,6 +14,7 @@
#include <utility>
#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/include/audio_frame_view.h"
@@ -102,14 +103,10 @@
config.adaptive_digital.enabled) {
// Create dependencies.
speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
- &data_dumper_, config.adaptive_digital);
- if (use_internal_vad) {
- // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
- // digital to gain controller 2 config.
+ &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
+ if (use_internal_vad)
vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
- config.adaptive_digital.vad_reset_period_ms, cpu_features_,
- sample_rate_hz);
- }
+ kVadResetPeriodMs, cpu_features_, sample_rate_hz);
}
if (config.input_volume_controller.enabled) {
@@ -124,14 +121,13 @@
// Create dependencies.
noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
saturation_protector_ = CreateSaturationProtector(
- kSaturationProtectorInitialHeadroomDb,
- config.adaptive_digital.adjacent_speech_frames_threshold,
+ kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
&data_dumper_);
// Create controller.
adaptive_digital_controller_ =
std::make_unique<AdaptiveDigitalGainController>(
- &data_dumper_, config.adaptive_digital, sample_rate_hz,
- num_channels);
+ &data_dumper_, config.adaptive_digital,
+ kAdjacentSpeechFramesThreshold, sample_rate_hz, num_channels);
}
}
diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc
index f7e5db2..bff6245 100644
--- a/modules/audio_processing/gain_controller2_unittest.cc
+++ b/modules/audio_processing/gain_controller2_unittest.cc
@@ -613,8 +613,7 @@
GainController2 agc2_reference(config, /*input_volume_controller_config=*/{},
kSampleRateHz, kStereo,
/*use_internal_vad=*/true);
- VoiceActivityDetectorWrapper vad(config.adaptive_digital.vad_reset_period_ms,
- GetAvailableCpuFeatures(), kSampleRateHz);
+ VoiceActivityDetectorWrapper vad(GetAvailableCpuFeatures(), kSampleRateHz);
test::InputAudioFile input_file(
test::GetApmCaptureTestVectorFileName(kSampleRateHz),
/*loop_at_end=*/true);
diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc
index 83917c2..13ddcc5 100644
--- a/modules/audio_processing/include/audio_processing.cc
+++ b/modules/audio_processing/include/audio_processing.cc
@@ -87,12 +87,9 @@
bool Agc2Config::AdaptiveDigital::operator==(
const Agc2Config::AdaptiveDigital& rhs) const {
- return enabled == rhs.enabled && dry_run == rhs.dry_run &&
- headroom_db == rhs.headroom_db && max_gain_db == rhs.max_gain_db &&
+ return enabled == rhs.enabled && headroom_db == rhs.headroom_db &&
+ max_gain_db == rhs.max_gain_db &&
initial_gain_db == rhs.initial_gain_db &&
- vad_reset_period_ms == rhs.vad_reset_period_ms &&
- adjacent_speech_frames_threshold ==
- rhs.adjacent_speech_frames_threshold &&
max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
}
@@ -197,15 +194,10 @@
<< gain_controller2.fixed_digital.gain_db
<< " }, adaptive_digital: { enabled: "
<< gain_controller2.adaptive_digital.enabled
- << ", dry_run: " << gain_controller2.adaptive_digital.dry_run
<< ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
<< ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
<< ", initial_gain_db: "
<< gain_controller2.adaptive_digital.initial_gain_db
- << ", vad_reset_period_ms: "
- << gain_controller2.adaptive_digital.vad_reset_period_ms
- << ", adjacent_speech_frames_threshold: "
- << gain_controller2.adaptive_digital.adjacent_speech_frames_threshold
<< ", max_gain_change_db_per_second: "
<< gain_controller2.adaptive_digital.max_gain_change_db_per_second
<< ", max_output_noise_level_dbfs: "
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index c5c6070..f613a38 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -362,21 +362,10 @@
bool operator!=(const AdaptiveDigital& rhs) const {
return !(*this == rhs);
}
-
bool enabled = false;
- // TODO(bugs.webrtc.org/7494): Remove `dry_run`.
- // When true, the adaptive digital controller runs but the signal is not
- // modified.
- bool dry_run = false;
float headroom_db = 6.0f;
- // TODO(bugs.webrtc.org/7494): Consider removing and inferring from
- // `max_output_noise_level_dbfs`.
float max_gain_db = 30.0f;
float initial_gain_db = 8.0f;
- // TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below.
- int vad_reset_period_ms = 1500;
- // TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below.
- int adjacent_speech_frames_threshold = 12;
float max_gain_change_db_per_second = 3.0f;
float max_output_noise_level_dbfs = -50.0f;
} adaptive_digital;