APM AGC2: consolidate `GainController2`
Now that `InputVolumeController` is finalized, it's time to
consolidate AGC2.
Main changes:
- Remove `AdaptiveDigitalGainController`: it's too simple to justify
a dedicated class and some components of it are also used by
`InputVolumeController`
- Remove unwanted temporal dependency: make `InputVolumeController`
adapt the volume based on the current speech level estimation and
not on the estimation from the previous frame
Tested: AGC2 adaptive digital bit-exactness verified
Bug: webrtc:7494
Change-Id: I175c2741cafc52be81794219c996a3824c3bbf5e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/280560
Reviewed-by: Hanna Silen <silen@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38841}
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index a403ead..44082f7 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -138,11 +138,14 @@
"../../rtc_base:logging",
"../../rtc_base:stringutils",
"../../system_wrappers:field_trial",
- "agc2:adaptive_digital",
+ "agc2:adaptive_digital_gain_applier",
"agc2:cpu_features",
"agc2:fixed_digital",
"agc2:gain_applier",
"agc2:input_volume_controller",
+ "agc2:noise_level_estimator",
+ "agc2:saturation_protector",
+ "agc2:speech_level_estimator",
"agc2:vad_wrapper",
]
}
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index 6e07c4c..b26d692 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -8,13 +8,6 @@
import("../../../webrtc.gni")
-group("agc2") {
- deps = [
- ":adaptive_digital",
- ":fixed_digital",
- ]
-}
-
rtc_library("speech_level_estimator") {
sources = [
"speech_level_estimator.cc",
@@ -39,35 +32,6 @@
]
}
-rtc_library("adaptive_digital") {
- sources = [
- "adaptive_digital_gain_controller.cc",
- "adaptive_digital_gain_controller.h",
- ]
-
- visibility = [
- "..:gain_controller2",
- "./*",
- ]
-
- configs += [ "..:apm_debug_dump" ]
-
- deps = [
- ":adaptive_digital_gain_applier",
- ":noise_level_estimator",
- ":saturation_protector",
- ":speech_level_estimator",
- "..:api",
- "..:apm_logging",
- "..:audio_frame_view",
- "../../../common_audio",
- "../../../rtc_base:checks",
- "../../../rtc_base:logging",
- ]
-
- absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
-}
-
rtc_library("adaptive_digital_gain_applier") {
sources = [
"adaptive_digital_gain_applier.cc",
@@ -266,7 +230,6 @@
"noise_level_estimator.cc",
"noise_level_estimator.h",
]
- visibility = [ "./*" ]
deps = [
":biquad_filter",
"..:apm_logging",
@@ -276,6 +239,11 @@
"../../../system_wrappers",
]
+ visibility = [
+ "..:gain_controller2",
+ "./*",
+ ]
+
configs += [ "..:apm_debug_dump" ]
}
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index dc84c1e..0b1cceb 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@@ -33,6 +33,7 @@
bool speech_level_reliable; // True with reliable speech level estimation.
float noise_rms_dbfs; // Estimated noise RMS level (dBFS).
float headroom_db; // Headroom (dB).
+ // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope_dbfs`.
float limiter_envelope_dbfs; // Envelope level from the limiter (dBFS).
};
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
deleted file mode 100644
index 07ed6a3..0000000
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
-
-#include <algorithm>
-
-#include "common_audio/include/audio_util.h"
-#include "modules/audio_processing/logging/apm_data_dumper.h"
-#include "rtc_base/checks.h"
-#include "rtc_base/logging.h"
-
-namespace webrtc {
-namespace {
-
-// Peak and RMS audio levels in dBFS.
-struct AudioLevels {
- float peak_dbfs;
- float rms_dbfs;
-};
-
-// Computes the audio levels for the first channel in `frame`.
-AudioLevels ComputeAudioLevels(AudioFrameView<float> frame) {
- float peak = 0.0f;
- float rms = 0.0f;
- for (const auto& x : frame.channel(0)) {
- peak = std::max(std::fabs(x), peak);
- rms += x * x;
- }
- return {FloatS16ToDbfs(peak),
- FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
-}
-
-} // namespace
-
-AdaptiveDigitalGainController::AdaptiveDigitalGainController(
- ApmDataDumper* apm_data_dumper,
- const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
- int sample_rate_hz,
- int num_channels)
- : speech_level_estimator_(apm_data_dumper, config),
- gain_controller_(apm_data_dumper, config, sample_rate_hz, num_channels),
- apm_data_dumper_(apm_data_dumper),
- noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
- saturation_protector_(
- CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
- config.adjacent_speech_frames_threshold,
- apm_data_dumper)) {
- RTC_DCHECK(apm_data_dumper);
- RTC_DCHECK(noise_level_estimator_);
- RTC_DCHECK(saturation_protector_);
-}
-
-AdaptiveDigitalGainController::~AdaptiveDigitalGainController() = default;
-
-void AdaptiveDigitalGainController::Initialize(int sample_rate_hz,
- int num_channels) {
- gain_controller_.Initialize(sample_rate_hz, num_channels);
-}
-
-void AdaptiveDigitalGainController::Process(AudioFrameView<float> frame,
- float speech_probability,
- float limiter_envelope) {
- AudioLevels levels = ComputeAudioLevels(frame);
- apm_data_dumper_->DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
- apm_data_dumper_->DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
-
- AdaptiveDigitalGainApplier::FrameInfo info;
-
- info.speech_probability = speech_probability;
-
- speech_level_estimator_.Update(levels.rms_dbfs, levels.peak_dbfs,
- info.speech_probability);
- info.speech_level_dbfs = speech_level_estimator_.level_dbfs();
- info.speech_level_reliable = speech_level_estimator_.is_confident();
-
- info.noise_rms_dbfs = noise_level_estimator_->Analyze(frame);
- apm_data_dumper_->DumpRaw("agc2_noise_rms_dbfs", info.noise_rms_dbfs);
-
- saturation_protector_->Analyze(info.speech_probability, levels.peak_dbfs,
- info.speech_level_dbfs);
- info.headroom_db = saturation_protector_->HeadroomDb();
- apm_data_dumper_->DumpRaw("agc2_headroom_db", info.headroom_db);
-
- info.limiter_envelope_dbfs = FloatS16ToDbfs(limiter_envelope);
- apm_data_dumper_->DumpRaw("agc2_limiter_envelope_dbfs",
- info.limiter_envelope_dbfs);
-
- gain_controller_.Process(info, frame);
-}
-
-void AdaptiveDigitalGainController::HandleInputGainChange() {
- speech_level_estimator_.Reset();
- saturation_protector_->Reset();
-}
-
-absl::optional<float>
-AdaptiveDigitalGainController::GetSpeechLevelDbfsIfConfident() const {
- return speech_level_estimator_.is_confident()
- ? absl::optional<float>(speech_level_estimator_.level_dbfs())
- : absl::nullopt;
-}
-
-} // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
deleted file mode 100644
index 78c5088..0000000
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
-#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
-
-#include <memory>
-
-#include "absl/types/optional.h"
-#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
-#include "modules/audio_processing/agc2/noise_level_estimator.h"
-#include "modules/audio_processing/agc2/saturation_protector.h"
-#include "modules/audio_processing/agc2/speech_level_estimator.h"
-#include "modules/audio_processing/include/audio_frame_view.h"
-#include "modules/audio_processing/include/audio_processing.h"
-
-namespace webrtc {
-class ApmDataDumper;
-
-// Gain controller that adapts and applies a variable digital gain to meet the
-// target level, which is determined by the given configuration.
-class AdaptiveDigitalGainController {
- public:
- AdaptiveDigitalGainController(
- ApmDataDumper* apm_data_dumper,
- const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
- int sample_rate_hz,
- int num_channels);
- AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete;
- AdaptiveDigitalGainController& operator=(
- const AdaptiveDigitalGainController&) = delete;
- ~AdaptiveDigitalGainController();
-
- // Detects and handles changes of sample rate and or number of channels.
- void Initialize(int sample_rate_hz, int num_channels);
-
- // Analyzes `frame`, adapts the current digital gain and applies it to
- // `frame`.
- // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope`.
- void Process(AudioFrameView<float> frame,
- float speech_probability,
- float limiter_envelope);
-
- // Handles a gain change applied to the input signal (e.g., analog gain).
- void HandleInputGainChange();
-
- // Returns the most recent speech level (dBFs) if the estimator is confident.
- // Otherwise returns absl::nullopt.
- absl::optional<float> GetSpeechLevelDbfsIfConfident() const;
-
- private:
- SpeechLevelEstimator speech_level_estimator_;
- AdaptiveDigitalGainApplier gain_controller_;
- ApmDataDumper* const apm_data_dumper_;
- std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
- std::unique_ptr<SaturationProtector> saturation_protector_;
-};
-
-} // namespace webrtc
-
-#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
diff --git a/modules/audio_processing/agc2/input_volume_controller.cc b/modules/audio_processing/agc2/input_volume_controller.cc
index 61b26bb..a428db8 100644
--- a/modules/audio_processing/agc2/input_volume_controller.cc
+++ b/modules/audio_processing/agc2/input_volume_controller.cc
@@ -221,10 +221,9 @@
speech_frames_since_update_input_volume_ = 0;
// Update the input volume if allowed.
- if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_) {
- if (rms_error_db.has_value()) {
- UpdateInputVolume(*rms_error_db);
- }
+ if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_ &&
+ rms_error_db.has_value()) {
+ UpdateInputVolume(*rms_error_db);
}
}
diff --git a/modules/audio_processing/agc2/input_volume_controller.h b/modules/audio_processing/agc2/input_volume_controller.h
index 6f836d4..5b7323b 100644
--- a/modules/audio_processing/agc2/input_volume_controller.h
+++ b/modules/audio_processing/agc2/input_volume_controller.h
@@ -93,10 +93,11 @@
void AnalyzePreProcess(const AudioBuffer& audio_buffer);
// TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
- // Adjusts the recommended input volume upwards/downwards based on
- // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value
- // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and
- // `speech_level_dbfs` in the the range [-90.f, 30.0f].
+ // Adjusts the recommended input volume upwards/downwards based on the result
+ // of `AnalyzePreProcess()` and on `speech_level_dbfs` (if specified). Must
+ // be called after `AnalyzePreProcess()`. The value of `speech_probability` is
+ // expected to be in the range [0, 1] and `speech_level_dbfs` in the range
+ // [-90, 30].
void Process(float speech_probability,
absl::optional<float> speech_level_dbfs);
@@ -205,10 +206,10 @@
void HandleClipping(int clipped_level_step);
// TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
- // Adjusts the recommended input volume upwards/downwards depending on
- // whether `rms_error_dbfs` is positive or negative. Updates are only allowed
- // for active speech segments and when `rms_error_dbfs` is not empty. Must be
- // called after `HandleClipping()`.
+ // Adjusts the recommended input volume upwards/downwards depending on the
+ // result of `HandleClipping()` and on `rms_error_dbfs`. Updates are only
+ // allowed for active speech segments and when `rms_error_dbfs` is not empty.
+ // Must be called after `HandleClipping()`.
void Process(absl::optional<int> rms_error_dbfs, float speech_probability);
// Returns the recommended input volume. Must be called after `Process()`.
diff --git a/modules/audio_processing/agc2/input_volume_controller_unittest.cc b/modules/audio_processing/agc2/input_volume_controller_unittest.cc
index d2aa6ed..ac443e6 100644
--- a/modules/audio_processing/agc2/input_volume_controller_unittest.cc
+++ b/modules/audio_processing/agc2/input_volume_controller_unittest.cc
@@ -232,7 +232,7 @@
float UpdateRecommendedInputVolume(MonoInputVolumeController& controller,
int applied_input_volume,
float speech_probability,
- const absl::optional<float> rms_error_dbfs) {
+ absl::optional<float> rms_error_dbfs) {
controller.set_stream_analog_level(applied_input_volume);
EXPECT_EQ(controller.recommended_analog_level(), applied_input_volume);
controller.Process(rms_error_dbfs, speech_probability);
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 9724666..c1a2756 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -2308,7 +2308,8 @@
const bool use_vad =
transient_suppressor_vad_mode_ == TransientSuppressor::VadMode::kRnnVad &&
config_.gain_controller2.enabled &&
- config_.gain_controller2.adaptive_digital.enabled;
+ (config_.gain_controller2.adaptive_digital.enabled ||
+ config_.gain_controller2.input_volume_controller.enabled);
if (!use_vad) {
submodules_.voice_activity_detector.reset();
return;
diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc
index 346b5f5..7a45c45 100644
--- a/modules/audio_processing/audio_processing_impl_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -648,11 +648,10 @@
rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
webrtc::AudioProcessing::Config apm_config;
- // Disable AGC1 analog.
apm_config.gain_controller1.enabled = false;
- // Enable AGC2 digital.
apm_config.gain_controller2.enabled = true;
apm_config.gain_controller2.adaptive_digital.enabled = true;
+ apm_config.transient_suppression.enabled = true;
apm->ApplyConfig(apm_config);
constexpr int kSampleRateHz = 48000;
constexpr int kNumChannels = 1;
@@ -680,11 +679,10 @@
rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
webrtc::AudioProcessing::Config apm_config;
- // Disable AGC1 analog.
apm_config.gain_controller1.enabled = false;
- // Enable AGC2 digital.
apm_config.gain_controller2.enabled = true;
apm_config.gain_controller2.adaptive_digital.enabled = true;
+ apm_config.transient_suppression.enabled = true;
apm->ApplyConfig(apm_config);
constexpr int kSampleRateHz = 48000;
constexpr int kNumChannels = 1;
diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index 174647c..6a57dca 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc
@@ -49,28 +49,33 @@
return features;
}
-// Creates an adaptive digital gain controller if enabled.
-std::unique_ptr<AdaptiveDigitalGainController> CreateAdaptiveDigitalController(
- const Agc2Config::AdaptiveDigital& config,
- int sample_rate_hz,
- int num_channels,
- ApmDataDumper* data_dumper) {
- if (config.enabled) {
- return std::make_unique<AdaptiveDigitalGainController>(
- data_dumper, config, sample_rate_hz, num_channels);
- }
- return nullptr;
-}
+// Peak and RMS audio levels in dBFS.
+struct AudioLevels {
+ float peak_dbfs;
+ float rms_dbfs;
+};
-// Creates an input volume controller if `enabled` is true.
-std::unique_ptr<InputVolumeController> CreateInputVolumeController(
- bool enabled,
- const InputVolumeControllerConfig& config,
- int num_channels) {
- if (enabled) {
- return std::make_unique<InputVolumeController>(num_channels, config);
+// Speech level info.
+struct SpeechLevel {
+ bool is_confident;
+ float rms_dbfs;
+};
+
+// Computes the audio levels for the first channel in `frame`.
+AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
+ ApmDataDumper& data_dumper) {
+ float peak = 0.0f;
+ float rms = 0.0f;
+ for (const auto& x : frame.channel(0)) {
+ peak = std::max(std::fabs(x), peak);
+ rms += x * x;
}
- return nullptr;
+ AudioLevels levels{
+ FloatS16ToDbfs(peak),
+ FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
+ data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
+ data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
+ return levels;
}
} // namespace
@@ -88,30 +93,44 @@
fixed_gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
- adaptive_digital_controller_(
- CreateAdaptiveDigitalController(config.adaptive_digital,
- sample_rate_hz,
- num_channels,
- &data_dumper_)),
- input_volume_controller_(
- CreateInputVolumeController(config.input_volume_controller.enabled,
- input_volume_controller_config,
- num_channels)),
limiter_(sample_rate_hz, &data_dumper_, /*histogram_name_prefix=*/"Agc2"),
calls_since_last_limiter_log_(0) {
RTC_DCHECK(Validate(config));
data_dumper_.InitiateNewSetOfRecordings();
- const bool use_vad = config.adaptive_digital.enabled;
- if (use_vad && use_internal_vad) {
- // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
- // digital to gain controller 2 config.
- vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
- config.adaptive_digital.vad_reset_period_ms, cpu_features_,
- sample_rate_hz);
+
+ if (config.input_volume_controller.enabled ||
+ config.adaptive_digital.enabled) {
+ // Create dependencies.
+ speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
+ &data_dumper_, config.adaptive_digital);
+ if (use_internal_vad) {
+ // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
+ // digital to gain controller 2 config.
+ vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
+ config.adaptive_digital.vad_reset_period_ms, cpu_features_,
+ sample_rate_hz);
+ }
}
- if (input_volume_controller_) {
+
+ if (config.input_volume_controller.enabled) {
+ // Create controller.
+ input_volume_controller_ = std::make_unique<InputVolumeController>(
+ num_channels, input_volume_controller_config);
+ // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
input_volume_controller_->Initialize();
}
+
+ if (config.adaptive_digital.enabled) {
+ // Create dependencies.
+ noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
+ saturation_protector_ = CreateSaturationProtector(
+ kSaturationProtectorInitialHeadroomDb,
+ config.adaptive_digital.adjacent_speech_frames_threshold,
+ &data_dumper_);
+ // Create controller.
+ adaptive_digital_controller_ = std::make_unique<AdaptiveDigitalGainApplier>(
+ &data_dumper_, config.adaptive_digital, sample_rate_hz, num_channels);
+ }
}
GainController2::~GainController2() = default;
@@ -140,6 +159,7 @@
RTC_DCHECK_LE(applied_input_volume, 255);
if (input_volume_controller_) {
+ // TODO(bugs.webrtc.org/7494): Pass applied volume to `AnalyzePreProcess()`.
input_volume_controller_->set_stream_analog_level(applied_input_volume);
input_volume_controller_->AnalyzePreProcess(audio_buffer);
}
@@ -157,42 +177,84 @@
AudioBuffer* audio) {
data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
input_volume_changed);
- if (input_volume_changed && !!adaptive_digital_controller_) {
- adaptive_digital_controller_->HandleInputGainChange();
+ if (input_volume_changed) {
+ // Handle input volume changes.
+ if (speech_level_estimator_)
+ speech_level_estimator_->Reset();
+ if (saturation_protector_)
+ saturation_protector_->Reset();
}
AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
audio->num_frames());
+ // Compute speech probability.
if (vad_) {
speech_probability = vad_->Analyze(float_frame);
} else if (speech_probability.has_value()) {
- RTC_DCHECK_GE(speech_probability.value(), 0.0f);
- RTC_DCHECK_LE(speech_probability.value(), 1.0f);
+ RTC_DCHECK_GE(*speech_probability, 0.0f);
+ RTC_DCHECK_LE(*speech_probability, 1.0f);
}
- if (speech_probability.has_value()) {
- data_dumper_.DumpRaw("agc2_speech_probability", speech_probability.value());
+ // The speech probability may not be defined at this step (e.g., when the
+ // fixed digital controller alone is enabled).
+ if (speech_probability.has_value())
+ data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
+
+ // Compute audio, noise and speech levels.
+ AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
+ absl::optional<float> noise_rms_dbfs;
+ if (noise_level_estimator_) {
+ // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+ // computation in `noise_level_estimator_`.
+ noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
+ }
+ absl::optional<SpeechLevel> speech_level;
+ if (speech_level_estimator_) {
+ RTC_DCHECK(speech_probability.has_value());
+ speech_level_estimator_->Update(
+ audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
+ speech_level =
+ SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
+ .rms_dbfs = speech_level_estimator_->level_dbfs()};
}
+ // Update the recommended input volume.
if (input_volume_controller_) {
- // TODO(bugs.webrtc.org/7494): A temprorary check, remove once not needed.
- RTC_DCHECK(adaptive_digital_controller_);
- absl::optional<float> speech_level;
- if (adaptive_digital_controller_) {
- speech_level =
- adaptive_digital_controller_->GetSpeechLevelDbfsIfConfident();
- }
+ RTC_DCHECK(speech_level.has_value());
RTC_DCHECK(speech_probability.has_value());
if (speech_probability.has_value()) {
- input_volume_controller_->Process(*speech_probability, speech_level);
+ // TODO(bugs.webrtc.org/7494): Rename `Process()` to `RecommendVolume()`
+ // and let it return the recommended input volume.
+ input_volume_controller_->Process(
+ *speech_probability,
+ speech_level->is_confident
+ ? absl::optional<float>(speech_level->rms_dbfs)
+ : absl::nullopt);
}
}
if (adaptive_digital_controller_) {
+ RTC_DCHECK(saturation_protector_);
RTC_DCHECK(speech_probability.has_value());
+ RTC_DCHECK(speech_level.has_value());
+ saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
+ speech_level->rms_dbfs);
+ float headroom_db = saturation_protector_->HeadroomDb();
+ data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
+ float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
+ data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
+ RTC_DCHECK(noise_rms_dbfs.has_value());
adaptive_digital_controller_->Process(
- float_frame, speech_probability.value(), limiter_.LastAudioLevel());
+ /*info=*/{.speech_probability = *speech_probability,
+ .speech_level_dbfs = speech_level->rms_dbfs,
+ .speech_level_reliable = speech_level->is_confident,
+ .noise_rms_dbfs = *noise_rms_dbfs,
+ .headroom_db = headroom_db,
+ .limiter_envelope_dbfs = limiter_envelope_dbfs},
+ float_frame);
}
+ // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+ // computation in `limiter_`.
fixed_gain_applier_.ApplyGain(float_frame);
limiter_.Process(float_frame);
@@ -201,7 +263,7 @@
if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
calls_since_last_limiter_log_ = 0;
InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
- RTC_LOG(LS_INFO) << "AGC2 limiter stats"
+ RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
<< " | identity: " << stats.look_ups_identity_region
<< " | knee: " << stats.look_ups_knee_region
<< " | limiter: " << stats.look_ups_limiter_region
@@ -213,7 +275,7 @@
const AudioProcessing::Config::GainController2& config) {
const auto& fixed = config.fixed_digital;
const auto& adaptive = config.adaptive_digital;
- return fixed.gain_db >= 0.0f && fixed.gain_db < 50.f &&
+ return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
adaptive.initial_gain_db >= 0.0f &&
adaptive.max_gain_change_db_per_second > 0.0f &&
diff --git a/modules/audio_processing/gain_controller2.h b/modules/audio_processing/gain_controller2.h
index 0d41eaa..fa4743c 100644
--- a/modules/audio_processing/gain_controller2.h
+++ b/modules/audio_processing/gain_controller2.h
@@ -15,11 +15,14 @@
#include <memory>
#include <string>
-#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/agc2/gain_applier.h"
#include "modules/audio_processing/agc2/input_volume_controller.h"
#include "modules/audio_processing/agc2/limiter.h"
+#include "modules/audio_processing/agc2/noise_level_estimator.h"
+#include "modules/audio_processing/agc2/saturation_protector.h"
+#include "modules/audio_processing/agc2/speech_level_estimator.h"
#include "modules/audio_processing/agc2/vad_wrapper.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
@@ -58,12 +61,13 @@
// [0, 255].
void Analyze(int applied_input_volume, const AudioBuffer& audio_buffer);
- // Applies fixed and adaptive digital gains to `audio` and runs a limiter.
- // If the internal VAD is used, `speech_probability` is ignored. Otherwise
- // `speech_probability` is used for digital adaptive gain if it's available
- // (limited to values [0.0, 1.0]). Handles input volume changes; if the caller
- // cannot determine whether an input volume change occurred, set
- // `input_volume_changed` to false.
+ // Updates the recommended input volume, applies the adaptive digital and the
+ // fixed digital gains and runs a limiter on `audio`.
+ // When the internal VAD is not used, `speech_probability` should be specified
+ // and in the [0, 1] range. Otherwise ignores `speech_probability` and
+ // computes the speech probability via `vad_`.
+ // Handles input volume changes; if the caller cannot determine whether an
+ // input volume change occurred, set `input_volume_changed` to false.
void Process(absl::optional<float> speech_probability,
bool input_volume_changed,
AudioBuffer* audio);
@@ -80,11 +84,18 @@
static std::atomic<int> instance_count_;
const AvailableCpuFeatures cpu_features_;
ApmDataDumper data_dumper_;
+
GainApplier fixed_gain_applier_;
+ std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
std::unique_ptr<VoiceActivityDetectorWrapper> vad_;
- std::unique_ptr<AdaptiveDigitalGainController> adaptive_digital_controller_;
+ std::unique_ptr<SpeechLevelEstimator> speech_level_estimator_;
std::unique_ptr<InputVolumeController> input_volume_controller_;
+ // TODO(bugs.webrtc.org/7494): Rename to `CrestFactorEstimator`.
+ std::unique_ptr<SaturationProtector> saturation_protector_;
+ // TODO(bugs.webrtc.org/7494): Rename to `AdaptiveDigitalGainController`.
+ std::unique_ptr<AdaptiveDigitalGainApplier> adaptive_digital_controller_;
Limiter limiter_;
+
int calls_since_last_limiter_log_;
};