AGC2 adaptive digital controller config clean-up

- Remove dry-run option
- Hard-code `adjacent_speech_frames_threshold` and `vad_reset_period_ms`
- Expose `initial_gain_db` via field trial

Tested: adaptive digital controller bit-exactness verified

Bug: webrtc:7494
Change-Id: I6166611f91320b6c37de3f8e553c06c2ed95b772
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/287222
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Hanna Silen <silen@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38862}

commit: dfba28e30eaa791147c98e34ef0476e99eb93f5e [log] [tgz]
author: Alessio Bazzica <alessiob@webrtc.org> Fri Dec 09 10:02:41 2022 +0100
committer: WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com> Fri Dec 09 13:07:34 2022 +0000
tree: a1a25aaffbd1685d88728734b972d5e75a50aa76
parent: 2cda27c0b9c5f9988459f02e87f2a764fb5983a0 [diff]
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index 79a0255..3e6b201 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn

@@ -139,6 +139,7 @@
     "../../rtc_base:stringutils",
     "../../system_wrappers:field_trial",
     "agc2:adaptive_digital_gain_controller",
+    "agc2:common",
     "agc2:cpu_features",
     "agc2:fixed_digital",
     "agc2:gain_applier",

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
index b8a99da..9a504c9 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc

@@ -100,25 +100,12 @@
                         max_gain_increase_db);
 }
 
-// Copies the (multichannel) audio samples from `src` into `dst`.
-void CopyAudio(AudioFrameView<const float> src,
-               std::vector<std::vector<float>>& dst) {
-  RTC_DCHECK_GT(src.num_channels(), 0);
-  RTC_DCHECK_GT(src.samples_per_channel(), 0);
-  RTC_DCHECK_EQ(dst.size(), src.num_channels());
-  for (int c = 0; c < src.num_channels(); ++c) {
-    rtc::ArrayView<const float> channel_view = src.channel(c);
-    RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
-    RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
-    std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
-  }
-}
-
 }  // namespace
 
 AdaptiveDigitalGainController::AdaptiveDigitalGainController(
     ApmDataDumper* apm_data_dumper,
     const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+    int adjacent_speech_frames_threshold,
     int sample_rate_hz,
     int num_channels)
     : apm_data_dumper_(apm_data_dumper),
@@ -126,41 +113,16 @@
           /*hard_clip_samples=*/false,
           /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
       config_(config),
+      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
       max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
                                    kFrameDurationMs / 1000.0f),
       calls_since_last_gain_log_(0),
-      frames_to_gain_increase_allowed_(
-          config_.adjacent_speech_frames_threshold),
+      frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
       last_gain_db_(config_.initial_gain_db) {
   RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
   RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
   RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
   RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
-  Initialize(sample_rate_hz, num_channels);
-}
-
-void AdaptiveDigitalGainController::Initialize(int sample_rate_hz,
-                                               int num_channels) {
-  if (!config_.dry_run) {
-    return;
-  }
-  RTC_DCHECK_GT(sample_rate_hz, 0);
-  RTC_DCHECK_GT(num_channels, 0);
-  int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
-  bool sample_rate_changed =
-      dry_run_frame_.empty() ||  // Handle initialization.
-      dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
-  bool num_channels_changed =
-      dry_run_channels_.size() != static_cast<size_t>(num_channels);
-  if (sample_rate_changed || num_channels_changed) {
-    // Resize the multichannel audio vector and update the channel pointers.
-    dry_run_frame_.resize(num_channels);
-    dry_run_channels_.resize(num_channels);
-    for (int c = 0; c < num_channels; ++c) {
-      dry_run_frame_[c].resize(frame_size);
-      dry_run_channels_[c] = dry_run_frame_[c].data();
-    }
-  }
 }
 
 void AdaptiveDigitalGainController::Process(const FrameInfo& info,
@@ -187,7 +149,7 @@
   // observed.
   bool first_confident_speech_frame = false;
   if (info.speech_probability < kVadConfidenceThreshold) {
-    frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold;
+    frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
   } else if (frames_to_gain_increase_allowed_ > 0) {
     frames_to_gain_increase_allowed_--;
     first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
@@ -203,7 +165,7 @@
     // No gain increase happened while waiting for a long enough speech
     // sequence. Therefore, temporarily allow a faster gain increase.
     RTC_DCHECK(gain_increase_allowed);
-    max_gain_increase_db *= config_.adjacent_speech_frames_threshold;
+    max_gain_increase_db *= adjacent_speech_frames_threshold_;
   }
 
   const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
@@ -223,18 +185,7 @@
         DbToRatio(last_gain_db_ + gain_change_this_frame_db));
   }
 
-  // Modify `frame` only if not running in "dry run" mode.
-  if (!config_.dry_run) {
-    gain_applier_.ApplyGain(frame);
-  } else {
-    // Copy `frame` so that `ApplyGain()` is called (on a copy).
-    CopyAudio(frame, dry_run_frame_);
-    RTC_DCHECK(!dry_run_channels_.empty());
-    AudioFrameView<float> frame_copy(&dry_run_channels_[0],
-                                     frame.num_channels(),
-                                     frame.samples_per_channel());
-    gain_applier_.ApplyGain(frame_copy);
-  }
+  gain_applier_.ApplyGain(frame);
 
   // Remember that the gain has changed for the next iteration.
   last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
index 05b2ef9..ce0dc8f 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h

@@ -39,6 +39,7 @@
   AdaptiveDigitalGainController(
       ApmDataDumper* apm_data_dumper,
       const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+      int adjacent_speech_frames_threshold,
       int sample_rate_hz,
       int num_channels);
   AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete;
@@ -56,14 +57,12 @@
   GainApplier gain_applier_;
 
   const AudioProcessing::Config::GainController2::AdaptiveDigital config_;
+  const int adjacent_speech_frames_threshold_;
   const float max_gain_change_db_per_10ms_;
 
   int calls_since_last_gain_log_;
   int frames_to_gain_increase_allowed_;
   float last_gain_db_;
-
-  std::vector<std::vector<float>> dry_run_frame_;
-  std::vector<float*> dry_run_channels_;
 };
 
 }  // namespace webrtc

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc
index 832be1e..b16cd1d 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc

@@ -51,14 +51,16 @@
 // Helper to create initialized `AdaptiveDigitalGainController` objects.
 struct GainApplierHelper {
   GainApplierHelper(const AdaptiveDigitalConfig& config,
+                    int adjacent_speech_frames_threshold,
                     int sample_rate_hz,
                     int num_channels)
       : apm_data_dumper(0),
-        gain_applier(
-            std::make_unique<AdaptiveDigitalGainController>(&apm_data_dumper,
-                                                            config,
-                                                            sample_rate_hz,
-                                                            num_channels)) {}
+        gain_applier(std::make_unique<AdaptiveDigitalGainController>(
+            &apm_data_dumper,
+            config,
+            adjacent_speech_frames_threshold,
+            sample_rate_hz,
+            num_channels)) {}
   ApmDataDumper apm_data_dumper;
   std::unique_ptr<AdaptiveDigitalGainController> gain_applier;
 };
@@ -81,7 +83,8 @@
 
 TEST(GainController2AdaptiveDigitalGainControllerTest,
      GainApplierShouldNotCrash) {
-  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+                           /*sample_rate_hz=*/48000, kStereo);
   // Make one call with reasonable audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
   helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
@@ -96,7 +99,8 @@
                            kDefaultConfig.max_gain_change_db_per_second)) +
       kNumExtraFrames;
 
-  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+                           /*sample_rate_hz=*/8000, kMono);
   AdaptiveDigitalGainController::FrameInfo info =
       GetFrameInfoToNotAdapt(kDefaultConfig);
   info.speech_level_dbfs = -60.0f;
@@ -111,7 +115,8 @@
 }
 
 TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) {
-  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+                           /*sample_rate_hz=*/8000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   constexpr float kMaxGainChangeDbPerFrame =
@@ -152,7 +157,8 @@
 }
 
 TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) {
-  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+                           /*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
 
@@ -178,7 +184,8 @@
 }
 
 TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) {
-  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+                           /*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   constexpr int num_initial_frames =
@@ -210,7 +217,8 @@
 
 TEST(GainController2AdaptiveDigitalGainControllerTest,
      CanHandlePositiveSpeechLevels) {
-  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+                           /*sample_rate_hz=*/48000, kStereo);
 
   // Make one call with positive audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
@@ -221,7 +229,8 @@
 }
 
 TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) {
-  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
+                           /*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   constexpr int num_initial_frames =
@@ -260,17 +269,16 @@
 
 TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
        DoNotIncreaseGainWithTooFewSpeechFrames) {
-  AdaptiveDigitalConfig config;
-  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
-  GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);
+  GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(),
+                           /*sample_rate_hz=*/48000, kMono);
 
   // Lower the speech level so that the target gain will be increased.
   AdaptiveDigitalGainController::FrameInfo info =
-      GetFrameInfoToNotAdapt(config);
+      GetFrameInfoToNotAdapt(kDefaultConfig);
   info.speech_level_dbfs -= 12.0f;
 
   float prev_gain = 0.0f;
-  for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
+  for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
     SCOPED_TRACE(i);
     VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
     helper.gain_applier->Process(info, audio.float_frame_view());
@@ -284,17 +292,16 @@
 
 TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
        IncreaseGainWithEnoughSpeechFrames) {
-  AdaptiveDigitalConfig config;
-  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
-  GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);
+  GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(),
+                           /*sample_rate_hz=*/48000, kMono);
 
   // Lower the speech level so that the target gain will be increased.
   AdaptiveDigitalGainController::FrameInfo info =
-      GetFrameInfoToNotAdapt(config);
+      GetFrameInfoToNotAdapt(kDefaultConfig);
   info.speech_level_dbfs -= 12.0f;
 
   float prev_gain = 0.0f;
-  for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
+  for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
     SCOPED_TRACE(i);
     VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
     helper.gain_applier->Process(info, audio.float_frame_view());
@@ -309,77 +316,10 @@
   EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
 }
 
-INSTANTIATE_TEST_SUITE_P(GainController2,
-                         AdaptiveDigitalGainControllerParametrizedTest,
-                         ::testing::Values(1, 7, 31));
-
-// Checks that the input is never modified when running in dry run mode.
-TEST(GainController2AdaptiveDigitalGainControllerTest,
-     DryRunDoesNotChangeInput) {
-  AdaptiveDigitalConfig config;
-  config.dry_run = true;
-  GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
-
-  // Simulate an input signal with log speech level.
-  AdaptiveDigitalGainController::FrameInfo info =
-      GetFrameInfoToNotAdapt(config);
-  info.speech_level_dbfs = -60.0f;
-  const int num_frames_to_adapt =
-      static_cast<int>(
-          config.max_gain_db /
-          GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
-      kNumExtraFrames;
-  constexpr float kPcmSamples = 123.456f;
-  // Run the gain applier and check that the PCM samples are not modified.
-  for (int i = 0; i < num_frames_to_adapt; ++i) {
-    SCOPED_TRACE(i);
-    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
-    helper.gain_applier->Process(info, fake_audio.float_frame_view());
-    EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
-  }
-}
-
-// Checks that no sample is modified before and after the sample rate changes.
-TEST(GainController2AdaptiveDigitalGainControllerTest,
-     DryRunHandlesSampleRateChange) {
-  AdaptiveDigitalConfig config;
-  config.dry_run = true;
-  GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
-
-  AdaptiveDigitalGainController::FrameInfo info =
-      GetFrameInfoToNotAdapt(config);
-  info.speech_level_dbfs = -60.0f;
-  constexpr float kPcmSamples = 123.456f;
-  VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
-  helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
-  EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
-  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
-  VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
-  helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
-  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
-}
-
-// Checks that no sample is modified before and after the number of channels
-// changes.
-TEST(GainController2AdaptiveDigitalGainControllerTest,
-     DryRunHandlesNumChannelsChange) {
-  AdaptiveDigitalConfig config;
-  config.dry_run = true;
-  GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
-
-  AdaptiveDigitalGainController::FrameInfo info =
-      GetFrameInfoToNotAdapt(config);
-  info.speech_level_dbfs = -60.0f;
-  constexpr float kPcmSamples = 123.456f;
-  VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
-  helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
-  EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
-  VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
-  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
-  helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
-  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
-  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
-}
+INSTANTIATE_TEST_SUITE_P(
+    GainController2,
+    AdaptiveDigitalGainControllerParametrizedTest,
+    ::testing::Values(1, 7, 31, kAdjacentSpeechFramesThreshold));
 
 }  // namespace
 }  // namespace webrtc

diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 4af8552..4597bcd 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h

@@ -29,11 +29,16 @@
 // At what limiter levels should we start decreasing the adaptive digital gain.
 constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
 
-// This is the threshold for speech. Speech frames are used for updating the
-// speech level, measuring the amount of speech, and decide when to allow target
-// gain changes.
+// Number of milliseconds to wait to periodically reset the VAD.
+constexpr int kVadResetPeriodMs = 1500;
+
+// Speech probability threshold to detect speech activity.
 constexpr float kVadConfidenceThreshold = 0.95f;
 
+// Minimum number of adjacent speech frames having a sufficiently high speech
+// probability to reliably detect speech activity.
+constexpr int kAdjacentSpeechFramesThreshold = 12;
+
 // Number of milliseconds of speech frames to observe to make the estimator
 // confident.
 constexpr float kLevelEstimatorTimeToConfidenceMs = 400;

diff --git a/modules/audio_processing/agc2/speech_level_estimator.cc b/modules/audio_processing/agc2/speech_level_estimator.cc
index 9462555..7bf3252 100644
--- a/modules/audio_processing/agc2/speech_level_estimator.cc
+++ b/modules/audio_processing/agc2/speech_level_estimator.cc

@@ -46,11 +46,11 @@
 
 SpeechLevelEstimator::SpeechLevelEstimator(
     ApmDataDumper* apm_data_dumper,
-    const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+    int adjacent_speech_frames_threshold)
     : apm_data_dumper_(apm_data_dumper),
       initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
-      adjacent_speech_frames_threshold_(
-          config.adjacent_speech_frames_threshold),
+      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
       level_dbfs_(initial_speech_level_dbfs_),
       // TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume
       // controller temporal dependency removed.

diff --git a/modules/audio_processing/agc2/speech_level_estimator.h b/modules/audio_processing/agc2/speech_level_estimator.h
index 5cb2b43..4d9f106 100644
--- a/modules/audio_processing/agc2/speech_level_estimator.h
+++ b/modules/audio_processing/agc2/speech_level_estimator.h

@@ -28,7 +28,8 @@
  public:
   SpeechLevelEstimator(
       ApmDataDumper* apm_data_dumper,
-      const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
+      const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+      int adjacent_speech_frames_threshold);
   SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
   SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;
 

diff --git a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
index 2fec7f7..e1c5f85 100644
--- a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
+++ b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc

@@ -42,13 +42,6 @@
   }
 }
 
-constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig(
-    int adjacent_speech_frames_threshold) {
-  AdaptiveDigitalConfig config;
-  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold;
-  return config;
-}
-
 constexpr float kNoSpeechProbability = 0.0f;
 constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
 constexpr float kMaxSpeechProbability = 1.0f;
@@ -59,7 +52,8 @@
       : data_dumper(0),
         estimator(std::make_unique<SpeechLevelEstimator>(
             &data_dumper,
-            GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
+            AdaptiveDigitalConfig{},
+            adjacent_speech_frames_threshold)),
         initial_speech_level_dbfs(estimator->level_dbfs()),
         level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
         level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {

diff --git a/modules/audio_processing/agc2/vad_wrapper.cc b/modules/audio_processing/agc2/vad_wrapper.cc
index 91448f8..af6325d 100644
--- a/modules/audio_processing/agc2/vad_wrapper.cc
+++ b/modules/audio_processing/agc2/vad_wrapper.cc

@@ -53,6 +53,13 @@
 }  // namespace
 
 VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
+    const AvailableCpuFeatures& cpu_features,
+    int sample_rate_hz)
+    : VoiceActivityDetectorWrapper(kVadResetPeriodMs,
+                                   cpu_features,
+                                   sample_rate_hz) {}
+
+VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
     int vad_reset_period_ms,
     const AvailableCpuFeatures& cpu_features,
     int sample_rate_hz)

diff --git a/modules/audio_processing/agc2/vad_wrapper.h b/modules/audio_processing/agc2/vad_wrapper.h
index 6df0ead..459c471 100644
--- a/modules/audio_processing/agc2/vad_wrapper.h
+++ b/modules/audio_processing/agc2/vad_wrapper.h

@@ -40,6 +40,10 @@
     virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
   };
 
+  // Ctor. Uses `cpu_features` to instantiate the default VAD.
+  VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features,
+                               int sample_rate_hz);
+
   // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
   // `MonoVad::Reset()`; it must be equal to or greater than the duration of two
   // frames. Uses `cpu_features` to instantiate the default VAD.

diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 3200ea4..18d4ad9 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc

@@ -378,6 +378,9 @@
   FieldTrialConstrained<double> max_gain_db(
       "max_gain_db", kDefaultAdaptiveDigitalConfig.max_gain_db, 0,
       absl::nullopt);
+  FieldTrialConstrained<double> initial_gain_db(
+      "initial_gain_db", kDefaultAdaptiveDigitalConfig.initial_gain_db, 0,
+      absl::nullopt);
   FieldTrialConstrained<double> max_gain_change_db_per_second(
       "max_gain_change_db_per_second",
       kDefaultAdaptiveDigitalConfig.max_gain_change_db_per_second, 0,
@@ -392,46 +395,51 @@
   const std::string field_trial_name =
       field_trial::FindFullName(kFieldTrialName);
 
-  ParseFieldTrial({&enabled, &clipped_level_min, &clipped_level_step,
-                   &clipped_ratio_threshold, &clipped_wait_frames,
-                   &enable_clipping_predictor, &target_range_max_dbfs,
-                   &target_range_min_dbfs, &update_input_volume_wait_frames,
-                   &speech_probability_threshold, &speech_ratio_threshold,
-                   &headroom_db, &max_gain_db, &max_gain_change_db_per_second,
-                   &max_output_noise_level_dbfs},
-                  field_trial_name);
+  ParseFieldTrial(
+      {&enabled, &clipped_level_min, &clipped_level_step,
+       &clipped_ratio_threshold, &clipped_wait_frames,
+       &enable_clipping_predictor, &target_range_max_dbfs,
+       &target_range_min_dbfs, &update_input_volume_wait_frames,
+       &speech_probability_threshold, &speech_ratio_threshold, &headroom_db,
+       &max_gain_db, &initial_gain_db, &max_gain_change_db_per_second,
+       &max_output_noise_level_dbfs},
+      field_trial_name);
 
   // Checked already by `IsEnabled()` before parsing, therefore always true.
   RTC_DCHECK(enabled);
 
   return AudioProcessingImpl::GainController2ConfigOverride{
-      InputVolumeController::Config{
-          .clipped_level_min = static_cast<int>(clipped_level_min.Get()),
-          .clipped_level_step = static_cast<int>(clipped_level_step.Get()),
-          .clipped_ratio_threshold =
-              static_cast<float>(clipped_ratio_threshold.Get()),
-          .clipped_wait_frames = static_cast<int>(clipped_wait_frames.Get()),
-          .enable_clipping_predictor =
-              static_cast<bool>(enable_clipping_predictor.Get()),
-          .target_range_max_dbfs =
-              static_cast<int>(target_range_max_dbfs.Get()),
-          .target_range_min_dbfs =
-              static_cast<int>(target_range_min_dbfs.Get()),
-          .update_input_volume_wait_frames =
-              static_cast<int>(update_input_volume_wait_frames.Get()),
-          .speech_probability_threshold =
-              static_cast<float>(speech_probability_threshold.Get()),
-          .speech_ratio_threshold =
-              static_cast<float>(speech_ratio_threshold.Get()),
-      },
-      AudioProcessingImpl::GainController2ConfigOverride::AdaptiveDigitalConfig{
-          .headroom_db = static_cast<float>(headroom_db.Get()),
-          .max_gain_db = static_cast<float>(max_gain_db.Get()),
-          .max_gain_change_db_per_second =
-              static_cast<float>(max_gain_change_db_per_second.Get()),
-          .max_output_noise_level_dbfs =
-              static_cast<float>(max_output_noise_level_dbfs.Get()),
-      },
+      .input_volume_controller_config =
+          {
+              .clipped_level_min = static_cast<int>(clipped_level_min.Get()),
+              .clipped_level_step = static_cast<int>(clipped_level_step.Get()),
+              .clipped_ratio_threshold =
+                  static_cast<float>(clipped_ratio_threshold.Get()),
+              .clipped_wait_frames =
+                  static_cast<int>(clipped_wait_frames.Get()),
+              .enable_clipping_predictor =
+                  static_cast<bool>(enable_clipping_predictor.Get()),
+              .target_range_max_dbfs =
+                  static_cast<int>(target_range_max_dbfs.Get()),
+              .target_range_min_dbfs =
+                  static_cast<int>(target_range_min_dbfs.Get()),
+              .update_input_volume_wait_frames =
+                  static_cast<int>(update_input_volume_wait_frames.Get()),
+              .speech_probability_threshold =
+                  static_cast<float>(speech_probability_threshold.Get()),
+              .speech_ratio_threshold =
+                  static_cast<float>(speech_ratio_threshold.Get()),
+          },
+      .adaptive_digital_config =
+          {
+              .headroom_db = static_cast<float>(headroom_db.Get()),
+              .max_gain_db = static_cast<float>(max_gain_db.Get()),
+              .initial_gain_db = static_cast<float>(initial_gain_db.Get()),
+              .max_gain_change_db_per_second =
+                  static_cast<float>(max_gain_change_db_per_second.Get()),
+              .max_output_noise_level_dbfs =
+                  static_cast<float>(max_output_noise_level_dbfs.Get()),
+          },
   };
 }
 
@@ -489,21 +497,10 @@
       adjusted_config.gain_controller1.analog_gain_controller.enabled = false;
 
       adjusted_config.gain_controller2.enabled = true;
-      adjusted_config.gain_controller2.adaptive_digital.enabled = true;
       adjusted_config.gain_controller2.input_volume_controller.enabled = true;
-
-      auto& adjusted_adaptive_digital =  // Alias.
-          adjusted_config.gain_controller2.adaptive_digital;
-      const auto& adaptive_digital_override =  // Alias.
+      adjusted_config.gain_controller2.adaptive_digital =
           gain_controller2_config_override->adaptive_digital_config;
-      adjusted_adaptive_digital.headroom_db =
-          adaptive_digital_override.headroom_db;
-      adjusted_adaptive_digital.max_gain_db =
-          adaptive_digital_override.max_gain_db;
-      adjusted_adaptive_digital.max_gain_change_db_per_second =
-          adaptive_digital_override.max_gain_change_db_per_second;
-      adjusted_adaptive_digital.max_output_noise_level_dbfs =
-          adaptive_digital_override.max_output_noise_level_dbfs;
+      adjusted_config.gain_controller2.adaptive_digital.enabled = true;
     }
   }
 
@@ -2373,7 +2370,6 @@
     // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
     submodules_.voice_activity_detector =
         std::make_unique<VoiceActivityDetectorWrapper>(
-            config_.gain_controller2.adaptive_digital.vad_reset_period_ms,
             submodules_.gain_controller2->GetCpuFeatures(),
             proc_fullband_sample_rate_hz());
   }

diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 189ed03..0f74c30 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h

@@ -142,12 +142,8 @@
   // removed.
   struct GainController2ConfigOverride {
     InputVolumeController::Config input_volume_controller_config;
-    struct AdaptiveDigitalConfig {
-      float headroom_db;
-      float max_gain_db;
-      float max_gain_change_db_per_second;
-      float max_output_noise_level_dbfs;
-    } adaptive_digital_config;
+    AudioProcessing::Config::GainController2::AdaptiveDigital
+        adaptive_digital_config;
   };
 
  protected:

diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc
index b394e93..10c11a2 100644
--- a/modules/audio_processing/audio_processing_impl_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_unittest.cc

@@ -1235,6 +1235,70 @@
   EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135);
 }
 
+class GainController2FieldTrialParametrizedTest
+    : public ::testing::TestWithParam<AudioProcessing::Config> {};
+
+TEST_P(GainController2FieldTrialParametrizedTest,
+       CheckAgc2AdaptiveDigitalOverridesApplied) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/"
+      "Enabled,"
+      "enable_clipping_predictor:true,"
+      "clipped_level_min:20,"
+      "clipped_level_step:30,"
+      "clipped_ratio_threshold:0.4,"
+      "clipped_wait_frames:50,"
+      "target_range_max_dbfs:-6,"
+      "target_range_min_dbfs:-70,"
+      "update_input_volume_wait_frames:80,"
+      "speech_probability_threshold:0.9,"
+      "speech_ratio_threshold:1.0,"
+      "headroom_db:10,"
+      "max_gain_db:20,"
+      "initial_gain_db:7,"
+      "max_gain_change_db_per_second:5,"
+      "max_output_noise_level_dbfs:-40/");
+
+  auto adjusted_config =
+      AudioProcessingBuilder().SetConfig(GetParam()).Create()->GetConfig();
+
+  EXPECT_FALSE(adjusted_config.gain_controller1.enabled);
+  EXPECT_TRUE(adjusted_config.gain_controller2.enabled);
+  EXPECT_TRUE(adjusted_config.gain_controller2.adaptive_digital.enabled);
+  EXPECT_TRUE(adjusted_config.gain_controller2.input_volume_controller.enabled);
+
+  EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.headroom_db, 10);
+  EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.max_gain_db, 20);
+  EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.initial_gain_db,
+            7);
+  EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital
+                .max_gain_change_db_per_second,
+            5);
+  EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital
+                .max_output_noise_level_dbfs,
+            -40);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioProcessingImplTest,
+    GainController2FieldTrialParametrizedTest,
+    ::testing::Values(
+        // Full AGC1.
+        AudioProcessing::Config{
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = true}},
+            .gain_controller2 = {.enabled = false}},
+        // Hybrid AGC.
+        AudioProcessing::Config{
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = false}},
+            .gain_controller2 = {.enabled = true,
+                                 .adaptive_digital = {.enabled = true}}}));
+
 TEST(AudioProcessingImplGainController2FieldTrialTest,
      ConfigAdjustedWhenExperimentEnabledAndAgc1AnalogEnabled) {
   constexpr AudioProcessing::Config::GainController2::AdaptiveDigital
@@ -1254,6 +1318,7 @@
       "speech_ratio_threshold:1.0,"
       "headroom_db:10,"
       "max_gain_db:20,"
+      "initial_gain_db:7,"
       "max_gain_change_db_per_second:3,"
       "max_output_noise_level_dbfs:-40/");
 
@@ -1318,6 +1383,7 @@
       "speech_ratio_threshold:1.0,"
       "headroom_db:10,"
       "max_gain_db:20,"
+      "initial_gain_db:7,"
       "max_gain_change_db_per_second:3,"
       "max_output_noise_level_dbfs:-40/");
 
@@ -1382,6 +1448,7 @@
       "speech_ratio_threshold:1.0,"
       "headroom_db:10,"
       "max_gain_db:20,"
+      "initial_gain_db:7,"
       "max_gain_change_db_per_second:3,"
       "max_output_noise_level_dbfs:-40/");
 
@@ -1434,6 +1501,7 @@
       "speech_ratio_threshold:1.0,"
       "headroom_db:10,"
       "max_gain_db:20,"
+      "initial_gain_db:7,"
       "max_gain_change_db_per_second:3,"
       "max_output_noise_level_dbfs:-40/");
 

diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index bbb7f46..e320e71 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc

@@ -3062,10 +3062,6 @@
   b_adaptive.enabled = a_adaptive.enabled;
   EXPECT_EQ(a, b);
 
-  Toggle(a_adaptive.dry_run);
-  b_adaptive.dry_run = a_adaptive.dry_run;
-  EXPECT_EQ(a, b);
-
   a_adaptive.headroom_db += 1.0f;
   b_adaptive.headroom_db = a_adaptive.headroom_db;
   EXPECT_EQ(a, b);
@@ -3078,15 +3074,6 @@
   b_adaptive.initial_gain_db = a_adaptive.initial_gain_db;
   EXPECT_EQ(a, b);
 
-  a_adaptive.vad_reset_period_ms++;
-  b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms;
-  EXPECT_EQ(a, b);
-
-  a_adaptive.adjacent_speech_frames_threshold++;
-  b_adaptive.adjacent_speech_frames_threshold =
-      a_adaptive.adjacent_speech_frames_threshold;
-  EXPECT_EQ(a, b);
-
   a_adaptive.max_gain_change_db_per_second += 1.0f;
   b_adaptive.max_gain_change_db_per_second =
       a_adaptive.max_gain_change_db_per_second;
@@ -3119,10 +3106,6 @@
   EXPECT_NE(a, b);
   a_adaptive = b_adaptive;
 
-  Toggle(a_adaptive.dry_run);
-  EXPECT_NE(a, b);
-  a_adaptive = b_adaptive;
-
   a_adaptive.headroom_db += 1.0f;
   EXPECT_NE(a, b);
   a_adaptive = b_adaptive;
@@ -3135,14 +3118,6 @@
   EXPECT_NE(a, b);
   a_adaptive = b_adaptive;
 
-  a_adaptive.vad_reset_period_ms++;
-  EXPECT_NE(a, b);
-  a_adaptive = b_adaptive;
-
-  a_adaptive.adjacent_speech_frames_threshold++;
-  EXPECT_NE(a, b);
-  a_adaptive = b_adaptive;
-
   a_adaptive.max_gain_change_db_per_second += 1.0f;
   EXPECT_NE(a, b);
   a_adaptive = b_adaptive;

diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index d25ce7a..ea36dd2 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc

@@ -14,6 +14,7 @@
 #include <utility>
 
 #include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/agc2/cpu_features.h"
 #include "modules/audio_processing/audio_buffer.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
@@ -102,14 +103,10 @@
       config.adaptive_digital.enabled) {
     // Create dependencies.
     speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
-        &data_dumper_, config.adaptive_digital);
-    if (use_internal_vad) {
-      // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
-      // digital to gain controller 2 config.
+        &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
+    if (use_internal_vad)
       vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
-          config.adaptive_digital.vad_reset_period_ms, cpu_features_,
-          sample_rate_hz);
-    }
+          kVadResetPeriodMs, cpu_features_, sample_rate_hz);
   }
 
   if (config.input_volume_controller.enabled) {
@@ -124,14 +121,13 @@
     // Create dependencies.
     noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
     saturation_protector_ = CreateSaturationProtector(
-        kSaturationProtectorInitialHeadroomDb,
-        config.adaptive_digital.adjacent_speech_frames_threshold,
+        kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
         &data_dumper_);
     // Create controller.
     adaptive_digital_controller_ =
         std::make_unique<AdaptiveDigitalGainController>(
-            &data_dumper_, config.adaptive_digital, sample_rate_hz,
-            num_channels);
+            &data_dumper_, config.adaptive_digital,
+            kAdjacentSpeechFramesThreshold, sample_rate_hz, num_channels);
   }
 }
 

diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc
index f7e5db2..bff6245 100644
--- a/modules/audio_processing/gain_controller2_unittest.cc
+++ b/modules/audio_processing/gain_controller2_unittest.cc

@@ -613,8 +613,7 @@
   GainController2 agc2_reference(config, /*input_volume_controller_config=*/{},
                                  kSampleRateHz, kStereo,
                                  /*use_internal_vad=*/true);
-  VoiceActivityDetectorWrapper vad(config.adaptive_digital.vad_reset_period_ms,
-                                   GetAvailableCpuFeatures(), kSampleRateHz);
+  VoiceActivityDetectorWrapper vad(GetAvailableCpuFeatures(), kSampleRateHz);
   test::InputAudioFile input_file(
       test::GetApmCaptureTestVectorFileName(kSampleRateHz),
       /*loop_at_end=*/true);

diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc
index 83917c2..13ddcc5 100644
--- a/modules/audio_processing/include/audio_processing.cc
+++ b/modules/audio_processing/include/audio_processing.cc

@@ -87,12 +87,9 @@
 
 bool Agc2Config::AdaptiveDigital::operator==(
     const Agc2Config::AdaptiveDigital& rhs) const {
-  return enabled == rhs.enabled && dry_run == rhs.dry_run &&
-         headroom_db == rhs.headroom_db && max_gain_db == rhs.max_gain_db &&
+  return enabled == rhs.enabled && headroom_db == rhs.headroom_db &&
+         max_gain_db == rhs.max_gain_db &&
          initial_gain_db == rhs.initial_gain_db &&
-         vad_reset_period_ms == rhs.vad_reset_period_ms &&
-         adjacent_speech_frames_threshold ==
-             rhs.adjacent_speech_frames_threshold &&
          max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
          max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
 }
@@ -197,15 +194,10 @@
           << gain_controller2.fixed_digital.gain_db
           << " }, adaptive_digital: { enabled: "
           << gain_controller2.adaptive_digital.enabled
-          << ", dry_run: " << gain_controller2.adaptive_digital.dry_run
           << ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
           << ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
           << ", initial_gain_db: "
           << gain_controller2.adaptive_digital.initial_gain_db
-          << ", vad_reset_period_ms: "
-          << gain_controller2.adaptive_digital.vad_reset_period_ms
-          << ", adjacent_speech_frames_threshold: "
-          << gain_controller2.adaptive_digital.adjacent_speech_frames_threshold
           << ", max_gain_change_db_per_second: "
           << gain_controller2.adaptive_digital.max_gain_change_db_per_second
           << ", max_output_noise_level_dbfs: "

diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index c5c6070..f613a38 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h

@@ -362,21 +362,10 @@
         bool operator!=(const AdaptiveDigital& rhs) const {
           return !(*this == rhs);
         }
-
         bool enabled = false;
-        // TODO(bugs.webrtc.org/7494): Remove `dry_run`.
-        // When true, the adaptive digital controller runs but the signal is not
-        // modified.
-        bool dry_run = false;
         float headroom_db = 6.0f;
-        // TODO(bugs.webrtc.org/7494): Consider removing and inferring from
-        // `max_output_noise_level_dbfs`.
         float max_gain_db = 30.0f;
         float initial_gain_db = 8.0f;
-        // TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below.
-        int vad_reset_period_ms = 1500;
-        // TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below.
-        int adjacent_speech_frames_threshold = 12;
         float max_gain_change_db_per_second = 3.0f;
         float max_output_noise_level_dbfs = -50.0f;
       } adaptive_digital;
commit	dfba28e30eaa791147c98e34ef0476e99eb93f5e	[log] [tgz]
author	Alessio Bazzica <alessiob@webrtc.org>	Fri Dec 09 10:02:41 2022 +0100
committer	WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com>	Fri Dec 09 13:07:34 2022 +0000
tree	a1a25aaffbd1685d88728734b972d5e75a50aa76
parent	2cda27c0b9c5f9988459f02e87f2a764fb5983a0 [diff]