Revert "Deprecate the adaptive level controller"
This reverts commit 6f37ed78d99daa36e964ff0a65b205f0916d9949.
Reason for revert: Not recorded; the "<INSERT REASONING HERE>" template placeholder was never filled in.
Original change's description:
> Deprecate the adaptive level controller
>
> Level control handled by default-on AGC.
>
> Bug: none
> Change-Id: I405daeceece12c896d41156b649fcfd556726f77
> Reviewed-on: https://webrtc-review.googlesource.com/59682
> Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org>
> Reviewed-by: Alex Loiko <aleloi@webrtc.org>
> Commit-Queue: Sam Zackrisson <saza@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#22305}
TBR=solenberg@webrtc.org,saza@webrtc.org,aleloi@webrtc.org
Change-Id: Ic52f41fcbebfd2291a51b17ac788313e1ceef163
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: none
Reviewed-on: https://webrtc-review.googlesource.com/60240
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22308}
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index 93d3ec6..3dcea89 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -79,6 +79,27 @@
"include/audio_processing.h",
"include/config.cc",
"include/config.h",
+ "level_controller/biquad_filter.cc",
+ "level_controller/biquad_filter.h",
+ "level_controller/down_sampler.cc",
+ "level_controller/down_sampler.h",
+ "level_controller/gain_applier.cc",
+ "level_controller/gain_applier.h",
+ "level_controller/gain_selector.cc",
+ "level_controller/gain_selector.h",
+ "level_controller/level_controller.cc",
+ "level_controller/level_controller.h",
+ "level_controller/level_controller_constants.h",
+ "level_controller/noise_level_estimator.cc",
+ "level_controller/noise_level_estimator.h",
+ "level_controller/noise_spectrum_estimator.cc",
+ "level_controller/noise_spectrum_estimator.h",
+ "level_controller/peak_level_estimator.cc",
+ "level_controller/peak_level_estimator.h",
+ "level_controller/saturating_gain_estimator.cc",
+ "level_controller/saturating_gain_estimator.h",
+ "level_controller/signal_classifier.cc",
+ "level_controller/signal_classifier.h",
"level_estimator_impl.cc",
"level_estimator_impl.h",
"low_cut_filter.cc",
@@ -589,6 +610,7 @@
"echo_detector/moving_max_unittest.cc",
"echo_detector/normalized_covariance_estimator_unittest.cc",
"gain_control_unittest.cc",
+ "level_controller/level_controller_unittest.cc",
"level_estimator_unittest.cc",
"low_cut_filter_unittest.cc",
"noise_suppression_unittest.cc",
@@ -616,6 +638,7 @@
sources = [
"audio_processing_performance_unittest.cc",
+ "level_controller/level_controller_complexity_unittest.cc",
]
deps = [
":audio_processing",
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 0caa142..f4b8dee 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -37,6 +37,7 @@
#if WEBRTC_INTELLIGIBILITY_ENHANCER
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#endif
+#include "modules/audio_processing/level_controller/level_controller.h"
#include "modules/audio_processing/level_estimator_impl.h"
#include "modules/audio_processing/low_cut_filter.h"
#include "modules/audio_processing/noise_suppression_impl.h"
@@ -187,6 +188,7 @@
bool beamformer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
+ bool level_controller_enabled,
bool echo_controller_enabled,
bool voice_activity_detector_enabled,
bool level_estimator_enabled,
@@ -206,6 +208,7 @@
(adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
changed |=
(gain_controller2_enabled != gain_controller2_enabled_);
+ changed |= (level_controller_enabled != level_controller_enabled_);
changed |= (echo_controller_enabled != echo_controller_enabled_);
changed |= (level_estimator_enabled != level_estimator_enabled_);
changed |=
@@ -221,6 +224,7 @@
beamformer_enabled_ = beamformer_enabled;
adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
gain_controller2_enabled_ = gain_controller2_enabled;
+ level_controller_enabled_ = level_controller_enabled;
echo_controller_enabled_ = echo_controller_enabled;
level_estimator_enabled_ = level_estimator_enabled;
voice_activity_detector_enabled_ = voice_activity_detector_enabled;
@@ -252,7 +256,8 @@
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive()
const {
- return gain_controller2_enabled_ || capture_post_processor_enabled_;
+ return level_controller_enabled_ || gain_controller2_enabled_ ||
+ capture_post_processor_enabled_;
}
bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandSubModulesActive()
@@ -309,6 +314,7 @@
std::unique_ptr<AgcManagerDirect> agc_manager;
std::unique_ptr<GainController2> gain_controller2;
std::unique_ptr<LowCutFilter> low_cut_filter;
+ std::unique_ptr<LevelController> level_controller;
std::unique_ptr<EchoDetector> echo_detector;
std::unique_ptr<EchoControl> echo_controller;
std::unique_ptr<CustomProcessing> capture_post_processor;
@@ -434,6 +440,10 @@
private_submodules_->echo_detector.reset(new ResidualEchoDetector());
}
+ // TODO(peah): Move this creation to happen only when the level controller
+ // is enabled.
+ private_submodules_->level_controller.reset(new LevelController());
+
// TODO(alessiob): Move the injected gain controller once injection is
// implemented.
private_submodules_->gain_controller2.reset(new GainController2());
@@ -592,6 +602,7 @@
proc_sample_rate_hz());
public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
public_submodules_->level_estimator->Initialize();
+ InitializeLevelController();
InitializeResidualEchoDetector();
InitializeEchoController();
InitializeGainController2();
@@ -695,16 +706,40 @@
void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
config_ = config;
+ bool config_ok = LevelController::Validate(config_.level_controller);
+ if (!config_ok) {
+ RTC_LOG(LS_ERROR) << "AudioProcessing module config error\n"
+ "level_controller: "
+ << LevelController::ToString(config_.level_controller)
+ << "\nReverting to default parameter set";
+ config_.level_controller = AudioProcessing::Config::LevelController();
+ }
+
// Run in a single-threaded manner when applying the settings.
rtc::CritScope cs_render(&crit_render_);
rtc::CritScope cs_capture(&crit_capture_);
+ // TODO(peah): Replace the use of capture_nonlocked_.level_controller_enabled
+ // with the value in config_ everywhere in the code.
+ if (capture_nonlocked_.level_controller_enabled !=
+ config_.level_controller.enabled) {
+ capture_nonlocked_.level_controller_enabled =
+ config_.level_controller.enabled;
+ // TODO(peah): Remove the conditional initialization to always initialize
+ // the level controller regardless of whether it is enabled or not.
+ InitializeLevelController();
+ }
+ RTC_LOG(LS_INFO) << "Level controller activated: "
+ << capture_nonlocked_.level_controller_enabled;
+
+ private_submodules_->level_controller->ApplyConfig(config_.level_controller);
+
InitializeLowCutFilter();
RTC_LOG(LS_INFO) << "Highpass filter activated: "
<< config_.high_pass_filter.enabled;
- const bool config_ok = GainController2::Validate(config_.gain_controller2);
+ config_ok = GainController2::Validate(config_.gain_controller2);
if (!config_ok) {
RTC_LOG(LS_ERROR) << "AudioProcessing module config error\n"
"Gain Controller 2: "
@@ -1224,11 +1259,13 @@
#if WEBRTC_INTELLIGIBILITY_ENHANCER
if (capture_nonlocked_.intelligibility_enabled) {
RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());
- const int gain_db =
- public_submodules_->gain_control->is_enabled()
- ? public_submodules_->gain_control->compression_gain_db()
- : 0;
- const float gain = DbToRatio(gain_db);
+ int gain_db = public_submodules_->gain_control->is_enabled() ?
+ public_submodules_->gain_control->compression_gain_db() :
+ 0;
+ float gain = DbToRatio(gain_db);
+ gain *= capture_nonlocked_.level_controller_enabled ?
+ private_submodules_->level_controller->GetLastGain() :
+ 1.f;
public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(
public_submodules_->noise_suppression->NoiseEstimate(), gain);
}
@@ -1298,6 +1335,10 @@
private_submodules_->gain_controller2->Process(capture_buffer);
}
+ if (capture_nonlocked_.level_controller_enabled) {
+ private_submodules_->level_controller->Process(capture_buffer);
+ }
+
if (private_submodules_->capture_post_processor) {
private_submodules_->capture_post_processor->Process(capture_buffer);
}
@@ -1725,6 +1766,7 @@
capture_nonlocked_.beamformer_enabled,
public_submodules_->gain_control->is_enabled(),
config_.gain_controller2.enabled,
+ capture_nonlocked_.level_controller_enabled,
capture_nonlocked_.echo_controller_enabled,
public_submodules_->voice_detection->is_enabled(),
public_submodules_->level_estimator->is_enabled(),
@@ -1790,6 +1832,10 @@
}
}
+void AudioProcessingImpl::InitializeLevelController() {
+ private_submodules_->level_controller->Initialize(proc_sample_rate_hz());
+}
+
void AudioProcessingImpl::InitializeResidualEchoDetector() {
RTC_DCHECK(private_submodules_->echo_detector);
private_submodules_->echo_detector->Initialize(proc_sample_rate_hz(),
@@ -1892,6 +1938,9 @@
public_submodules_->echo_cancellation->GetExperimentsDescription();
// TODO(peah): Add semicolon-separated concatenations of experiment
// descriptions for other submodules.
+ if (capture_nonlocked_.level_controller_enabled) {
+ experiments_description += "LevelController;";
+ }
if (constants_.agc_clipped_level_min != kClippedLevelMin) {
experiments_description += "AgcClippingLevelExperiment;";
}
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 55c47ac..e7c6621 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -169,6 +169,7 @@
bool beamformer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
+ bool level_controller_enabled,
bool echo_controller_enabled,
bool voice_activity_detector_enabled,
bool level_estimator_enabled,
@@ -192,6 +193,7 @@
bool beamformer_enabled_ = false;
bool adaptive_gain_controller_enabled_ = false;
bool gain_controller2_enabled_ = false;
+ bool level_controller_enabled_ = false;
bool echo_controller_enabled_ = false;
bool level_estimator_enabled_ = false;
bool voice_activity_detector_enabled_ = false;
@@ -231,6 +233,7 @@
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
int InitializeLocked(const ProcessingConfig& config)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
+ void InitializeLevelController() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
void InitializeResidualEchoDetector()
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
void InitializeLowCutFilter() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
@@ -383,6 +386,7 @@
int stream_delay_ms;
bool beamformer_enabled;
bool intelligibility_enabled;
+ bool level_controller_enabled = false;
bool echo_controller_enabled = false;
} capture_nonlocked_;
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index 89d6cb9..ecaeed3 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -25,6 +25,7 @@
#include "modules/audio_processing/common.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/include/mock_audio_processing.h"
+#include "modules/audio_processing/level_controller/level_controller_constants.h"
#include "modules/audio_processing/test/protobuf_utils.h"
#include "modules/audio_processing/test/test_utils.h"
#include "modules/include/module_common_types.h"
@@ -2820,6 +2821,98 @@
} // namespace
+TEST(ApmConfiguration, DefaultBehavior) {
+  // Verify that the level controller is off by default, that it can be
+  // activated via the config, and that the default initial peak level is
+  // retained after the config has been applied.
+  std::unique_ptr<AudioProcessingImpl> apm(
+      new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()));
+  AudioProcessing::Config config;
+  EXPECT_FALSE(apm->config_.level_controller.enabled);
+  // TODO(peah): Add a test for the existence of the level controller object
+  // once it is created only when that is specified in the config.
+  // TODO(peah): Remove the testing for
+  // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+  // is instead used to activate the level controller.
+  EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled);
+  EXPECT_NEAR(kTargetLcPeakLeveldBFS,
+              apm->config_.level_controller.initial_peak_level_dbfs,
+              std::numeric_limits<float>::epsilon());
+  config.level_controller.enabled = true;
+  apm->ApplyConfig(config);
+  EXPECT_TRUE(apm->config_.level_controller.enabled);
+  // TODO(peah): Add a test for the existence of the level controller object
+  // once it is created only when that is specified in the config.
+  // TODO(peah): Remove the testing for
+  // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+  // is instead used to activate the level controller.
+  EXPECT_TRUE(apm->capture_nonlocked_.level_controller_enabled);
+  EXPECT_NEAR(kTargetLcPeakLeveldBFS,
+              apm->config_.level_controller.initial_peak_level_dbfs,
+              std::numeric_limits<float>::epsilon());
+}
+
+TEST(ApmConfiguration, ValidConfigBehavior) {
+  // Verify that a valid initial peak level can be specified and that it is
+  // retained, with the controller still disabled, after ApplyConfig().
+  std::unique_ptr<AudioProcessingImpl> apm(
+      new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()));
+  AudioProcessing::Config config;
+  config.level_controller.initial_peak_level_dbfs = -50.f;
+  apm->ApplyConfig(config);
+  EXPECT_FALSE(apm->config_.level_controller.enabled);
+  // TODO(peah): Add a test for the existence of the level controller object
+  // once it is created only when that is specified in the config.
+  // TODO(peah): Remove the testing for
+  // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+  // is instead used to activate the level controller.
+  EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled);
+  EXPECT_NEAR(-50.f, apm->config_.level_controller.initial_peak_level_dbfs,
+              std::numeric_limits<float>::epsilon());
+}
+
+TEST(ApmConfiguration, InValidConfigBehavior) {
+  // Verify that the level controller config is reset to its defaults when an
+  // out-of-range value is supplied for the initial peak level.
+
+  // Verify that the config is properly reset when the specified initial peak
+  // level is too low.
+  std::unique_ptr<AudioProcessingImpl> apm(
+      new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()));
+  AudioProcessing::Config config;
+  config.level_controller.enabled = true;
+  config.level_controller.initial_peak_level_dbfs = -101.f;
+  apm->ApplyConfig(config);
+  EXPECT_FALSE(apm->config_.level_controller.enabled);
+  // TODO(peah): Add a test for the existence of the level controller object
+  // once it is created only when that is specified in the config.
+  // TODO(peah): Remove the testing for
+  // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+  // is instead used to activate the level controller.
+  EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled);
+  EXPECT_NEAR(kTargetLcPeakLeveldBFS,
+              apm->config_.level_controller.initial_peak_level_dbfs,
+              std::numeric_limits<float>::epsilon());
+
+  // Verify that the config is properly reset when the specified initial peak
+  // level is too high.
+  apm.reset(new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()));
+  config = AudioProcessing::Config();
+  config.level_controller.enabled = true;
+  config.level_controller.initial_peak_level_dbfs = 1.f;
+  apm->ApplyConfig(config);
+  EXPECT_FALSE(apm->config_.level_controller.enabled);
+  // TODO(peah): Add a test for the existence of the level controller object
+  // once it is created only when that is specified in the config.
+  // TODO(peah): Remove the testing for
+  // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+  // is instead used to activate the level controller.
+  EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled);
+  EXPECT_NEAR(kTargetLcPeakLeveldBFS,
+              apm->config_.level_controller.initial_peak_level_dbfs,
+              std::numeric_limits<float>::epsilon());
+}
+
TEST(ApmConfiguration, EnablePostProcessing) {
// Verify that apm uses a capture post processing module if one is provided.
webrtc::Config webrtc_config;
@@ -2914,6 +3007,7 @@
config.residual_echo_detector.enabled = true;
config.high_pass_filter.enabled = false;
config.gain_controller2.enabled = false;
+ config.level_controller.enabled = false;
apm->ApplyConfig(config);
EXPECT_EQ(apm->gain_control()->Enable(false), 0);
EXPECT_EQ(apm->level_estimator()->Enable(false), 0);
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 33ecf89..7057f28 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -211,8 +211,8 @@
// AudioProcessing* apm = AudioProcessingBuilder().Create();
//
// AudioProcessing::Config config;
+// config.level_controller.enabled = true;
// config.high_pass_filter.enabled = true;
-// config.gain_controller2.enabled = true;
// apm->ApplyConfig(config)
//
// apm->echo_cancellation()->enable_drift_compensation(false);
@@ -262,6 +262,14 @@
// by changing the default values in the AudioProcessing::Config struct.
// The config is applied by passing the struct to the ApplyConfig method.
struct Config {
+ struct LevelController {
+ bool enabled = false;
+
+ // Sets the initial peak level to use inside the level controller in order
+ // to compute the signal gain. The unit for the peak level is dBFS and
+ // the allowed range is [-100, 0].
+ float initial_peak_level_dbfs = -6.0206f;
+ } level_controller;
struct ResidualEchoDetector {
bool enabled = true;
} residual_echo_detector;
diff --git a/modules/audio_processing/include/config.h b/modules/audio_processing/include/config.h
index 7615f62..7c34de8 100644
--- a/modules/audio_processing/include/config.h
+++ b/modules/audio_processing/include/config.h
@@ -35,7 +35,7 @@
kIntelligibility,
kEchoCanceller3, // Deprecated
kAecRefinedAdaptiveFilter,
- kLevelControl // Deprecated
+ kLevelControl
};
// Class Config is designed to ease passing a set of options across webrtc code.
diff --git a/modules/audio_processing/level_controller/biquad_filter.cc b/modules/audio_processing/level_controller/biquad_filter.cc
new file mode 100644
index 0000000..5a4ddc8
--- /dev/null
+++ b/modules/audio_processing/level_controller/biquad_filter.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/biquad_filter.h"
+
+namespace webrtc {
+
+// Filters the input x into the output y with a second-order IIR section
+// (direct form I) using the coefficients set through Initialize(). The
+// filter state persists between calls.
+void BiQuadFilter::Process(rtc::ArrayView<const float> x,
+                           rtc::ArrayView<float> y) {
+  for (size_t k = 0; k < x.size(); ++k) {
+    // Read x[k] into a temporary before y[k] is written so that the call also
+    // works in place, i.e., when x and y refer to the same array.
+    const float tmp = x[k];
+    y[k] = coefficients_.b[0] * tmp + coefficients_.b[1] * biquad_state_.b[0] +
+           coefficients_.b[2] * biquad_state_.b[1] -
+           coefficients_.a[0] * biquad_state_.a[0] -
+           coefficients_.a[1] * biquad_state_.a[1];
+    biquad_state_.b[1] = biquad_state_.b[0];
+    biquad_state_.b[0] = tmp;
+    biquad_state_.a[1] = biquad_state_.a[0];
+    biquad_state_.a[0] = y[k];
+  }
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/biquad_filter.h b/modules/audio_processing/level_controller/biquad_filter.h
new file mode 100644
index 0000000..dad104d
--- /dev/null
+++ b/modules/audio_processing/level_controller/biquad_filter.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class BiQuadFilter {
+ public:
+  // Feed-forward (b) and feedback (a) filter coefficients.
+  struct BiQuadCoefficients {
+    float b[3];
+    float a[2];
+  };
+
+  BiQuadFilter() = default;
+
+  // Sets the coefficients. The filter state is left untouched.
+  void Initialize(const BiQuadCoefficients& coefficients) {
+    coefficients_ = coefficients;
+  }
+
+  // Produces a filtered output y of the input x. Both x and y need to
+  // have the same length.
+  void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+
+ private:
+  // The two most recent inputs (b) and outputs (a); zero at construction.
+  struct BiQuadState {
+    float b[2] = {0.f, 0.f};
+    float a[2] = {0.f, 0.f};
+  };
+
+  BiQuadState biquad_state_;
+  // Zero-initialized so that Process() before Initialize() outputs zeros
+  // instead of reading indeterminate coefficients.
+  BiQuadCoefficients coefficients_ = {{0.f, 0.f, 0.f}, {0.f, 0.f}};
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(BiQuadFilter);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
diff --git a/modules/audio_processing/level_controller/down_sampler.cc b/modules/audio_processing/level_controller/down_sampler.cc
new file mode 100644
index 0000000..a1702f4
--- /dev/null
+++ b/modules/audio_processing/level_controller/down_sampler.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/down_sampler.h"
+
+#include <string.h>
+#include <algorithm>
+#include <vector>
+
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/biquad_filter.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Low-pass (band-limiting) filter coefficients. They are designed on the
+// basis that only the first 40 bins of the spectrum of the downsampled
+// signal are used.
+// [B,A] = butter(2,(41/64*4000)/8000)
+const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_16kHz = {
+    {0.1455f, 0.2911f, 0.1455f},
+    {-0.6698f, 0.2520f}};
+
+// [B,A] = butter(2,(41/64*4000)/16000)
+const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_32kHz = {
+    {0.0462f, 0.0924f, 0.0462f},
+    {-1.3066f, 0.4915f}};
+
+// [B,A] = butter(2,(41/64*4000)/24000)
+const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_48kHz = {
+    {0.0226f, 0.0452f, 0.0226f},
+    {-1.5320f, 0.6224f}};
+
+} // namespace
+
+DownSampler::DownSampler(ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper) {
+  Initialize(48000);  // Default to 48 kHz until Initialize() is called again.
+}
+void DownSampler::Initialize(int sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
+  // The output rate is fixed at 8 kHz, which determines the decimation factor.
+  sample_rate_hz_ = sample_rate_hz;
+  down_sampling_factor_ = rtc::CheckedDivExact(sample_rate_hz_, 8000);
+
+  // Select the band-limiting filter for the input rate. Note that the filter
+  // is not used if the sample rate is 8 kHz, so none is set up for that rate.
+  if (sample_rate_hz_ == AudioProcessing::kSampleRate16kHz) {
+    low_pass_filter_.Initialize(kLowPassFilterCoefficients_16kHz);
+  } else if (sample_rate_hz_ == AudioProcessing::kSampleRate32kHz) {
+    low_pass_filter_.Initialize(kLowPassFilterCoefficients_32kHz);
+  } else if (sample_rate_hz_ == AudioProcessing::kSampleRate48kHz) {
+    low_pass_filter_.Initialize(kLowPassFilterCoefficients_48kHz);
+  }
+}
+
+// Writes the 8 kHz version of the chunk `in` (at the configured input rate)
+// to `out`: the input is low-pass filtered and then decimated.
+void DownSampler::DownSample(rtc::ArrayView<const float> in,
+                             rtc::ArrayView<float> out) {
+  data_dumper_->DumpWav("lc_down_sampler_input", in, sample_rate_hz_, 1);
+  RTC_DCHECK_EQ(sample_rate_hz_ * AudioProcessing::kChunkSizeMs / 1000,
+                in.size());
+  RTC_DCHECK_EQ(
+      AudioProcessing::kSampleRate8kHz * AudioProcessing::kChunkSizeMs / 1000,
+      out.size());
+
+  if (sample_rate_hz_ == AudioProcessing::kSampleRate8kHz) {
+    // Already at the target rate: pass the samples through unchanged.
+    std::copy(in.data(), in.data() + in.size(), out.data());
+  } else {
+    const size_t kMaxNumFrames = AudioProcessing::kSampleRate48kHz *
+                                 AudioProcessing::kChunkSizeMs / 1000;
+    float filtered[kMaxNumFrames];
+    // Band-limit the signal to 4 kHz, then keep every N-th filtered sample.
+    low_pass_filter_.Process(in, rtc::ArrayView<float>(filtered, in.size()));
+    size_t k = 0;
+    for (size_t j = 0; j < out.size(); ++j, k += down_sampling_factor_) {
+      RTC_DCHECK_GT(kMaxNumFrames, k);
+      out[j] = filtered[k];
+    }
+  }
+
+  data_dumper_->DumpWav("lc_down_sampler_output", out,
+                        AudioProcessing::kSampleRate8kHz, 1);
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/down_sampler.h b/modules/audio_processing/level_controller/down_sampler.h
new file mode 100644
index 0000000..d650242
--- /dev/null
+++ b/modules/audio_processing/level_controller/down_sampler.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_
+
+#include "api/array_view.h"
+#include "modules/audio_processing/level_controller/biquad_filter.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+class DownSampler {
+ public:
+  explicit DownSampler(ApmDataDumper* data_dumper);
+  void Initialize(int sample_rate_hz);
+  // Converts one chunk at the configured input rate into its 8 kHz version.
+  void DownSample(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);
+
+ private:
+  ApmDataDumper* data_dumper_;  // Used for debug dumps of input and output.
+  int sample_rate_hz_;          // Input rate; set by Initialize().
+  int down_sampling_factor_;    // sample_rate_hz_ / 8000.
+  BiQuadFilter low_pass_filter_;
+
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(DownSampler);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_
diff --git a/modules/audio_processing/level_controller/gain_applier.cc b/modules/audio_processing/level_controller/gain_applier.cc
new file mode 100644
index 0000000..018f809
--- /dev/null
+++ b/modules/audio_processing/level_controller/gain_applier.cc
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/gain_applier.h"
+
+#include <algorithm>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+namespace {
+
+const float kMaxSampleValue = 32767.f;   // Upper limit of the sample range.
+const float kMinSampleValue = -32767.f;  // Lower limit of the sample range.
+
+int CountSaturations(rtc::ArrayView<const float> in) {
+  int count = 0;  // Samples at or beyond either end of the allowed range.
+  for (float v : in) count += (v >= kMaxSampleValue || v <= kMinSampleValue);
+  return count;
+}
+
+int CountSaturations(const AudioBuffer& audio) {
+  int num_saturations = 0;  // Summed over all channels of the buffer.
+  for (size_t k = 0; k < audio.num_channels(); ++k) {
+    num_saturations += CountSaturations(rtc::ArrayView<const float>(
+        audio.channels_const_f()[k], audio.num_frames()));
+  }
+  return num_saturations;
+}
+
+// Clamps every sample of x to [kMinSampleValue, kMaxSampleValue].
+void LimitToAllowedRange(rtc::ArrayView<float> x) {
+  for (float& sample : x) {
+    sample = std::min(kMaxSampleValue, std::max(kMinSampleValue, sample));
+  }
+}
+
+void LimitToAllowedRange(AudioBuffer* audio) {
+  for (size_t k = 0; k < audio->num_channels(); ++k) {  // Clamp each channel.
+    LimitToAllowedRange(
+        rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));
+  }
+}
+
+// Scales x while ramping the gain from old_gain up towards new_gain in steps
+// of step_size per sample. Returns the final gain (old_gain if x is empty).
+float ApplyIncreasingGain(float new_gain, float old_gain, float step_size,
+                          rtc::ArrayView<float> x) {
+  RTC_DCHECK_LT(0.f, step_size);
+  float current_gain = old_gain;
+  for (size_t i = 0; i < x.size(); ++i) {
+    current_gain = std::min(new_gain, current_gain + step_size);
+    x[i] *= current_gain;
+  }
+  return current_gain;
+}
+
+// Scales x while ramping the gain from old_gain down towards new_gain; the
+// per-sample step_size is negative. Returns the final gain.
+float ApplyDecreasingGain(float new_gain, float old_gain, float step_size,
+                          rtc::ArrayView<float> x) {
+  RTC_DCHECK_GT(0.f, step_size);
+  float current_gain = old_gain;
+  for (size_t i = 0; i < x.size(); ++i) {
+    current_gain = std::max(new_gain, current_gain + step_size);
+    x[i] *= current_gain;
+  }
+  return current_gain;
+}
+
+// Multiplies every sample of x by gain and returns gain.
+float ApplyConstantGain(float gain, rtc::ArrayView<float> x) {
+  for (size_t i = 0; i < x.size(); ++i) {
+    x[i] *= gain;
+  }
+  return gain;
+}
+
+// Applies new_gain to x, ramping from old_gain with the step size that matches
+// the direction of the change. Returns the gain reached at the end of x.
+float ApplyGain(float new_gain, float old_gain, float increase_step_size,
+                float decrease_step_size, rtc::ArrayView<float> x) {
+  RTC_DCHECK_LT(0.f, increase_step_size);
+  RTC_DCHECK_GT(0.f, decrease_step_size);
+  if (new_gain > old_gain) {
+    return ApplyIncreasingGain(new_gain, old_gain, increase_step_size, x);
+  }
+  if (new_gain == old_gain) {
+    return ApplyConstantGain(new_gain, x);
+  }
+  // Remaining case: the gain has to come down.
+  return ApplyDecreasingGain(new_gain, old_gain, decrease_step_size, x);
+}
+
+} // namespace
+
+GainApplier::GainApplier(ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper) {}  // Initialize() must run before Process().
+
+// Resets the gain state and scales the per-sample step sizes, which are
+// specified for 48 kHz, to sample_rate_hz.
+void GainApplier::Initialize(int sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
+  const float kGainIncreaseStepSize48kHz = 0.0001f;
+  const float kGainDecreaseStepSize48kHz = -0.01f;
+  const float kGainSaturatedDecreaseStepSize48kHz = -0.05f;
+  // The same factor applies to all three step sizes.
+  const float rate_ratio =
+      static_cast<float>(AudioProcessing::kSampleRate48kHz) / sample_rate_hz;
+
+  gain_increase_step_size_ = kGainIncreaseStepSize48kHz * rate_ratio;
+  gain_normal_decrease_step_size_ = kGainDecreaseStepSize48kHz * rate_ratio;
+  gain_saturated_decrease_step_size_ =
+      kGainSaturatedDecreaseStepSize48kHz * rate_ratio;
+  old_gain_ = 1.f;
+  last_frame_was_saturated_ = false;
+}
+
+int GainApplier::Process(float new_gain, AudioBuffer* audio) {
+  RTC_CHECK_NE(0.f, gain_increase_step_size_);  // Zero until Initialize().
+  RTC_CHECK_NE(0.f, gain_normal_decrease_step_size_);
+  RTC_CHECK_NE(0.f, gain_saturated_decrease_step_size_);
+  int num_saturations = 0;
+  if (new_gain != 1.f) {  // A requested gain of exactly 1 leaves audio as is.
+    float last_applied_gain = 1.f;
+    float gain_decrease_step_size = last_frame_was_saturated_
+                                        ? gain_saturated_decrease_step_size_
+                                        : gain_normal_decrease_step_size_;
+    for (size_t k = 0; k < audio->num_channels(); ++k) {
+      last_applied_gain = ApplyGain(
+          new_gain, old_gain_, gain_increase_step_size_,
+          gain_decrease_step_size,
+          rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));
+    }
+    // NOTE(review): no visible code sets last_frame_was_saturated_ to true.
+    num_saturations = CountSaturations(*audio);  // Counted before clamping.
+    LimitToAllowedRange(audio);
+    old_gain_ = last_applied_gain;
+  }
+  // old_gain_ is dumped even when no gain was applied to this frame.
+  data_dumper_->DumpRaw("lc_last_applied_gain", 1, &old_gain_);
+
+  return num_saturations;
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/gain_applier.h b/modules/audio_processing/level_controller/gain_applier.h
new file mode 100644
index 0000000..5669f45
--- /dev/null
+++ b/modules/audio_processing/level_controller/gain_applier.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_
+
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+// Applies a gain to an AudioBuffer; see Process() for the returned value.
+class GainApplier {
+ public:
+  explicit GainApplier(ApmDataDumper* data_dumper);
+  void Initialize(int sample_rate_hz);
+  // Applies the specified gain to the audio frame and returns the resulting
+  // number of saturated sample values.
+  int Process(float new_gain, AudioBuffer* audio);
+
+ private:
+  ApmDataDumper* const data_dumper_;
+  float old_gain_ = 1.f;
+  float gain_increase_step_size_ = 0.f;
+  float gain_normal_decrease_step_size_ = 0.f;
+  float gain_saturated_decrease_step_size_ = 0.f;
+  bool last_frame_was_saturated_ = false;  // Also reset by Initialize().
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(GainApplier);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_
diff --git a/modules/audio_processing/level_controller/gain_selector.cc b/modules/audio_processing/level_controller/gain_selector.cc
new file mode 100644
index 0000000..3ab75b1
--- /dev/null
+++ b/modules/audio_processing/level_controller/gain_selector.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/gain_selector.h"
+
+#include <math.h>
+#include <algorithm>
+
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/level_controller_constants.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Default-constructs the selector in its 48 kHz configuration; Initialize()
+// can be called again with another rate.
+GainSelector::GainSelector() { Initialize(AudioProcessing::kSampleRate48kHz); }
+
+void GainSelector::Initialize(int sample_rate_hz) {
+  highly_nonstationary_signal_hold_counter_ = 0;
+  frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);  // Rate / 100.
+  gain_ = 1.f;  // Restart from unity gain.
+}
+
+// Selects the gain to be applied by the level controller such that
+// 1) The level of the stationary noise does not exceed
+//    a predefined threshold.
+// 2) The gain does not exceed the gain that has been found
+//    to saturate the signal.
+// 3) The peak level achieves the target peak level.
+// 4) The gain is not below 1.
+// 5) The gain is 1 if the signal has been classified as stationary
+//    for a long time.
+// 6) The gain is not above the maximum gain.
+float GainSelector::GetNewGain(float peak_level,
+                               float noise_energy,
+                               float saturating_gain,
+                               bool gain_jumpstart,
+                               SignalClassifier::SignalType signal_type) {
+  RTC_DCHECK_LT(0.f, peak_level);
+
+  // Re-arm the hold counter on a jumpstart or a highly non-stationary frame.
+  const bool restart_hold =
+      gain_jumpstart ||
+      signal_type == SignalClassifier::SignalType::kHighlyNonStationary;
+  if (restart_hold) {
+    highly_nonstationary_signal_hold_counter_ = 100;
+  } else if (highly_nonstationary_signal_hold_counter_ > 0) {
+    --highly_nonstationary_signal_hold_counter_;
+  } else {
+    highly_nonstationary_signal_hold_counter_ = 0;
+  }
+
+  // Default to a gain of 1, which is what applies once the hold counter has
+  // run out, to avoid amplifying pure noise.
+  float target_gain = 1.0f;
+  if (highly_nonstationary_signal_hold_counter_ > 0) {
+    // Gain that would bring the peak level up to the target level.
+    target_gain = kTargetLcPeakLevel / peak_level;
+
+    // Cap the gain so that the noise is not amplified too much.
+    const float noise_energy_cap = kMaxLcNoisePower * frame_length_;
+    if (noise_energy * target_gain * target_gain > noise_energy_cap) {
+      RTC_DCHECK_LE(0.f, noise_energy);
+      target_gain = sqrtf(noise_energy_cap / noise_energy);
+    }
+  }
+
+  // Smoothly move the gain towards the target gain.
+  gain_ += 0.2f * (target_gain - gain_);
+
+  // Keep the gain no higher than the saturating and maximum gains, and no
+  // lower than 1.
+  gain_ = std::min(std::min(gain_, saturating_gain), kMaxLcGain);
+  gain_ = std::max(gain_, 1.f);
+
+  return gain_;
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/gain_selector.h b/modules/audio_processing/level_controller/gain_selector.h
new file mode 100644
index 0000000..7966c43
--- /dev/null
+++ b/modules/audio_processing/level_controller/gain_selector.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_
+
+#include "rtc_base/constructormagic.h"
+
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+
+namespace webrtc {
+
+// Picks the gain for each frame; the selection rules are listed above the
+// definition of GetNewGain().
+class GainSelector {
+ public:
+  GainSelector();
+  void Initialize(int sample_rate_hz);
+  float GetNewGain(float peak_level, float noise_energy, float saturating_gain,
+                   bool gain_jumpstart,
+                   SignalClassifier::SignalType signal_type);
+
+ private:
+  float gain_;
+  size_t frame_length_;
+  int highly_nonstationary_signal_hold_counter_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(GainSelector);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_
diff --git a/modules/audio_processing/level_controller/level_controller.cc b/modules/audio_processing/level_controller/level_controller.cc
new file mode 100644
index 0000000..b7854a0
--- /dev/null
+++ b/modules/audio_processing/level_controller/level_controller.cc
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/level_controller.h"
+
+#include <math.h>
+#include <algorithm>
+#include <numeric>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/level_controller/gain_applier.h"
+#include "modules/audio_processing/level_controller/gain_selector.h"
+#include "modules/audio_processing/level_controller/noise_level_estimator.h"
+#include "modules/audio_processing/level_controller/peak_level_estimator.h"
+#include "modules/audio_processing/level_controller/saturating_gain_estimator.h"
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+
+// Moves |*dc_level| towards the mean of |x| by |forgetting_factor| times the
+// difference, then subtracts the updated level from every sample of |x|.
+void UpdateAndRemoveDcLevel(float forgetting_factor,
+                            float* dc_level,
+                            rtc::ArrayView<float> x) {
+  RTC_DCHECK(!x.empty());
+  float sum = 0.0f;
+  for (const float sample : x) sum += sample;
+  const float mean = sum / static_cast<float>(x.size());
+  *dc_level += forgetting_factor * (mean - *dc_level);
+  std::for_each(x.begin(), x.end(), [dc_level](float& s) { s -= *dc_level; });
+}
+
+// Returns the largest per-channel sum of squared samples found in |audio|.
+float FrameEnergy(const AudioBuffer& audio) {
+  float max_energy = 0.f;
+  for (size_t ch = 0; ch < audio.num_channels(); ++ch) {
+    const float* const x = audio.channels_const_f()[ch];
+    float acc = 0.f;
+    for (size_t i = 0; i < audio.num_frames(); ++i) acc += x[i] * x[i];
+    max_energy = std::max(acc, max_energy);
+  }
+  return max_energy;
+}
+
+// Returns the largest sample magnitude over all channels of |audio|. The
+// magnitude (not the signed value) is used so negative peaks are counted.
+float PeakLevel(const AudioBuffer& audio) {
+  float peak_level = 0.f;
+  for (size_t k = 0; k < audio.num_channels(); ++k) {
+    const float* channel = audio.channels_const_f()[k];
+    for (size_t i = 0; i < audio.num_frames(); ++i)
+      peak_level = std::max(std::abs(channel[i]), peak_level);
+  }
+  return peak_level;
+}
+
+const int kMetricsFrameInterval = 1000;
+
+} // namespace
+
+int LevelController::instance_count_ = 0;
+
+// Stores the frame length (sample rate / 100) and clears the statistics.
+void LevelController::Metrics::Initialize(int sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
+  frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  Reset();
+}
+
+// Zeroes the frame counter and every running sum and maximum.
+void LevelController::Metrics::Reset() {
+  metrics_frame_counter_ = 0;
+
+  // Running sums.
+  gain_sum_ = peak_level_sum_ = noise_energy_sum_ = 0.f;
+  // Running maxima.
+  max_gain_ = max_peak_level_ = max_noise_energy_ = 0.f;
+}
+
+// Accumulates the per-frame values and, once every kMetricsFrameInterval
+// calls, reports histograms and a log line before starting a new interval.
+void LevelController::Metrics::Update(float long_term_peak_level,
+                                      float noise_energy,
+                                      float gain,
+                                      float frame_peak_level) {
+  // Running sums feed the averages, running maxima feed the "Max" metrics.
+  gain_sum_ += gain;
+  max_gain_ = std::max(max_gain_, gain);
+  peak_level_sum_ += long_term_peak_level;
+  max_peak_level_ = std::max(max_peak_level_, long_term_peak_level);
+  noise_energy_sum_ += noise_energy;
+  max_noise_energy_ = std::max(max_noise_energy_, noise_energy);
+
+  // Nothing is reported until a full interval of frames has been seen.
+  ++metrics_frame_counter_;
+  if (metrics_frame_counter_ != kMetricsFrameInterval) {
+    return;
+  }
+  RTC_DCHECK_LT(0, frame_length_);
+  RTC_DCHECK_LT(0, kMetricsFrameInterval);
+
+  // Converts a power value to a whole number of dBFS; the 1e-10f bias
+  // presumably guards against log10(0).
+  const float kdBFSOffset = 90.3090f;
+  const auto power_to_dbfs = [kdBFSOffset](float power) {
+    return static_cast<int>(10 * log10(power + 1e-10f) - kdBFSOffset);
+  };
+  const int num_frames_squared = kMetricsFrameInterval * kMetricsFrameInterval;
+
+  const int max_noise_power_dbfs =
+      power_to_dbfs(max_noise_energy_ / frame_length_);
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxNoisePower",
+                       max_noise_power_dbfs, -90, 0, 50);
+
+  const int average_noise_power_dbfs = power_to_dbfs(
+      noise_energy_sum_ / (frame_length_ * kMetricsFrameInterval));
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageNoisePower",
+                       average_noise_power_dbfs, -90, 0, 50);
+
+  const int max_peak_level_dbfs =
+      power_to_dbfs(max_peak_level_ * max_peak_level_);
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxPeakLevel",
+                       max_peak_level_dbfs, -90, 0, 50);
+
+  const int average_peak_level_dbfs =
+      power_to_dbfs(peak_level_sum_ * peak_level_sum_ / num_frames_squared);
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AveragePeakLevel",
+                       average_peak_level_dbfs, -90, 0, 50);
+
+  RTC_DCHECK_LE(1.f, max_gain_);
+  RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);
+
+  // The gain values are converted to dB without the offset or the bias.
+  const int max_gain_db = static_cast<int>(10 * log10(max_gain_ * max_gain_));
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", max_gain_db, 0,
+                       33, 30);
+
+  const int average_gain_db = static_cast<int>(
+      10 * log10(gain_sum_ * gain_sum_ / num_frames_squared));
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain",
+                       average_gain_db, 0, 33, 30);
+
+  // The values of the most recent frame are logged but not histogrammed.
+  const int long_term_peak_level_dbfs =
+      power_to_dbfs(long_term_peak_level * long_term_peak_level);
+  const int frame_peak_level_dbfs =
+      power_to_dbfs(frame_peak_level * frame_peak_level);
+
+  RTC_LOG(LS_INFO) << "Level Controller metrics: {Max noise power: "
+                   << max_noise_power_dbfs << " dBFS, Average noise power: "
+                   << average_noise_power_dbfs
+                   << " dBFS, Max long term peak level: "
+                   << max_peak_level_dbfs
+                   << " dBFS, Average long term peak level: "
+                   << average_peak_level_dbfs << " dBFS, Max gain: "
+                   << max_gain_db << " dB, Average gain: " << average_gain_db
+                   << " dB, Long term peak level: "
+                   << long_term_peak_level_dbfs
+                   << " dBFS, Last frame peak level: "
+                   << frame_peak_level_dbfs << " dBFS}";
+  Reset();
+}
+
+// Wires the submodules to a per-instance data dumper; defaults to 48 kHz.
+LevelController::LevelController()
+    : data_dumper_(new ApmDataDumper(instance_count_++)),
+      gain_applier_(data_dumper_.get()),
+      signal_classifier_(data_dumper_.get()),
+      peak_level_estimator_(kTargetLcPeakLeveldBFS) {
+  Initialize(AudioProcessing::kSampleRate48kHz);
+}
+
+LevelController::~LevelController() {}
+
+// (Re)initializes the submodules and the local state for |sample_rate_hz|.
+void LevelController::Initialize(int sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
+  sample_rate_hz_ = sample_rate_hz;
+  last_gain_ = 1.0f;
+  dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;
+  for (float& level : dc_level_) level = 0.f;
+  data_dumper_->InitiateNewSetOfRecordings();
+  gain_selector_.Initialize(sample_rate_hz);
+  gain_applier_.Initialize(sample_rate_hz);
+  signal_classifier_.Initialize(sample_rate_hz);
+  noise_level_estimator_.Initialize(sample_rate_hz);
+  peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
+  saturating_gain_estimator_.Initialize();
+  metrics_.Initialize(sample_rate_hz);
+}
+
+// Runs the level controller on one frame: removes DC, classifies the signal,
+// estimates noise and peak levels, selects a gain and applies it in place.
+void LevelController::Process(AudioBuffer* audio) {
+  // dc_level_ has room for two channels only.
+  RTC_DCHECK_LT(0, audio->num_channels());
+  RTC_DCHECK_GE(2, audio->num_channels());
+  RTC_DCHECK_NE(0.f, dc_forgetting_factor_);
+  RTC_DCHECK(sample_rate_hz_);
+  data_dumper_->DumpWav("lc_input", audio->num_frames(),
+                        audio->channels_const_f()[0], *sample_rate_hz_, 1);
+
+  // Track and subtract the DC level of each channel.
+  for (size_t ch = 0; ch < audio->num_channels(); ++ch) {
+    rtc::ArrayView<float> samples(audio->channels_f()[ch],
+                                  audio->num_frames());
+    UpdateAndRemoveDcLevel(dc_forgetting_factor_, &dc_level_[ch], samples);
+  }
+
+  SignalClassifier::SignalType signal_type;
+  signal_classifier_.Analyze(*audio, &signal_type);
+  int signal_type_as_int = static_cast<int>(signal_type);
+  data_dumper_->DumpRaw("lc_signal_type", 1, &signal_type_as_int);
+
+  // Noise energy estimate, driven by the classification and frame energy.
+  float noise_energy =
+      noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio));
+
+  // Peak level of this frame and the estimator's long-term peak level.
+  const float frame_peak_level = PeakLevel(*audio);
+  const float long_term_peak_level =
+      peak_level_estimator_.Analyze(signal_type, frame_peak_level);
+
+  float saturating_gain = saturating_gain_estimator_.GetGain();
+
+  // Select the gain; the jumpstart flag is cleared so it only applies once.
+  last_gain_ =
+      gain_selector_.GetNewGain(long_term_peak_level, noise_energy,
+                                saturating_gain, gain_jumpstart_, signal_type);
+  gain_jumpstart_ = false;
+
+  // Apply the gain and feed the resulting saturation count back into the
+  // saturating-gain estimate.
+  const int num_saturations = gain_applier_.Process(last_gain_, audio);
+  saturating_gain_estimator_.Update(last_gain_, num_saturations);
+
+  // Update the metrics.
+  metrics_.Update(long_term_peak_level, noise_energy, last_gain_,
+                  frame_peak_level);
+
+  data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
+  data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
+  data_dumper_->DumpRaw("lc_peak_level", 1, &long_term_peak_level);
+  data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);
+
+  data_dumper_->DumpWav("lc_output", audio->num_frames(),
+                        audio->channels_f()[0], *sample_rate_hz_, 1);
+}
+
+void LevelController::ApplyConfig(
+    const AudioProcessing::Config::LevelController& config) {
+  RTC_DCHECK(Validate(config));
+  gain_jumpstart_ = true;  // Cleared again by the next Process() call.
+  config_ = config;
+  peak_level_estimator_.Initialize(config.initial_peak_level_dbfs);
+}
+
+// Renders |config| as "{enabled: <bool>, initial_peak_level_dbfs: <value>}".
+std::string LevelController::ToString(
+    const AudioProcessing::Config::LevelController& config) {
+  std::stringstream ss;
+  ss << "{" << "enabled: " << (config.enabled ? "true" : "false") << ", ";
+  ss << "initial_peak_level_dbfs: " << config.initial_peak_level_dbfs << "}";
+  return ss.str();
+}
+
+// Accepts an initial peak level strictly between -(100 + eps) and eps.
+bool LevelController::Validate(
+    const AudioProcessing::Config::LevelController& config) {
+  const float kTolerance = std::numeric_limits<float>::epsilon();
+  const float level = config.initial_peak_level_dbfs;
+  return level > -(100.f + kTolerance) && level < kTolerance;
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/level_controller.h b/modules/audio_processing/level_controller/level_controller.h
new file mode 100644
index 0000000..224b886
--- /dev/null
+++ b/modules/audio_processing/level_controller/level_controller.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/optional.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/gain_applier.h"
+#include "modules/audio_processing/level_controller/gain_selector.h"
+#include "modules/audio_processing/level_controller/noise_level_estimator.h"
+#include "modules/audio_processing/level_controller/peak_level_estimator.h"
+#include "modules/audio_processing/level_controller/saturating_gain_estimator.h"
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+// Level controller for audio frames; Process() modifies the frame in place.
+class LevelController {
+ public:
+  LevelController();
+  ~LevelController();
+
+  void Initialize(int sample_rate_hz);
+  void Process(AudioBuffer* audio);
+  float GetLastGain() const { return last_gain_; }
+
+  // TODO(peah): This method is a temporary solution as the aim is to
+  // instead apply the config inside the constructor. Therefore this is likely
+  // to change.
+  void ApplyConfig(const AudioProcessing::Config::LevelController& config);
+  // Validates a config.
+  static bool Validate(const AudioProcessing::Config::LevelController& config);
+  // Dumps a config to a string.
+  static std::string ToString(
+      const AudioProcessing::Config::LevelController& config);
+
+ private:
+  // Accumulates gain, peak level and noise statistics passed to Update().
+  class Metrics {
+   public:
+    Metrics() { Initialize(AudioProcessing::kSampleRate48kHz); }
+    void Initialize(int sample_rate_hz);
+    void Update(float long_term_peak_level, float noise_energy, float gain,
+                float frame_peak_level);
+
+   private:
+    void Reset();
+
+    size_t metrics_frame_counter_;
+    float gain_sum_;
+    float peak_level_sum_;
+    float noise_energy_sum_;
+    float max_gain_;
+    float max_peak_level_;
+    float max_noise_energy_;
+    float frame_length_;
+  };
+
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  GainSelector gain_selector_;
+  GainApplier gain_applier_;
+  SignalClassifier signal_classifier_;
+  NoiseLevelEstimator noise_level_estimator_;
+  PeakLevelEstimator peak_level_estimator_;
+  SaturatingGainEstimator saturating_gain_estimator_;
+  Metrics metrics_;
+  rtc::Optional<int> sample_rate_hz_;
+  static int instance_count_;
+  float dc_level_[2];
+  float dc_forgetting_factor_;
+  float last_gain_;
+  bool gain_jumpstart_ = false;
+  AudioProcessing::Config::LevelController config_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(LevelController);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
diff --git a/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc b/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc
new file mode 100644
index 0000000..83f6725
--- /dev/null
+++ b/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <numeric>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/level_controller.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "modules/audio_processing/test/performance_timer.h"
+#include "modules/audio_processing/test/simulator_buffers.h"
+#include "rtc_base/random.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gtest.h"
+#include "test/testsupport/perf_test.h"
+
+namespace webrtc {
+namespace {
+
+const size_t kNumFramesToProcess = 300;
+const size_t kNumFramesToProcessAtWarmup = 300;
+const size_t kToTalNumFrames =
+ kNumFramesToProcess + kNumFramesToProcessAtWarmup;
+
+// Times LevelController::Process() in isolation and prints the mean and
+// standard deviation of the call duration.
+void RunStandaloneSubmodule(int sample_rate_hz, size_t num_channels) {
+  test::SimulatorBuffers buffers(sample_rate_hz, sample_rate_hz, sample_rate_hz,
+                                 sample_rate_hz, num_channels, num_channels,
+                                 num_channels, num_channels);
+  test::PerformanceTimer timer(kNumFramesToProcess);
+
+  LevelController level_controller;
+  level_controller.Initialize(sample_rate_hz);
+
+  // Only the frames after the warm-up period are timed.
+  for (size_t frame_no = 0; frame_no < kToTalNumFrames; ++frame_no) {
+    const bool is_timed = frame_no >= kNumFramesToProcessAtWarmup;
+    buffers.UpdateInputBuffers();
+    if (is_timed) timer.StartTimer();
+    level_controller.Process(buffers.capture_input_buffer.get());
+    if (is_timed) timer.StopTimer();
+  }
+
+  const std::string trace_name = "_" + std::to_string(sample_rate_hz) + "Hz_" +
+                                 std::to_string(num_channels) + "_channels";
+  webrtc::test::PrintResultMeanAndError(
+      "level_controller_call_durations", trace_name, "StandaloneLevelControl",
+      timer.GetDurationAverage(), timer.GetDurationStandardDeviation(), "us",
+      false);
+}
+
+// Times the render and capture calls of an AudioProcessing instance that has
+// the level controller enabled, and prints mean/deviation for the render,
+// capture and total durations. Warm-up frames are processed but not timed.
+void RunTogetherWithApm(const std::string& test_description,
+                        int render_input_sample_rate_hz,
+                        int render_output_sample_rate_hz,
+                        int capture_input_sample_rate_hz,
+                        int capture_output_sample_rate_hz,
+                        size_t num_channels,
+                        bool use_mobile_aec,
+                        bool include_default_apm_processing) {
+  test::SimulatorBuffers buffers(
+      render_input_sample_rate_hz, capture_input_sample_rate_hz,
+      render_output_sample_rate_hz, capture_output_sample_rate_hz, num_channels,
+      num_channels, num_channels, num_channels);
+  test::PerformanceTimer render_timer(kNumFramesToProcess);
+  test::PerformanceTimer capture_timer(kNumFramesToProcess);
+  test::PerformanceTimer total_timer(kNumFramesToProcess);
+
+  webrtc::Config config;
+  AudioProcessing::Config apm_config;
+  if (include_default_apm_processing) {
+    config.Set<DelayAgnostic>(new DelayAgnostic(true));
+    config.Set<ExtendedFilter>(new ExtendedFilter(true));
+  }
+  // Every |apm_config| field must be set before ApplyConfig() below; a later
+  // write to this local object would never reach |apm|.
+  apm_config.level_controller.enabled = true;
+  apm_config.residual_echo_detector.enabled = include_default_apm_processing;
+  apm_config.high_pass_filter.enabled = include_default_apm_processing;
+
+  std::unique_ptr<AudioProcessing> apm;
+  apm.reset(AudioProcessingBuilder().Create(config));
+  ASSERT_TRUE(apm.get());
+  apm->ApplyConfig(apm_config);
+
+  // Configure the submodules that are toggled through their own interfaces.
+  ASSERT_EQ(AudioProcessing::kNoError,
+            apm->gain_control()->Enable(include_default_apm_processing));
+  if (use_mobile_aec) {
+    ASSERT_EQ(AudioProcessing::kNoError,
+              apm->echo_cancellation()->Enable(false));
+    ASSERT_EQ(AudioProcessing::kNoError, apm->echo_control_mobile()->Enable(
+                                             include_default_apm_processing));
+  } else {
+    ASSERT_EQ(AudioProcessing::kNoError,
+              apm->echo_cancellation()->Enable(include_default_apm_processing));
+    ASSERT_EQ(AudioProcessing::kNoError,
+              apm->echo_control_mobile()->Enable(false));
+  }
+  ASSERT_EQ(AudioProcessing::kNoError,
+            apm->noise_suppression()->Enable(include_default_apm_processing));
+  ASSERT_EQ(AudioProcessing::kNoError,
+            apm->voice_detection()->Enable(include_default_apm_processing));
+  ASSERT_EQ(AudioProcessing::kNoError,
+            apm->level_estimator()->Enable(include_default_apm_processing));
+
+  StreamConfig render_input_config(render_input_sample_rate_hz, num_channels,
+                                   false);
+  StreamConfig render_output_config(render_output_sample_rate_hz, num_channels,
+                                    false);
+  StreamConfig capture_input_config(capture_input_sample_rate_hz, num_channels,
+                                    false);
+  StreamConfig capture_output_config(capture_output_sample_rate_hz,
+                                     num_channels, false);
+
+  // Process the warm-up frames untimed, then time the remaining frames.
+  for (size_t frame_no = 0; frame_no < kToTalNumFrames; ++frame_no) {
+    buffers.UpdateInputBuffers();
+    const bool is_timed = frame_no >= kNumFramesToProcessAtWarmup;
+
+    if (is_timed) {
+      total_timer.StartTimer();
+      render_timer.StartTimer();
+    }
+    ASSERT_EQ(AudioProcessing::kNoError,
+              apm->ProcessReverseStream(
+                  &buffers.render_input[0], render_input_config,
+                  render_output_config, &buffers.render_output[0]));
+    if (is_timed) {
+      render_timer.StopTimer();
+      capture_timer.StartTimer();
+    }
+
+    ASSERT_EQ(AudioProcessing::kNoError, apm->set_stream_delay_ms(0));
+    ASSERT_EQ(
+        AudioProcessing::kNoError,
+        apm->ProcessStream(&buffers.capture_input[0], capture_input_config,
+                           capture_output_config, &buffers.capture_output[0]));
+    if (is_timed) {
+      capture_timer.StopTimer();
+      total_timer.StopTimer();
+    }
+  }
+
+  // Shared prefix of the three result names printed below.
+  const std::string result_prefix =
+      "_" + std::to_string(render_input_sample_rate_hz) + "_" +
+      std::to_string(render_output_sample_rate_hz) + "_" +
+      std::to_string(capture_input_sample_rate_hz) + "_" +
+      std::to_string(capture_output_sample_rate_hz) + "Hz_" +
+      std::to_string(num_channels) + "_channels";
+
+  webrtc::test::PrintResultMeanAndError(
+      "level_controller_call_durations", result_prefix + "_render",
+      test_description, render_timer.GetDurationAverage(),
+      render_timer.GetDurationStandardDeviation(), "us", false);
+
+  webrtc::test::PrintResultMeanAndError(
+      "level_controller_call_durations", result_prefix + "_capture",
+      test_description, capture_timer.GetDurationAverage(),
+      capture_timer.GetDurationStandardDeviation(), "us", false);
+
+  webrtc::test::PrintResultMeanAndError(
+      "level_controller_call_durations", result_prefix + "_total",
+      test_description, total_timer.GetDurationAverage(),
+      total_timer.GetDurationStandardDeviation(), "us", false);
+}
+
+} // namespace
+
+// TODO(peah): Reactivate once issue 7712 has been resolved.
+TEST(LevelControllerPerformanceTest, DISABLED_StandaloneProcessing) {
+  // Runs each of the four sample rates below with one and two channels.
+  const int kSampleRatesHz[] = {
+      AudioProcessing::kSampleRate8kHz, AudioProcessing::kSampleRate16kHz,
+      AudioProcessing::kSampleRate32kHz, AudioProcessing::kSampleRate48kHz};
+  for (const int rate_hz : kSampleRatesHz) {
+    RunStandaloneSubmodule(rate_hz, 1);
+    RunStandaloneSubmodule(rate_hz, 2);
+  }
+}
+
+// Runs RunTogetherWithApm() for three stereo rate combinations and one mono
+// combination; |use_mobile_aec| is passed on as its argument of that name.
+void TestSomeSampleRatesWithApm(const std::string& test_name,
+    bool use_mobile_aec, bool include_default_apm_processing) {
+  // Test some stereo combinations first.
+  size_t num_channels = 2;
+  RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate16kHz,
+                     AudioProcessing::kSampleRate32kHz, num_channels,
+                     use_mobile_aec, include_default_apm_processing);
+  RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate48kHz,
+                     AudioProcessing::kSampleRate8kHz, num_channels,
+                     use_mobile_aec, include_default_apm_processing);
+  RunTogetherWithApm(test_name, 48000, 48000, 44100, 44100, num_channels,
+                     use_mobile_aec, include_default_apm_processing);
+  // Then test mono combinations.
+  num_channels = 1;
+  RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate48kHz,
+                     AudioProcessing::kSampleRate48kHz, num_channels,
+                     use_mobile_aec, include_default_apm_processing);
+}
+
+// TODO(peah): Reactivate once issue 7712 has been resolved.
+// Disabled on every platform, so a single definition suffices.
+TEST(LevelControllerPerformanceTest, DISABLED_ProcessingViaApm) {
+  // Run without default APM processing and with the mobile flag off.
+  const bool kUseMobile = false;
+  const bool kIncludeDefaultApmProcessing = false;
+  TestSomeSampleRatesWithApm("SimpleLevelControlViaApm", kUseMobile,
+                             kIncludeDefaultApmProcessing);
+}
+
+// TODO(peah): Reactivate once issue 7712 has been resolved.
+// Disabled on all platforms, so one definition without platform guards.
+TEST(LevelControllerPerformanceTest, DISABLED_InteractionWithDefaultApm) {
+  const bool kDefaultApm = true;
+
+  // The second argument is false for the desktop run and true for the
+  // mobile run.
+  TestSomeSampleRatesWithApm("LevelControlAndDefaultDesktopApm", false,
+                             kDefaultApm);
+  TestSomeSampleRatesWithApm("LevelControlAndDefaultMobileApm", true,
+                             kDefaultApm);
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/level_controller_constants.h b/modules/audio_processing/level_controller/level_controller_constants.h
new file mode 100644
index 0000000..6cf2cd4
--- /dev/null
+++ b/modules/audio_processing/level_controller/level_controller_constants.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_
+
+namespace webrtc {
+
+const float kMaxLcGain = 10.f;  // Upper limit for the selected gain.
+const float kMaxLcNoisePower = 100.f * 100.f;  // Scaled by the frame length.
+const float kTargetLcPeakLevel = 16384.f;  // Target peak, linear scale.
+const float kTargetLcPeakLeveldBFS = -6.0206f;  // Target peak in dBFS.
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_
diff --git a/modules/audio_processing/level_controller/level_controller_unittest.cc b/modules/audio_processing/level_controller/level_controller_unittest.cc
new file mode 100644
index 0000000..cb36ae0
--- /dev/null
+++ b/modules/audio_processing/level_controller/level_controller_unittest.cc
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/optional.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/level_controller.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+const int kNumFramesToProcess = 1000;
+
+// Runs kNumFramesToProcess capture frames through a LevelController and checks
+// the start of the last output frame against |output_reference|.
+void RunBitexactnessTest(int sample_rate_hz,
+ size_t num_channels,
+ rtc::Optional<float> initial_peak_level_dbfs,
+ rtc::ArrayView<const float> output_reference) {
+ LevelController level_controller;
+ level_controller.Initialize(sample_rate_hz);
+ if (initial_peak_level_dbfs) {  // Only override when a value was given.
+ AudioProcessing::Config::LevelController config;
+ config.initial_peak_level_dbfs = *initial_peak_level_dbfs;
+ level_controller.ApplyConfig(config);
+ }
+
+ int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+ const StreamConfig capture_config(sample_rate_hz, num_channels, false);
+ AudioBuffer capture_buffer(
+ capture_config.num_frames(), capture_config.num_channels(),
+ capture_config.num_frames(), capture_config.num_channels(),
+ capture_config.num_frames());
+ test::InputAudioFile capture_file(
+ test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+ std::vector<float> capture_input(samples_per_channel * num_channels);
+ for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
+ ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+ &capture_file, capture_input);
+
+ test::CopyVectorToAudioBuffer(capture_config, capture_input,
+ &capture_buffer);
+
+ level_controller.Process(&capture_buffer);
+ }
+
+ // Extract the output of the last frame processed.
+ std::vector<float> capture_output;
+ test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+ &capture_output);
+
+ // Compare the output with the reference. Only the first values of the output
+ // from the last frame processed are compared, to avoid having to specify all
+ // preceding frames as test vectors. As the algorithm being tested has
+ // memory, testing only the last frame implicitly also tests the preceding
+ // frames.
+ const float kVectorElementErrorBound = 1.0f / 32768.0f;  // I.e. 2^-15.
+ EXPECT_TRUE(test::VerifyDeinterleavedArray(
+ capture_config.num_frames(), capture_config.num_channels(),
+ output_reference, capture_output, kVectorElementErrorBound));
+}
+
+} // namespace
+
+TEST(LevelControllerConfig, ToString) {  // Pins the exact string format.
+  AudioProcessing::Config apm_config;
+  auto& lc_config = apm_config.level_controller;
+  lc_config.enabled = true;
+  lc_config.initial_peak_level_dbfs = -6.0206f;
+  EXPECT_EQ("{enabled: true, initial_peak_level_dbfs: -6.0206}",
+            LevelController::ToString(lc_config));
+  lc_config.enabled = false;
+  lc_config.initial_peak_level_dbfs = -50.f;
+  EXPECT_EQ("{enabled: false, initial_peak_level_dbfs: -50}",
+            LevelController::ToString(lc_config));
+}
+
+TEST(LevelControlBitExactnessTest, Mono8kHz) {
+ const float kOutputReference[] = {-0.013939f, -0.012154f, -0.009054f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, rtc::nullopt,
+ kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Mono16kHz) {
+ const float kOutputReference[] = {-0.013706f, -0.013215f, -0.013018f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, rtc::nullopt,
+ kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Mono32kHz) {
+ const float kOutputReference[] = {-0.014495f, -0.016425f, -0.016085f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, rtc::nullopt,
+ kOutputReference);
+}
+
+// TODO(peah): Investigate why this particular test case differs between
+// ARM/Android builds and the rest of the platforms.
+TEST(LevelControlBitExactnessTest, Mono48kHz) {
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+ defined(WEBRTC_ANDROID))
+ const float kOutputReference[] = {-0.014277f, -0.015180f, -0.017437f};
+#else  // ARM, ARM64 or Android.
+ const float kOutputReference[] = {-0.014306f, -0.015209f, -0.017466f};
+#endif
+ RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, rtc::nullopt,
+ kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo8kHz) {
+ const float kOutputReference[] = {-0.014063f, -0.008450f, -0.012159f,
+ -0.051967f, -0.023202f, -0.047858f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, rtc::nullopt,
+ kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo16kHz) {
+ const float kOutputReference[] = {-0.012714f, -0.005896f, -0.012220f,
+ -0.053306f, -0.024549f, -0.051527f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, rtc::nullopt,
+ kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo32kHz) {
+ const float kOutputReference[] = {-0.011764f, -0.007044f, -0.013472f,
+ -0.053537f, -0.026322f, -0.056253f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, rtc::nullopt,
+ kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo48kHz) {
+ const float kOutputReference[] = {-0.010643f, -0.006334f, -0.011377f,
+ -0.049088f, -0.023600f, -0.050465f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, rtc::nullopt,
+ kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, MonoInitial48kHz) {
+ const float kOutputReference[] = {-0.013884f, -0.014761f, -0.016951f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, -50,
+ kOutputReference);
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/noise_level_estimator.cc b/modules/audio_processing/level_controller/noise_level_estimator.cc
new file mode 100644
index 0000000..abf4ea2
--- /dev/null
+++ b/modules/audio_processing/level_controller/noise_level_estimator.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/noise_level_estimator.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+NoiseLevelEstimator::NoiseLevelEstimator() {
+ Initialize(AudioProcessing::kSampleRate48kHz);  // Start with 48 kHz defaults.
+}
+
+NoiseLevelEstimator::~NoiseLevelEstimator() {}
+
+void NoiseLevelEstimator::Initialize(int sample_rate_hz) {
+ noise_energy_ = 1.f;
+ first_update_ = true;  // Makes the next Analyze() seed the estimate.
+ min_noise_energy_ = sample_rate_hz * 2.f * 2.f / 100.f;  // Estimate floor.
+ noise_energy_hold_counter_ = 0;
+}
+
+float NoiseLevelEstimator::Analyze(SignalClassifier::SignalType signal_type,
+ float frame_energy) {
+ if (frame_energy <= 0.f) {  // No usable energy: keep the current estimate.
+ return noise_energy_;
+ }
+
+ if (first_update_) {
+ // Seed the noise energy with the frame energy, but never below the floor.
+ first_update_ = false;
+ return noise_energy_ = std::max(frame_energy, min_noise_energy_);
+ }
+
+ // Update the noise estimate in a minimum statistics-type manner.
+ if (signal_type == SignalClassifier::SignalType::kStationary) {
+ if (frame_energy > noise_energy_) {
+ // Leak the estimate upwards (at most 1% per call) once the hold counter
+ // armed by the last downward update has expired.
+ noise_energy_hold_counter_ = std::max(noise_energy_hold_counter_ - 1, 0);
+
+ if (noise_energy_hold_counter_ == 0) {
+ noise_energy_ = std::min(noise_energy_ * 1.01f, frame_energy);
+ }
+ } else {
+ // Move 5% of the way down towards the frame energy, by at most 10%.
+ noise_energy_ =
+ std::max(noise_energy_ * 0.9f,
+ noise_energy_ + 0.05f * (frame_energy - noise_energy_));
+ noise_energy_hold_counter_ = 1000;  // Arms the hold before upward leaks.
+ }
+ } else {
+ // For a non-stationary signal, leak the estimate downwards (1% per call) to
+ // avoid estimate locking due to incorrect signal classification.
+ noise_energy_ = noise_energy_ * 0.99f;
+ }
+
+ // Clamp the estimate to the floor set in Initialize().
+ return noise_energy_ = std::max(noise_energy_, min_noise_energy_);
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/noise_level_estimator.h b/modules/audio_processing/level_controller/noise_level_estimator.h
new file mode 100644
index 0000000..94ef673
--- /dev/null
+++ b/modules/audio_processing/level_controller/noise_level_estimator.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
+
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class NoiseLevelEstimator {
+ public:
+  NoiseLevelEstimator();  // Initializes for a 48 kHz sample rate.
+  ~NoiseLevelEstimator();
+  void Initialize(int sample_rate_hz);  // Resets all state for the given rate.
+  float Analyze(SignalClassifier::SignalType signal_type, float frame_energy);
+
+ private:
+  float min_noise_energy_ = 0.f;  // Lower bound applied to noise_energy_.
+  bool first_update_ = true;  // True until the estimate has been seeded.
+  float noise_energy_ = 1.f;  // Current noise energy estimate.
+  int noise_energy_hold_counter_ = 0;  // Delays upward leaks of the estimate.
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
diff --git a/modules/audio_processing/level_controller/noise_spectrum_estimator.cc b/modules/audio_processing/level_controller/noise_spectrum_estimator.cc
new file mode 100644
index 0000000..6e921c2
--- /dev/null
+++ b/modules/audio_processing/level_controller/noise_spectrum_estimator.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h"
+
+#include <string.h>
+#include <algorithm>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/arraysize.h"
+
+namespace webrtc {
+namespace {
+constexpr float kMinNoisePower = 100.f;  // Floor for every spectrum bin.
+}  // namespace
+
+NoiseSpectrumEstimator::NoiseSpectrumEstimator(ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper) {
+  Initialize();  // Start from the floor value in every bin.
+}
+
+void NoiseSpectrumEstimator::Initialize() {
+  // Reset the whole noise estimate to the minimum allowed power.
+  for (float& bin : noise_spectrum_) bin = kMinNoisePower;
+}
+
+// Updates the per-bin noise estimate from |spectrum| and dumps both spectra.
+void NoiseSpectrumEstimator::Update(rtc::ArrayView<const float> spectrum,
+                                    bool first_update) {
+  RTC_DCHECK_EQ(arraysize(noise_spectrum_), spectrum.size());
+  if (first_update) {
+    // Seed the noise spectral estimate with the signal spectrum itself.
+    std::copy(spectrum.data(), spectrum.data() + spectrum.size(),
+              noise_spectrum_);
+  } else {
+    // Move each bin 5% of the way towards the signal spectrum, but by no more
+    // than 1% of its current value, so that the updates stay limited.
+    for (size_t k = 0; k < spectrum.size(); ++k) {
+      if (noise_spectrum_[k] < spectrum[k]) {
+        noise_spectrum_[k] = std::min(
+            1.01f * noise_spectrum_[k],
+            noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k]));
+      } else {
+        noise_spectrum_[k] = std::max(
+            0.99f * noise_spectrum_[k],
+            noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k]));
+      }
+    }
+  }
+
+  // Ensure that the noise spectral estimate does not become too low.
+  for (auto& v : noise_spectrum_) {
+    v = std::max(v, kMinNoisePower);
+  }
+  data_dumper_->DumpRaw("lc_noise_spectrum", arraysize(noise_spectrum_),
+                        noise_spectrum_);
+  data_dumper_->DumpRaw("lc_signal_spectrum", spectrum);
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/noise_spectrum_estimator.h b/modules/audio_processing/level_controller/noise_spectrum_estimator.h
new file mode 100644
index 0000000..f10933e
--- /dev/null
+++ b/modules/audio_processing/level_controller/noise_spectrum_estimator.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_
+
+#include "api/array_view.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+class NoiseSpectrumEstimator {
+ public:
+ explicit NoiseSpectrumEstimator(ApmDataDumper* data_dumper);
+ void Initialize();  // Resets every bin of the estimate to its floor value.
+ void Update(rtc::ArrayView<const float> spectrum, bool first_update);
+
+ rtc::ArrayView<const float> GetNoiseSpectrum() const {
+ return rtc::ArrayView<const float>(noise_spectrum_);  // View of the array.
+ }
+
+ private:
+ ApmDataDumper* data_dumper_;  // Receives the DumpRaw() calls from Update().
+ float noise_spectrum_[65];  // 65 bins, the spectrum size Update() expects.
+
+ RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(NoiseSpectrumEstimator);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_
diff --git a/modules/audio_processing/level_controller/peak_level_estimator.cc b/modules/audio_processing/level_controller/peak_level_estimator.cc
new file mode 100644
index 0000000..f602892
--- /dev/null
+++ b/modules/audio_processing/level_controller/peak_level_estimator.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/peak_level_estimator.h"
+
+#include <algorithm>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kMinLevel = 30.f;  // Lower bound for the peak level estimate.
+
+} // namespace
+
+PeakLevelEstimator::PeakLevelEstimator(float initial_peak_level_dbfs) {
+ Initialize(initial_peak_level_dbfs);
+}
+
+PeakLevelEstimator::~PeakLevelEstimator() {}
+
+void PeakLevelEstimator::Initialize(float initial_peak_level_dbfs) {
+ RTC_DCHECK_LE(-100.f, initial_peak_level_dbfs);  // Range: [-100, 0] dBFS.
+ RTC_DCHECK_GE(0.f, initial_peak_level_dbfs);
+
+ peak_level_ = std::max(DbfsToFloatS16(initial_peak_level_dbfs), kMinLevel);
+
+ hold_counter_ = 0;
+ initialization_phase_ = true;  // Cleared by the first upward update.
+}
+
+float PeakLevelEstimator::Analyze(SignalClassifier::SignalType signal_type,
+ float frame_peak_level) {
+ if (frame_peak_level == 0) {  // Zero peak: keep the estimate as it is.
+ RTC_DCHECK_LE(kMinLevel, peak_level_);
+ return peak_level_;
+ }
+
+ if (peak_level_ < frame_peak_level) {
+ // Smoothly update the estimate upwards (10% of the gap per call) when the
+ // frame peak level is higher than the estimate.
+ peak_level_ += 0.1f * (frame_peak_level - peak_level_);
+ hold_counter_ = 100;  // Postpones downward updates.
+ initialization_phase_ = false;
+ } else {
+ hold_counter_ = std::max(0, hold_counter_ - 1);
+
+ // While initializing, or when the signal is highly non-stationary and the
+ // hold has expired, decay the estimate slowly towards the frame peak level.
+ if ((signal_type == SignalClassifier::SignalType::kHighlyNonStationary &&
+ hold_counter_ == 0) ||
+ initialization_phase_) {
+ peak_level_ =
+ std::max(peak_level_ + 0.01f * (frame_peak_level - peak_level_),
+ peak_level_ * 0.995f);
+ }
+ }
+
+ peak_level_ = std::max(peak_level_, kMinLevel);  // Never drop below the floor.
+
+ return peak_level_;
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/peak_level_estimator.h b/modules/audio_processing/level_controller/peak_level_estimator.h
new file mode 100644
index 0000000..0aa55d2
--- /dev/null
+++ b/modules/audio_processing/level_controller/peak_level_estimator.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_
+
+#include "modules/audio_processing/level_controller/level_controller_constants.h"
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class PeakLevelEstimator {
+ public:
+ explicit PeakLevelEstimator(float initial_peak_level_dbfs);
+ ~PeakLevelEstimator();
+ void Initialize(float initial_peak_level_dbfs);  // Resets all state.
+ float Analyze(SignalClassifier::SignalType signal_type,
+ float frame_peak_level);  // Returns the updated peak level estimate.
+ private:
+ float peak_level_;  // Current estimate; kept at or above the minimum level.
+ int hold_counter_;  // Counts down before downward updates are allowed.
+ bool initialization_phase_;  // True until the first upward update.
+
+ RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(PeakLevelEstimator);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_
diff --git a/modules/audio_processing/level_controller/saturating_gain_estimator.cc b/modules/audio_processing/level_controller/saturating_gain_estimator.cc
new file mode 100644
index 0000000..60110c6
--- /dev/null
+++ b/modules/audio_processing/level_controller/saturating_gain_estimator.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/saturating_gain_estimator.h"
+
+#include <math.h>
+#include <algorithm>
+
+#include "modules/audio_processing/level_controller/level_controller_constants.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+SaturatingGainEstimator::SaturatingGainEstimator() {
+ Initialize();
+}
+
+SaturatingGainEstimator::~SaturatingGainEstimator() {}
+
+void SaturatingGainEstimator::Initialize() {
+ saturating_gain_ = kMaxLcGain;  // Start at the cap used by Update().
+ saturating_gain_hold_counter_ = 0;  // No hold pending.
+}
+
+void SaturatingGainEstimator::Update(float gain, int num_saturations) {
+  // More than two saturations: back off below the offending gain and hold.
+  if (num_saturations > 2) {
+    saturating_gain_ = 0.95f * gain;
+    saturating_gain_hold_counter_ = 1000;
+    return;
+  }
+
+  // Count the hold period down; once expired, creep upwards up to the cap.
+  saturating_gain_hold_counter_ =
+      std::max(0, saturating_gain_hold_counter_ - 1);
+  if (saturating_gain_hold_counter_ == 0) {
+    saturating_gain_ = std::min(kMaxLcGain, saturating_gain_ * 1.001f);
+  }
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/saturating_gain_estimator.h b/modules/audio_processing/level_controller/saturating_gain_estimator.h
new file mode 100644
index 0000000..8980f4e
--- /dev/null
+++ b/modules/audio_processing/level_controller/saturating_gain_estimator.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_
+
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+class SaturatingGainEstimator {
+ public:
+ SaturatingGainEstimator();
+ ~SaturatingGainEstimator();
+ void Initialize();  // Resets the estimate to kMaxLcGain with no hold.
+ void Update(float gain, int num_saturations);
+ float GetGain() const { return saturating_gain_; }  // Current estimate.
+
+ private:
+ float saturating_gain_;
+ int saturating_gain_hold_counter_;  // Counts down before the gain may grow.
+
+ RTC_DISALLOW_COPY_AND_ASSIGN(SaturatingGainEstimator);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_
diff --git a/modules/audio_processing/level_controller/signal_classifier.cc b/modules/audio_processing/level_controller/signal_classifier.cc
new file mode 100644
index 0000000..d2d5917
--- /dev/null
+++ b/modules/audio_processing/level_controller/signal_classifier.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/level_controller/down_sampler.h"
+#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+namespace {
+
+void RemoveDcLevel(rtc::ArrayView<float> x) {
+  RTC_DCHECK_LT(0, x.size());
+  // Average of all samples; subtracting it below centers the frame at zero.
+  const float dc_level =
+      std::accumulate(x.data(), x.data() + x.size(), 0.f) / x.size();
+  for (size_t i = 0; i < x.size(); ++i) {
+    x[i] -= dc_level;
+  }
+}
+
+void PowerSpectrum(const OouraFft* ooura_fft,
+ rtc::ArrayView<const float> x,  // 128 input samples.
+ rtc::ArrayView<float> spectrum) {  // Out: 65 power values.
+ RTC_DCHECK_EQ(65, spectrum.size());
+ RTC_DCHECK_EQ(128, x.size());
+ float X[128];
+ std::copy(x.data(), x.data() + x.size(), X);  // Fft() transforms X in place.
+ ooura_fft->Fft(X);
+
+ float* X_p = X;
+ RTC_DCHECK_EQ(X_p, &X[0]);
+ spectrum[0] = (*X_p) * (*X_p);  // Bin 0 is taken from X[0] alone.
+ ++X_p;
+ RTC_DCHECK_EQ(X_p, &X[1]);
+ spectrum[64] = (*X_p) * (*X_p);  // Bin 64 is taken from X[1] alone.
+ for (int k = 1; k < 64; ++k) {  // Bins 1..63: X[2k]^2 + X[2k+1]^2.
+ ++X_p;
+ RTC_DCHECK_EQ(X_p, &X[2 * k]);
+ spectrum[k] = (*X_p) * (*X_p);
+ ++X_p;
+ RTC_DCHECK_EQ(X_p, &X[2 * k + 1]);
+ spectrum[k] += (*X_p) * (*X_p);
+ }
+}
+
+webrtc::SignalClassifier::SignalType ClassifySignal(
+ rtc::ArrayView<const float> signal_spectrum,
+ rtc::ArrayView<const float> noise_spectrum,
+ ApmDataDumper* data_dumper) {
+ int num_stationary_bands = 0;
+ int num_highly_nonstationary_bands = 0;
+
+ // Count the stationary and highly nonstationary bands among bins 1..39.
+ for (size_t k = 1; k < 40; k++) {
+ if (signal_spectrum[k] < 3 * noise_spectrum[k] &&
+ signal_spectrum[k] * 3 > noise_spectrum[k]) {
+ ++num_stationary_bands;  // Signal within a factor 3 of the noise.
+ } else if (signal_spectrum[k] > 9 * noise_spectrum[k]) {
+ ++num_highly_nonstationary_bands;  // Signal more than 9x the noise.
+ }
+ }
+
+ data_dumper->DumpRaw("lc_num_stationary_bands", 1, &num_stationary_bands);
+ data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1,
+ &num_highly_nonstationary_bands);
+
+ // Classify the overall signal: more than 15 bands of one kind decide, and
+ // the stationary count is checked first.
+ if (num_stationary_bands > 15) {
+ return SignalClassifier::SignalType::kStationary;
+ } else if (num_highly_nonstationary_bands > 15) {
+ return SignalClassifier::SignalType::kHighlyNonStationary;
+ } else {
+ return SignalClassifier::SignalType::kNonStationary;
+ }
+}
+
+} // namespace
+
+SignalClassifier::FrameExtender::FrameExtender(size_t frame_size,
+ size_t extended_frame_size)
+ : x_old_(extended_frame_size - frame_size, 0.f) {}  // Zero-filled history.
+
+SignalClassifier::FrameExtender::~FrameExtender() = default;
+
+void SignalClassifier::FrameExtender::ExtendFrame(
+ rtc::ArrayView<const float> x,  // New frame.
+ rtc::ArrayView<float> x_extended) {  // Out: old samples followed by |x|.
+ RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size());
+ std::copy(x_old_.data(), x_old_.data() + x_old_.size(), x_extended.data());
+ std::copy(x.data(), x.data() + x.size(), x_extended.data() + x_old_.size());
+ std::copy(x_extended.data() + x_extended.size() - x_old_.size(),
+ x_extended.data() + x_extended.size(), x_old_.data());  // Keep the tail.
+}
+
+SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)
+ : data_dumper_(data_dumper),
+ down_sampler_(data_dumper_),
+ noise_spectrum_estimator_(data_dumper_) {
+ Initialize(AudioProcessing::kSampleRate48kHz);  // Start with 48 kHz defaults.
+}
+SignalClassifier::~SignalClassifier() {}
+
+void SignalClassifier::Initialize(int sample_rate_hz) {
+ down_sampler_.Initialize(sample_rate_hz);
+ noise_spectrum_estimator_.Initialize();
+ frame_extender_.reset(new FrameExtender(80, 128));  // 80 new + 48 old samples.
+ sample_rate_hz_ = sample_rate_hz;
+ initialization_frames_left_ = 2;  // Frames that seed the noise spectrum.
+ consistent_classification_counter_ = 3;  // Analyze() reports kNonStationary.
+ last_signal_type_ = SignalClassifier::SignalType::kNonStationary;
+}
+
+void SignalClassifier::Analyze(const AudioBuffer& audio,
+ SignalType* signal_type) {
+ RTC_DCHECK_EQ(audio.num_frames(), sample_rate_hz_ / 100);  // 10 ms of audio.
+
+ // Compute the power spectrum of the first channel only.
+ float downsampled_frame[80];
+ down_sampler_.DownSample(rtc::ArrayView<const float>(
+ audio.channels_const_f()[0], audio.num_frames()),
+ downsampled_frame);
+ float extended_frame[128];
+ frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
+ RemoveDcLevel(extended_frame);
+ float signal_spectrum[65];
+ PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum);
+
+ // Classify the signal based on the estimate of the noise spectrum and the
+ // signal spectrum estimate.
+ *signal_type = ClassifySignal(signal_spectrum,
+ noise_spectrum_estimator_.GetNoiseSpectrum(),
+ data_dumper_);
+
+ // Update the noise spectrum based on the signal spectrum.
+ noise_spectrum_estimator_.Update(signal_spectrum,
+ initialization_frames_left_ > 0);
+
+ // Update the number of frames until a reliable signal spectrum is achieved.
+ initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);
+
+ if (last_signal_type_ == *signal_type) {  // Same raw class as last frame.
+ consistent_classification_counter_ =
+ std::max(0, consistent_classification_counter_ - 1);
+ } else {
+ last_signal_type_ = *signal_type;
+ consistent_classification_counter_ = 3;  // Restart the consistency count.
+ }
+
+ if (consistent_classification_counter_ > 0) {  // Not yet consistent.
+ *signal_type = SignalClassifier::SignalType::kNonStationary;
+ }
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/level_controller/signal_classifier.h b/modules/audio_processing/level_controller/signal_classifier.h
new file mode 100644
index 0000000..2be13fe
--- /dev/null
+++ b/modules/audio_processing/level_controller/signal_classifier.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/level_controller/down_sampler.h"
+#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h"
+#include "modules/audio_processing/utility/ooura_fft.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+class SignalClassifier {
+ public:
+ enum class SignalType { kHighlyNonStationary, kNonStationary, kStationary };
+
+ explicit SignalClassifier(ApmDataDumper* data_dumper);
+ ~SignalClassifier();
+
+ void Initialize(int sample_rate_hz);  // Re-initializes for the given rate.
+ void Analyze(const AudioBuffer& audio, SignalType* signal_type);
+
+ private:
+ class FrameExtender {  // Prepends retained old samples to each new frame.
+ public:
+ FrameExtender(size_t frame_size, size_t extended_frame_size);
+ ~FrameExtender();
+
+ void ExtendFrame(rtc::ArrayView<const float> x,
+ rtc::ArrayView<float> x_extended);
+
+ private:
+ std::vector<float> x_old_;  // Tail of the previous extended frame.
+
+ RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FrameExtender);
+ };
+
+ ApmDataDumper* const data_dumper_;  // Declared first: passed to members below.
+ DownSampler down_sampler_;
+ std::unique_ptr<FrameExtender> frame_extender_;
+ NoiseSpectrumEstimator noise_spectrum_estimator_;
+ int sample_rate_hz_;
+ int initialization_frames_left_;  // Frames that still seed the noise spectrum.
+ int consistent_classification_counter_;  // > 0: report kNonStationary.
+ SignalType last_signal_type_;  // Raw classification of the previous frame.
+ const OouraFft ooura_fft_;
+ RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SignalClassifier);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
diff --git a/modules/audio_processing/test/aec_dump_based_simulator.cc b/modules/audio_processing/test/aec_dump_based_simulator.cc
index 83e8531..6d0b07c 100644
--- a/modules/audio_processing/test/aec_dump_based_simulator.cc
+++ b/modules/audio_processing/test/aec_dump_based_simulator.cc
@@ -473,6 +473,10 @@
new RefinedAdaptiveFilter(*settings_.use_refined_adaptive_filter));
}
+ if (settings_.use_lc) {
+ apm_config.level_controller.enabled = *settings_.use_lc;
+ }
+
if (settings_.use_ed) {
apm_config.residual_echo_detector.enabled = *settings_.use_ed;
}
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index b4c3525..82bffe4 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -328,6 +328,9 @@
if (settings_.use_aec3 && *settings_.use_aec3) {
echo_control_factory.reset(new EchoCanceller3Factory());
}
+ if (settings_.use_lc) {
+ apm_config.level_controller.enabled = *settings_.use_lc;
+ }
if (settings_.use_hpf) {
apm_config.high_pass_filter.enabled = *settings_.use_hpf;
}
diff --git a/modules/audio_processing/test/audio_processing_simulator.h b/modules/audio_processing/test/audio_processing_simulator.h
index a6bdb90..41a3f45 100644
--- a/modules/audio_processing/test/audio_processing_simulator.h
+++ b/modules/audio_processing/test/audio_processing_simulator.h
@@ -66,6 +66,7 @@
rtc::Optional<bool> use_extended_filter;
rtc::Optional<bool> use_drift_compensation;
rtc::Optional<bool> use_aec3;
+ rtc::Optional<bool> use_lc;
rtc::Optional<bool> use_experimental_agc;
rtc::Optional<int> aecm_routing_mode;
rtc::Optional<bool> use_aecm_comfort_noise;
diff --git a/modules/audio_processing/test/audioproc_float.cc b/modules/audio_processing/test/audioproc_float.cc
index 554d6b4..c5229a4 100644
--- a/modules/audio_processing/test/audioproc_float.cc
+++ b/modules/audio_processing/test/audioproc_float.cc
@@ -121,6 +121,9 @@
DEFINE_int(aec3,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the experimental AEC mode AEC3");
+DEFINE_int(lc,
+ kParameterNotSpecifiedValue,
+ "Activate (1) or deactivate(0) the level control");
DEFINE_int(experimental_agc,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the experimental AGC");
@@ -258,6 +261,7 @@
&settings.use_refined_adaptive_filter);
SetSettingIfFlagSet(FLAG_aec3, &settings.use_aec3);
+ SetSettingIfFlagSet(FLAG_lc, &settings.use_lc);
SetSettingIfFlagSet(FLAG_experimental_agc, &settings.use_experimental_agc);
SetSettingIfSpecified(FLAG_aecm_routing_mode, &settings.aecm_routing_mode);
SetSettingIfFlagSet(FLAG_aecm_comfort_noise,
diff --git a/modules/audio_processing/test/debug_dump_test.cc b/modules/audio_processing/test/debug_dump_test.cc
index 4d3be48..56f47b0 100644
--- a/modules/audio_processing/test/debug_dump_test.cc
+++ b/modules/audio_processing/test/debug_dump_test.cc
@@ -484,6 +484,31 @@
}
}
+TEST_F(DebugDumpTest, VerifyLevelControllerExperimentalString) {
+ Config config;
+ AudioProcessing::Config apm_config;
+ apm_config.level_controller.enabled = true;  // Feature under test.
+ DebugDumpGenerator generator(config, apm_config);
+ generator.StartRecording();
+ generator.Process(100);
+ generator.StopRecording();
+
+ DebugDumpReplayer debug_dump_replayer_;
+
+ ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+ while (const rtc::Optional<audioproc::Event> event =
+ debug_dump_replayer_.GetNextEvent()) {
+ debug_dump_replayer_.RunNextEvent();
+ if (event->type() == audioproc::Event::CONFIG) {  // Only CONFIG is checked.
+ const audioproc::Config* msg = &event->config();
+ ASSERT_TRUE(msg->has_experiments_description());
+ EXPECT_PRED_FORMAT2(testing::IsSubstring, "LevelController",
+ msg->experiments_description().c_str());
+ }
+ }
+}
+
TEST_F(DebugDumpTest, VerifyAgcClippingLevelExperimentalString) {
Config config;
// Arbitrarily set clipping gain to 17, which will never be the default.