Merge audio_processing changes.
R=aluebs@webrtc.org, bjornv@webrtc.org
BUG=
Review URL: https://webrtc-codereview.appspot.com/32769004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@7893 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index ba22f33..3ce84fb 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -15,6 +15,8 @@
#include "webrtc/base/platform_file.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
+#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/channel_buffer.h"
#include "webrtc/modules/audio_processing/common.h"
@@ -54,6 +56,85 @@
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
+// This class has two main functionalities:
+//
+// 1) It is returned instead of the real GainControl after the new AGC has been
+// enabled in order to prevent an outside user from overriding compression
+// settings. It doesn't do anything in its implementation, except for
+// delegating the const methods and Enable calls to the real GainControl, so
+// AGC can still be disabled.
+//
+// 2) It is injected into AgcManagerDirect and implements volume callbacks for
+// getting and setting the volume level. It just caches this value to be used
+// in VoiceEngine later.
+class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
+ public:
+ explicit GainControlForNewAgc(GainControlImpl* gain_control)
+ : real_gain_control_(gain_control),
+ volume_(0) {
+ }
+
+ // GainControl implementation.
+ virtual int Enable(bool enable) OVERRIDE {
+ return real_gain_control_->Enable(enable);
+ }
+ virtual bool is_enabled() const OVERRIDE {
+ return real_gain_control_->is_enabled();
+ }
+ virtual int set_stream_analog_level(int level) OVERRIDE {
+ volume_ = level;
+ return AudioProcessing::kNoError;
+ }
+ virtual int stream_analog_level() OVERRIDE {
+ return volume_;
+ }
+ virtual int set_mode(Mode mode) OVERRIDE { return AudioProcessing::kNoError; }
+ virtual Mode mode() const OVERRIDE { return GainControl::kAdaptiveAnalog; }
+ virtual int set_target_level_dbfs(int level) OVERRIDE {
+ return AudioProcessing::kNoError;
+ }
+ virtual int target_level_dbfs() const OVERRIDE {
+ return real_gain_control_->target_level_dbfs();
+ }
+ virtual int set_compression_gain_db(int gain) OVERRIDE {
+ return AudioProcessing::kNoError;
+ }
+ virtual int compression_gain_db() const OVERRIDE {
+ return real_gain_control_->compression_gain_db();
+ }
+ virtual int enable_limiter(bool enable) OVERRIDE {
+ return AudioProcessing::kNoError;
+ }
+ virtual bool is_limiter_enabled() const OVERRIDE {
+ return real_gain_control_->is_limiter_enabled();
+ }
+ virtual int set_analog_level_limits(int minimum,
+ int maximum) OVERRIDE {
+ return AudioProcessing::kNoError;
+ }
+ virtual int analog_level_minimum() const OVERRIDE {
+ return real_gain_control_->analog_level_minimum();
+ }
+ virtual int analog_level_maximum() const OVERRIDE {
+ return real_gain_control_->analog_level_maximum();
+ }
+ virtual bool stream_is_saturated() const OVERRIDE {
+ return real_gain_control_->stream_is_saturated();
+ }
+
+ // VolumeCallbacks implementation.
+ virtual void SetMicVolume(int volume) OVERRIDE {
+ volume_ = volume;
+ }
+ virtual int GetMicVolume() OVERRIDE {
+ return volume_;
+ }
+
+ private:
+ GainControl* real_gain_control_;
+ int volume_;
+};
+
AudioProcessing* AudioProcessing::Create(int id) {
return Create();
}
@@ -96,7 +177,13 @@
delay_offset_ms_(0),
was_stream_delay_set_(false),
output_will_be_muted_(false),
- key_pressed_(false) {
+ key_pressed_(false),
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
+ use_new_agc_(false),
+#else
+ use_new_agc_(config.Get<ExperimentalAgc>().enabled),
+#endif
+ transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
component_list_.push_back(echo_cancellation_);
@@ -118,12 +205,18 @@
voice_detection_ = new VoiceDetectionImpl(this, crit_);
component_list_.push_back(voice_detection_);
+ gain_control_for_new_agc_.reset(new GainControlForNewAgc(gain_control_));
+
SetExtraOptions(config);
}
AudioProcessingImpl::~AudioProcessingImpl() {
{
CriticalSectionScoped crit_scoped(crit_);
+ // Depends on gain_control_ and gain_control_for_new_agc_.
+ agc_manager_.reset();
+ // Depends on gain_control_.
+ gain_control_for_new_agc_.reset();
while (!component_list_.empty()) {
ProcessingComponent* component = component_list_.front();
component->Destroy();
@@ -192,6 +285,16 @@
}
}
+ int err = InitializeExperimentalAgc();
+ if (err != kNoError) {
+ return err;
+ }
+
+ err = InitializeTransient();
+ if (err != kNoError) {
+ return err;
+ }
+
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
int err = WriteInitMessage();
@@ -303,6 +406,11 @@
std::list<ProcessingComponent*>::iterator it;
for (it = component_list_.begin(); it != component_list_.end(); ++it)
(*it)->SetExtraOptions(config);
+
+ if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) {
+ transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled;
+ InitializeTransient();
+ }
}
int AudioProcessingImpl::input_sample_rate_hz() const {
@@ -337,6 +445,10 @@
void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
output_will_be_muted_ = muted;
+ CriticalSectionScoped lock(crit_);
+ if (agc_manager_.get()) {
+ agc_manager_->SetCaptureMuted(output_will_be_muted_);
+ }
}
bool AudioProcessingImpl::output_will_be_muted() const {
@@ -470,6 +582,12 @@
#endif
AudioBuffer* ca = capture_audio_.get(); // For brevity.
+ if (use_new_agc_ && gain_control_->is_enabled()) {
+ agc_manager_->AnalyzePreProcess(ca->data(0),
+ ca->num_channels(),
+ fwd_proc_format_.samples_per_channel());
+ }
+
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
ca->SplitIntoFrequencyBands();
@@ -486,12 +604,35 @@
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
+
+ if (use_new_agc_ && gain_control_->is_enabled()) {
+ agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
+ ca->samples_per_split_channel(),
+ split_rate_);
+ }
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
if (synthesis_needed(data_processed)) {
ca->MergeFrequencyBands();
}
+ // TODO(aluebs): Investigate whether the transient suppression placement
+ // should be before or after the AGC.
+ if (transient_suppressor_enabled_) {
+ float voice_probability =
+ agc_manager_.get() ? agc_manager_->voice_probability() : 1.f;
+
+ transient_suppressor_->Suppress(ca->data_f(0),
+ ca->samples_per_channel(),
+ ca->num_channels(),
+ ca->split_bands_const_f(0)[kBand0To8kHz],
+ ca->samples_per_split_channel(),
+ ca->keyboard_data(),
+ ca->samples_per_keyboard_channel(),
+ voice_probability,
+ key_pressed_);
+ }
+
// The level estimator operates on the recombined data.
RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
@@ -586,7 +727,9 @@
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
- RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
+ if (!use_new_agc_) {
+ RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
+ }
return kNoError;
}
@@ -728,6 +871,9 @@
}
GainControl* AudioProcessingImpl::gain_control() const {
+ if (use_new_agc_) {
+ return gain_control_for_new_agc_.get();
+ }
return gain_control_;
}
@@ -775,7 +921,7 @@
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) ||
- is_data_processed);
+ is_data_processed || transient_suppressor_enabled_);
}
bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
@@ -784,7 +930,8 @@
}
bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
- if (!is_data_processed && !voice_detection_->is_enabled()) {
+ if (!is_data_processed && !voice_detection_->is_enabled() &&
+ !transient_suppressor_enabled_) {
// Only level_estimator_ is enabled.
return false;
} else if (fwd_proc_format_.rate() == kSampleRate32kHz ||
@@ -795,6 +942,30 @@
return false;
}
+int AudioProcessingImpl::InitializeExperimentalAgc() {
+ if (use_new_agc_) {
+ if (!agc_manager_.get()) {
+ agc_manager_.reset(
+ new AgcManagerDirect(gain_control_, gain_control_for_new_agc_.get()));
+ }
+ agc_manager_->Initialize();
+ agc_manager_->SetCaptureMuted(output_will_be_muted_);
+ }
+ return kNoError;
+}
+
+int AudioProcessingImpl::InitializeTransient() {
+ if (transient_suppressor_enabled_) {
+ if (!transient_suppressor_.get()) {
+ transient_suppressor_.reset(new TransientSuppressor());
+ }
+ transient_suppressor_->Initialize(fwd_proc_format_.rate(),
+ split_rate_,
+ fwd_out_format_.num_channels());
+ }
+ return kNoError;
+}
+
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
int AudioProcessingImpl::WriteMessageToDebugFile() {
int32_t size = event_msg_->ByteSize();