Only adapt AGC when the desired signal is present
Take the 50% quantile of the mask and compare it to a certain threshold to determine if the desired signal is present. A hold is applied to avoid fast switching between states.
is_target_present_ has been plotted and looks as expected. The AGC adaptation sounds promising, especially for the cases when the speaker fades in and out of the beam direction.
R=andrew@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/28329005
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8078 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 5d7de3a..2fa6f28 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -137,11 +137,16 @@
AudioProcessing* AudioProcessing::Create() {
Config config;
- return Create(config);
+ return Create(config, nullptr);
}
AudioProcessing* AudioProcessing::Create(const Config& config) {
- AudioProcessingImpl* apm = new AudioProcessingImpl(config);
+ return Create(config, nullptr);
+}
+
+AudioProcessing* AudioProcessing::Create(const Config& config,
+ Beamformer* beamformer) {
+ AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
if (apm->Initialize() != kNoError) {
delete apm;
apm = NULL;
@@ -151,6 +156,10 @@
}
AudioProcessingImpl::AudioProcessingImpl(const Config& config)
+ : AudioProcessingImpl(config, nullptr) {}
+
+AudioProcessingImpl::AudioProcessingImpl(const Config& config,
+ Beamformer* beamformer)
: echo_cancellation_(NULL),
echo_control_mobile_(NULL),
gain_control_(NULL),
@@ -181,6 +190,7 @@
#endif
transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled),
beamformer_enabled_(config.Get<Beamforming>().enabled),
+ beamformer_(beamformer),
array_geometry_(config.Get<Beamforming>().array_geometry) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
component_list_.push_back(echo_cancellation_);
@@ -330,6 +340,11 @@
num_reverse_channels > 2 || num_reverse_channels < 1) {
return kBadNumberChannelsError;
}
+ if (beamformer_enabled_ &&
+ (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
+ num_output_channels > 1)) {
+ return kBadNumberChannelsError;
+ }
fwd_in_format_.set(input_sample_rate_hz, num_input_channels);
fwd_out_format_.set(output_sample_rate_hz, num_output_channels);
@@ -395,11 +410,6 @@
num_reverse_channels == rev_in_format_.num_channels()) {
return kNoError;
}
- if (beamformer_enabled_ &&
- (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
- num_output_channels > 1)) {
- return kBadNumberChannelsError;
- }
return InitializeLocked(input_sample_rate_hz,
output_sample_rate_hz,
reverse_sample_rate_hz,
@@ -622,7 +632,9 @@
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
- if (use_new_agc_ && gain_control_->is_enabled()) {
+ if (use_new_agc_ &&
+ gain_control_->is_enabled() &&
+ (!beamformer_enabled_ || beamformer_->is_target_present())) {
agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
ca->samples_per_split_channel(),
split_rate_);
@@ -990,9 +1002,10 @@
void AudioProcessingImpl::InitializeBeamformer() {
if (beamformer_enabled_) {
#ifdef WEBRTC_BEAMFORMER
- beamformer_.reset(new Beamformer(kChunkSizeMs,
- split_rate_,
- array_geometry_));
+ if (!beamformer_) {
+ beamformer_.reset(new Beamformer(array_geometry_));
+ }
+ beamformer_->Initialize(kChunkSizeMs, split_rate_);
#else
assert(false);
#endif
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 08de122..65437fe 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -86,6 +86,8 @@
class AudioProcessingImpl : public AudioProcessing {
public:
explicit AudioProcessingImpl(const Config& config);
+ // Only for testing.
+ AudioProcessingImpl(const Config& config, Beamformer* beamformer);
virtual ~AudioProcessingImpl();
// AudioProcessing methods.
diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.cc b/webrtc/modules/audio_processing/beamformer/beamformer.cc
index f41462e..d76fa68 100644
--- a/webrtc/modules/audio_processing/beamformer/beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/beamformer.cc
@@ -27,7 +27,6 @@
// The minimum value a postprocessing mask can take.
const float kMaskMinimum = 0.01f;
-const int kFftSize = 256;
const float kSpeedOfSoundMeterSeconds = 340;
// For both target and interf angles, 0 is perpendicular to the microphone
@@ -47,8 +46,6 @@
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
const float kBalance = 0.2f;
-const int kNumFreqBins = kFftSize / 2 + 1;
-
// TODO(claguna): need comment here.
const float kBeamwidthConstant = 0.00001f;
@@ -61,10 +58,6 @@
// that our covariance matrices are positive semidefinite.
const float kCovUniformGapHalfWidth = 0.001f;
-// How many blocks of past masks (including the current block) we save. Saved
-// masks are used for postprocessing such as removing musical noise.
-const int kNumberSavedPostfilterMasks = 2;
-
// Lower bound on gain decay.
const float kHalfLifeSeconds = 0.05f;
@@ -72,9 +65,15 @@
const int kMidFrequnecyLowerBoundHz = 250;
const int kMidFrequencyUpperBoundHz = 400;
-const int kHighFrequnecyLowerBoundHz = 4000;
+const int kHighFrequencyLowerBoundHz = 4000;
const int kHighFrequencyUpperBoundHz = 7000;
+// Mask threshold over which the data is considered signal and not interference.
+const float kMaskTargetThreshold = 0.3f;
+// Time in seconds after which the data is considered interference if the mask
+// does not pass |kMaskTargetThreshold|.
+const float kHoldTargetSeconds = 0.25f;
+
// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
// used; to accomplish this, we compute both multiplications in the same loop.
float Norm(const ComplexMatrix<float>& mat,
@@ -126,46 +125,45 @@
} // namespace
-Beamformer::Beamformer(int chunk_size_ms,
- int sample_rate_hz,
- const std::vector<Point>& array_geometry)
- : chunk_length_(sample_rate_hz / (1000.f / chunk_size_ms)),
- window_(new float[kFftSize]),
- num_input_channels_(array_geometry.size()),
- sample_rate_hz_(sample_rate_hz),
- mic_spacing_(MicSpacingFromGeometry(array_geometry)),
- decay_threshold_(
- pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds))),
- mid_frequency_lower_bin_bound_(
- Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)),
- mid_frequency_upper_bin_bound_(
- Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)),
- high_frequency_lower_bin_bound_(
- Round(kHighFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)),
- high_frequency_upper_bin_bound_(
- Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)),
- current_block_ix_(0),
- previous_block_ix_(-1),
- postfilter_masks_(new MatrixF[kNumberSavedPostfilterMasks]),
- delay_sum_masks_(new ComplexMatrixF[kNumFreqBins]),
- target_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
- interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
- reflected_interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
- mask_thresholds_(new float[kNumFreqBins]),
- wave_numbers_(new float[kNumFreqBins]),
- rxiws_(new float[kNumFreqBins]),
- rpsiws_(new float[kNumFreqBins]),
- reflected_rpsiws_(new float[kNumFreqBins]) {
+// Stores the state derived from |array_geometry| (channel count and microphone
+// spacing) and prepares the buffers whose size depends only on compile-time
+// constants. Everything that depends on the sample rate or chunk size is set
+// up in Initialize().
+Beamformer::Beamformer(const std::vector<Point>& array_geometry)
+ : num_input_channels_(array_geometry.size()),
+ mic_spacing_(MicSpacingFromGeometry(array_geometry)) {
+
+ // Fills |window_| (|kFftSize| entries) through
+ // WindowGenerator::KaiserBesselDerived().
+ WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_);
+
+ // Each saved postfilter mask is a 1 x |kNumFreqBins| matrix.
+ for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) {
+ postfilter_masks_[i].Resize(1, kNumFreqBins);
+ }
+}
+
+void Beamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
+ chunk_length_ = sample_rate_hz / (1000.f / chunk_size_ms);
+ sample_rate_hz_ = sample_rate_hz;
+ decay_threshold_ =
+ pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds));
+ mid_frequency_lower_bin_bound_ =
+ Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_);
+ mid_frequency_upper_bin_bound_ =
+ Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
+ high_frequency_lower_bin_bound_ =
+ Round(kHighFrequencyLowerBoundHz * kFftSize / sample_rate_hz_);
+ high_frequency_upper_bin_bound_ =
+ Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
+ current_block_ix_ = 0;
+ previous_block_ix_ = -1;
+ is_target_present_ = false;
+ hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
+ interference_blocks_count_ = hold_target_blocks_;
+
DCHECK_LE(mid_frequency_upper_bin_bound_, kNumFreqBins);
DCHECK_LT(mid_frequency_lower_bin_bound_, mid_frequency_upper_bin_bound_);
DCHECK_LE(high_frequency_upper_bin_bound_, kNumFreqBins);
DCHECK_LT(high_frequency_lower_bin_bound_, high_frequency_upper_bin_bound_);
- WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_.get());
lapped_transform_.reset(new LappedTransform(num_input_channels_,
1,
chunk_length_,
- window_.get(),
+ window_,
kFftSize,
kFftSize / 2,
this));
@@ -196,9 +194,6 @@
reflected_rpsiws_[i] =
Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
}
- for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) {
- postfilter_masks_[i].Resize(1, kNumFreqBins);
- }
}
void Beamformer::InitDelaySumMasks() {
@@ -379,6 +374,8 @@
mask_thresholds_[i]);
}
+ EstimateTargetPresence(mask_data, kNumFreqBins);
+
// Can't access block_index - 1 on the first block.
if (previous_block_ix_ >= 0) {
ApplyDecay();
@@ -490,4 +487,18 @@
return sqrt(mic_spacing);
}
+// Updates |is_target_present_| from the current postfilter |mask|, which holds
+// |length| values (at most |kNumFreqBins|). The target is declared present
+// when the mask value selected at the quantile index exceeds
+// |kMaskTargetThreshold|. Otherwise the previous "present" state is held for up
+// to |hold_target_blocks_| consecutive blocks to avoid fast switching.
+void Beamformer::EstimateTargetPresence(float* mask, int length) {
+  // |sorted_mask_| only has room for |kNumFreqBins| values.
+  DCHECK_LE(length, kNumFreqBins);
+  // Copy exactly the |length| values that are partitioned below, so that the
+  // copy size and the selection range can never disagree.
+  memcpy(sorted_mask_, mask, length * sizeof(*mask));
+  const int median_ix = (length + 1) / 2;
+  // A partial selection is enough: only the element at |median_ix| is read.
+  std::nth_element(sorted_mask_,
+                   sorted_mask_ + median_ix,
+                   sorted_mask_ + length);
+  if (sorted_mask_[median_ix] > kMaskTargetThreshold) {
+    is_target_present_ = true;
+    interference_blocks_count_ = 0;
+  } else if (interference_blocks_count_ < hold_target_blocks_) {
+    // Still inside the hold period: keep reporting the target as present.
+    is_target_present_ = true;
+    ++interference_blocks_count_;
+  } else {
+    // The hold expired. The counter saturates at |hold_target_blocks_| instead
+    // of growing without bound, which would eventually overflow the int.
+    is_target_present_ = false;
+  }
+}
+
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h
index d50b684..427297b 100644
--- a/webrtc/modules/audio_processing/beamformer/beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/beamformer.h
@@ -29,22 +29,29 @@
public:
// At the moment it only accepts uniform linear microphone arrays. Using the
// first microphone as a reference position [0, 0, 0] is a natural choice.
- Beamformer(int chunk_size_ms,
- // Sample rate corresponds to the lower band.
- int sample_rate_hz,
- const std::vector<Point>& array_geometry);
+ explicit Beamformer(const std::vector<Point>& array_geometry);
+ // Virtual so that subclasses (e.g. the gMock-based MockBeamformer) are
+ // destroyed correctly through a Beamformer pointer.
+ virtual ~Beamformer() {}
+
+ // Sample rate corresponds to the lower band.
+ // Needs to be called before the Beamformer can be used.
+ virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
// Process one time-domain chunk of audio. The audio can be separated into
// two signals by frequency, with the higher half passed in as the second
// parameter. Use NULL for |high_pass_split_input| if you only have one
// audio signal. The number of frames and channels must correspond to the
// ctor parameters. The same signal can be passed in as |input| and |output|.
- void ProcessChunk(const float* const* input,
- const float* const* high_pass_split_input,
- int num_input_channels,
- int num_frames_per_band,
- float* const* output,
- float* const* high_pass_split_output);
+ virtual void ProcessChunk(const float* const* input,
+ const float* const* high_pass_split_input,
+ int num_input_channels,
+ int num_frames_per_band,
+ float* const* output,
+ float* const* high_pass_split_output);
+ // After processing each block |is_target_present_| is set to true if the
+ // target signal is present and to false otherwise. This method can be called
+ // to know if the data is target signal or interference and process it
+ // accordingly.
+ virtual bool is_target_present() { return is_target_present_; }
protected:
// Process one frequency-domain block of audio. This is where the fun
@@ -53,7 +60,7 @@
int num_input_channels,
int num_freq_bins,
int num_output_channels,
- complex<float>* const* output);
+ complex<float>* const* output) override;
private:
typedef Matrix<float> MatrixF;
@@ -93,23 +100,30 @@
void ApplyMasks(const complex_f* const* input, complex_f* const* output);
float MicSpacingFromGeometry(const std::vector<Point>& array_geometry);
+ void EstimateTargetPresence(float* mask, int length);
+
+ static const int kFftSize = 256;
+ static const int kNumFreqBins = kFftSize / 2 + 1;
+ // How many blocks of past masks (including the current block) we save. Saved
+ // masks are used for postprocessing such as removing musical noise.
+ static const int kNumberSavedPostfilterMasks = 2;
// Deals with the fft transform and blocking.
- const int chunk_length_;
+ int chunk_length_;
scoped_ptr<LappedTransform> lapped_transform_;
- scoped_ptr<float[]> window_;
+ float window_[kFftSize];
// Parameters exposed to the user.
const int num_input_channels_;
- const int sample_rate_hz_;
+ int sample_rate_hz_;
const float mic_spacing_;
// Calculated based on user-input and constants in the .cc file.
- const float decay_threshold_;
- const int mid_frequency_lower_bin_bound_;
- const int mid_frequency_upper_bin_bound_;
- const int high_frequency_lower_bin_bound_;
- const int high_frequency_upper_bin_bound_;
+ float decay_threshold_;
+ int mid_frequency_lower_bin_bound_;
+ int mid_frequency_upper_bin_bound_;
+ int high_frequency_lower_bin_bound_;
+ int high_frequency_upper_bin_bound_;
// Indices into |postfilter_masks_|.
int current_block_ix_;
@@ -117,29 +131,30 @@
// Old masks are saved in this ring buffer for smoothing. Array of length
// |kNumberSavedMasks| matrix of size 1 x |kNumFreqBins|.
- scoped_ptr<MatrixF[]> postfilter_masks_;
+ MatrixF postfilter_masks_[kNumberSavedPostfilterMasks];
+ float sorted_mask_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
- scoped_ptr<ComplexMatrixF[]> delay_sum_masks_;
+ ComplexMatrixF delay_sum_masks_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
- scoped_ptr<ComplexMatrixF[]> target_cov_mats_;
+ ComplexMatrixF target_cov_mats_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
- scoped_ptr<ComplexMatrixF[]> interf_cov_mats_;
- scoped_ptr<ComplexMatrixF[]> reflected_interf_cov_mats_;
+ ComplexMatrixF interf_cov_mats_[kNumFreqBins];
+ ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];
// Of length |kNumFreqBins|.
- scoped_ptr<float[]> mask_thresholds_;
- scoped_ptr<float[]> wave_numbers_;
+ float mask_thresholds_[kNumFreqBins];
+ float wave_numbers_[kNumFreqBins];
// Preallocated for ProcessAudioBlock()
// Of length |kNumFreqBins|.
- scoped_ptr<float[]> rxiws_;
- scoped_ptr<float[]> rpsiws_;
- scoped_ptr<float[]> reflected_rpsiws_;
+ float rxiws_[kNumFreqBins];
+ float rpsiws_[kNumFreqBins];
+ float reflected_rpsiws_[kNumFreqBins];
// The microphone normalization factor.
ComplexMatrixF eig_m_;
@@ -148,6 +163,14 @@
bool high_pass_exists_;
int num_blocks_in_this_chunk_;
float high_pass_postfilter_mask_;
+
+ // True when the target signal is present.
+ bool is_target_present_;
+ // Number of blocks after which the data is considered interference if the
+ // mask does not pass |kMaskTargetThreshold|.
+ int hold_target_blocks_;
+ // Number of blocks since the last mask that passed |kMaskTargetThreshold|.
+ int interference_blocks_count_;
};
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc
index e20c3a9..74e8458 100644
--- a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc
+++ b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc
@@ -59,9 +59,8 @@
for (int i = 0; i < FLAGS_num_input_channels; ++i) {
array_geometry.push_back(webrtc::Point(i * FLAGS_mic_spacing, 0.f, 0.f));
}
- webrtc::Beamformer bf(kChunkTimeMilliseconds,
- FLAGS_sample_rate,
- array_geometry);
+ webrtc::Beamformer bf(array_geometry);
+ bf.Initialize(kChunkTimeMilliseconds, FLAGS_sample_rate);
while (true) {
size_t samples_read = webrtc::PcmReadToFloat(read_file,
kInputSamplesPerChunk,
diff --git a/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc b/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc
new file mode 100644
index 0000000..2319c32
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"
+
+#include <vector>
+
+namespace webrtc {
+
+// Forwards |array_geometry| to the Beamformer base class; the mock declares no
+// data members of its own.
+MockBeamformer::MockBeamformer(const std::vector<Point>& array_geometry)
+ : Beamformer(array_geometry) {}
+
+// Defined out of line, next to the constructor.
+MockBeamformer::~MockBeamformer() {}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/beamformer/mock_beamformer.h b/webrtc/modules/audio_processing/beamformer/mock_beamformer.h
new file mode 100644
index 0000000..2c04a12
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/mock_beamformer.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+
+#include <vector>
+
+#include "testing/gmock/include/gmock/gmock.h"
+#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
+
+namespace webrtc {
+
+// gMock test double for Beamformer. It mocks the three public virtual methods
+// of Beamformer (Initialize, ProcessChunk and is_target_present) so that a test
+// can script what is_target_present() reports.
+class MockBeamformer : public Beamformer {
+ public:
+ // |array_geometry| is passed on to the Beamformer constructor.
+ explicit MockBeamformer(const std::vector<Point>& array_geometry);
+ ~MockBeamformer() override;
+
+ // The signatures below mirror the virtual methods declared in beamformer.h.
+ MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
+ MOCK_METHOD6(ProcessChunk, void(const float* const* input,
+ const float* const* high_pass_split_input,
+ int num_input_channels,
+ int num_frames_per_band,
+ float* const* output,
+ float* const* high_pass_split_output));
+ MOCK_METHOD0(is_target_present, bool());
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index d23a9ae..6b761e1 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -24,6 +24,7 @@
namespace webrtc {
class AudioFrame;
+class Beamformer;
class EchoCancellation;
class EchoControlMobile;
class GainControl;
@@ -199,6 +200,8 @@
static AudioProcessing* Create();
// Allows passing in an optional configuration at create-time.
static AudioProcessing* Create(const Config& config);
+ // Only for testing.
+ static AudioProcessing* Create(const Config& config, Beamformer* beamformer);
virtual ~AudioProcessing() {}
// Initializes internal states, while retaining all user settings. This
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index 217ffae..931169e 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -18,6 +18,7 @@
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/test/test_utils.h"
@@ -278,6 +279,35 @@
fclose(file);
}
+// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
+// stereo) file, converts to deinterleaved float (optionally downmixing) and
+// returns the result in |cb|. Returns false if the file ended (or on error) and
+// true otherwise.
+//
+// |int_data| and |float_data| are just temporary space that must be
+// sufficiently large to hold the 10 ms chunk.
+bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
+ ChannelBuffer<float>* cb) {
+ // The files always contain stereo audio, so one chunk holds two interleaved
+ // samples per frame regardless of how many channels |cb| has.
+ size_t frame_size = cb->samples_per_channel() * 2;
+ size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
+ if (read_count != frame_size) {
+ // Check that the file really ended. A short read for any other reason trips
+ // the assert when assertions are enabled.
+ assert(feof(file));
+ return false; // This is expected.
+ }
+
+ // Convert the whole interleaved chunk to float before splitting channels.
+ S16ToFloat(int_data, frame_size, float_data);
+ if (cb->num_channels() == 1) {
+ // Mono destination: downmix the stereo chunk.
+ MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
+ } else {
+ // NOTE(review): this path writes two channels, so |cb| is assumed to have
+ // exactly two channels here - confirm against callers.
+ Deinterleave(float_data, cb->samples_per_channel(), 2,
+ cb->channels());
+ }
+
+ return true;
+}
+
class ApmTest : public ::testing::Test {
protected:
ApmTest();
@@ -1164,6 +1194,87 @@
}
}
+#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
+// Verifies that, with beamforming enabled, the AGC only adapts while the
+// beamformer reports that the target signal is present. A mock beamformer is
+// injected so that the test controls the value of is_target_present().
+TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
+ const int kSampleRateHz = 16000;
+ const int kSamplesPerChannel =
+ AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000;
+ const int kNumInputChannels = 2;
+ const int kNumOutputChannels = 1;
+ const int kNumChunks = 700;
+ const float kScaleFactor = 0.25f;
+ Config config;
+ // Two-microphone array (one entry per input channel), 0.05 apart along x
+ // (presumably meters - confirm against the Point definition).
+ std::vector<webrtc::Point> geometry;
+ geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
+ geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
+ config.Set<Beamforming>(new Beamforming(true, geometry));
+ // NOTE(review): the mock is handed over as a raw pointer and is never deleted
+ // here; the APM appears to take ownership via |beamformer_| - confirm.
+ testing::NiceMock<MockBeamformer>* beamformer =
+ new testing::NiceMock<MockBeamformer>(geometry);
+ scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config, beamformer));
+ EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
+ ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
+ ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
+ // Scratch space for ReadChunk(): one 10 ms chunk of the widest layout used.
+ const int max_length = kSamplesPerChannel * std::max(kNumInputChannels,
+ kNumOutputChannels);
+ scoped_ptr<int16_t[]> int_data(new int16_t[max_length]);
+ scoped_ptr<float[]> float_data(new float[max_length]);
+ std::string filename = ResourceFilePath("far", kSampleRateHz);
+ FILE* far_file = fopen(filename.c_str(), "rb");
+ ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
+ // Baseline values, captured before any audio has been processed.
+ const int kDefaultVolume = apm->gain_control()->stream_analog_level();
+ const int kDefaultCompressionGain =
+ apm->gain_control()->compression_gain_db();
+ // ReturnPointee() reads |is_target| on every call, so changing the variable
+ // later changes what the mock reports.
+ bool is_target = false;
+ EXPECT_CALL(*beamformer, is_target_present())
+ .WillRepeatedly(testing::ReturnPointee(&is_target));
+ // Phase 1: the target is reported as absent for every chunk.
+ for (int i = 0; i < kNumChunks; ++i) {
+ ASSERT_TRUE(ReadChunk(far_file,
+ int_data.get(),
+ float_data.get(),
+ &src_buf));
+ // Attenuate the input (presumably so that the AGC has a reason to raise the
+ // gain once it is allowed to adapt).
+ for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) {
+ src_buf.data()[j] *= kScaleFactor;
+ }
+ EXPECT_EQ(kNoErr,
+ apm->ProcessStream(src_buf.channels(),
+ src_buf.samples_per_channel(),
+ kSampleRateHz,
+ LayoutFromChannels(src_buf.num_channels()),
+ kSampleRateHz,
+ LayoutFromChannels(dest_buf.num_channels()),
+ dest_buf.channels()));
+ }
+ // Without a target the AGC must not have adapted.
+ EXPECT_EQ(kDefaultVolume,
+ apm->gain_control()->stream_analog_level());
+ EXPECT_EQ(kDefaultCompressionGain,
+ apm->gain_control()->compression_gain_db());
+ // Phase 2: replay the same audio with the target reported as present.
+ rewind(far_file);
+ is_target = true;
+ for (int i = 0; i < kNumChunks; ++i) {
+ ASSERT_TRUE(ReadChunk(far_file,
+ int_data.get(),
+ float_data.get(),
+ &src_buf));
+ for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) {
+ src_buf.data()[j] *= kScaleFactor;
+ }
+ EXPECT_EQ(kNoErr,
+ apm->ProcessStream(src_buf.channels(),
+ src_buf.samples_per_channel(),
+ kSampleRateHz,
+ LayoutFromChannels(src_buf.num_channels()),
+ kSampleRateHz,
+ LayoutFromChannels(dest_buf.num_channels()),
+ dest_buf.channels()));
+ }
+ // With the target present both the analog level and the compression gain are
+ // expected to end up above their initial values.
+ EXPECT_LT(kDefaultVolume,
+ apm->gain_control()->stream_analog_level());
+ EXPECT_LT(kDefaultCompressionGain,
+ apm->gain_control()->compression_gain_db());
+ ASSERT_EQ(0, fclose(far_file));
+}
+#endif
+
TEST_F(ApmTest, NoiseSuppression) {
// Test valid suppression levels.
NoiseSuppression::Level level[] = {
@@ -2031,35 +2142,6 @@
}
}
-// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
-// stereo) file, converts to deinterleaved float (optionally downmixing) and
-// returns the result in |cb|. Returns false if the file ended (or on error) and
-// true otherwise.
-//
-// |int_data| and |float_data| are just temporary space that must be
-// sufficiently large to hold the 10 ms chunk.
-bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
- ChannelBuffer<float>* cb) {
- // The files always contain stereo audio.
- size_t frame_size = cb->samples_per_channel() * 2;
- size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
- if (read_count != frame_size) {
- // Check that the file really ended.
- assert(feof(file));
- return false; // This is expected.
- }
-
- S16ToFloat(int_data, frame_size, float_data);
- if (cb->num_channels() == 1) {
- MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
- } else {
- Deinterleave(float_data, cb->samples_per_channel(), 2,
- cb->channels());
- }
-
- return true;
-}
-
// Compares the reference and test arrays over a region around the expected
// delay. Finds the highest SNR in that region and adds the variance and squared
// error results to the supplied accumulators.