Use VAD to get a better speech power estimation in the IntelligibilityEnhancer

R=henrik.lundin@webrtc.org, turaj@webrtc.org

Review URL: https://codereview.webrtc.org/1693823004 .

Cr-Commit-Position: refs/heads/master@{#11713}

commit: 18fcbcf48c190b6248cd16ef044d1edb79cba040 [log] [tgz]
author: Alejandro Luebs <aluebs@webrtc.org> Mon Feb 22 15:57:38 2016 -0800
committer: Alejandro Luebs <aluebs@webrtc.org> Mon Feb 22 23:57:45 2016 +0000
tree: bdcd3fdaf96f36c531ae5bfbedcb6e23ce24423a
parent: 67b81f92f4bd16b1d640553339b729cb17b3827e [diff]
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index e155171..bb746ee 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc

@@ -1184,8 +1184,7 @@
 }
 
 bool AudioProcessingImpl::is_rev_processed() const {
-  return constants_.intelligibility_enabled &&
-         public_submodules_->intelligibility_enhancer->active();
+  return constants_.intelligibility_enabled;
 }
 
 bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
@@ -1236,12 +1235,9 @@
 
 void AudioProcessingImpl::InitializeIntelligibility() {
   if (constants_.intelligibility_enabled) {
-    IntelligibilityEnhancer::Config config;
-    config.sample_rate_hz = capture_nonlocked_.split_rate;
-    config.num_capture_channels = capture_.capture_audio->num_channels();
-    config.num_render_channels = render_.render_audio->num_channels();
     public_submodules_->intelligibility_enhancer.reset(
-        new IntelligibilityEnhancer(config));
+        new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
+                                    render_.render_audio->num_channels()));
   }
 }
 

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
index f0050a2..8f0e7bf 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

@@ -27,11 +27,16 @@
 const size_t kErbResolution = 2;
 const int kWindowSizeMs = 16;
 const int kChunkSizeMs = 10;  // Size provided by APM.
-const float kClipFreq = 200.0f;
-const float kConfigRho = 0.02f;  // Default production and interpretation SNR.
+const float kClipFreqKhz = 0.2f;
 const float kKbdAlpha = 1.5f;
 const float kLambdaBot = -1.0f;      // Extreme values in bisection
 const float kLambdaTop = -10e-18f;  // search for lamda.
+const float kVoiceProbabilityThreshold = 0.02f;
+// Number of chunks after voice activity which is still considered speech.
+const size_t kSpeechOffsetDelay = 80;
+const float kDecayRate = 0.98f;              // Power estimation decay rate.
+const float kMaxRelativeGainChange = 0.04f;  // Maximum relative change in gain.
+const float kRho = 0.0004f;  // Default production and interpretation SNR.
 
 // Returns dot product of vectors |a| and |b| with size |length|.
 float DotProduct(const float* a, const float* b, size_t length) {
@@ -72,61 +77,46 @@
   }
 }
 
-IntelligibilityEnhancer::IntelligibilityEnhancer()
-    : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
-}
-
-IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
+IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
+                                                 size_t num_render_channels)
     : freqs_(RealFourier::ComplexLength(
-          RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
-      window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
-      chunk_length_(
-          static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),
-      bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
-      sample_rate_hz_(config.sample_rate_hz),
-      erb_resolution_(kErbResolution),
-      num_capture_channels_(config.num_capture_channels),
-      num_render_channels_(config.num_render_channels),
-      analysis_rate_(config.analysis_rate),
-      active_(true),
-      clear_power_(freqs_, config.decay_rate),
-      noise_power_(freqs_, 0.f),
+          RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
+      chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
+      bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
+      sample_rate_hz_(sample_rate_hz),
+      num_render_channels_(num_render_channels),
+      clear_power_estimator_(freqs_, kDecayRate),
+      noise_power_estimator_(
+          new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)),
       filtered_clear_pow_(new float[bank_size_]),
       filtered_noise_pow_(new float[bank_size_]),
       center_freqs_(new float[bank_size_]),
       render_filter_bank_(CreateErbBank(freqs_)),
-      rho_(new float[bank_size_]),
       gains_eq_(new float[bank_size_]),
-      gain_applier_(freqs_, config.gain_change_limit),
+      gain_applier_(freqs_, kMaxRelativeGainChange),
       temp_render_out_buffer_(chunk_length_, num_render_channels_),
-      kbd_window_(new float[window_size_]),
       render_callback_(this),
-      block_count_(0),
-      analysis_step_(0) {
-  RTC_DCHECK_LE(config.rho, 1.0f);
+      audio_s16_(chunk_length_),
+      chunks_since_voice_(kSpeechOffsetDelay),
+      is_speech_(false) {
+  RTC_DCHECK_LE(kRho, 1.f);
 
-  memset(filtered_clear_pow_.get(),
-         0,
+  memset(filtered_clear_pow_.get(), 0,
          bank_size_ * sizeof(filtered_clear_pow_[0]));
-  memset(filtered_noise_pow_.get(),
-         0,
+  memset(filtered_noise_pow_.get(), 0,
          bank_size_ * sizeof(filtered_noise_pow_[0]));
 
-  // Assumes all rho equal.
-  for (size_t i = 0; i < bank_size_; ++i) {
-    rho_[i] = config.rho * config.rho;
-  }
+  const size_t erb_index = static_cast<size_t>(
+      ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
+            43.f));
+  start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
 
-  float freqs_khz = kClipFreq / 1000.0f;
-  size_t erb_index = static_cast<size_t>(ceilf(
-      11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
-  start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_);
-
-  WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
-                                       kbd_window_.get());
+  size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_));
+  std::vector<float> kbd_window(window_size);
+  WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, &kbd_window[0]);
   render_mangler_.reset(new LappedTransform(
-      num_render_channels_, num_render_channels_, chunk_length_,
-      kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
+      num_render_channels_, num_render_channels_, chunk_length_, &kbd_window[0],
+      window_size, window_size / 2, &render_callback_));
 }
 
 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
@@ -134,13 +124,10 @@
   if (capture_filter_bank_.size() != bank_size_ ||
       capture_filter_bank_[0].size() != noise.size()) {
     capture_filter_bank_ = CreateErbBank(noise.size());
+    noise_power_estimator_.reset(
+        new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate));
   }
-  if (noise.size() != noise_power_.size()) {
-    noise_power_.resize(noise.size());
-  }
-  for (size_t i = 0; i < noise.size(); ++i) {
-    noise_power_[i] = noise[i] * noise[i];
-  }
+  noise_power_estimator_->Step(&noise[0]);
 }
 
 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
@@ -148,54 +135,29 @@
                                                  size_t num_channels) {
   RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
   RTC_CHECK_EQ(num_render_channels_, num_channels);
-
-  if (active_) {
-    render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
-  }
-
-  if (active_) {
-    for (size_t i = 0; i < num_render_channels_; ++i) {
-      memcpy(audio[i], temp_render_out_buffer_.channels()[i],
-             chunk_length_ * sizeof(**audio));
-    }
+  is_speech_ = IsSpeech(audio[0]);
+  render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
+  for (size_t i = 0; i < num_render_channels_; ++i) {
+    memcpy(audio[i], temp_render_out_buffer_.channels()[i],
+           chunk_length_ * sizeof(**audio));
   }
 }
 
 void IntelligibilityEnhancer::ProcessClearBlock(
     const std::complex<float>* in_block,
     std::complex<float>* out_block) {
-  if (block_count_ < 2) {
-    memset(out_block, 0, freqs_ * sizeof(*out_block));
-    ++block_count_;
-    return;
+  if (is_speech_) {
+    clear_power_estimator_.Step(in_block);
   }
-
-  // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary.
-  if (true) {
-    clear_power_.Step(in_block);
-    if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
-      AnalyzeClearBlock();
-      ++analysis_step_;
-    }
-    ++block_count_;
-  }
-
-  if (active_) {
-    gain_applier_.Apply(in_block, out_block);
-  }
-}
-
-void IntelligibilityEnhancer::AnalyzeClearBlock() {
-  const float* clear_power = clear_power_.Power();
-  MapToErbBands(clear_power,
-                render_filter_bank_,
+  const std::vector<float>& clear_power = clear_power_estimator_.power();
+  const std::vector<float>& noise_power = noise_power_estimator_->power();
+  MapToErbBands(&clear_power[0], render_filter_bank_,
                 filtered_clear_pow_.get());
-  MapToErbBands(&noise_power_[0],
-                capture_filter_bank_,
+  MapToErbBands(&noise_power[0], capture_filter_bank_,
                 filtered_noise_pow_.get());
   SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
-  const float power_target = std::accumulate(
-          clear_power, clear_power + freqs_, 0.f);
+  const float power_target =
+      std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f);
   const float power_top =
       DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
   SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
@@ -205,6 +167,7 @@
     SolveForLambda(power_target, power_bot, power_top);
     UpdateErbGains();
   }  // Else experiencing power underflow, so do nothing.
+  gain_applier_.Apply(in_block, out_block);
 }
 
 void IntelligibilityEnhancer::SolveForLambda(float power_target,
@@ -217,11 +180,10 @@
       1.f / (power_target + std::numeric_limits<float>::epsilon());
   float lambda_bot = kLambdaBot;
   float lambda_top = kLambdaTop;
-  float power_ratio = 2.0f;  // Ratio of achieved power to target power.
+  float power_ratio = 2.f;  // Ratio of achieved power to target power.
   int iters = 0;
-  while (std::fabs(power_ratio - 1.0f) > kConvergeThresh &&
-         iters <= kMaxIters) {
-    const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
+  while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
+    const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.f;
     SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
     const float power =
         DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
@@ -239,7 +201,7 @@
   // (ERB gain) = filterbank' * (freq gain)
   float* gains = gain_applier_.target();
   for (size_t i = 0; i < freqs_; ++i) {
-    gains[i] = 0.0f;
+    gains[i] = 0.f;
     for (size_t j = 0; j < bank_size_; ++j) {
       gains[i] = fmaf(render_filter_bank_[j][i], gains_eq_[j], gains[i]);
     }
@@ -248,9 +210,9 @@
 
 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
                                             size_t erb_resolution) {
-  float freq_limit = sample_rate / 2000.0f;
+  float freq_limit = sample_rate / 2000.f;
   size_t erb_scale = static_cast<size_t>(ceilf(
-      11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));
+      11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f));
   return erb_scale * erb_resolution;
 }
 
@@ -260,7 +222,7 @@
   size_t lf = 1, rf = 4;
 
   for (size_t i = 0; i < bank_size_; ++i) {
-    float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));
+    float abs_temp = fabsf((i + 1.f) / static_cast<float>(kErbResolution));
     center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
     center_freqs_[i] -= 14678.49f;
   }
@@ -274,48 +236,43 @@
   }
 
   for (size_t i = 1; i <= bank_size_; ++i) {
-    size_t lll, ll, rr, rrr;
     static const size_t kOne = 1;  // Avoids repeated static_cast<>s below.
-    lll = static_cast<size_t>(round(
-        center_freqs_[std::max(kOne, i - lf) - 1] * num_freqs /
-            (0.5f * sample_rate_hz_)));
-    ll = static_cast<size_t>(round(
-        center_freqs_[std::max(kOne, i) - 1] * num_freqs /
-            (0.5f * sample_rate_hz_)));
+    size_t lll =
+        static_cast<size_t>(round(center_freqs_[std::max(kOne, i - lf) - 1] *
+                                  num_freqs / (0.5f * sample_rate_hz_)));
+    size_t ll = static_cast<size_t>(round(center_freqs_[std::max(kOne, i) - 1] *
+                                   num_freqs / (0.5f * sample_rate_hz_)));
     lll = std::min(num_freqs, std::max(lll, kOne)) - 1;
     ll = std::min(num_freqs, std::max(ll, kOne)) - 1;
 
-    rrr = static_cast<size_t>(round(
-        center_freqs_[std::min(bank_size_, i + rf) - 1] * num_freqs /
-            (0.5f * sample_rate_hz_)));
-    rr = static_cast<size_t>(round(
-        center_freqs_[std::min(bank_size_, i + 1) - 1] * num_freqs /
-            (0.5f * sample_rate_hz_)));
+    size_t rrr = static_cast<size_t>(
+        round(center_freqs_[std::min(bank_size_, i + rf) - 1] * num_freqs /
+              (0.5f * sample_rate_hz_)));
+    size_t rr = static_cast<size_t>(
+        round(center_freqs_[std::min(bank_size_, i + 1) - 1] * num_freqs /
+              (0.5f * sample_rate_hz_)));
     rrr = std::min(num_freqs, std::max(rrr, kOne)) - 1;
     rr = std::min(num_freqs, std::max(rr, kOne)) - 1;
 
-    float step, element;
-
-    step = ll == lll ? 0.f : 1.f / (ll - lll);
-    element = 0.0f;
+    float step = ll == lll ? 0.f : 1.f / (ll - lll);
+    float element = 0.f;
     for (size_t j = lll; j <= ll; ++j) {
       filter_bank[i - 1][j] = element;
       element += step;
     }
     step = rr == rrr ? 0.f : 1.f / (rrr - rr);
-    element = 1.0f;
+    element = 1.f;
     for (size_t j = rr; j <= rrr; ++j) {
       filter_bank[i - 1][j] = element;
       element -= step;
     }
     for (size_t j = ll; j <= rr; ++j) {
-      filter_bank[i - 1][j] = 1.0f;
+      filter_bank[i - 1][j] = 1.f;
     }
   }
 
-  float sum;
   for (size_t i = 0; i < num_freqs; ++i) {
-    sum = 0.0f;
+    float sum = 0.f;
     for (size_t j = 0; j < bank_size_; ++j) {
       sum += filter_bank[j][i];
     }
@@ -329,22 +286,22 @@
 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
                                                        size_t start_freq,
                                                        float* sols) {
-  bool quadratic = (kConfigRho < 1.0f);
+  bool quadratic = (kRho < 1.f);
   const float* pow_x0 = filtered_clear_pow_.get();
   const float* pow_n0 = filtered_noise_pow_.get();
 
   for (size_t n = 0; n < start_freq; ++n) {
-    sols[n] = 1.0f;
+    sols[n] = 1.f;
   }
 
   // Analytic solution for optimal gains. See paper for derivation.
   for (size_t n = start_freq - 1; n < bank_size_; ++n) {
     float alpha0, beta0, gamma0;
-    gamma0 = 0.5f * rho_[n] * pow_x0[n] * pow_n0[n] +
+    gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
              lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
-    beta0 = lambda * pow_x0[n] * (2 - rho_[n]) * pow_x0[n] * pow_n0[n];
+    beta0 = lambda * pow_x0[n] * (2 - kRho) * pow_x0[n] * pow_n0[n];
     if (quadratic) {
-      alpha0 = lambda * pow_x0[n] * (1 - rho_[n]) * pow_x0[n] * pow_x0[n];
+      alpha0 = lambda * pow_x0[n] * (1 - kRho) * pow_x0[n] * pow_x0[n];
       sols[n] =
           (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) /
           (2 * alpha0 + std::numeric_limits<float>::epsilon());
@@ -355,8 +312,15 @@
   }
 }
 
-bool IntelligibilityEnhancer::active() const {
-  return active_;
+bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
+  FloatToS16(audio, chunk_length_, &audio_s16_[0]);
+  vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_);
+  if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
+    chunks_since_voice_ = 0;
+  } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
+    ++chunks_since_voice_;
+  }
+  return chunks_since_voice_ < kSpeechOffsetDelay;
 }
 
 }  // namespace webrtc

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
index 2deb4d2..c18bac0 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

@@ -18,6 +18,7 @@
 #include "webrtc/common_audio/lapped_transform.h"
 #include "webrtc/common_audio/channel_buffer.h"
 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
 
 namespace webrtc {
 
@@ -28,28 +29,7 @@
 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
 class IntelligibilityEnhancer {
  public:
-  struct Config {
-    // TODO(bercic): the |decay_rate|, |analysis_rate| and |gain_limit|
-    // parameters should probably go away once fine tuning is done.
-    Config()
-        : sample_rate_hz(16000),
-          num_capture_channels(1),
-          num_render_channels(1),
-          decay_rate(0.9f),
-          analysis_rate(60),
-          gain_change_limit(0.1f),
-          rho(0.02f) {}
-    int sample_rate_hz;
-    size_t num_capture_channels;
-    size_t num_render_channels;
-    float decay_rate;
-    int analysis_rate;
-    float gain_change_limit;
-    float rho;
-  };
-
-  explicit IntelligibilityEnhancer(const Config& config);
-  IntelligibilityEnhancer();  // Initialize with default config.
+  IntelligibilityEnhancer(int sample_rate_hz, size_t num_render_channels);
 
   // Sets the capture noise magnitude spectrum estimate.
   void SetCaptureNoiseEstimate(std::vector<float> noise);
@@ -86,9 +66,6 @@
   void ProcessClearBlock(const std::complex<float>* in_block,
                          std::complex<float>* out_block);
 
-  // Computes and sets modified gains.
-  void AnalyzeClearBlock();
-
   // Bisection search for optimal |lambda|.
   void SolveForLambda(float power_target, float power_bot, float power_top);
 
@@ -105,29 +82,25 @@
   // Negative gains are set to 0. Stores the results in |sols|.
   void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
 
+  // Returns true if the audio is speech.
+  bool IsSpeech(const float* audio);
+
   const size_t freqs_;         // Num frequencies in frequency domain.
-  const size_t window_size_;   // Window size in samples; also the block size.
   const size_t chunk_length_;  // Chunk size in samples.
   const size_t bank_size_;     // Num ERB filters.
   const int sample_rate_hz_;
-  const int erb_resolution_;
-  const size_t num_capture_channels_;
   const size_t num_render_channels_;
-  const int analysis_rate_;    // Num blocks before gains recalculated.
 
-  const bool active_;          // Whether render gains are being updated.
-                               // TODO(ekm): Add logic for updating |active_|.
-
-  intelligibility::PowerEstimator clear_power_;
-  std::vector<float> noise_power_;
+  intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
+  std::unique_ptr<intelligibility::PowerEstimator<float>>
+      noise_power_estimator_;
   std::unique_ptr<float[]> filtered_clear_pow_;
   std::unique_ptr<float[]> filtered_noise_pow_;
   std::unique_ptr<float[]> center_freqs_;
   std::vector<std::vector<float>> capture_filter_bank_;
   std::vector<std::vector<float>> render_filter_bank_;
   size_t start_freq_;
-  std::unique_ptr<float[]> rho_;  // Production and interpretation SNR.
-                                  // for each ERB band.
+
   std::unique_ptr<float[]> gains_eq_;  // Pre-filter modified gains.
   intelligibility::GainApplier gain_applier_;
 
@@ -135,11 +108,13 @@
   // the original input array with modifications.
   ChannelBuffer<float> temp_render_out_buffer_;
 
-  std::unique_ptr<float[]> kbd_window_;
   TransformCallback render_callback_;
   std::unique_ptr<LappedTransform> render_mangler_;
-  int block_count_;
-  int analysis_step_;
+
+  VoiceActivityDetector vad_;
+  std::vector<int16_t> audio_s16_;
+  size_t chunks_since_voice_;
+  bool is_speech_;
 };
 
 }  // namespace webrtc

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
index b0f94ec..b59ae36 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc

@@ -26,54 +26,184 @@
 
 // Target output for ERB create test. Generated with matlab.
 const float kTestCenterFreqs[] = {
-    13.169f, 26.965f, 41.423f, 56.577f, 72.461f, 89.113f, 106.57f, 124.88f,
-    144.08f, 164.21f, 185.34f, 207.5f,  230.75f, 255.16f, 280.77f, 307.66f,
-    335.9f,  365.56f, 396.71f, 429.44f, 463.84f, 500.f};
-const float kTestFilterBank[][9] = {
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.5f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.5f}};
+    14.5213f, 29.735f,  45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f,
+    137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f,
+    309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f,
+    551.371f, 593.293f, 637.386f, 683.77f,  732.581f, 783.96f,  838.06f,
+    895.046f, 955.09f,  1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f,
+    1391.22f, 1478.83f, 1571.5f,  1669.55f, 1773.37f, 1883.37f, 2000.f};
+const float kTestFilterBank[][33] = {
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f},
+    {0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f,
+     0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f,
+     0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,       0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f,
+     0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f,
+     0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f, 0.f, 0.f},
+    {0.f,       0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.333333f,
+     0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,       0.f,        0.f, 0.f},
+    {0.f,       0.f,       0.f,   0.f,       0.f,        0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,   0.f,       0.f,        0.f, 0.f, 0.f, 0.f,
+     0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,   0.f,       0.f,        0.f},
+    {0.f,       0.f,   0.f,       0.f,        0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f, 0.307692f, 0.333333f,
+     0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f},
+    {0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.166667f,  0.363636f, 0.333333f, 0.242424f,
+     0.190476f, 0.133333f, 0.0689655f, 0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f},
+    {0.f,        0.f, 0.f, 0.f, 0.f,       0.f,      0.f,       0.f,  0.f,
+     0.f,        0.f, 0.f, 0.f, 0.f,       0.f,      0.f,       0.f,  0.f,
+     0.f,        0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f,
+     0.0714286f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f,
+     0.125f, 0.0655738f, 0.f,      0.f,      0.f},
+    {0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.15873f,   0.333333f, 0.344828f, 0.357143f,
+     0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f},
+    {0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.172414f, 0.357143f,
+     0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f},
+    {0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}};
 static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
               "Test filterbank badly initialized.");
 
 // Target output for gain solving test. Generated with matlab.
 const size_t kTestStartFreq = 12;  // Lowest integral frequency for ERBs.
-const float kTestZeroVar[] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f,
-                              1.f, 1.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f,
-                              0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+const float kTestZeroVar[] = {
+    1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0};
 static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestZeroVar),
               "Power test data badly initialized.");
 const float kTestNonZeroVarLambdaTop[] = {
-    1.f,     1.f,     1.f,     1.f,     1.f,     1.f,     1.f,     1.f,
-    1.f,     1.f,     1.f,     0.f,     0.f,     0.0351f, 0.0636f, 0.0863f,
-    0.1037f, 0.1162f, 0.1236f, 0.1251f, 0.1189f, 0.0993f};
+    1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0};
 static_assert(arraysize(kTestCenterFreqs) ==
                   arraysize(kTestNonZeroVarLambdaTop),
               "Power test data badly initialized.");
 const float kMaxTestError = 0.005f;
 
 // Enhancer initialization parameters.
-const int kSamples = 2000;
-const int kSampleRate = 1000;
+const int kSamples = 1000;
+const int kSampleRate = 4000;
 const int kNumChannels = 1;
 const int kFragmentSize = kSampleRate / 100;
 
@@ -83,13 +213,11 @@
  protected:
   IntelligibilityEnhancerTest()
       : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) {
-    config_.sample_rate_hz = kSampleRate;
-    enh_.reset(new IntelligibilityEnhancer(config_));
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels));
   }
 
   bool CheckUpdate() {
-    config_.sample_rate_hz = kSampleRate;
-    enh_.reset(new IntelligibilityEnhancer(config_));
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels));
     float* clear_cursor = &clear_data_[0];
     float* noise_cursor = &noise_data_[0];
     for (int i = 0; i < kSamples; i += kFragmentSize) {
@@ -105,7 +233,6 @@
     return false;
   }
 
-  IntelligibilityEnhancer::Config config_;
   std::unique_ptr<IntelligibilityEnhancer> enh_;
   std::vector<float> clear_data_;
   std::vector<float> noise_data_;
@@ -115,9 +242,9 @@
 // For each class of generated data, tests that render stream is updated when
 // it should be.
 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
-  std::fill(noise_data_.begin(), noise_data_.end(), 0.0f);
-  std::fill(orig_data_.begin(), orig_data_.end(), 0.0f);
-  std::fill(clear_data_.begin(), clear_data_.end(), 0.0f);
+  std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
+  std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
+  std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
   EXPECT_FALSE(CheckUpdate());
   std::srand(1);
   auto float_rand = []() { return std::rand() * 2.f / RAND_MAX - 1; };
@@ -148,9 +275,8 @@
   std::vector<float> sols(enh_->bank_size_);
   float lambda = -0.001f;
   for (size_t i = 0; i < enh_->bank_size_; i++) {
-    enh_->filtered_clear_pow_[i] = 0.0f;
-    enh_->filtered_noise_pow_[i] = 0.0f;
-    enh_->rho_[i] = 0.02f;
+    enh_->filtered_clear_pow_[i] = 0.f;
+    enh_->filtered_noise_pow_[i] = 0.f;
   }
   enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, &sols[0]);
   for (size_t i = 0; i < enh_->bank_size_; i++) {
@@ -164,7 +290,7 @@
   for (size_t i = 0; i < enh_->bank_size_; i++) {
     EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
   }
-  lambda = -1.0;
+  lambda = -1.f;
   enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, &sols[0]);
   for (size_t i = 0; i < enh_->bank_size_; i++) {
     EXPECT_NEAR(kTestZeroVar[i], sols[i], kMaxTestError);

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
index 6c44415..6d37199 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc

@@ -14,6 +14,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <algorithm>
+#include <limits>
 
 namespace webrtc {
 
@@ -21,45 +22,38 @@
 
 namespace {
 
-// Return |current| changed towards |target|, with the change being at most
-// |limit|.
+// Returns |current| moved towards |target|, with the relative change per call
+// limited to |limit| (the applied gain is clamped to [1 - limit, 1 + limit]).
 float UpdateFactor(float target, float current, float limit) {
-  float delta = fabsf(target - current);
-  float sign = copysign(1.f, target - current);
-  return current + sign * fminf(delta, limit);
+  float gain = target / (current + std::numeric_limits<float>::epsilon());
+  if (gain < 1.f - limit) {
+    gain = 1.f - limit;
+  } else if (gain > 1.f + limit) {
+    gain = 1.f + limit;
+  }
+  return current * gain + std::numeric_limits<float>::epsilon();
 }
 
 }  // namespace
 
-PowerEstimator::PowerEstimator(size_t num_freqs,
-                               float decay)
-    : magnitude_(new float[num_freqs]()),
-      power_(new float[num_freqs]()),
-      num_freqs_(num_freqs),
-      decay_(decay) {
-  memset(magnitude_.get(), 0, sizeof(*magnitude_.get()) * num_freqs_);
-  memset(power_.get(), 0, sizeof(*power_.get()) * num_freqs_);
-}
+template<typename T>
+PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
+    : power_(num_freqs, 0.f), decay_(decay) {}
 
-// Compute the magnitude from the beginning, with exponential decaying of the
-// series data.
-void PowerEstimator::Step(const std::complex<float>* data) {
-  for (size_t i = 0; i < num_freqs_; ++i) {
-    magnitude_[i] = decay_ * magnitude_[i] +
-                (1.f - decay_) * std::abs(data[i]);
+template<typename T>
+void PowerEstimator<T>::Step(const T* data) {
+  for (size_t i = 0; i < power_.size(); ++i) {
+    power_[i] = decay_ * power_[i] +
+                (1.f - decay_) * std::abs(data[i]) * std::abs(data[i]);
   }
 }
 
-const float* PowerEstimator::Power() {
-  for (size_t i = 0; i < num_freqs_; ++i) {
-    power_[i] = magnitude_[i] * magnitude_[i];
-  }
-  return &power_[0];
-}
+template class PowerEstimator<float>;
+template class PowerEstimator<std::complex<float>>;
 
-GainApplier::GainApplier(size_t freqs, float change_limit)
+GainApplier::GainApplier(size_t freqs, float relative_change_limit)
     : num_freqs_(freqs),
-      change_limit_(change_limit),
+      relative_change_limit_(relative_change_limit),
       target_(new float[freqs]()),
       current_(new float[freqs]()) {
   for (size_t i = 0; i < freqs; ++i) {
@@ -71,12 +65,8 @@
 void GainApplier::Apply(const std::complex<float>* in_block,
                         std::complex<float>* out_block) {
   for (size_t i = 0; i < num_freqs_; ++i) {
-    float factor = sqrtf(fabsf(current_[i]));
-    if (!std::isnormal(factor)) {
-      factor = 1.f;
-    }
-    out_block[i] = factor * in_block[i];
-    current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);
+    current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_);
+    out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i];
   }
 }
 

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
index 8858cff..3805a0c 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h

@@ -13,6 +13,7 @@
 
 #include <complex>
 #include <memory>
+#include <vector>
 
 namespace webrtc {
 
@@ -21,6 +22,7 @@
 // Internal helper for computing the power of a stream of arrays.
 // The result is an array of power per position: the i-th power is the power of
 // the stream of data on the i-th positions in the input arrays.
+template <typename T>
 class PowerEstimator {
  public:
   // Construct an instance for the given input array length (|freqs|), with the
@@ -28,31 +30,24 @@
   PowerEstimator(size_t freqs, float decay);
 
   // Add a new data point to the series.
-  void Step(const std::complex<float>* data);
+  void Step(const T* data);
 
   // The current power array.
-  const float* Power();
+  const std::vector<float>& power() { return power_; };
 
  private:
-  // TODO(ekmeyerson): Switch the following running means
-  // and histories from std::unique_ptr to std::vector.
-  std::unique_ptr<std::complex<float>[]> running_mean_sq_;
-
-  // The current magnitude array.
-  std::unique_ptr<float[]> magnitude_;
   // The current power array.
-  std::unique_ptr<float[]> power_;
+  std::vector<float> power_;
 
-  const size_t num_freqs_;
   const float decay_;
 };
 
 // Helper class for smoothing gain changes. On each application step, the
 // currently used gains are changed towards a set of settable target gains,
-// constrained by a limit on the magnitude of the changes.
+// constrained by a limit on the relative changes.
 class GainApplier {
  public:
-  GainApplier(size_t freqs, float change_limit);
+  GainApplier(size_t freqs, float relative_change_limit);
 
   // Copy |in_block| to |out_block|, multiplied by the current set of gains,
   // and step the current set of gains towards the target set.
@@ -64,7 +59,7 @@
 
  private:
   const size_t num_freqs_;
-  const float change_limit_;
+  const float relative_change_limit_;
   std::unique_ptr<float[]> target_;
   std::unique_ptr<float[]> current_;
 };

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
index 43ad9a7..28957bb 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc

@@ -39,17 +39,16 @@
   const float kDecay = 0.5f;
   const std::vector<std::vector<std::complex<float>>> test_data(
       GenerateTestData(kFreqs, kSamples));
-  PowerEstimator power_estimator(kFreqs, kDecay);
-  EXPECT_EQ(0, power_estimator.Power()[0]);
+  PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay);
+  EXPECT_EQ(0, power_estimator.power()[0]);
 
   // Makes sure Step is doing something.
   power_estimator.Step(&test_data[0][0]);
   for (size_t i = 1; i < kSamples; ++i) {
     power_estimator.Step(&test_data[i][0]);
     for (size_t j = 0; j < kFreqs; ++j) {
-      const float* power = power_estimator.Power();
-      EXPECT_GE(power[j], 0.f);
-      EXPECT_LE(power[j], 1.f);
+      EXPECT_GE(power_estimator.power()[j], 0.f);
+      EXPECT_LE(power_estimator.power()[j], 1.f);
     }
   }
 }
@@ -62,8 +61,8 @@
   GainApplier gain_applier(kFreqs, kChangeLimit);
   const std::vector<std::vector<std::complex<float>>> in_data(
       GenerateTestData(kFreqs, kSamples));
-  std::vector<std::vector<std::complex<float>>> out_data(GenerateTestData(
-      kFreqs, kSamples));
+  std::vector<std::vector<std::complex<float>>> out_data(
+      GenerateTestData(kFreqs, kSamples));
   for (size_t i = 0; i < kSamples; ++i) {
     gain_applier.Apply(&in_data[i][0], &out_data[i][0]);
     for (size_t j = 0; j < kFreqs; ++j) {

diff --git a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
index 1ec85f0..ab8524b 100644
--- a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
+++ b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc

@@ -30,44 +30,24 @@
 namespace webrtc {
 namespace {
 
-DEFINE_double(clear_alpha, 0.9, "Power decay factor for clear data.");
-DEFINE_int32(sample_rate,
-             16000,
-             "Audio sample rate used in the input and output files.");
-DEFINE_int32(ana_rate,
-             60,
-             "Analysis rate; gains recalculated every N blocks.");
-DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");
-
 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
 
-const size_t kNumChannels = 1;
-
 // void function for gtest
 void void_main(int argc, char* argv[]) {
   google::SetUsageMessage(
       "\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
   google::ParseCommandLineFlags(&argc, &argv, true);
 
-  size_t samples;        // Number of samples in input PCM file
-  size_t fragment_size;  // Number of samples to process at a time
-                         // to simulate APM stream processing
-
   // Load settings and wav input.
-
-  fragment_size = FLAGS_sample_rate / 100;  // Mirror real time APM chunk size.
-                                            // Duplicates chunk_length_ in
-                                            // IntelligibilityEnhancer.
-
   struct stat in_stat, noise_stat;
   ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0)
       << "Empty speech file.";
   ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0)
       << "Empty noise file.";
 
-  samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;
+  const size_t samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;
 
   WavReader in_file(FLAGS_clear_file);
   std::vector<float> in_fpcm(samples);
@@ -80,23 +60,19 @@
   FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]);
 
   // Run intelligibility enhancement.
-  IntelligibilityEnhancer::Config config;
-  config.sample_rate_hz = FLAGS_sample_rate;
-  config.decay_rate = static_cast<float>(FLAGS_clear_alpha);
-  config.analysis_rate = FLAGS_ana_rate;
-  config.gain_change_limit = FLAGS_gain_limit;
-  IntelligibilityEnhancer enh(config);
+  IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels());
   rtc::CriticalSection crit;
   NoiseSuppressionImpl ns(&crit);
-  ns.Initialize(kNumChannels, FLAGS_sample_rate);
+  ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
   ns.Enable(true);
 
-  AudioBuffer capture_audio(fragment_size,
-                            kNumChannels,
-                            fragment_size,
-                            kNumChannels,
+  // Mirror the real-time APM chunk size (10 ms). Duplicates chunk_length_ in
+  // IntelligibilityEnhancer.
+  size_t fragment_size = in_file.sample_rate() / 100;
+  AudioBuffer capture_audio(fragment_size, noise_file.num_channels(),
+                            fragment_size, noise_file.num_channels(),
                             fragment_size);
-  StreamConfig stream_config(FLAGS_sample_rate, kNumChannels);
+  StreamConfig stream_config(in_file.sample_rate(), noise_file.num_channels());
 
   // Slice the input into smaller chunks, as the APM would do, and feed them
   // through the enhancer.
@@ -108,14 +84,17 @@
     ns.AnalyzeCaptureAudio(&capture_audio);
     ns.ProcessCaptureAudio(&capture_audio);
     enh.SetCaptureNoiseEstimate(ns.NoiseEstimate());
-    enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels);
+    enh.ProcessRenderAudio(&clear_cursor, in_file.sample_rate(),
+                           in_file.num_channels());
     clear_cursor += fragment_size;
     noise_cursor += fragment_size;
   }
 
   FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]);
 
-  WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels);
+  WavWriter out_file(FLAGS_out_file,
+                     in_file.sample_rate(),
+                     in_file.num_channels());
   out_file.WriteSamples(&in_fpcm[0], samples);
 }
 

diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.cc b/webrtc/modules/audio_processing/noise_suppression_impl.cc
index 076f1ba..7f19005 100644
--- a/webrtc/modules/audio_processing/noise_suppression_impl.cc
+++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc

@@ -182,8 +182,8 @@
   for (auto& suppressor : suppressors_) {
     const float* noise = WebRtcNs_noise_estimate(suppressor->state());
     for (size_t i = 0; i < noise_estimate.size(); ++i) {
-      noise_estimate[i] += kNormalizationFactor *
-          noise[i] / suppressors_.size();
+      noise_estimate[i] +=
+          kNormalizationFactor * noise[i] / suppressors_.size();
     }
   }
 #elif defined(WEBRTC_NS_FIXED)
commit	18fcbcf48c190b6248cd16ef044d1edb79cba040	[log] [tgz]
author	Alejandro Luebs <aluebs@webrtc.org>	Mon Feb 22 15:57:38 2016 -0800
committer	Alejandro Luebs <aluebs@webrtc.org>	Mon Feb 22 23:57:45 2016 +0000
tree	bdcd3fdaf96f36c531ae5bfbedcb6e23ce24423a
parent	67b81f92f4bd16b1d640553339b729cb17b3827e [diff]