Rename neteq4 folder to neteq Keep the old neteq4/audio_decoder_unittests.isolate while waiting for a hard-coded reference to change. This CL effectively reverts r6257 "Rename neteq4 folder to neteq". BUG=2996 TBR=tina.legrand@webrtc.org Review URL: https://webrtc-codereview.appspot.com/21629004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@6367 4adac7df-926f-26a2-2b94-8c16560cd09d

commit: 9c55f0f957534144d2b8a64154f0a479249b34be [log] [tgz]
author: henrik.lundin@webrtc.org <henrik.lundin@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> Mon Jun 09 08:10:28 2014 +0000
committer: henrik.lundin@webrtc.org <henrik.lundin@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> Mon Jun 09 08:10:28 2014 +0000
tree: 630c80fc929a34dc5e8a8a70d42203e5d892aec8
parent: 31f967c611997a106c3d781d21dd4464d1a1273e [diff] [blame]
diff --git a/webrtc/modules/audio_coding/neteq/time_stretch.cc b/webrtc/modules/audio_coding/neteq/time_stretch.cc
new file mode 100644
index 0000000..a9228d4
--- /dev/null
+++ b/webrtc/modules/audio_coding/neteq/time_stretch.cc

@@ -0,0 +1,216 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/neteq/time_stretch.h"
+
+#include <algorithm>  // min, max
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_coding/neteq/background_noise.h"
+#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// Analyzes one channel of the interleaved |input| to find its strongest
+// correlation peak (the pitch period, |peak_index|) and the normalized
+// correlation between the two |peak_index|-sample segments around the 15 ms
+// mark. These are handed to CheckCriteriaAndStretch(), which writes the result
+// to |output|. On kSuccess and kSuccessLowEnergy, |length_change_samples| is
+// set to |peak_index|; on kNoStretch and kError it is set to 0.
+TimeStretch::ReturnCodes TimeStretch::Process(
+    const int16_t* input,
+    size_t input_len,
+    AudioMultiVector* output,
+    int16_t* length_change_samples) {
+
+  // The analysis runs on a single channel. For mono input that is |input|
+  // itself; otherwise the master channel is copied out of the interleaved
+  // data into a temporary buffer.
+  const int16_t* signal = input;
+  size_t signal_len = input_len;
+  scoped_ptr<int16_t[]> mono_copy;
+  if (num_channels_ != 1) {
+    // Start at the master channel's first sample and step |num_channels_|
+    // samples forward for each output sample.
+    signal_len = input_len / num_channels_;
+    mono_copy.reset(new int16_t[signal_len]);
+    size_t read_pos = master_channel_;
+    for (size_t i = 0; i < signal_len; ++i, read_pos += num_channels_) {
+      mono_copy[i] = input[read_pos];
+    }
+    signal = mono_copy.get();
+  }
+
+  // The largest absolute sample value is used for the overflow-protecting
+  // scale factors, both here and in AutoCorrelation().
+  max_input_value_ =
+      WebRtcSpl_MaxAbsValueW16(signal, static_cast<int>(signal_len));
+
+  // Auto-correlate a version of the signal downsampled to 4 kHz.
+  DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen,
+                              sample_rate_hz_, true /* compensate delay*/,
+                              downsampled_input_);
+  AutoCorrelation();
+
+  // Locate the single strongest correlation peak.
+  static const int kNumPeaks = 1;
+  int16_t peak_value;
+  int peak_index;
+  DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks,
+                           fs_mult_, &peak_index, &peak_value);
+  // Assert that |peak_index| stays within boundaries.
+  assert(peak_index >= 0);
+  assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_);
+
+  // AutoCorrelation() starts at lag |kMinLag| rather than at lag 0, so the
+  // peak position must be shifted accordingly. |kMinLag| is expressed in the
+  // 4 kHz domain while |peak_index| is in the original sample rate; hence the
+  // factor 2 * fs_mult_.
+  peak_index += 2 * kMinLag * fs_mult_;
+  // Assert that |peak_index| stays within boundaries.
+  assert(peak_index >= 20 * fs_mult_);
+  assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_);
+
+  // Right-shift to apply in the dot products below, chosen so that
+  // |peak_index| squared samples can be summed without overflowing.
+  const int scaling = std::max(
+      0,
+      31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) -
+          WebRtcSpl_NormW32(peak_index));
+
+  // |vec2| starts at 15 ms; |vec1| starts one pitch period before that.
+  const int offset_15ms = fs_mult_ * 120;  // 15 ms expressed in samples.
+  const int16_t* vec2 = signal + offset_15ms;
+  const int16_t* vec1 = vec2 - peak_index;
+
+  // Energy of each segment and their cross-correlation, all computed over
+  // |peak_index| samples.
+  const int32_t vec1_energy =
+      WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling);
+  const int32_t vec2_energy =
+      WebRtcSpl_DotProductWithScale(vec2, vec2, peak_index, scaling);
+  const int32_t cross_corr =
+      WebRtcSpl_DotProductWithScale(vec1, vec2, peak_index, scaling);
+
+  // Simple VAD: does the signal look like active speech?
+  const bool active_speech =
+      SpeechDetection(vec1_energy, vec2_energy, peak_index, scaling);
+
+  int16_t best_correlation;
+  if (active_speech) {
+    // Compute the normalized correlation
+    //   cross_corr / sqrt(vec1_energy * vec2_energy)
+    // in Q14.
+
+    // Shifts that bring each energy down to int16_t range.
+    int energy1_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec1_energy));
+    const int energy2_scale =
+        std::max(0, 16 - WebRtcSpl_NormW32(vec2_energy));
+    // Bump the first shift by one if the total is odd, so that the total is
+    // even and halves cleanly after the square root.
+    energy1_scale += (energy1_scale + energy2_scale) & 1;
+
+    const int16_t vec1_energy_int16 =
+        static_cast<int16_t>(vec1_energy >> energy1_scale);
+    const int16_t vec2_energy_int16 =
+        static_cast<int16_t>(vec2_energy >> energy2_scale);
+
+    // Square root of the (scaled) energy product.
+    const int16_t sqrt_energy_prod = static_cast<int16_t>(
+        WebRtcSpl_SqrtFloor(vec1_energy_int16 * vec2_energy_int16));
+
+    // Bring the cross-correlation to the matching scale for a Q14 quotient.
+    const int temp_scale = 14 - (energy1_scale + energy2_scale) / 2;
+    int32_t scaled_cross_corr = WEBRTC_SPL_SHIFT_W32(cross_corr, temp_scale);
+    scaled_cross_corr = std::max(0, scaled_cross_corr);  // Ignore if negative.
+
+    // The quotient is narrowed to int16_t first and then limited to 1.0 in
+    // Q14 (16384).
+    const int16_t quotient = static_cast<int16_t>(
+        WebRtcSpl_DivW32W16(scaled_cross_corr, sqrt_energy_prod));
+    best_correlation = std::min(static_cast<int16_t>(16384), quotient);
+  } else {
+    SetParametersForPassiveSpeech(signal_len, &best_correlation, &peak_index);
+  }
+
+
+  // Evaluate the stretch criteria and, if they are met, stretch the signal.
+  const ReturnCodes return_value = CheckCriteriaAndStretch(
+      input, input_len, peak_index, best_correlation, active_speech, output);
+  if (return_value == kSuccess || return_value == kSuccessLowEnergy) {
+    // The signal length changed by one pitch period.
+    *length_change_samples = peak_index;
+  } else if (return_value == kNoStretch || return_value == kError) {
+    *length_change_samples = 0;
+  }
+  return return_value;
+}
+
+// Computes the correlation of |downsampled_input_| for lags kMinLag through
+// kMaxLag (4 kHz domain) and stores it, normalized to 14 bits, in
+// |auto_correlation_|. Relies on |max_input_value_| having been set.
+void TimeStretch::AutoCorrelation() {
+  // Right-shift applied inside the cross-correlation to guard against
+  // overflow; never negative.
+  const int corr_shift = std::max(
+      0,
+      kLogCorrelationLen -
+          WebRtcSpl_NormW32(max_input_value_ * max_input_value_));
+
+  // Raw 32-bit correlation values, one per lag.
+  int32_t raw_corr[kCorrelationLen];
+  WebRtcSpl_CrossCorrelation(raw_corr,
+                             &downsampled_input_[kMaxLag],
+                             &downsampled_input_[kMaxLag - kMinLag],
+                             kCorrelationLen,
+                             kMaxLag - kMinLag,
+                             corr_shift,
+                             -1);
+
+  // Shift the values down so that the largest magnitude fits in 14 bits, and
+  // store the result as 16-bit values in |auto_correlation_|.
+  const int32_t peak_magnitude =
+      WebRtcSpl_MaxAbsValueW32(raw_corr, kCorrelationLen);
+  const int norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(peak_magnitude));
+  WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen,
+                                   raw_corr, norm_shift);
+}
+
+// Simple VAD. Returns true if the signal seems to be active speech, judged
+// from the energies |vec1_energy| and |vec2_energy| of two segments of
+// |peak_index| samples each. |scaling| is the right-shift that was applied to
+// the sample products when the energies were calculated. The threshold is
+// derived from the background noise energy of the master channel when the
+// noise estimate is initialized, and from a fixed value otherwise.
+bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,
+                                  int peak_index, int scaling) const {
+  // Check if the signal seems to be active speech or not (simple VAD).
+  // If (vec1_energy + vec2_energy) / (2 * peak_index) <=
+  // 8 * background_noise_energy, then we say that the signal contains no
+  // active speech.
+  // Rewrite the inequality as:
+  // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy.
+  // The two sides of the inequality will be denoted |left_side| and
+  // |right_side|.
+
+  // Add the energies in 64 bits. Each energy is an arbitrary int32_t, so a
+  // 32-bit sum could overflow, which is undefined behavior for signed
+  // integers. After the division by 16 the value always fits in an int32_t.
+  const int64_t energy_sum = static_cast<int64_t>(vec1_energy) + vec2_energy;
+  int32_t left_side = static_cast<int32_t>(energy_sum / 16);
+  int32_t right_side;
+  if (background_noise_.initialized()) {
+    right_side = background_noise_.Energy(master_channel_);
+  } else {
+    // If noise parameters have not been estimated, use a fixed threshold.
+    right_side = 75000;
+  }
+  // Shift both sides down by the same amount, chosen so that the noise energy
+  // is small enough to be multiplied by |peak_index|.
+  int right_scale = 16 - WebRtcSpl_NormW32(right_side);
+  right_scale = std::max(0, right_scale);
+  left_side = left_side >> right_scale;
+  right_side = peak_index * (right_side >> right_scale);
+
+  // Scale |left_side| properly before comparing with |right_side|.
+  // (|scaling| is the scale factor before energy calculation, thus the scale
+  // factor for the energy is 2 * scaling.)
+  if (WebRtcSpl_NormW32(left_side) < 2 * scaling) {
+    // Cannot scale only |left_side|, must scale |right_side| too.
+    // Shift |left_side| up as far as its headroom allows and shift
+    // |right_side| down by the remainder.
+    int temp_scale = WebRtcSpl_NormW32(left_side);
+    left_side = left_side << temp_scale;
+    right_side = right_side >> (2 * scaling - temp_scale);
+  } else {
+    left_side = left_side << 2 * scaling;
+  }
+  // Active speech if the signal energy exceeds the noise-based threshold.
+  return left_side > right_side;
+}
+
+}  // namespace webrtc
commit	9c55f0f957534144d2b8a64154f0a479249b34be	[log] [tgz]
author	henrik.lundin@webrtc.org <henrik.lundin@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>	Mon Jun 09 08:10:28 2014 +0000
committer	henrik.lundin@webrtc.org <henrik.lundin@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>	Mon Jun 09 08:10:28 2014 +0000
tree	630c80fc929a34dc5e8a8a70d42203e5d892aec8
parent	31f967c611997a106c3d781d21dd4464d1a1273e [diff] [blame]