Optimizations and refactoring of the APM 3-band split filter

This CL refactors and optimizes the 3-band split-filter in APM, which
is a very computationally complex component.

Beyond optimizing the code, the filter coefficients are also quantized
to avoid denormals.

The changes reduce the complexity of the split filter by about 30-50%.

The CL has been tested for bitexactness on a number of aecdump
recordings.

(The CL also removes the now-unused code for the sparse_fir_filter.)

Bug: webrtc:6181
Change-Id: If45f8d1f189c6812ccb03721156c77eb68181211
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/168189
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30592}
diff --git a/common_audio/BUILD.gn b/common_audio/BUILD.gn
index 48bd906..6c89bf2 100644
--- a/common_audio/BUILD.gn
+++ b/common_audio/BUILD.gn
@@ -32,8 +32,6 @@
     "resampler/sinc_resampler.cc",
     "smoothing_filter.cc",
     "smoothing_filter.h",
-    "sparse_fir_filter.cc",
-    "sparse_fir_filter.h",
     "vad/include/vad.h",
     "vad/vad.cc",
     "wav_file.cc",
@@ -47,6 +45,7 @@
   deps = [
     ":common_audio_c",
     ":sinc_resampler",
+    "../api:array_view",
     "../rtc_base:checks",
     "../rtc_base:gtest_prod",
     "../rtc_base:rtc_base_approved",
@@ -331,7 +330,6 @@
       "signal_processing/real_fft_unittest.cc",
       "signal_processing/signal_processing_unittest.cc",
       "smoothing_filter_unittest.cc",
-      "sparse_fir_filter_unittest.cc",
       "vad/vad_core_unittest.cc",
       "vad/vad_filterbank_unittest.cc",
       "vad/vad_gmm_unittest.cc",
diff --git a/common_audio/channel_buffer.h b/common_audio/channel_buffer.h
index dc44369..f027080 100644
--- a/common_audio/channel_buffer.h
+++ b/common_audio/channel_buffer.h
@@ -14,7 +14,9 @@
 #include <string.h>
 
 #include <memory>
+#include <vector>
 
+#include "api/array_view.h"
 #include "common_audio/include/audio_util.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/gtest_prod_util.h"
@@ -48,40 +50,60 @@
         num_frames_per_band_(num_frames / num_bands),
         num_allocated_channels_(num_channels),
         num_channels_(num_channels),
-        num_bands_(num_bands) {
-    for (size_t i = 0; i < num_allocated_channels_; ++i) {
-      for (size_t j = 0; j < num_bands_; ++j) {
-        channels_[j * num_allocated_channels_ + i] =
-            &data_[i * num_frames_ + j * num_frames_per_band_];
-        bands_[i * num_bands_ + j] = channels_[j * num_allocated_channels_ + i];
+        num_bands_(num_bands),
+        bands_view_(num_allocated_channels_,
+                    std::vector<rtc::ArrayView<T>>(num_bands_)),
+        channels_view_(
+            num_bands_,
+            std::vector<rtc::ArrayView<T>>(num_allocated_channels_)) {
+    // Temporarily cast away const_ness to allow populating the array views.
+    auto* bands_view =
+        const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(&bands_view_);
+    auto* channels_view =
+        const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(
+            &channels_view_);
+
+    for (size_t ch = 0; ch < num_allocated_channels_; ++ch) {
+      for (size_t band = 0; band < num_bands_; ++band) {
+        (*channels_view)[band][ch] = rtc::ArrayView<T>(
+            &data_[ch * num_frames_ + band * num_frames_per_band_],
+            num_frames_per_band_);
+        (*bands_view)[ch][band] = channels_view_[band][ch];
+        channels_[band * num_allocated_channels_ + ch] =
+            channels_view_[band][ch].data();
+        bands_[ch * num_bands_ + band] =
+            channels_[band * num_allocated_channels_ + ch];
       }
     }
   }
 
-  // Returns a pointer array to the full-band channels (or lower band channels).
-  // Usage:
-  // channels()[channel][sample].
-  // Where:
-  // 0 <= channel < |num_allocated_channels_|
-  // 0 <= sample < |num_frames_|
-  T* const* channels() { return channels(0); }
-  const T* const* channels() const { return channels(0); }
-
-  // Returns a pointer array to the channels for a specific band.
-  // Usage:
-  // channels(band)[channel][sample].
+  // Returns a pointer array to the channels.
+  // If band is explicitly specified, the channels for a specific band are
+  // returned and the usage becomes: channels(band)[channel][sample].
   // Where:
   // 0 <= band < |num_bands_|
   // 0 <= channel < |num_allocated_channels_|
   // 0 <= sample < |num_frames_per_band_|
-  const T* const* channels(size_t band) const {
+
+  // If band is not explicitly specified, the full-band channels (or lower band
+  // channels) are returned and the usage becomes: channels()[channel][sample].
+  // Where:
+  // 0 <= channel < |num_allocated_channels_|
+  // 0 <= sample < |num_frames_|
+  const T* const* channels(size_t band = 0) const {
     RTC_DCHECK_LT(band, num_bands_);
     return &channels_[band * num_allocated_channels_];
   }
-  T* const* channels(size_t band) {
+  T* const* channels(size_t band = 0) {
     const ChannelBuffer<T>* t = this;
     return const_cast<T* const*>(t->channels(band));
   }
+  rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) {
+    return channels_view_[band];
+  }
+  rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) const {
+    return channels_view_[band];
+  }
 
   // Returns a pointer array to the bands for a specific channel.
   // Usage:
@@ -100,6 +122,13 @@
     return const_cast<T* const*>(t->bands(channel));
   }
 
+  rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) {
+    return bands_view_[channel];
+  }
+  rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) const {
+    return bands_view_[channel];
+  }
+
   // Sets the |slice| pointers to the |start_frame| position for each channel.
   // Returns |slice| for convenience.
   const T* const* Slice(T** slice, size_t start_frame) const {
@@ -140,6 +169,8 @@
   // Number of channels the user sees.
   size_t num_channels_;
   const size_t num_bands_;
+  const std::vector<std::vector<rtc::ArrayView<T>>> bands_view_;
+  const std::vector<std::vector<rtc::ArrayView<T>>> channels_view_;
 };
 
 // One int16_t and one float ChannelBuffer that are kept in sync. The sync is
diff --git a/common_audio/sparse_fir_filter.cc b/common_audio/sparse_fir_filter.cc
deleted file mode 100644
index 772eb82..0000000
--- a/common_audio/sparse_fir_filter.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "common_audio/sparse_fir_filter.h"
-
-#include "rtc_base/checks.h"
-
-namespace webrtc {
-
-SparseFIRFilter::SparseFIRFilter(const float* nonzero_coeffs,
-                                 size_t num_nonzero_coeffs,
-                                 size_t sparsity,
-                                 size_t offset)
-    : sparsity_(sparsity),
-      offset_(offset),
-      nonzero_coeffs_(nonzero_coeffs, nonzero_coeffs + num_nonzero_coeffs),
-      state_(sparsity_ * (num_nonzero_coeffs - 1) + offset_, 0.f) {
-  RTC_CHECK_GE(num_nonzero_coeffs, 1);
-  RTC_CHECK_GE(sparsity, 1);
-}
-
-SparseFIRFilter::~SparseFIRFilter() = default;
-
-void SparseFIRFilter::Filter(const float* in, size_t length, float* out) {
-  // Convolves the input signal |in| with the filter kernel |nonzero_coeffs_|
-  // taking into account the previous state.
-  for (size_t i = 0; i < length; ++i) {
-    out[i] = 0.f;
-    size_t j;
-    for (j = 0; i >= j * sparsity_ + offset_ && j < nonzero_coeffs_.size();
-         ++j) {
-      out[i] += in[i - j * sparsity_ - offset_] * nonzero_coeffs_[j];
-    }
-    for (; j < nonzero_coeffs_.size(); ++j) {
-      out[i] += state_[i + (nonzero_coeffs_.size() - j - 1) * sparsity_] *
-                nonzero_coeffs_[j];
-    }
-  }
-
-  // Update current state.
-  if (!state_.empty()) {
-    if (length >= state_.size()) {
-      std::memcpy(&state_[0], &in[length - state_.size()],
-                  state_.size() * sizeof(*in));
-    } else {
-      std::memmove(&state_[0], &state_[length],
-                   (state_.size() - length) * sizeof(state_[0]));
-      std::memcpy(&state_[state_.size() - length], in, length * sizeof(*in));
-    }
-  }
-}
-
-}  // namespace webrtc
diff --git a/common_audio/sparse_fir_filter.h b/common_audio/sparse_fir_filter.h
deleted file mode 100644
index 5197a8e..0000000
--- a/common_audio/sparse_fir_filter.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef COMMON_AUDIO_SPARSE_FIR_FILTER_H_
-#define COMMON_AUDIO_SPARSE_FIR_FILTER_H_
-
-#include <cstring>
-#include <vector>
-
-#include "rtc_base/constructor_magic.h"
-
-namespace webrtc {
-
-// A Finite Impulse Response filter implementation which takes advantage of a
-// sparse structure with uniformly distributed non-zero coefficients.
-class SparseFIRFilter final {
- public:
-  // |num_nonzero_coeffs| is the number of non-zero coefficients,
-  // |nonzero_coeffs|. They are assumed to be uniformly distributed every
-  // |sparsity| samples and with an initial |offset|. The rest of the filter
-  // coefficients will be assumed zeros. For example, with sparsity = 3, and
-  // offset = 1 the filter coefficients will be:
-  // B = [0 coeffs[0] 0 0 coeffs[1] 0 0 coeffs[2] ... ]
-  // All initial state values will be zeros.
-  SparseFIRFilter(const float* nonzero_coeffs,
-                  size_t num_nonzero_coeffs,
-                  size_t sparsity,
-                  size_t offset);
-  ~SparseFIRFilter();
-
-  // Filters the |in| data supplied.
-  // |out| must be previously allocated and it must be at least of |length|.
-  void Filter(const float* in, size_t length, float* out);
-
- private:
-  const size_t sparsity_;
-  const size_t offset_;
-  const std::vector<float> nonzero_coeffs_;
-  std::vector<float> state_;
-
-  RTC_DISALLOW_COPY_AND_ASSIGN(SparseFIRFilter);
-};
-
-}  // namespace webrtc
-
-#endif  // COMMON_AUDIO_SPARSE_FIR_FILTER_H_
diff --git a/common_audio/sparse_fir_filter_unittest.cc b/common_audio/sparse_fir_filter_unittest.cc
deleted file mode 100644
index 5dc7b6d..0000000
--- a/common_audio/sparse_fir_filter_unittest.cc
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "common_audio/sparse_fir_filter.h"
-
-#include <memory>
-
-#include "common_audio/fir_filter.h"
-#include "common_audio/fir_filter_factory.h"
-#include "rtc_base/arraysize.h"
-#include "test/gtest.h"
-
-namespace webrtc {
-namespace {
-
-static const float kCoeffs[] = {0.2f, 0.3f, 0.5f, 0.7f, 0.11f};
-static const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f,
-                               6.f, 7.f, 8.f, 9.f, 10.f};
-
-template <size_t N>
-void VerifyOutput(const float (&expected_output)[N], const float (&output)[N]) {
-  EXPECT_EQ(0, memcmp(expected_output, output, sizeof(output)));
-}
-
-}  // namespace
-
-TEST(SparseFIRFilterTest, FilterAsIdentity) {
-  const float kCoeff = 1.f;
-  const size_t kNumCoeff = 1;
-  const size_t kSparsity = 3;
-  const size_t kOffset = 0;
-  float output[arraysize(kInput)];
-  SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
-  filter.Filter(kInput, arraysize(kInput), output);
-  VerifyOutput(kInput, output);
-}
-
-TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) {
-  const float kCoeff = 2.f;
-  const size_t kNumCoeff = 1;
-  const size_t kLowSparsity = 1;
-  const size_t kHighSparsity = 7;
-  const size_t kOffset = 0;
-  float low_sparsity_output[arraysize(kInput)];
-  float high_sparsity_output[arraysize(kInput)];
-  SparseFIRFilter low_sparsity_filter(&kCoeff, kNumCoeff, kLowSparsity,
-                                      kOffset);
-  SparseFIRFilter high_sparsity_filter(&kCoeff, kNumCoeff, kHighSparsity,
-                                       kOffset);
-  low_sparsity_filter.Filter(kInput, arraysize(kInput), low_sparsity_output);
-  high_sparsity_filter.Filter(kInput, arraysize(kInput), high_sparsity_output);
-  VerifyOutput(low_sparsity_output, high_sparsity_output);
-}
-
-TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) {
-  const float kCoeff = 5.f;
-  const size_t kNumCoeff = 1;
-  const size_t kSparsity = 5;
-  const size_t kOffset = 0;
-  float output[arraysize(kInput)];
-  SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
-  filter.Filter(kInput, arraysize(kInput), output);
-  EXPECT_FLOAT_EQ(5.f, output[0]);
-  EXPECT_FLOAT_EQ(20.f, output[3]);
-  EXPECT_FLOAT_EQ(25.f, output[4]);
-  EXPECT_FLOAT_EQ(50.f, output[arraysize(kInput) - 1]);
-}
-
-TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) {
-  const float kCoeff = 1.f;
-  const size_t kNumCoeff = 1;
-  const size_t kSparsity = 1;
-  const size_t kOffset = 4;
-  float output[arraysize(kInput)];
-  SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
-  filter.Filter(kInput, arraysize(kInput), output);
-  EXPECT_FLOAT_EQ(0.f, output[0]);
-  EXPECT_FLOAT_EQ(0.f, output[3]);
-  EXPECT_FLOAT_EQ(1.f, output[4]);
-  EXPECT_FLOAT_EQ(2.f, output[5]);
-  EXPECT_FLOAT_EQ(6.f, output[arraysize(kInput) - 1]);
-}
-
-TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) {
-  const size_t kSparsity = 2;
-  const size_t kOffset = 1;
-  float output[arraysize(kInput)];
-  SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
-  filter.Filter(kInput, arraysize(kInput), output);
-  EXPECT_FLOAT_EQ(0.f, output[0]);
-  EXPECT_FLOAT_EQ(0.9f, output[3]);
-  EXPECT_FLOAT_EQ(1.4f, output[4]);
-  EXPECT_FLOAT_EQ(2.4f, output[5]);
-  EXPECT_FLOAT_EQ(8.61f, output[arraysize(kInput) - 1]);
-}
-
-TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) {
-  const size_t kSparsity = 1;
-  const size_t kOffset = 0;
-  float output[arraysize(kInput)];
-  SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
-  filter.Filter(kInput, 2, output);
-  EXPECT_FLOAT_EQ(0.2f, output[0]);
-  EXPECT_FLOAT_EQ(0.7f, output[1]);
-}
-
-TEST(SparseFIRFilterTest, MultipleFilterCalls) {
-  const size_t kSparsity = 1;
-  const size_t kOffset = 0;
-  float output[arraysize(kInput)];
-  SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
-  filter.Filter(kInput, 2, output);
-  EXPECT_FLOAT_EQ(0.2f, output[0]);
-  EXPECT_FLOAT_EQ(0.7f, output[1]);
-  filter.Filter(kInput, 2, output);
-  EXPECT_FLOAT_EQ(1.3f, output[0]);
-  EXPECT_FLOAT_EQ(2.4f, output[1]);
-  filter.Filter(kInput, 2, output);
-  EXPECT_FLOAT_EQ(2.81f, output[0]);
-  EXPECT_FLOAT_EQ(2.62f, output[1]);
-  filter.Filter(kInput, 2, output);
-  EXPECT_FLOAT_EQ(2.81f, output[0]);
-  EXPECT_FLOAT_EQ(2.62f, output[1]);
-  filter.Filter(&kInput[3], 3, output);
-  EXPECT_FLOAT_EQ(3.41f, output[0]);
-  EXPECT_FLOAT_EQ(4.12f, output[1]);
-  EXPECT_FLOAT_EQ(6.21f, output[2]);
-  filter.Filter(&kInput[3], 3, output);
-  EXPECT_FLOAT_EQ(8.12f, output[0]);
-  EXPECT_FLOAT_EQ(9.14f, output[1]);
-  EXPECT_FLOAT_EQ(9.45f, output[2]);
-}
-
-TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) {
-  const size_t kSparsity = 3;
-  const size_t kOffset = 1;
-  float output_block_based[arraysize(kInput)];
-  SparseFIRFilter filter_block(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
-  filter_block.Filter(kInput, arraysize(kInput), output_block_based);
-  float output_sample_based[arraysize(kInput)];
-  SparseFIRFilter filter_sample(kCoeffs, arraysize(kCoeffs), kSparsity,
-                                kOffset);
-  for (size_t i = 0; i < arraysize(kInput); ++i)
-    filter_sample.Filter(&kInput[i], 1, &output_sample_based[i]);
-  VerifyOutput(output_block_based, output_sample_based);
-}
-
-TEST(SparseFIRFilterTest, SimpleHighPassFilter) {
-  const size_t kSparsity = 2;
-  const size_t kOffset = 2;
-  const float kHPCoeffs[] = {1.f, -1.f};
-  const float kConstantInput[] = {1.f, 1.f, 1.f, 1.f, 1.f,
-                                  1.f, 1.f, 1.f, 1.f, 1.f};
-  float output[arraysize(kConstantInput)];
-  SparseFIRFilter filter(kHPCoeffs, arraysize(kHPCoeffs), kSparsity, kOffset);
-  filter.Filter(kConstantInput, arraysize(kConstantInput), output);
-  EXPECT_FLOAT_EQ(0.f, output[0]);
-  EXPECT_FLOAT_EQ(0.f, output[1]);
-  EXPECT_FLOAT_EQ(1.f, output[2]);
-  EXPECT_FLOAT_EQ(1.f, output[3]);
-  for (size_t i = kSparsity + kOffset; i < arraysize(kConstantInput); ++i)
-    EXPECT_FLOAT_EQ(0.f, output[i]);
-}
-
-TEST(SparseFIRFilterTest, SimpleLowPassFilter) {
-  const size_t kSparsity = 2;
-  const size_t kOffset = 2;
-  const float kLPCoeffs[] = {1.f, 1.f};
-  const float kHighFrequencyInput[] = {1.f, 1.f,  -1.f, -1.f, 1.f,
-                                       1.f, -1.f, -1.f, 1.f,  1.f};
-  float output[arraysize(kHighFrequencyInput)];
-  SparseFIRFilter filter(kLPCoeffs, arraysize(kLPCoeffs), kSparsity, kOffset);
-  filter.Filter(kHighFrequencyInput, arraysize(kHighFrequencyInput), output);
-  EXPECT_FLOAT_EQ(0.f, output[0]);
-  EXPECT_FLOAT_EQ(0.f, output[1]);
-  EXPECT_FLOAT_EQ(1.f, output[2]);
-  EXPECT_FLOAT_EQ(1.f, output[3]);
-  for (size_t i = kSparsity + kOffset; i < arraysize(kHighFrequencyInput); ++i)
-    EXPECT_FLOAT_EQ(0.f, output[i]);
-}
-
-TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) {
-  const size_t kSparsity = 1;
-  const size_t kOffset = 0;
-  float output[arraysize(kCoeffs)];
-  float output_swapped[arraysize(kCoeffs)];
-  SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
-  // Use arraysize(kCoeffs) for in_length to get same-length outputs.
-  filter.Filter(kInput, arraysize(kCoeffs), output);
-  SparseFIRFilter filter_swapped(kInput, arraysize(kCoeffs), kSparsity,
-                                 kOffset);
-  filter_swapped.Filter(kCoeffs, arraysize(kCoeffs), output_swapped);
-  VerifyOutput(output, output_swapped);
-}
-
-TEST(SparseFIRFilterTest, SameOutputAsFIRFilterWhenSparsityOneAndOffsetZero) {
-  const size_t kSparsity = 1;
-  const size_t kOffset = 0;
-  float output[arraysize(kInput)];
-  float sparse_output[arraysize(kInput)];
-  std::unique_ptr<FIRFilter> filter(
-      CreateFirFilter(kCoeffs, arraysize(kCoeffs), arraysize(kInput)));
-  SparseFIRFilter sparse_filter(kCoeffs, arraysize(kCoeffs), kSparsity,
-                                kOffset);
-  filter->Filter(kInput, arraysize(kInput), output);
-  sparse_filter.Filter(kInput, arraysize(kInput), sparse_output);
-  for (size_t i = 0; i < arraysize(kInput); ++i) {
-    EXPECT_FLOAT_EQ(output[i], sparse_output[i]);
-  }
-}
-
-}  // namespace webrtc