Corrections to the render buffering scheme in AEC3 to ensure causality

This CL modifies the refactored render buffering scheme in AEC3
so that:
-A non-causal state can never occur, which means that situations with
 unrecoverable echo should not occur.
-For a stable audio pipeline with a predefined API call jitter,
 render overruns and underruns can never occur.

Bug: webrtc:8629,chromium:793305
Change-Id: I06ba1c368f92db95274090b08475dd02dbb85145
Reviewed-on: https://webrtc-review.googlesource.com/29861
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21215}
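
For reference, the skew detection added in this CL boils down to circular-buffer
latency arithmetic. Below is a minimal standalone sketch (not part of the CL;
LowRateBuffer is a simplified stand-in for DownsampledRenderBuffer, and all
constants are illustrative) of how BufferLatency() and ApiCallSkew() interact
with the headroom that Reset() pre-fills:

#include <cassert>
#include <cstdlib>
#include <vector>

// Simplified stand-in for DownsampledRenderBuffer: a circular buffer where
// Insert() advances the write index and each capture call advances the read
// index, one sub-block at a time.
struct LowRateBuffer {
  std::vector<float> buffer;
  int read = 0;
  int write = 0;
};

// Mirrors BufferLatency() in the CL: the number of unread samples between the
// read and write indices, modulo the buffer size.
int BufferLatency(const LowRateBuffer& l) {
  const int size = static_cast<int>(l.buffer.size());
  return (size + l.read - l.write) % size;
}

// Mirrors ApiCallSkew() in the CL: Reset() pre-fills the buffer with
// offset_sub_blocks * sub_block_size samples of headroom, so a latency that
// drifts away from that headroom by a full headroom's worth means the render
// and capture call counts have skewed too far for causal delay estimation.
bool ApiCallSkew(const LowRateBuffer& l,
                 int sub_block_size,
                 int offset_sub_blocks) {
  const int headroom = offset_sub_blocks * sub_block_size;
  return std::abs(headroom - BufferLatency(l)) >= headroom;
}

int main() {
  LowRateBuffer l;
  l.buffer.resize(64);
  const int sub_block_size = 4;     // Illustrative.
  const int offset_sub_blocks = 2;  // Headroom of 8 samples after reset.

  // Directly after a reset the latency equals the headroom: no skew.
  l.read = offset_sub_blocks * sub_block_size;
  assert(BufferLatency(l) == 8);
  assert(!ApiCallSkew(l, sub_block_size, offset_sub_blocks));

  // Two surplus render calls later the latency has grown to twice the
  // headroom, so the skew limit is hit and a reset is triggered.
  l.read = 16;
  assert(ApiCallSkew(l, sub_block_size, offset_sub_blocks));
  return 0;
}

Note that the check only needs to be one-sided: drifting in the other
direction empties the buffer and is reported as an underrun before the skew
limit is reached, which matches the comment in PrepareCaptureProcessing()
below.
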
diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc
index 9640131..4acc784 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -37,51 +37,109 @@
 
   void Reset() override;
   BufferingEvent Insert(const std::vector<std::vector<float>>& block) override;
-  BufferingEvent PrepareCaptureCall() override;
-  void SetDelay(size_t delay) override;
-  size_t Delay() const override { return delay_; }
+  BufferingEvent PrepareCaptureProcessing() override;
+  bool SetDelay(size_t delay) override;
+  rtc::Optional<size_t> Delay() const override { return delay_; }
   size_t MaxDelay() const override {
     return blocks_.buffer.size() - 1 - kBufferHeadroom;
   }
-  size_t MaxApiJitter() const override { return max_api_jitter_; }
-  const RenderBuffer& GetRenderBuffer() const override {
-    return echo_remover_buffer_;
-  }
+  RenderBuffer* GetRenderBuffer() override { return &echo_remover_buffer_; }
 
   const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const override {
     return low_rate_;
   }
 
+  bool CausalDelay() const override;
+
  private:
   static int instance_count_;
   std::unique_ptr<ApmDataDumper> data_dumper_;
   const Aec3Optimization optimization_;
-  const size_t api_call_jitter_blocks_;
-  const size_t min_echo_path_delay_blocks_;
+  const EchoCanceller3Config config_;
   const int sub_block_size_;
   MatrixBuffer blocks_;
   VectorBuffer spectra_;
   FftBuffer ffts_;
-  size_t delay_;
-  int max_api_jitter_ = 0;
-  int render_surplus_ = 0;
-  bool first_reset_occurred_ = false;
+  rtc::Optional<size_t> delay_;
+  rtc::Optional<int> internal_delay_;
   RenderBuffer echo_remover_buffer_;
   DownsampledRenderBuffer low_rate_;
   Decimator render_decimator_;
   const std::vector<std::vector<float>> zero_block_;
   const Aec3Fft fft_;
-  size_t capture_call_counter_ = 0;
   std::vector<float> render_ds_;
-  int render_calls_in_a_row_ = 0;
 
-  void UpdateBuffersWithLatestBlock(size_t previous_write);
-  void IncreaseRead();
-  void IncreaseInsert();
+  int LowRateBufferOffset() const { return DelayEstimatorOffset(config_) >> 1; }
+  int MaxExternalDelayToInternalDelay(size_t delay) const;
+  void ApplyDelay(int delay);
+  void InsertBlock(const std::vector<std::vector<float>>& block,
+                   int previous_write);
 
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderDelayBufferImpl);
 };
 
+// Increases the write indices for the render buffers.
+void IncreaseWriteIndices(int sub_block_size,
+                          MatrixBuffer* blocks,
+                          VectorBuffer* spectra,
+                          FftBuffer* ffts,
+                          DownsampledRenderBuffer* low_rate) {
+  low_rate->UpdateWriteIndex(-sub_block_size);
+  blocks->IncWriteIndex();
+  spectra->DecWriteIndex();
+  ffts->DecWriteIndex();
+}
+
+// Increases the read indices for the render buffers.
+void IncreaseReadIndices(const rtc::Optional<int>& delay,
+                         int sub_block_size,
+                         MatrixBuffer* blocks,
+                         VectorBuffer* spectra,
+                         FftBuffer* ffts,
+                         DownsampledRenderBuffer* low_rate) {
+  RTC_DCHECK_NE(low_rate->read, low_rate->write);
+  low_rate->UpdateReadIndex(-sub_block_size);
+
+  if (blocks->read != blocks->write) {
+    blocks->IncReadIndex();
+    spectra->DecReadIndex();
+    ffts->DecReadIndex();
+  } else {
+    // Only allow underrun for blocks_ when the delay is not set.
+    RTC_DCHECK(!delay);
+  }
+}
+
+// Checks for a render buffer overrun.
+bool RenderOverrun(const MatrixBuffer& b, const DownsampledRenderBuffer& l) {
+  return l.read == l.write || b.read == b.write;
+}
+
+// Checks for a render buffer underrun. If the delay is not specified, only a
+// low rate buffer underrun is counted, as the delay offset for the other
+// buffers is unknown.
+bool RenderUnderrun(const rtc::Optional<int>& delay,
+                    const MatrixBuffer& b,
+                    const DownsampledRenderBuffer& l) {
+  return l.read == l.write || (delay && b.read == b.write);
+}
+
+// Computes the latency in the buffer (the number of unread elements).
+int BufferLatency(const DownsampledRenderBuffer& l) {
+  return (l.buffer.size() + l.read - l.write) % l.buffer.size();
+}
+
+// Detects whether the mismatch between the number of render and capture calls
+// exceeds the allowed limit, based on the known offset (set during reset) of
+// the low rate buffer.
+                 int sub_block_size,
+                 int low_rate_buffer_offset_sub_blocks) {
+  int latency = BufferLatency(low_rate_buffer);
+  int skew = abs(low_rate_buffer_offset_sub_blocks * sub_block_size - latency);
+  int skew_limit = low_rate_buffer_offset_sub_blocks * sub_block_size;
+  return skew >= skew_limit;
+}
+
 int RenderDelayBufferImpl::instance_count_ = 0;
 
 RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
@@ -89,8 +147,7 @@
     : data_dumper_(
           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
       optimization_(DetectOptimization()),
-      api_call_jitter_blocks_(config.delay.api_call_jitter_blocks),
-      min_echo_path_delay_blocks_(config.delay.min_echo_path_delay_blocks),
+      config_(config),
       sub_block_size_(
           static_cast<int>(config.delay.down_sampling_factor > 0
                                ? kBlockSize / config.delay.down_sampling_factor
@@ -101,7 +158,6 @@
               kBlockSize),
       spectra_(blocks_.buffer.size(), kFftLengthBy2Plus1),
       ffts_(blocks_.buffer.size()),
-      delay_(min_echo_path_delay_blocks_),
       echo_remover_buffer_(kAdaptiveFilterLength, &blocks_, &spectra_, &ffts_),
       low_rate_(GetDownSampledBufferSize(config.delay.down_sampling_factor,
                                          config.delay.num_filters)),
@@ -111,127 +167,153 @@
       render_ds_(sub_block_size_, 0.f) {
   RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
   RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
+
+  // Necessary condition to avoid unrecoverable echo due to noncausal alignment.
+  RTC_DCHECK_EQ(DelayEstimatorOffset(config_), LowRateBufferOffset() * 2);
   Reset();
-  first_reset_occurred_ = false;
 }
 
 RenderDelayBufferImpl::~RenderDelayBufferImpl() = default;
 
+// Resets the buffer delays and clears the reported delays.
 void RenderDelayBufferImpl::Reset() {
-  delay_ = min_echo_path_delay_blocks_;
-  const int offset1 = std::max<int>(
-      std::min(api_call_jitter_blocks_, min_echo_path_delay_blocks_), 1);
-  const int offset2 = static_cast<int>(delay_ + offset1);
-  const int offset3 = offset1 * sub_block_size_;
-  low_rate_.read = low_rate_.OffsetIndex(low_rate_.write, offset3);
-  blocks_.read = blocks_.OffsetIndex(blocks_.write, -offset2);
-  spectra_.read = spectra_.OffsetIndex(spectra_.write, offset2);
-  ffts_.read = ffts_.OffsetIndex(ffts_.write, offset2);
-  render_surplus_ = 0;
-  first_reset_occurred_ = true;
+  // Pre-fill the low rate buffer (which is used for delay estimation) to add
+  // headroom for the allowed API call jitter.
+  low_rate_.read = low_rate_.OffsetIndex(
+      low_rate_.write, LowRateBufferOffset() * sub_block_size_);
+
+  // Set the render buffer delays to the default delay.
+  ApplyDelay(config_.delay.default_delay);
+
+  // Unset the delays which are set by SetDelay.
+  delay_ = rtc::nullopt;
+  internal_delay_ = rtc::nullopt;
 }
 
+// Inserts a new block into the render buffers.
 RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
     const std::vector<std::vector<float>>& block) {
-  RTC_DCHECK_EQ(block.size(), blocks_.buffer[0].size());
-  RTC_DCHECK_EQ(block[0].size(), blocks_.buffer[0][0].size());
-  BufferingEvent event = BufferingEvent::kNone;
+  // Increase the write indices to where the new block should be written.
+  const int previous_write = blocks_.write;
+  IncreaseWriteIndices(sub_block_size_, &blocks_, &spectra_, &ffts_,
+                       &low_rate_);
 
-  ++render_surplus_;
-  if (first_reset_occurred_) {
-    ++render_calls_in_a_row_;
-    max_api_jitter_ = std::max(max_api_jitter_, render_calls_in_a_row_);
+  // Allow the overrun and do a reset when a render overrun occurs, which
+  // happens when more render data is inserted than capture data is received.
+  BufferingEvent event = RenderOverrun(blocks_, low_rate_)
+                             ? BufferingEvent::kRenderOverrun
+                             : BufferingEvent::kNone;
+
+  // Insert the new render block at the specified position.
+  InsertBlock(block, previous_write);
+
+  if (event != BufferingEvent::kNone) {
+    Reset();
   }
 
-  const size_t previous_write = blocks_.write;
-  IncreaseInsert();
-
-  if (low_rate_.read == low_rate_.write || blocks_.read == blocks_.write) {
-    // Render overrun due to more render data being inserted than read. Discard
-    // the oldest render data.
-    event = BufferingEvent::kRenderOverrun;
-    IncreaseRead();
-  }
-
-  for (size_t k = 0; k < block.size(); ++k) {
-    std::copy(block[k].begin(), block[k].end(),
-              blocks_.buffer[blocks_.write][k].begin());
-  }
-
-  UpdateBuffersWithLatestBlock(previous_write);
   return event;
 }
 
-RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::PrepareCaptureCall() {
+// Prepares the render buffers for processing another capture block.
+RenderDelayBuffer::BufferingEvent
+RenderDelayBufferImpl::PrepareCaptureProcessing() {
   BufferingEvent event = BufferingEvent::kNone;
-  render_calls_in_a_row_ = 0;
 
-  if (low_rate_.read == low_rate_.write || blocks_.read == blocks_.write) {
+  if (RenderUnderrun(internal_delay_, blocks_, low_rate_)) {
+    // Don't increase the read indices if there is a render underrun.
     event = BufferingEvent::kRenderUnderrun;
   } else {
-    IncreaseRead();
-  }
-  --render_surplus_;
+    // Increase the read indices in the render buffers to point to the most
+    // recent block to use in the capture processing.
+    IncreaseReadIndices(internal_delay_, sub_block_size_, &blocks_, &spectra_,
+                        &ffts_, &low_rate_);
 
-  echo_remover_buffer_.UpdateSpectralSum();
-
-  if (render_surplus_ >= static_cast<int>(api_call_jitter_blocks_)) {
-    event = BufferingEvent::kApiCallSkew;
-    RTC_LOG(LS_WARNING) << "Api call skew detected at " << capture_call_counter_
-                        << ".";
+    // Check for skew in the API calls which, if too large, causes the delay
+    // estimation to be noncausal. Doing this check after the render read
+    // indices have been increased saves one unit of allowed skew. Note that
+    // the skew check should only need to be one-sided, as one of the skew
+    // directions results in an underrun.
+    bool skew = ApiCallSkew(low_rate_, sub_block_size_, LowRateBufferOffset());
+    event = skew ? BufferingEvent::kApiCallSkew : BufferingEvent::kNone;
   }
 
-  ++capture_call_counter_;
+  if (event != BufferingEvent::kNone) {
+    Reset();
+  }
+
   return event;
 }
 
-void RenderDelayBufferImpl::SetDelay(size_t delay) {
-  if (delay_ == delay) {
-    return;
+// Sets the delay and returns a bool indicating whether the delay was changed.
+bool RenderDelayBufferImpl::SetDelay(size_t delay) {
+  if (delay_ && *delay_ == delay) {
+    return false;
   }
-
-  const int delta_delay = static_cast<int>(delay_) - static_cast<int>(delay);
   delay_ = delay;
-  if (delay_ > MaxDelay()) {
-    delay_ = std::min(MaxDelay(), delay);
-    RTC_NOTREACHED();
-  }
 
-  // Recompute the read indices according to the set delay.
-  blocks_.UpdateReadIndex(delta_delay);
-  spectra_.UpdateReadIndex(-delta_delay);
-  ffts_.UpdateReadIndex(-delta_delay);
+  // Compute the internal delay and limit the delay to the allowed range.
+  int internal_delay = MaxExternalDelayToInternalDelay(*delay_);
+  internal_delay_ =
+      std::min(MaxDelay(), static_cast<size_t>(std::max(internal_delay, 0)));
+
+  // Apply the delay to the buffers.
+  ApplyDelay(*internal_delay_);
+  return true;
 }
 
-void RenderDelayBufferImpl::UpdateBuffersWithLatestBlock(
-    size_t previous_write) {
-  render_decimator_.Decimate(blocks_.buffer[blocks_.write][0], render_ds_);
-  std::copy(render_ds_.rbegin(), render_ds_.rend(),
-            low_rate_.buffer.begin() + low_rate_.write);
+// Returns whether the specified delay is causal.
+bool RenderDelayBufferImpl::CausalDelay() const {
+  return !internal_delay_ ||
+         *internal_delay_ >=
+             static_cast<int>(config_.delay.min_echo_path_delay_blocks);
+}
 
-  fft_.PaddedFft(blocks_.buffer[blocks_.write][0],
-                 blocks_.buffer[previous_write][0], &ffts_.buffer[ffts_.write]);
+// Maps the externally computed delay to the delay used internally.
+int RenderDelayBufferImpl::MaxExternalDelayToInternalDelay(
+    size_t external_delay_blocks) const {
+  const int latency = BufferLatency(low_rate_);
+  RTC_DCHECK_LT(0, sub_block_size_);
+  RTC_DCHECK_EQ(0, latency % sub_block_size_);
+  int latency_blocks = latency / sub_block_size_;
+  return latency_blocks + static_cast<int>(external_delay_blocks) -
+         DelayEstimatorOffset(config_);
+}
 
-  ffts_.buffer[ffts_.write].Spectrum(optimization_,
-                                     spectra_.buffer[spectra_.write]);
-};
+// Sets the read indices according to the delay.
+void RenderDelayBufferImpl::ApplyDelay(int delay) {
+  blocks_.read = blocks_.OffsetIndex(blocks_.write, -delay);
+  spectra_.read = spectra_.OffsetIndex(spectra_.write, delay);
+  ffts_.read = ffts_.OffsetIndex(ffts_.write, delay);
+}
 
-void RenderDelayBufferImpl::IncreaseRead() {
-  low_rate_.UpdateReadIndex(-sub_block_size_);
-  blocks_.IncReadIndex();
-  spectra_.DecReadIndex();
-  ffts_.DecReadIndex();
-};
+// Inserts a block into the render buffers.
+void RenderDelayBufferImpl::InsertBlock(
+    const std::vector<std::vector<float>>& block,
+    int previous_write) {
+  auto& b = blocks_;
+  auto& lr = low_rate_;
+  auto& ds = render_ds_;
+  auto& f = ffts_;
+  auto& s = spectra_;
+  RTC_DCHECK_EQ(block.size(), b.buffer[b.write].size());
+  for (size_t k = 0; k < block.size(); ++k) {
+    RTC_DCHECK_EQ(block[k].size(), b.buffer[b.write][k].size());
+    std::copy(block[k].begin(), block[k].end(), b.buffer[b.write][k].begin());
+  }
 
-void RenderDelayBufferImpl::IncreaseInsert() {
-  low_rate_.UpdateWriteIndex(-sub_block_size_);
-  blocks_.IncWriteIndex();
-  spectra_.DecWriteIndex();
-  ffts_.DecWriteIndex();
-};
+  render_decimator_.Decimate(block[0], ds);
+  std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
+  fft_.PaddedFft(block[0], b.buffer[previous_write][0], &f.buffer[f.write]);
+  f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write]);
+}
 
 }  // namespace
 
+int RenderDelayBuffer::DelayEstimatorOffset(
+    const EchoCanceller3Config& config) {
+  return config.delay.api_call_jitter_blocks * 2;
+}
+
 RenderDelayBuffer* RenderDelayBuffer::Create(const EchoCanceller3Config& config,
                                              size_t num_bands) {
   return new RenderDelayBufferImpl(config, num_bands);
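
A note on the offset arithmetic above: DelayEstimatorOffset() returns twice
the allowed API call jitter, Reset() pre-fills the low rate buffer with half
of that (LowRateBufferOffset()), and MaxExternalDelayToInternalDelay()
subtracts the full offset when mapping the estimated delay to the internally
applied one. A small numeric sketch (not part of the CL; the config and delay
values are illustrative):

#include <algorithm>
#include <cassert>

// Mirrors RenderDelayBuffer::DelayEstimatorOffset() in the CL.
int DelayEstimatorOffset(int api_call_jitter_blocks) {
  return api_call_jitter_blocks * 2;
}

// Mirrors MaxExternalDelayToInternalDelay() in the CL: corrects the external
// delay estimate for the current buffer latency (in blocks) and the
// estimator offset.
int ExternalToInternalDelay(int latency_blocks,
                            int external_delay_blocks,
                            int api_call_jitter_blocks) {
  return latency_blocks + external_delay_blocks -
         DelayEstimatorOffset(api_call_jitter_blocks);
}

int main() {
  const int api_call_jitter_blocks = 26;  // Illustrative config value.

  // At the latency that Reset() establishes (half the estimator offset, i.e.
  // LowRateBufferOffset() blocks), an external estimate of e blocks maps to
  // an internal delay of e - api_call_jitter_blocks.
  const int reset_latency_blocks =
      DelayEstimatorOffset(api_call_jitter_blocks) / 2;
  assert(ExternalToInternalDelay(reset_latency_blocks, 30,
                                 api_call_jitter_blocks) == 30 - 26);

  // SetDelay() clamps the result to [0, MaxDelay()] before applying it; a
  // too-small estimate is clamped to 0, and CausalDelay() then reports it as
  // noncausal when it falls below min_echo_path_delay_blocks.
  const int max_delay = 100;  // Illustrative.
  const int clamped = std::min(
      max_delay,
      std::max(0, ExternalToInternalDelay(reset_latency_blocks, 10,
                                          api_call_jitter_blocks)));
  assert(clamped == 0);
  return 0;
}
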