NetEq: Move counting of generated CNG samples from DecisionLogic

The counting is moved to NetEqImpl, and the new counter is realized as a
Stopwatch object. The DecisionLogic class still has to maintain a record
of when the CNG period is shortened, in order to reduce the delay. This
is recorded in a new noise_fast_forward_ member in DecisionLogic.

BUG=webrtc:5608

Review-Url: https://codereview.webrtc.org/1914303004
Cr-Commit-Position: refs/heads/master@{#12608}
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic.cc b/webrtc/modules/audio_coding/neteq/decision_logic.cc
index b702e6d..8cef2c9 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic.cc
+++ b/webrtc/modules/audio_coding/neteq/decision_logic.cc
@@ -67,7 +67,6 @@
       delay_manager_(delay_manager),
       buffer_level_filter_(buffer_level_filter),
       cng_state_(kCngOff),
-      generated_noise_samples_(0),
       packet_length_samples_(0),
       sample_memory_(0),
       prev_time_scale_(false),
@@ -80,7 +79,7 @@
 
 void DecisionLogic::Reset() {
   cng_state_ = kCngOff;
-  generated_noise_samples_ = 0;
+  noise_fast_forward_ = 0;
   packet_length_samples_ = 0;
   sample_memory_ = 0;
   prev_time_scale_ = false;
@@ -107,15 +106,15 @@
                                       size_t decoder_frame_length,
                                       const RTPHeader* packet_header,
                                       Modes prev_mode,
-                                      bool play_dtmf, bool* reset_decoder) {
+                                      bool play_dtmf,
+                                      size_t generated_noise_samples,
+                                      bool* reset_decoder) {
   if (prev_mode == kModeRfc3389Cng ||
       prev_mode == kModeCodecInternalCng ||
       prev_mode == kModeExpand) {
     // If last mode was CNG (or Expand, since this could be covering up for
-    // a lost CNG packet), increase the |generated_noise_samples_| counter.
-    generated_noise_samples_ += output_size_samples_;
-    // Remember that CNG is on. This is needed if comfort noise is interrupted
-    // by DTMF.
+    // a lost CNG packet), remember that CNG is on. This is needed if comfort
+    // noise is interrupted by DTMF.
     if (prev_mode == kModeRfc3389Cng) {
       cng_state_ = kCngRfc3389On;
     } else if (prev_mode == kModeCodecInternalCng) {
@@ -139,7 +138,7 @@
 
   return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
                                 packet_header, prev_mode, play_dtmf,
-                                reset_decoder);
+                                reset_decoder, generated_noise_samples);
 }
 
 void DecisionLogic::ExpandDecision(Operations operation) {
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic.h b/webrtc/modules/audio_coding/neteq/decision_logic.h
index 72121b7..18f424a 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic.h
+++ b/webrtc/modules/audio_coding/neteq/decision_logic.h
@@ -79,6 +79,7 @@
                          const RTPHeader* packet_header,
                          Modes prev_mode,
                          bool play_dtmf,
+                         size_t generated_noise_samples,
                          bool* reset_decoder);
 
   // These methods test the |cng_state_| for different conditions.
@@ -101,10 +102,7 @@
 
   // Accessors and mutators.
   void set_sample_memory(int32_t value) { sample_memory_ = value; }
-  size_t generated_noise_samples() const { return generated_noise_samples_; }
-  void set_generated_noise_samples(size_t value) {
-    generated_noise_samples_ = value;
-  }
+  size_t noise_fast_forward() const { return noise_fast_forward_; }
   size_t packet_length_samples() const { return packet_length_samples_; }
   void set_packet_length_samples(size_t value) {
     packet_length_samples_ = value;
@@ -138,7 +136,8 @@
                                             const RTPHeader* packet_header,
                                             Modes prev_mode,
                                             bool play_dtmf,
-                                            bool* reset_decoder) = 0;
+                                            bool* reset_decoder,
+                                            size_t generated_noise_samples) = 0;
 
   // Updates the |buffer_level_filter_| with the current buffer level
   // |buffer_size_packets|.
@@ -152,7 +151,7 @@
   size_t output_size_samples_;
   CngState cng_state_;  // Remember if comfort noise is interrupted by other
                         // event (e.g., DTMF).
-  size_t generated_noise_samples_;
+  size_t noise_fast_forward_ = 0;
   size_t packet_length_samples_;
   int sample_memory_;
   bool prev_time_scale_;
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc b/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc
index ddea644..aace402 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc
@@ -26,7 +26,8 @@
     const RTPHeader* packet_header,
     Modes prev_mode,
     bool play_dtmf,
-    bool* reset_decoder) {
+    bool* reset_decoder,
+    size_t generated_noise_samples) {
   assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
   uint32_t target_timestamp = sync_buffer.end_timestamp();
   uint32_t available_timestamp = 0;
@@ -37,7 +38,7 @@
         decoder_database_->IsComfortNoise(packet_header->payloadType);
   }
   if (is_cng_packet) {
-    if (static_cast<int32_t>((generated_noise_samples_ + target_timestamp)
+    if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
         - available_timestamp) >= 0) {
       // Time to play this packet now.
       return kRfc3389Cng;
@@ -70,13 +71,13 @@
   } else if (target_timestamp == available_timestamp) {
     return kNormal;
   } else {
-    if (static_cast<int32_t>((generated_noise_samples_ + target_timestamp)
+    if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
         - available_timestamp) >= 0) {
       return kNormal;
     } else {
       // If currently playing comfort noise, continue with that. Do not
-      // increase the timestamp counter since generated_noise_samples_ will
-      // be increased.
+      // increase the timestamp counter since generated_noise_stopwatch_ in
+      // NetEqImpl will take care of the time-keeping.
       if (cng_state_ == kCngRfc3389On) {
         return kRfc3389CngNoPacket;
       } else if (cng_state_ == kCngInternalOn) {
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_fax.h b/webrtc/modules/audio_coding/neteq/decision_logic_fax.h
index 204dcc1..fbb0cc0 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_fax.h
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_fax.h
@@ -50,7 +50,8 @@
                                     const RTPHeader* packet_header,
                                     Modes prev_mode,
                                     bool play_dtmf,
-                                    bool* reset_decoder) override;
+                                    bool* reset_decoder,
+                                    size_t generated_noise_samples) override;
 
  private:
   RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc b/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc
index 0252d1c..37a75d7 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc
@@ -31,7 +31,8 @@
     const RTPHeader* packet_header,
     Modes prev_mode,
     bool play_dtmf,
-    bool* reset_decoder) {
+    bool* reset_decoder,
+    size_t generated_noise_samples) {
   assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
   // Guard for errors, to avoid getting stuck in error mode.
   if (prev_mode == kModeError) {
@@ -52,7 +53,8 @@
   }
 
   if (is_cng_packet) {
-    return CngOperation(prev_mode, target_timestamp, available_timestamp);
+    return CngOperation(prev_mode, target_timestamp, available_timestamp,
+                        generated_noise_samples);
   }
 
   // Handle the case with no packet at all available (except maybe DTMF).
@@ -76,7 +78,8 @@
                  available_timestamp, target_timestamp, five_seconds_samples)) {
     return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length,
                                  prev_mode, target_timestamp,
-                                 available_timestamp, play_dtmf);
+                                 available_timestamp, play_dtmf,
+                                 generated_noise_samples);
   } else {
     // This implies that available_timestamp < target_timestamp, which can
     // happen when a new stream or codec is received. Signal for a reset.
@@ -86,10 +89,11 @@
 
 Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
                                              uint32_t target_timestamp,
-                                             uint32_t available_timestamp) {
+                                             uint32_t available_timestamp,
+                                             size_t generated_noise_samples) {
   // Signed difference between target and available timestamp.
   int32_t timestamp_diff = static_cast<int32_t>(
-      static_cast<uint32_t>(generated_noise_samples_ + target_timestamp) -
+      static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
       available_timestamp);
   int32_t optimal_level_samp = static_cast<int32_t>(
       (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
@@ -97,9 +101,9 @@
 
   if (excess_waiting_time_samp > optimal_level_samp / 2) {
     // The waiting time for this packet will be longer than 1.5
-    // times the wanted buffer delay. Advance the clock to cut
+    // times the wanted buffer delay. Apply fast-forward to cut the
     // waiting time down to the optimal.
-    generated_noise_samples_ += excess_waiting_time_samp;
+    noise_fast_forward_ += excess_waiting_time_samp;
     timestamp_diff += excess_waiting_time_samp;
   }
 
@@ -109,6 +113,7 @@
     return kRfc3389CngNoPacket;
   } else {
     // Otherwise, go for the CNG packet now.
+    noise_fast_forward_ = 0;
     return kRfc3389Cng;
   }
 }
@@ -153,7 +158,8 @@
     Modes prev_mode,
     uint32_t target_timestamp,
     uint32_t available_timestamp,
-    bool play_dtmf) {
+    bool play_dtmf,
+    size_t generated_noise_samples) {
   // Required packet is not available, but a future packet is.
   // Check if we should continue with an ongoing expand because the new packet
   // is too far into the future.
@@ -184,7 +190,7 @@
     // safety precaution), but make sure that the number of samples in buffer
     // is no higher than 4 times the optimal level. (Note that TargetLevel()
     // is in Q8.)
-    if (static_cast<uint32_t>(generated_noise_samples_ + target_timestamp) >=
+    if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
             available_timestamp ||
         cur_size_samples >
             ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_normal.h b/webrtc/modules/audio_coding/neteq/decision_logic_normal.h
index 7465906..57aa13b 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_normal.h
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_normal.h
@@ -54,7 +54,8 @@
                                     const RTPHeader* packet_header,
                                     Modes prev_mode,
                                     bool play_dtmf,
-                                    bool* reset_decoder) override;
+                                    bool* reset_decoder,
+                                    size_t generated_noise_samples) override;
 
   // Returns the operation to do given that the expected packet is not
   // available, but a packet further into the future is at hand.
@@ -65,7 +66,8 @@
       Modes prev_mode,
       uint32_t target_timestamp,
       uint32_t available_timestamp,
-      bool play_dtmf);
+      bool play_dtmf,
+      size_t generated_noise_samples);
 
   // Returns the operation to do given that the expected packet is available.
   virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
@@ -77,8 +79,10 @@
  private:
   // Returns the operation given that the next available packet is a comfort
   // noise payload (RFC 3389 only, not codec-internal).
-  Operations CngOperation(Modes prev_mode, uint32_t target_timestamp,
-                          uint32_t available_timestamp);
+  Operations CngOperation(Modes prev_mode,
+                          uint32_t target_timestamp,
+                          uint32_t available_timestamp,
+                          size_t generated_noise_samples);
 
   // Checks if enough time has elapsed since the last successful timescale
   // operation was done (i.e., accelerate or preemptive expand).
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index b4ba650..6d705e1 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -833,6 +833,11 @@
   vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
                sid_frame_available, fs_hz_);
 
+  if (sid_frame_available || speech_type == AudioDecoder::kComfortNoise) {
+    // Start a new stopwatch since we are decoding a new CNG packet.
+    generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
+  }
+
   algorithm_buffer_->Clear();
   switch (operation) {
     case kNormal: {
@@ -1006,6 +1011,12 @@
           : timestamp_scaler_->ToExternal(playout_timestamp_) -
                 static_cast<uint32_t>(audio_frame->samples_per_channel_);
 
+  if (!(last_mode_ == kModeRfc3389Cng ||
+      last_mode_ == kModeCodecInternalCng ||
+      last_mode_ == kModeExpand)) {
+    generated_noise_stopwatch_.reset();
+  }
+
   if (decode_return_value) return decode_return_value;
   return return_value;
 }
@@ -1029,14 +1040,22 @@
   }
   const RTPHeader* header = packet_buffer_->NextRtpHeader();
 
+  RTC_DCHECK(!generated_noise_stopwatch_ ||
+             generated_noise_stopwatch_->ElapsedTicks() >= 1);
+  uint64_t generated_noise_samples =
+      generated_noise_stopwatch_
+          ? (generated_noise_stopwatch_->ElapsedTicks() - 1) *
+                    output_size_samples_ +
+                decision_logic_->noise_fast_forward()
+          : 0;
+
   if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
     // Because of timestamp peculiarities, we have to "manually" disallow using
     // a CNG packet with the same timestamp as the one that was last played.
     // This can happen when using redundancy and will cause the timing to shift.
     while (header && decoder_database_->IsComfortNoise(header->payloadType) &&
            (end_timestamp >= header->timestamp ||
-            end_timestamp + decision_logic_->generated_noise_samples() >
-                header->timestamp)) {
+            end_timestamp + generated_noise_samples > header->timestamp)) {
       // Don't use this packet, discard it.
       if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
         assert(false);  // Must be ok by design.
@@ -1064,7 +1083,7 @@
   // Check if it is time to play a DTMF event.
   if (dtmf_buffer_->GetEvent(
       static_cast<uint32_t>(
-          end_timestamp + decision_logic_->generated_noise_samples()),
+          end_timestamp + generated_noise_samples),
       dtmf_event)) {
     *play_dtmf = true;
   }
@@ -1072,13 +1091,14 @@
   // Get instruction.
   assert(sync_buffer_.get());
   assert(expand_.get());
-  *operation = decision_logic_->GetDecision(*sync_buffer_,
-                                            *expand_,
-                                            decoder_frame_length_,
-                                            header,
-                                            last_mode_,
-                                            *play_dtmf,
-                                            &reset_decoder_);
+  generated_noise_samples =
+      generated_noise_stopwatch_
+          ? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ +
+                decision_logic_->noise_fast_forward()
+          : 0;
+  *operation = decision_logic_->GetDecision(
+      *sync_buffer_, *expand_, decoder_frame_length_, header, last_mode_,
+      *play_dtmf, generated_noise_samples, &reset_decoder_);
 
   // Check if we already have enough samples in the |sync_buffer_|. If so,
   // change decision to normal, unless the decision was merge, accelerate, or
@@ -1151,15 +1171,19 @@
       // TODO(hlundin): Write test for this.
       // Update timestamp.
       timestamp_ = end_timestamp;
-      if (decision_logic_->generated_noise_samples() > 0 &&
-          last_mode_ != kModeDtmf) {
+      const uint64_t generated_noise_samples =
+          generated_noise_stopwatch_
+              ? generated_noise_stopwatch_->ElapsedTicks() *
+                        output_size_samples_ +
+                    decision_logic_->noise_fast_forward()
+              : 0;
+      if (generated_noise_samples > 0 && last_mode_ != kModeDtmf) {
         // Make a jump in timestamp due to the recently played comfort noise.
         uint32_t timestamp_jump =
-            static_cast<uint32_t>(decision_logic_->generated_noise_samples());
+            static_cast<uint32_t>(generated_noise_samples);
         sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
         timestamp_ += timestamp_jump;
       }
-      decision_logic_->set_generated_noise_samples(0);
       return 0;
     }
     case kAccelerate:
@@ -1242,9 +1266,6 @@
       // We are about to decode and use a non-CNG packet.
       decision_logic_->SetCngOff();
     }
-    // Reset CNG timestamp as a new packet will be delivered.
-    // (Also if this is a CNG packet, since playedOutTS is updated.)
-    decision_logic_->set_generated_noise_samples(0);
 
     extracted_samples = ExtractPackets(required_samples, packet_list);
     if (extracted_samples < 0) {
@@ -1577,6 +1598,12 @@
   if (!play_dtmf) {
     dtmf_tone_generator_->Reset();
   }
+
+  if (!generated_noise_stopwatch_) {
+    // Start a new stopwatch since we may be covering for a lost CNG packet.
+    generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
+  }
+
   return 0;
 }
 
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index a707f25..2203de1 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -405,6 +405,8 @@
   bool nack_enabled_ GUARDED_BY(crit_sect_);
   AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) =
       AudioFrame::kVadPassive;
+  std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
+      GUARDED_BY(crit_sect_);
 
  private:
   RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);