NetEq fix for repeated audio issue.

This CL implements a fix, behind a field trial, for a NetEq issue. NetEq restarts audio too soon after a buffer underrun, which in some circumstances leads almost immediately to another underrun. The fix makes NetEq postpone restarting playback until sufficient audio is buffered.

Bug: webrtc:9289
Change-Id: I5968c9478ce8d84caf77f00b8d0a39156b47fc8d
Reviewed-on: https://webrtc-review.googlesource.com/77423
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23347}
diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
index 3d10b6f..c784a68 100644
--- a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
@@ -36,6 +36,8 @@
     return (ret < 0) ? 0 : static_cast<size_t>(ret);
   }
 
+  bool IsDtxPacket() const override { return payload_.size() <= 2; }
+
   rtc::Optional<DecodeResult> Decode(
       rtc::ArrayView<int16_t> decoded) const override {
     AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc
index 6ab2716..279a9e6 100644
--- a/modules/audio_coding/neteq/decision_logic.cc
+++ b/modules/audio_coding/neteq/decision_logic.cc
@@ -130,9 +130,9 @@
 
   FilterBufferLevel(cur_size_samples, prev_mode);
 
-  return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
-                                next_packet, prev_mode, play_dtmf,
-                                reset_decoder, generated_noise_samples);
+  return GetDecisionSpecialized(
+      sync_buffer, expand, decoder_frame_length, next_packet, prev_mode,
+      play_dtmf, reset_decoder, generated_noise_samples, cur_size_samples);
 }
 
 void DecisionLogic::ExpandDecision(Operations operation) {
diff --git a/modules/audio_coding/neteq/decision_logic.h b/modules/audio_coding/neteq/decision_logic.h
index 5b67196..9d88c4d 100644
--- a/modules/audio_coding/neteq/decision_logic.h
+++ b/modules/audio_coding/neteq/decision_logic.h
@@ -137,7 +137,8 @@
                                             Modes prev_mode,
                                             bool play_dtmf,
                                             bool* reset_decoder,
-                                            size_t generated_noise_samples) = 0;
+                                            size_t generated_noise_samples,
+                                            size_t cur_size_samples) = 0;
 
   // Updates the |buffer_level_filter_| with the current buffer level
   // |buffer_size_packets|.
diff --git a/modules/audio_coding/neteq/decision_logic_fax.cc b/modules/audio_coding/neteq/decision_logic_fax.cc
index cc21ee9..22d36ce 100644
--- a/modules/audio_coding/neteq/decision_logic_fax.cc
+++ b/modules/audio_coding/neteq/decision_logic_fax.cc
@@ -27,7 +27,8 @@
     Modes prev_mode,
     bool play_dtmf,
     bool* reset_decoder,
-    size_t generated_noise_samples) {
+    size_t generated_noise_samples,
+    size_t /*cur_size_samples*/) {
   assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
   uint32_t target_timestamp = sync_buffer.end_timestamp();
   uint32_t available_timestamp = 0;
diff --git a/modules/audio_coding/neteq/decision_logic_fax.h b/modules/audio_coding/neteq/decision_logic_fax.h
index cefd8e4..1436f99 100644
--- a/modules/audio_coding/neteq/decision_logic_fax.h
+++ b/modules/audio_coding/neteq/decision_logic_fax.h
@@ -47,7 +47,8 @@
                                     Modes prev_mode,
                                     bool play_dtmf,
                                     bool* reset_decoder,
-                                    size_t generated_noise_samples) override;
+                                    size_t generated_noise_samples,
+                                    size_t cur_size_samples) override;
 
  private:
   RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
diff --git a/modules/audio_coding/neteq/decision_logic_normal.cc b/modules/audio_coding/neteq/decision_logic_normal.cc
index 1429bb7..c163999 100644
--- a/modules/audio_coding/neteq/decision_logic_normal.cc
+++ b/modules/audio_coding/neteq/decision_logic_normal.cc
@@ -13,6 +13,7 @@
 #include <assert.h>
 
 #include <algorithm>
+#include <limits>
 
 #include "modules/audio_coding/neteq/buffer_level_filter.h"
 #include "modules/audio_coding/neteq/decoder_database.h"
@@ -31,7 +32,8 @@
     Modes prev_mode,
     bool play_dtmf,
     bool* reset_decoder,
-    size_t generated_noise_samples) {
+    size_t generated_noise_samples,
+    size_t cur_size_samples) {
   assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
   // Guard for errors, to avoid getting stuck in error mode.
   if (prev_mode == kModeError) {
@@ -68,6 +70,21 @@
     return kNormal;
   }
 
+  // Make sure we don't restart audio too soon after an expansion to avoid
+  // running out of data right away again. We should only wait if there are no
+  // DTX or CNG packets in the buffer (otherwise we should just play out what we
+  // have, since we cannot know the exact duration of DTX or CNG packets), and
+  // if the mute factor is low enough (otherwise the expansion was short enough
+  // to not be noticeable).
+  // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
+  if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
+      !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
+      cur_size_samples < static_cast<size_t>(delay_manager_->TargetLevel() *
+                                             packet_length_samples_) >> 8 &&
+      expand.MuteFactor(0) < 16384 / 2) {
+    return kExpand;
+  }
+
   const uint32_t five_seconds_samples =
       static_cast<uint32_t>(5 * 8000 * fs_mult_);
   // Check if the required packet is available.
diff --git a/modules/audio_coding/neteq/decision_logic_normal.h b/modules/audio_coding/neteq/decision_logic_normal.h
index 366d103..a718f99 100644
--- a/modules/audio_coding/neteq/decision_logic_normal.h
+++ b/modules/audio_coding/neteq/decision_logic_normal.h
@@ -13,6 +13,7 @@
 
 #include "modules/audio_coding/neteq/decision_logic.h"
 #include "rtc_base/constructormagic.h"
+#include "system_wrappers/include/field_trial.h"
 #include "typedefs.h"  // NOLINT(build/include)
 
 namespace webrtc {
@@ -37,7 +38,9 @@
                       packet_buffer,
                       delay_manager,
                       buffer_level_filter,
-                      tick_timer) {}
+                      tick_timer),
+        postpone_decoding_after_expand_(field_trial::IsEnabled(
+            "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {}
 
  protected:
   static const int kReinitAfterExpands = 100;
@@ -50,7 +53,8 @@
                                     Modes prev_mode,
                                     bool play_dtmf,
                                     bool* reset_decoder,
-                                    size_t generated_noise_samples) override;
+                                    size_t generated_noise_samples,
+                                    size_t cur_size_samples) override;
 
   // Returns the operation to do given that the expected packet is not
   // available, but a packet further into the future is at hand.
@@ -100,6 +104,8 @@
   // Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
   bool MaxWaitForPacket() const;
 
+  const bool postpone_decoding_after_expand_;
+
   RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
 };
 
diff --git a/modules/audio_coding/neteq/expand.h b/modules/audio_coding/neteq/expand.h
index 39249f1..4060bd7 100644
--- a/modules/audio_coding/neteq/expand.h
+++ b/modules/audio_coding/neteq/expand.h
@@ -57,7 +57,7 @@
   virtual void SetParametersForMergeAfterExpand();
 
   // Returns the mute factor for |channel|.
-  int16_t MuteFactor(size_t channel) {
+  int16_t MuteFactor(size_t channel) const {
     assert(channel < num_channels_);
     return channel_parameters_[channel].mute_factor;
   }
diff --git a/modules/audio_coding/neteq/packet_buffer.cc b/modules/audio_coding/neteq/packet_buffer.cc
index dfffebd..9752ec6 100644
--- a/modules/audio_coding/neteq/packet_buffer.cc
+++ b/modules/audio_coding/neteq/packet_buffer.cc
@@ -285,6 +285,18 @@
   return num_samples;
 }
 
+bool PacketBuffer::ContainsDtxOrCngPacket(
+    const DecoderDatabase* decoder_database) const {
+  RTC_DCHECK(decoder_database);  // Needed below for the comfort-noise lookup.
+  for (const Packet& packet : buffer_) {  // Stops at the first match.
+    if ((packet.frame && packet.frame->IsDtxPacket()) ||  // DTX frame, or ...
+        decoder_database->IsComfortNoise(packet.payload_type)) {  // ... CNG.
+      return true;
+    }
+  }
+  return false;  // Also the result when the buffer is empty.
+}
+
 void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const {
   *num_packets = static_cast<int>(buffer_.size());
   *max_num_packets = static_cast<int>(max_number_of_packets_);
diff --git a/modules/audio_coding/neteq/packet_buffer.h b/modules/audio_coding/neteq/packet_buffer.h
index c83bf89..c646626 100644
--- a/modules/audio_coding/neteq/packet_buffer.h
+++ b/modules/audio_coding/neteq/packet_buffer.h
@@ -12,6 +12,7 @@
 #define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
 
 #include "api/optional.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
 #include "modules/audio_coding/neteq/packet.h"
 #include "modules/include/module_common_types.h"
 #include "rtc_base/constructormagic.h"
@@ -121,6 +122,10 @@
   // duplicate and redundant packets.
   virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;
 
+  // Returns true if the packet buffer contains any DTX or CNG packets.
+  virtual bool ContainsDtxOrCngPacket(
+      const DecoderDatabase* decoder_database) const;
+
   virtual void BufferStat(int* num_packets, int* max_num_packets) const;
 
   // Static method returning true if |timestamp| is older than |timestamp_limit|