NetEq: Flush and reset if the speech and CNG sample rates mismatch

If a CNG (comfort noise) packet is received first, followed by a speech
packet with a different sample rate, NetEq should treat this as a change
of codec: flush out the CNG packet and reset the sample rate to that of
the speech packet.

BUG=webrtc:5447
NOTRY=True

Review-Url: https://codereview.webrtc.org/2307493002
Cr-Commit-Position: refs/heads/master@{#14032}
diff --git a/webrtc/modules/audio_coding/neteq/packet_buffer.cc b/webrtc/modules/audio_coding/neteq/packet_buffer.cc
index 0e512b6..ba469a6 100644
--- a/webrtc/modules/audio_coding/neteq/packet_buffer.cc
+++ b/webrtc/modules/audio_coding/neteq/packet_buffer.cc
@@ -22,7 +22,7 @@
 #include "webrtc/modules/audio_coding/neteq/tick_timer.h"
 
 namespace webrtc {
-
+namespace {
 // Predicate used when inserting packets in the buffer list.
 // Operator() returns true when |packet| goes before |new_packet|.
 class NewTimestampIsLarger {
@@ -38,6 +38,17 @@
   const Packet* new_packet_;
 };
 
+// Reports whether |pt1| and |pt2| are both registered in |decoder_database|
+// and map to the same sample rate; yields false if either one is unknown.
+bool EqualSampleRates(uint8_t pt1,
+                      uint8_t pt2,
+                      const DecoderDatabase& decoder_database) {
+  const auto first = decoder_database.GetDecoderInfo(pt1);
+  const auto second = decoder_database.GetDecoderInfo(pt2);
+  return first && second && first->SampleRateHz() == second->SampleRateHz();
+}
+}  // namespace
+
 PacketBuffer::PacketBuffer(size_t max_number_of_packets,
                            const TickTimer* tick_timer)
     : max_number_of_packets_(max_number_of_packets), tick_timer_(tick_timer) {}
@@ -126,8 +137,12 @@
           rtc::Optional<uint8_t>(packet->header.payloadType);
     } else if (!decoder_database.IsDtmf(packet->header.payloadType)) {
       // This must be speech.
-      if (*current_rtp_payload_type &&
-          **current_rtp_payload_type != packet->header.payloadType) {
+      if ((*current_rtp_payload_type &&
+           **current_rtp_payload_type != packet->header.payloadType) ||
+          (*current_cng_rtp_payload_type &&
+           !EqualSampleRates(packet->header.payloadType,
+                             **current_cng_rtp_payload_type,
+                             decoder_database))) {
         *current_cng_rtp_payload_type = rtc::Optional<uint8_t>();
         Flush();
         flushed = true;
diff --git a/webrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc b/webrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc
index 4566943..1b86d83 100644
--- a/webrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc
@@ -384,6 +384,67 @@
   EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
 }
 
+// Inserts a narrow-band CNG packet followed by a wide-band speech packet. Even
+// though no speech packet was inserted earlier (so no current speech payload
+// type is set), the buffer must notice the sample-rate mismatch against the
+// pending CNG payload type, flush the CNG packet and report kFlushed.
+TEST(PacketBuffer, CngFirstThenSpeechWithNewSampleRate) {
+  TickTimer tick_timer;
+  PacketBuffer buffer(10, &tick_timer);  // Buffer capacity: 10 packets.
+  const uint8_t kCngPt = 13;  // Mocked below as kDecoderCNGnb.
+  const int kPayloadLen = 10;
+  const uint8_t kSpeechPt = 100;  // Mocked below as kDecoderPCM16Bwb.
+
+  MockDecoderDatabase decoder_database;
+  auto factory = CreateBuiltinAudioDecoderFactory();
+  const DecoderDatabase::DecoderInfo info_cng(NetEqDecoder::kDecoderCNGnb, "",
+                                              factory);
+  EXPECT_CALL(decoder_database, GetDecoderInfo(kCngPt))
+      .WillRepeatedly(Return(&info_cng));
+  const DecoderDatabase::DecoderInfo info_speech(NetEqDecoder::kDecoderPCM16Bwb,
+                                                 "", factory);
+  EXPECT_CALL(decoder_database, GetDecoderInfo(kSpeechPt))
+      .WillRepeatedly(Return(&info_speech));
+
+  // Step 1: insert a single narrow-band CNG packet into the empty buffer.
+  PacketGenerator gen(0, 0, kCngPt, 10);
+  PacketList list;
+  list.push_back(gen.NextPacket(kPayloadLen));
+  rtc::Optional<uint8_t> current_pt;
+  rtc::Optional<uint8_t> current_cng_pt;
+  EXPECT_EQ(PacketBuffer::kOK,
+            buffer.InsertPacketList(&list, decoder_database, &current_pt,
+                                    &current_cng_pt));
+  EXPECT_TRUE(list.empty());
+  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
+  ASSERT_TRUE(buffer.NextRtpHeader());
+  EXPECT_EQ(kCngPt, buffer.NextRtpHeader()->payloadType);
+  EXPECT_FALSE(current_pt);  // No speech payload type recorded yet.
+  EXPECT_EQ(rtc::Optional<uint8_t>(kCngPt),
+            current_cng_pt);  // CNG payload type is now recorded.
+
+  // Step 2: insert a wide-band speech packet from the same generator.
+  Packet* packet = gen.NextPacket(kPayloadLen);
+  packet->header.payloadType = kSpeechPt;  // Replace the CNG payload type.
+  list.push_back(packet);
+  // The buffered CNG packet has a different sample rate than the new speech
+  // packet, so the buffer is expected to flush it and return kFlushed.
+  EXPECT_EQ(PacketBuffer::kFlushed,
+            buffer.InsertPacketList(&list, decoder_database, &current_pt,
+                                    &current_cng_pt));
+  EXPECT_TRUE(list.empty());
+  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());  // Speech packet only.
+  ASSERT_TRUE(buffer.NextRtpHeader());
+  EXPECT_EQ(kSpeechPt, buffer.NextRtpHeader()->payloadType);
+
+  EXPECT_EQ(rtc::Optional<uint8_t>(kSpeechPt),
+            current_pt);         // Speech payload type is now recorded.
+  EXPECT_FALSE(current_cng_pt);  // CNG payload type cleared by the flush path.
+
+  buffer.Flush();                        // Empty the buffer before teardown.
+  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
+}
+
 TEST(PacketBuffer, Failures) {
   const uint16_t start_seq_no = 17;
   const uint32_t start_ts = 4711;