Only create H264 frames if there are no gaps in the packet sequence number.

In the case of H264 we can't know which packet that is the fist packet of a
frame. In order to avoid creating incomplete frames we keep track of which
packets that we haven't received, and if there is a gap in the packet sequence
number leading up to this frame then a frame wont be created.

BUG=chromium:716558

Review-Url: https://codereview.webrtc.org/2926083002
Cr-Commit-Position: refs/heads/master@{#18559}
diff --git a/webrtc/modules/video_coding/packet_buffer.cc b/webrtc/modules/video_coding/packet_buffer.cc
index 6c3cfac..1031042 100644
--- a/webrtc/modules/video_coding/packet_buffer.cc
+++ b/webrtc/modules/video_coding/packet_buffer.cc
@@ -17,6 +17,7 @@
 #include "webrtc/base/atomicops.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
+#include "webrtc/common_video/h264/h264_common.h"
 #include "webrtc/modules/video_coding/frame_object.h"
 #include "webrtc/system_wrappers/include/clock.h"
 
@@ -108,6 +109,8 @@
     data_buffer_[index] = *packet;
     packet->dataPtr = nullptr;
 
+    UpdateMissingPackets(packet->seqNum);
+
     int64_t now_ms = clock_->TimeInMilliseconds();
     last_received_packet_ms_ = rtc::Optional<int64_t>(now_ms);
     if (packet->frameType == kVideoFrameKey)
@@ -137,6 +140,9 @@
     sequence_buffer_[index].used = false;
     ++first_seq_num_;
   }
+
+  missing_packets_.erase(missing_packets_.begin(),
+                         missing_packets_.upper_bound(seq_num));
 }
 
 void PacketBuffer::Clear() {
@@ -149,8 +155,22 @@
 
   first_packet_received_ = false;
   is_cleared_to_first_seq_num_ = false;
-  last_received_packet_ms_ = rtc::Optional<int64_t>();
-  last_received_keyframe_packet_ms_ = rtc::Optional<int64_t>();
+  last_received_packet_ms_.reset();
+  last_received_keyframe_packet_ms_.reset();
+  newest_inserted_seq_num_.reset();
+  missing_packets_.clear();
+}
+
+void PacketBuffer::PaddingReceived(uint16_t seq_num) {
+  std::vector<std::unique_ptr<RtpFrameObject>> found_frames;
+  {
+    rtc::CritScope lock(&crit_);
+    UpdateMissingPackets(seq_num);
+    found_frames = FindFrames(static_cast<uint16_t>(seq_num + 1));
+  }
+
+  for (std::unique_ptr<RtpFrameObject>& frame : found_frames)
+    received_frame_callback_->OnReceivedFrame(std::move(frame));
 }
 
 rtc::Optional<int64_t> PacketBuffer::LastReceivedPacketMs() const {
@@ -194,6 +214,8 @@
 
   if (!sequence_buffer_[index].used)
     return false;
+  if (sequence_buffer_[index].seq_num != seq_num)
+    return false;
   if (sequence_buffer_[index].frame_created)
     return false;
   if (sequence_buffer_[index].frame_begin)
@@ -231,6 +253,7 @@
       int start_index = index;
 
       bool is_h264 = data_buffer_[start_index].codec == kVideoCodecH264;
+      bool is_h264_keyframe = false;
       int64_t frame_timestamp = data_buffer_[start_index].timestamp;
 
       // Since packet at |data_buffer_[index]| is already part of the frame
@@ -244,6 +267,17 @@
         if (!is_h264 && sequence_buffer_[start_index].frame_begin)
           break;
 
+        if (is_h264 && !is_h264_keyframe) {
+          const RTPVideoHeaderH264& header =
+              data_buffer_[start_index].video_header.codecHeader.H264;
+          for (size_t i = 0; i < header.nalus_length; ++i) {
+            if (header.nalus[i].type == H264::NaluType::kIdr) {
+              is_h264_keyframe = true;
+              break;
+            }
+          }
+        }
+
         start_index = start_index > 0 ? start_index - 1 : size_ - 1;
 
         // In the case of H264 we don't have a frame_begin bit (yes,
@@ -261,6 +295,23 @@
         --start_seq_num;
       }
 
+      // If this is H264 but not a keyframe, make sure there are no gaps in the
+      // packet sequence numbers up until this point.
+      if (is_h264 && !is_h264_keyframe &&
+          missing_packets_.upper_bound(start_seq_num) !=
+              missing_packets_.begin()) {
+        uint16_t stop_index = (index + 1) % size_;
+        while (start_index != stop_index) {
+          sequence_buffer_[start_index].frame_created = false;
+          start_index = (start_index + 1) % size_;
+        }
+
+        return found_frames;
+      }
+
+      missing_packets_.erase(missing_packets_.begin(),
+                             missing_packets_.upper_bound(seq_num));
+
       found_frames.emplace_back(
           new RtpFrameObject(this, start_seq_num, seq_num, frame_size,
                              max_nack_count, clock_->TimeInMilliseconds()));
@@ -331,5 +382,30 @@
   return count;
 }
 
+void PacketBuffer::UpdateMissingPackets(uint16_t seq_num) {
+  if (!newest_inserted_seq_num_)
+    newest_inserted_seq_num_ = rtc::Optional<uint16_t>(seq_num);
+
+  const int kMaxPaddingAge = 1000;
+  if (AheadOf(seq_num, *newest_inserted_seq_num_)) {
+    uint16_t old_seq_num = seq_num - kMaxPaddingAge;
+    auto erase_to = missing_packets_.lower_bound(old_seq_num);
+    missing_packets_.erase(missing_packets_.begin(), erase_to);
+
+    // Guard against inserting a large amount of missing packets if there is a
+    // jump in the sequence number.
+    if (AheadOf(old_seq_num, *newest_inserted_seq_num_))
+      *newest_inserted_seq_num_ = old_seq_num;
+
+    ++*newest_inserted_seq_num_;
+    while (AheadOf(seq_num, *newest_inserted_seq_num_)) {
+      missing_packets_.insert(*newest_inserted_seq_num_);
+      ++*newest_inserted_seq_num_;
+    }
+  } else {
+    missing_packets_.erase(seq_num);
+  }
+}
+
 }  // namespace video_coding
 }  // namespace webrtc
diff --git a/webrtc/modules/video_coding/packet_buffer.h b/webrtc/modules/video_coding/packet_buffer.h
index 3d9eb9c..d5011b6 100644
--- a/webrtc/modules/video_coding/packet_buffer.h
+++ b/webrtc/modules/video_coding/packet_buffer.h
@@ -11,8 +11,9 @@
 #ifndef WEBRTC_MODULES_VIDEO_CODING_PACKET_BUFFER_H_
 #define WEBRTC_MODULES_VIDEO_CODING_PACKET_BUFFER_H_
 
-#include <vector>
 #include <memory>
+#include <set>
+#include <vector>
 
 #include "webrtc/base/criticalsection.h"
 #include "webrtc/base/scoped_ref_ptr.h"
@@ -54,6 +55,7 @@
   virtual bool InsertPacket(VCMPacket* packet);
   void ClearTo(uint16_t seq_num);
   void Clear();
+  void PaddingReceived(uint16_t seq_num);
 
   // Timestamp (not RTP timestamp) of the last received packet/keyframe packet.
   rtc::Optional<int64_t> LastReceivedPacketMs() const;
@@ -121,6 +123,8 @@
   // Virtual for testing.
   virtual void ReturnFrame(RtpFrameObject* frame);
 
+  void UpdateMissingPackets(uint16_t seq_num) EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
   rtc::CriticalSection crit_;
 
   // Buffer size_ and max_size_ must always be a power of two.
@@ -150,6 +154,10 @@
   rtc::Optional<int64_t> last_received_packet_ms_ GUARDED_BY(crit_);
   rtc::Optional<int64_t> last_received_keyframe_packet_ms_ GUARDED_BY(crit_);
 
+  rtc::Optional<uint16_t> newest_inserted_seq_num_ GUARDED_BY(crit_);
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> missing_packets_
+      GUARDED_BY(crit_);
+
   mutable volatile int ref_count_ = 0;
 };
 
diff --git a/webrtc/modules/video_coding/video_packet_buffer_unittest.cc b/webrtc/modules/video_coding/video_packet_buffer_unittest.cc
index c5d1fce..300a2a6 100644
--- a/webrtc/modules/video_coding/video_packet_buffer_unittest.cc
+++ b/webrtc/modules/video_coding/video_packet_buffer_unittest.cc
@@ -14,6 +14,7 @@
 #include <utility>
 
 #include "webrtc/base/random.h"
+#include "webrtc/common_video/h264/h264_common.h"
 #include "webrtc/modules/video_coding/frame_object.h"
 #include "webrtc/modules/video_coding/packet_buffer.h"
 #include "webrtc/system_wrappers/include/clock.h"
@@ -41,6 +42,7 @@
                     << first_seq_num << ".";
       return;
     }
+
     frames_from_callback_.insert(
         std::make_pair(frame->first_seq_num(), std::move(frame)));
   }
@@ -68,6 +70,27 @@
     return packet_buffer_->InsertPacket(&packet);
   }
 
+  bool InsertH264(uint16_t seq_num,           // packet sequence number
+                  IsKeyFrame keyframe,        // is keyframe
+                  IsFirst first,              // is first packet of frame
+                  IsLast last,                // is last packet of frame
+                  uint32_t timestamp,         // rtp timestamp
+                  int data_size = 0,          // size of data
+                  uint8_t* data = nullptr) {  // data pointer
+    VCMPacket packet;
+    packet.codec = kVideoCodecH264;
+    packet.seqNum = seq_num;
+    packet.timestamp = timestamp;
+    packet.video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kIdr;
+    packet.video_header.codecHeader.H264.nalus_length = keyframe == kKeyFrame;
+    packet.is_first_packet_in_frame = first == kFirst;
+    packet.markerBit = last == kLast;
+    packet.sizeBytes = data_size;
+    packet.dataPtr = data;
+
+    return packet_buffer_->InsertPacket(&packet);
+  }
+
   void CheckFrame(uint16_t first_seq_num) {
     auto frame_it = frames_from_callback_.find(first_seq_num);
     ASSERT_FALSE(frame_it == frames_from_callback_.end())
@@ -366,6 +389,8 @@
       new uint8_t[sizeof(data_data) + EncodedImage::kBufferPaddingBytesH264]);
 
   VCMPacket packet;
+  packet.video_header.codecHeader.H264.nalus_length = 1;
+  packet.video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kIdr;
   packet.seqNum = seq_num;
   packet.codec = kVideoCodecH264;
   packet.insertStartCode = true;
@@ -478,49 +503,6 @@
   EXPECT_EQ(0UL, frames_from_callback_.size());
 }
 
-TEST_F(TestPacketBuffer, OneH264FrameFillBuffer) {
-  VCMPacket packet;
-  packet.seqNum = 0;
-  packet.codec = kVideoCodecH264;
-  packet.dataPtr = nullptr;
-  packet.sizeBytes = 0;
-  packet.is_first_packet_in_frame = true;
-  packet.markerBit = false;
-  packet_buffer_->InsertPacket(&packet);
-
-  packet.is_first_packet_in_frame = false;
-  for (int i = 1; i < kStartSize - 1; ++i) {
-    packet.seqNum = i;
-    packet_buffer_->InsertPacket(&packet);
-  }
-
-  packet.seqNum = kStartSize - 1;
-  packet.markerBit = true;
-  packet_buffer_->InsertPacket(&packet);
-
-  EXPECT_EQ(1UL, frames_from_callback_.size());
-  CheckFrame(0);
-}
-
-TEST_F(TestPacketBuffer, OneH264FrameMaxSeqNum) {
-  VCMPacket packet;
-  packet.seqNum = 65534;
-  packet.codec = kVideoCodecH264;
-  packet.dataPtr = nullptr;
-  packet.sizeBytes = 0;
-  packet.is_first_packet_in_frame = true;
-  packet.markerBit = false;
-  packet_buffer_->InsertPacket(&packet);
-
-  packet.is_first_packet_in_frame = false;
-  packet.seqNum = 65535;
-  packet.markerBit = true;
-  packet_buffer_->InsertPacket(&packet);
-
-  EXPECT_EQ(1UL, frames_from_callback_.size());
-  CheckFrame(65534);
-}
-
 TEST_F(TestPacketBuffer, PacketTimestamps) {
   rtc::Optional<int64_t> packet_ms;
   rtc::Optional<int64_t> packet_keyframe_ms;
@@ -556,5 +538,51 @@
   EXPECT_FALSE(packet_keyframe_ms);
 }
 
+TEST_F(TestPacketBuffer, OneFrameFillBufferH264) {
+  InsertH264(0, kKeyFrame, kFirst, kNotLast, 1000);
+  for (int i = 1; i < kStartSize - 1; ++i)
+    InsertH264(i, kKeyFrame, kNotFirst, kNotLast, 1000);
+  InsertH264(kStartSize - 1, kKeyFrame, kNotFirst, kLast, 1000);
+
+  EXPECT_EQ(1UL, frames_from_callback_.size());
+  CheckFrame(0);
+}
+
+TEST_F(TestPacketBuffer, OneFrameMaxSeqNumH264) {
+  InsertH264(65534, kKeyFrame, kFirst, kNotLast, 1000);
+  InsertH264(65535, kKeyFrame, kNotFirst, kLast, 1000);
+
+  EXPECT_EQ(1UL, frames_from_callback_.size());
+  CheckFrame(65534);
+}
+
+TEST_F(TestPacketBuffer, ClearMissingPacketsOnKeyframeH264) {
+  InsertH264(0, kKeyFrame, kFirst, kLast, 1000);
+  InsertH264(2, kKeyFrame, kFirst, kLast, 3000);
+  InsertH264(3, kDeltaFrame, kFirst, kNotLast, 4000);
+  InsertH264(4, kDeltaFrame, kNotFirst, kLast, 4000);
+
+  ASSERT_EQ(3UL, frames_from_callback_.size());
+
+  InsertH264(kStartSize + 1, kKeyFrame, kFirst, kLast, 18000);
+
+  ASSERT_EQ(4UL, frames_from_callback_.size());
+  CheckFrame(0);
+  CheckFrame(2);
+  CheckFrame(3);
+  CheckFrame(kStartSize + 1);
+}
+
+TEST_F(TestPacketBuffer, FindFramesOnPaddingH264) {
+  InsertH264(0, kKeyFrame, kFirst, kLast, 1000);
+  InsertH264(2, kDeltaFrame, kFirst, kLast, 1000);
+
+  ASSERT_EQ(1UL, frames_from_callback_.size());
+  packet_buffer_->PaddingReceived(1);
+  ASSERT_EQ(2UL, frames_from_callback_.size());
+  CheckFrame(0);
+  CheckFrame(2);
+}
+
 }  // namespace video_coding
 }  // namespace webrtc