ManageFrameH264 for temporal layers (PART 3/3)

Bug: webrtc:10579
Change-Id: Iec54f6b1231d34c2018f22841c3614ddd0b05612
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/86200
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27820}
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index 69e8c17..bd1ab03 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -384,10 +384,13 @@
               VideoFrameType::kVideoFrameDelta;
         }
 
-        // If this is not a keyframe, make sure there are no gaps in the
-        // packet sequence numbers up until this point.
-        if (!is_h264_keyframe && missing_packets_.upper_bound(start_seq_num) !=
-                                     missing_packets_.begin()) {
+        // With IPPP, if this is not a keyframe, make sure there are no gaps
+        // in the packet sequence numbers up until this point.
+        const uint8_t h264tid =
+            data_buffer_[start_index].video_header.frame_marking.temporal_id;
+        if (h264tid == kNoTemporalIdx && !is_h264_keyframe
+            && missing_packets_.upper_bound(start_seq_num)
+            != missing_packets_.begin()) {
           uint16_t stop_index = (index + 1) % size_;
           while (start_index != stop_index) {
             sequence_buffer_[start_index].frame_created = false;
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index 8191017..af474d5 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -95,6 +95,8 @@
       return ManageFrameVp8(frame);
     case kVideoCodecVP9:
       return ManageFrameVp9(frame);
+    case kVideoCodecH264:
+      return ManageFrameH264(frame);
     default: {
       // Use 15 first bits of frame ID as picture ID if available.
       absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
@@ -671,5 +673,190 @@
   frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
 }
 
+// Sets the references of an H264 frame from its RTP frame marking, using the
+// last packet sequence number as picture id (frames without a temporal id fall
+// back to ManageFramePidOrSeqNum()). Returns kHandOff when references are set,
+// kStash while needed frames are missing, kDrop for stale or corrupt frames.
+RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameH264(
+    RtpFrameObject* frame) {
+  absl::optional<FrameMarking> rtp_frame_marking = frame->GetFrameMarking();
+  if (!rtp_frame_marking || rtp_frame_marking->temporal_id == kNoTemporalIdx)
+    return ManageFramePidOrSeqNum(frame, kNoPictureId);
+
+  const uint8_t tid = rtp_frame_marking->temporal_id;
+  const bool blSync = rtp_frame_marking->base_layer_sync;
+
+  // Protect against corrupted packets with an arbitrarily large temporal id:
+  // |tid| indexes the kMaxTemporalLayers-sized arrays stored in |layer_info_|.
+  if (tid >= kMaxTemporalLayers)
+    return kDrop;
+
+  frame->id.picture_id = frame->last_seq_num();
+
+  // For H264, use last_seq_num_gop_ to simply store last picture id
+  // as a pair of unpadded and padded sequence numbers.
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey &&
+      last_seq_num_gop_.empty()) {
+    last_seq_num_gop_.insert(std::make_pair(
+        0, std::make_pair(frame->id.picture_id, frame->id.picture_id)));
+  }
+
+  // Stash if we have no keyframe yet.
+  if (last_seq_num_gop_.empty())
+    return kStash;
+
+  // Check for gap in sequence numbers. Store in |not_yet_received_seq_num_|.
+  if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) {
+    uint16_t last_pic_id_padded = last_seq_num_gop_.begin()->second.second;
+    if (AheadOf<uint16_t>(frame->id.picture_id, last_pic_id_padded)) {
+      do {
+        last_pic_id_padded = last_pic_id_padded + 1;
+        not_yet_received_seq_num_.insert(last_pic_id_padded);
+      } while (last_pic_id_padded != frame->id.picture_id);
+    }
+  }
+
+  int64_t unwrapped_tl0 = tl0_unwrapper_.Unwrap(rtp_frame_marking->tl0_pic_idx);
+
+  // Clean up info for base layers that are too old.
+  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxLayerInfo;
+  auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
+  layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
+
+  // Clean up info about not yet received frames that are too old.
+  uint16_t old_picture_id = frame->id.picture_id - kMaxNotYetReceivedFrames * 2;
+  auto clean_frames_to = not_yet_received_seq_num_.lower_bound(old_picture_id);
+  not_yet_received_seq_num_.erase(not_yet_received_seq_num_.begin(),
+                                  clean_frames_to);
+
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    frame->num_references = 0;
+    layer_info_[unwrapped_tl0].fill(-1);
+    UpdateDataH264(frame, unwrapped_tl0, tid);
+    return kHandOff;
+  }
+
+  auto layer_info_it =
+      layer_info_.find(tid == 0 ? unwrapped_tl0 - 1 : unwrapped_tl0);
+
+  // Stash if we have no base layer frame yet.
+  if (layer_info_it == layer_info_.end())
+    return kStash;
+
+  // Base layer frame. Copy layer info from previous base layer frame.
+  if (tid == 0) {
+    layer_info_it = layer_info_.insert(
+        std::make_pair(unwrapped_tl0, layer_info_it->second)).first;
+  }
+
+  // Base layer and layer sync frames only reference the base layer frame.
+  if (tid == 0 || blSync) {
+    frame->num_references = 1;
+    frame->references[0] = layer_info_it->second[0];
+    UpdateDataH264(frame, unwrapped_tl0, tid);
+    return kHandOff;
+  }
+
+  // Find all references for general frame.
+  frame->num_references = 0;
+  for (uint8_t layer = 0; layer <= tid; ++layer) {
+    // Stash if we have not yet received frames on this temporal layer.
+    if (layer_info_it->second[layer] == -1)
+      return kStash;
+
+    // Drop if the last frame on this layer is ahead of this frame. A layer sync
+    // frame was received after this frame for the same base layer frame.
+    uint16_t last_frame_in_layer = layer_info_it->second[layer];
+    if (AheadOf<uint16_t>(last_frame_in_layer, frame->id.picture_id))
+      return kDrop;
+
+    // Stash and wait for missing frame between this frame and the reference
+    auto not_received_seq_num_it =
+        not_yet_received_seq_num_.upper_bound(last_frame_in_layer);
+    if (not_received_seq_num_it != not_yet_received_seq_num_.end() &&
+        AheadOf<uint16_t>(frame->id.picture_id, *not_received_seq_num_it)) {
+      return kStash;
+    }
+
+    if (!(AheadOf<uint16_t>(frame->id.picture_id, last_frame_in_layer))) {
+      RTC_LOG(LS_WARNING) << "Frame with picture id " << frame->id.picture_id
+                          << " and packet range [" << frame->first_seq_num()
+                          << ", " << frame->last_seq_num()
+                          << "] already received, "
+                          << " dropping frame.";
+      return kDrop;
+    }
+
+    ++frame->num_references;
+    frame->references[layer] = last_frame_in_layer;
+  }
+
+  UpdateDataH264(frame, unwrapped_tl0, tid);
+  return kHandOff;
+}
+
+// Advances the padded half of the first |last_seq_num_gop_| entry across
+// stashed padding packets that directly follow it, erasing each consumed one.
+void RtpFrameReferenceFinder::UpdateLastPictureIdWithPaddingH264() {
+  auto& last_padded = last_seq_num_gop_.begin()->second.second;
+  uint16_t candidate = last_padded + 1;
+
+  // The run of consecutive padding ends when the set is exhausted or the next
+  // stashed sequence number is not |candidate|.
+  auto stashed_it = stashed_padding_.lower_bound(candidate);
+  while (stashed_it != stashed_padding_.end()) {
+    if (*stashed_it != candidate)
+      break;
+    last_padded = candidate++;
+    stashed_it = stashed_padding_.erase(stashed_it);
+  }
+}
+
+// Records |frame| as the last frame on |temporal_idx| for |unwrapped_tl0| and
+// each consecutive following TL0 entry, then unwraps the frame's picture ids.
+void RtpFrameReferenceFinder::UpdateLayerInfoH264(RtpFrameObject* frame,
+                                                  int64_t unwrapped_tl0,
+                                                  uint8_t temporal_idx) {
+  for (auto info_it = layer_info_.find(unwrapped_tl0);
+       info_it != layer_info_.end();
+       info_it = layer_info_.find(++unwrapped_tl0)) {
+    auto& last_pid_on_layer = info_it->second[temporal_idx];
+    const bool has_newer_frame =
+        last_pid_on_layer != -1 &&
+        AheadOf<uint16_t>(last_pid_on_layer, frame->id.picture_id);
+    // Not a newer frame. No subsequent layer info needs update.
+    if (has_newer_frame)
+      break;
+    last_pid_on_layer = frame->id.picture_id;
+  }
+
+  // Unwrap only after |layer_info_| has been updated with the wrapped id.
+  for (size_t i = 0; i < frame->num_references; ++i)
+    frame->references[i] = rtp_seq_num_unwrapper_.Unwrap(frame->references[i]);
+  frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
+}
+
+// Bookkeeping for a frame about to be handed off: updates the last picture id
+// (with padding), the layer info and |not_yet_received_seq_num_|.
+void RtpFrameReferenceFinder::UpdateDataH264(RtpFrameObject* frame,
+                                             int64_t unwrapped_tl0,
+                                             uint8_t temporal_idx) {
+  // Move the last picture id forward if this frame is ahead of it.
+  auto& last_pic_ids = last_seq_num_gop_.begin()->second;
+  if (AheadOf<uint16_t>(frame->id.picture_id, last_pic_ids.first)) {
+    last_pic_ids.first = frame->id.picture_id;
+    last_pic_ids.second = frame->id.picture_id;
+  }
+  UpdateLastPictureIdWithPaddingH264();
+
+  UpdateLayerInfoH264(frame, unwrapped_tl0, temporal_idx);
+
+  // Remove any current packets from |not_yet_received_seq_num_|.
+  const uint16_t erase_up_to = last_pic_ids.second;
+  uint16_t seq_num = frame->first_seq_num();
+  while (AheadOrAt(erase_up_to, seq_num))
+    not_yet_received_seq_num_.erase(seq_num++);
+}
+
 }  // namespace video_coding
 }  // namespace webrtc
diff --git a/modules/video_coding/rtp_frame_reference_finder.h b/modules/video_coding/rtp_frame_reference_finder.h
index e39d991..4021ab0 100644
--- a/modules/video_coding/rtp_frame_reference_finder.h
+++ b/modules/video_coding/rtp_frame_reference_finder.h
@@ -136,6 +136,26 @@
   void UnwrapPictureIds(RtpFrameObject* frame)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
+  // Find references for H264 frames.
+  FrameDecision ManageFrameH264(RtpFrameObject* frame)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Update "last-picture-id-with-padding" sequence number for H264.
+  void UpdateLastPictureIdWithPaddingH264()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Update H264 layer info state used to determine frame references.
+  void UpdateLayerInfoH264(RtpFrameObject* frame,
+                           int64_t unwrapped_tl0,
+                           uint8_t temporal_idx)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Update H264 state for decodable frames.
+  void UpdateDataH264(RtpFrameObject* frame,
+                      int64_t unwrapped_tl0,
+                      uint8_t temporal_idx)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
   // For every group of pictures, hold two sequence numbers. The first being
   // the sequence number of the last packet of the last completed frame, and
   // the second being the sequence number of the last packet of the last
@@ -159,6 +179,11 @@
   std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>
       not_yet_received_frames_ RTC_GUARDED_BY(crit_);
 
+  // Sequence numbers of frames earlier than the last received frame that
+  // have not yet been fully received.
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>>
+      not_yet_received_seq_num_ RTC_GUARDED_BY(crit_);
+
   // Frames that have been fully received but didn't have all the information
   // needed to determine their references.
   std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_
@@ -166,7 +191,7 @@
 
   // Holds the information about the last completed frame for a given temporal
   // layer given an unwrapped Tl0 picture index.
-  std::map<int64_t, std::array<int16_t, kMaxTemporalLayers>> layer_info_
+  std::map<int64_t, std::array<int64_t, kMaxTemporalLayers>> layer_info_
       RTC_GUARDED_BY(crit_);
 
   // Where the current scalability structure is in the
diff --git a/modules/video_coding/rtp_frame_reference_finder_unittest.cc b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
index 7361bb6..aba807e 100644
--- a/modules/video_coding/rtp_frame_reference_finder_unittest.cc
+++ b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
@@ -217,6 +217,36 @@
     reference_finder_->ManageFrame(std::move(frame));
   }
 
+  // Hands an H264 frame to the finder; only its first and last packet exist.
+  void InsertH264(uint16_t seq_num_start,
+                  uint16_t seq_num_end,
+                  bool keyframe,
+                  uint8_t tid = kNoTemporalIdx,
+                  int32_t tl0 = kNoTl0PicIdx,
+                  bool sync = false) {
+    VCMPacket packet;
+    packet.video_header.codec = kVideoCodecH264;
+    packet.video_header.frame_type = VideoFrameType::kVideoFrameDelta;
+    if (keyframe)
+      packet.video_header.frame_type = VideoFrameType::kVideoFrameKey;
+    packet.video_header.frame_marking.temporal_id = tid;
+    packet.video_header.frame_marking.tl0_pic_idx = tl0;
+    packet.video_header.frame_marking.base_layer_sync = sync;
+    packet.video_header.is_last_packet_in_frame = seq_num_start == seq_num_end;
+    packet.seqNum = seq_num_start;
+    ref_packet_buffer_->InsertPacket(&packet);
+
+    if (seq_num_end != seq_num_start) {
+      packet.seqNum = seq_num_end;
+      packet.video_header.is_last_packet_in_frame = true;
+      ref_packet_buffer_->InsertPacket(&packet);
+    }
+
+    std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+    reference_finder_->ManageFrame(std::move(frame));
+  }
+
   // Check if a frame with picture id |pid| and spatial index |sidx| has been
   // delivered from the packet buffer, and if so, if it has the references
   // specified by |refs|.
@@ -258,6 +288,11 @@
   }
 
   template <typename... T>
+  void CheckReferencesH264(int64_t pid, T... refs) const {
+    CheckReferences(pid, 0, refs...);  // Spatial index is always 0.
+  }
+
+  template <typename... T>
   void RefsToSet(std::set<int64_t>* m, int64_t ref, T... refs) const {
     m->insert(ref);
     RefsToSet(m, refs...);
@@ -1406,5 +1441,361 @@
   CheckReferencesVp9(pid + 1, 0, pid);
 }
 
+TEST_F(TestRtpFrameReferenceFinder, H264KeyFrameReferences) {
+  uint16_t sn = Rand();
+  InsertH264(sn, sn, true);
+  // A lone keyframe without temporal id is delivered with no references.
+  ASSERT_EQ(1UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+}
+
+// Test with 1 temporal layer: each frame references the previous frame.
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayers_0) {
+  uint16_t sn = Rand();
+
+  InsertH264(sn, sn, true, 0, 1);
+  InsertH264(sn + 1, sn + 1, false, 0, 2);
+  InsertH264(sn + 2, sn + 2, false, 0, 3);
+  InsertH264(sn + 3, sn + 3, false, 0, 4);
+
+  ASSERT_EQ(4UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn + 1);
+  CheckReferencesH264(sn + 3, sn + 2);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264DuplicateTl1Frames) {
+  uint16_t sn = Rand();
+  // Frame sn + 3 is inserted twice but must only be delivered once.
+  InsertH264(sn, sn, true, 0, 0);
+  InsertH264(sn + 1, sn + 1, false, 1, 0, true);
+  InsertH264(sn + 2, sn + 2, false, 0, 1);
+  InsertH264(sn + 3, sn + 3, false, 1, 1);
+  InsertH264(sn + 3, sn + 3, false, 1, 1);
+  InsertH264(sn + 4, sn + 4, false, 0, 2);
+  InsertH264(sn + 5, sn + 5, false, 1, 2);
+
+  ASSERT_EQ(6UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn);
+  CheckReferencesH264(sn + 3, sn + 1, sn + 2);
+  CheckReferencesH264(sn + 4, sn + 2);
+  CheckReferencesH264(sn + 5, sn + 3, sn + 4);
+}
+
+// Test with 1 temporal layer and frames arriving out of order.
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayersReordering_0) {
+  uint16_t sn = Rand();
+
+  InsertH264(sn, sn, true, 0, 1);
+  InsertH264(sn + 1, sn + 1, false, 0, 2);
+  InsertH264(sn + 3, sn + 3, false, 0, 4);
+  InsertH264(sn + 2, sn + 2, false, 0, 3);
+  InsertH264(sn + 5, sn + 5, false, 0, 6);
+  InsertH264(sn + 6, sn + 6, false, 0, 7);
+  InsertH264(sn + 4, sn + 4, false, 0, 5);
+
+  ASSERT_EQ(7UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn + 1);
+  CheckReferencesH264(sn + 3, sn + 2);
+  CheckReferencesH264(sn + 4, sn + 3);
+  CheckReferencesH264(sn + 5, sn + 4);
+  CheckReferencesH264(sn + 6, sn + 5);
+}
+
+// Test with 2 temporal layers in a 01 pattern; the TL0 index goes 255 -> 0.
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayers_01) {
+  uint16_t sn = Rand();
+
+  InsertH264(sn, sn, true, 0, 255);
+  InsertH264(sn + 1, sn + 1, false, 1, 255, true);
+  InsertH264(sn + 2, sn + 2, false, 0, 0);
+  InsertH264(sn + 3, sn + 3, false, 1, 0);
+
+  ASSERT_EQ(4UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn);
+  CheckReferencesH264(sn + 3, sn + 1, sn + 2);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayersMultiSn_01) {
+  uint16_t sn = Rand();
+  // Multi-packet frames are identified by their last sequence number.
+  InsertH264(sn, sn + 3, true, 0, 255);
+  InsertH264(sn + 4, sn + 5, false, 1, 255, true);
+  InsertH264(sn + 6, sn + 8, false, 0, 0);
+  InsertH264(sn + 9, sn + 9, false, 1, 0);
+
+  ASSERT_EQ(4UL, frames_from_callback_.size());
+  CheckReferencesH264(sn + 3);
+  CheckReferencesH264(sn + 5, sn + 3);
+  CheckReferencesH264(sn + 8, sn + 3);
+  CheckReferencesH264(sn + 9, sn + 5, sn + 8);
+}
+
+// Test with 2 temporal layers in a 01 pattern and frames arriving reordered.
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayersReordering_01) {
+  uint16_t sn = Rand();
+
+  InsertH264(sn + 1, sn + 1, false, 1, 255, true);
+  InsertH264(sn, sn, true, 0, 255);
+  InsertH264(sn + 3, sn + 3, false, 1, 0);
+  InsertH264(sn + 5, sn + 5, false, 1, 1);
+  InsertH264(sn + 2, sn + 2, false, 0, 0);
+  InsertH264(sn + 4, sn + 4, false, 0, 1);
+  InsertH264(sn + 6, sn + 6, false, 0, 2);
+  InsertH264(sn + 7, sn + 7, false, 1, 2);
+
+  ASSERT_EQ(8UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn);
+  CheckReferencesH264(sn + 3, sn + 1, sn + 2);
+  CheckReferencesH264(sn + 4, sn + 2);
+  CheckReferencesH264(sn + 5, sn + 3, sn + 4);
+  CheckReferencesH264(sn + 6, sn + 4);
+  CheckReferencesH264(sn + 7, sn + 5, sn + 6);
+}
+
+// Test with 3 temporal layers in a 0212 pattern, frames arriving in order.
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayers_0212) {
+  uint16_t sn = Rand();
+
+  InsertH264(sn, sn, true, 0, 55);
+  InsertH264(sn + 1, sn + 1, false, 2, 55, true);
+  InsertH264(sn + 2, sn + 2, false, 1, 55, true);
+  InsertH264(sn + 3, sn + 3, false, 2, 55);
+  InsertH264(sn + 4, sn + 4, false, 0, 56);
+  InsertH264(sn + 5, sn + 5, false, 2, 56, true);
+  InsertH264(sn + 6, sn + 6, false, 1, 56, true);
+  InsertH264(sn + 7, sn + 7, false, 2, 56);
+  InsertH264(sn + 8, sn + 8, false, 0, 57);
+  InsertH264(sn + 9, sn + 9, false, 2, 57, true);
+  InsertH264(sn + 10, sn + 10, false, 1, 57, true);
+  InsertH264(sn + 11, sn + 11, false, 2, 57);
+
+  ASSERT_EQ(12UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn);
+  CheckReferencesH264(sn + 3, sn, sn + 1, sn + 2);
+  CheckReferencesH264(sn + 4, sn);
+  CheckReferencesH264(sn + 5, sn + 4);
+  CheckReferencesH264(sn + 6, sn + 4);
+  CheckReferencesH264(sn + 7, sn + 4, sn + 5, sn + 6);
+  CheckReferencesH264(sn + 8, sn + 4);
+  CheckReferencesH264(sn + 9, sn + 8);
+  CheckReferencesH264(sn + 10, sn + 8);
+  CheckReferencesH264(sn + 11, sn + 8, sn + 9, sn + 10);
+}
+
+// 0212 pattern with frame sn + 1 missing: TL2 frame sn + 3 is not delivered.
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayersMissingFrame_0212) {
+  uint16_t sn = Rand();
+
+  InsertH264(sn, sn, true, 0, 55, false);
+  InsertH264(sn + 2, sn + 2, false, 1, 55, true);
+  InsertH264(sn + 3, sn + 3, false, 2, 55, false);
+
+  ASSERT_EQ(2UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 2, sn);
+}
+
+// Test with 3 temporal layers in a 0212 pattern and frames arriving reordered.
+TEST_F(TestRtpFrameReferenceFinder, H264TemporalLayersReordering_0212) {
+  uint16_t sn = Rand();
+
+  InsertH264(sn + 1, sn + 1, false, 2, 55, true);
+  InsertH264(sn, sn, true, 0, 55, false);
+  InsertH264(sn + 2, sn + 2, false, 1, 55, true);
+  InsertH264(sn + 4, sn + 4, false, 0, 56, false);
+  InsertH264(sn + 5, sn + 5, false, 2, 56, false);
+  InsertH264(sn + 3, sn + 3, false, 2, 55, false);
+  InsertH264(sn + 7, sn + 7, false, 2, 56, false);
+  InsertH264(sn + 9, sn + 9, false, 2, 57, true);
+  InsertH264(sn + 6, sn + 6, false, 1, 56, false);
+  InsertH264(sn + 8, sn + 8, false, 0, 57, false);
+  InsertH264(sn + 11, sn + 11, false, 2, 57, false);
+  InsertH264(sn + 10, sn + 10, false, 1, 57, true);
+
+  ASSERT_EQ(12UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn);
+  CheckReferencesH264(sn + 3, sn, sn + 1, sn + 2);
+  CheckReferencesH264(sn + 4, sn);
+  CheckReferencesH264(sn + 5, sn + 2, sn + 3, sn + 4);
+  CheckReferencesH264(sn + 6, sn + 2, sn + 4);
+  CheckReferencesH264(sn + 7, sn + 4, sn + 5, sn + 6);
+  CheckReferencesH264(sn + 8, sn + 4);
+  CheckReferencesH264(sn + 9, sn + 8);
+  CheckReferencesH264(sn + 10, sn + 8);
+  CheckReferencesH264(sn + 11, sn + 8, sn + 9, sn + 10);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264InsertManyFrames_0212) {
+  uint16_t sn = Rand();
+  // Per keyframe: one 0212 group with layer syncs, then groups without sync.
+  const int keyframes_to_insert = 50;
+  const int frames_per_keyframe = 120;  // Should be a multiple of 4.
+  uint8_t tl0 = 128;
+
+  for (int k = 0; k < keyframes_to_insert; ++k) {
+    InsertH264(sn, sn, true, 0, tl0, false);
+    InsertH264(sn + 1, sn + 1, false, 2, tl0, true);
+    InsertH264(sn + 2, sn + 2, false, 1, tl0, true);
+    InsertH264(sn + 3, sn + 3, false, 2, tl0, false);
+    CheckReferencesH264(sn);
+    CheckReferencesH264(sn + 1, sn);
+    CheckReferencesH264(sn + 2, sn);
+    CheckReferencesH264(sn + 3, sn, sn + 1, sn + 2);
+    frames_from_callback_.clear();
+    ++tl0;
+
+    for (int f = 4; f < frames_per_keyframe; f += 4) {
+      uint16_t sf = sn + f;
+
+      InsertH264(sf, sf, false, 0, tl0, false);
+      InsertH264(sf + 1, sf + 1, false, 2, tl0, false);
+      InsertH264(sf + 2, sf + 2, false, 1, tl0, false);
+      InsertH264(sf + 3, sf + 3, false, 2, tl0, false);
+      CheckReferencesH264(sf, sf - 4);
+      CheckReferencesH264(sf + 1, sf, sf - 1, sf - 2);
+      CheckReferencesH264(sf + 2, sf, sf - 2);
+      CheckReferencesH264(sf + 3, sf, sf + 1, sf + 2);
+      frames_from_callback_.clear();
+      ++tl0;
+    }
+
+    sn += frames_per_keyframe;
+  }
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264LayerSync) {
+  uint16_t sn = Rand();
+  // Frame sn + 3 is never inserted; sn + 5 is a TL1 layer sync after the gap.
+  InsertH264(sn, sn, true, 0, 0, false);
+  InsertH264(sn + 1, sn + 1, false, 1, 0, true);
+  InsertH264(sn + 2, sn + 2, false, 0, 1, false);
+  ASSERT_EQ(3UL, frames_from_callback_.size());
+
+  InsertH264(sn + 4, sn + 4, false, 0, 2, false);
+  InsertH264(sn + 5, sn + 5, false, 1, 2, true);
+  InsertH264(sn + 6, sn + 6, false, 0, 3, false);
+  InsertH264(sn + 7, sn + 7, false, 1, 3, false);
+
+  ASSERT_EQ(7UL, frames_from_callback_.size());
+  CheckReferencesH264(sn);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn);
+  CheckReferencesH264(sn + 4, sn + 2);
+  CheckReferencesH264(sn + 5, sn + 4);
+  CheckReferencesH264(sn + 6, sn + 4);
+  CheckReferencesH264(sn + 7, sn + 6, sn + 5);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264Tl1SyncFrameAfterTl1Frame) {
+  InsertH264(1000, 1000, true, 0, 247, true);
+  InsertH264(1001, 1001, false, 0, 248, false);
+  InsertH264(1002, 1002, false, 1, 248, false);  // Will be dropped
+  InsertH264(1003, 1003, false, 1, 248, true);   // due to this frame.
+  // Only the keyframe, the TL0 frame and the sync frame are delivered.
+  ASSERT_EQ(3UL, frames_from_callback_.size());
+  CheckReferencesH264(1000);
+  CheckReferencesH264(1001, 1000);
+  CheckReferencesH264(1003, 1001);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264DetectMissingFrame_0212) {
+  InsertH264(1, 1, true, 0, 1, false);
+  InsertH264(2, 2, false, 2, 1, true);
+  InsertH264(3, 3, false, 1, 1, true);
+  InsertH264(4, 4, false, 2, 1, false);
+  // TL0 frame 5 is delayed; frames 6-8 are not delivered until it arrives.
+  InsertH264(6, 6, false, 2, 2, false);
+  InsertH264(7, 7, false, 1, 2, false);
+  InsertH264(8, 8, false, 2, 2, false);
+  ASSERT_EQ(4UL, frames_from_callback_.size());
+
+  InsertH264(5, 5, false, 0, 2, false);
+  ASSERT_EQ(8UL, frames_from_callback_.size());
+
+  CheckReferencesH264(1);
+  CheckReferencesH264(2, 1);
+  CheckReferencesH264(3, 1);
+  CheckReferencesH264(4, 3, 2, 1);
+
+  CheckReferencesH264(5, 1);
+  CheckReferencesH264(6, 5, 4, 3);
+  CheckReferencesH264(7, 5, 3);
+  CheckReferencesH264(8, 7, 6, 5);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264SequenceNumberWrap) {
+  uint16_t sn = 0xFFFF;
+  // Sequence numbers wrap past 0xFFFF; expected picture ids keep increasing.
+  InsertH264(sn - 1, sn - 1, true, 0, 1);
+  InsertH264(sn, sn, false, 0, 2);
+  InsertH264(sn + 1, sn + 1, false, 0, 3);
+  InsertH264(sn + 2, sn + 2, false, 0, 4);
+
+  ASSERT_EQ(4UL, frames_from_callback_.size());
+  CheckReferencesH264(sn - 1);
+  CheckReferencesH264(sn, sn - 1);
+  CheckReferencesH264(sn + 1, sn);
+  CheckReferencesH264(sn + 2, sn + 1);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264SequenceNumberWrapMulti) {
+  uint16_t sn = 0xFFFF;
+  // Multi-packet frames; frame [sn - 1, sn + 1] spans the sequence wrap.
+  InsertH264(sn - 3, sn - 2, true, 0, 1);
+  InsertH264(sn - 1, sn + 1, false, 0, 2);
+  InsertH264(sn + 2, sn + 3, false, 0, 3);
+  InsertH264(sn + 4, sn + 7, false, 0, 4);
+
+  ASSERT_EQ(4UL, frames_from_callback_.size());
+  CheckReferencesH264(sn - 2);
+  CheckReferencesH264(sn + 1, sn - 2);
+  CheckReferencesH264(sn + 3, sn + 1);
+  CheckReferencesH264(sn + 7, sn + 3);
+}
+
+TEST_F(TestRtpFrameReferenceFinder, H264Tl0PicIdxWrap) {
+  int numTl0Wraps = 1000;
+  int64_t sn = Rand();
+  // Each outer iteration runs the TL0 picture index through 0..255 once.
+  for (int i = 0; i < numTl0Wraps; i++) {
+    for (int tl0 = 0; tl0 < 256; tl0 += 16, sn += 16) {
+      InsertH264(sn, sn, true, 0, tl0);
+      reference_finder_->ClearTo(sn);  // Too many stashed frames cause errors.
+
+      for (int k = 1; k < 8; k++) {
+        InsertH264(sn + k, sn + k, false, 0, tl0 + k);
+      }
+
+      // Skip a TL0 index.
+      for (int k = 9; k < 16; k++) {
+        InsertH264(sn + k, sn + k, false, 0, tl0 + k);
+      }
+
+      ASSERT_EQ(8UL, frames_from_callback_.size());
+
+      CheckReferencesH264(sn);
+      for (int k = 1; k < 8; k++) {
+        CheckReferencesH264(sn + k, sn + k - 1);
+      }
+
+      frames_from_callback_.clear();
+    }
+  }
+}
+
 }  // namespace video_coding
 }  // namespace webrtc