Move the logic for finding frame references out of PacketBuffer and into a
new class, RtpFrameReferenceFinder.

BUG=webrtc:5514

Review-Url: https://codereview.webrtc.org/1961053002
Cr-Commit-Position: refs/heads/master@{#12725}
diff --git a/webrtc/modules/video_coding/BUILD.gn b/webrtc/modules/video_coding/BUILD.gn
index fe600da..7c6c9ed 100644
--- a/webrtc/modules/video_coding/BUILD.gn
+++ b/webrtc/modules/video_coding/BUILD.gn
@@ -54,6 +54,8 @@
     "percentile_filter.h",
     "receiver.cc",
     "receiver.h",
+    "rtp_frame_reference_finder.cc",
+    "rtp_frame_reference_finder.h",
     "rtt_filter.cc",
     "rtt_filter.h",
     "session_info.cc",
diff --git a/webrtc/modules/video_coding/frame_object.cc b/webrtc/modules/video_coding/frame_object.cc
index ea7567c..1634f89 100644
--- a/webrtc/modules/video_coding/frame_object.cc
+++ b/webrtc/modules/video_coding/frame_object.cc
@@ -22,27 +22,48 @@
       inter_layer_predicted(false) {}
 
 RtpFrameObject::RtpFrameObject(PacketBuffer* packet_buffer,
-                               uint16_t first_packet,
-                               uint16_t last_packet)
+                               uint16_t first_seq_num,
+                               uint16_t last_seq_num)
     : packet_buffer_(packet_buffer),
-      first_packet_(first_packet),
-      last_packet_(last_packet) {}
+      first_seq_num_(first_seq_num),
+      last_seq_num_(last_seq_num) {
+  // Cache the frame/codec type of the first packet. If the packet is gone,
+  // use neutral values so the members are never read while indeterminate.
+  VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num);
+  frame_type_ = packet ? packet->frameType : kEmptyFrame;
+  codec_type_ = packet ? packet->codec : kVideoCodecUnknown;
+}
 
 RtpFrameObject::~RtpFrameObject() {
   packet_buffer_->ReturnFrame(this);
 }
 
 uint16_t RtpFrameObject::first_seq_num() const {
-  return first_packet_;
+  return first_seq_num_;
 }
 
 uint16_t RtpFrameObject::last_seq_num() const {
-  return last_packet_;
+  return last_seq_num_;
+}
+
+FrameType RtpFrameObject::frame_type() const {
+  return frame_type_;
+}
+
+VideoCodecType RtpFrameObject::codec_type() const {
+  return codec_type_;
 }
 
 bool RtpFrameObject::GetBitstream(uint8_t* destination) const {
   return packet_buffer_->GetBitstream(*this, destination);
 }
 
+RTPVideoTypeHeader* RtpFrameObject::GetCodecHeader() const {
+  VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_);
+  if (!packet)
+    return nullptr;
+  return &packet->codecSpecificHeader.codecHeader;
+}
+
 }  // namespace video_coding
 }  // namespace webrtc
diff --git a/webrtc/modules/video_coding/frame_object.h b/webrtc/modules/video_coding/frame_object.h
index 2b39f06..80ab0ec 100644
--- a/webrtc/modules/video_coding/frame_object.h
+++ b/webrtc/modules/video_coding/frame_object.h
@@ -11,10 +11,8 @@
 #ifndef WEBRTC_MODULES_VIDEO_CODING_FRAME_OBJECT_H_
 #define WEBRTC_MODULES_VIDEO_CODING_FRAME_OBJECT_H_
 
-#include <stddef.h>
-#include <stdint.h>
-
-#include <array>
+#include "webrtc/common_types.h"
+#include "webrtc/modules/include/module_common_types.h"
 
 namespace webrtc {
 namespace video_coding {
@@ -44,18 +42,23 @@
 class RtpFrameObject : public FrameObject {
  public:
   RtpFrameObject(PacketBuffer* packet_buffer,
-                 uint16_t first_packet,
-                 uint16_t last_packet);
+                 uint16_t first_seq_num,
+                 uint16_t last_seq_num);
 
   ~RtpFrameObject();
   uint16_t first_seq_num() const;
   uint16_t last_seq_num() const;
+  FrameType frame_type() const;
+  VideoCodecType codec_type() const;
   bool GetBitstream(uint8_t* destination) const override;
+  RTPVideoTypeHeader* GetCodecHeader() const;
 
  private:
   PacketBuffer* packet_buffer_;
-  uint16_t first_packet_;
-  uint16_t last_packet_;
+  FrameType frame_type_;
+  VideoCodecType codec_type_;
+  uint16_t first_seq_num_;
+  uint16_t last_seq_num_;
 };
 
 }  // namespace video_coding
diff --git a/webrtc/modules/video_coding/packet_buffer.cc b/webrtc/modules/video_coding/packet_buffer.cc
index 71ab3e5..09fb249 100644
--- a/webrtc/modules/video_coding/packet_buffer.cc
+++ b/webrtc/modules/video_coding/packet_buffer.cc
@@ -30,10 +30,7 @@
       first_packet_received_(false),
       data_buffer_(start_buffer_size),
       sequence_buffer_(start_buffer_size),
-      frame_callback_(frame_callback),
-      last_picture_id_(-1),
-      last_unwrap_(-1),
-      current_ss_idx_(0) {
+      reference_finder_(frame_callback) {
   RTC_DCHECK_LE(start_buffer_size, max_buffer_size);
   // Buffer size must always be a power of 2.
   RTC_DCHECK((start_buffer_size & (start_buffer_size - 1)) == 0);
@@ -152,7 +149,7 @@
 
       std::unique_ptr<RtpFrameObject> frame(
           new RtpFrameObject(this, start_seq_num, seq_num));
-      ManageFrame(std::move(frame));
+      reference_finder_.ManageFrame(std::move(frame));
     }
 
     index = (index + 1) % size_;
@@ -204,468 +201,21 @@
   return true;
 }
 
-void PacketBuffer::ManageFrame(std::unique_ptr<RtpFrameObject> frame) {
-  size_t start_index = frame->first_seq_num() % size_;
-  VideoCodecType codec_type = data_buffer_[start_index].codec;
-
-  switch (codec_type) {
-    case kVideoCodecULPFEC:
-    case kVideoCodecRED:
-    case kVideoCodecUnknown:
-      RTC_NOTREACHED();
-      break;
-    case kVideoCodecVP8:
-      ManageFrameVp8(std::move(frame));
-      break;
-    case kVideoCodecVP9:
-      ManageFrameVp9(std::move(frame));
-      break;
-    case kVideoCodecH264:
-    case kVideoCodecI420:
-    case kVideoCodecGeneric:
-      ManageFrameGeneric(std::move(frame));
-      break;
+VCMPacket* PacketBuffer::GetPacket(uint16_t seq_num) {
+  rtc::CritScope lock(&crit_);
+  size_t index = seq_num % size_;
+  if (!sequence_buffer_[index].used ||
+      seq_num != sequence_buffer_[index].seq_num) {
+    return nullptr;
   }
+  return &data_buffer_[index];
 }
 
-void PacketBuffer::RetryStashedFrames() {
-  size_t num_stashed_frames = stashed_frames_.size();
-
-  // Clean up stashed frames if there are too many.
-  while (stashed_frames_.size() > kMaxStashedFrames)
-    stashed_frames_.pop();
-
-  // Since frames are stashed if there is not enough data to determine their
-  // frame references we should at most check |stashed_frames_.size()| in
-  // order to not pop and push frames in and endless loop.
-  for (size_t i = 0; i < num_stashed_frames && !stashed_frames_.empty(); ++i) {
-    std::unique_ptr<RtpFrameObject> frame = std::move(stashed_frames_.front());
-    stashed_frames_.pop();
-    ManageFrame(std::move(frame));
-  }
-}
-
-void PacketBuffer::ManageFrameGeneric(
-    std::unique_ptr<RtpFrameObject> frame) {
-  size_t index = frame->first_seq_num() % size_;
-  const VCMPacket& packet = data_buffer_[index];
-
-  if (packet.frameType == kVideoFrameKey)
-    last_seq_num_gop_[frame->last_seq_num()] = frame->last_seq_num();
-
-  // We have received a frame but not yet a keyframe, stash this frame.
-  if (last_seq_num_gop_.empty()) {
-    stashed_frames_.emplace(std::move(frame));
-    return;
-  }
-
-  // Clean up info for old keyframes but make sure to keep info
-  // for the last keyframe.
-  auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
-  if (clean_to != last_seq_num_gop_.end())
-    last_seq_num_gop_.erase(last_seq_num_gop_.begin(), clean_to);
-
-  // Find the last sequence number of the last frame for the keyframe
-  // that this frame indirectly references.
-  auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
-  seq_num_it--;
-
-  // Make sure the packet sequence numbers are continuous, otherwise stash
-  // this frame.
-  if (packet.frameType == kVideoFrameDelta) {
-    if (seq_num_it->second !=
-        static_cast<uint16_t>(frame->first_seq_num() - 1)) {
-      stashed_frames_.emplace(std::move(frame));
-      return;
-    }
-  }
-
-  RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first));
-
-  // Since keyframes can cause reordering of the frames delivered from
-  // FindFrames() we can't simply assign the picture id according to some
-  // incrementing counter.
-  frame->picture_id = frame->last_seq_num();
-  frame->num_references = packet.frameType == kVideoFrameDelta;
-  frame->references[0] = seq_num_it->second;
-  seq_num_it->second = frame->picture_id;
-
-  last_picture_id_ = frame->picture_id;
-  frame_callback_->OnCompleteFrame(std::move(frame));
-  RetryStashedFrames();
-}
-
-void PacketBuffer::ManageFrameVp8(std::unique_ptr<RtpFrameObject> frame) {
-  size_t index = frame->first_seq_num() % size_;
-  const VCMPacket& packet = data_buffer_[index];
-  const RTPVideoHeaderVP8& codec_header =
-      packet.codecSpecificHeader.codecHeader.VP8;
-
-  if (codec_header.pictureId == kNoPictureId ||
-      codec_header.temporalIdx == kNoTemporalIdx ||
-      codec_header.tl0PicIdx == kNoTl0PicIdx) {
-    ManageFrameGeneric(std::move(frame));
-    return;
-  }
-
-  frame->picture_id = codec_header.pictureId % kPicIdLength;
-
-  if (last_unwrap_ == -1)
-    last_unwrap_ = codec_header.pictureId;
-
-  if (last_picture_id_ == -1)
-    last_picture_id_ = frame->picture_id;
-
-  // Find if there has been a gap in fully received frames and save the picture
-  // id of those frames in |not_yet_received_frames_|.
-  if (AheadOf<uint16_t, kPicIdLength>(frame->picture_id, last_picture_id_)) {
-    last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
-    while (last_picture_id_ != frame->picture_id) {
-      not_yet_received_frames_.insert(last_picture_id_);
-      last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
-    }
-  }
-
-  // Clean up info for base layers that are too old.
-  uint8_t old_tl0_pic_idx = codec_header.tl0PicIdx - kMaxLayerInfo;
-  auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
-  layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
-
-  // Clean up info about not yet received frames that are too old.
-  uint16_t old_picture_id = Subtract<kPicIdLength>(frame->picture_id,
-                                                   kMaxNotYetReceivedFrames);
-  auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id);
-  not_yet_received_frames_.erase(not_yet_received_frames_.begin(),
-                                 clean_frames_to);
-
-  if (packet.frameType == kVideoFrameKey) {
-    frame->num_references = 0;
-    layer_info_[codec_header.tl0PicIdx].fill(-1);
-    CompletedFrameVp8(std::move(frame));
-    return;
-  }
-
-  auto layer_info_it = layer_info_.find(codec_header.temporalIdx == 0
-                                            ? codec_header.tl0PicIdx - 1
-                                            : codec_header.tl0PicIdx);
-
-  // If we don't have the base layer frame yet, stash this frame.
-  if (layer_info_it == layer_info_.end()) {
-    stashed_frames_.emplace(std::move(frame));
-    return;
-  }
-
-  // A non keyframe base layer frame has been received, copy the layer info
-  // from the previous base layer frame and set a reference to the previous
-  // base layer frame.
-  if (codec_header.temporalIdx == 0) {
-    layer_info_it =
-        layer_info_
-            .insert(make_pair(codec_header.tl0PicIdx, layer_info_it->second))
-            .first;
-    frame->num_references = 1;
-    frame->references[0] = layer_info_it->second[0];
-    CompletedFrameVp8(std::move(frame));
-    return;
-  }
-
-  // Layer sync frame, this frame only references its base layer frame.
-  if (codec_header.layerSync) {
-    frame->num_references = 1;
-    frame->references[0] = layer_info_it->second[0];
-
-    CompletedFrameVp8(std::move(frame));
-    return;
-  }
-
-  // Find all references for this frame.
-  frame->num_references = 0;
-  for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) {
-    RTC_DCHECK_NE(-1, layer_info_it->second[layer]);
-
-    // If we have not yet received a frame between this frame and the referenced
-    // frame then we have to wait for that frame to be completed first.
-    auto not_received_frame_it =
-             not_yet_received_frames_.upper_bound(layer_info_it->second[layer]);
-    if (not_received_frame_it != not_yet_received_frames_.end() &&
-        AheadOf<uint16_t, kPicIdLength>(frame->picture_id,
-                                        *not_received_frame_it)) {
-      stashed_frames_.emplace(std::move(frame));
-      return;
-    }
-
-    ++frame->num_references;
-    frame->references[layer] = layer_info_it->second[layer];
-  }
-
-  CompletedFrameVp8(std::move(frame));
-}
-
-void PacketBuffer::CompletedFrameVp8(std::unique_ptr<RtpFrameObject> frame) {
-  size_t index = frame->first_seq_num() % size_;
-  const VCMPacket& packet = data_buffer_[index];
-  const RTPVideoHeaderVP8& codec_header =
-      packet.codecSpecificHeader.codecHeader.VP8;
-
-  uint8_t tl0_pic_idx = codec_header.tl0PicIdx;
-  uint8_t temporal_index = codec_header.temporalIdx;
-  auto layer_info_it = layer_info_.find(tl0_pic_idx);
-
-  // Update this layer info and newer.
-  while (layer_info_it != layer_info_.end()) {
-    if (layer_info_it->second[temporal_index] != -1 &&
-        AheadOf<uint16_t, kPicIdLength>(layer_info_it->second[temporal_index],
-                                        frame->picture_id)) {
-      // The frame was not newer, then no subsequent layer info have to be
-      // update.
-      break;
-    }
-
-    layer_info_it->second[codec_header.temporalIdx] = frame->picture_id;
-    ++tl0_pic_idx;
-    layer_info_it = layer_info_.find(tl0_pic_idx);
-  }
-  not_yet_received_frames_.erase(frame->picture_id);
-
-  for (size_t i = 0; i < frame->num_references; ++i)
-    frame->references[i] = UnwrapPictureId(frame->references[i]);
-  frame->picture_id = UnwrapPictureId(frame->picture_id);
-
-  frame_callback_->OnCompleteFrame(std::move(frame));
-  RetryStashedFrames();
-}
-
-void PacketBuffer::ManageFrameVp9(std::unique_ptr<RtpFrameObject> frame) {
-  size_t index = frame->first_seq_num() % size_;
-  const VCMPacket& packet = data_buffer_[index];
-  const RTPVideoHeaderVP9& codec_header =
-      packet.codecSpecificHeader.codecHeader.VP9;
-
-  if (codec_header.picture_id == kNoPictureId) {
-    ManageFrameGeneric(std::move(frame));
-    return;
-  }
-
-  frame->spatial_layer = codec_header.spatial_idx;
-  frame->inter_layer_predicted = codec_header.inter_layer_predicted;
-  frame->picture_id = codec_header.picture_id % kPicIdLength;
-
-  if (last_unwrap_ == -1)
-    last_unwrap_ = codec_header.picture_id;
-
-  if (last_picture_id_ == -1)
-    last_picture_id_ = frame->picture_id;
-
-  if (codec_header.flexible_mode) {
-    frame->num_references = codec_header.num_ref_pics;
-    for (size_t i = 0; i < frame->num_references; ++i) {
-      frame->references[i] =
-          Subtract<1 << 16>(frame->picture_id, codec_header.pid_diff[i]);
-    }
-
-    CompletedFrameVp9(std::move(frame));
-    return;
-  }
-
-  if (codec_header.ss_data_available) {
-    // Scalability structures can only be sent with tl0 frames.
-    if (codec_header.temporal_idx != 0) {
-      LOG(LS_WARNING) << "Received scalability structure on a non base layer"
-                         " frame. Scalability structure ignored.";
-    } else {
-      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
-      scalability_structures_[current_ss_idx_] = codec_header.gof;
-      scalability_structures_[current_ss_idx_].pid_start = frame->picture_id;
-
-      auto pid_and_gof = std::make_pair(
-          frame->picture_id, &scalability_structures_[current_ss_idx_]);
-      gof_info_.insert(std::make_pair(codec_header.tl0_pic_idx, pid_and_gof));
-    }
-  }
-
-  // Clean up info for base layers that are too old.
-  uint8_t old_tl0_pic_idx = codec_header.tl0_pic_idx - kMaxGofSaved;
-  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
-  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
-
-  if (packet.frameType == kVideoFrameKey) {
-    // When using GOF all keyframes must include the scalability structure.
-    if (!codec_header.ss_data_available)
-      LOG(LS_WARNING) << "Received keyframe without scalability structure";
-
-    frame->num_references = 0;
-    GofInfoVP9* gof = gof_info_.find(codec_header.tl0_pic_idx)->second.second;
-    FrameReceivedVp9(frame->picture_id, *gof);
-    CompletedFrameVp9(std::move(frame));
-    return;
-  }
-
-  auto gof_info_it = gof_info_.find(
-      (codec_header.temporal_idx == 0 && !codec_header.ss_data_available)
-          ? codec_header.tl0_pic_idx - 1
-          : codec_header.tl0_pic_idx);
-
-  // Gof info for this frame is not available yet, stash this frame.
-  if (gof_info_it == gof_info_.end()) {
-    stashed_frames_.emplace(std::move(frame));
-    return;
-  }
-
-  GofInfoVP9* gof = gof_info_it->second.second;
-  uint16_t picture_id_tl0 = gof_info_it->second.first;
-
-  FrameReceivedVp9(frame->picture_id, *gof);
-
-  // Make sure we don't miss any frame that could potentially have the
-  // up switch flag set.
-  if (MissingRequiredFrameVp9(frame->picture_id, *gof)) {
-    stashed_frames_.emplace(std::move(frame));
-    return;
-  }
-
-  if (codec_header.temporal_up_switch) {
-    auto pid_tidx =
-        std::make_pair(frame->picture_id, codec_header.temporal_idx);
-    up_switch_.insert(pid_tidx);
-  }
-
-  // If this is a base layer frame that contains a scalability structure
-  // then gof info has already been inserted earlier, so we only want to
-  // insert if we haven't done so already.
-  if (codec_header.temporal_idx == 0 && !codec_header.ss_data_available) {
-    auto pid_and_gof = std::make_pair(frame->picture_id, gof);
-    gof_info_.insert(std::make_pair(codec_header.tl0_pic_idx, pid_and_gof));
-  }
-
-  // Clean out old info about up switch frames.
-  uint16_t old_picture_id = Subtract<kPicIdLength>(last_picture_id_, 50);
-  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
-  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
-
-  RTC_DCHECK(
-      (AheadOrAt<uint16_t, kPicIdLength>(frame->picture_id, picture_id_tl0)));
-
-  size_t diff =
-      ForwardDiff<uint16_t, kPicIdLength>(gof->pid_start, frame->picture_id);
-  size_t gof_idx = diff % gof->num_frames_in_gof;
-
-  // Populate references according to the scalability structure.
-  frame->num_references = gof->num_ref_pics[gof_idx];
-  for (size_t i = 0; i < frame->num_references; ++i) {
-    frame->references[i] =
-        Subtract<kPicIdLength>(frame->picture_id, gof->pid_diff[gof_idx][i]);
-
-    // If this is a reference to a frame earlier than the last up switch point,
-    // then ignore this reference.
-    if (UpSwitchInIntervalVp9(frame->picture_id, codec_header.temporal_idx,
-                              frame->references[i])) {
-      --frame->num_references;
-    }
-  }
-
-  CompletedFrameVp9(std::move(frame));
-}
-
-bool PacketBuffer::MissingRequiredFrameVp9(uint16_t picture_id,
-                                           const GofInfoVP9& gof) {
-  size_t diff = ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, picture_id);
-  size_t gof_idx = diff % gof.num_frames_in_gof;
-  size_t temporal_idx = gof.temporal_idx[gof_idx];
-
-  // For every reference this frame has, check if there is a frame missing in
-  // the interval (|ref_pid|, |picture_id|) in any of the lower temporal
-  // layers. If so, we are missing a required frame.
-  uint8_t num_references = gof.num_ref_pics[gof_idx];
-  for (size_t i = 0; i < num_references; ++i) {
-    uint16_t ref_pid =
-        Subtract<kPicIdLength>(picture_id, gof.pid_diff[gof_idx][i]);
-    for (size_t l = 0; l < temporal_idx; ++l) {
-      auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
-      if (missing_frame_it != missing_frames_for_layer_[l].end() &&
-          AheadOf<uint16_t, kPicIdLength>(picture_id, *missing_frame_it)) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-void PacketBuffer::FrameReceivedVp9(uint16_t picture_id,
-                                    const GofInfoVP9& gof) {
-  RTC_DCHECK_NE(-1, last_picture_id_);
-
-  // If there is a gap, find which temporal layer the missing frames
-  // belong to and add the frame as missing for that temporal layer.
-  // Otherwise, remove this frame from the set of missing frames.
-  if (AheadOf<uint16_t, kPicIdLength>(picture_id, last_picture_id_)) {
-    size_t diff =
-        ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, last_picture_id_);
-    size_t gof_idx = diff % gof.num_frames_in_gof;
-
-    last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
-    while (last_picture_id_ != picture_id) {
-      ++gof_idx;
-      RTC_DCHECK_NE(0ul, gof_idx % gof.num_frames_in_gof);
-      size_t temporal_idx = gof.temporal_idx[gof_idx];
-      missing_frames_for_layer_[temporal_idx].insert(last_picture_id_);
-      last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
-    }
-  } else {
-    size_t diff =
-        ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, picture_id);
-    size_t gof_idx = diff % gof.num_frames_in_gof;
-    size_t temporal_idx = gof.temporal_idx[gof_idx];
-    missing_frames_for_layer_[temporal_idx].erase(picture_id);
-  }
-}
-
-bool PacketBuffer::UpSwitchInIntervalVp9(uint16_t picture_id,
-                                         uint8_t temporal_idx,
-                                         uint16_t pid_ref) {
-  for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
-       up_switch_it != up_switch_.end() &&
-       AheadOf<uint16_t, kPicIdLength>(picture_id, up_switch_it->first);
-       ++up_switch_it) {
-    if (up_switch_it->second < temporal_idx)
-      return true;
-  }
-
-  return false;
-}
-
-void PacketBuffer::CompletedFrameVp9(std::unique_ptr<RtpFrameObject> frame) {
-  for (size_t i = 0; i < frame->num_references; ++i)
-    frame->references[i] = UnwrapPictureId(frame->references[i]);
-  frame->picture_id = UnwrapPictureId(frame->picture_id);
-
-  frame_callback_->OnCompleteFrame(std::move(frame));
-  RetryStashedFrames();
-}
-
-uint16_t PacketBuffer::UnwrapPictureId(uint16_t picture_id) {
-  RTC_DCHECK_NE(-1, last_unwrap_);
-
-  uint16_t unwrap_truncated = last_unwrap_ % kPicIdLength;
-  uint16_t diff = MinDiff<uint16_t, kPicIdLength>(unwrap_truncated, picture_id);
-
-  if (AheadOf<uint16_t, kPicIdLength>(picture_id, unwrap_truncated))
-    last_unwrap_ = Add<1 << 16>(last_unwrap_, diff);
-  else
-    last_unwrap_ = Subtract<1 << 16>(last_unwrap_, diff);
-
-  return last_unwrap_;
-}
-
-void PacketBuffer::Flush() {
+void PacketBuffer::Clear() {
   rtc::CritScope lock(&crit_);
   for (size_t i = 0; i < size_; ++i)
     sequence_buffer_[i].used = false;
 
-  last_seq_num_gop_.clear();
-  while (!stashed_frames_.empty())
-    stashed_frames_.pop();
-  not_yet_received_frames_.clear();
-
   first_packet_received_ = false;
 }
 
diff --git a/webrtc/modules/video_coding/packet_buffer.h b/webrtc/modules/video_coding/packet_buffer.h
index 8a1d706..a30c884 100644
--- a/webrtc/modules/video_coding/packet_buffer.h
+++ b/webrtc/modules/video_coding/packet_buffer.h
@@ -11,12 +11,6 @@
 #ifndef WEBRTC_MODULES_VIDEO_CODING_PACKET_BUFFER_H_
 #define WEBRTC_MODULES_VIDEO_CODING_PACKET_BUFFER_H_
 
-#include <array>
-#include <map>
-#include <memory>
-#include <queue>
-#include <set>
-#include <utility>
 #include <vector>
 
 #include "webrtc/base/criticalsection.h"
@@ -24,6 +18,7 @@
 #include "webrtc/base/thread_annotations.h"
 #include "webrtc/modules/include/module_common_types.h"
 #include "webrtc/modules/video_coding/packet.h"
+#include "webrtc/modules/video_coding/rtp_frame_reference_finder.h"
 #include "webrtc/modules/video_coding/sequence_number_util.h"
 
 namespace webrtc {
@@ -47,16 +42,9 @@
 
   bool InsertPacket(const VCMPacket& packet);
   void ClearTo(uint16_t seq_num);
-  void Flush();
+  void Clear();
 
  private:
-  static const uint16_t kPicIdLength = 1 << 7;
-  static const uint8_t kMaxTemporalLayers = 5;
-  static const int kMaxStashedFrames = 10;
-  static const int kMaxLayerInfo = 10;
-  static const int kMaxNotYetReceivedFrames = 20;
-  static const int kMaxGofSaved = 15;
-
   friend RtpFrameObject;
   // Since we want the packet buffer to be as packet type agnostic
   // as possible we extract only the information needed in order
@@ -81,7 +69,7 @@
     bool frame_created = false;
   };
 
-  // Expand the buffer.
+  // Tries to expand the buffer.
   bool ExpandBufferSize() EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
   // Test if all previous packets has arrived for the given sequence number.
@@ -94,61 +82,12 @@
   // Copy the bitstream for |frame| to |destination|.
   bool GetBitstream(const RtpFrameObject& frame, uint8_t* destination);
 
+  // Get the packet with sequence number |seq_num|.
+  VCMPacket* GetPacket(uint16_t seq_num);
+
   // Mark all slots used by |frame| as not used.
   void ReturnFrame(RtpFrameObject* frame);
 
-  // Find the references for this frame.
-  void ManageFrame(std::unique_ptr<RtpFrameObject> frame)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Retry finding references for all frames that previously didn't have
-  // all information needed.
-  void RetryStashedFrames() EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Find references for generic frames.
-  void ManageFrameGeneric(std::unique_ptr<RtpFrameObject> frame)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Find references for Vp8 frames
-  void ManageFrameVp8(std::unique_ptr<RtpFrameObject> frame)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Updates all necessary state used to determine frame references
-  // for Vp8 and then calls the |frame_callback| callback with the
-  // completed frame.
-  void CompletedFrameVp8(std::unique_ptr<RtpFrameObject> frame)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Find references for Vp9 frames
-  void ManageFrameVp9(std::unique_ptr<RtpFrameObject> frame)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Unwrap the picture id and the frame references  and then call the
-  // |frame_callback| callback with the completed frame.
-  void CompletedFrameVp9(std::unique_ptr<RtpFrameObject> frame)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Check if we are missing a frame necessary to determine the references
-  // for this frame.
-  bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfoVP9& gof)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Updates which frames that have been received. If there is a gap,
-  // missing frames will be added to |missing_frames_for_layer_| or
-  // if this is an already missing frame then it will be removed.
-  void FrameReceivedVp9(uint16_t picture_id, const GofInfoVP9& gof)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // Check if there is a frame with the up-switch flag set in the interval
-  // (|pid_ref|, |picture_id|) with temporal layer smaller than |temporal_idx|.
-  bool UpSwitchInIntervalVp9(uint16_t picture_id,
-                             uint8_t temporal_idx,
-                             uint16_t pid_ref) EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
-  // All picture ids are unwrapped to 16 bits.
-  uint16_t UnwrapPictureId(uint16_t picture_id)
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
-
   rtc::CriticalSection crit_;
 
   // Buffer size_ and max_size_ must always be a power of two.
@@ -171,61 +110,9 @@
   // and information needed to determine the continuity between packets.
   std::vector<ContinuityInfo> sequence_buffer_ GUARDED_BY(crit_);
 
-  // The callback that is called when a frame has been created and all its
-  // references has been found.
-  OnCompleteFrameCallback* const frame_callback_;
-
-  // Holds the last sequence number of the last frame that has been created
-  // given the last sequence number of a given keyframe.
-  std::map<uint16_t, uint16_t, DescendingSeqNumComp<uint16_t>>
-    last_seq_num_gop_ GUARDED_BY(crit_);
-
-  // Save the last picture id in order to detect when there is a gap in frames
-  // that have not yet been fully received.
-  int last_picture_id_ GUARDED_BY(crit_);
-
-  // The last unwrapped picture id. Used to unwrap the picture id from a length
-  // of |kPicIdLength| to 16 bits.
-  int last_unwrap_ GUARDED_BY(crit_);
-
-  // Frames earlier than the last received frame that have not yet been
-  // fully received.
-  std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>
-      not_yet_received_frames_ GUARDED_BY(crit_);
-
-  // Frames that have been fully received but didn't have all the information
-  // needed to determine their references.
-  std::queue<std::unique_ptr<RtpFrameObject>> stashed_frames_ GUARDED_BY(crit_);
-
-  // Holds the information about the last completed frame for a given temporal
-  // layer given a Tl0 picture index.
-  std::map<uint8_t,
-           std::array<int16_t, kMaxTemporalLayers>,
-           DescendingSeqNumComp<uint8_t>>
-      layer_info_ GUARDED_BY(crit_);
-
-  // Where the current scalability structure is in the
-  // |scalability_structures_| array.
-  uint8_t current_ss_idx_;
-
-  // Holds received scalability structures.
-  std::array<GofInfoVP9, kMaxGofSaved> scalability_structures_
-      GUARDED_BY(crit_);
-
-  // Holds the picture id and the Gof information for a given TL0 picture index.
-  std::map<uint8_t,
-           std::pair<uint16_t, GofInfoVP9*>,
-           DescendingSeqNumComp<uint8_t>>
-      gof_info_ GUARDED_BY(crit_);
-
-  // Keep track of which picture id and which temporal layer that had the
-  // up switch flag set.
-  std::map<uint16_t, uint8_t> up_switch_ GUARDED_BY(crit_);
-
-  // For every temporal layer, keep a set of which frames that are missing.
-  std::array<std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>,
-             kMaxTemporalLayers>
-      missing_frames_for_layer_ GUARDED_BY(crit_);
+  // Frames that have received all their packets are handed off to the
+  // |reference_finder_| which finds the dependencies between the frames.
+  RtpFrameReferenceFinder reference_finder_;
 };
 
 }  // namespace video_coding
diff --git a/webrtc/modules/video_coding/packet_buffer_unittest.cc b/webrtc/modules/video_coding/packet_buffer_unittest.cc
index b7921e5..b50074d 100644
--- a/webrtc/modules/video_coding/packet_buffer_unittest.cc
+++ b/webrtc/modules/video_coding/packet_buffer_unittest.cc
@@ -10,7 +10,8 @@
 
 #include <cstring>
 #include <limits>
-#include <memory>
+#include <map>
+#include <set>
 #include <utility>
 
 #include "webrtc/modules/video_coding/frame_object.h"
@@ -444,7 +445,7 @@
   EXPECT_EQ(1UL, frames_from_callback_.size());
 }
 
-TEST_F(TestPacketBuffer, Flush) {
+TEST_F(TestPacketBuffer, Clear) {
   uint16_t seq_num = Rand();
 
   //            seq_num    , kf, frst, lst
@@ -453,7 +454,7 @@
   InsertGeneric(seq_num + 2, kF, kF  , kT);
   EXPECT_EQ(1UL, frames_from_callback_.size());
 
-  packet_buffer_->Flush();
+  packet_buffer_->Clear();
 
   //            seq_num                 , kf, frst, lst
   InsertGeneric(seq_num + kStartSize    , kT, kT  , kF);
@@ -462,7 +463,7 @@
   EXPECT_EQ(2UL, frames_from_callback_.size());
 }
 
-TEST_F(TestPacketBuffer, InvalidateFrameByFlushing) {
+TEST_F(TestPacketBuffer, InvalidateFrameByClearing) {
   VCMPacket packet;
   packet.codec = kVideoCodecGeneric;
   packet.frameType = kVideoFrameKey;
@@ -472,7 +473,7 @@
   EXPECT_TRUE(packet_buffer_->InsertPacket(packet));
   ASSERT_EQ(1UL, frames_from_callback_.size());
 
-  packet_buffer_->Flush();
+  packet_buffer_->Clear();
   EXPECT_FALSE(frames_from_callback_.begin()->second->GetBitstream(nullptr));
 }
 
diff --git a/webrtc/modules/video_coding/rtp_frame_reference_finder.cc b/webrtc/modules/video_coding/rtp_frame_reference_finder.cc
new file mode 100644
index 0000000..2ddfada
--- /dev/null
+++ b/webrtc/modules/video_coding/rtp_frame_reference_finder.cc
@@ -0,0 +1,486 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/video_coding/rtp_frame_reference_finder.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/modules/video_coding/frame_object.h"
+#include "webrtc/modules/video_coding/packet_buffer.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::RtpFrameReferenceFinder(
+    OnCompleteFrameCallback* frame_callback)
+    : last_picture_id_{-1},
+      last_unwrap_{-1},
+      current_ss_idx_{0},
+      frame_callback_{frame_callback} {}
+
+void RtpFrameReferenceFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  rtc::CritScope lock(&crit_);  // Held while the frame is being processed.
+  switch (frame->codec_type()) {
+    case kVideoCodecVP8:
+      ManageFrameVp8(std::move(frame));
+      break;
+    case kVideoCodecVP9:
+      ManageFrameVp9(std::move(frame));
+      break;
+    case kVideoCodecH264:
+    case kVideoCodecI420:
+    case kVideoCodecGeneric:
+      ManageFrameGeneric(std::move(frame));
+      break;
+    case kVideoCodecULPFEC:
+    case kVideoCodecRED:
+    case kVideoCodecUnknown:
+      RTC_NOTREACHED();
+      break;
+  }
+}
+
+void RtpFrameReferenceFinder::RetryStashedFrames() {
+  // Snapshot the count up front: frames that still lack information are
+  // stashed again by ManageFrame(), and retrying those would never terminate.
+  size_t retry_budget = stashed_frames_.size();
+  // Drop the oldest stashed frames if there are too many.
+  while (stashed_frames_.size() > kMaxStashedFrames)
+    stashed_frames_.pop();
+
+  for (size_t n = 0; n < retry_budget; ++n) {
+    if (stashed_frames_.empty())
+      break;
+    std::unique_ptr<RtpFrameObject> frame = std::move(stashed_frames_.front());
+    stashed_frames_.pop();
+    ManageFrame(std::move(frame));
+  }
+}
+
+void RtpFrameReferenceFinder::ManageFrameGeneric(
+    std::unique_ptr<RtpFrameObject> frame) {
+  if (frame->frame_type() == kVideoFrameKey)
+    last_seq_num_gop_[frame->last_seq_num()] = frame->last_seq_num();
+
+  // We have received a frame but not yet a keyframe, stash this frame.
+  if (last_seq_num_gop_.empty()) {
+    stashed_frames_.emplace(std::move(frame));
+    return;
+  }
+
+  // Clean up info for old keyframes, but always keep the last keyframe.
+  auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
+  if (clean_to != last_seq_num_gop_.end())
+    last_seq_num_gop_.erase(last_seq_num_gop_.begin(), clean_to);
+
+  // Find the last sequence number of the last frame for the keyframe that
+  // this frame indirectly references. Drop the frame if no entry precedes it.
+  auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
+  if (seq_num_it == last_seq_num_gop_.begin())
+    return;
+  --seq_num_it;
+
+  // Make sure the packet sequence numbers are continuous, otherwise stash
+  // this frame.
+  if (frame->frame_type() == kVideoFrameDelta) {
+    if (seq_num_it->second !=
+        static_cast<uint16_t>(frame->first_seq_num() - 1)) {
+      stashed_frames_.emplace(std::move(frame));
+      return;
+    }
+  }
+
+  RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first));
+
+  // Since keyframes can cause reordering we can't simply assign the
+  // picture id according to some incrementing counter.
+  frame->picture_id = frame->last_seq_num();
+  frame->num_references = frame->frame_type() == kVideoFrameDelta;
+  frame->references[0] = seq_num_it->second;
+  seq_num_it->second = frame->picture_id;
+  last_picture_id_ = frame->picture_id;
+  frame_callback_->OnCompleteFrame(std::move(frame));
+  RetryStashedFrames();
+}
+
+void RtpFrameReferenceFinder::ManageFrameVp8(
+    std::unique_ptr<RtpFrameObject> frame) {
+  RTPVideoTypeHeader* rtp_codec_header = frame->GetCodecHeader();
+  if (!rtp_codec_header)
+    return;
+
+  const RTPVideoHeaderVP8& codec_header = rtp_codec_header->VP8;
+
+  if (codec_header.pictureId == kNoPictureId ||
+      codec_header.temporalIdx == kNoTemporalIdx ||
+      codec_header.tl0PicIdx == kNoTl0PicIdx) {
+    ManageFrameGeneric(std::move(frame));
+    return;
+  }
+
+  frame->picture_id = codec_header.pictureId % kPicIdLength;
+
+  if (last_unwrap_ == -1)
+    last_unwrap_ = codec_header.pictureId;
+
+  if (last_picture_id_ == -1)
+    last_picture_id_ = frame->picture_id;
+
+  // Detect a gap between the last seen picture id and this frame, and record
+  // the skipped picture ids in |not_yet_received_frames_|.
+  if (AheadOf<uint16_t, kPicIdLength>(frame->picture_id, last_picture_id_)) {
+    last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
+    while (last_picture_id_ != frame->picture_id) {
+      not_yet_received_frames_.insert(last_picture_id_);
+      last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
+    }
+  }
+
+  // Clean up info for base layers that are too old.
+  uint8_t old_tl0_pic_idx = codec_header.tl0PicIdx - kMaxLayerInfo;
+  auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
+  layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
+
+  // Clean up info about not yet received frames that are too old.
+  uint16_t old_picture_id =
+      Subtract<kPicIdLength>(frame->picture_id, kMaxNotYetReceivedFrames);
+  auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id);
+  not_yet_received_frames_.erase(not_yet_received_frames_.begin(),
+                                 clean_frames_to);
+
+  if (frame->frame_type() == kVideoFrameKey) {
+    frame->num_references = 0;
+    layer_info_[codec_header.tl0PicIdx].fill(-1);
+    CompletedFrameVp8(std::move(frame));
+    return;
+  }
+
+  auto layer_info_it = layer_info_.find(codec_header.temporalIdx == 0
+                                            ? codec_header.tl0PicIdx - 1
+                                            : codec_header.tl0PicIdx);
+
+  // If we don't have the base layer frame yet, stash this frame.
+  if (layer_info_it == layer_info_.end()) {
+    stashed_frames_.emplace(std::move(frame));
+    return;
+  }
+
+  // A non-keyframe base layer frame has been received: copy the layer info
+  // from the previous base layer frame and reference that frame, i.e. the
+  // layer 0 entry of the resulting info.
+  if (codec_header.temporalIdx == 0) {
+    layer_info_it =
+        layer_info_
+            .insert(make_pair(codec_header.tl0PicIdx, layer_info_it->second))
+            .first;
+    frame->num_references = 1;
+    frame->references[0] = layer_info_it->second[0];
+    CompletedFrameVp8(std::move(frame));
+    return;
+  }
+
+  // Layer sync frame, this frame only references its base layer frame.
+  if (codec_header.layerSync) {
+    frame->num_references = 1;
+    frame->references[0] = layer_info_it->second[0];
+
+    CompletedFrameVp8(std::move(frame));
+    return;
+  }
+
+  // Find all references for this frame: one per temporal layer up to and
+  // including the frame's own layer.
+  frame->num_references = 0;
+  for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) {
+    RTC_DCHECK_NE(-1, layer_info_it->second[layer]);
+
+    // If we have not yet received a frame between this frame and the referenced
+    // frame then we have to wait for that frame to be completed first.
+    auto not_received_frame_it =
+        not_yet_received_frames_.upper_bound(layer_info_it->second[layer]);
+    if (not_received_frame_it != not_yet_received_frames_.end() &&
+        AheadOf<uint16_t, kPicIdLength>(frame->picture_id,
+                                        *not_received_frame_it)) {
+      stashed_frames_.emplace(std::move(frame));
+      return;
+    }
+
+    ++frame->num_references;
+    frame->references[layer] = layer_info_it->second[layer];
+  }
+
+  CompletedFrameVp8(std::move(frame));
+}
+
+void RtpFrameReferenceFinder::CompletedFrameVp8(
+    std::unique_ptr<RtpFrameObject> frame) {
+  RTPVideoTypeHeader* rtp_codec_header = frame->GetCodecHeader();
+  if (!rtp_codec_header)
+    return;
+
+  const RTPVideoHeaderVP8& codec_header = rtp_codec_header->VP8;
+
+  uint8_t tl0_pic_idx = codec_header.tl0PicIdx;
+  uint8_t temporal_index = codec_header.temporalIdx;
+  auto layer_info_it = layer_info_.find(tl0_pic_idx);
+
+  // Update the layer info for this tl0 index and for each consecutive newer one.
+  while (layer_info_it != layer_info_.end()) {
+    if (layer_info_it->second[temporal_index] != -1 &&
+        AheadOf<uint16_t, kPicIdLength>(layer_info_it->second[temporal_index],
+                                        frame->picture_id)) {
+      // The stored frame is newer than this one, so no subsequent layer
+      // info needs to be updated.
+      break;
+    }
+
+    layer_info_it->second[codec_header.temporalIdx] = frame->picture_id;
+    ++tl0_pic_idx;
+    layer_info_it = layer_info_.find(tl0_pic_idx);
+  }
+  not_yet_received_frames_.erase(frame->picture_id);
+
+  for (size_t i = 0; i < frame->num_references; ++i)
+    frame->references[i] = UnwrapPictureId(frame->references[i]);
+  frame->picture_id = UnwrapPictureId(frame->picture_id);
+
+  frame_callback_->OnCompleteFrame(std::move(frame));
+  RetryStashedFrames();
+}
+
+void RtpFrameReferenceFinder::ManageFrameVp9(
+    std::unique_ptr<RtpFrameObject> frame) {
+  RTPVideoTypeHeader* rtp_codec_header = frame->GetCodecHeader();
+  if (!rtp_codec_header)
+    return;
+
+  const RTPVideoHeaderVP9& codec_header = rtp_codec_header->VP9;
+
+  if (codec_header.picture_id == kNoPictureId) {
+    ManageFrameGeneric(std::move(frame));
+    return;
+  }
+
+  frame->spatial_layer = codec_header.spatial_idx;
+  frame->inter_layer_predicted = codec_header.inter_layer_predicted;
+  frame->picture_id = codec_header.picture_id % kPicIdLength;
+
+  if (last_unwrap_ == -1)
+    last_unwrap_ = codec_header.picture_id;
+
+  if (last_picture_id_ == -1)
+    last_picture_id_ = frame->picture_id;
+
+  if (codec_header.flexible_mode) {
+    frame->num_references = codec_header.num_ref_pics;
+    for (size_t i = 0; i < frame->num_references; ++i) {
+      // Same modulus as |picture_id|; CompletedFrameVp9() unwraps from it.
+      frame->references[i] =
+          Subtract<kPicIdLength>(frame->picture_id, codec_header.pid_diff[i]);
+    }
+    CompletedFrameVp9(std::move(frame));
+    return;
+  }
+
+  if (codec_header.ss_data_available) {
+    // Scalability structures can only be sent with tl0 frames.
+    if (codec_header.temporal_idx != 0) {
+      LOG(LS_WARNING) << "Received scalability structure on a non base layer"
+                         " frame. Scalability structure ignored.";
+    } else {
+      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
+      scalability_structures_[current_ss_idx_] = codec_header.gof;
+      scalability_structures_[current_ss_idx_].pid_start = frame->picture_id;
+
+      auto pid_and_gof = std::make_pair(
+          frame->picture_id, &scalability_structures_[current_ss_idx_]);
+      gof_info_.insert(std::make_pair(codec_header.tl0_pic_idx, pid_and_gof));
+    }
+  }
+
+  // Clean up info for base layers that are too old.
+  uint8_t old_tl0_pic_idx = codec_header.tl0_pic_idx - kMaxGofSaved;
+  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
+  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
+
+  if (frame->frame_type() == kVideoFrameKey) {
+    // When using GOF all keyframes must include the scalability structure.
+    if (!codec_header.ss_data_available)
+      LOG(LS_WARNING) << "Received keyframe without scalability structure";
+    auto key_gof_it = gof_info_.find(codec_header.tl0_pic_idx);
+    if (key_gof_it == gof_info_.end())
+      return;  // No gof info for this tl0 index; the keyframe is dropped.
+    frame->num_references = 0;
+    FrameReceivedVp9(frame->picture_id, *key_gof_it->second.second);
+    CompletedFrameVp9(std::move(frame));
+    return;
+  }
+
+  auto gof_info_it = gof_info_.find(
+      (codec_header.temporal_idx == 0 && !codec_header.ss_data_available)
+          ? codec_header.tl0_pic_idx - 1
+          : codec_header.tl0_pic_idx);
+
+  // Gof info for this frame is not available yet, stash this frame.
+  if (gof_info_it == gof_info_.end()) {
+    stashed_frames_.emplace(std::move(frame));
+    return;
+  }
+
+  GofInfoVP9* gof = gof_info_it->second.second;
+  uint16_t picture_id_tl0 = gof_info_it->second.first;
+
+  FrameReceivedVp9(frame->picture_id, *gof);
+
+  // Make sure we don't miss any frame that could potentially have the
+  // up switch flag set.
+  if (MissingRequiredFrameVp9(frame->picture_id, *gof)) {
+    stashed_frames_.emplace(std::move(frame));
+    return;
+  }
+
+  if (codec_header.temporal_up_switch) {
+    auto pid_tidx =
+        std::make_pair(frame->picture_id, codec_header.temporal_idx);
+    up_switch_.insert(pid_tidx);
+  }
+
+  // A base layer frame carrying a scalability structure had its gof info
+  // inserted above, so only insert here when that has not been done already.
+  if (codec_header.temporal_idx == 0 && !codec_header.ss_data_available) {
+    auto pid_and_gof = std::make_pair(frame->picture_id, gof);
+    gof_info_.insert(std::make_pair(codec_header.tl0_pic_idx, pid_and_gof));
+  }
+
+  // Clean out old info about up switch frames.
+  uint16_t old_picture_id = Subtract<kPicIdLength>(last_picture_id_, 50);
+  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
+  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
+
+  RTC_DCHECK(
+      (AheadOrAt<uint16_t, kPicIdLength>(frame->picture_id, picture_id_tl0)));
+
+  size_t diff =
+      ForwardDiff<uint16_t, kPicIdLength>(gof->pid_start, frame->picture_id);
+  size_t gof_idx = diff % gof->num_frames_in_gof;
+
+  // Populate references according to the scalability structure.
+  frame->num_references = gof->num_ref_pics[gof_idx];
+  for (size_t i = 0; i < frame->num_references; ++i) {
+    frame->references[i] =
+        Subtract<kPicIdLength>(frame->picture_id, gof->pid_diff[gof_idx][i]);
+
+    // If this is a reference to a frame earlier than the last up switch point,
+    // then ignore this reference.
+    if (UpSwitchInIntervalVp9(frame->picture_id, codec_header.temporal_idx,
+                              frame->references[i])) {
+      --frame->num_references;
+    }
+  }
+
+  CompletedFrameVp9(std::move(frame));
+}
+
+bool RtpFrameReferenceFinder::MissingRequiredFrameVp9(uint16_t picture_id,
+                                                      const GofInfoVP9& gof) {
+  size_t dist = ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, picture_id);
+  const size_t slot = dist % gof.num_frames_in_gof;
+  const size_t frame_layer = gof.temporal_idx[slot];
+  const uint8_t ref_count = gof.num_ref_pics[slot];
+
+  // A required frame is missing if, for any reference of this frame, a lower
+  // temporal layer lacks a frame at or after |ref_pid| and before |picture_id|.
+  for (size_t r = 0; r < ref_count; ++r) {
+    const uint16_t ref_pid =
+        Subtract<kPicIdLength>(picture_id, gof.pid_diff[slot][r]);
+    for (size_t layer = 0; layer < frame_layer; ++layer) {
+      const auto& missing = missing_frames_for_layer_[layer];
+      auto first_missing = missing.lower_bound(ref_pid);
+      if (first_missing != missing.end() &&
+          AheadOf<uint16_t, kPicIdLength>(picture_id, *first_missing)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+void RtpFrameReferenceFinder::FrameReceivedVp9(uint16_t picture_id,
+                                               const GofInfoVP9& gof) {
+  RTC_DCHECK_NE(-1, last_picture_id_);
+
+  // If there is a gap, find which temporal layer the missing frames
+  // belong to and add the frame as missing for that temporal layer.
+  // Otherwise, remove this frame from the set of missing frames.
+  if (AheadOf<uint16_t, kPicIdLength>(picture_id, last_picture_id_)) {
+    size_t diff =
+        ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, last_picture_id_);
+    size_t gof_idx = diff % gof.num_frames_in_gof;
+
+    last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
+    while (last_picture_id_ != picture_id) {
+      // Wrap at the GOF boundary so |gof_idx| stays below |num_frames_in_gof|.
+      gof_idx = (gof_idx + 1) % gof.num_frames_in_gof;
+      size_t temporal_idx = gof.temporal_idx[gof_idx];
+      missing_frames_for_layer_[temporal_idx].insert(last_picture_id_);
+      last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
+    }
+  } else {
+    size_t diff =
+        ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, picture_id);
+    size_t gof_idx = diff % gof.num_frames_in_gof;
+    size_t temporal_idx = gof.temporal_idx[gof_idx];
+    missing_frames_for_layer_[temporal_idx].erase(picture_id);
+  }
+}
+
+bool RtpFrameReferenceFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
+                                                    uint8_t temporal_idx,
+                                                    uint16_t pid_ref) {
+  // Walk the up-switch points recorded after |pid_ref| and before |picture_id|.
+  auto it = up_switch_.upper_bound(pid_ref);
+  while (it != up_switch_.end() &&
+         AheadOf<uint16_t, kPicIdLength>(picture_id, it->first)) {
+    if (it->second < temporal_idx)
+      return true;
+    ++it;
+  }
+  return false;
+}
+
+void RtpFrameReferenceFinder::CompletedFrameVp9(
+    std::unique_ptr<RtpFrameObject> frame) {
+  // UnwrapPictureId() is stateful: references first, then the frame's own id.
+  for (size_t ref = 0; ref < frame->num_references; ++ref)
+    frame->references[ref] = UnwrapPictureId(frame->references[ref]);
+  frame->picture_id = UnwrapPictureId(frame->picture_id);
+  frame_callback_->OnCompleteFrame(std::move(frame));
+  RetryStashedFrames();
+}
+
+uint16_t RtpFrameReferenceFinder::UnwrapPictureId(uint16_t picture_id) {
+  RTC_DCHECK_NE(-1, last_unwrap_);
+
+  // Move |last_unwrap_| by the minimal distance between its truncated value
+  // and |picture_id|, forwards if |picture_id| is ahead, otherwise backwards.
+  const uint16_t truncated = last_unwrap_ % kPicIdLength;
+  const uint16_t step = MinDiff<uint16_t, kPicIdLength>(truncated, picture_id);
+  const bool forward = AheadOf<uint16_t, kPicIdLength>(picture_id, truncated);
+  last_unwrap_ = forward ? Add<1 << 16>(last_unwrap_, step)
+                         : Subtract<1 << 16>(last_unwrap_, step);
+
+  return last_unwrap_;
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/webrtc/modules/video_coding/rtp_frame_reference_finder.h b/webrtc/modules/video_coding/rtp_frame_reference_finder.h
new file mode 100644
index 0000000..52eeaed
--- /dev/null
+++ b/webrtc/modules/video_coding/rtp_frame_reference_finder.h
@@ -0,0 +1,152 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_VIDEO_CODING_RTP_FRAME_REFERENCE_FINDER_H_
+#define WEBRTC_MODULES_VIDEO_CODING_RTP_FRAME_REFERENCE_FINDER_H_
+
+#include <array>
+#include <map>
+#include <queue>
+#include <set>
+#include <utility>
+
+#include "webrtc/base/criticalsection.h"
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/base/thread_annotations.h"
+#include "webrtc/modules/include/module_common_types.h"
+#include "webrtc/modules/video_coding/sequence_number_util.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class RtpFrameObject;
+class OnCompleteFrameCallback;
+
+class RtpFrameReferenceFinder {
+ public:
+  explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback);
+  void ManageFrame(std::unique_ptr<RtpFrameObject> frame);
+
+ private:
+  static const uint16_t kPicIdLength = 1 << 7;
+  static const uint8_t kMaxTemporalLayers = 5;
+  static const int kMaxLayerInfo = 10;
+  static const int kMaxStashedFrames = 10;
+  static const int kMaxNotYetReceivedFrames = 20;
+  static const int kMaxGofSaved = 15;
+
+  rtc::CriticalSection crit_;
+
+  // Retry finding references for all frames that previously didn't have
+  // all information needed.
+  void RetryStashedFrames() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Find references for generic frames.
+  void ManageFrameGeneric(std::unique_ptr<RtpFrameObject> frame)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Find references for Vp8 frames.
+  void ManageFrameVp8(std::unique_ptr<RtpFrameObject> frame)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Updates all necessary state used to determine frame references
+  // for Vp8 and then calls the |frame_callback| callback with the
+  // completed frame.
+  void CompletedFrameVp8(std::unique_ptr<RtpFrameObject> frame)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Find references for Vp9 frames.
+  void ManageFrameVp9(std::unique_ptr<RtpFrameObject> frame)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Unwrap the picture id and the frame references and then call the
+  // |frame_callback| callback with the completed frame.
+  void CompletedFrameVp9(std::unique_ptr<RtpFrameObject> frame)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Check if we are missing a frame necessary to determine the references
+  // for this frame.
+  bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfoVP9& gof)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Updates the record of which frames have been received. If there is a
+  // gap, missing frames will be added to |missing_frames_for_layer_| or
+  // if this is an already missing frame then it will be removed.
+  void FrameReceivedVp9(uint16_t picture_id, const GofInfoVP9& gof)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Check if there is a frame with the up-switch flag set in the interval
+  // (|pid_ref|, |picture_id|) with temporal layer smaller than |temporal_idx|.
+  bool UpSwitchInIntervalVp9(uint16_t picture_id,
+                             uint8_t temporal_idx,
+                             uint16_t pid_ref) EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // All picture ids are unwrapped to 16 bits.
+  uint16_t UnwrapPictureId(uint16_t picture_id) EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  // Holds the last sequence number of the last frame that has been created
+  // given the last sequence number of a given keyframe.
+  std::map<uint16_t, uint16_t, DescendingSeqNumComp<uint16_t>> last_seq_num_gop_
+      GUARDED_BY(crit_);
+
+  // Save the last picture id in order to detect when there is a gap in frames
+  // that have not yet been fully received.
+  int last_picture_id_ GUARDED_BY(crit_);
+
+  // The last unwrapped picture id. Used to unwrap the picture id from a length
+  // of |kPicIdLength| to 16 bits.
+  int last_unwrap_ GUARDED_BY(crit_);
+
+  // Frames earlier than the last received frame that have not yet been
+  // fully received.
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>
+      not_yet_received_frames_ GUARDED_BY(crit_);
+
+  // Frames that have been fully received but didn't have all the information
+  // needed to determine their references.
+  std::queue<std::unique_ptr<RtpFrameObject>> stashed_frames_ GUARDED_BY(crit_);
+
+  // Holds the information about the last completed frame for a given temporal
+  // layer given a Tl0 picture index.
+  std::map<uint8_t,
+           std::array<int16_t, kMaxTemporalLayers>,
+           DescendingSeqNumComp<uint8_t>>
+      layer_info_ GUARDED_BY(crit_);
+
+  // Where the current scalability structure is in the
+  // |scalability_structures_| array.
+  uint8_t current_ss_idx_ GUARDED_BY(crit_);
+
+  // Holds received scalability structures.
+  std::array<GofInfoVP9, kMaxGofSaved> scalability_structures_
+      GUARDED_BY(crit_);
+
+  // Holds the picture id and the Gof information for a given TL0 picture index.
+  std::map<uint8_t,
+           std::pair<uint16_t, GofInfoVP9*>,
+           DescendingSeqNumComp<uint8_t>>
+      gof_info_ GUARDED_BY(crit_);
+
+  // Keep track of which picture id and which temporal layer that had the
+  // up switch flag set.
+  std::map<uint16_t, uint8_t> up_switch_ GUARDED_BY(crit_);
+
+  // For every temporal layer, keep a set of the frames that are missing.
+  std::array<std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>,
+             kMaxTemporalLayers>
+      missing_frames_for_layer_ GUARDED_BY(crit_);
+
+  OnCompleteFrameCallback* frame_callback_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_VIDEO_CODING_RTP_FRAME_REFERENCE_FINDER_H_
diff --git a/webrtc/modules/video_coding/video_coding.gypi b/webrtc/modules/video_coding/video_coding.gypi
index 515c6bc..8d31a6a 100644
--- a/webrtc/modules/video_coding/video_coding.gypi
+++ b/webrtc/modules/video_coding/video_coding.gypi
@@ -33,6 +33,7 @@
         'fec_tables_xor.h',
         'frame_buffer.h',
         'frame_object.h',
+        'rtp_frame_reference_finder.h',
         'generic_decoder.h',
         'generic_encoder.h',
         'histogram.h',
@@ -62,6 +63,7 @@
         'encoded_frame.cc',
         'frame_buffer.cc',
         'frame_object.cc',
+        'rtp_frame_reference_finder.cc',
         'generic_decoder.cc',
         'generic_encoder.cc',
         'inter_frame_delay.cc',