Make RtpGenericFrameDescriptor available for E2EE.

This CL makes the RtpGenericFrameDescriptor available in
RTPSenderVideo::SendVideo for encryption and in
RtpVideoStreamReceiver::OnReceivedFrame for decryption, so that
end-to-end encryption (E2EE) can use it.

Bug: webrtc:9361
Change-Id: I5b6d10138c0874657862f103c8c9a2328e6d4a66
Reviewed-on: https://webrtc-review.googlesource.com/102720
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24929}
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index c992da5..a414153 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -69,6 +69,7 @@
   ]
   deps = [
     "..:module_api",
+    "../rtp_rtcp:rtp_rtcp_format",
   ]
 }
 
diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc
index 32b0cc1..7f823da 100644
--- a/modules/video_coding/frame_object.cc
+++ b/modules/video_coding/frame_object.cc
@@ -159,6 +159,15 @@
   return packet->video_header;
 }
 
+absl::optional<RtpGenericFrameDescriptor>
+RtpFrameObject::GetGenericFrameDescriptor() const {
+  rtc::CritScope lock(&packet_buffer_->crit_);
+  VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_);
+  if (!packet)
+    return absl::nullopt;
+  return packet->generic_descriptor;
+}
+
 absl::optional<FrameMarking> RtpFrameObject::GetFrameMarking() const {
   rtc::CritScope lock(&packet_buffer_->crit_);
   VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_);
diff --git a/modules/video_coding/frame_object.h b/modules/video_coding/frame_object.h
index 5fb5193..968489b 100644
--- a/modules/video_coding/frame_object.h
+++ b/modules/video_coding/frame_object.h
@@ -15,6 +15,7 @@
 #include "api/video/encoded_frame.h"
 #include "common_types.h"  // NOLINT(build/include)
 #include "modules/include/module_common_types.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 
 namespace webrtc {
 namespace video_coding {
@@ -41,6 +42,7 @@
   int64_t RenderTime() const override;
   bool delayed_by_retransmission() const override;
   absl::optional<RTPVideoHeader> GetRtpVideoHeader() const;
+  absl::optional<RtpGenericFrameDescriptor> GetGenericFrameDescriptor() const;
   absl::optional<FrameMarking> GetFrameMarking() const;
 
  private:
diff --git a/modules/video_coding/packet.cc b/modules/video_coding/packet.cc
index dea72cb..eec8ba3 100644
--- a/modules/video_coding/packet.cc
+++ b/modules/video_coding/packet.cc
@@ -81,4 +81,6 @@
   }
 }
 
+VCMPacket::~VCMPacket() = default;
+
 }  // namespace webrtc
diff --git a/modules/video_coding/packet.h b/modules/video_coding/packet.h
index 09ab983..bddf857 100644
--- a/modules/video_coding/packet.h
+++ b/modules/video_coding/packet.h
@@ -12,6 +12,7 @@
 #define MODULES_VIDEO_CODING_PACKET_H_
 
 #include "modules/include/module_common_types.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 
 namespace webrtc {
 
@@ -22,6 +23,8 @@
             const size_t size,
             const WebRtcRTPHeader& rtpHeader);
 
+  ~VCMPacket();
+
   uint8_t payloadType;
   uint32_t timestamp;
   // NTP time of the capture time in local timebase in milliseconds.
@@ -43,6 +46,7 @@
   int width;
   int height;
   RTPVideoHeader video_header;
+  absl::optional<RtpGenericFrameDescriptor> generic_descriptor;
 
   int64_t receive_time_ms;
 };
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index 67414bb..40b16f4 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -84,12 +84,10 @@
 
 RtpFrameReferenceFinder::FrameDecision
 RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
-  absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
-  // TODO(bugs.webrtc.org/9772): Remove the spatial id check when the old
-  //                             generic format has been removed.
-  if (video_header && video_header->generic &&
-      video_header->generic->spatial_index != -1) {
-    return ManageFrameGeneric(frame, *video_header->generic);
+  absl::optional<RtpGenericFrameDescriptor> generic_descriptor =
+      frame->GetGenericFrameDescriptor();
+  if (generic_descriptor) {
+    return ManageFrameGeneric(frame, *generic_descriptor);
   }
 
   switch (frame->codec_type()) {
@@ -99,6 +97,7 @@
       return ManageFrameVp9(frame);
     default: {
       // Use 15 first bits of frame ID as picture ID if available.
+      absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
       int picture_id = kNoPictureId;
       if (video_header && video_header->generic)
         picture_id = video_header->generic->frame_id & 0x7fff;
@@ -171,19 +170,20 @@
 RtpFrameReferenceFinder::FrameDecision
 RtpFrameReferenceFinder::ManageFrameGeneric(
     RtpFrameObject* frame,
-    const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
-  if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
+    const RtpGenericFrameDescriptor& descriptor) {
+  int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.FrameId());
+  frame->id.picture_id = frame_id;
+  frame->id.spatial_layer = descriptor.SpatialLayer();
+
+  rtc::ArrayView<const uint16_t> diffs = descriptor.FrameDependenciesDiffs();
+  if (EncodedFrame::kMaxFrameReferences < diffs.size()) {
     RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
     return kDrop;
   }
 
-  int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.frame_id);
-  frame->id.picture_id = frame_id;
-  frame->id.spatial_layer = descriptor.spatial_index;
-
-  frame->num_references = descriptor.dependencies.size();
-  for (size_t i = 0; i < descriptor.dependencies.size(); ++i)
-    frame->references[i] = frame_id - descriptor.dependencies[i];
+  frame->num_references = diffs.size();
+  for (size_t i = 0; i < diffs.size(); ++i)
+    frame->references[i] = frame_id - diffs[i];
 
   return kHandOff;
 }
diff --git a/modules/video_coding/rtp_frame_reference_finder.h b/modules/video_coding/rtp_frame_reference_finder.h
index eae73d2..01819ea 100644
--- a/modules/video_coding/rtp_frame_reference_finder.h
+++ b/modules/video_coding/rtp_frame_reference_finder.h
@@ -19,6 +19,7 @@
 #include <utility>
 
 #include "modules/include/module_common_types.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "rtc_base/criticalsection.h"
 #include "rtc_base/numerics/sequence_number_util.h"
 #include "rtc_base/thread_annotations.h"
@@ -88,9 +89,8 @@
   FrameDecision ManageFrameInternal(RtpFrameObject* frame)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
-  FrameDecision ManageFrameGeneric(
-      RtpFrameObject* frame,
-      const RTPVideoHeader::GenericDescriptorInfo& descriptor)
+  FrameDecision ManageFrameGeneric(RtpFrameObject* frame,
+                                   const RtpGenericFrameDescriptor& descriptor)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
   // Find references for frames with no or very limited information in the