Add plumbing of RtpPacketInfos to each VideoFrame as input for SourceTracker.

This change adds the plumbing of RtpPacketInfo from RtpVideoStreamReceiver::OnRtpPacket() to VideoReceiveStream::OnFrame() for video. It is a step towards replacing the non-spec-compliant ContributingSources, which updates itself at packet-receive time, with the spec-compliant SourceTracker, which will update itself at frame-delivery-to-track time.

Bug: webrtc:10668
Change-Id: Ib97d430530c5a8487d3b129936c7c51e118889bd
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/139891
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Commit-Queue: Chen Xing <chxg@google.com>
Cr-Commit-Position: refs/heads/master@{#28332}
diff --git a/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
index d1c8220..fa1d2b9 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
@@ -262,6 +262,7 @@
                                 .set_timestamp_us(0)
                                 .set_rotation(decoded_image->rotation())
                                 .set_id(decoded_image->id())
+                                .set_packet_infos(decoded_image->packet_infos())
                                 .build();
   decoded_complete_callback_->Decoded(merged_image, decode_time_ms, qp);
 }
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
index 59d59c1..bfc03f2 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@@ -205,6 +205,7 @@
                                .set_timestamp_ms(input_image.render_time_ms())
                                .set_rotation(input_image.rotation())
                                .set_id(input_image.id())
+                               .set_packet_infos(input_image.packet_infos())
                                .build();
   rv = encoders_[kAXXStream]->Encode(alpha_image, &adjusted_frame_types);
   return rv;
diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h
index 2ebef31..f8ee6a7 100644
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@@ -54,9 +54,11 @@
 
   using EncodedImage::ColorSpace;
   using EncodedImage::data;
+  using EncodedImage::PacketInfos;
   using EncodedImage::set_size;
   using EncodedImage::SetColorSpace;
   using EncodedImage::SetEncodedData;
+  using EncodedImage::SetPacketInfos;
   using EncodedImage::SetSpatialIndex;
   using EncodedImage::SetSpatialLayerFrameSize;
   using EncodedImage::SetTimestamp;
diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc
index fab6066..5a485da 100644
--- a/modules/video_coding/frame_object.cc
+++ b/modules/video_coding/frame_object.cc
@@ -11,6 +11,7 @@
 #include "modules/video_coding/frame_object.h"
 
 #include <string.h>
+#include <utility>
 
 #include "api/video/encoded_image.h"
 #include "api/video/video_timing.h"
@@ -28,7 +29,8 @@
                                size_t frame_size,
                                int times_nacked,
                                int64_t first_packet_received_time,
-                               int64_t last_packet_received_time)
+                               int64_t last_packet_received_time,
+                               RtpPacketInfos packet_infos)
     : packet_buffer_(packet_buffer),
       first_seq_num_(first_seq_num),
       last_seq_num_(last_seq_num),
@@ -63,6 +65,7 @@
 
   // EncodedFrame members
   SetTimestamp(first_packet->timestamp);
+  SetPacketInfos(std::move(packet_infos));
 
   VCMPacket* last_packet = packet_buffer_->GetPacket(last_seq_num);
   RTC_CHECK(last_packet);
diff --git a/modules/video_coding/frame_object.h b/modules/video_coding/frame_object.h
index 1ba99cb..3ad356f 100644
--- a/modules/video_coding/frame_object.h
+++ b/modules/video_coding/frame_object.h
@@ -29,7 +29,8 @@
                  size_t frame_size,
                  int times_nacked,
                  int64_t first_packet_received_time,
-                 int64_t last_packet_received_time);
+                 int64_t last_packet_received_time,
+                 RtpPacketInfos packet_infos);
 
   ~RtpFrameObject() override;
   uint16_t first_seq_num() const;
diff --git a/modules/video_coding/generic_decoder.cc b/modules/video_coding/generic_decoder.cc
index cb6c819..ab83119 100644
--- a/modules/video_coding/generic_decoder.cc
+++ b/modules/video_coding/generic_decoder.cc
@@ -84,6 +84,7 @@
   if (frameInfo->color_space) {
     decodedImage.set_color_space(frameInfo->color_space);
   }
+  decodedImage.set_packet_infos(frameInfo->packet_infos);
   decodedImage.set_rotation(frameInfo->rotation);
 
   const int64_t now_ms = _clock->TimeInMilliseconds();
@@ -211,6 +212,7 @@
   } else {
     _frameInfos[_nextFrameInfoIdx].color_space = absl::nullopt;
   }
+  _frameInfos[_nextFrameInfoIdx].packet_infos = frame.PacketInfos();
 
   // Set correctly only for key frames. Thus, use latest key frame
   // content type. If the corresponding key frame was lost, decode will fail
diff --git a/modules/video_coding/generic_decoder.h b/modules/video_coding/generic_decoder.h
index 97336b1..50d7dba 100644
--- a/modules/video_coding/generic_decoder.h
+++ b/modules/video_coding/generic_decoder.h
@@ -36,6 +36,7 @@
   EncodedImage::Timing timing;
   int64_t ntp_time_ms;
   absl::optional<ColorSpace> color_space;
+  RtpPacketInfos packet_infos;
 };
 
 class VCMDecodedFrameCallback : public DecodedImageCallback {
diff --git a/modules/video_coding/generic_decoder_unittest.cc b/modules/video_coding/generic_decoder_unittest.cc
index adc945c..691561d 100644
--- a/modules/video_coding/generic_decoder_unittest.cc
+++ b/modules/video_coding/generic_decoder_unittest.cc
@@ -122,5 +122,31 @@
   EXPECT_EQ(*decoded_color_space, color_space);
 }
 
+TEST_F(GenericDecoderTest, PassesPacketInfos) {
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  VCMEncodedFrame encoded_frame;
+  encoded_frame.SetPacketInfos(packet_infos);
+  generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(10);
+  ASSERT_TRUE(decoded_frame.has_value());
+  EXPECT_EQ(decoded_frame->packet_infos().size(), 3U);
+}
+
+TEST_F(GenericDecoderTest, PassesPacketInfosForDelayedDecoders) {
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  decoder_.SetDelayedDecoding(100);
+
+  {
+    // Ensure the original frame is destroyed before the decoding is completed.
+    VCMEncodedFrame encoded_frame;
+    encoded_frame.SetPacketInfos(packet_infos);
+    generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  }
+
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(200);
+  ASSERT_TRUE(decoded_frame.has_value());
+  EXPECT_EQ(decoded_frame->packet_infos().size(), 3U);
+}
+
 }  // namespace video_coding
 }  // namespace webrtc
diff --git a/modules/video_coding/jitter_buffer_unittest.cc b/modules/video_coding/jitter_buffer_unittest.cc
index 53eba63..315e5f9 100644
--- a/modules/video_coding/jitter_buffer_unittest.cc
+++ b/modules/video_coding/jitter_buffer_unittest.cc
@@ -67,7 +67,8 @@
     video_header.is_first_packet_in_frame = true;
     video_header.frame_type = VideoFrameType::kVideoFrameDelta;
     packet_.reset(new VCMPacket(data_, size_, rtp_header, video_header,
-                                /*ntp_time_ms=*/0));
+                                /*ntp_time_ms=*/0,
+                                clock_->TimeInMilliseconds()));
   }
 
   VCMEncodedFrame* DecodeCompleteFrame() {
@@ -542,7 +543,7 @@
   video_header.codec = kVideoCodecGeneric;
   video_header.frame_type = VideoFrameType::kEmptyFrame;
   VCMPacket empty_packet(data_, 0, rtp_header, video_header,
-                         /*ntp_time_ms=*/0);
+                         /*ntp_time_ms=*/0, clock_->TimeInMilliseconds());
   EXPECT_EQ(kOldPacket,
             jitter_buffer_->InsertPacket(empty_packet, &retransmitted));
   empty_packet.seqNum += 1;
diff --git a/modules/video_coding/packet.cc b/modules/video_coding/packet.cc
index 46df82a..0c4a658 100644
--- a/modules/video_coding/packet.cc
+++ b/modules/video_coding/packet.cc
@@ -25,8 +25,7 @@
       timesNacked(-1),
       completeNALU(kNaluUnset),
       insertStartCode(false),
-      video_header(),
-      receive_time_ms(0) {
+      video_header() {
   video_header.playout_delay = {-1, -1};
 }
 
@@ -34,7 +33,8 @@
                      size_t size,
                      const RTPHeader& rtp_header,
                      const RTPVideoHeader& videoHeader,
-                     int64_t ntp_time_ms)
+                     int64_t ntp_time_ms,
+                     int64_t receive_time_ms)
     : payloadType(rtp_header.payloadType),
       timestamp(rtp_header.timestamp),
       ntp_time_ms_(ntp_time_ms),
@@ -46,7 +46,8 @@
       completeNALU(kNaluIncomplete),
       insertStartCode(videoHeader.codec == kVideoCodecH264 &&
                       videoHeader.is_first_packet_in_frame),
-      video_header(videoHeader) {
+      video_header(videoHeader),
+      packet_info(rtp_header, receive_time_ms) {
   if (is_first_packet_in_frame() && markerBit) {
     completeNALU = kNaluComplete;
   } else if (is_first_packet_in_frame()) {
diff --git a/modules/video_coding/packet.h b/modules/video_coding/packet.h
index 3f22845..f157e10 100644
--- a/modules/video_coding/packet.h
+++ b/modules/video_coding/packet.h
@@ -16,6 +16,7 @@
 
 #include "absl/types/optional.h"
 #include "api/rtp_headers.h"
+#include "api/rtp_packet_info.h"
 #include "api/video/video_frame_type.h"
 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
@@ -39,7 +40,8 @@
             size_t size,
             const RTPHeader& rtp_header,
             const RTPVideoHeader& video_header,
-            int64_t ntp_time_ms);
+            int64_t ntp_time_ms,
+            int64_t receive_time_ms);
 
   ~VCMPacket();
 
@@ -70,7 +72,7 @@
   RTPVideoHeader video_header;
   absl::optional<RtpGenericFrameDescriptor> generic_descriptor;
 
-  int64_t receive_time_ms;
+  RtpPacketInfo packet_info;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index bd1ab03..e6469b1 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -286,8 +286,9 @@
       size_t frame_size = 0;
       int max_nack_count = -1;
       uint16_t start_seq_num = seq_num;
-      int64_t min_recv_time = data_buffer_[index].receive_time_ms;
-      int64_t max_recv_time = data_buffer_[index].receive_time_ms;
+      int64_t min_recv_time = data_buffer_[index].packet_info.receive_time_ms();
+      int64_t max_recv_time = data_buffer_[index].packet_info.receive_time_ms();
+      RtpPacketInfos::vector_type packet_infos;
 
       // Find the start index by searching backward until the packet with
       // the |frame_begin| flag is set.
@@ -310,9 +311,16 @@
         sequence_buffer_[start_index].frame_created = true;
 
         min_recv_time =
-            std::min(min_recv_time, data_buffer_[start_index].receive_time_ms);
+            std::min(min_recv_time,
+                     data_buffer_[start_index].packet_info.receive_time_ms());
         max_recv_time =
-            std::max(max_recv_time, data_buffer_[start_index].receive_time_ms);
+            std::max(max_recv_time,
+                     data_buffer_[start_index].packet_info.receive_time_ms());
+
+        // Ideally this would be |push_front()| since the loop traverses
+        // backwards, but that is too inefficient on a vector, so the order is
+        // fixed afterwards instead.
+        packet_infos.push_back(data_buffer_[start_index].packet_info);
 
         if (!is_h264 && sequence_buffer_[start_index].frame_begin)
           break;
@@ -359,6 +367,9 @@
         --start_seq_num;
       }
 
+      // Fix the order since the packet-finding loop traverses backwards.
+      std::reverse(packet_infos.begin(), packet_infos.end());
+
       if (is_h264) {
         // Warn if this is an unsafe frame.
         if (has_h264_idr && (!has_h264_sps || !has_h264_pps)) {
@@ -406,7 +417,8 @@
 
       found_frames.emplace_back(
           new RtpFrameObject(this, start_seq_num, seq_num, frame_size,
-                             max_nack_count, min_recv_time, max_recv_time));
+                             max_nack_count, min_recv_time, max_recv_time,
+                             RtpPacketInfos(std::move(packet_infos))));
     }
     ++seq_num;
   }
diff --git a/modules/video_coding/rtp_frame_reference_finder_unittest.cc b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
index aba807e..83945d0 100644
--- a/modules/video_coding/rtp_frame_reference_finder_unittest.cc
+++ b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
@@ -92,7 +92,7 @@
     ref_packet_buffer_->InsertPacket(&packet);
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -126,7 +126,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -172,7 +172,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -213,7 +213,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -243,7 +243,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
diff --git a/modules/video_coding/video_receiver.cc b/modules/video_coding/video_receiver.cc
index 8f9e849..2ef06ff 100644
--- a/modules/video_coding/video_receiver.cc
+++ b/modules/video_coding/video_receiver.cc
@@ -338,7 +338,8 @@
   }
   // Callers don't provide any ntp time.
   const VCMPacket packet(incomingPayload, payloadLength, rtp_header,
-                         video_header, /*ntp_time_ms=*/0);
+                         video_header, /*ntp_time_ms=*/0,
+                         clock_->TimeInMilliseconds());
   int32_t ret = _receiver.InsertPacket(packet);
 
   // TODO(holmer): Investigate if this somehow should use the key frame