Propagate spatial index to EncodedImage.

Set the spatial index of an assembled VP9 picture equal to the spatial
index of its top spatial layer frame.

Bug: webrtc:10151
Change-Id: Iae40505864b14b01cc6787f8da99a9e3fe283956
Reviewed-on: https://webrtc-review.googlesource.com/c/115280
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26075}
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index ad28ec2..e19146d 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -44,6 +44,7 @@
 
 void VCMEncodedFrame::Reset() {
   SetTimestamp(0);
+  SetSpatialIndex(absl::nullopt);
   _renderTimeMs = -1;
   _payloadType = 0;
   _frameType = kVideoFrameDelta;
@@ -116,6 +117,7 @@
         if (vp9_header.spatial_idx != kNoSpatialIdx) {
           _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
               vp9_header.inter_layer_predicted;
+          SetSpatialIndex(vp9_header.spatial_idx);
         }
         if (vp9_header.gof_idx != kNoGofIdx) {
           _codecSpecificInfo.codecSpecific.VP9.gof_idx = vp9_header.gof_idx;
diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h
index fcc3b5d..978ee5c 100644
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@@ -68,6 +68,7 @@
    *   Frame RTP timestamp (90kHz)
    */
   using EncodedImage::set_size;
+  using EncodedImage::SetSpatialIndex;
   using EncodedImage::SetTimestamp;
   using EncodedImage::size;
   using EncodedImage::Timestamp;
diff --git a/modules/video_coding/frame_buffer2.cc b/modules/video_coding/frame_buffer2.cc
index 52ec2da..e454031 100644
--- a/modules/video_coding/frame_buffer2.cc
+++ b/modules/video_coding/frame_buffer2.cc
@@ -668,28 +668,34 @@
 EncodedFrame* FrameBuffer::CombineAndDeleteFrames(
     const std::vector<EncodedFrame*>& frames) const {
   RTC_DCHECK(!frames.empty());
-  EncodedFrame* frame = frames[0];
+  EncodedFrame* first_frame = frames[0];
+  EncodedFrame* last_frame = frames.back();
   size_t total_length = 0;
   for (size_t i = 0; i < frames.size(); ++i) {
     total_length += frames[i]->size();
   }
-  frame->VerifyAndAllocate(total_length);
-  uint8_t* buffer = frame->MutableBuffer();
+  first_frame->VerifyAndAllocate(total_length);
+
+  // The combined frame takes the spatial index of its top spatial layer, which
+  // is assumed to be the last frame in |frames|.
+  first_frame->SetSpatialIndex(last_frame->id.spatial_layer);
+  first_frame->id.spatial_layer = last_frame->id.spatial_layer;
+
+  first_frame->video_timing_mutable()->network2_timestamp_ms =
+      last_frame->video_timing().network2_timestamp_ms;
+  first_frame->video_timing_mutable()->receive_finish_ms =
+      last_frame->video_timing().receive_finish_ms;
+
   // Append all remaining frames to the first one.
-  size_t used_buffer_bytes = frame->size();
+  uint8_t* buffer = first_frame->MutableBuffer() + first_frame->size();
   for (size_t i = 1; i < frames.size(); ++i) {
-    EncodedFrame* frame_to_append = frames[i];
-    memcpy(buffer + used_buffer_bytes, frame_to_append->Buffer(),
-           frame_to_append->size());
-    used_buffer_bytes += frame_to_append->size();
-    frame->video_timing_mutable()->network2_timestamp_ms =
-        frame_to_append->video_timing().network2_timestamp_ms;
-    frame->video_timing_mutable()->receive_finish_ms =
-        frame_to_append->video_timing().receive_finish_ms;
-    delete frame_to_append;
+    EncodedFrame* next_frame = frames[i];
+    memcpy(buffer, next_frame->Buffer(), next_frame->size());
+    buffer += next_frame->size();
+    delete next_frame;
   }
-  frame->set_size(total_length);
-  return frame;
+  first_frame->set_size(total_length);
+  return first_frame;
 }
 
 FrameBuffer::FrameInfo::FrameInfo() = default;
diff --git a/modules/video_coding/frame_buffer2_unittest.cc b/modules/video_coding/frame_buffer2_unittest.cc
index 578734c..321281f 100644
--- a/modules/video_coding/frame_buffer2_unittest.cc
+++ b/modules/video_coding/frame_buffer2_unittest.cc
@@ -161,6 +161,7 @@
     std::unique_ptr<FrameObjectFake> frame(new FrameObjectFake());
     frame->id.picture_id = picture_id;
     frame->id.spatial_layer = spatial_layer;
+    frame->SetSpatialIndex(spatial_layer);
     frame->SetTimestamp(ts_ms * 90);
     frame->num_references = references.size();
     frame->inter_layer_predicted = inter_layer_predicted;
@@ -271,7 +272,7 @@
   InsertFrame(pid, 1, ts, true, true);
   ExtractFrame();
 
-  CheckFrame(0, pid, 0);
+  CheckFrame(0, pid, 1);
 }
 
 TEST_F(TestFrameBuffer2, SetPlayoutDelay) {
@@ -599,7 +600,7 @@
   InsertFrame(pid, 1, ts, true, true);
   ExtractFrame(0);
   ExtractFrame(0);
-  CheckFrame(0, pid, 0);
+  CheckFrame(0, pid, 1);
   CheckNoFrame(1);
   // Two frames should be combined and returned together.
   CheckFrameSize(0, kFrameSize * 2);
@@ -613,7 +614,7 @@
   InsertFrame(pid, 1, ts, true, true);
 
   ExtractFrame(0);
-  CheckFrame(0, pid, 0);
+  CheckFrame(0, pid, 1);
 
   InsertFrame(pid + 1, 1, ts + kFps20, false, true, pid);
   InsertFrame(pid + 2, 0, ts + kFps10, false, false, pid);
@@ -627,7 +628,7 @@
   ExtractFrame();
   ExtractFrame();
   CheckFrame(1, pid + 1, 1);
-  CheckFrame(2, pid + 2, 0);
+  CheckFrame(2, pid + 2, 1);
 }
 
 }  // namespace video_coding