Reland "Add spatial index to EncodedImage."

This is a reland of da0898dfae3b0a013ca8ad3828e9adfdc749748d

Original change's description:
> Add spatial index to EncodedImage.
>
> Replaces the VP8 simulcast index and VP9 spatial index formerly part of
> CodecSpecificInfo.
>
> Bug: webrtc:9378
> Change-Id: I80eafd63fbdee0a25864338196a690628b4bd3d2
> Reviewed-on: https://webrtc-review.googlesource.com/83161
> Commit-Queue: Niels Moller <nisse@webrtc.org>
> Reviewed-by: Erik Språng <sprang@webrtc.org>
> Reviewed-by: Sebastian Jansson <srte@webrtc.org>
> Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
> Reviewed-by: Philip Eliasson <philipel@webrtc.org>
> Reviewed-by: Rasmus Brandt <brandtr@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#24485}

Tbr: magjed@webrtc.org
Bug: webrtc:9378
Change-Id: Iff20b656581ef63317e073833d1a326f7118fdfd
Reviewed-on: https://webrtc-review.googlesource.com/96780
Commit-Queue: Niels Moller <nisse@webrtc.org>
Reviewed-by: Sebastian Jansson <srte@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24507}
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index 4fc9b4f..4effcdb 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -506,6 +506,7 @@
             : VideoContentType::UNSPECIFIED;
     encoded_images_[i].timing_.flags = VideoSendTiming::kInvalid;
     encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType);
+    encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx);
 
     // Split encoded image up into fragments. This also updates
     // |encoded_image_|.
@@ -526,8 +527,6 @@
       codec_specific.codecType = kVideoCodecH264;
       codec_specific.codecSpecific.H264.packetization_mode =
           packetization_mode_;
-      codec_specific.codecSpecific.H264.simulcast_idx =
-          configurations_[i].simulcast_idx;
       encoded_image_callback_->OnEncodedImage(encoded_images_[i],
                                               &codec_specific, &frag_header);
     }
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
index 4733b3a..c3a7506 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@@ -285,7 +285,6 @@
 
       CodecSpecificInfo codec_info = *codecSpecificInfo;
       codec_info.codecType = kVideoCodecMultiplex;
-      codec_info.codecSpecific.generic.simulcast_idx = 0;
       encoded_complete_callback_->OnEncodedImage(combined_image_, &codec_info,
                                                  fragmentation);
     }
diff --git a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
index 303914d..56de138 100644
--- a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
+++ b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
@@ -232,7 +232,7 @@
   CodecSpecificInfo codec_specific_info;
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
   EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
-  EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
+  EXPECT_FALSE(encoded_frame.SpatialIndex());
 
   const MultiplexImage& unpacked_frame =
       MultiplexEncodedImagePacker::Unpack(encoded_frame);
@@ -252,7 +252,7 @@
   CodecSpecificInfo codec_specific_info;
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
   EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
-  EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
+  EXPECT_FALSE(encoded_frame.SpatialIndex());
 
   const MultiplexImage& unpacked_frame =
       MultiplexEncodedImagePacker::Unpack(encoded_frame);
diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc
index ccf7053..75605ae 100644
--- a/modules/video_coding/codecs/test/videoprocessor.cc
+++ b/modules/video_coding/codecs/test/videoprocessor.cc
@@ -56,22 +56,17 @@
   return max_size;
 }
 
-void GetLayerIndices(const CodecSpecificInfo& codec_specific,
-                     size_t* spatial_idx,
-                     size_t* temporal_idx) {
+size_t GetTemporalLayerIndex(const CodecSpecificInfo& codec_specific) {
+  size_t temporal_idx = 0;
   if (codec_specific.codecType == kVideoCodecVP8) {
-    *spatial_idx = codec_specific.codecSpecific.VP8.simulcastIdx;
-    *temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
+    temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
   } else if (codec_specific.codecType == kVideoCodecVP9) {
-    *spatial_idx = codec_specific.codecSpecific.VP9.spatial_idx;
-    *temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
+    temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
   }
-  if (*spatial_idx == kNoSpatialIdx) {
-    *spatial_idx = 0;
+  if (temporal_idx == kNoTemporalIdx) {
+    temporal_idx = 0;
   }
-  if (*temporal_idx == kNoTemporalIdx) {
-    *temporal_idx = 0;
-  }
+  return temporal_idx;
 }
 
 int GetElapsedTimeMicroseconds(int64_t start_ns, int64_t stop_ns) {
@@ -347,9 +342,8 @@
   }
 
   // Layer metadata.
-  size_t spatial_idx = 0;
-  size_t temporal_idx = 0;
-  GetLayerIndices(codec_specific, &spatial_idx, &temporal_idx);
+  size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
+  size_t temporal_idx = GetTemporalLayerIndex(codec_specific);
 
   FrameStatistics* frame_stat =
       stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx);
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
index 3826e14..ef46fd2 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@@ -817,7 +817,6 @@
   codec_specific->codecType = kVideoCodecVP8;
   codec_specific->codec_name = ImplementationName();
   CodecSpecificInfoVP8* vp8Info = &(codec_specific->codecSpecific.VP8);
-  vp8Info->simulcastIdx = stream_idx;
   vp8Info->keyIdx = kNoKeyIdx;  // TODO(hlundin) populate this
   vp8Info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
   temporal_layers_[stream_idx]->PopulateCodecSpecific(
@@ -876,6 +875,7 @@
           encoded_images_[encoder_idx]._frameType = kVideoFrameKey;
           is_keyframe = true;
         }
+        encoded_images_[encoder_idx].SetSpatialIndex(stream_idx);
         PopulateCodecSpecific(&codec_specific, tl_configs[stream_idx], *pkt,
                               stream_idx, input_image.timestamp());
         break;
diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
index aaa10dd..dc55d17 100644
--- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
@@ -70,7 +70,7 @@
     VerifyQpParser(*encoded_frame);
     EXPECT_STREQ("libvpx", codec_specific_info->codec_name);
     EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType);
-    EXPECT_EQ(0u, codec_specific_info->codecSpecific.VP8.simulcastIdx);
+    EXPECT_EQ(0, encoded_frame->SpatialIndex());
   }
 
   void EncodeAndExpectFrameWith(const VideoFrame& input_frame,
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index f6594c1..0f5855f 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -356,7 +356,7 @@
             encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
 
   ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
-  EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, kNoSpatialIdx);
+  EXPECT_FALSE(frames[0].SpatialIndex());
   EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture);
 }
 
@@ -395,7 +395,7 @@
 
     // Key frame.
     EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
-    EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+    EXPECT_EQ(frames[0].SpatialIndex(), 0);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
               inter_layer_pred == InterLayerPredMode::kOff);
     EXPECT_TRUE(
@@ -408,7 +408,7 @@
 
     // Delta frame.
     EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
-    EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+    EXPECT_EQ(frames[0].SpatialIndex(), 0);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
               inter_layer_pred == InterLayerPredMode::kOff ||
                   inter_layer_pred == InterLayerPredMode::kOnKeyPic);
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 5805563..03c2c77 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -754,6 +754,7 @@
 }
 
 void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+                                           absl::optional<int>* spatial_idx,
                                            const vpx_codec_cx_pkt& pkt,
                                            uint32_t timestamp,
                                            bool first_frame_in_picture) {
@@ -780,9 +781,9 @@
   }
   if (num_active_spatial_layers_ == 1) {
     RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
-    vp9_info->spatial_idx = kNoSpatialIdx;
+    *spatial_idx = absl::nullopt;
   } else {
-    vp9_info->spatial_idx = layer_id.spatial_layer_id;
+    *spatial_idx = layer_id.spatial_layer_id;
   }
   if (layer_id.spatial_layer_id != 0) {
     vp9_info->ss_data_available = false;
@@ -1021,8 +1022,10 @@
   RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size);
 
   memset(&codec_specific_, 0, sizeof(codec_specific_));
-  PopulateCodecSpecific(&codec_specific_, *pkt, input_image_->timestamp(),
-                        first_frame_in_picture);
+  absl::optional<int> spatial_index;
+  PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
+                        input_image_->timestamp(), first_frame_in_picture);
+  encoded_image_.SetSpatialIndex(spatial_index);
 
   if (is_flexible_mode_) {
     UpdateReferenceBuffers(*pkt, pics_since_key_);
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index e8fd606..887b560 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -61,6 +61,7 @@
   int InitAndSetControlSettings(const VideoCodec* inst);
 
   void PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+                             absl::optional<int>* spatial_idx,
                              const vpx_codec_cx_pkt& pkt,
                              uint32_t timestamp,
                              bool first_frame_in_picture);
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index c53a737..37914e4 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -82,7 +82,6 @@
         if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
           // This is the first packet for this frame.
           _codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
-          _codecSpecificInfo.codecSpecific.VP9.spatial_idx = 0;
           _codecSpecificInfo.codecSpecific.VP9.gof_idx = 0;
           _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted = false;
           _codecSpecificInfo.codecType = kVideoCodecVP9;
@@ -106,8 +105,6 @@
               vp9_header.temporal_up_switch;
         }
         if (vp9_header.spatial_idx != kNoSpatialIdx) {
-          _codecSpecificInfo.codecSpecific.VP9.spatial_idx =
-              vp9_header.spatial_idx;
           _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
               vp9_header.inter_layer_predicted;
         }
diff --git a/modules/video_coding/generic_encoder.cc b/modules/video_coding/generic_encoder.cc
index a8999fc..b1d7c28 100644
--- a/modules/video_coding/generic_encoder.cc
+++ b/modules/video_coding/generic_encoder.cc
@@ -391,21 +391,10 @@
     const RTPFragmentationHeader* fragmentation_header) {
   TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded",
                        "timestamp", encoded_image.Timestamp());
-  size_t simulcast_svc_idx = 0;
-  if (codec_specific->codecType == kVideoCodecVP9) {
-    if (codec_specific->codecSpecific.VP9.num_spatial_layers > 1)
-      simulcast_svc_idx = codec_specific->codecSpecific.VP9.spatial_idx;
-  } else if (codec_specific->codecType == kVideoCodecVP8) {
-    simulcast_svc_idx = codec_specific->codecSpecific.VP8.simulcastIdx;
-  } else if (codec_specific->codecType == kVideoCodecGeneric) {
-    simulcast_svc_idx = codec_specific->codecSpecific.generic.simulcast_idx;
-  } else if (codec_specific->codecType == kVideoCodecH264) {
-    // TODO(ilnik): When h264 simulcast is landed, extract simulcast idx here.
-  }
-
+  const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
   EncodedImage image_copy(encoded_image);
 
-  FillTimingInfo(simulcast_svc_idx, &image_copy);
+  FillTimingInfo(spatial_idx, &image_copy);
 
   // Piggyback ALR experiment group id and simulcast id into the content type.
   uint8_t experiment_id =
@@ -421,7 +410,7 @@
   // id in content type to +1 of that is actual simulcast index. This is because
   // value 0 on the wire is reserved for 'no simulcast stream specified'.
   RTC_CHECK(videocontenttypehelpers::SetSimulcastId(
-      &image_copy.content_type_, static_cast<uint8_t>(simulcast_svc_idx + 1)));
+      &image_copy.content_type_, static_cast<uint8_t>(spatial_idx + 1)));
 
   Result result = post_encode_callback_->OnEncodedImage(
       image_copy, codec_specific, fragmentation_header);
diff --git a/modules/video_coding/generic_encoder_unittest.cc b/modules/video_coding/generic_encoder_unittest.cc
index c889769..2be6856 100644
--- a/modules/video_coding/generic_encoder_unittest.cc
+++ b/modules/video_coding/generic_encoder_unittest.cc
@@ -95,8 +95,8 @@
       image._length = FrameSize(min_frame_size, max_frame_size, s, i);
       image.capture_time_ms_ = current_timestamp;
       image.SetTimestamp(static_cast<uint32_t>(current_timestamp * 90));
+      image.SetSpatialIndex(s);
       codec_specific.codecType = kVideoCodecGeneric;
-      codec_specific.codecSpecific.generic.simulcast_idx = s;
       callback.OnEncodeStarted(static_cast<uint32_t>(current_timestamp * 90),
                                current_timestamp, s);
       if (dropped) {
@@ -189,7 +189,6 @@
   image.capture_time_ms_ = timestamp;
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   VideoCodec::TimingFrameTriggerThresholds thresholds;
@@ -221,7 +220,6 @@
   image.capture_time_ms_ = timestamp;
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   callback.SetInternalSource(true);
@@ -257,7 +255,6 @@
   const int64_t kTimestampMs3 = 47721860;
   const int64_t kTimestampMs4 = 47721870;
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   // Any non-zero bitrate needed to be set before the first frame.
@@ -293,7 +290,6 @@
   CodecSpecificInfo codec_specific;
   const int64_t kTimestampMs = 123456;
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   // Any non-zero bitrate needed to be set before the first frame.
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index 9108625..94d4271 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -28,6 +28,8 @@
 // with a copy-constructor. See below.
 struct CodecSpecificInfoVP8 {
   bool nonReference;
+  // TODO(bugs.webrtc.org/9378): Delete simulcastIdx, replaced by spatial index
+  // member in EncodedImage. Unused, but assigned in downstream code.
   uint8_t simulcastIdx;
   uint8_t temporalIdx;
   bool layerSync;
@@ -43,6 +45,8 @@
   bool non_ref_for_inter_layer_pred;
 
   uint8_t temporal_idx;
+  // TODO(bugs.webrtc.org/9378): Delete spatial_idx, replaced by spatial index
+  // member in EncodedImage. Unused, but assigned in downstream code.
   uint8_t spatial_idx;
   bool temporal_up_switch;
   bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
@@ -63,13 +67,14 @@
   bool end_of_picture;
 };
 
+// TODO(bugs.webrtc.org/9378): Delete this struct. Unused, except that
+// simulcast_idx is assigned in downstream code.
 struct CodecSpecificInfoGeneric {
   uint8_t simulcast_idx;
 };
 
 struct CodecSpecificInfoH264 {
   H264PacketizationMode packetization_mode;
-  uint8_t simulcast_idx;
 };
 
 union CodecSpecificInfoUnion {
diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.cc b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
index 03de176..4af526c 100644
--- a/modules/video_coding/utility/simulcast_test_fixture_impl.cc
+++ b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
@@ -76,15 +76,9 @@
   virtual Result OnEncodedImage(const EncodedImage& encoded_image,
                                 const CodecSpecificInfo* codec_specific_info,
                                 const RTPFragmentationHeader* fragmentation) {
-    uint16_t simulcast_idx = 0;
     bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8);
-    if (is_vp8) {
-      simulcast_idx = codec_specific_info->codecSpecific.VP8.simulcastIdx;
-    } else {
-      simulcast_idx = codec_specific_info->codecSpecific.H264.simulcast_idx;
-    }
     // Only store the base layer.
-    if (simulcast_idx) {
+    if (encoded_image.SpatialIndex().value_or(0) == 0) {
       if (encoded_image._frameType == kVideoFrameKey) {
         delete[] encoded_key_frame_._buffer;
         encoded_key_frame_._buffer = new uint8_t[encoded_image._size];
@@ -104,9 +98,9 @@
       }
     }
     if (is_vp8) {
-      layer_sync_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
+      layer_sync_[encoded_image.SpatialIndex().value_or(0)] =
           codec_specific_info->codecSpecific.VP8.layerSync;
-      temporal_layer_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
+      temporal_layer_[encoded_image.SpatialIndex().value_or(0)] =
           codec_specific_info->codecSpecific.VP8.temporalIdx;
     }
     return Result(Result::OK, encoded_image.Timestamp());