Reland "Add spatial index to EncodedImage."
This is a reland of da0898dfae3b0a013ca8ad3828e9adfdc749748d
Original change's description:
> Add spatial index to EncodedImage.
>
> Replaces the VP8 simulcast index and VP9 spatial index formerly part of
> CodecSpecificInfo.
>
> Bug: webrtc:9378
> Change-Id: I80eafd63fbdee0a25864338196a690628b4bd3d2
> Reviewed-on: https://webrtc-review.googlesource.com/83161
> Commit-Queue: Niels Moller <nisse@webrtc.org>
> Reviewed-by: Erik Språng <sprang@webrtc.org>
> Reviewed-by: Sebastian Jansson <srte@webrtc.org>
> Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
> Reviewed-by: Philip Eliasson <philipel@webrtc.org>
> Reviewed-by: Rasmus Brandt <brandtr@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#24485}
Tbr: magjed@webrtc.org
Bug: webrtc:9378
Change-Id: Iff20b656581ef63317e073833d1a326f7118fdfd
Reviewed-on: https://webrtc-review.googlesource.com/96780
Commit-Queue: Niels Moller <nisse@webrtc.org>
Reviewed-by: Sebastian Jansson <srte@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24507}
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index 4fc9b4f..4effcdb 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -506,6 +506,7 @@
: VideoContentType::UNSPECIFIED;
encoded_images_[i].timing_.flags = VideoSendTiming::kInvalid;
encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType);
+ encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx);
// Split encoded image up into fragments. This also updates
// |encoded_image_|.
@@ -526,8 +527,6 @@
codec_specific.codecType = kVideoCodecH264;
codec_specific.codecSpecific.H264.packetization_mode =
packetization_mode_;
- codec_specific.codecSpecific.H264.simulcast_idx =
- configurations_[i].simulcast_idx;
encoded_image_callback_->OnEncodedImage(encoded_images_[i],
&codec_specific, &frag_header);
}
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
index 4733b3a..c3a7506 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@@ -285,7 +285,6 @@
CodecSpecificInfo codec_info = *codecSpecificInfo;
codec_info.codecType = kVideoCodecMultiplex;
- codec_info.codecSpecific.generic.simulcast_idx = 0;
encoded_complete_callback_->OnEncodedImage(combined_image_, &codec_info,
fragmentation);
}
diff --git a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
index 303914d..56de138 100644
--- a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
+++ b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
@@ -232,7 +232,7 @@
CodecSpecificInfo codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
- EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
+ EXPECT_FALSE(encoded_frame.SpatialIndex());
const MultiplexImage& unpacked_frame =
MultiplexEncodedImagePacker::Unpack(encoded_frame);
@@ -252,7 +252,7 @@
CodecSpecificInfo codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
- EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
+ EXPECT_FALSE(encoded_frame.SpatialIndex());
const MultiplexImage& unpacked_frame =
MultiplexEncodedImagePacker::Unpack(encoded_frame);
diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc
index ccf7053..75605ae 100644
--- a/modules/video_coding/codecs/test/videoprocessor.cc
+++ b/modules/video_coding/codecs/test/videoprocessor.cc
@@ -56,22 +56,17 @@
return max_size;
}
-void GetLayerIndices(const CodecSpecificInfo& codec_specific,
- size_t* spatial_idx,
- size_t* temporal_idx) {
+size_t GetTemporalLayerIndex(const CodecSpecificInfo& codec_specific) {
+ size_t temporal_idx = 0;
if (codec_specific.codecType == kVideoCodecVP8) {
- *spatial_idx = codec_specific.codecSpecific.VP8.simulcastIdx;
- *temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
+ temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
} else if (codec_specific.codecType == kVideoCodecVP9) {
- *spatial_idx = codec_specific.codecSpecific.VP9.spatial_idx;
- *temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
+ temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
}
- if (*spatial_idx == kNoSpatialIdx) {
- *spatial_idx = 0;
+ if (temporal_idx == kNoTemporalIdx) {
+ temporal_idx = 0;
}
- if (*temporal_idx == kNoTemporalIdx) {
- *temporal_idx = 0;
- }
+ return temporal_idx;
}
int GetElapsedTimeMicroseconds(int64_t start_ns, int64_t stop_ns) {
@@ -347,9 +342,8 @@
}
// Layer metadata.
- size_t spatial_idx = 0;
- size_t temporal_idx = 0;
- GetLayerIndices(codec_specific, &spatial_idx, &temporal_idx);
+ size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
+ size_t temporal_idx = GetTemporalLayerIndex(codec_specific);
FrameStatistics* frame_stat =
stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx);
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
index 3826e14..ef46fd2 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@@ -817,7 +817,6 @@
codec_specific->codecType = kVideoCodecVP8;
codec_specific->codec_name = ImplementationName();
CodecSpecificInfoVP8* vp8Info = &(codec_specific->codecSpecific.VP8);
- vp8Info->simulcastIdx = stream_idx;
vp8Info->keyIdx = kNoKeyIdx; // TODO(hlundin) populate this
vp8Info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
temporal_layers_[stream_idx]->PopulateCodecSpecific(
@@ -876,6 +875,7 @@
encoded_images_[encoder_idx]._frameType = kVideoFrameKey;
is_keyframe = true;
}
+ encoded_images_[encoder_idx].SetSpatialIndex(stream_idx);
PopulateCodecSpecific(&codec_specific, tl_configs[stream_idx], *pkt,
stream_idx, input_image.timestamp());
break;
diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
index aaa10dd..dc55d17 100644
--- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
@@ -70,7 +70,7 @@
VerifyQpParser(*encoded_frame);
EXPECT_STREQ("libvpx", codec_specific_info->codec_name);
EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType);
- EXPECT_EQ(0u, codec_specific_info->codecSpecific.VP8.simulcastIdx);
+ EXPECT_EQ(0, encoded_frame->SpatialIndex());
}
void EncodeAndExpectFrameWith(const VideoFrame& input_frame,
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index f6594c1..0f5855f 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -356,7 +356,7 @@
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
- EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, kNoSpatialIdx);
+ EXPECT_FALSE(frames[0].SpatialIndex());
EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture);
}
@@ -395,7 +395,7 @@
// Key frame.
EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
- EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+ EXPECT_EQ(frames[0].SpatialIndex(), 0);
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
inter_layer_pred == InterLayerPredMode::kOff);
EXPECT_TRUE(
@@ -408,7 +408,7 @@
// Delta frame.
EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
- EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+ EXPECT_EQ(frames[0].SpatialIndex(), 0);
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
inter_layer_pred == InterLayerPredMode::kOff ||
inter_layer_pred == InterLayerPredMode::kOnKeyPic);
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 5805563..03c2c77 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -754,6 +754,7 @@
}
void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+ absl::optional<int>* spatial_idx,
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp,
bool first_frame_in_picture) {
@@ -780,9 +781,9 @@
}
if (num_active_spatial_layers_ == 1) {
RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
- vp9_info->spatial_idx = kNoSpatialIdx;
+ *spatial_idx = absl::nullopt;
} else {
- vp9_info->spatial_idx = layer_id.spatial_layer_id;
+ *spatial_idx = layer_id.spatial_layer_id;
}
if (layer_id.spatial_layer_id != 0) {
vp9_info->ss_data_available = false;
@@ -1021,8 +1022,10 @@
RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size);
memset(&codec_specific_, 0, sizeof(codec_specific_));
- PopulateCodecSpecific(&codec_specific_, *pkt, input_image_->timestamp(),
- first_frame_in_picture);
+ absl::optional<int> spatial_index;
+ PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
+ input_image_->timestamp(), first_frame_in_picture);
+ encoded_image_.SetSpatialIndex(spatial_index);
if (is_flexible_mode_) {
UpdateReferenceBuffers(*pkt, pics_since_key_);
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index e8fd606..887b560 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -61,6 +61,7 @@
int InitAndSetControlSettings(const VideoCodec* inst);
void PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+ absl::optional<int>* spatial_idx,
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp,
bool first_frame_in_picture);
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index c53a737..37914e4 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -82,7 +82,6 @@
if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
// This is the first packet for this frame.
_codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
- _codecSpecificInfo.codecSpecific.VP9.spatial_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.gof_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted = false;
_codecSpecificInfo.codecType = kVideoCodecVP9;
@@ -106,8 +105,6 @@
vp9_header.temporal_up_switch;
}
if (vp9_header.spatial_idx != kNoSpatialIdx) {
- _codecSpecificInfo.codecSpecific.VP9.spatial_idx =
- vp9_header.spatial_idx;
_codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
vp9_header.inter_layer_predicted;
}
diff --git a/modules/video_coding/generic_encoder.cc b/modules/video_coding/generic_encoder.cc
index a8999fc..b1d7c28 100644
--- a/modules/video_coding/generic_encoder.cc
+++ b/modules/video_coding/generic_encoder.cc
@@ -391,21 +391,10 @@
const RTPFragmentationHeader* fragmentation_header) {
TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded",
"timestamp", encoded_image.Timestamp());
- size_t simulcast_svc_idx = 0;
- if (codec_specific->codecType == kVideoCodecVP9) {
- if (codec_specific->codecSpecific.VP9.num_spatial_layers > 1)
- simulcast_svc_idx = codec_specific->codecSpecific.VP9.spatial_idx;
- } else if (codec_specific->codecType == kVideoCodecVP8) {
- simulcast_svc_idx = codec_specific->codecSpecific.VP8.simulcastIdx;
- } else if (codec_specific->codecType == kVideoCodecGeneric) {
- simulcast_svc_idx = codec_specific->codecSpecific.generic.simulcast_idx;
- } else if (codec_specific->codecType == kVideoCodecH264) {
- // TODO(ilnik): When h264 simulcast is landed, extract simulcast idx here.
- }
-
+ const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
EncodedImage image_copy(encoded_image);
- FillTimingInfo(simulcast_svc_idx, &image_copy);
+ FillTimingInfo(spatial_idx, &image_copy);
// Piggyback ALR experiment group id and simulcast id into the content type.
uint8_t experiment_id =
@@ -421,7 +410,7 @@
// id in content type to +1 of that is actual simulcast index. This is because
// value 0 on the wire is reserved for 'no simulcast stream specified'.
RTC_CHECK(videocontenttypehelpers::SetSimulcastId(
- &image_copy.content_type_, static_cast<uint8_t>(simulcast_svc_idx + 1)));
+ &image_copy.content_type_, static_cast<uint8_t>(spatial_idx + 1)));
Result result = post_encode_callback_->OnEncodedImage(
image_copy, codec_specific, fragmentation_header);
diff --git a/modules/video_coding/generic_encoder_unittest.cc b/modules/video_coding/generic_encoder_unittest.cc
index c889769..2be6856 100644
--- a/modules/video_coding/generic_encoder_unittest.cc
+++ b/modules/video_coding/generic_encoder_unittest.cc
@@ -95,8 +95,8 @@
image._length = FrameSize(min_frame_size, max_frame_size, s, i);
image.capture_time_ms_ = current_timestamp;
image.SetTimestamp(static_cast<uint32_t>(current_timestamp * 90));
+ image.SetSpatialIndex(s);
codec_specific.codecType = kVideoCodecGeneric;
- codec_specific.codecSpecific.generic.simulcast_idx = s;
callback.OnEncodeStarted(static_cast<uint32_t>(current_timestamp * 90),
current_timestamp, s);
if (dropped) {
@@ -189,7 +189,6 @@
image.capture_time_ms_ = timestamp;
image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
codec_specific.codecType = kVideoCodecGeneric;
- codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
VideoCodec::TimingFrameTriggerThresholds thresholds;
@@ -221,7 +220,6 @@
image.capture_time_ms_ = timestamp;
image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
codec_specific.codecType = kVideoCodecGeneric;
- codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
callback.SetInternalSource(true);
@@ -257,7 +255,6 @@
const int64_t kTimestampMs3 = 47721860;
const int64_t kTimestampMs4 = 47721870;
codec_specific.codecType = kVideoCodecGeneric;
- codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
// Any non-zero bitrate needed to be set before the first frame.
@@ -293,7 +290,6 @@
CodecSpecificInfo codec_specific;
const int64_t kTimestampMs = 123456;
codec_specific.codecType = kVideoCodecGeneric;
- codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
// Any non-zero bitrate needed to be set before the first frame.
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index 9108625..94d4271 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -28,6 +28,8 @@
// with a copy-constructor. See below.
struct CodecSpecificInfoVP8 {
bool nonReference;
+ // TODO(bugs.webrtc.org/9378): Delete simulcastIdx, replaced by spatial index
+ // member in EncodedImage. Unused, but assigned in downstream code.
uint8_t simulcastIdx;
uint8_t temporalIdx;
bool layerSync;
@@ -43,6 +45,8 @@
bool non_ref_for_inter_layer_pred;
uint8_t temporal_idx;
+ // TODO(bugs.webrtc.org/9378): Delete spatial_idx, replaced by spatial index
+ // member in EncodedImage. Unused, but assigned in downstream code.
uint8_t spatial_idx;
bool temporal_up_switch;
bool inter_layer_predicted; // Frame is dependent on directly lower spatial
@@ -63,13 +67,14 @@
bool end_of_picture;
};
+// TODO(bugs.webrtc.org/9378): Delete this struct. Unused, except that
+// simulcast_idx is assigned in downstream code.
struct CodecSpecificInfoGeneric {
uint8_t simulcast_idx;
};
struct CodecSpecificInfoH264 {
H264PacketizationMode packetization_mode;
- uint8_t simulcast_idx;
};
union CodecSpecificInfoUnion {
diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.cc b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
index 03de176..4af526c 100644
--- a/modules/video_coding/utility/simulcast_test_fixture_impl.cc
+++ b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
@@ -76,15 +76,9 @@
virtual Result OnEncodedImage(const EncodedImage& encoded_image,
const CodecSpecificInfo* codec_specific_info,
const RTPFragmentationHeader* fragmentation) {
- uint16_t simulcast_idx = 0;
bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8);
- if (is_vp8) {
- simulcast_idx = codec_specific_info->codecSpecific.VP8.simulcastIdx;
- } else {
- simulcast_idx = codec_specific_info->codecSpecific.H264.simulcast_idx;
- }
// Only store the base layer.
- if (simulcast_idx) {
+ if (encoded_image.SpatialIndex().value_or(0) == 0) {
if (encoded_image._frameType == kVideoFrameKey) {
delete[] encoded_key_frame_._buffer;
encoded_key_frame_._buffer = new uint8_t[encoded_image._size];
@@ -104,9 +98,9 @@
}
}
if (is_vp8) {
- layer_sync_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
+ layer_sync_[encoded_image.SpatialIndex().value_or(0)] =
codec_specific_info->codecSpecific.VP8.layerSync;
- temporal_layer_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
+ temporal_layer_[encoded_image.SpatialIndex().value_or(0)] =
codec_specific_info->codecSpecific.VP8.temporalIdx;
}
return Result(Result::OK, encoded_image.Timestamp());