Set inter_pic_predicted video codec flag in vp9 encoder correctly
This flag only needs to be set in kOn interlayer prediction mode, because
in all others, if new layer is enabled - a keyframe is generated.
Also, use external reference control in that case, because libvpx creates
rtp-incompatible references in that case.
Bug: webrtc:10180
Change-Id: I0fad188fa8cd424f831bac219769dbad3a788b1d
Reviewed-on: https://webrtc-review.googlesource.com/c/118041
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26316}
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 9e03052..48f81b8 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -574,6 +574,224 @@
}
}
+TEST_F(TestVp9Impl,
+ EnablingUpperLayerUnsetsInterPicPredictedInInterlayerPredModeOn) {
+ const size_t num_spatial_layers = 3;
+ const size_t num_frames_to_encode = 2;
+
+ ConfigureSvc(num_spatial_layers);
+ codec_settings_.VP9()->frameDroppingOn = false;
+ codec_settings_.VP9()->flexibleMode = false;
+
+ const std::vector<InterLayerPredMode> inter_layer_pred_modes = {
+ InterLayerPredMode::kOff, InterLayerPredMode::kOn,
+ InterLayerPredMode::kOnKeyPic};
+
+ for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) {
+ codec_settings_.VP9()->interLayerPred = inter_layer_pred;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+ 0 /* max payload size (unused) */));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(sl_idx + 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+
+ ASSERT_EQ(codec_specific_info.size(), sl_idx + 1);
+
+ for (size_t i = 0; i <= sl_idx; ++i) {
+ const bool is_keyframe =
+ encoded_frame[0]._frameType == kVideoFrameKey;
+ const bool is_first_upper_layer_frame =
+ (i == sl_idx && frame_num == 0);
+ // Interframe references are there, unless it's a keyframe,
+ // or it's a first activated frame in a upper layer
+ const bool expect_no_references =
+ is_keyframe || (is_first_upper_layer_frame &&
+ inter_layer_pred == InterLayerPredMode::kOn);
+ EXPECT_EQ(
+ codec_specific_info[i].codecSpecific.VP9.inter_pic_predicted,
+ !expect_no_references);
+ }
+ }
+ }
+ }
+}
+
+TEST_F(TestVp9Impl, EnablingDisablingUpperLayerInTheSameGof) {
+ const size_t num_spatial_layers = 2;
+ const size_t num_temporal_layers = 2;
+
+ ConfigureSvc(num_spatial_layers, num_temporal_layers);
+ codec_settings_.VP9()->frameDroppingOn = false;
+ codec_settings_.VP9()->flexibleMode = false;
+
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+ 0 /* max payload size (unused) */));
+
+ VideoBitrateAllocation bitrate_allocation;
+
+ // Enable both spatial and both temporal layers.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode 3 frames.
+ for (int i = 0; i < 3; ++i) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ }
+
+ // Disable SL1 layer.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ bitrate_allocation.SetBitrate(1, 1, 0);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+
+ // Encode 1 frame.
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 1u);
+ EXPECT_EQ(encoded_frame[0]._frameType, kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+
+ // Enable SL1 layer.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+
+ // Encode 1 frame.
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ EXPECT_EQ(encoded_frame[0]._frameType, kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+ EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted, true);
+}
+
+TEST_F(TestVp9Impl, EnablingDisablingUpperLayerAccrossGof) {
+ const size_t num_spatial_layers = 2;
+ const size_t num_temporal_layers = 2;
+
+ ConfigureSvc(num_spatial_layers, num_temporal_layers);
+ codec_settings_.VP9()->frameDroppingOn = false;
+ codec_settings_.VP9()->flexibleMode = false;
+
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+ 0 /* max payload size (unused) */));
+
+ VideoBitrateAllocation bitrate_allocation;
+
+ // Enable both spatial and both temporal layers.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode 3 frames.
+ for (int i = 0; i < 3; ++i) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ }
+
+ // Disable SL1 layer.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ bitrate_allocation.SetBitrate(1, 1, 0);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+
+ // Encode 11 frames. More than Gof length 2, and odd to end at TL1 frame.
+ for (int i = 0; i < 11; ++i) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 1u);
+ EXPECT_EQ(encoded_frame[0]._frameType, kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1 - i % 2);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted,
+ true);
+ }
+
+ // Enable SL1 layer.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+
+ // Encode 1 frame.
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ EXPECT_EQ(encoded_frame[0]._frameType, kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+ EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted,
+ false);
+}
+
TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
const size_t num_spatial_layers = 3;
// Chosen by hand, the 2nd frame is dropped with configured per-layer max
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 09a08a5..efc11bb 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -452,15 +452,12 @@
config_->rc_buf_sz = 1000;
// Set the maximum target size of any key-frame.
rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz);
- if (inst->VP9().keyFrameInterval > 0) {
- config_->kf_mode = VPX_KF_AUTO;
- config_->kf_max_dist = inst->VP9().keyFrameInterval;
- // Needs to be set (in svc mode) to get correct periodic key frame interval
- // (will have no effect in non-svc).
- config_->kf_min_dist = config_->kf_max_dist;
- } else {
- config_->kf_mode = VPX_KF_DISABLED;
- }
+ // Key-frame interval is enforced manually by this wrapper.
+ config_->kf_mode = VPX_KF_DISABLED;
+ // TODO(webm:1592): work-around for libvpx issue, as it can still
+ // put some key-frames at will even in VPX_KF_DISABLED kf_mode.
+ config_->kf_max_dist = inst->VP9().keyFrameInterval;
+ config_->kf_min_dist = config_->kf_max_dist;
config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0;
// Determine number of threads based on the image size and #cores.
config_->g_threads =
@@ -489,7 +486,8 @@
// External reference control is required for different frame rate on spatial
// layers because libvpx generates rtp incompatible references in this case.
external_ref_control_ = field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl") ||
- different_framerates_used_;
+ different_framerates_used_ ||
+ inter_layer_pred_ == InterLayerPredMode::kOn;
if (num_temporal_layers_ == 1) {
gof_.SetGofInfoVP9(kTemporalStructureMode1);
@@ -733,6 +731,11 @@
}
}
+ if (pics_since_key_ + 1 ==
+ static_cast<size_t>(codec_.VP9()->keyFrameInterval)) {
+ force_key_frame_ = true;
+ }
+
vpx_svc_layer_id_t layer_id = {0};
if (!force_key_frame_) {
const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
@@ -921,6 +924,9 @@
vpx_svc_layer_id_t layer_id = {0};
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+ // Can't have keyframe with non-zero temporal layer.
+ RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0);
+
if (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
layer_id.spatial_layer_id == 0) {
// Force SS info after the layers configuration has changed.
@@ -976,15 +982,16 @@
vp9_info->num_spatial_layers = num_active_spatial_layers_;
vp9_info->num_ref_pics = 0;
+ FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
+ vp9_info);
if (vp9_info->flexible_mode) {
vp9_info->gof_idx = kNoGofIdx;
- FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
- vp9_info);
} else {
vp9_info->gof_idx =
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
- vp9_info->num_ref_pics = gof_.num_ref_pics[vp9_info->gof_idx];
+ RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] ||
+ vp9_info->num_ref_pics == 0);
}
vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0);
@@ -1133,7 +1140,6 @@
ref_buf_[i] = frame_buf;
}
}
-
} else {
RTC_DCHECK_EQ(num_spatial_layers_, 1);
RTC_DCHECK_EQ(num_temporal_layers_, 1);
@@ -1284,9 +1290,7 @@
input_image_->timestamp());
encoded_image_.SetSpatialIndex(spatial_index);
- if (is_flexible_mode_) {
- UpdateReferenceBuffers(*pkt, pics_since_key_);
- }
+ UpdateReferenceBuffers(*pkt, pics_since_key_);
TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
encoded_image_.SetTimestamp(input_image_->timestamp());