Blame - modules/video_coding/rtp_vp9_ref_finder.cc - webrtc.googlesource.com/src

blob: e1dba9cd0e84c727da4fac532b100a5dab2c37c6 [file] [log] [blame]

philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#include "modules/video_coding/rtp_vp9_ref_finder.h"
				12
				13	#include <algorithm>
				14	#include <utility>
				15
				16	#include "rtc_base/logging.h"
				17
				18	namespace webrtc {
				19	namespace video_coding {
				20
				21	RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
				22	std::unique_ptr<RtpFrameObject> frame) {
				23	FrameDecision decision = ManageFrameInternal(frame.get());
				24
				25	RtpFrameReferenceFinder::ReturnVector res;
				26	switch (decision) {
				27	case kStash:
				28	if (stashed_frames_.size() > kMaxStashedFrames)
				29	stashed_frames_.pop_back();
				30	stashed_frames_.push_front(std::move(frame));
				31	return res;
				32	case kHandOff:
				33	res.push_back(std::move(frame));
				34	RetryStashedFrames(res);
				35	return res;
				36	case kDrop:
				37	return res;
				38	}
				39
				40	return res;
				41	}
				42
				43	RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
				44	RtpFrameObject* frame) {
				45	const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
				46	const RTPVideoHeaderVP9& codec_header =
				47	absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
				48
				49	// Protect against corrupted packets with arbitrary large temporal idx.
				50	if (codec_header.temporal_idx >= kMaxTemporalLayers \|\|
				51	codec_header.spatial_idx >= kMaxSpatialLayers)
				52	return kDrop;
				53
philipel	0cb7326	2020-12-08 17:36:53 +0100	[diff] [blame]	54	frame->SetSpatialIndex(codec_header.spatial_idx);
philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	55	frame->id.picture_id = codec_header.picture_id & (kFrameIdLength - 1);
				56
				57	if (last_picture_id_ == -1)
				58	last_picture_id_ = frame->id.picture_id;
				59
				60	if (codec_header.flexible_mode) {
				61	if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
				62	return kDrop;
				63	}
				64	frame->num_references = codec_header.num_ref_pics;
				65	for (size_t i = 0; i < frame->num_references; ++i) {
				66	frame->references[i] = Subtract<kFrameIdLength>(frame->id.picture_id,
				67	codec_header.pid_diff[i]);
				68	}
				69
philipel	0cb7326	2020-12-08 17:36:53 +0100	[diff] [blame]	70	FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	71	return kHandOff;
				72	}
				73
				74	if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
				75	RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
				76	"non-flexible mode.";
				77	return kDrop;
				78	}
				79
				80	GofInfo* info;
				81	int64_t unwrapped_tl0 =
				82	tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
				83	if (codec_header.ss_data_available) {
				84	if (codec_header.temporal_idx != 0) {
				85	RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
				86	"layer frame. Scalability structure ignored.";
				87	} else {
				88	if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
				89	return kDrop;
				90	}
				91
				92	for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
				93	if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
				94	return kDrop;
				95	}
				96	}
				97
				98	GofInfoVP9 gof = codec_header.gof;
				99	if (gof.num_frames_in_gof == 0) {
				100	RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
				101	"that stream has only one temporal layer.";
				102	gof.SetGofInfoVP9(kTemporalStructureMode1);
				103	}
				104
				105	current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
				106	scalability_structures_[current_ss_idx_] = gof;
				107	scalability_structures_[current_ss_idx_].pid_start = frame->id.picture_id;
				108	gof_info_.emplace(unwrapped_tl0,
				109	GofInfo(&scalability_structures_[current_ss_idx_],
				110	frame->id.picture_id));
				111	}
				112
				113	const auto gof_info_it = gof_info_.find(unwrapped_tl0);
				114	if (gof_info_it == gof_info_.end())
				115	return kStash;
				116
				117	info = &gof_info_it->second;
				118
				119	if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
				120	frame->num_references = 0;
				121	FrameReceivedVp9(frame->id.picture_id, info);
philipel	0cb7326	2020-12-08 17:36:53 +0100	[diff] [blame]	122	FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	123	return kHandOff;
				124	}
				125	} else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
philipel	0cb7326	2020-12-08 17:36:53 +0100	[diff] [blame]	126	if (frame->SpatialIndex() == 0) {
philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	127	RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
				128	return kDrop;
				129	}
				130	const auto gof_info_it = gof_info_.find(unwrapped_tl0);
				131	if (gof_info_it == gof_info_.end())
				132	return kStash;
				133
				134	info = &gof_info_it->second;
				135
philipel	0cb7326	2020-12-08 17:36:53 +0100	[diff] [blame]	136	frame->num_references = 0;
				137	FrameReceivedVp9(frame->id.picture_id, info);
				138	FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
				139	return kHandOff;
philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	140	} else {
				141	auto gof_info_it = gof_info_.find(
				142	(codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
				143
				144	// Gof info for this frame is not available yet, stash this frame.
				145	if (gof_info_it == gof_info_.end())
				146	return kStash;
				147
				148	if (codec_header.temporal_idx == 0) {
				149	gof_info_it = gof_info_
				150	.emplace(unwrapped_tl0, GofInfo(gof_info_it->second.gof,
				151	frame->id.picture_id))
				152	.first;
				153	}
				154
				155	info = &gof_info_it->second;
				156	}
				157
				158	// Clean up info for base layers that are too old.
				159	int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
				160	auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
				161	gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
				162
				163	FrameReceivedVp9(frame->id.picture_id, info);
				164
				165	// Make sure we don't miss any frame that could potentially have the
				166	// up switch flag set.
				167	if (MissingRequiredFrameVp9(frame->id.picture_id, *info))
				168	return kStash;
				169
				170	if (codec_header.temporal_up_switch)
				171	up_switch_.emplace(frame->id.picture_id, codec_header.temporal_idx);
				172
				173	// Clean out old info about up switch frames.
				174	uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->id.picture_id, 50);
				175	auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
				176	up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
				177
				178	size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
				179	frame->id.picture_id);
				180	size_t gof_idx = diff % info->gof->num_frames_in_gof;
				181
				182	if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
				183	return kDrop;
				184	}
				185	// Populate references according to the scalability structure.
				186	frame->num_references = info->gof->num_ref_pics[gof_idx];
				187	for (size_t i = 0; i < frame->num_references; ++i) {
				188	frame->references[i] = Subtract<kFrameIdLength>(
				189	frame->id.picture_id, info->gof->pid_diff[gof_idx][i]);
				190
				191	// If this is a reference to a frame earlier than the last up switch point,
				192	// then ignore this reference.
				193	if (UpSwitchInIntervalVp9(frame->id.picture_id, codec_header.temporal_idx,
				194	frame->references[i])) {
				195	--frame->num_references;
				196	}
				197	}
				198
				199	// Override GOF references.
				200	if (!codec_header.inter_pic_predicted) {
				201	frame->num_references = 0;
				202	}
				203
philipel	0cb7326	2020-12-08 17:36:53 +0100	[diff] [blame]	204	FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	205	return kHandOff;
				206	}
				207
				208	bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
				209	const GofInfo& info) {
				210	size_t diff =
				211	ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
				212	size_t gof_idx = diff % info.gof->num_frames_in_gof;
				213	size_t temporal_idx = info.gof->temporal_idx[gof_idx];
				214
				215	if (temporal_idx >= kMaxTemporalLayers) {
				216	RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
				217	<< " temporal "
				218	"layers are supported.";
				219	return true;
				220	}
				221
				222	// For every reference this frame has, check if there is a frame missing in
				223	// the interval (\|ref_pid\|, \|picture_id\|) in any of the lower temporal
				224	// layers. If so, we are missing a required frame.
				225	uint8_t num_references = info.gof->num_ref_pics[gof_idx];
				226	for (size_t i = 0; i < num_references; ++i) {
				227	uint16_t ref_pid =
				228	Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
				229	for (size_t l = 0; l < temporal_idx; ++l) {
				230	auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
				231	if (missing_frame_it != missing_frames_for_layer_[l].end() &&
				232	AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
				233	return true;
				234	}
				235	}
				236	}
				237	return false;
				238	}
				239
				240	void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
				241	int last_picture_id = info->last_picture_id;
				242	size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);
				243
				244	// If there is a gap, find which temporal layer the missing frames
				245	// belong to and add the frame as missing for that temporal layer.
				246	// Otherwise, remove this frame from the set of missing frames.
				247	if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
				248	size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
				249	last_picture_id);
				250	size_t gof_idx = diff % gof_size;
				251
				252	last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
				253	while (last_picture_id != picture_id) {
				254	gof_idx = (gof_idx + 1) % gof_size;
				255	RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
				256
				257	size_t temporal_idx = info->gof->temporal_idx[gof_idx];
				258	if (temporal_idx >= kMaxTemporalLayers) {
				259	RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
				260	<< " temporal "
				261	"layers are supported.";
				262	return;
				263	}
				264
				265	missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
				266	last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
				267	}
				268
				269	info->last_picture_id = last_picture_id;
				270	} else {
				271	size_t diff =
				272	ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
				273	size_t gof_idx = diff % gof_size;
				274	RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
				275
				276	size_t temporal_idx = info->gof->temporal_idx[gof_idx];
				277	if (temporal_idx >= kMaxTemporalLayers) {
				278	RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
				279	<< " temporal "
				280	"layers are supported.";
				281	return;
				282	}
				283
				284	missing_frames_for_layer_[temporal_idx].erase(picture_id);
				285	}
				286	}
				287
				288	bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
				289	uint8_t temporal_idx,
				290	uint16_t pid_ref) {
				291	for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
				292	up_switch_it != up_switch_.end() &&
				293	AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
				294	++up_switch_it) {
				295	if (up_switch_it->second < temporal_idx)
				296	return true;
				297	}
				298
				299	return false;
				300	}
				301
				302	void RtpVp9RefFinder::RetryStashedFrames(
				303	RtpFrameReferenceFinder::ReturnVector& res) {
				304	bool complete_frame = false;
				305	do {
				306	complete_frame = false;
				307	for (auto frame_it = stashed_frames_.begin();
				308	frame_it != stashed_frames_.end();) {
				309	FrameDecision decision = ManageFrameInternal(frame_it->get());
				310
				311	switch (decision) {
				312	case kStash:
				313	++frame_it;
				314	break;
				315	case kHandOff:
				316	complete_frame = true;
				317	res.push_back(std::move(*frame_it));
				318	ABSL_FALLTHROUGH_INTENDED;
				319	case kDrop:
				320	frame_it = stashed_frames_.erase(frame_it);
				321	}
				322	}
				323	} while (complete_frame);
				324	}
				325
philipel	0cb7326	2020-12-08 17:36:53 +0100	[diff] [blame]	326	void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
				327	bool inter_layer_predicted) {
				328	for (size_t i = 0; i < frame->num_references; ++i) {
				329	frame->references[i] =
				330	unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
				331	*frame->SpatialIndex();
				332	}
				333	frame->id.picture_id =
				334	unwrapper_.Unwrap(frame->id.picture_id) * kMaxSpatialLayers +
				335	*frame->SpatialIndex();
				336
				337	if (inter_layer_predicted &&
				338	frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
				339	frame->references[frame->num_references] = frame->id.picture_id - 1;
				340	++frame->num_references;
				341	}
philipel	4e70216	2020-11-27 17:56:37 +0100	[diff] [blame]	342	}
				343
				344	void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
				345	auto it = stashed_frames_.begin();
				346	while (it != stashed_frames_.end()) {
				347	if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
				348	it = stashed_frames_.erase(it);
				349	} else {
				350	++it;
				351	}
				352	}
				353	}
				354
				355	} // namespace video_coding
				356	} // namespace webrtc