/*
 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/video_coding/rtp_vp9_ref_finder.h"

#include <algorithm>
#include <utility>

#include "rtc_base/logging.h"

namespace webrtc {
namespace video_coding {

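// Computes references for |frame| and decides whether it can be handed off to
// the caller, has to be stashed until more frames arrive, or should be
// dropped. Handing off a frame also retries all currently stashed frames.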
RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  FrameDecision decision = ManageFrameInternal(frame.get());

  RtpFrameReferenceFinder::ReturnVector res;
  switch (decision) {
    case kStash:
      if (stashed_frames_.size() > kMaxStashedFrames)
        stashed_frames_.pop_back();
      stashed_frames_.push_front(std::move(frame));
      return res;
    case kHandOff:
      res.push_back(std::move(frame));
      RetryStashedFrames(res);
      return res;
    case kDrop:
      return res;
  }

  return res;
}

RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
    RtpFrameObject* frame) {
  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
  const RTPVideoHeaderVP9& codec_header =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);

  // Protect against corrupted packets with arbitrary large temporal idx.
  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
      codec_header.spatial_idx >= kMaxSpatialLayers)
    return kDrop;

  frame->SetSpatialIndex(codec_header.spatial_idx);
  frame->id.picture_id = codec_header.picture_id & (kFrameIdLength - 1);

  if (last_picture_id_ == -1)
    last_picture_id_ = frame->id.picture_id;

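  // In flexible mode the referenced picture ids are signaled explicitly in the
  // packet as diffs from this frame's picture id, so the references can be
  // assigned directly.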
  if (codec_header.flexible_mode) {
    if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
      return kDrop;
    }
    frame->num_references = codec_header.num_ref_pics;
    for (size_t i = 0; i < frame->num_references; ++i) {
      frame->references[i] = Subtract<kFrameIdLength>(frame->id.picture_id,
                                                      codec_header.pid_diff[i]);
    }

    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
    return kHandOff;
  }

  if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
    RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
                           "non-flexible mode.";
    return kDrop;
  }

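  // In non-flexible mode references are derived from the group of frames
  // (GOF) structure. GOF info is stored per unwrapped TL0PICIDX.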
  GofInfo* info;
  int64_t unwrapped_tl0 =
      tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
  if (codec_header.ss_data_available) {
    if (codec_header.temporal_idx != 0) {
      RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
                             "layer frame. Scalability structure ignored.";
    } else {
      if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
        return kDrop;
      }

      for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
        if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
          return kDrop;
        }
      }

      GofInfoVP9 gof = codec_header.gof;
      if (gof.num_frames_in_gof == 0) {
        RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
                               "that stream has only one temporal layer.";
        gof.SetGofInfoVP9(kTemporalStructureMode1);
      }

      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
      scalability_structures_[current_ss_idx_] = gof;
      scalability_structures_[current_ss_idx_].pid_start = frame->id.picture_id;
      gof_info_.emplace(unwrapped_tl0,
                        GofInfo(&scalability_structures_[current_ss_idx_],
                                frame->id.picture_id));
    }

    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
      frame->num_references = 0;
      FrameReceivedVp9(frame->id.picture_id, info);
      FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
      return kHandOff;
    }
  } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
    if (frame->SpatialIndex() == 0) {
      RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
      return kDrop;
    }
    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    frame->num_references = 0;
    FrameReceivedVp9(frame->id.picture_id, info);
    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
    return kHandOff;
  } else {
    auto gof_info_it = gof_info_.find(
        (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);

    // Gof info for this frame is not available yet, stash this frame.
    if (gof_info_it == gof_info_.end())
      return kStash;

    if (codec_header.temporal_idx == 0) {
      gof_info_it = gof_info_
                        .emplace(unwrapped_tl0, GofInfo(gof_info_it->second.gof,
                                                        frame->id.picture_id))
                        .first;
    }

    info = &gof_info_it->second;
  }

  // Clean up info for base layers that are too old.
  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);

  FrameReceivedVp9(frame->id.picture_id, info);

  // Make sure we don't miss any frame that could potentially have the
  // up switch flag set.
  if (MissingRequiredFrameVp9(frame->id.picture_id, *info))
    return kStash;

  if (codec_header.temporal_up_switch)
    up_switch_.emplace(frame->id.picture_id, codec_header.temporal_idx);

  // Clean out old info about up switch frames.
  uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->id.picture_id, 50);
  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);

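  // Map the picture id to an index within the GOF so the reference structure
  // for this frame can be looked up.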
  size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                      frame->id.picture_id);
  size_t gof_idx = diff % info->gof->num_frames_in_gof;

  if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
    return kDrop;
  }
  // Populate references according to the scalability structure.
  frame->num_references = info->gof->num_ref_pics[gof_idx];
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] = Subtract<kFrameIdLength>(
        frame->id.picture_id, info->gof->pid_diff[gof_idx][i]);

    // If this is a reference to a frame earlier than the last up switch point,
    // then ignore this reference.
    if (UpSwitchInIntervalVp9(frame->id.picture_id, codec_header.temporal_idx,
                              frame->references[i])) {
      --frame->num_references;
    }
  }

  // Override GOF references.
  if (!codec_header.inter_pic_predicted) {
    frame->num_references = 0;
  }

  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
  return kHandOff;
}

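// Returns true if a frame in one of the lower temporal layers, in the interval
// between one of this frame's references and |picture_id|, has not been
// received yet.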
bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
                                              const GofInfo& info) {
  size_t diff =
      ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
  size_t gof_idx = diff % info.gof->num_frames_in_gof;
  size_t temporal_idx = info.gof->temporal_idx[gof_idx];

  if (temporal_idx >= kMaxTemporalLayers) {
    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                        << " temporal "
                           "layers are supported.";
    return true;
  }

  // For every reference this frame has, check if there is a frame missing in
  // the interval (|ref_pid|, |picture_id|) in any of the lower temporal
  // layers. If so, we are missing a required frame.
  uint8_t num_references = info.gof->num_ref_pics[gof_idx];
  for (size_t i = 0; i < num_references; ++i) {
    uint16_t ref_pid =
        Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
    for (size_t l = 0; l < temporal_idx; ++l) {
      auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
      if (missing_frame_it != missing_frames_for_layer_[l].end() &&
          AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
        return true;
      }
    }
  }
  return false;
}

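// Updates the per temporal layer sets of missing frames based on the newly
// received |picture_id|.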
void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
  int last_picture_id = info->last_picture_id;
  size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);

  // If there is a gap, find which temporal layer the missing frames
  // belong to and add the frame as missing for that temporal layer.
  // Otherwise, remove this frame from the set of missing frames.
  if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
    size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                        last_picture_id);
    size_t gof_idx = diff % gof_size;

    last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    while (last_picture_id != picture_id) {
      gof_idx = (gof_idx + 1) % gof_size;
      RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

      size_t temporal_idx = info->gof->temporal_idx[gof_idx];
      if (temporal_idx >= kMaxTemporalLayers) {
        RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                            << " temporal "
                               "layers are supported.";
        return;
      }

      missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
      last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    }

    info->last_picture_id = last_picture_id;
  } else {
    size_t diff =
        ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
    size_t gof_idx = diff % gof_size;
    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

    size_t temporal_idx = info->gof->temporal_idx[gof_idx];
    if (temporal_idx >= kMaxTemporalLayers) {
      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                          << " temporal "
                             "layers are supported.";
      return;
    }

    missing_frames_for_layer_[temporal_idx].erase(picture_id);
  }
}

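// Returns true if a frame in a temporal layer lower than |temporal_idx| has
// its up switch flag set in the interval (|pid_ref|, |picture_id|).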
bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
                                            uint8_t temporal_idx,
                                            uint16_t pid_ref) {
  for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
       up_switch_it != up_switch_.end() &&
       AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
       ++up_switch_it) {
    if (up_switch_it->second < temporal_idx)
      return true;
  }

  return false;
}

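// Reprocesses the stashed frames until no more of them can be handed off.
// Frames that are handed off or dropped are removed from the stash.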
void RtpVp9RefFinder::RetryStashedFrames(
    RtpFrameReferenceFinder::ReturnVector& res) {
  bool complete_frame = false;
  do {
    complete_frame = false;
    for (auto frame_it = stashed_frames_.begin();
         frame_it != stashed_frames_.end();) {
      FrameDecision decision = ManageFrameInternal(frame_it->get());

      switch (decision) {
        case kStash:
          ++frame_it;
          break;
        case kHandOff:
          complete_frame = true;
          res.push_back(std::move(*frame_it));
          ABSL_FALLTHROUGH_INTENDED;
        case kDrop:
          frame_it = stashed_frames_.erase(frame_it);
      }
    }
  } while (complete_frame);
}

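// Maps the (picture id, spatial layer) pair of |frame| and its references into
// a single flat frame id space shared by all spatial layers. For inter-layer
// predicted frames, a reference to the frame directly below in the same
// picture is added.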
void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
                                            bool inter_layer_predicted) {
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] =
        unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
        *frame->SpatialIndex();
  }
  frame->id.picture_id =
      unwrapper_.Unwrap(frame->id.picture_id) * kMaxSpatialLayers +
      *frame->SpatialIndex();

  if (inter_layer_predicted &&
      frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
    frame->references[frame->num_references] = frame->id.picture_id - 1;
    ++frame->num_references;
  }
}

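// Drops all stashed frames whose first sequence number is older than
// |seq_num|.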
void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
  auto it = stashed_frames_.begin();
  while (it != stashed_frames_.end()) {
    if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
      it = stashed_frames_.erase(it);
    } else {
      ++it;
    }
  }
}

}  // namespace video_coding
}  // namespace webrtc