Add AV1 RtpDepacketizer class
Implement Parse function that extracts is_first_packet_in_frame,
is_last_packet_in_frame, and frame_type fields.
Bug: webrtc:11042
Change-Id: I9360ea52ef274281b5c5e4c31955100b92155bfe
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/159180
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29814}
diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn
index 55cda86..0a1dc4b 100644
--- a/modules/rtp_rtcp/BUILD.gn
+++ b/modules/rtp_rtcp/BUILD.gn
@@ -165,6 +165,8 @@
"source/rtcp_receiver.h",
"source/rtcp_sender.cc",
"source/rtcp_sender.h",
+ "source/rtp_depacketizer_av1.cc",
+ "source/rtp_depacketizer_av1.h",
"source/rtp_format.cc",
"source/rtp_format.h",
"source/rtp_format_h264.cc",
@@ -442,6 +444,7 @@
"source/rtcp_sender_unittest.cc",
"source/rtcp_transceiver_impl_unittest.cc",
"source/rtcp_transceiver_unittest.cc",
+ "source/rtp_depacketizer_av1_unittest.cc",
"source/rtp_fec_unittest.cc",
"source/rtp_format_h264_unittest.cc",
"source/rtp_format_unittest.cc",
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc
new file mode 100644
index 0000000..cc92526
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/rtp_rtcp/source/rtp_video_header.h"
+#include "rtc_base/byte_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace {
+// AV1 format:
+//
+// RTP payload syntax:
+// 0 1 2 3 4 5 6 7
+// +-+-+-+-+-+-+-+-+
+// |Z|Y| W |-|-|-|-| (REQUIRED)
+// +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any number of times if W = 0)
+// |1| |
+// +-+ OBU fragment|
+// |1| | (REQUIRED, leb128 encoded)
+// +-+ size |
+// |0| |
+// +-+-+-+-+-+-+-+-+
+// | OBU fragment |
+// | ... |
+// +=+=+=+=+=+=+=+=+
+// | ... |
+// +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field
+// | OBU fragment |
+// | ... |
+// +=+=+=+=+=+=+=+=+
+//
+//
+// OBU syntax:
+// 0 1 2 3 4 5 6 7
+// +-+-+-+-+-+-+-+-+
+// |0| type |X|S|-| (REQUIRED)
+// +-+-+-+-+-+-+-+-+
+// X: | TID |SID|-|-|-| (OPTIONAL)
+// +-+-+-+-+-+-+-+-+
+// |1| |
+// +-+ OBU payload |
+// S: |1| | (OPTIONAL, variable length leb128 encoded)
+// +-+ size |
+// |0| |
+// +-+-+-+-+-+-+-+-+
+// | OBU payload |
+// | ... |
+constexpr int kObuTypeSequenceHeader = 1;  // Value of the OBU "type" field that marks a Sequence Header.
+
+int ObuType(uint8_t obu_header) {  // Extracts the 4-bit "type" field from the first OBU header byte.
+ return (obu_header & 0b0'1111'000u) >> 3;
+}
+
+bool RtpStartsWithFragment(uint8_t aggregation_header) {  // True when the Z bit (most significant bit) is set.
+ return aggregation_header & 0b1000'0000u;
+}
+bool RtpEndsWithFragment(uint8_t aggregation_header) {  // True when the Y bit (second most significant bit) is set.
+ return aggregation_header & 0b0100'0000u;
+}
+int RtpNumObus(uint8_t aggregation_header) { // Returns the 2-bit W field; 0 means any number of OBUs.
+ return (aggregation_header & 0b0011'0000u) >> 4;
+}
+
+} // namespace
+
+bool RtpDepacketizerAv1::Parse(ParsedPayload* parsed_payload,
+ const uint8_t* payload_data,
+ size_t payload_data_length) {  // Returns false for an empty or malformed payload.
+ RTC_DCHECK(parsed_payload);  // Output argument; must be non-null.
+ if (payload_data_length == 0) {
+ RTC_DLOG(LS_ERROR) << "Empty rtp payload.";
+ return false;
+ }
+ // To assemble a frame, all of the rtp payload is required, including the
+ // aggregation header.
+ parsed_payload->payload = payload_data;
+ parsed_payload->payload_length = payload_data_length;
+
+ rtc::ByteBufferReader payload(reinterpret_cast<const char*>(payload_data),
+ payload_data_length);
+ uint8_t aggregation_header;
+ RTC_CHECK(payload.ReadUInt8(&aggregation_header));  // Expected to succeed: payload is non-empty.
+
+ // TODO(danilchap): Set AV1 codec when there is such enum value
+ parsed_payload->video.codec = VideoCodecType::kVideoCodecGeneric;
+ // These are not accurate since a frame may consist of several packet-aligned
+ // chunks of OBUs, but should be good enough for most cases. It might produce
+ // frames that do not map to any real frame, but the AV1 decoder should be able
+ // to handle it since it promises to handle individual OBUs rather than full
+ // frames.
+ parsed_payload->video.is_first_packet_in_frame =
+ !RtpStartsWithFragment(aggregation_header);
+ parsed_payload->video.is_last_packet_in_frame =
+ !RtpEndsWithFragment(aggregation_header);
+ parsed_payload->video.frame_type = VideoFrameType::kVideoFrameDelta;  // Default; upgraded to key frame below.
+ // If packet starts a frame, check if it contains Sequence Header OBU.
+ // In that case treat it as key frame packet.
+ if (parsed_payload->video.is_first_packet_in_frame) {
+ int num_expected_obus = RtpNumObus(aggregation_header);
+
+ // The only OBU that can precede SequenceHeader is a TemporalDelimiter OBU,
+ // so check no more than two OBUs while searching for SH.
+ for (int obu_index = 1; payload.Length() > 0 && obu_index <= 2;
+ ++obu_index) {  // 1-based so it can be compared directly with W.
+ uint64_t fragment_size;
+ // When num_expected_obus > 0, last OBU (fragment) is not preceded by
+ // the size field. See W field in
+ // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header
+ bool has_fragment_size = (obu_index != num_expected_obus);
+ if (has_fragment_size) {
+ if (!payload.ReadUVarint(&fragment_size)) {
+ RTC_DLOG(LS_WARNING)
+ << "Failed to read OBU fragment size for OBU#" << obu_index;
+ return false;  // Note: fields assigned above stay set on this path.
+ }
+ if (fragment_size > payload.Length()) {
+ RTC_DLOG(LS_WARNING) << "OBU fragment size " << fragment_size
+ << " exceeds remaining payload size "
+ << payload.Length() << " for OBU#" << obu_index;
+ // Malformed input: written size is larger than remaining buffer.
+ return false;
+ }
+ } else {
+ fragment_size = payload.Length();  // Last fragment spans the rest of the payload.
+ }
+ // Though it is impractical to pass empty fragments, it is allowed.
+ if (fragment_size == 0) {
+ RTC_LOG(LS_WARNING)
+ << "Weird obu of size 0 at offset "
+ << (payload_data_length - payload.Length()) << ", skipping.";
+ continue;  // The empty fragment still counts toward the two-OBU limit.
+ }
+ uint8_t obu_header = *reinterpret_cast<const uint8_t*>(payload.Data());  // fragment_size > 0 here, so one byte is available.
+ if (ObuType(obu_header) == kObuTypeSequenceHeader) {
+ // TODO(bugs.webrtc.org/11042): Check frame_header OBU and/or frame OBU
+ // too for other conditions of the start of a new coded video sequence.
+ // For proper checks checking single packet might not be enough. See
+ // https://aomediacodec.github.io/av1-spec/av1-spec.pdf section 7.5
+ parsed_payload->video.frame_type = VideoFrameType::kVideoFrameKey;
+ break;
+ }
+ payload.Consume(fragment_size);  // Skip this fragment and move on to the next one.
+ }
+ }
+
+ return true;
+}
+
+} // namespace webrtc
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1.h b/modules/rtp_rtcp/source/rtp_depacketizer_av1.h
new file mode 100644
index 0000000..e4a6dce
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_
+#define MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/rtp_rtcp/source/rtp_format.h"
+
+namespace webrtc {
+
+class RtpDepacketizerAv1 : public RtpDepacketizer {  // RtpDepacketizer implementation for AV1 RTP payloads.
+ public:
+ RtpDepacketizerAv1() = default;
+ RtpDepacketizerAv1(const RtpDepacketizerAv1&) = delete;  // Non-copyable.
+ RtpDepacketizerAv1& operator=(const RtpDepacketizerAv1&) = delete;
+ ~RtpDepacketizerAv1() override = default;
+
+ bool Parse(ParsedPayload* parsed_payload,  // Output; filled from the payload bytes.
+ const uint8_t* payload_data,
+ size_t payload_data_length) override;  // Returns false if the payload cannot be parsed.
+};
+
+} // namespace webrtc
+#endif // MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc
new file mode 100644
index 0000000..2520f74
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+// Signals the number of OBUs (fragments) in the packet (W field, bits 4-5).
+constexpr uint8_t kObuCountAny = 0b0000'0000;  // W = 0
+constexpr uint8_t kObuCountOne = 0b0001'0000;  // W = 1
+constexpr uint8_t kObuCountTwo = 0b0010'0000;  // W = 2
+
+constexpr uint8_t kObuHeaderSequenceHeader = 0b0'0001'000;  // OBU type field = 1
+constexpr uint8_t kObuHeaderTemporalDelimiter = 0b0'0010'000;  // OBU type field = 2
+constexpr uint8_t kObuHeaderFrame = 0b0'0110'000;  // OBU type field = 6
+
+TEST(RtpDepacketizerAv1Test, ParsePassFullRtpPayloadAsCodecPayload) {
+ const uint8_t packet[] = {(uint8_t{1} << 7) | kObuCountOne, 1, 2, 3, 4};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_EQ(parsed.payload_length, sizeof(packet));  // Aggregation header byte is included.
+ EXPECT_TRUE(parsed.payload == packet);  // Pointer comparison: output points at the input buffer.
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsContinuationFlagAsNotBeginningOfFrame) {
+ const uint8_t packet[] = {
+ (uint8_t{1} << 7) | kObuCountOne,  // Z = 1
+ kObuHeaderFrame}; // Value doesn't matter since it is a
+ // continuation of the OBU from the previous packet.
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_FALSE(parsed.video.is_first_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNoContinuationFlagAsBeginningOfFrame) {
+ const uint8_t packet[] = {(uint8_t{0} << 7) | kObuCountOne, kObuHeaderFrame};  // Z = 0
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsWillContinueFlagAsNotEndOfFrame) {
+ const uint8_t packet[] = {(uint8_t{1} << 6) | kObuCountOne, kObuHeaderFrame};  // Y = 1
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_FALSE(parsed.video.is_last_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNoWillContinueFlagAsEndOfFrame) {
+ const uint8_t packet[] = {(uint8_t{0} << 6) | kObuCountOne, kObuHeaderFrame};  // Y = 0
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.is_last_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsStartOfSequenceHeaderAsKeyFrame) {
+ const uint8_t packet[] = {kObuCountOne, kObuHeaderSequenceHeader};  // Z = 0, W = 1: single OBU, no size field.
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNotStartOfFrameAsDeltaFrame) {
+ const uint8_t packet[] = {
+ (uint8_t{1} << 7) | kObuCountOne,
+ // Byte that looks like the start of a sequence header, but since it is not
+ // the start of an OBU, it is actually not a start of sequence header.
+ kObuHeaderSequenceHeader};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_FALSE(parsed.video.is_first_packet_in_frame);
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+TEST(RtpDepacketizerAv1Test,
+ ParseTreatsStartOfFrameWithoutSequenceHeaderAsDeltaFrame) {
+ const uint8_t packet[] = {kObuCountOne, kObuHeaderFrame};  // Starts an OBU, but it is not a Sequence Header.
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize1) {
+ const uint8_t packet[] = {kObuCountAny,  // W = 0: every fragment carries a size field.
+ 1, // size of the next fragment
+ kObuHeaderSequenceHeader};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize2) {
+ const uint8_t packet[] = {kObuCountTwo,  // W = 2: only the first fragment carries a size field.
+ 2, // size of the next fragment
+ kObuHeaderSequenceHeader,
+ 42, // SH payload.
+ kObuHeaderFrame};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test,
+ ParseFindsSequenceHeaderBehindMultiByteFragmentSize) {
+ const uint8_t packet[] = {kObuCountTwo,
+ 0b1000'0101, // leb128 encoded value of 5
+ 0b1000'0000, // using 3 bytes
+ 0b0000'0000, // to encode the value.
+ kObuHeaderSequenceHeader,  // First of the 5 fragment bytes.
+ 8, // 4 bytes of SH payload.
+ 0,
+ 0,
+ 0,
+ kObuHeaderFrame};  // Second (last) fragment: no size field.
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindTemporalDelimiter) {
+ const uint8_t packet[] = {kObuCountTwo,
+ 1, // size of the next fragment
+ kObuHeaderTemporalDelimiter,
+ kObuHeaderSequenceHeader,  // Last fragment (W = 2): no size field.
+ 8, // 4 bytes of SH payload.
+ 0,
+ 0,
+ 0};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test,
+ ParseFindsSequenceHeaderBehindTemporalDelimiterAndSize) {
+ const uint8_t packet[] = {kObuCountAny,  // W = 0: every fragment carries a size field.
+ 1, // size of the next fragment
+ kObuHeaderTemporalDelimiter,
+ 5, // size of the next fragment
+ kObuHeaderSequenceHeader,
+ 8, // 4 bytes of SH payload.
+ 0,
+ 0,
+ 0,
+ 1, // size of the next fragment
+ kObuHeaderFrame};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseSkipsEmptyFragments) {
+ static_assert(kObuHeaderSequenceHeader == 8, "");  // The size byte 8 below must equal an SH header byte.
+ const uint8_t packet[] = {kObuCountAny,
+ 0, // size of the next fragment (empty)
+ 8, // size of the next fragment that looks like SH
+ kObuHeaderFrame,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn
index 9bd8cef..b1723e9 100644
--- a/test/fuzzers/BUILD.gn
+++ b/test/fuzzers/BUILD.gn
@@ -537,6 +537,15 @@
]
}
+webrtc_fuzzer_test("rtp_depacketizer_av1_parse_fuzzer") {
+ sources = [
+ "rtp_depacketizer_av1_parse_fuzzer.cc",
+ ]
+ deps = [
+ "../../modules/rtp_rtcp",
+ ]
+}
+
webrtc_fuzzer_test("rtp_dependency_descriptor_fuzzer") {
sources = [
"rtp_dependency_descriptor_fuzzer.cc",
diff --git a/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc b/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc
new file mode 100644
index 0000000..d46860a
--- /dev/null
+++ b/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+namespace webrtc {
+void FuzzOneInput(const uint8_t* data, size_t size) {  // Feeds the raw input bytes to RtpDepacketizerAv1::Parse.
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed_payload;
+ depacketizer.Parse(&parsed_payload, data, size);  // Return value is ignored.
+}
+} // namespace webrtc