Add AV1 RtpDepacketizer class

Implement Parse function that extracts is_first_packet_in_frame,
is_last_packet_in_frame, and frame_type fields.

Bug: webrtc:11042
Change-Id: I9360ea52ef274281b5c5e4c31955100b92155bfe
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/159180
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29814}
diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn
index 55cda86..0a1dc4b 100644
--- a/modules/rtp_rtcp/BUILD.gn
+++ b/modules/rtp_rtcp/BUILD.gn
@@ -165,6 +165,8 @@
     "source/rtcp_receiver.h",
     "source/rtcp_sender.cc",
     "source/rtcp_sender.h",
+    "source/rtp_depacketizer_av1.cc",
+    "source/rtp_depacketizer_av1.h",
     "source/rtp_format.cc",
     "source/rtp_format.h",
     "source/rtp_format_h264.cc",
@@ -442,6 +444,7 @@
       "source/rtcp_sender_unittest.cc",
       "source/rtcp_transceiver_impl_unittest.cc",
       "source/rtcp_transceiver_unittest.cc",
+      "source/rtp_depacketizer_av1_unittest.cc",
       "source/rtp_fec_unittest.cc",
       "source/rtp_format_h264_unittest.cc",
       "source/rtp_format_unittest.cc",
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc
new file mode 100644
index 0000000..cc92526
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc
@@ -0,0 +1,162 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/rtp_rtcp/source/rtp_video_header.h"
+#include "rtc_base/byte_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace {
+// AV1 format:
+//
+// RTP payload syntax:
+//     0 1 2 3 4 5 6 7
+//    +-+-+-+-+-+-+-+-+
+//    |Z|Y| W |-|-|-|-| (REQUIRED)
+//    +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0)
+//    |1|             |
+//    +-+ OBU fragment|
+//    |1|             | (REQUIRED, leb128 encoded)
+//    +-+    size     |
+//    |0|             |
+//    +-+-+-+-+-+-+-+-+
+//    |  OBU fragment |
+//    |     ...       |
+//    +=+=+=+=+=+=+=+=+
+//    |     ...       |
+//    +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field
+//    |  OBU fragment |
+//    |     ...       |
+//    +=+=+=+=+=+=+=+=+
+//
+//
+// OBU syntax:
+//     0 1 2 3 4 5 6 7
+//    +-+-+-+-+-+-+-+-+
+//    |0| type  |X|S|-| (REQUIRED)
+//    +-+-+-+-+-+-+-+-+
+// X: | TID |SID|-|-|-| (OPTIONAL)
+//    +-+-+-+-+-+-+-+-+
+//    |1|             |
+//    +-+ OBU payload |
+// S: |1|             | (OPTIONAL, variable length leb128 encoded)
+//    +-+    size     |
+//    |0|             |
+//    +-+-+-+-+-+-+-+-+
+//    |  OBU payload  |
+//    |     ...       |
+constexpr int kObuTypeSequenceHeader = 1;
+
+int ObuType(uint8_t obu_header) {
+  return (obu_header & 0b0'1111'000u) >> 3;  // 4-bit OBU type field.
+}
+
+bool RtpStartsWithFragment(uint8_t aggregation_header) {
+  return aggregation_header & 0b1000'0000u;  // Z: most significant bit.
+}
+bool RtpEndsWithFragment(uint8_t aggregation_header) {
+  return aggregation_header & 0b0100'0000u;  // Y: second most significant bit.
+}
+int RtpNumObus(uint8_t aggregation_header) {  // 0 for any number of obus.
+  return (aggregation_header & 0b0011'0000u) >> 4;  // 2-bit W field.
+}
+
+}  // namespace
+
+// Keeps the whole rtp payload as codec payload and sets first/last packet in
+// frame from the aggregation header; a Sequence Header OBU at the start of a
+// frame marks a key frame. Returns false on empty or malformed payload.
+bool RtpDepacketizerAv1::Parse(ParsedPayload* parsed_payload,
+                               const uint8_t* payload_data,
+                               size_t payload_data_length) {
+  RTC_DCHECK(parsed_payload);
+  if (payload_data_length == 0) {
+    RTC_DLOG(LS_ERROR) << "Empty rtp payload.";
+    return false;
+  }
+  // Frame assembly needs the entire rtp payload, aggregation header included.
+  parsed_payload->payload = payload_data;
+  parsed_payload->payload_length = payload_data_length;
+
+  rtc::ByteBufferReader payload(reinterpret_cast<const char*>(payload_data),
+                                payload_data_length);
+  uint8_t aggregation_header;
+  RTC_CHECK(payload.ReadUInt8(&aggregation_header));  // Payload is non-empty.
+
+  // TODO(danilchap): Set AV1 codec when there is such enum value
+  parsed_payload->video.codec = VideoCodecType::kVideoCodecGeneric;
+  // These are not accurate since a frame may consist of several packet-aligned
+  // chunks of OBUs, but should be good enough for most cases. A produced frame
+  // might not map to any real frame, but the AV1 decoder should cope since it
+  // promises to handle individual OBUs rather than full frames.
+  parsed_payload->video.is_first_packet_in_frame =
+      !RtpStartsWithFragment(aggregation_header);
+  parsed_payload->video.is_last_packet_in_frame =
+      !RtpEndsWithFragment(aggregation_header);
+  parsed_payload->video.frame_type = VideoFrameType::kVideoFrameDelta;
+  // If packet starts a frame, check if it contains Sequence Header OBU.
+  // In that case treat it as key frame packet.
+  if (parsed_payload->video.is_first_packet_in_frame) {
+    int num_expected_obus = RtpNumObus(aggregation_header);
+
+    // The only OBU that can precede SequenceHeader is a TemporalDelimiter OBU,
+    // so check no more than two OBUs while searching for SH.
+    for (int obu_index = 1; payload.Length() > 0 && obu_index <= 2;
+         ++obu_index) {
+      uint64_t fragment_size;
+      // When num_expected_obus (W) > 0, last OBU fragment has no size field:
+      // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header
+      bool has_fragment_size = (obu_index != num_expected_obus);
+      if (has_fragment_size) {
+        if (!payload.ReadUVarint(&fragment_size)) {
+          RTC_DLOG(LS_WARNING)
+              << "Failed to read OBU fragment size for OBU#" << obu_index;
+          return false;
+        }
+        if (fragment_size > payload.Length()) {
+          RTC_DLOG(LS_WARNING) << "OBU fragment size " << fragment_size
+                               << " exceeds remaining payload size "
+                               << payload.Length() << " for OBU#" << obu_index;
+          // Malformed input: written size is larger than remaining buffer.
+          return false;
+        }
+      } else {
+        fragment_size = payload.Length();
+      }
+      // Empty fragments are impractical but allowed; log in debug builds only.
+      if (fragment_size == 0) {
+        RTC_DLOG(LS_WARNING)
+            << "Weird obu of size 0 at offset "
+            << (payload_data_length - payload.Length()) << ", skipping.";
+        continue;
+      }
+      uint8_t obu_header = *reinterpret_cast<const uint8_t*>(payload.Data());
+      if (ObuType(obu_header) == kObuTypeSequenceHeader) {
+        // TODO(bugs.webrtc.org/11042): Check frame_header OBU and/or frame OBU
+        // too for other conditions of the start of a new coded video sequence.
+        // For proper checks checking single packet might not be enough. See
+        // https://aomediacodec.github.io/av1-spec/av1-spec.pdf section 7.5
+        parsed_payload->video.frame_type = VideoFrameType::kVideoFrameKey;
+        break;
+      }
+      payload.Consume(fragment_size);
+    }
+  }
+
+  return true;
+}
+
+}  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1.h b/modules/rtp_rtcp/source/rtp_depacketizer_av1.h
new file mode 100644
index 0000000..e4a6dce
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_
+#define MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/rtp_rtcp/source/rtp_format.h"
+
+namespace webrtc {
+
+class RtpDepacketizerAv1 : public RtpDepacketizer {  // AV1 rtp payload parser.
+ public:
+  RtpDepacketizerAv1() = default;
+  RtpDepacketizerAv1(const RtpDepacketizerAv1&) = delete;
+  RtpDepacketizerAv1& operator=(const RtpDepacketizerAv1&) = delete;
+  ~RtpDepacketizerAv1() override = default;
+
+  bool Parse(ParsedPayload* parsed_payload,
+             const uint8_t* payload_data,
+             size_t payload_data_length) override;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc
new file mode 100644
index 0000000..2520f74
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc
@@ -0,0 +1,196 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+// Signals number of the OBU (fragments) in the packet.
+constexpr uint8_t kObuCountAny = 0b0000'0000;
+constexpr uint8_t kObuCountOne = 0b0001'0000;
+constexpr uint8_t kObuCountTwo = 0b0010'0000;
+
+constexpr uint8_t kObuHeaderSequenceHeader = 0b0'0001'000;
+constexpr uint8_t kObuHeaderTemporalDelimiter = 0b0'0010'000;
+constexpr uint8_t kObuHeaderFrame = 0b0'0110'000;
+
+TEST(RtpDepacketizerAv1Test, ParsePassFullRtpPayloadAsCodecPayload) {
+  const uint8_t packet[] = {(uint8_t{1} << 7) | kObuCountOne, 1, 2, 3, 4};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_EQ(parsed.payload_length, sizeof(packet));
+  EXPECT_TRUE(parsed.payload == packet);  // Compares pointers.
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsContinuationFlagAsNotBeginningOfFrame) {
+  const uint8_t packet[] = {
+      (uint8_t{1} << 7) | kObuCountOne,  // Z bit is 1.
+      kObuHeaderFrame};  // Value doesn't matter since it is a
+                         // continuation of the OBU from the previous packet.
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_FALSE(parsed.video.is_first_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNoContinuationFlagAsBeginningOfFrame) {
+  const uint8_t packet[] = {(uint8_t{0} << 7) | kObuCountOne, kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_first_packet_in_frame);  // Z bit is 0.
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsWillContinueFlagAsNotEndOfFrame) {
+  const uint8_t packet[] = {(uint8_t{1} << 6) | kObuCountOne, kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_FALSE(parsed.video.is_last_packet_in_frame);  // Y bit is 1.
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNoWillContinueFlagAsEndOfFrame) {
+  const uint8_t packet[] = {(uint8_t{0} << 6) | kObuCountOne, kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_last_packet_in_frame);  // Y bit is 0.
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsStartOfSequenceHeaderAsKeyFrame) {
+  const uint8_t packet[] = {kObuCountOne, kObuHeaderSequenceHeader};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_first_packet_in_frame);  // Z bit is 0.
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNotStartOfFrameAsDeltaFrame) {
+  const uint8_t packet[] = {
+      (uint8_t{1} << 7) | kObuCountOne,
+      // Byte looks like the start of a sequence header, but since it does not
+      // start an OBU, it is not actually the start of a sequence header.
+      kObuHeaderSequenceHeader};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_FALSE(parsed.video.is_first_packet_in_frame);
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+TEST(RtpDepacketizerAv1Test,
+     ParseTreatsStartOfFrameWithoutSequenceHeaderAsDeltaFrame) {
+  const uint8_t packet[] = {kObuCountOne, kObuHeaderFrame};  // No SH OBU.
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize1) {
+  const uint8_t packet[] = {kObuCountAny,  // W = 0: every fragment has a size.
+                            1,  // size of the next fragment
+                            kObuHeaderSequenceHeader};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize2) {
+  const uint8_t packet[] = {kObuCountTwo,
+                            2,  // size of the next fragment
+                            kObuHeaderSequenceHeader,
+                            42,  // SH payload.
+                            kObuHeaderFrame};  // 2nd fragment, no size.
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test,
+     ParseFindsSequenceHeaderBehindMultiByteFragmentSize) {
+  const uint8_t packet[] = {kObuCountTwo,
+                            0b1000'0101,  // leb128 encoded value of 5
+                            0b1000'0000,  // using 3 bytes
+                            0b0000'0000,  // to encode the value.
+                            kObuHeaderSequenceHeader,
+                            8,  // 4 bytes of SH payload.
+                            0,
+                            0,
+                            0,
+                            kObuHeaderFrame};  // 2nd fragment, no size.
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindTemporalDelimiter) {
+  const uint8_t packet[] = {kObuCountTwo,
+                            1,  // size of the next fragment
+                            kObuHeaderTemporalDelimiter,
+                            kObuHeaderSequenceHeader,  // 2nd fragment, no size.
+                            8,  // 4 bytes of SH payload.
+                            0,
+                            0,
+                            0};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test,
+     ParseFindsSequenceHeaderBehindTemporalDelimiterAndSize) {
+  const uint8_t packet[] = {kObuCountAny,  // W = 0: every fragment has a size.
+                            1,  // size of the next fragment
+                            kObuHeaderTemporalDelimiter,
+                            5,  // size of the next fragment
+                            kObuHeaderSequenceHeader,
+                            8,  // 4 bytes of SH payload.
+                            0,
+                            0,
+                            0,
+                            1,  // size of the next fragment
+                            kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseSkipsEmptyFragments) {
+  static_assert(kObuHeaderSequenceHeader == 8, "");
+  const uint8_t packet[] = {kObuCountAny,
+                            0,  // size of the next (empty) fragment
+                            8,  // size of the next fragment that looks like SH
+                            kObuHeaderFrame,
+                            1,
+                            2,
+                            3,
+                            4,
+                            5,
+                            6,
+                            7};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn
index 9bd8cef..b1723e9 100644
--- a/test/fuzzers/BUILD.gn
+++ b/test/fuzzers/BUILD.gn
@@ -537,6 +537,15 @@
   ]
 }
 
+webrtc_fuzzer_test("rtp_depacketizer_av1_parse_fuzzer") {
+  sources = [
+    "rtp_depacketizer_av1_parse_fuzzer.cc",
+  ]
+  deps = [
+    "../../modules/rtp_rtcp",
+  ]
+}
+
 webrtc_fuzzer_test("rtp_dependency_descriptor_fuzzer") {
   sources = [
     "rtp_dependency_descriptor_fuzzer.cc",
diff --git a/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc b/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc
new file mode 100644
index 0000000..d46860a
--- /dev/null
+++ b/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc
@@ -0,0 +1,18 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+namespace webrtc {
+void FuzzOneInput(const uint8_t* data, size_t size) {
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed_payload;
+  depacketizer.Parse(&parsed_payload, data, size);  // Result is ignored.
+}
+}  // namespace webrtc