Search for SPS NALU rather than assuming its position

Summary:
The implementation of H264AnnexBBufferHasVideoFormatDescription was
assuming that the SPS NALU is either the first NALU in the stream, or
the second one, in case an AUD NALU is present in the first location.
This change removes this assumption and instead searches for the SPS
NALU, failing only if we can't find one.

In addition, it cleans up some binary buffer manipulation code, using the
the parsed NALU indices we already have in AnnexBBufferReader instead.

Test Plan: Unit tests

Change-Id: Id9715aa1d751f0ba1a1992def2b690607896df56

bug: webrtc:8922
Change-Id: Id9715aa1d751f0ba1a1992def2b690607896df56
Reviewed-on: https://webrtc-review.googlesource.com/49982
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Kári Helgason <kthelgason@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22205}
diff --git a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH264.mm b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH264.mm
index e7ce739..b5b4cc9 100644
--- a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH264.mm
+++ b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoDecoderH264.mm
@@ -120,21 +120,17 @@
     return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
   }
 #endif
-  if (webrtc::H264AnnexBBufferHasVideoFormatDescription((uint8_t *)inputImage.buffer.bytes,
-                                                        inputImage.buffer.length)) {
-    rtc::ScopedCFTypeRef<CMVideoFormatDescriptionRef> inputFormat =
-        rtc::ScopedCF(webrtc::CreateVideoFormatDescription((uint8_t *)inputImage.buffer.bytes,
-                                                           inputImage.buffer.length));
-    if (inputFormat) {
-      // Check if the video format has changed, and reinitialize decoder if
-      // needed.
-      if (!CMFormatDescriptionEqual(inputFormat.get(), _videoFormat)) {
-        [self setVideoFormat:inputFormat.get()];
-
-        int resetDecompressionSessionError = [self resetDecompressionSession];
-        if (resetDecompressionSessionError != WEBRTC_VIDEO_CODEC_OK) {
-          return resetDecompressionSessionError;
-        }
+  rtc::ScopedCFTypeRef<CMVideoFormatDescriptionRef> inputFormat =
+      rtc::ScopedCF(webrtc::CreateVideoFormatDescription((uint8_t *)inputImage.buffer.bytes,
+                                                         inputImage.buffer.length));
+  if (inputFormat) {
+    // Check if the video format has changed, and reinitialize decoder if
+    // needed.
+    if (!CMFormatDescriptionEqual(inputFormat.get(), _videoFormat)) {
+      [self setVideoFormat:inputFormat.get()];
+      int resetDecompressionSessionError = [self resetDecompressionSession];
+      if (resetDecompressionSessionError != WEBRTC_VIDEO_CODEC_OK) {
+        return resetDecompressionSessionError;
       }
     }
   }
diff --git a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc
index f6ee106..625fc53 100644
--- a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc
+++ b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.cc
@@ -167,11 +167,10 @@
   *out_sample_buffer = nullptr;
 
   AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size);
-  if (H264AnnexBBufferHasVideoFormatDescription(annexb_buffer,
-                                                annexb_buffer_size)) {
-    // Advance past the SPS and PPS.
-    const uint8_t* data = nullptr;
-    size_t data_len = 0;
+  if (reader.SeekToNextNaluOfType(kSps)) {
+    // Buffer contains an SPS NALU - skip it and the following PPS
+    const uint8_t* data;
+    size_t data_len;
     if (!reader.ReadNalu(&data, &data_len)) {
       RTC_LOG(LS_ERROR) << "Failed to read SPS";
       return false;
@@ -180,6 +179,9 @@
       RTC_LOG(LS_ERROR) << "Failed to read PPS";
       return false;
     }
+  } else {
+    // No SPS NALU - start reading from the first NALU in the buffer
+    reader.SeekToStart();
   }
 
   // Allocate memory as a block buffer.
@@ -246,47 +248,15 @@
   return true;
 }
 
-bool H264AnnexBBufferHasVideoFormatDescription(const uint8_t* annexb_buffer,
-                                               size_t annexb_buffer_size) {
-  RTC_DCHECK(annexb_buffer);
-  RTC_DCHECK_GT(annexb_buffer_size, 4);
-
-  // The buffer we receive via RTP has 00 00 00 01 start code artifically
-  // embedded by the RTP depacketizer. Extract NALU information.
-  // TODO(tkchin): handle potential case where sps and pps are delivered
-  // separately.
-  NaluType first_nalu_type = ParseNaluType(annexb_buffer[4]);
-  bool is_first_nalu_type_sps = first_nalu_type == kSps;
-  if (is_first_nalu_type_sps)
-    return true;
-  bool is_first_nalu_type_aud = first_nalu_type == kAud;
-  // Start code + access unit delimiter + start code = 4 + 2 + 4 = 10.
-  if (!is_first_nalu_type_aud || annexb_buffer_size <= 10u)
-    return false;
-  NaluType second_nalu_type = ParseNaluType(annexb_buffer[10]);
-  bool is_second_nalu_type_sps = second_nalu_type == kSps;
-  return is_second_nalu_type_sps;
-}
-
 CMVideoFormatDescriptionRef CreateVideoFormatDescription(
     const uint8_t* annexb_buffer,
     size_t annexb_buffer_size) {
-  if (!H264AnnexBBufferHasVideoFormatDescription(annexb_buffer,
-                                                 annexb_buffer_size)) {
-    return nullptr;
-  }
-  AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size);
-  CMVideoFormatDescriptionRef description = nullptr;
-  OSStatus status = noErr;
-  // Parse the SPS and PPS into a CMVideoFormatDescription.
   const uint8_t* param_set_ptrs[2] = {};
   size_t param_set_sizes[2] = {};
-  // Skip AUD.
-  if (ParseNaluType(annexb_buffer[4]) == kAud) {
-    if (!reader.ReadNalu(&param_set_ptrs[0], &param_set_sizes[0])) {
-      RTC_LOG(LS_ERROR) << "Failed to read AUD";
-      return nullptr;
-    }
+  AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size);
+  // Skip everyting before the SPS, then read the SPS and PPS
+  if (!reader.SeekToNextNaluOfType(kSps)) {
+    return nullptr;
   }
   if (!reader.ReadNalu(&param_set_ptrs[0], &param_set_sizes[0])) {
     RTC_LOG(LS_ERROR) << "Failed to read SPS";
@@ -296,9 +266,11 @@
     RTC_LOG(LS_ERROR) << "Failed to read PPS";
     return nullptr;
   }
-  status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
-      kCFAllocatorDefault, 2, param_set_ptrs, param_set_sizes, 4,
-      &description);
+
+  // Parse the SPS and PPS into a CMVideoFormatDescription.
+  CMVideoFormatDescriptionRef description = nullptr;
+  OSStatus status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
+      kCFAllocatorDefault, 2, param_set_ptrs, param_set_sizes, 4, &description);
   if (status != noErr) {
     RTC_LOG(LS_ERROR) << "Failed to create video format description.";
     return nullptr;
@@ -337,6 +309,19 @@
   return length_ - offset_->start_offset;
 }
 
+void AnnexBBufferReader::SeekToStart() {
+  offset_ = offsets_.begin();
+}
+
+bool AnnexBBufferReader::SeekToNextNaluOfType(NaluType type) {
+  for (; offset_ != offsets_.end(); ++offset_) {
+    if (offset_->payload_size < 1)
+      continue;
+    if (ParseNaluType(*(start_ + offset_->payload_start_offset)) == type)
+      return true;
+  }
+  return false;
+}
 AvccBufferWriter::AvccBufferWriter(uint8_t* const avcc_buffer, size_t length)
     : start_(avcc_buffer), offset_(0), length_(length) {
   RTC_DCHECK(avcc_buffer);
diff --git a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h
index fed83eb..cd81f12 100644
--- a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h
+++ b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h
@@ -46,11 +46,6 @@
                                       CMVideoFormatDescriptionRef video_format,
                                       CMSampleBufferRef* out_sample_buffer);
 
-// Returns true if the type of the first NALU in the supplied Annex B buffer is
-// the SPS type.
-bool H264AnnexBBufferHasVideoFormatDescription(const uint8_t* annexb_buffer,
-                                               size_t annexb_buffer_size);
-
 // Returns a video format description created from the sps/pps information in
 // the Annex B buffer. If there is no such information, nullptr is returned.
 // The caller is responsible for releasing the description.
@@ -74,6 +69,15 @@
   // If the buffer has no remaining NALUs this will return zero.
   size_t BytesRemaining() const;
 
+  // Reset the reader to start reading from the first NALU
+  void SeekToStart();
+
+  // Seek to the next position that holds a NALU of the desired type,
+  // or the end if no such NALU is found.
+  // Return true if a NALU of the desired type is found, false if we
+  // reached the end instead
+  bool SeekToNextNaluOfType(H264::NaluType type);
+
  private:
   // Returns the the next offset that contains NALU data.
   size_t FindNextNaluHeader(const uint8_t* start,
diff --git a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter_unittest.cc b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter_unittest.cc
index f5d5406..c8de64b 100644
--- a/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter_unittest.cc
+++ b/sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter_unittest.cc
@@ -11,32 +11,18 @@
 
 #include <memory>
 
+#include "common_video/h264/h264_common.h"
 #include "rtc_base/arraysize.h"
 #include "sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h"
 #include "test/gtest.h"
 
 namespace webrtc {
 
+using H264::kSps;
+
 static const uint8_t NALU_TEST_DATA_0[] = {0xAA, 0xBB, 0xCC};
 static const uint8_t NALU_TEST_DATA_1[] = {0xDE, 0xAD, 0xBE, 0xEF};
 
-TEST(H264VideoToolboxNaluTest, TestHasVideoFormatDescription) {
-  const uint8_t sps_buffer[] = {0x00, 0x00, 0x00, 0x01, 0x27};
-  EXPECT_TRUE(H264AnnexBBufferHasVideoFormatDescription(sps_buffer,
-                                                        arraysize(sps_buffer)));
-  const uint8_t aud_sps_buffer[] = {0x00, 0x00, 0x00, 0x01, 0x29, 0x10,
-                                    0x00, 0x00, 0x00, 0x01, 0x27, 0xFF};
-  EXPECT_TRUE(H264AnnexBBufferHasVideoFormatDescription(
-      aud_sps_buffer, arraysize(aud_sps_buffer)));
-  const uint8_t other_buffer[] = {0x00, 0x00, 0x00, 0x01, 0x28};
-  EXPECT_FALSE(H264AnnexBBufferHasVideoFormatDescription(
-      other_buffer, arraysize(other_buffer)));
-  const uint8_t aud_other_buffer[] = {0x00, 0x00, 0x00, 0x01, 0x29,
-                                      0x00, 0x00, 0x00, 0x01, 0x28};
-  EXPECT_FALSE(H264AnnexBBufferHasVideoFormatDescription(
-      aud_other_buffer, arraysize(aud_other_buffer)));
-}
-
 TEST(H264VideoToolboxNaluTest, TestCreateVideoFormatDescription) {
   const uint8_t sps_pps_buffer[] = {
     // SPS nalu.
@@ -54,6 +40,24 @@
     CFRelease(description);
     description = nullptr;
   }
+
+  const uint8_t sps_pps_not_at_start_buffer[] = {
+      // Add some non-SPS/PPS NALUs at the beginning
+      0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00, 0x00, 0x01,
+      0xAB, 0x33, 0x21,
+      // SPS nalu.
+      0x00, 0x00, 0x01, 0x27, 0x42, 0x00, 0x1E, 0xAB, 0x40, 0xF0, 0x28, 0xD3,
+      0x70, 0x20, 0x20, 0x20, 0x20,
+      // PPS nalu.
+      0x00, 0x00, 0x01, 0x28, 0xCE, 0x3C, 0x30};
+  description = CreateVideoFormatDescription(
+      sps_pps_not_at_start_buffer, arraysize(sps_pps_not_at_start_buffer));
+  EXPECT_TRUE(description);
+  if (description) {
+    CFRelease(description);
+    description = nullptr;
+  }
+
   const uint8_t other_buffer[] = {0x00, 0x00, 0x00, 0x01, 0x28};
   EXPECT_FALSE(CreateVideoFormatDescription(other_buffer,
                                             arraysize(other_buffer)));
@@ -140,6 +144,34 @@
   EXPECT_EQ(0u, nalu_length);
 }
 
+TEST(AnnexBBufferReaderTest, TestFindNextNaluOfType) {
+  const uint8_t notSps = 0x1F;
+  const uint8_t annex_b_test_data[] = {
+      0x00, 0x00,   0x00, 0x01,   kSps, 0x00,   0x00, 0x01, notSps,
+      0x00, 0x00,   0x01, notSps, 0xDD, 0x00,   0x00, 0x01, notSps,
+      0xEE, 0xFF,   0x00, 0x00,   0x00, 0xFF,   0x00, 0x00, 0x00,
+      0x01, 0x00,   0x00, 0x00,   0x01, kSps,   0xBB, 0x00, 0x00,
+      0x01, notSps, 0x00, 0x00,   0x01, notSps, 0xDD, 0x00, 0x00,
+      0x01, notSps, 0xEE, 0xFF,   0x00, 0x00,   0x00, 0x01};
+
+  AnnexBBufferReader reader(annex_b_test_data, arraysize(annex_b_test_data));
+  const uint8_t* nalu = nullptr;
+  size_t nalu_length = 0;
+  EXPECT_EQ(arraysize(annex_b_test_data), reader.BytesRemaining());
+  EXPECT_TRUE(reader.FindNextNaluOfType(kSps));
+  EXPECT_TRUE(reader.ReadNalu(&nalu, &nalu_length));
+  EXPECT_EQ(annex_b_test_data + 4, nalu);
+  EXPECT_EQ(1u, nalu_length);
+
+  EXPECT_TRUE(reader.FindNextNaluOfType(kSps));
+  EXPECT_TRUE(reader.ReadNalu(&nalu, &nalu_length));
+  EXPECT_EQ(annex_b_test_data + 32, nalu);
+  EXPECT_EQ(2u, nalu_length);
+
+  EXPECT_FALSE(reader.FindNextNaluOfType(kSps));
+  EXPECT_FALSE(reader.ReadNalu(&nalu, &nalu_length));
+}
+
 TEST(AvccBufferWriterTest, TestEmptyOutputBuffer) {
   const uint8_t expected_buffer[] = {0x00};
   const size_t buffer_size = 1;