Add H.264 packetization.

This also includes:
- Created new packetizer and depacketizer interfaces.
- Moved VP8 packetization and H264 packetization and depacketization to these interfaces. This is a work in progress and should be continued until the interfaces are 100% generic. This also required changing the return type of RtpFormatVp8::NextPacket(), which now returns bool instead of the index of the first partition.
- Created a Create() factory method for packetizers and depacketizers.

R=niklas.enbom@webrtc.org, pbos@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/21009004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6804 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
index 9776479..e6b1d09 100644
--- a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
+++ b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
@@ -28,29 +28,30 @@
 
 // Note: if any pointers are added to this struct, it must be fitted
 // with a copy-constructor. See below.
-struct CodecSpecificInfoVP8
-{
-    bool             hasReceivedSLI;
-    uint8_t    pictureIdSLI;
-    bool             hasReceivedRPSI;
-    uint64_t   pictureIdRPSI;
-    int16_t    pictureId;         // negative value to skip pictureId
-    bool             nonReference;
-    uint8_t    simulcastIdx;
-    uint8_t    temporalIdx;
-    bool             layerSync;
-    int              tl0PicIdx;         // Negative value to skip tl0PicIdx
-    int8_t     keyIdx;            // negative value to skip keyIdx
+struct CodecSpecificInfoVP8 {
+  bool hasReceivedSLI;
+  uint8_t pictureIdSLI;
+  bool hasReceivedRPSI;
+  uint64_t pictureIdRPSI;
+  int16_t pictureId;  // Negative value to skip pictureId.
+  bool nonReference;
+  uint8_t simulcastIdx;
+  uint8_t temporalIdx;
+  bool layerSync;
+  int tl0PicIdx;  // Negative value to skip tl0PicIdx.
+  int8_t keyIdx;  // Negative value to skip keyIdx.
 };
 
 struct CodecSpecificInfoGeneric {
   uint8_t simulcast_idx;
 };
 
-union CodecSpecificInfoUnion
-{
-    CodecSpecificInfoGeneric   generic;
-    CodecSpecificInfoVP8       VP8;
+struct CodecSpecificInfoH264 {};
+
+union CodecSpecificInfoUnion {
+  CodecSpecificInfoGeneric generic;
+  CodecSpecificInfoVP8 VP8;
+  CodecSpecificInfoH264 H264;
 };
 
 // Note: if any pointers are added to this struct or its sub-structs, it
diff --git a/webrtc/modules/video_coding/main/source/encoded_frame.cc b/webrtc/modules/video_coding/main/source/encoded_frame.cc
index 3ccf0b0..0d07955 100644
--- a/webrtc/modules/video_coding/main/source/encoded_frame.cc
+++ b/webrtc/modules/video_coding/main/source/encoded_frame.cc
@@ -100,49 +100,45 @@
 
 void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header)
 {
-    if (header)
-    {
-        switch (header->codec)
-        {
-            case kRtpVideoVp8:
-            {
-                if (_codecSpecificInfo.codecType != kVideoCodecVP8)
-                {
-                    // This is the first packet for this frame.
-                    _codecSpecificInfo.codecSpecific.VP8.pictureId = -1;
-                    _codecSpecificInfo.codecSpecific.VP8.temporalIdx = 0;
-                    _codecSpecificInfo.codecSpecific.VP8.layerSync = false;
-                    _codecSpecificInfo.codecSpecific.VP8.keyIdx = -1;
-                    _codecSpecificInfo.codecType = kVideoCodecVP8;
-                }
-                _codecSpecificInfo.codecSpecific.VP8.nonReference =
-                    header->codecHeader.VP8.nonReference;
-                if (header->codecHeader.VP8.pictureId != kNoPictureId)
-                {
-                    _codecSpecificInfo.codecSpecific.VP8.pictureId =
-                        header->codecHeader.VP8.pictureId;
-                }
-                if (header->codecHeader.VP8.temporalIdx != kNoTemporalIdx)
-                {
-                    _codecSpecificInfo.codecSpecific.VP8.temporalIdx =
-                        header->codecHeader.VP8.temporalIdx;
-                    _codecSpecificInfo.codecSpecific.VP8.layerSync =
-                        header->codecHeader.VP8.layerSync;
-                }
-                if (header->codecHeader.VP8.keyIdx != kNoKeyIdx)
-                {
-                    _codecSpecificInfo.codecSpecific.VP8.keyIdx =
-                        header->codecHeader.VP8.keyIdx;
-                }
-                break;
-            }
-            default:
-            {
-                _codecSpecificInfo.codecType = kVideoCodecUnknown;
-                break;
-            }
+  if (header) {
+    switch (header->codec) {
+      case kRtpVideoVp8: {
+        if (_codecSpecificInfo.codecType != kVideoCodecVP8) {
+          // This is the first packet for this frame.
+          _codecSpecificInfo.codecSpecific.VP8.pictureId = -1;
+          _codecSpecificInfo.codecSpecific.VP8.temporalIdx = 0;
+          _codecSpecificInfo.codecSpecific.VP8.layerSync = false;
+          _codecSpecificInfo.codecSpecific.VP8.keyIdx = -1;
+          _codecSpecificInfo.codecType = kVideoCodecVP8;
         }
+        _codecSpecificInfo.codecSpecific.VP8.nonReference =
+            header->codecHeader.VP8.nonReference;
+        if (header->codecHeader.VP8.pictureId != kNoPictureId) {
+          _codecSpecificInfo.codecSpecific.VP8.pictureId =
+              header->codecHeader.VP8.pictureId;
+        }
+        if (header->codecHeader.VP8.temporalIdx != kNoTemporalIdx) {
+          _codecSpecificInfo.codecSpecific.VP8.temporalIdx =
+              header->codecHeader.VP8.temporalIdx;
+          _codecSpecificInfo.codecSpecific.VP8.layerSync =
+              header->codecHeader.VP8.layerSync;
+        }
+        if (header->codecHeader.VP8.keyIdx != kNoKeyIdx) {
+          _codecSpecificInfo.codecSpecific.VP8.keyIdx =
+              header->codecHeader.VP8.keyIdx;
+        }
+        break;
+      }
+      case kRtpVideoH264: {
+        _codecSpecificInfo.codecType = kVideoCodecH264;
+        break;
+      }
+      default: {
+        _codecSpecificInfo.codecType = kVideoCodecUnknown;
+        break;
+      }
     }
+  }
 }
 
 const RTPFragmentationHeader* VCMEncodedFrame::FragmentationHeader() const {
diff --git a/webrtc/modules/video_coding/main/source/generic_encoder.cc b/webrtc/modules/video_coding/main/source/generic_encoder.cc
index 6fb2c9f..655f7ac 100644
--- a/webrtc/modules/video_coding/main/source/generic_encoder.cc
+++ b/webrtc/modules/video_coding/main/source/generic_encoder.cc
@@ -38,6 +38,9 @@
       (*rtp)->simulcastIdx = info->codecSpecific.VP8.simulcastIdx;
       return;
     }
+    case kVideoCodecH264:
+      (*rtp)->codec = kRtpVideoH264;
+      return;
     case kVideoCodecGeneric:
       (*rtp)->codec = kRtpVideoGeneric;
       (*rtp)->simulcastIdx = info->codecSpecific.generic.simulcast_idx;
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.cc b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
index d8792f2..9aa3409 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.cc
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
@@ -482,7 +482,6 @@
 
 VCMEncodedFrame* VCMJitterBuffer::ExtractAndSetDecode(uint32_t timestamp) {
   CriticalSectionScoped cs(crit_sect_);
-
   if (!running_) {
     return NULL;
   }
@@ -611,7 +610,6 @@
   if (error != kNoError && frame == NULL) {
     return error;
   }
-
   int64_t now_ms = clock_->TimeInMilliseconds();
   // We are keeping track of the first and latest seq numbers, and
   // the number of wraps to be able to calculate how many packets we expect.
diff --git a/webrtc/modules/video_coding/main/source/packet.cc b/webrtc/modules/video_coding/main/source/packet.cc
index c1f1a04..63dcd63 100644
--- a/webrtc/modules/video_coding/main/source/packet.cc
+++ b/webrtc/modules/video_coding/main/source/packet.cc
@@ -94,33 +94,44 @@
   memset(&codecSpecificHeader, 0, sizeof(RTPVideoHeader));
 }
 
-void VCMPacket::CopyCodecSpecifics(const RTPVideoHeader& videoHeader)
-{
-    switch(videoHeader.codec)
-    {
-        case kRtpVideoVp8:
-            {
-                // Handle all packets within a frame as depending on the previous packet
-                // TODO(holmer): This should be changed to make fragments independent
-                // when the VP8 RTP receiver supports fragments.
-                if (isFirstPacket && markerBit)
-                    completeNALU = kNaluComplete;
-                else if (isFirstPacket)
-                    completeNALU = kNaluStart;
-                else if (markerBit)
-                    completeNALU = kNaluEnd;
-                else
-                    completeNALU = kNaluIncomplete;
+void VCMPacket::CopyCodecSpecifics(const RTPVideoHeader& videoHeader) {
+  switch (videoHeader.codec) {
+    case kRtpVideoVp8:
+      // Handle all packets within a frame as depending on the previous packet
+      // TODO(holmer): This should be changed to make fragments independent
+      // when the VP8 RTP receiver supports fragments.
+      if (isFirstPacket && markerBit)
+        completeNALU = kNaluComplete;
+      else if (isFirstPacket)
+        completeNALU = kNaluStart;
+      else if (markerBit)
+        completeNALU = kNaluEnd;
+      else
+        completeNALU = kNaluIncomplete;
 
-                codec = kVideoCodecVP8;
-                break;
-            }
-        default:
-            {
-                codec = kVideoCodecUnknown;
-                break;
-            }
-    }
+      codec = kVideoCodecVP8;
+      return;
+    case kRtpVideoH264:
+      isFirstPacket = videoHeader.isFirstPacket;
+      if (isFirstPacket)
+        insertStartCode = true;
+
+      if (videoHeader.codecHeader.H264.single_nalu) {
+        completeNALU = kNaluComplete;
+      } else if (isFirstPacket) {
+        completeNALU = kNaluStart;
+      } else if (markerBit) {
+        completeNALU = kNaluEnd;
+      } else {
+        completeNALU = kNaluIncomplete;
+      }
+      codec = kVideoCodecH264;
+      return;
+    case kRtpVideoGeneric:
+    case kRtpVideoNone:
+      codec = kVideoCodecUnknown;
+      return;
+  }
 }
 
-}
+}  // namespace webrtc
diff --git a/webrtc/modules/video_coding/main/source/session_info.cc b/webrtc/modules/video_coding/main/source/session_info.cc
index dab3da1..b50a01a 100644
--- a/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/webrtc/modules/video_coding/main/source/session_info.cc
@@ -14,7 +14,7 @@
 #include "webrtc/system_wrappers/interface/logging.h"
 
 namespace webrtc {
-
+namespace {
 // Used in determining whether a frame is decodable.
 enum {kRttThreshold = 100};  // Not decodable if Rtt is lower than this.
 
@@ -23,6 +23,11 @@
 static const float kLowPacketPercentageThreshold = 0.2f;
 static const float kHighPacketPercentageThreshold = 0.8f;
 
+uint16_t BufferToUWord16(const uint8_t* dataBuffer) {
+  return (dataBuffer[0] << 8) | dataBuffer[1];
+}
+}  // namespace
+
 VCMSessionInfo::VCMSessionInfo()
     : session_nack_(false),
       complete_(false),
@@ -121,9 +126,6 @@
   VCMPacket& packet = *packet_it;
   PacketIterator it;
 
-  int packet_size = packet.sizeBytes;
-  packet_size += (packet.insertStartCode ? kH264StartCodeLengthBytes : 0);
-
   // Calculate the offset into the frame buffer for this packet.
   int offset = 0;
   for (it = packets_.begin(); it != packet_it; ++it)
@@ -131,23 +133,63 @@
 
   // Set the data pointer to pointing to the start of this packet in the
   // frame buffer.
-  const uint8_t* data = packet.dataPtr;
+  const uint8_t* packet_buffer = packet.dataPtr;
   packet.dataPtr = frame_buffer + offset;
-  packet.sizeBytes = packet_size;
 
-  ShiftSubsequentPackets(packet_it, packet_size);
-
-  const unsigned char startCode[] = {0, 0, 0, 1};
-  if (packet.insertStartCode) {
-    memcpy(const_cast<uint8_t*>(packet.dataPtr), startCode,
-           kH264StartCodeLengthBytes);
+  // We handle H.264 STAP-A packets in a special way as we need to remove the
+  // two length bytes between each NAL unit, and potentially add start codes.
+  const size_t kH264NALHeaderLengthInBytes = 1;
+  const size_t kLengthFieldLength = 2;
+  if (packet.codecSpecificHeader.codecHeader.H264.stap_a) {
+    size_t required_length = 0;
+    const uint8_t* nalu_ptr = packet_buffer + kH264NALHeaderLengthInBytes;
+    while (nalu_ptr < packet_buffer + packet.sizeBytes) {
+      uint32_t length = BufferToUWord16(nalu_ptr);
+      required_length +=
+          length + (packet.insertStartCode ? kH264StartCodeLengthBytes : 0);
+      nalu_ptr += kLengthFieldLength + length;
+    }
+    ShiftSubsequentPackets(packet_it, required_length);
+    nalu_ptr = packet_buffer + kH264NALHeaderLengthInBytes;
+    uint8_t* frame_buffer_ptr = frame_buffer + offset;
+    while (nalu_ptr < packet_buffer + packet.sizeBytes) {
+      uint32_t length = BufferToUWord16(nalu_ptr);
+      nalu_ptr += kLengthFieldLength;
+      frame_buffer_ptr += Insert(nalu_ptr,
+                                 length,
+                                 packet.insertStartCode,
+                                 const_cast<uint8_t*>(frame_buffer_ptr));
+      nalu_ptr += length;
+    }
+    packet.sizeBytes = required_length;
+    return packet.sizeBytes;
   }
-  memcpy(const_cast<uint8_t*>(packet.dataPtr
-      + (packet.insertStartCode ? kH264StartCodeLengthBytes : 0)),
-      data,
-      packet.sizeBytes);
+  ShiftSubsequentPackets(
+      packet_it,
+      packet.sizeBytes +
+          (packet.insertStartCode ? kH264StartCodeLengthBytes : 0));
 
-  return packet_size;
+  packet.sizeBytes = Insert(packet_buffer,
+                            packet.sizeBytes,
+                            packet.insertStartCode,
+                            const_cast<uint8_t*>(packet.dataPtr));
+  return packet.sizeBytes;
+}
+
+size_t VCMSessionInfo::Insert(const uint8_t* buffer,
+                              size_t length,
+                              bool insert_start_code,
+                              uint8_t* frame_buffer) {
+  if (insert_start_code) {
+    const unsigned char startCode[] = {0, 0, 0, 1};
+    memcpy(frame_buffer, startCode, kH264StartCodeLengthBytes);
+  }
+  memcpy(frame_buffer + (insert_start_code ? kH264StartCodeLengthBytes : 0),
+         buffer,
+         length);
+  length += (insert_start_code ? kH264StartCodeLengthBytes : 0);
+
+  return length;
 }
 
 void VCMSessionInfo::ShiftSubsequentPackets(PacketIterator it,
@@ -420,34 +462,49 @@
       (*rit).seqNum == packet.seqNum && (*rit).sizeBytes > 0)
     return -2;
 
-  // Only insert media packets between first and last packets (when available).
-  // Placing check here, as to properly account for duplicate packets.
-  // Check if this is first packet (only valid for some codecs)
-  // Should only be set for one packet per session.
-  if (packet.isFirstPacket && first_packet_seq_num_ == -1) {
-    // The first packet in a frame signals the frame type.
+  if (packet.codec == kVideoCodecH264) {
     frame_type_ = packet.frameType;
-    // Store the sequence number for the first packet.
-    first_packet_seq_num_ = static_cast<int>(packet.seqNum);
-  } else if (first_packet_seq_num_ != -1 &&
-        !IsNewerSequenceNumber(packet.seqNum, first_packet_seq_num_)) {
-    LOG(LS_WARNING) << "Received packet with a sequence number which is out of"
-                       "frame boundaries";
-    return -3;
-  } else if (frame_type_ == kFrameEmpty && packet.frameType != kFrameEmpty) {
-    // Update the frame type with the type of the first media packet.
-    // TODO(mikhal): Can this trigger?
-    frame_type_ = packet.frameType;
-  }
+    if (packet.isFirstPacket &&
+        (first_packet_seq_num_ == -1 ||
+         IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) {
+      first_packet_seq_num_ = packet.seqNum;
+    }
+    if (packet.markerBit &&
+        (last_packet_seq_num_ == -1 ||
+         IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) {
+      last_packet_seq_num_ = packet.seqNum;
+    }
+  } else {
+    // Only insert media packets between first and last packets (when
+    // available).
+    // Placing check here, as to properly account for duplicate packets.
+    // Check if this is first packet (only valid for some codecs)
+    // Should only be set for one packet per session.
+    if (packet.isFirstPacket && first_packet_seq_num_ == -1) {
+      // The first packet in a frame signals the frame type.
+      frame_type_ = packet.frameType;
+      // Store the sequence number for the first packet.
+      first_packet_seq_num_ = static_cast<int>(packet.seqNum);
+    } else if (first_packet_seq_num_ != -1 &&
+               !IsNewerSequenceNumber(packet.seqNum, first_packet_seq_num_)) {
+      LOG(LS_WARNING) << "Received packet with a sequence number which is out "
+                         "of frame boundaries";
+      return -3;
+    } else if (frame_type_ == kFrameEmpty && packet.frameType != kFrameEmpty) {
+      // Update the frame type with the type of the first media packet.
+      // TODO(mikhal): Can this trigger?
+      frame_type_ = packet.frameType;
+    }
 
-  // Track the marker bit, should only be set for one packet per session.
-  if (packet.markerBit && last_packet_seq_num_ == -1) {
-    last_packet_seq_num_ = static_cast<int>(packet.seqNum);
-  } else if (last_packet_seq_num_ != -1 &&
-      IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
-    LOG(LS_WARNING) << "Received packet with a sequence number which is out of"
-                       "frame boundaries";
-    return -3;
+    // Track the marker bit, should only be set for one packet per session.
+    if (packet.markerBit && last_packet_seq_num_ == -1) {
+      last_packet_seq_num_ = static_cast<int>(packet.seqNum);
+    } else if (last_packet_seq_num_ != -1 &&
+               IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
+      LOG(LS_WARNING) << "Received packet with a sequence number which is out "
+                         "of frame boundaries";
+      return -3;
+    }
   }
 
   // The insert operation invalidates the iterator |rit|.
diff --git a/webrtc/modules/video_coding/main/source/session_info.h b/webrtc/modules/video_coding/main/source/session_info.h
index cae3ee1..25216c7 100644
--- a/webrtc/modules/video_coding/main/source/session_info.h
+++ b/webrtc/modules/video_coding/main/source/session_info.h
@@ -116,6 +116,10 @@
                          const PacketIterator& prev_it);
   int InsertBuffer(uint8_t* frame_buffer,
                    PacketIterator packetIterator);
+  size_t Insert(const uint8_t* buffer,
+                size_t length,
+                bool insert_start_code,
+                uint8_t* frame_buffer);
   void ShiftSubsequentPackets(PacketIterator it, int steps_to_shift);
   PacketIterator FindNaluEnd(PacketIterator packet_iter) const;
   // Deletes the data of all packets between |start| and |end|, inclusively.