Add stereo codec header and pass it through RTP

- Defines CodecSpecificInfoStereo, which carries stereo-specific header info
from the encoded image.
- Defines RTPVideoHeaderStereo that carries the above info to packetizer,
see module_common_types.h.
- Adds an RTPPacketizer and an RTPDepacketizer that support passing the
stereo-specific header.
- Uses new data containers in StereoAdapter classes.

This CL is step 3 of adding alpha channel support over the wire in WebRTC.
See https://webrtc-review.googlesource.com/c/src/+/7800 for the experimental
CL that gives an idea about how it will come together.
Design Doc: https://goo.gl/sFeSUT

Bug: webrtc:7671
Change-Id: Ia932568fdd7065ba104afd2bc0ecf25a765748ab
Reviewed-on: https://webrtc-review.googlesource.com/22900
Reviewed-by: Emircan Uysaler <emircan@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Niklas Enbom <niklas.enbom@webrtc.org>
Commit-Queue: Emircan Uysaler <emircan@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20920}
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 9d22006..997ad08 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -132,6 +132,7 @@
   sources = [
     "codecs/h264/include/h264_globals.h",
     "codecs/interface/common_constants.h",
+    "codecs/stereo/include/stereo_globals.h",
     "codecs/vp8/include/vp8_globals.h",
     "codecs/vp9/include/vp9_globals.h",
   ]
diff --git a/modules/video_coding/codec_database.cc b/modules/video_coding/codec_database.cc
index 2bc3077..e300ad3 100644
--- a/modules/video_coding/codec_database.cc
+++ b/modules/video_coding/codec_database.cc
@@ -254,6 +254,7 @@
     case kVideoCodecRED:
     case kVideoCodecULPFEC:
     case kVideoCodecFlexfec:
+    case kVideoCodecStereo:
       break;
     // Unknown codec type, reset just to be sure.
     case kVideoCodecUnknown:
diff --git a/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h b/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h
index ef1e9e1..74ed1a2 100644
--- a/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h
+++ b/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h
@@ -11,6 +11,7 @@
 #ifndef MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_ENCODER_ADAPTER_H_
 #define MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_ENCODER_ADAPTER_H_
 
+#include <map>
 #include <memory>
 #include <vector>
 
@@ -56,15 +57,16 @@
   // Wrapper class that redirects OnEncodedImage() calls.
   class AdapterEncodedImageCallback;
 
-  // Holds the encoded image output of a frame.
-  struct EncodedImageData;
-
   VideoEncoderFactory* const factory_;
   std::vector<std::unique_ptr<VideoEncoder>> encoders_;
   std::vector<std::unique_ptr<AdapterEncodedImageCallback>> adapter_callbacks_;
   EncodedImageCallback* encoded_complete_callback_;
 
-  uint64_t picture_index_ = 0;
+  // Holds the encoded image info.
+  struct ImageStereoInfo;
+  std::map<uint32_t /* timestamp */, ImageStereoInfo> image_stereo_info_;
+
+  uint16_t picture_index_ = 0;
   std::vector<uint8_t> stereo_dummy_planes_;
 };
 
diff --git a/modules/video_coding/codecs/stereo/include/stereo_globals.h b/modules/video_coding/codecs/stereo/include/stereo_globals.h
new file mode 100644
index 0000000..9f9ad0e
--- /dev/null
+++ b/modules/video_coding/codecs/stereo/include/stereo_globals.h
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_GLOBALS_H_
+#define MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_GLOBALS_H_
+
+namespace webrtc {
+
+struct StereoIndices {  // Index fields of the stereo codec header.
+  uint8_t frame_index;     // Stream index of this frame (YUV or AXX).
+  uint8_t frame_count;     // Frames sent for the picture; 1 means no alpha.
+  uint16_t picture_index;  // Counter bumped once per encoded input picture.
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_GLOBALS_H_
diff --git a/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc b/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc
index 82e87d4..caf3299 100644
--- a/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc
+++ b/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc
@@ -33,18 +33,18 @@
                               AlphaCodecStream stream_idx)
       : adapter_(adapter), stream_idx_(stream_idx) {}
 
-  void Decoded(VideoFrame& decodedImage,
+  void Decoded(VideoFrame& decoded_image,
                rtc::Optional<int32_t> decode_time_ms,
                rtc::Optional<uint8_t> qp) override {
     if (!adapter_)
       return;
-    adapter_->Decoded(stream_idx_, &decodedImage, decode_time_ms, qp);
+    adapter_->Decoded(stream_idx_, &decoded_image, decode_time_ms, qp);
   }
-  int32_t Decoded(VideoFrame& decodedImage) override {
+  int32_t Decoded(VideoFrame& decoded_image) override {
     RTC_NOTREACHED();
     return WEBRTC_VIDEO_CODEC_OK;
   }
-  int32_t Decoded(VideoFrame& decodedImage, int64_t decode_time_ms) override {
+  int32_t Decoded(VideoFrame& decoded_image, int64_t decode_time_ms) override {
     RTC_NOTREACHED();
     return WEBRTC_VIDEO_CODEC_OK;
   }
@@ -57,22 +57,22 @@
 struct StereoDecoderAdapter::DecodedImageData {
   explicit DecodedImageData(AlphaCodecStream stream_idx)
       : stream_idx_(stream_idx),
-        decodedImage_(I420Buffer::Create(1 /* width */, 1 /* height */),
-                      0,
-                      0,
-                      kVideoRotation_0) {
+        decoded_image_(I420Buffer::Create(1 /* width */, 1 /* height */),
+                       0,
+                       0,
+                       kVideoRotation_0) {
     RTC_DCHECK_EQ(kAXXStream, stream_idx);
   }
   DecodedImageData(AlphaCodecStream stream_idx,
-                   const VideoFrame& decodedImage,
+                   const VideoFrame& decoded_image,
                    const rtc::Optional<int32_t>& decode_time_ms,
                    const rtc::Optional<uint8_t>& qp)
       : stream_idx_(stream_idx),
-        decodedImage_(decodedImage),
+        decoded_image_(decoded_image),
         decode_time_ms_(decode_time_ms),
         qp_(qp) {}
   const AlphaCodecStream stream_idx_;
-  VideoFrame decodedImage_;
+  VideoFrame decoded_image_;
   const rtc::Optional<int32_t> decode_time_ms_;
   const rtc::Optional<uint8_t> qp_;
 
@@ -113,14 +113,21 @@
     const RTPFragmentationHeader* /*fragmentation*/,
     const CodecSpecificInfo* codec_specific_info,
     int64_t render_time_ms) {
-  // TODO(emircan): Read |codec_specific_info->stereoInfo| to split frames.
-  int32_t rv =
-      decoders_[kYUVStream]->Decode(input_image, missing_frames, nullptr,
-                                    codec_specific_info, render_time_ms);
-  if (rv)
-    return rv;
-  rv = decoders_[kAXXStream]->Decode(input_image, missing_frames, nullptr,
-                                     codec_specific_info, render_time_ms);
+  const CodecSpecificInfoStereo& stereo_info =
+      codec_specific_info->codecSpecific.stereo;
+  RTC_DCHECK_LT(static_cast<size_t>(stereo_info.indices.frame_index),
+                decoders_.size());
+  if (stereo_info.indices.frame_count == 1) {
+    RTC_DCHECK_EQ(static_cast<int>(stereo_info.indices.frame_index), 0);
+    RTC_DCHECK(decoded_data_.find(input_image._timeStamp) ==
+               decoded_data_.end());
+    decoded_data_.emplace(std::piecewise_construct,
+                          std::forward_as_tuple(input_image._timeStamp),
+                          std::forward_as_tuple(kAXXStream));
+  }
+
+  int32_t rv = decoders_[stereo_info.indices.frame_index]->Decode(
+      input_image, missing_frames, nullptr, nullptr, render_time_ms);
   return rv;
 }
 
@@ -152,12 +159,12 @@
     if (stream_idx == kYUVStream) {
       RTC_DCHECK_EQ(kAXXStream, other_image_data.stream_idx_);
       MergeAlphaImages(decoded_image, decode_time_ms, qp,
-                       &other_image_data.decodedImage_,
+                       &other_image_data.decoded_image_,
                        other_image_data.decode_time_ms_, other_image_data.qp_);
     } else {
       RTC_DCHECK_EQ(kYUVStream, other_image_data.stream_idx_);
       RTC_DCHECK_EQ(kAXXStream, stream_idx);
-      MergeAlphaImages(&other_image_data.decodedImage_,
+      MergeAlphaImages(&other_image_data.decoded_image_,
                        other_image_data.decode_time_ms_, other_image_data.qp_,
                        decoded_image, decode_time_ms, qp);
     }
@@ -166,6 +173,8 @@
   }
   RTC_DCHECK(decoded_data_.find(decoded_image->timestamp()) ==
              decoded_data_.end());
+  // decoded_data_[decoded_image->timestamp()] =
+  //     DecodedImageData(stream_idx, *decoded_image, decode_time_ms, qp);
   decoded_data_.emplace(
       std::piecewise_construct,
       std::forward_as_tuple(decoded_image->timestamp()),
@@ -173,16 +182,21 @@
 }
 
 void StereoDecoderAdapter::MergeAlphaImages(
-    VideoFrame* decodedImage,
+    VideoFrame* decoded_image,
     const rtc::Optional<int32_t>& decode_time_ms,
     const rtc::Optional<uint8_t>& qp,
-    VideoFrame* alpha_decodedImage,
+    VideoFrame* alpha_decoded_image,
     const rtc::Optional<int32_t>& alpha_decode_time_ms,
     const rtc::Optional<uint8_t>& alpha_qp) {
+  if (!alpha_decoded_image->timestamp()) {
+    decoded_complete_callback_->Decoded(*decoded_image, decode_time_ms, qp);
+    return;
+  }
+
   rtc::scoped_refptr<webrtc::I420BufferInterface> yuv_buffer =
-      decodedImage->video_frame_buffer()->ToI420();
+      decoded_image->video_frame_buffer()->ToI420();
   rtc::scoped_refptr<webrtc::I420BufferInterface> alpha_buffer =
-      alpha_decodedImage->video_frame_buffer()->ToI420();
+      alpha_decoded_image->video_frame_buffer()->ToI420();
   RTC_DCHECK_EQ(yuv_buffer->width(), alpha_buffer->width());
   RTC_DCHECK_EQ(yuv_buffer->height(), alpha_buffer->height());
   rtc::scoped_refptr<I420ABufferInterface> merged_buffer = WrapI420ABuffer(
@@ -192,8 +206,8 @@
       alpha_buffer->StrideY(),
       rtc::Bind(&KeepBufferRefs, yuv_buffer, alpha_buffer));
 
-  VideoFrame merged_image(merged_buffer, decodedImage->timestamp(),
-                          0 /* render_time_ms */, decodedImage->rotation());
+  VideoFrame merged_image(merged_buffer, decoded_image->timestamp(),
+                          0 /* render_time_ms */, decoded_image->rotation());
   decoded_complete_callback_->Decoded(merged_image, decode_time_ms, qp);
 }
 
diff --git a/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc b/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc
index b449c68..ed7a486 100644
--- a/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc
+++ b/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc
@@ -44,6 +44,20 @@
   const AlphaCodecStream stream_idx_;
 };
 
+// Bookkeeping for one input frame; the adapter maps timestamp -> this info.
+struct StereoEncoderAdapter::ImageStereoInfo {
+  ImageStereoInfo(uint16_t picture_index, uint8_t frame_count)
+      : picture_index(picture_index),
+        frame_count(frame_count),
+        encoded_count(0) {}
+  uint16_t picture_index;  // Adapter's |picture_index_| when Encode() ran.
+  uint8_t frame_count;     // kAlphaCodecStreams with alpha, otherwise 1.
+  uint8_t encoded_count;   // Encoded callbacks seen so far for this frame.
+
+ private:
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ImageStereoInfo);
+};
+
 StereoEncoderAdapter::StereoEncoderAdapter(VideoEncoderFactory* factory)
     : factory_(factory), encoded_complete_callback_(nullptr) {}
 
@@ -83,15 +97,21 @@
   if (!encoded_complete_callback_) {
     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
   }
-  // Encode YUV
-  int rv = encoders_[kYUVStream]->Encode(input_image, codec_specific_info,
-                                         frame_types);
-  if (rv)
-    return rv;
 
   const bool has_alpha = input_image.video_frame_buffer()->type() ==
                          VideoFrameBuffer::Type::kI420A;
-  if (!has_alpha)
+  image_stereo_info_.emplace(
+      std::piecewise_construct, std::forward_as_tuple(input_image.timestamp()),
+      std::forward_as_tuple(picture_index_++,
+                            has_alpha ? kAlphaCodecStreams : 1));
+
+  // Encode YUV
+  int rv = encoders_[kYUVStream]->Encode(input_image, codec_specific_info,
+                                         frame_types);
+  // If we do not receive an alpha frame, we send a single frame for this
+  // |picture_index_|. The receiver will receive |frame_count| as 1 which
+  // specifies this case.
+  if (rv || !has_alpha)
     return rv;
 
   // Encode AXX
@@ -129,7 +149,7 @@
 int StereoEncoderAdapter::SetRateAllocation(const BitrateAllocation& bitrate,
                                             uint32_t framerate) {
   for (auto& encoder : encoders_) {
-    // TODO(emircan): |new_framerate| is used to calculate duration for encoder
+    // TODO(emircan): |framerate| is used to calculate duration in encoder
     // instances. We report the total frame rate to keep real time for now.
     // Remove this after refactoring duration logic.
     const int rv = encoder->SetRateAllocation(
@@ -160,11 +180,25 @@
     const EncodedImage& encodedImage,
     const CodecSpecificInfo* codecSpecificInfo,
     const RTPFragmentationHeader* fragmentation) {
-  if (stream_idx == kAXXStream)
-    return EncodedImageCallback::Result(EncodedImageCallback::Result::OK);
+  const VideoCodecType associated_coded_type = codecSpecificInfo->codecType;
+  const auto& image_stereo_info_itr =
+      image_stereo_info_.find(encodedImage._timeStamp);
+  RTC_DCHECK(image_stereo_info_itr != image_stereo_info_.end());
+  ImageStereoInfo& image_stereo_info = image_stereo_info_itr->second;
+  const uint8_t frame_count = image_stereo_info.frame_count;
+  const uint16_t picture_index = image_stereo_info.picture_index;
+  if (++image_stereo_info.encoded_count == frame_count)
+    image_stereo_info_.erase(image_stereo_info_itr);
 
-  // TODO(emircan): Fill |codec_specific_info| with stereo parameters.
-  encoded_complete_callback_->OnEncodedImage(encodedImage, codecSpecificInfo,
+  CodecSpecificInfo codec_info = *codecSpecificInfo;
+  codec_info.codecType = kVideoCodecStereo;
+  codec_info.codec_name = "stereo";
+  codec_info.codecSpecific.stereo.associated_codec_type = associated_coded_type;
+  codec_info.codecSpecific.stereo.indices.frame_index = stream_idx;
+  codec_info.codecSpecific.stereo.indices.frame_count = frame_count;
+  codec_info.codecSpecific.stereo.indices.picture_index = picture_index;
+
+  encoded_complete_callback_->OnEncodedImage(encodedImage, &codec_info,
                                              fragmentation);
   return EncodedImageCallback::Result(EncodedImageCallback::Result::OK);
 }
diff --git a/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc b/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc
index caef8b1..34723c5 100644
--- a/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc
+++ b/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc
@@ -101,8 +101,18 @@
   EncodedImage encoded_frame;
   CodecSpecificInfo codec_specific_info;
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            decoder_->Decode(encoded_frame, false, nullptr));
+
+  EXPECT_EQ(kVideoCodecStereo, codec_specific_info.codecType);
+  EXPECT_EQ(kVideoCodecVP9,
+            codec_specific_info.codecSpecific.stereo.associated_codec_type);
+  EXPECT_EQ(0, codec_specific_info.codecSpecific.stereo.indices.frame_index);
+  EXPECT_EQ(1, codec_specific_info.codecSpecific.stereo.indices.frame_count);
+  EXPECT_EQ(0ull,
+            codec_specific_info.codecSpecific.stereo.indices.picture_index);
+
+  EXPECT_EQ(
+      WEBRTC_VIDEO_CODEC_OK,
+      decoder_->Decode(encoded_frame, false, nullptr, &codec_specific_info));
   std::unique_ptr<VideoFrame> decoded_frame;
   rtc::Optional<uint8_t> decoded_qp;
   ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
@@ -112,13 +122,38 @@
 
 TEST_F(TestStereoAdapter, EncodeDecodeI420AFrame) {
   std::unique_ptr<VideoFrame> yuva_frame = CreateI420AInputFrame();
+  const size_t expected_num_encoded_frames = 2;
+  SetWaitForEncodedFramesThreshold(expected_num_encoded_frames);
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*yuva_frame, nullptr, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            decoder_->Decode(encoded_frame, false, nullptr));
+  std::vector<EncodedImage> encoded_frames;
+  std::vector<CodecSpecificInfo> codec_specific_infos;
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_infos));
+  EXPECT_EQ(expected_num_encoded_frames, encoded_frames.size());
+  EXPECT_EQ(expected_num_encoded_frames, codec_specific_infos.size());
+
+  const CodecSpecificInfo& yuv_info = codec_specific_infos[kYUVStream];
+  EXPECT_EQ(kVideoCodecStereo, yuv_info.codecType);
+  EXPECT_EQ(kVideoCodecVP9,
+            yuv_info.codecSpecific.stereo.associated_codec_type);
+  EXPECT_EQ(kYUVStream, yuv_info.codecSpecific.stereo.indices.frame_index);
+  EXPECT_EQ(kAlphaCodecStreams,
+            yuv_info.codecSpecific.stereo.indices.frame_count);
+  EXPECT_EQ(0ull, yuv_info.codecSpecific.stereo.indices.picture_index);
+
+  const CodecSpecificInfo& axx_info = codec_specific_infos[kAXXStream];
+  EXPECT_EQ(kVideoCodecStereo, axx_info.codecType);
+  EXPECT_EQ(kVideoCodecVP9,
+            axx_info.codecSpecific.stereo.associated_codec_type);
+  EXPECT_EQ(kAXXStream, axx_info.codecSpecific.stereo.indices.frame_index);
+  EXPECT_EQ(kAlphaCodecStreams,
+            axx_info.codecSpecific.stereo.indices.frame_count);
+  EXPECT_EQ(0ull, axx_info.codecSpecific.stereo.indices.picture_index);
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frames[kYUVStream],
+                                                    false, nullptr, &yuv_info));
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frames[kAXXStream],
+                                                    false, nullptr, &axx_info));
   std::unique_ptr<VideoFrame> decoded_frame;
   rtc::Optional<uint8_t> decoded_qp;
   ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
diff --git a/modules/video_coding/codecs/test/video_codec_test.cc b/modules/video_coding/codecs/test/video_codec_test.cc
index b6faae8..88e9b39 100644
--- a/modules/video_coding/codecs/test/video_codec_test.cc
+++ b/modules/video_coding/codecs/test/video_codec_test.cc
@@ -33,13 +33,19 @@
     const CodecSpecificInfo* codec_specific_info,
     const RTPFragmentationHeader* fragmentation) {
   rtc::CritScope lock(&test_->encoded_frame_section_);
-  test_->encoded_frame_.emplace(frame);
+  test_->encoded_frames_.push_back(frame);
   RTC_DCHECK(codec_specific_info);
-  test_->codec_specific_info_.codecType = codec_specific_info->codecType;
-  // Skip |codec_name|, to avoid allocating.
-  test_->codec_specific_info_.codecSpecific =
-      codec_specific_info->codecSpecific;
-  test_->encoded_frame_event_.Set();
+  test_->codec_specific_infos_.push_back(*codec_specific_info);
+  if (!test_->wait_for_encoded_frames_threshold_) {
+    test_->encoded_frame_event_.Set();
+    return Result(Result::OK);
+  }
+
+  if (test_->encoded_frames_.size() ==
+      test_->wait_for_encoded_frames_threshold_) {
+    test_->wait_for_encoded_frames_threshold_ = 1;
+    test_->encoded_frame_event_.Set();
+  }
   return Result(Result::OK);
 }
 
@@ -74,17 +80,38 @@
 bool VideoCodecTest::WaitForEncodedFrame(
     EncodedImage* frame,
     CodecSpecificInfo* codec_specific_info) {
-  bool ret = encoded_frame_event_.Wait(kEncodeTimeoutMs);
-  EXPECT_TRUE(ret) << "Timed out while waiting for an encoded frame.";
+  std::vector<EncodedImage> frames;
+  std::vector<CodecSpecificInfo> codec_specific_infos;
+  if (!WaitForEncodedFrames(&frames, &codec_specific_infos))
+    return false;
+  EXPECT_EQ(frames.size(), static_cast<size_t>(1));
+  EXPECT_EQ(frames.size(), codec_specific_infos.size());
+  *frame = frames[0];
+  *codec_specific_info = codec_specific_infos[0];
+  return true;
+}
+
+void VideoCodecTest::SetWaitForEncodedFramesThreshold(size_t num_frames) {
+  rtc::CritScope lock(&encoded_frame_section_);  // Callback reads it locked.
+  wait_for_encoded_frames_threshold_ = num_frames;  // Callback resets to 1.
+}
+
+bool VideoCodecTest::WaitForEncodedFrames(
+    std::vector<EncodedImage>* frames,
+    std::vector<CodecSpecificInfo>* codec_specific_info) {
+  EXPECT_TRUE(encoded_frame_event_.Wait(kEncodeTimeoutMs))
+      << "Timed out while waiting for encoded frame.";
   // This becomes unsafe if there are multiple threads waiting for frames.
   rtc::CritScope lock(&encoded_frame_section_);
-  EXPECT_TRUE(encoded_frame_);
-  if (encoded_frame_) {
-    *frame = std::move(*encoded_frame_);
-    encoded_frame_.reset();
-    RTC_DCHECK(codec_specific_info);
-    codec_specific_info->codecType = codec_specific_info_.codecType;
-    codec_specific_info->codecSpecific = codec_specific_info_.codecSpecific;
+  EXPECT_FALSE(encoded_frames_.empty());
+  EXPECT_FALSE(codec_specific_infos_.empty());
+  EXPECT_EQ(encoded_frames_.size(), codec_specific_infos_.size());
+  if (!encoded_frames_.empty()) {
+    *frames = encoded_frames_;
+    encoded_frames_.clear();
+    RTC_DCHECK(!codec_specific_infos_.empty());
+    *codec_specific_info = codec_specific_infos_;
+    codec_specific_infos_.clear();
     return true;
   } else {
     return false;
diff --git a/modules/video_coding/codecs/test/video_codec_test.h b/modules/video_coding/codecs/test/video_codec_test.h
index 76f5234..6c67a26 100644
--- a/modules/video_coding/codecs/test/video_codec_test.h
+++ b/modules/video_coding/codecs/test/video_codec_test.h
@@ -12,6 +12,7 @@
 #define MODULES_VIDEO_CODING_CODECS_TEST_VIDEO_CODEC_TEST_H_
 
 #include <memory>
+#include <vector>
 
 #include "api/video_codecs/video_decoder.h"
 #include "api/video_codecs/video_encoder.h"
@@ -32,6 +33,7 @@
         decode_complete_callback_(this),
         encoded_frame_event_(false /* manual reset */,
                              false /* initially signaled */),
+        wait_for_encoded_frames_threshold_(1),
         decoded_frame_event_(false /* manual reset */,
                              false /* initially signaled */) {}
 
@@ -74,8 +76,19 @@
 
   void SetUp() override;
 
+  // Helper method for waiting for a single encoded frame.
   bool WaitForEncodedFrame(EncodedImage* frame,
                            CodecSpecificInfo* codec_specific_info);
+
+  // Helper methods for waiting for multiple encoded frames. Caller must
+  // define how many frames are to be waited for via |num_frames| before calling
+  // Encode(). Then they can expect to retrieve them via WaitForEncodedFrames().
+  void SetWaitForEncodedFramesThreshold(size_t num_frames);
+  bool WaitForEncodedFrames(
+      std::vector<EncodedImage>* frames,
+      std::vector<CodecSpecificInfo>* codec_specific_info);
+
+  // Helper method for waiting for a single decoded frame.
   bool WaitForDecodedFrame(std::unique_ptr<VideoFrame>* frame,
                            rtc::Optional<uint8_t>* qp);
 
@@ -95,9 +108,11 @@
 
   rtc::Event encoded_frame_event_;
   rtc::CriticalSection encoded_frame_section_;
-  rtc::Optional<EncodedImage> encoded_frame_
+  size_t wait_for_encoded_frames_threshold_;
+  std::vector<EncodedImage> encoded_frames_
       RTC_GUARDED_BY(encoded_frame_section_);
-  CodecSpecificInfo codec_specific_info_ RTC_GUARDED_BY(encoded_frame_section_);
+  std::vector<CodecSpecificInfo> codec_specific_infos_
+      RTC_GUARDED_BY(encoded_frame_section_);
 
   rtc::Event decoded_frame_event_;
   rtc::CriticalSection decoded_frame_section_;
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index 8abea7e..2d8716c 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -193,6 +193,28 @@
         _codecSpecificInfo.codecType = kVideoCodecH264;
         break;
       }
+      case kRtpVideoStereo: {
+        _codecSpecificInfo.codecType = kVideoCodecStereo;
+        VideoCodecType associated_codec_type = kVideoCodecUnknown;
+        switch (header->codecHeader.stereo.associated_codec_type) {
+          case kRtpVideoVp8:
+            associated_codec_type = kVideoCodecVP8;
+            break;
+          case kRtpVideoVp9:
+            associated_codec_type = kVideoCodecVP9;
+            break;
+          case kRtpVideoH264:
+            associated_codec_type = kVideoCodecH264;
+            break;
+          default:
+            RTC_NOTREACHED();
+        }
+        _codecSpecificInfo.codecSpecific.stereo.associated_codec_type =
+            associated_codec_type;
+        _codecSpecificInfo.codecSpecific.stereo.indices =
+            header->codecHeader.stereo.indices;
+        break;
+      }
       default: {
         _codecSpecificInfo.codecType = kVideoCodecUnknown;
         break;
diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc
index 6a31cfd..6eb28de 100644
--- a/modules/video_coding/frame_object.cc
+++ b/modules/video_coding/frame_object.cc
@@ -43,9 +43,14 @@
   frame_type_ = first_packet->frameType;
   codec_type_ = first_packet->codec;
 
+  // Stereo appends its codec specific info to the last packet to avoid copy.
+  VCMPacket* packet_with_codec_specific =
+      codec_type_ == kVideoCodecStereo ? packet_buffer_->GetPacket(last_seq_num)
+                                       : first_packet;
+
   // TODO(philipel): Remove when encoded image is replaced by FrameObject.
   // VCMEncodedFrame members
-  CopyCodecSpecific(&first_packet->video_header);
+  CopyCodecSpecific(&packet_with_codec_specific->video_header);
   _completeFrame = true;
   _payloadType = first_packet->payloadType;
   _timeStamp = first_packet->timestamp;
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index 6616053..ef52d8bf 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -73,11 +73,17 @@
   H264PacketizationMode packetization_mode;
 };
 
+struct CodecSpecificInfoStereo {  // Stereo header info for an encoded image.
+  VideoCodecType associated_codec_type;  // codecType of the wrapped encoder.
+  StereoIndices indices;  // Frame/picture indices of this encoded frame.
+};
+
 union CodecSpecificInfoUnion {
   CodecSpecificInfoGeneric generic;
   CodecSpecificInfoVP8 VP8;
   CodecSpecificInfoVP9 VP9;
   CodecSpecificInfoH264 H264;
+  CodecSpecificInfoStereo stereo;
 };
 
 // Note: if any pointers are added to this struct or its sub-structs, it
diff --git a/modules/video_coding/packet.cc b/modules/video_coding/packet.cc
index f176194..9ae5ba0 100644
--- a/modules/video_coding/packet.cc
+++ b/modules/video_coding/packet.cc
@@ -133,6 +133,9 @@
       }
       codec = kVideoCodecH264;
       return;
+    case kRtpVideoStereo:
+      codec = kVideoCodecStereo;
+      return;
     case kRtpVideoGeneric:
       codec = kVideoCodecGeneric;
       return;
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index a2f32c2..1f8519e 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -98,6 +98,7 @@
     case kVideoCodecUnknown:
     case kVideoCodecH264:
     case kVideoCodecI420:
+    case kVideoCodecStereo:
     case kVideoCodecGeneric:
       return ManageFrameGeneric(frame, kNoPictureId);
   }