Add audio streams to CallTest and a first A/V call test.

Add audio send and receive streams to CallTest and call the necessary voice engine APIs for the streams to be usable. Verify the implementation by adding a simple test which monitors outgoing packets and checks that both audio and video are being sent with transport sequence numbers.

Audio streams are using a fake audio device with file input.

The CallTest implementation is largely based on call_perf_tests.cc and should in the future replace a lot of that code.

R=pbos@webrtc.org
TBR=kjellander@webrtc.org

BUG=webrtc:5263

Review URL: https://codereview.webrtc.org/1542653002 .

Cr-Commit-Position: refs/heads/master@{#11171}
diff --git a/webrtc/call/bitrate_estimator_tests.cc b/webrtc/call/bitrate_estimator_tests.cc
index e371270..4b24bbd 100644
--- a/webrtc/call/bitrate_estimator_tests.cc
+++ b/webrtc/call/bitrate_estimator_tests.cc
@@ -120,17 +120,18 @@
     receive_transport_->SetReceiver(sender_call_->Receiver());
 
     video_send_config_ = VideoSendStream::Config(send_transport_.get());
-    video_send_config_.rtp.ssrcs.push_back(kSendSsrcs[0]);
+    video_send_config_.rtp.ssrcs.push_back(kVideoSendSsrcs[0]);
     // Encoders will be set separately per stream.
     video_send_config_.encoder_settings.encoder = nullptr;
     video_send_config_.encoder_settings.payload_name = "FAKE";
-    video_send_config_.encoder_settings.payload_type = kFakeSendPayloadType;
+    video_send_config_.encoder_settings.payload_type =
+        kFakeVideoSendPayloadType;
     video_encoder_config_.streams = test::CreateVideoStreams(1);
 
     receive_config_ = VideoReceiveStream::Config(receive_transport_.get());
     // receive_config_.decoders will be set by every stream separately.
     receive_config_.rtp.remote_ssrc = video_send_config_.rtp.ssrcs[0];
-    receive_config_.rtp.local_ssrc = kReceiverLocalSsrc;
+    receive_config_.rtp.local_ssrc = kReceiverLocalVideoSsrc;
     receive_config_.rtp.remb = true;
     receive_config_.rtp.extensions.push_back(
         RtpExtension(RtpExtension::kTOffset, kTOFExtensionId));
diff --git a/webrtc/call/call_perf_tests.cc b/webrtc/call/call_perf_tests.cc
index faefc42..79f1ff6 100644
--- a/webrtc/call/call_perf_tests.cc
+++ b/webrtc/call/call_perf_tests.cc
@@ -284,7 +284,7 @@
 
   test::FakeDecoder fake_decoder;
 
-  CreateSendConfig(1, &sync_send_transport);
+  CreateSendConfig(1, 0, &sync_send_transport);
   CreateMatchingReceiveConfigs(&sync_receive_transport);
 
   AudioSendStream::Config audio_send_config(&audio_send_transport);
@@ -318,9 +318,9 @@
   if (create_audio_first) {
     audio_receive_stream =
         receiver_call_->CreateAudioReceiveStream(audio_recv_config);
-    CreateStreams();
+    CreateVideoStreams();
   } else {
-    CreateStreams();
+    CreateVideoStreams();
     audio_receive_stream =
         receiver_call_->CreateAudioReceiveStream(audio_recv_config);
   }
diff --git a/webrtc/call/packet_injection_tests.cc b/webrtc/call/packet_injection_tests.cc
index 315fc7b..277cd3e 100644
--- a/webrtc/call/packet_injection_tests.cc
+++ b/webrtc/call/packet_injection_tests.cc
@@ -40,7 +40,7 @@
   CreateReceiverCall(Call::Config());
 
   test::NullTransport null_transport;
-  CreateSendConfig(1, &null_transport);
+  CreateSendConfig(1, 0, &null_transport);
   CreateMatchingReceiveConfigs(&null_transport);
   video_receive_configs_[0].decoders[0].payload_type = payload_type;
   switch (codec_type) {
@@ -51,11 +51,11 @@
       video_receive_configs_[0].decoders[0].payload_name = "H264";
       break;
   }
-  CreateStreams();
+  CreateVideoStreams();
 
   RTPHeader header;
   EXPECT_TRUE(rtp_header_parser_->Parse(packet, length, &header));
-  EXPECT_EQ(kSendSsrcs[0], header.ssrc)
+  EXPECT_EQ(kVideoSendSsrcs[0], header.ssrc)
       << "Packet should have configured SSRC to not be dropped early.";
   EXPECT_EQ(payload_type, header.payloadType);
   Start();
diff --git a/webrtc/test/call_test.cc b/webrtc/test/call_test.cc
index bbc1224..83fd844 100644
--- a/webrtc/test/call_test.cc
+++ b/webrtc/test/call_test.cc
@@ -7,8 +7,15 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
+#include "webrtc/base/checks.h"
+#include "webrtc/common.h"
+#include "webrtc/config.h"
 #include "webrtc/test/call_test.h"
 #include "webrtc/test/encoder_settings.h"
+#include "webrtc/test/testsupport/fileutils.h"
+#include "webrtc/voice_engine/include/voe_base.h"
+#include "webrtc/voice_engine/include/voe_codec.h"
+#include "webrtc/voice_engine/include/voe_network.h"
 
 namespace webrtc {
 namespace test {
@@ -20,17 +27,40 @@
 CallTest::CallTest()
     : clock_(Clock::GetRealTimeClock()),
       video_send_config_(nullptr),
-      video_send_stream_(NULL),
-      fake_encoder_(clock_) {}
+      video_send_stream_(nullptr),
+      audio_send_config_(nullptr),
+      audio_send_stream_(nullptr),
+      fake_encoder_(clock_),
+      num_video_streams_(0),
+      num_audio_streams_(0),
+      fake_send_audio_device_(nullptr),
+      fake_recv_audio_device_(nullptr) {}
 
 CallTest::~CallTest() {
 }
 
 void CallTest::RunBaseTest(BaseTest* test,
                            const FakeNetworkPipe::Config& config) {
-  CreateSenderCall(test->GetSenderCallConfig());
-  if (test->ShouldCreateReceivers())
-    CreateReceiverCall(test->GetReceiverCallConfig());
+  num_video_streams_ = test->GetNumVideoStreams();
+  num_audio_streams_ = test->GetNumAudioStreams();
+  RTC_DCHECK(num_video_streams_ > 0 || num_audio_streams_ > 0);
+  Call::Config send_config(test->GetSenderCallConfig());
+  if (num_audio_streams_ > 0) {
+    CreateVoiceEngines();
+    AudioState::Config audio_state_config;
+    audio_state_config.voice_engine = voe_send_.voice_engine;
+    send_config.audio_state = AudioState::Create(audio_state_config);
+  }
+  CreateSenderCall(send_config);
+  if (test->ShouldCreateReceivers()) {
+    Call::Config recv_config(test->GetReceiverCallConfig());
+    if (num_audio_streams_ > 0) {
+      AudioState::Config audio_state_config;
+      audio_state_config.voice_engine = voe_recv_.voice_engine;
+      recv_config.audio_state = AudioState::Create(audio_state_config);
+    }
+    CreateReceiverCall(recv_config);
+  }
   send_transport_.reset(new PacketTransport(
       sender_call_.get(), test, test::PacketTransport::kSender, config));
   receive_transport_.reset(new PacketTransport(
@@ -47,14 +77,29 @@
     receive_transport_->SetReceiver(nullptr);
   }
 
-  CreateSendConfig(test->GetNumStreams(), send_transport_.get());
+  CreateSendConfig(num_video_streams_, num_audio_streams_,
+                   send_transport_.get());
   if (test->ShouldCreateReceivers()) {
     CreateMatchingReceiveConfigs(receive_transport_.get());
   }
-  test->ModifyVideoConfigs(&video_send_config_, &video_receive_configs_,
-                           &video_encoder_config_);
-  CreateStreams();
-  test->OnVideoStreamsCreated(video_send_stream_, video_receive_streams_);
+  if (num_audio_streams_ > 0)
+    SetupVoiceEngineTransports(send_transport_.get(), receive_transport_.get());
+
+  if (num_video_streams_ > 0) {
+    test->ModifyVideoConfigs(&video_send_config_, &video_receive_configs_,
+                             &video_encoder_config_);
+  }
+  if (num_audio_streams_ > 0)
+    test->ModifyAudioConfigs(&audio_send_config_, &audio_receive_configs_);
+
+  if (num_video_streams_ > 0) {
+    CreateVideoStreams();
+    test->OnVideoStreamsCreated(video_send_stream_, video_receive_streams_);
+  }
+  if (num_audio_streams_ > 0) {
+    CreateAudioStreams();
+    test->OnAudioStreamsCreated(audio_send_stream_, audio_receive_streams_);
+  }
 
   CreateFrameGeneratorCapturer();
   test->OnFrameGeneratorCapturerCreated(frame_generator_capturer_.get());
@@ -66,12 +111,28 @@
   Stop();
 
   DestroyStreams();
+  DestroyCalls();
+  if (num_audio_streams_ > 0)
+    DestroyVoiceEngines();
 }
 
 void CallTest::Start() {
-  video_send_stream_->Start();
-  for (size_t i = 0; i < video_receive_streams_.size(); ++i)
-    video_receive_streams_[i]->Start();
+  if (video_send_stream_)
+    video_send_stream_->Start();
+  for (VideoReceiveStream* video_recv_stream : video_receive_streams_)
+    video_recv_stream->Start();
+  if (audio_send_stream_) {
+    fake_send_audio_device_->Start();
+    audio_send_stream_->Start();
+    EXPECT_EQ(0, voe_send_.base->StartSend(voe_send_.channel_id));
+  }
+  for (AudioReceiveStream* audio_recv_stream : audio_receive_streams_)
+    audio_recv_stream->Start();
+  if (!audio_receive_streams_.empty()) {
+    fake_recv_audio_device_->Start();
+    EXPECT_EQ(0, voe_recv_.base->StartPlayout(voe_recv_.channel_id));
+    EXPECT_EQ(0, voe_recv_.base->StartReceive(voe_recv_.channel_id));
+  }
   if (frame_generator_capturer_.get() != NULL)
     frame_generator_capturer_->Start();
 }
@@ -79,9 +140,22 @@
 void CallTest::Stop() {
   if (frame_generator_capturer_.get() != NULL)
     frame_generator_capturer_->Stop();
-  for (size_t i = 0; i < video_receive_streams_.size(); ++i)
-    video_receive_streams_[i]->Stop();
-  video_send_stream_->Stop();
+  if (!audio_receive_streams_.empty()) {
+    fake_recv_audio_device_->Stop();
+    EXPECT_EQ(0, voe_recv_.base->StopReceive(voe_recv_.channel_id));
+    EXPECT_EQ(0, voe_recv_.base->StopPlayout(voe_recv_.channel_id));
+  }
+  for (AudioReceiveStream* audio_recv_stream : audio_receive_streams_)
+    audio_recv_stream->Stop();
+  if (audio_send_stream_) {
+    fake_send_audio_device_->Stop();
+    EXPECT_EQ(0, voe_send_.base->StopSend(voe_send_.channel_id));
+    audio_send_stream_->Stop();
+  }
+  for (VideoReceiveStream* video_recv_stream : video_receive_streams_)
+    video_recv_stream->Stop();
+  if (video_send_stream_)
+    video_send_stream_->Stop();
 }
 
 void CallTest::CreateCalls(const Call::Config& sender_config,
@@ -99,44 +173,63 @@
 }
 
 void CallTest::DestroyCalls() {
-  sender_call_.reset(nullptr);
-  receiver_call_.reset(nullptr);
+  sender_call_.reset();
+  receiver_call_.reset();
 }
 
-void CallTest::CreateSendConfig(size_t num_streams,
+void CallTest::CreateSendConfig(size_t num_video_streams,
+                                size_t num_audio_streams,
                                 Transport* send_transport) {
-  assert(num_streams <= kNumSsrcs);
+  RTC_DCHECK(num_video_streams <= kNumSsrcs);
+  RTC_DCHECK_LE(num_audio_streams, 1u);
+  RTC_DCHECK(num_audio_streams == 0 || voe_send_.channel_id >= 0);
   video_send_config_ = VideoSendStream::Config(send_transport);
   video_send_config_.encoder_settings.encoder = &fake_encoder_;
   video_send_config_.encoder_settings.payload_name = "FAKE";
-  video_send_config_.encoder_settings.payload_type = kFakeSendPayloadType;
+  video_send_config_.encoder_settings.payload_type = kFakeVideoSendPayloadType;
   video_send_config_.rtp.extensions.push_back(
       RtpExtension(RtpExtension::kAbsSendTime, kAbsSendTimeExtensionId));
-  video_encoder_config_.streams = test::CreateVideoStreams(num_streams);
-  for (size_t i = 0; i < num_streams; ++i)
-    video_send_config_.rtp.ssrcs.push_back(kSendSsrcs[i]);
+  video_encoder_config_.streams = test::CreateVideoStreams(num_video_streams);
+  for (size_t i = 0; i < num_video_streams; ++i)
+    video_send_config_.rtp.ssrcs.push_back(kVideoSendSsrcs[i]);
   video_send_config_.rtp.extensions.push_back(
       RtpExtension(RtpExtension::kVideoRotation, kVideoRotationRtpExtensionId));
+
+  if (num_audio_streams > 0) {
+    audio_send_config_ = AudioSendStream::Config(send_transport);
+    audio_send_config_.voe_channel_id = voe_send_.channel_id;
+    audio_send_config_.rtp.ssrc = kAudioSendSsrc;
+  }
 }
 
-void CallTest::CreateMatchingReceiveConfigs(
-    Transport* rtcp_send_transport) {
-  assert(!video_send_config_.rtp.ssrcs.empty());
-  assert(video_receive_configs_.empty());
-  assert(allocated_decoders_.empty());
-  VideoReceiveStream::Config config(rtcp_send_transport);
-  config.rtp.remb = true;
-  config.rtp.local_ssrc = kReceiverLocalSsrc;
+void CallTest::CreateMatchingReceiveConfigs(Transport* rtcp_send_transport) {
+  RTC_DCHECK(!video_send_config_.rtp.ssrcs.empty());
+  RTC_DCHECK(video_receive_configs_.empty());
+  RTC_DCHECK(allocated_decoders_.empty());
+  RTC_DCHECK(num_audio_streams_ == 0 || voe_send_.channel_id >= 0);
+  VideoReceiveStream::Config video_config(rtcp_send_transport);
+  video_config.rtp.remb = true;
+  video_config.rtp.local_ssrc = kReceiverLocalVideoSsrc;
   for (const RtpExtension& extension : video_send_config_.rtp.extensions)
-    config.rtp.extensions.push_back(extension);
+    video_config.rtp.extensions.push_back(extension);
   for (size_t i = 0; i < video_send_config_.rtp.ssrcs.size(); ++i) {
     VideoReceiveStream::Decoder decoder =
         test::CreateMatchingDecoder(video_send_config_.encoder_settings);
     allocated_decoders_.push_back(decoder.decoder);
-    config.decoders.clear();
-    config.decoders.push_back(decoder);
-    config.rtp.remote_ssrc = video_send_config_.rtp.ssrcs[i];
-    video_receive_configs_.push_back(config);
+    video_config.decoders.clear();
+    video_config.decoders.push_back(decoder);
+    video_config.rtp.remote_ssrc = video_send_config_.rtp.ssrcs[i];
+    video_receive_configs_.push_back(video_config);
+  }
+
+  RTC_DCHECK(num_audio_streams_ <= 1);
+  if (num_audio_streams_ == 1) {
+    AudioReceiveStream::Config audio_config;
+    audio_config.rtp.local_ssrc = kReceiverLocalAudioSsrc;
+    audio_config.rtcp_send_transport = rtcp_send_transport;
+    audio_config.voe_channel_id = voe_recv_.channel_id;
+    audio_config.rtp.remote_ssrc = audio_send_config_.rtp.ssrc;
+    audio_receive_configs_.push_back(audio_config);
   }
 }
 
@@ -147,41 +240,131 @@
       stream.max_framerate, clock_));
 }
 
-void CallTest::CreateStreams() {
-  assert(video_send_stream_ == NULL);
-  assert(video_receive_streams_.empty());
+void CallTest::CreateFakeAudioDevices() {
+  fake_send_audio_device_.reset(new FakeAudioDevice(
+      clock_, test::ResourcePath("voice_engine/audio_long16", "pcm")));
+  fake_recv_audio_device_.reset(new FakeAudioDevice(
+      clock_, test::ResourcePath("voice_engine/audio_long16", "pcm")));
+}
+
+void CallTest::CreateVideoStreams() {
+  RTC_DCHECK(video_send_stream_ == nullptr);
+  RTC_DCHECK(video_receive_streams_.empty());
+  RTC_DCHECK(audio_send_stream_ == nullptr);
+  RTC_DCHECK(audio_receive_streams_.empty());
 
   video_send_stream_ = sender_call_->CreateVideoSendStream(
       video_send_config_, video_encoder_config_);
-
   for (size_t i = 0; i < video_receive_configs_.size(); ++i) {
     video_receive_streams_.push_back(
         receiver_call_->CreateVideoReceiveStream(video_receive_configs_[i]));
   }
 }
 
+void CallTest::CreateAudioStreams() {
+  audio_send_stream_ = sender_call_->CreateAudioSendStream(audio_send_config_);
+  for (size_t i = 0; i < audio_receive_configs_.size(); ++i) {
+    audio_receive_streams_.push_back(
+        receiver_call_->CreateAudioReceiveStream(audio_receive_configs_[i]));
+  }
+  CodecInst isac = {kAudioSendPayloadType, "ISAC", 16000, 480, 1, 32000};
+  EXPECT_EQ(0, voe_send_.codec->SetSendCodec(voe_send_.channel_id, isac));
+}
+
 void CallTest::DestroyStreams() {
-  if (video_send_stream_ != NULL)
+  if (video_send_stream_)
     sender_call_->DestroyVideoSendStream(video_send_stream_);
-  video_send_stream_ = NULL;
-  for (size_t i = 0; i < video_receive_streams_.size(); ++i)
-    receiver_call_->DestroyVideoReceiveStream(video_receive_streams_[i]);
+  video_send_stream_ = nullptr;
+  for (VideoReceiveStream* video_recv_stream : video_receive_streams_)
+    receiver_call_->DestroyVideoReceiveStream(video_recv_stream);
+
+  if (audio_send_stream_)
+    sender_call_->DestroyAudioSendStream(audio_send_stream_);
+  audio_send_stream_ = nullptr;
+  for (AudioReceiveStream* audio_recv_stream : audio_receive_streams_)
+    receiver_call_->DestroyAudioReceiveStream(audio_recv_stream);
   video_receive_streams_.clear();
+  audio_receive_streams_.clear();  // Drop pointers to the destroyed streams.
   allocated_decoders_.clear();
 }
 
+void CallTest::CreateVoiceEngines() {
+  CreateFakeAudioDevices();
+  voe_send_.voice_engine = VoiceEngine::Create();
+  voe_send_.base = VoEBase::GetInterface(voe_send_.voice_engine);
+  voe_send_.network = VoENetwork::GetInterface(voe_send_.voice_engine);
+  voe_send_.codec = VoECodec::GetInterface(voe_send_.voice_engine);
+  EXPECT_EQ(0, voe_send_.base->Init(fake_send_audio_device_.get(), nullptr));
+  Config voe_config;
+  voe_config.Set<VoicePacing>(new VoicePacing(true));
+  voe_send_.channel_id = voe_send_.base->CreateChannel(voe_config);
+  EXPECT_GE(voe_send_.channel_id, 0);
+
+  voe_recv_.voice_engine = VoiceEngine::Create();
+  voe_recv_.base = VoEBase::GetInterface(voe_recv_.voice_engine);
+  voe_recv_.network = VoENetwork::GetInterface(voe_recv_.voice_engine);
+  voe_recv_.codec = VoECodec::GetInterface(voe_recv_.voice_engine);
+  EXPECT_EQ(0, voe_recv_.base->Init(fake_recv_audio_device_.get(), nullptr));
+  voe_recv_.channel_id = voe_recv_.base->CreateChannel();
+  EXPECT_GE(voe_recv_.channel_id, 0);
+}
+
+void CallTest::SetupVoiceEngineTransports(PacketTransport* send_transport,
+                                          PacketTransport* recv_transport) {
+  voe_send_.transport_adapter.reset(
+      new internal::TransportAdapter(send_transport));
+  voe_send_.transport_adapter->Enable();
+  EXPECT_EQ(0, voe_send_.network->RegisterExternalTransport(
+                   voe_send_.channel_id, *voe_send_.transport_adapter.get()));
+
+  voe_recv_.transport_adapter.reset(
+      new internal::TransportAdapter(recv_transport));
+  voe_recv_.transport_adapter->Enable();
+  EXPECT_EQ(0, voe_recv_.network->RegisterExternalTransport(
+                   voe_recv_.channel_id, *voe_recv_.transport_adapter.get()));
+}
+
+void CallTest::DestroyVoiceEngines() {
+  voe_recv_.base->DeleteChannel(voe_recv_.channel_id);
+  voe_recv_.channel_id = -1;
+  voe_recv_.base->Release();
+  voe_recv_.base = nullptr;
+  voe_recv_.network->Release();
+  voe_recv_.network = nullptr;
+  voe_recv_.codec->Release();
+  voe_recv_.codec = nullptr;
+
+  voe_send_.base->DeleteChannel(voe_send_.channel_id);
+  voe_send_.channel_id = -1;
+  voe_send_.base->Release();
+  voe_send_.base = nullptr;
+  voe_send_.network->Release();
+  voe_send_.network = nullptr;
+  voe_send_.codec->Release();
+  voe_send_.codec = nullptr;
+
+  VoiceEngine::Delete(voe_send_.voice_engine);
+  voe_send_.voice_engine = nullptr;
+  VoiceEngine::Delete(voe_recv_.voice_engine);
+  voe_recv_.voice_engine = nullptr;
+}
+
 const int CallTest::kDefaultTimeoutMs = 30 * 1000;
 const int CallTest::kLongTimeoutMs = 120 * 1000;
-const uint8_t CallTest::kSendPayloadType = 100;
-const uint8_t CallTest::kFakeSendPayloadType = 125;
+const uint8_t CallTest::kVideoSendPayloadType = 100;
+const uint8_t CallTest::kFakeVideoSendPayloadType = 125;
 const uint8_t CallTest::kSendRtxPayloadType = 98;
 const uint8_t CallTest::kRedPayloadType = 118;
 const uint8_t CallTest::kRtxRedPayloadType = 99;
 const uint8_t CallTest::kUlpfecPayloadType = 119;
+const uint8_t CallTest::kAudioSendPayloadType = 103;
 const uint32_t CallTest::kSendRtxSsrcs[kNumSsrcs] = {0xBADCAFD, 0xBADCAFE,
                                                      0xBADCAFF};
-const uint32_t CallTest::kSendSsrcs[kNumSsrcs] = {0xC0FFED, 0xC0FFEE, 0xC0FFEF};
-const uint32_t CallTest::kReceiverLocalSsrc = 0x123456;
+const uint32_t CallTest::kVideoSendSsrcs[kNumSsrcs] = {0xC0FFED, 0xC0FFEE,
+                                                       0xC0FFEF};
+const uint32_t CallTest::kAudioSendSsrc = 0xDEADBEEF;
+const uint32_t CallTest::kReceiverLocalVideoSsrc = 0x123456;
+const uint32_t CallTest::kReceiverLocalAudioSsrc = 0x1234567;
 const int CallTest::kNackRtpHistoryMs = 1000;
 
 BaseTest::BaseTest(unsigned int timeout_ms) : RtpRtcpObserver(timeout_ms) {
@@ -204,10 +387,14 @@
 void BaseTest::OnTransportsCreated(PacketTransport* send_transport,
                                    PacketTransport* receive_transport) {}
 
-size_t BaseTest::GetNumStreams() const {
+size_t BaseTest::GetNumVideoStreams() const {
   return 1;
 }
 
+size_t BaseTest::GetNumAudioStreams() const {
+  return 0;
+}
+
 void BaseTest::ModifyVideoConfigs(
     VideoSendStream::Config* send_config,
     std::vector<VideoReceiveStream::Config>* receive_configs,
@@ -217,6 +404,14 @@
     VideoSendStream* send_stream,
     const std::vector<VideoReceiveStream*>& receive_streams) {}
 
+void BaseTest::ModifyAudioConfigs(
+    AudioSendStream::Config* send_config,
+    std::vector<AudioReceiveStream::Config>* receive_configs) {}
+
+void BaseTest::OnAudioStreamsCreated(
+    AudioSendStream* send_stream,
+    const std::vector<AudioReceiveStream*>& receive_streams) {}
+
 void BaseTest::OnFrameGeneratorCapturerCreated(
     FrameGeneratorCapturer* frame_generator_capturer) {
 }
diff --git a/webrtc/test/call_test.h b/webrtc/test/call_test.h
index 32820ed..46fbe7f 100644
--- a/webrtc/test/call_test.h
+++ b/webrtc/test/call_test.h
@@ -7,19 +7,26 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef WEBRTC_TEST_COMMON_CALL_TEST_H_
-#define WEBRTC_TEST_COMMON_CALL_TEST_H_
+#ifndef WEBRTC_TEST_CALL_TEST_H_
+#define WEBRTC_TEST_CALL_TEST_H_
 
 #include <vector>
 
 #include "webrtc/call.h"
+#include "webrtc/call/transport_adapter.h"
 #include "webrtc/system_wrappers/include/scoped_vector.h"
+#include "webrtc/test/fake_audio_device.h"
 #include "webrtc/test/fake_decoder.h"
 #include "webrtc/test/fake_encoder.h"
 #include "webrtc/test/frame_generator_capturer.h"
 #include "webrtc/test/rtp_rtcp_observer.h"
 
 namespace webrtc {
+
+class VoEBase;
+class VoECodec;
+class VoENetwork;
+
 namespace test {
 
 class BaseTest;
@@ -27,24 +34,30 @@
 class CallTest : public ::testing::Test {
  public:
   CallTest();
-  ~CallTest();
+  virtual ~CallTest();
 
   static const size_t kNumSsrcs = 3;
 
   static const int kDefaultTimeoutMs;
   static const int kLongTimeoutMs;
-  static const uint8_t kSendPayloadType;
+  static const uint8_t kVideoSendPayloadType;
   static const uint8_t kSendRtxPayloadType;
-  static const uint8_t kFakeSendPayloadType;
+  static const uint8_t kFakeVideoSendPayloadType;
   static const uint8_t kRedPayloadType;
   static const uint8_t kRtxRedPayloadType;
   static const uint8_t kUlpfecPayloadType;
+  static const uint8_t kAudioSendPayloadType;
   static const uint32_t kSendRtxSsrcs[kNumSsrcs];
-  static const uint32_t kSendSsrcs[kNumSsrcs];
-  static const uint32_t kReceiverLocalSsrc;
+  static const uint32_t kVideoSendSsrcs[kNumSsrcs];
+  static const uint32_t kAudioSendSsrc;
+  static const uint32_t kReceiverLocalVideoSsrc;
+  static const uint32_t kReceiverLocalAudioSsrc;
   static const int kNackRtpHistoryMs;
 
  protected:
+  // RunBaseTest overwrites the audio_state and the voice_engine of the send and
+  // receive Call configs to simplify test code and avoid having old VoiceEngine
+  // APIs in the tests.
   void RunBaseTest(BaseTest* test, const FakeNetworkPipe::Config& config);
 
   void CreateCalls(const Call::Config& sender_config,
@@ -53,12 +66,16 @@
   void CreateReceiverCall(const Call::Config& config);
   void DestroyCalls();
 
-  void CreateSendConfig(size_t num_streams, Transport* send_transport);
+  void CreateSendConfig(size_t num_video_streams,
+                        size_t num_audio_streams,
+                        Transport* send_transport);
   void CreateMatchingReceiveConfigs(Transport* rtcp_send_transport);
 
   void CreateFrameGeneratorCapturer();
+  void CreateFakeAudioDevices();
 
-  void CreateStreams();
+  void CreateVideoStreams();
+  void CreateAudioStreams();
   void Start();
   void Stop();
   void DestroyStreams();
@@ -70,15 +87,54 @@
   VideoSendStream::Config video_send_config_;
   VideoEncoderConfig video_encoder_config_;
   VideoSendStream* video_send_stream_;
+  AudioSendStream::Config audio_send_config_;
+  AudioSendStream* audio_send_stream_;
 
   rtc::scoped_ptr<Call> receiver_call_;
   rtc::scoped_ptr<PacketTransport> receive_transport_;
   std::vector<VideoReceiveStream::Config> video_receive_configs_;
   std::vector<VideoReceiveStream*> video_receive_streams_;
+  std::vector<AudioReceiveStream::Config> audio_receive_configs_;
+  std::vector<AudioReceiveStream*> audio_receive_streams_;
 
   rtc::scoped_ptr<test::FrameGeneratorCapturer> frame_generator_capturer_;
   test::FakeEncoder fake_encoder_;
   ScopedVector<VideoDecoder> allocated_decoders_;
+  size_t num_video_streams_;
+  size_t num_audio_streams_;
+
+ private:
+  // TODO(holmer): Remove once VoiceEngine is fully refactored to the new API.
+  // These methods are used to set up legacy voice engines and channels which is
+  // necessary while voice engine is being refactored to the new stream API.
+  struct VoiceEngineState {
+    VoiceEngineState()
+        : voice_engine(nullptr),
+          base(nullptr),
+          network(nullptr),
+          codec(nullptr),
+          channel_id(-1),
+          transport_adapter(nullptr) {}
+
+    VoiceEngine* voice_engine;
+    VoEBase* base;
+    VoENetwork* network;
+    VoECodec* codec;
+    int channel_id;
+    rtc::scoped_ptr<internal::TransportAdapter> transport_adapter;
+  };
+
+  void CreateVoiceEngines();
+  void SetupVoiceEngineTransports(PacketTransport* send_transport,
+                                  PacketTransport* recv_transport);
+  void DestroyVoiceEngines();
+
+  VoiceEngineState voe_send_;
+  VoiceEngineState voe_recv_;
+
+  // The audio devices must outlive the voice engines.
+  rtc::scoped_ptr<test::FakeAudioDevice> fake_send_audio_device_;
+  rtc::scoped_ptr<test::FakeAudioDevice> fake_recv_audio_device_;
 };
 
 class BaseTest : public RtpRtcpObserver {
@@ -89,7 +145,8 @@
   virtual void PerformTest() = 0;
   virtual bool ShouldCreateReceivers() const = 0;
 
-  virtual size_t GetNumStreams() const;
+  virtual size_t GetNumVideoStreams() const;
+  virtual size_t GetNumAudioStreams() const;
 
   virtual Call::Config GetSenderCallConfig();
   virtual Call::Config GetReceiverCallConfig();
@@ -105,6 +162,13 @@
       VideoSendStream* send_stream,
       const std::vector<VideoReceiveStream*>& receive_streams);
 
+  virtual void ModifyAudioConfigs(
+      AudioSendStream::Config* send_config,
+      std::vector<AudioReceiveStream::Config>* receive_configs);
+  virtual void OnAudioStreamsCreated(
+      AudioSendStream* send_stream,
+      const std::vector<AudioReceiveStream*>& receive_streams);
+
   virtual void OnFrameGeneratorCapturerCreated(
       FrameGeneratorCapturer* frame_generator_capturer);
 };
@@ -126,4 +190,4 @@
 }  // namespace test
 }  // namespace webrtc
 
-#endif  // WEBRTC_TEST_COMMON_CALL_TEST_H_
+#endif  // WEBRTC_TEST_CALL_TEST_H_
diff --git a/webrtc/video/end_to_end_tests.cc b/webrtc/video/end_to_end_tests.cc
index f654dbb..3c774ab 100644
--- a/webrtc/video/end_to_end_tests.cc
+++ b/webrtc/video/end_to_end_tests.cc
@@ -31,7 +31,6 @@
 #include "webrtc/test/call_test.h"
 #include "webrtc/test/direct_transport.h"
 #include "webrtc/test/encoder_settings.h"
-#include "webrtc/test/fake_audio_device.h"
 #include "webrtc/test/fake_decoder.h"
 #include "webrtc/test/fake_encoder.h"
 #include "webrtc/test/frame_generator.h"
@@ -86,10 +85,10 @@
   CreateCalls(Call::Config(), Call::Config());
 
   test::NullTransport transport;
-  CreateSendConfig(1, &transport);
+  CreateSendConfig(1, 0, &transport);
   CreateMatchingReceiveConfigs(&transport);
 
-  CreateStreams();
+  CreateVideoStreams();
 
   video_receive_streams_[0]->Start();
   video_receive_streams_[0]->Start();
@@ -101,10 +100,10 @@
   CreateCalls(Call::Config(), Call::Config());
 
   test::NullTransport transport;
-  CreateSendConfig(1, &transport);
+  CreateSendConfig(1, 0, &transport);
   CreateMatchingReceiveConfigs(&transport);
 
-  CreateStreams();
+  CreateVideoStreams();
 
   video_receive_streams_[0]->Stop();
   video_receive_streams_[0]->Stop();
@@ -158,14 +157,14 @@
   sender_transport.SetReceiver(receiver_call_->Receiver());
   receiver_transport.SetReceiver(sender_call_->Receiver());
 
-  CreateSendConfig(1, &sender_transport);
+  CreateSendConfig(1, 0, &sender_transport);
   CreateMatchingReceiveConfigs(&receiver_transport);
 
   TestFrameCallback pre_render_callback;
   video_receive_configs_[0].pre_render_callback = &pre_render_callback;
   video_receive_configs_[0].renderer = &renderer;
 
-  CreateStreams();
+  CreateVideoStreams();
   Start();
 
   // Create frames that are smaller than the send width/height, this is done to
@@ -210,11 +209,11 @@
   sender_transport.SetReceiver(receiver_call_->Receiver());
   receiver_transport.SetReceiver(sender_call_->Receiver());
 
-  CreateSendConfig(1, &sender_transport);
+  CreateSendConfig(1, 0, &sender_transport);
   CreateMatchingReceiveConfigs(&receiver_transport);
   video_receive_configs_[0].renderer = &renderer;
 
-  CreateStreams();
+  CreateVideoStreams();
   Start();
 
   rtc::scoped_ptr<test::FrameGenerator> frame_generator(
@@ -308,7 +307,7 @@
           (*receive_configs)[0].rtp.nack.rtp_history_ms = kNackRtpHistoryMs;
       send_config->encoder_settings.encoder = &fake_encoder_;
       send_config->encoder_settings.payload_name = "H264";
-      send_config->encoder_settings.payload_type = kFakeSendPayloadType;
+      send_config->encoder_settings.payload_type = kFakeVideoSendPayloadType;
       encoder_config->streams[0].min_bitrate_bps = 50000;
       encoder_config->streams[0].target_bitrate_bps =
           encoder_config->streams[0].max_bitrate_bps = 2000000;
@@ -353,7 +352,7 @@
       ssrc |= static_cast<uint32_t>(packet[5]) << 16;
       ssrc |= static_cast<uint32_t>(packet[6]) << 8;
       ssrc |= static_cast<uint32_t>(packet[7]) << 0;
-      EXPECT_EQ(kReceiverLocalSsrc, ssrc);
+      EXPECT_EQ(kReceiverLocalVideoSsrc, ssrc);
       observation_complete_.Set();
 
       return SEND_PACKET;
@@ -474,10 +473,10 @@
       if (header.payloadType == kRedPayloadType) {
         encapsulated_payload_type =
             static_cast<int>(packet[header.headerLength]);
-        if (encapsulated_payload_type != kFakeSendPayloadType)
+        if (encapsulated_payload_type != kFakeVideoSendPayloadType)
           EXPECT_EQ(kUlpfecPayloadType, encapsulated_payload_type);
       } else {
-        EXPECT_EQ(kFakeSendPayloadType, header.payloadType);
+        EXPECT_EQ(kFakeVideoSendPayloadType, header.payloadType);
       }
 
       if (protected_sequence_numbers_.count(header.sequenceNumber) != 0) {
@@ -501,7 +500,7 @@
             return DROP_PACKET;
           break;
         case kDropNextMediaPacket:
-          if (encapsulated_payload_type == kFakeSendPayloadType) {
+          if (encapsulated_payload_type == kFakeVideoSendPayloadType) {
             protected_sequence_numbers_.insert(header.sequenceNumber);
             protected_timestamps_.insert(header.timestamp);
             state_ = kDropEveryOtherPacketUntilFec;
@@ -580,10 +579,10 @@
       if (header.payloadType == kRedPayloadType) {
         encapsulated_payload_type =
             static_cast<int>(packet[header.headerLength]);
-        if (encapsulated_payload_type != kFakeSendPayloadType)
+        if (encapsulated_payload_type != kFakeVideoSendPayloadType)
           EXPECT_EQ(kUlpfecPayloadType, encapsulated_payload_type);
       } else {
-        EXPECT_EQ(kFakeSendPayloadType, header.payloadType);
+        EXPECT_EQ(kFakeVideoSendPayloadType, header.payloadType);
       }
 
       if (has_last_sequence_number_ &&
@@ -698,7 +697,7 @@
     explicit RetransmissionObserver(bool use_rtx, bool use_red)
         : EndToEndTest(kDefaultTimeoutMs),
           payload_type_(GetPayloadType(false, use_red)),
-          retransmission_ssrc_(use_rtx ? kSendRtxSsrcs[0] : kSendSsrcs[0]),
+          retransmission_ssrc_(use_rtx ? kSendRtxSsrcs[0] : kVideoSendSsrcs[0]),
           retransmission_payload_type_(GetPayloadType(use_rtx, use_red)),
           marker_bits_observed_(0),
           num_packets_observed_(0),
@@ -726,7 +725,7 @@
         return SEND_PACKET;
       }
 
-      EXPECT_EQ(kSendSsrcs[0], header.ssrc);
+      EXPECT_EQ(kVideoSendSsrcs[0], header.ssrc);
       EXPECT_EQ(payload_type_, header.payloadType);
 
       // Found the final packet of the frame to inflict loss to, drop this and
@@ -765,9 +764,9 @@
       if (retransmission_ssrc_ == kSendRtxSsrcs[0]) {
         send_config->rtp.rtx.ssrcs.push_back(kSendRtxSsrcs[0]);
         send_config->rtp.rtx.payload_type = kSendRtxPayloadType;
-        (*receive_configs)[0].rtp.rtx[kFakeSendPayloadType].ssrc =
+        (*receive_configs)[0].rtp.rtx[kFakeVideoSendPayloadType].ssrc =
             kSendRtxSsrcs[0];
-        (*receive_configs)[0].rtp.rtx[kFakeSendPayloadType].payload_type =
+        (*receive_configs)[0].rtp.rtx[kFakeVideoSendPayloadType].payload_type =
             kSendRtxPayloadType;
       }
     }
@@ -779,7 +778,7 @@
 
     int GetPayloadType(bool use_rtx, bool use_red) {
       return use_rtx ? kSendRtxPayloadType
-                     : (use_red ? kRedPayloadType : kFakeSendPayloadType);
+                     : (use_red ? kRedPayloadType : kFakeVideoSendPayloadType);
     }
 
     rtc::CriticalSection crit_;
@@ -876,7 +875,7 @@
   sender_transport.SetReceiver(receiver_call_->Receiver());
   receiver_transport.SetReceiver(sender_call_->Receiver());
 
-  CreateSendConfig(1, &sender_transport);
+  CreateSendConfig(1, 0, &sender_transport);
   rtc::scoped_ptr<VideoEncoder> encoder(
       VideoEncoder::Create(VideoEncoder::kVp8));
   video_send_config_.encoder_settings.encoder = encoder.get();
@@ -890,7 +889,7 @@
   video_receive_configs_[0].pre_render_callback = &pre_render_callback;
   video_receive_configs_[0].renderer = &renderer;
 
-  CreateStreams();
+  CreateVideoStreams();
   Start();
 
   // Create frames that are smaller than the send width/height, this is done to
@@ -1050,10 +1049,10 @@
   send_transport.SetReceiver(&input_observer);
   receive_transport.SetReceiver(sender_call_->Receiver());
 
-  CreateSendConfig(1, &send_transport);
+  CreateSendConfig(1, 0, &send_transport);
   CreateMatchingReceiveConfigs(&receive_transport);
 
-  CreateStreams();
+  CreateVideoStreams();
   CreateFrameGeneratorCapturer();
   Start();
 
@@ -1227,7 +1226,7 @@
 
       VideoReceiveStream::Config receive_config(receiver_transport.get());
       receive_config.rtp.remote_ssrc = ssrc;
-      receive_config.rtp.local_ssrc = test::CallTest::kReceiverLocalSsrc;
+      receive_config.rtp.local_ssrc = test::CallTest::kReceiverLocalVideoSsrc;
       VideoReceiveStream::Decoder decoder =
           test::CreateMatchingDecoder(send_config.encoder_settings);
       allocated_decoders.push_back(decoder.decoder);
@@ -1659,12 +1658,12 @@
   sender_transport.SetReceiver(receiver_call_->Receiver());
   receiver_transport.SetReceiver(sender_call_->Receiver());
 
-  CreateSendConfig(1, &sender_transport);
+  CreateSendConfig(1, 0, &sender_transport);
   CreateMatchingReceiveConfigs(&receiver_transport);
   video_send_config_.post_encode_callback = &post_encode_observer;
   video_receive_configs_[0].pre_decode_callback = &pre_decode_observer;
 
-  CreateStreams();
+  CreateVideoStreams();
   Start();
 
   rtc::scoped_ptr<test::FrameGenerator> frame_generator(
@@ -1705,13 +1704,13 @@
       while (packet_type != RTCPUtility::RTCPPacketTypes::kInvalid) {
         if (packet_type == RTCPUtility::RTCPPacketTypes::kPsfbRemb) {
           const RTCPUtility::RTCPPacket& packet = parser.Packet();
-          EXPECT_EQ(packet.PSFBAPP.SenderSSRC, kReceiverLocalSsrc);
+          EXPECT_EQ(packet.PSFBAPP.SenderSSRC, kReceiverLocalVideoSsrc);
           received_psfb = true;
         } else if (packet_type == RTCPUtility::RTCPPacketTypes::kPsfbRembItem) {
           const RTCPUtility::RTCPPacket& packet = parser.Packet();
           EXPECT_GT(packet.REMBItem.BitRate, 0u);
           EXPECT_EQ(packet.REMBItem.NumberOfSSRCs, 1u);
-          EXPECT_EQ(packet.REMBItem.SSRCs[0], kSendSsrcs[0]);
+          EXPECT_EQ(packet.REMBItem.SSRCs[0], kVideoSendSsrcs[0]);
           received_remb = true;
         }
         packet_type = parser.Iterate();
@@ -1825,8 +1824,7 @@
         receive_stream_nack_packets +=
             stats.rtcp_packet_type_counts.nack_packets;
       }
-      if (send_stream_nack_packets >= 1 &&
-          receive_stream_nack_packets >= 1) {
+      if (send_stream_nack_packets >= 1 && receive_stream_nack_packets >= 1) {
         // NACK packet sent on receive stream and received on sent stream.
         if (MinMetricRunTimePassed())
           observation_complete_.Set();
@@ -1940,9 +1938,9 @@
       if (use_rtx_) {
         send_config->rtp.rtx.ssrcs.push_back(kSendRtxSsrcs[0]);
         send_config->rtp.rtx.payload_type = kSendRtxPayloadType;
-        (*receive_configs)[0].rtp.rtx[kFakeSendPayloadType].ssrc =
+        (*receive_configs)[0].rtp.rtx[kFakeVideoSendPayloadType].ssrc =
             kSendRtxSsrcs[0];
-        (*receive_configs)[0].rtp.rtx[kFakeSendPayloadType].payload_type =
+        (*receive_configs)[0].rtp.rtx[kFakeVideoSendPayloadType].payload_type =
             kSendRtxPayloadType;
       }
       encoder_config->content_type =
@@ -2236,7 +2234,7 @@
       return SEND_PACKET;
     }
 
-    size_t GetNumStreams() const override { return num_ssrcs_; }
+    size_t GetNumVideoStreams() const override { return num_ssrcs_; }
 
     void ModifyVideoConfigs(
         VideoSendStream::Config* send_config,
@@ -2287,7 +2285,7 @@
 
     VideoSendStream* send_stream_;
     VideoEncoderConfig video_encoder_config_all_streams_;
-  } test(kSendSsrcs, num_ssrcs, send_single_ssrc_first);
+  } test(kVideoSendSsrcs, num_ssrcs, send_single_ssrc_first);
 
   RunBaseTest(&test, FakeNetworkPipe::Config());
 }
@@ -2443,9 +2441,9 @@
             stats.rtcp_packet_type_counts.unique_nack_requests != 0;
 
         assert(stats.current_payload_type == -1 ||
-               stats.current_payload_type == kFakeSendPayloadType);
+               stats.current_payload_type == kFakeVideoSendPayloadType);
         receive_stats_filled_["IncomingPayloadType"] |=
-            stats.current_payload_type == kFakeSendPayloadType;
+            stats.current_payload_type == kFakeVideoSendPayloadType;
       }
 
       return AllStatsFilled(receive_stats_filled_);
@@ -2552,7 +2550,7 @@
       }
     }
 
-    size_t GetNumStreams() const override { return kNumSsrcs; }
+    size_t GetNumVideoStreams() const override { return kNumSsrcs; }
 
     void OnVideoStreamsCreated(
         VideoSendStream* send_stream,
@@ -2713,7 +2711,7 @@
       return SEND_PACKET;
     }
 
-    size_t GetNumStreams() const override { return kNumSsrcs; }
+    size_t GetNumVideoStreams() const override { return kNumSsrcs; }
 
     void ModifyVideoConfigs(
         VideoSendStream::Config* send_config,
@@ -2759,7 +2757,7 @@
         : test::RtpRtcpObserver(kDefaultTimeoutMs),
           ssrcs_to_observe_(kNumSsrcs) {
       for (size_t i = 0; i < kNumSsrcs; ++i) {
-        configured_ssrcs_[kSendSsrcs[i]] = true;
+        configured_ssrcs_[kVideoSendSsrcs[i]] = true;
         if (use_rtx)
           configured_ssrcs_[kSendRtxSsrcs[i]] = true;
       }
@@ -2852,7 +2850,7 @@
   send_transport.SetReceiver(receiver_call_->Receiver());
   receive_transport.SetReceiver(sender_call_->Receiver());
 
-  CreateSendConfig(kNumSsrcs, &send_transport);
+  CreateSendConfig(kNumSsrcs, 0, &send_transport);
 
   if (use_rtx) {
     for (size_t i = 0; i < kNumSsrcs; ++i) {
@@ -2883,7 +2881,7 @@
 
   CreateMatchingReceiveConfigs(&receive_transport);
 
-  CreateStreams();
+  CreateVideoStreams();
   CreateFrameGeneratorCapturer();
 
   Start();
@@ -3129,10 +3127,10 @@
   sender_transport.SetReceiver(receiver_call_->Receiver());
   receiver_transport.SetReceiver(sender_call_->Receiver());
 
-  CreateSendConfig(1, &sender_transport);
+  CreateSendConfig(1, 0, &sender_transport);
   CreateMatchingReceiveConfigs(&receiver_transport);
 
-  CreateStreams();
+  CreateVideoStreams();
   CreateFrameGeneratorCapturer();
   Start();
 
@@ -3170,10 +3168,10 @@
   sender_call_->SignalNetworkState(kNetworkDown);
 
   UnusedTransport transport;
-  CreateSendConfig(1, &transport);
+  CreateSendConfig(1, 0, &transport);
   UnusedEncoder unused_encoder;
   video_send_config_.encoder_settings.encoder = &unused_encoder;
-  CreateStreams();
+  CreateVideoStreams();
   CreateFrameGeneratorCapturer();
 
   Start();
@@ -3189,10 +3187,10 @@
 
   test::DirectTransport sender_transport(sender_call_.get());
   sender_transport.SetReceiver(receiver_call_->Receiver());
-  CreateSendConfig(1, &sender_transport);
+  CreateSendConfig(1, 0, &sender_transport);
   UnusedTransport transport;
   CreateMatchingReceiveConfigs(&transport);
-  CreateStreams();
+  CreateVideoStreams();
   CreateFrameGeneratorCapturer();
 
   Start();
@@ -3249,4 +3247,90 @@
   VerifyEmptyFecConfig(default_receive_config.rtp.fec);
 }
 
+// Runs a call with one audio and one video stream and verifies that every
+// outgoing RTP packet carries the transport sequence number header extension,
+// and that the sequence numbers are unique and contiguous across both media
+// types.
+TEST_F(EndToEndTest, TransportSeqNumOnAudioAndVideo) {
+  static const int kExtensionId = 8;
+  // Minimum number of observed packets before the test is allowed to finish.
+  static const size_t kMinPacketsToWaitFor = 50;
+  class TransportSequenceNumberTest : public test::EndToEndTest {
+   public:
+    TransportSequenceNumberTest()
+        : EndToEndTest(kDefaultTimeoutMs),
+          video_observed_(false),
+          audio_observed_(false) {
+      parser_->RegisterRtpHeaderExtension(kRtpExtensionTransportSequenceNumber,
+                                          kExtensionId);
+    }
+
+    size_t GetNumVideoStreams() const override { return 1; }
+    size_t GetNumAudioStreams() const override { return 1; }
+
+    // Replaces the video RTP header extensions with only the transport
+    // sequence number extension, mirrored on the first receive config.
+    void ModifyVideoConfigs(
+        VideoSendStream::Config* send_config,
+        std::vector<VideoReceiveStream::Config>* receive_configs,
+        VideoEncoderConfig* encoder_config) override {
+      send_config->rtp.extensions.clear();
+      send_config->rtp.extensions.push_back(
+          RtpExtension(RtpExtension::kTransportSequenceNumber, kExtensionId));
+      (*receive_configs)[0].rtp.extensions = send_config->rtp.extensions;
+    }
+
+    // Same as ModifyVideoConfigs, but for the audio stream.
+    void ModifyAudioConfigs(
+        AudioSendStream::Config* send_config,
+        std::vector<AudioReceiveStream::Config>* receive_configs) override {
+      send_config->rtp.extensions.clear();
+      send_config->rtp.extensions.push_back(
+          RtpExtension(RtpExtension::kTransportSequenceNumber, kExtensionId));
+      (*receive_configs)[0].rtp.extensions = send_config->rtp.extensions;
+    }
+
+    // Inspects every outgoing RTP packet; completes the test once both media
+    // types have been seen and enough packets have been collected.
+    Action OnSendRtp(const uint8_t* packet, size_t length) override {
+      RTPHeader header;
+      EXPECT_TRUE(parser_->Parse(packet, length, &header));
+      EXPECT_TRUE(header.extension.hasTransportSequenceNumber);
+      // Unwrap packet id and verify uniqueness.
+      int64_t packet_id =
+          unwrapper_.Unwrap(header.extension.transportSequenceNumber);
+      EXPECT_TRUE(received_packet_ids_.insert(packet_id).second);
+
+      if (header.ssrc == kVideoSendSsrcs[0])
+        video_observed_ = true;
+      if (header.ssrc == kAudioSendSsrc)
+        audio_observed_ = true;
+      // Use >= rather than ==: if the second media type is first observed
+      // after kMinPacketsToWaitFor packets have already been collected, an
+      // exact match would never trigger and the test would time out.
+      if (audio_observed_ && video_observed_ &&
+          received_packet_ids_.size() >= kMinPacketsToWaitFor) {
+        // A gap-free set spans exactly as many ids as it has elements.
+        size_t packet_id_range =
+            *received_packet_ids_.rbegin() - *received_packet_ids_.begin() + 1;
+        EXPECT_EQ(received_packet_ids_.size(), packet_id_range);
+        observation_complete_.Set();
+      }
+      return SEND_PACKET;
+    }
+
+    void PerformTest() override {
+      EXPECT_TRUE(Wait()) << "Timed out while waiting for audio and video "
+                             "packets with transport sequence number.";
+    }
+
+   private:
+    bool video_observed_;
+    bool audio_observed_;
+    SequenceNumberUnwrapper unwrapper_;
+    std::set<int64_t> received_packet_ids_;
+  } test;
+
+  RunBaseTest(&test, FakeNetworkPipe::Config());
+}
 }  // namespace webrtc
diff --git a/webrtc/video/video_quality_test.cc b/webrtc/video/video_quality_test.cc
index 5b23643..08ae0a9 100644
--- a/webrtc/video/video_quality_test.cc
+++ b/webrtc/video/video_quality_test.cc
@@ -781,7 +781,7 @@
     trace_to_stderr_.reset(new test::TraceToStderr);
 
   size_t num_streams = params_.ss.streams.size();
-  CreateSendConfig(num_streams, send_transport);
+  CreateSendConfig(num_streams, 0, send_transport);
 
   int payload_type;
   if (params_.common.codec == "VP8") {
@@ -964,7 +964,7 @@
       disable_quality_check ? -1.1 : params_.analyzer.avg_ssim_threshold,
       params_.analyzer.test_durations_secs * params_.common.fps,
       graph_data_output_file, graph_title,
-      kSendSsrcs[params_.ss.selected_stream]);
+      kVideoSendSsrcs[params_.ss.selected_stream]);
 
   analyzer.SetReceiver(receiver_call_->Receiver());
   send_transport.SetReceiver(&analyzer);
@@ -979,7 +979,7 @@
   if (params_.screenshare.enabled)
     SetupScreenshare();
 
-  CreateStreams();
+  CreateVideoStreams();
   analyzer.input_ = video_send_stream_->Input();
   analyzer.send_stream_ = video_send_stream_;
 
diff --git a/webrtc/video/video_send_stream_tests.cc b/webrtc/video/video_send_stream_tests.cc
index 24ee296..f0bac12 100644
--- a/webrtc/video/video_send_stream_tests.cc
+++ b/webrtc/video/video_send_stream_tests.cc
@@ -68,8 +68,8 @@
   CreateSenderCall(call_config);
 
   test::NullTransport transport;
-  CreateSendConfig(1, &transport);
-  CreateStreams();
+  CreateSendConfig(1, 0, &transport);
+  CreateVideoStreams();
   video_send_stream_->Start();
   video_send_stream_->Start();
   DestroyStreams();
@@ -80,8 +80,8 @@
   CreateSenderCall(call_config);
 
   test::NullTransport transport;
-  CreateSendConfig(1, &transport);
-  CreateStreams();
+  CreateSendConfig(1, 0, &transport);
+  CreateVideoStreams();
   video_send_stream_->Stop();
   video_send_stream_->Stop();
   DestroyStreams();
@@ -327,14 +327,14 @@
     if (send_count_++ % 2 != 0) {
       // Receive statistics reporting having lost 50% of the packets.
       FakeReceiveStatistics lossy_receive_stats(
-          VideoSendStreamTest::kSendSsrcs[0], header.sequenceNumber,
+          VideoSendStreamTest::kVideoSendSsrcs[0], header.sequenceNumber,
           send_count_ / 2, 127);
       RTCPSender rtcp_sender(false, Clock::GetRealTimeClock(),
                              &lossy_receive_stats, nullptr,
                              transport_adapter_.get());
 
       rtcp_sender.SetRTCPStatus(RtcpMode::kReducedSize);
-      rtcp_sender.SetRemoteSSRC(VideoSendStreamTest::kSendSsrcs[0]);
+      rtcp_sender.SetRemoteSSRC(VideoSendStreamTest::kVideoSendSsrcs[0]);
 
       RTCPSender::FeedbackState feedback_state;
 
@@ -345,11 +345,12 @@
     if (header.payloadType == VideoSendStreamTest::kRedPayloadType) {
       encapsulated_payload_type = static_cast<int>(packet[header.headerLength]);
       if (encapsulated_payload_type !=
-          VideoSendStreamTest::kFakeSendPayloadType)
+          VideoSendStreamTest::kFakeVideoSendPayloadType)
         EXPECT_EQ(VideoSendStreamTest::kUlpfecPayloadType,
                   encapsulated_payload_type);
     } else {
-      EXPECT_EQ(VideoSendStreamTest::kFakeSendPayloadType, header.payloadType);
+      EXPECT_EQ(VideoSendStreamTest::kFakeVideoSendPayloadType,
+                header.payloadType);
     }
 
     if (header_extensions_enabled_) {
@@ -459,7 +460,7 @@
                                nullptr, transport_adapter_.get());
 
         rtcp_sender.SetRTCPStatus(RtcpMode::kReducedSize);
-        rtcp_sender.SetRemoteSSRC(kSendSsrcs[0]);
+        rtcp_sender.SetRemoteSSRC(kVideoSendSsrcs[0]);
 
         RTCPSender::FeedbackState feedback_state;
 
@@ -471,8 +472,8 @@
       uint16_t sequence_number = header.sequenceNumber;
 
       if (header.ssrc == retransmit_ssrc_ &&
-          retransmit_ssrc_ != kSendSsrcs[0]) {
-        // Not kSendSsrcs[0], assume correct RTX packet. Extract sequence
+          retransmit_ssrc_ != kVideoSendSsrcs[0]) {
+        // Not kVideoSendSsrcs[0], assume correct RTX packet. Extract sequence
         // number.
         const uint8_t* rtx_header = packet + header.headerLength;
         sequence_number = (rtx_header[0] << 8) + rtx_header[1];
@@ -496,7 +497,7 @@
       transport_adapter_->Enable();
       send_config->rtp.nack.rtp_history_ms = kNackRtpHistoryMs;
       send_config->rtp.rtx.payload_type = retransmit_payload_type_;
-      if (retransmit_ssrc_ != kSendSsrcs[0])
+      if (retransmit_ssrc_ != kVideoSendSsrcs[0])
         send_config->rtp.rtx.ssrcs.push_back(retransmit_ssrc_);
     }
 
@@ -516,7 +517,7 @@
 
 TEST_F(VideoSendStreamTest, RetransmitsNack) {
   // Normal NACKs should use the send SSRC.
-  TestNackRetransmission(kSendSsrcs[0], kFakeSendPayloadType);
+  TestNackRetransmission(kVideoSendSsrcs[0], kFakeVideoSendPayloadType);
 }
 
 TEST_F(VideoSendStreamTest, RetransmitsNackOverRtx) {
@@ -641,13 +642,13 @@
       if (packet_count_++ % 2 != 0) {
         // Receive statistics reporting having lost 50% of the packets.
         FakeReceiveStatistics lossy_receive_stats(
-            kSendSsrcs[0], header.sequenceNumber, packet_count_ / 2, 127);
+            kVideoSendSsrcs[0], header.sequenceNumber, packet_count_ / 2, 127);
         RTCPSender rtcp_sender(false, Clock::GetRealTimeClock(),
                                &lossy_receive_stats, nullptr,
                                transport_adapter_.get());
 
         rtcp_sender.SetRTCPStatus(RtcpMode::kReducedSize);
-        rtcp_sender.SetRemoteSSRC(kSendSsrcs[0]);
+        rtcp_sender.SetRemoteSSRC(kVideoSendSsrcs[0]);
 
         RTCPSender::FeedbackState feedback_state;
 
@@ -864,13 +865,13 @@
 
     virtual void SendRtcpFeedback(int remb_value)
         EXCLUSIVE_LOCKS_REQUIRED(crit_) {
-      FakeReceiveStatistics receive_stats(
-          kSendSsrcs[0], last_sequence_number_, rtp_count_, 0);
+      FakeReceiveStatistics receive_stats(kVideoSendSsrcs[0],
+                                          last_sequence_number_, rtp_count_, 0);
       RTCPSender rtcp_sender(false, clock_, &receive_stats, nullptr,
                              transport_adapter_.get());
 
       rtcp_sender.SetRTCPStatus(RtcpMode::kReducedSize);
-      rtcp_sender.SetRemoteSSRC(kSendSsrcs[0]);
+      rtcp_sender.SetRemoteSSRC(kVideoSendSsrcs[0]);
       if (remb_value > 0) {
         rtcp_sender.SetREMBStatus(true);
         rtcp_sender.SetREMBData(remb_value, std::vector<uint32_t>());
@@ -921,12 +922,12 @@
               kVideoMutedThresholdMs)
         observation_complete_.Set();
       // Receive statistics reporting having lost 50% of the packets.
-      FakeReceiveStatistics receive_stats(kSendSsrcs[0], 1, 1, 0);
+      FakeReceiveStatistics receive_stats(kVideoSendSsrcs[0], 1, 1, 0);
       RTCPSender rtcp_sender(false, Clock::GetRealTimeClock(), &receive_stats,
                              nullptr, transport_adapter_.get());
 
       rtcp_sender.SetRTCPStatus(RtcpMode::kReducedSize);
-      rtcp_sender.SetRemoteSSRC(kSendSsrcs[0]);
+      rtcp_sender.SetRemoteSSRC(kVideoSendSsrcs[0]);
 
       RTCPSender::FeedbackState feedback_state;
 
@@ -942,7 +943,7 @@
       transport_adapter_->Enable();
     }
 
-    size_t GetNumStreams() const override { return 3; }
+    size_t GetNumVideoStreams() const override { return 3; }
 
     virtual void OnFrameGeneratorCapturerCreated(
         test::FrameGeneratorCapturer* frame_generator_capturer) {
@@ -1085,7 +1086,7 @@
   CreateSenderCall(Call::Config());
 
   test::NullTransport transport;
-  CreateSendConfig(1, &transport);
+  CreateSendConfig(1, 0, &transport);
 
   Call::Config::BitrateConfig bitrate_config;
   bitrate_config.start_bitrate_bps =
@@ -1095,7 +1096,7 @@
   StartBitrateObserver encoder;
   video_send_config_.encoder_settings.encoder = &encoder;
 
-  CreateStreams();
+  CreateVideoStreams();
 
   EXPECT_EQ(video_encoder_config_.streams[0].max_bitrate_bps / 1000,
             encoder.GetStartBitrateKbps());
@@ -1145,10 +1146,10 @@
   CreateSenderCall(Call::Config());
 
   test::NullTransport transport;
-  CreateSendConfig(1, &transport);
+  CreateSendConfig(1, 0, &transport);
   FrameObserver observer;
   video_send_config_.pre_encode_callback = &observer;
-  CreateStreams();
+  CreateVideoStreams();
 
   // Prepare five input frames. Send ordinary VideoFrame and texture frames
   // alternatively.
@@ -1819,7 +1820,7 @@
       EXPECT_EQ(kNumStreams, encoder_config->streams.size());
     }
 
-    size_t GetNumStreams() const override { return kNumStreams; }
+    size_t GetNumVideoStreams() const override { return kNumStreams; }
 
     void PerformTest() override {
       EXPECT_TRUE(Wait())
@@ -1827,12 +1828,12 @@
       VideoSendStream::Stats stats = send_stream_->GetStats();
 
       for (size_t i = 0; i < kNumStreams; ++i) {
-        ASSERT_TRUE(stats.substreams.find(kSendSsrcs[i]) !=
+        ASSERT_TRUE(stats.substreams.find(kVideoSendSsrcs[i]) !=
                     stats.substreams.end())
-            << "No stats for SSRC: " << kSendSsrcs[i]
+            << "No stats for SSRC: " << kVideoSendSsrcs[i]
             << ", stats should exist as soon as frames have been encoded.";
         VideoSendStream::StreamStats ssrc_stats =
-            stats.substreams[kSendSsrcs[i]];
+            stats.substreams[kVideoSendSsrcs[i]];
         EXPECT_EQ(kEncodedResolution[i].width, ssrc_stats.width);
         EXPECT_EQ(kEncodedResolution[i].height, ssrc_stats.height);
       }
diff --git a/webrtc/video_engine_tests.isolate b/webrtc/video_engine_tests.isolate
index 5aa9623..f2f961f 100644
--- a/webrtc/video_engine_tests.isolate
+++ b/webrtc/video_engine_tests.isolate
@@ -11,6 +11,7 @@
       'variables': {
         'files': [
           '<(DEPTH)/resources/foreman_cif_short.yuv',
+          '<(DEPTH)/resources/voice_engine/audio_long16.pcm',
         ],
       },
     }],