Insert audio frame transformer between depacketizer and decoder.

The frame transformer is passed from RTPReceiverInterface through the
library to be eventually set in ChannelReceive, where the frame
transformation will occur in the follow-up CL.

Insertable Streams Web API explainer:
https://github.com/alvestrand/webrtc-media-streams/blob/master/explainer.md

Design doc for WebRTC library changes:
http://doc/1eiLkjNUkRy2FssCPLUp6eH08BZuXXoHfbbBP1ZN7EVk

Bug: webrtc:11380
Change-Id: I5af06d1431047ef50d00e304cf95e92a832b4220
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/171872
Reviewed-by: Magnus Flodman <mflodman@webrtc.org>
Reviewed-by: Tommi <tommi@webrtc.org>
Reviewed-by: Per Ã…hgren <peah@webrtc.org>
Commit-Queue: Marina Ciocea <marinaciocea@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30956}
diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc
index ce1b344..6bc0d41 100644
--- a/audio/audio_receive_stream.cc
+++ b/audio/audio_receive_stream.cc
@@ -82,7 +82,8 @@
       config.jitter_buffer_max_packets, config.jitter_buffer_fast_accelerate,
       config.jitter_buffer_min_delay_ms,
       config.jitter_buffer_enable_rtx_handling, config.decoder_factory,
-      config.codec_pair_id, config.frame_decryptor, config.crypto_options);
+      config.codec_pair_id, config.frame_decryptor, config.crypto_options,
+      std::move(config.frame_transformer));
 }
 }  // namespace
 
@@ -409,6 +410,12 @@
     channel_receive->SetReceiveCodecs(new_config.decoder_map);
   }
 
+  if (first_time ||
+      old_config.frame_transformer != new_config.frame_transformer) {
+    channel_receive->SetDepacketizerToDecoderFrameTransformer(
+        new_config.frame_transformer);
+  }
+
   stream->config_ = new_config;
 }
 }  // namespace internal
diff --git a/audio/audio_receive_stream_unittest.cc b/audio/audio_receive_stream_unittest.cc
index 0b2cae5..186eb1c 100644
--- a/audio/audio_receive_stream_unittest.cc
+++ b/audio/audio_receive_stream_unittest.cc
@@ -100,6 +100,8 @@
         .WillRepeatedly(Invoke([](const std::map<int, SdpAudioFormat>& codecs) {
           EXPECT_THAT(codecs, ::testing::IsEmpty());
         }));
+    EXPECT_CALL(*channel_receive_, SetDepacketizerToDecoderFrameTransformer(_))
+        .Times(1);
 
     stream_config_.rtp.local_ssrc = kLocalSsrc;
     stream_config_.rtp.remote_ssrc = kRemoteSsrc;
diff --git a/audio/channel_receive.cc b/audio/channel_receive.cc
index dfc8493..1c88421 100644
--- a/audio/channel_receive.cc
+++ b/audio/channel_receive.cc
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "api/crypto/frame_decryptor_interface.h"
+#include "api/frame_transformer_interface.h"
 #include "api/rtc_event_log/rtc_event_log.h"
 #include "audio/audio_level.h"
 #include "audio/channel_send.h"
@@ -79,22 +80,24 @@
 class ChannelReceive : public ChannelReceiveInterface {
  public:
   // Used for receive streams.
-  ChannelReceive(Clock* clock,
-                 ProcessThread* module_process_thread,
-                 NetEqFactory* neteq_factory,
-                 AudioDeviceModule* audio_device_module,
-                 Transport* rtcp_send_transport,
-                 RtcEventLog* rtc_event_log,
-                 uint32_t local_ssrc,
-                 uint32_t remote_ssrc,
-                 size_t jitter_buffer_max_packets,
-                 bool jitter_buffer_fast_playout,
-                 int jitter_buffer_min_delay_ms,
-                 bool jitter_buffer_enable_rtx_handling,
-                 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
-                 absl::optional<AudioCodecPairId> codec_pair_id,
-                 rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
-                 const webrtc::CryptoOptions& crypto_options);
+  ChannelReceive(
+      Clock* clock,
+      ProcessThread* module_process_thread,
+      NetEqFactory* neteq_factory,
+      AudioDeviceModule* audio_device_module,
+      Transport* rtcp_send_transport,
+      RtcEventLog* rtc_event_log,
+      uint32_t local_ssrc,
+      uint32_t remote_ssrc,
+      size_t jitter_buffer_max_packets,
+      bool jitter_buffer_fast_playout,
+      int jitter_buffer_min_delay_ms,
+      bool jitter_buffer_enable_rtx_handling,
+      rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
+      absl::optional<AudioCodecPairId> codec_pair_id,
+      rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
+      const webrtc::CryptoOptions& crypto_options,
+      rtc::scoped_refptr<FrameTransformerInterface> frame_transformer);
   ~ChannelReceive() override;
 
   void SetSink(AudioSinkInterface* sink) override;
@@ -161,6 +164,12 @@
   // Used for obtaining RTT for a receive-only channel.
   void SetAssociatedSendChannel(const ChannelSendInterface* channel) override;
 
+  // Sets a frame transformer between the depacketizer and the decoder, to
+  // transform the received frames before decoding them.
+  void SetDepacketizerToDecoderFrameTransformer(
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
+      override;
+
  private:
   void ReceivePacket(const uint8_t* packet,
                      size_t packet_length,
@@ -262,6 +271,8 @@
   webrtc::CryptoOptions crypto_options_;
 
   webrtc::AbsoluteCaptureTimeReceiver absolute_capture_time_receiver_;
+
+  rtc::scoped_refptr<FrameTransformerInterface> frame_transformer_;
 };
 
 void ChannelReceive::OnReceivedPayloadData(
@@ -422,7 +433,8 @@
     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
     absl::optional<AudioCodecPairId> codec_pair_id,
     rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
-    const webrtc::CryptoOptions& crypto_options)
+    const webrtc::CryptoOptions& crypto_options,
+    rtc::scoped_refptr<FrameTransformerInterface> frame_transformer)
     : event_log_(rtc_event_log),
       rtp_receive_statistics_(ReceiveStatistics::Create(clock)),
       remote_ssrc_(remote_ssrc),
@@ -444,7 +456,8 @@
       associated_send_channel_(nullptr),
       frame_decryptor_(frame_decryptor),
       crypto_options_(crypto_options),
-      absolute_capture_time_receiver_(clock) {
+      absolute_capture_time_receiver_(clock),
+      frame_transformer_(std::move(frame_transformer)) {
   // TODO(nisse): Use _moduleProcessThreadPtr instead?
   module_process_thread_checker_.Detach();
 
@@ -742,6 +755,12 @@
   associated_send_channel_ = channel;
 }
 
+void ChannelReceive::SetDepacketizerToDecoderFrameTransformer(
+    rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
+  RTC_DCHECK(worker_thread_checker_.IsCurrent());
+  frame_transformer_ = std::move(frame_transformer);
+}
+
 NetworkStatistics ChannelReceive::GetNetworkStatistics() const {
   RTC_DCHECK(worker_thread_checker_.IsCurrent());
   NetworkStatistics stats;
@@ -927,13 +946,15 @@
     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
     absl::optional<AudioCodecPairId> codec_pair_id,
     rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
-    const webrtc::CryptoOptions& crypto_options) {
+    const webrtc::CryptoOptions& crypto_options,
+    rtc::scoped_refptr<FrameTransformerInterface> frame_transformer) {
   return std::make_unique<ChannelReceive>(
       clock, module_process_thread, neteq_factory, audio_device_module,
       rtcp_send_transport, rtc_event_log, local_ssrc, remote_ssrc,
       jitter_buffer_max_packets, jitter_buffer_fast_playout,
       jitter_buffer_min_delay_ms, jitter_buffer_enable_rtx_handling,
-      decoder_factory, codec_pair_id, frame_decryptor, crypto_options);
+      decoder_factory, codec_pair_id, frame_decryptor, crypto_options,
+      std::move(frame_transformer));
 }
 
 }  // namespace voe
diff --git a/audio/channel_receive.h b/audio/channel_receive.h
index 034ac7b..bc02ff3 100644
--- a/audio/channel_receive.h
+++ b/audio/channel_receive.h
@@ -22,6 +22,7 @@
 #include "api/call/audio_sink.h"
 #include "api/call/transport.h"
 #include "api/crypto/crypto_options.h"
+#include "api/frame_transformer_interface.h"
 #include "api/neteq/neteq_factory.h"
 #include "api/transport/rtp/rtp_source.h"
 #include "call/rtp_packet_sink_interface.h"
@@ -137,6 +138,12 @@
   // Used for obtaining RTT for a receive-only channel.
   virtual void SetAssociatedSendChannel(
       const ChannelSendInterface* channel) = 0;
+
+  // Sets a frame transformer between the depacketizer and the decoder, to
+  // transform the received frames before decoding them.
+  virtual void SetDepacketizerToDecoderFrameTransformer(
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface>
+          frame_transformer) = 0;
 };
 
 std::unique_ptr<ChannelReceiveInterface> CreateChannelReceive(
@@ -155,7 +162,8 @@
     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
     absl::optional<AudioCodecPairId> codec_pair_id,
     rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
-    const webrtc::CryptoOptions& crypto_options);
+    const webrtc::CryptoOptions& crypto_options,
+    rtc::scoped_refptr<FrameTransformerInterface> frame_transformer);
 
 }  // namespace voe
 }  // namespace webrtc
diff --git a/audio/mock_voe_channel_proxy.h b/audio/mock_voe_channel_proxy.h
index 9a013ff..38ad208 100644
--- a/audio/mock_voe_channel_proxy.h
+++ b/audio/mock_voe_channel_proxy.h
@@ -66,6 +66,9 @@
   MOCK_CONST_METHOD0(GetSources, std::vector<RtpSource>());
   MOCK_METHOD0(StartPlayout, void());
   MOCK_METHOD0(StopPlayout, void());
+  MOCK_METHOD1(SetDepacketizerToDecoderFrameTransformer,
+               void(rtc::scoped_refptr<webrtc::FrameTransformerInterface>
+                        frame_transformer));
 };
 
 class MockChannelSend : public voe::ChannelSendInterface {
diff --git a/call/audio_receive_stream.h b/call/audio_receive_stream.h
index 4a50cdb..d4012bf 100644
--- a/call/audio_receive_stream.h
+++ b/call/audio_receive_stream.h
@@ -21,6 +21,7 @@
 #include "api/call/transport.h"
 #include "api/crypto/crypto_options.h"
 #include "api/crypto/frame_decryptor_interface.h"
+#include "api/frame_transformer_interface.h"
 #include "api/rtp_parameters.h"
 #include "api/scoped_refptr.h"
 #include "api/transport/rtp/rtp_source.h"
@@ -150,6 +151,10 @@
     // decrypted in whatever way the caller choses. This is not required by
     // default.
     rtc::scoped_refptr<webrtc::FrameDecryptorInterface> frame_decryptor;
+
+    // An optional frame transformer used by insertable streams to transform
+    // encoded frames.
+    rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer;
   };
 
   // Reconfigure the stream according to the Configuration.
diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc
index 972a7ac..42109e8 100644
--- a/media/engine/webrtc_voice_engine.cc
+++ b/media/engine/webrtc_voice_engine.cc
@@ -1063,7 +1063,8 @@
       int jitter_buffer_min_delay_ms,
       bool jitter_buffer_enable_rtx_handling,
       rtc::scoped_refptr<webrtc::FrameDecryptorInterface> frame_decryptor,
-      const webrtc::CryptoOptions& crypto_options)
+      const webrtc::CryptoOptions& crypto_options,
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
       : call_(call), config_() {
     RTC_DCHECK(call);
     config_.rtp.remote_ssrc = remote_ssrc;
@@ -1085,6 +1086,7 @@
     config_.codec_pair_id = codec_pair_id;
     config_.frame_decryptor = frame_decryptor;
     config_.crypto_options = crypto_options;
+    config_.frame_transformer = std::move(frame_transformer);
     RecreateAudioReceiveStream();
   }
 
@@ -1215,6 +1217,13 @@
     return rtp_parameters;
   }
 
+  void SetDepacketizerToDecoderFrameTransformer(
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
+    RTC_DCHECK(worker_thread_checker_.IsCurrent());
+    config_.frame_transformer = std::move(frame_transformer);
+    ReconfigureAudioReceiveStream();
+  }
+
  private:
   void RecreateAudioReceiveStream() {
     RTC_DCHECK(worker_thread_checker_.IsCurrent());
@@ -1868,7 +1877,7 @@
                 engine()->audio_jitter_buffer_fast_accelerate_,
                 engine()->audio_jitter_buffer_min_delay_ms_,
                 engine()->audio_jitter_buffer_enable_rtx_handling_,
-                unsignaled_frame_decryptor_, crypto_options_)));
+                unsignaled_frame_decryptor_, crypto_options_, nullptr)));
   recv_streams_[ssrc]->SetPlayout(playout_);
 
   return true;
@@ -2337,6 +2346,20 @@
       std::move(frame_transformer));
 }
 
+void WebRtcVoiceMediaChannel::SetDepacketizerToDecoderFrameTransformer(
+    uint32_t ssrc,
+    rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
+  RTC_DCHECK(worker_thread_checker_.IsCurrent());
+  auto matching_stream = recv_streams_.find(ssrc);
+  if (matching_stream == recv_streams_.end()) {
+    RTC_LOG(LS_INFO) << "Attempting to set frame transformer for SSRC:" << ssrc
+                     << " which doesn't exist.";
+    return;
+  }
+  matching_stream->second->SetDepacketizerToDecoderFrameTransformer(
+      std::move(frame_transformer));
+}
+
 bool WebRtcVoiceMediaChannel::MaybeDeregisterUnsignaledRecvStream(
     uint32_t ssrc) {
   RTC_DCHECK(worker_thread_checker_.IsCurrent());
diff --git a/media/engine/webrtc_voice_engine.h b/media/engine/webrtc_voice_engine.h
index a1f8ff5..86a7a49 100644
--- a/media/engine/webrtc_voice_engine.h
+++ b/media/engine/webrtc_voice_engine.h
@@ -215,6 +215,10 @@
       uint32_t ssrc,
       rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
       override;
+  void SetDepacketizerToDecoderFrameTransformer(
+      uint32_t ssrc,
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
+      override;
 
   // implements Transport interface
   bool SendRtp(const uint8_t* data,
diff --git a/pc/audio_rtp_receiver.cc b/pc/audio_rtp_receiver.cc
index e83e558..69e8e7b 100644
--- a/pc/audio_rtp_receiver.cc
+++ b/pc/audio_rtp_receiver.cc
@@ -225,6 +225,20 @@
       RTC_FROM_HERE, [&] { return media_channel_->GetSources(*ssrc_); });
 }
 
+void AudioRtpReceiver::SetDepacketizerToDecoderFrameTransformer(
+    rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
+  if (media_channel_ && ssrc_.has_value() && !stopped_) {
+    worker_thread_->Invoke<void>(
+        RTC_FROM_HERE,
+        [this, frame_transformer = std::move(frame_transformer)] {
+          RTC_DCHECK_RUN_ON(worker_thread_);
+          frame_transformer_ = frame_transformer;
+          media_channel_->SetDepacketizerToDecoderFrameTransformer(
+              *ssrc_, frame_transformer);
+        });
+  }
+}
+
 void AudioRtpReceiver::Reconfigure() {
   if (!media_channel_ || stopped_) {
     RTC_LOG(LS_ERROR)
@@ -237,6 +251,16 @@
   // Reattach the frame decryptor if we were reconfigured.
   MaybeAttachFrameDecryptorToMediaChannel(
       ssrc_, worker_thread_, frame_decryptor_, media_channel_, stopped_);
+
+  if (media_channel_ && ssrc_.has_value() && !stopped_) {
+    worker_thread_->Invoke<void>(RTC_FROM_HERE, [this] {
+      RTC_DCHECK_RUN_ON(worker_thread_);
+      if (!frame_transformer_)
+        return;
+      media_channel_->SetDepacketizerToDecoderFrameTransformer(
+          *ssrc_, frame_transformer_);
+    });
+  }
 }
 
 void AudioRtpReceiver::SetObserver(RtpReceiverObserverInterface* observer) {
diff --git a/pc/audio_rtp_receiver.h b/pc/audio_rtp_receiver.h
index 908cb64..88b16ee 100644
--- a/pc/audio_rtp_receiver.h
+++ b/pc/audio_rtp_receiver.h
@@ -104,6 +104,9 @@
 
   std::vector<RtpSource> GetSources() const override;
   int AttachmentId() const override { return attachment_id_; }
+  void SetDepacketizerToDecoderFrameTransformer(
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
+      override;
 
  private:
   void RestartMediaChannel(absl::optional<uint32_t> ssrc);
@@ -128,6 +131,8 @@
   // Allows to thread safely change playout delay. Handles caching cases if
   // |SetJitterBufferMinimumDelay| is called before start.
   rtc::scoped_refptr<JitterBufferDelayInterface> delay_;
+  rtc::scoped_refptr<FrameTransformerInterface> frame_transformer_
+      RTC_GUARDED_BY(worker_thread_);
 };
 
 }  // namespace webrtc