Change NetEq::GetAudio to use AudioFrame

With this change, NetEq now uses AudioFrame as output type, like the
surrounding functions in ACM and VoiceEngine already do.

The computational savings is probably slim, since one memcpy is
removed while another one is added (both in AcmReceiver::GetAudio).

More simplifications and clean-up will be done in
AcmReceiver::GetAudio in future CLs.

BUG=webrtc:5607

Review URL: https://codereview.webrtc.org/1750353002

Cr-Commit-Position: refs/heads/master@{#11874}
diff --git a/webrtc/modules/audio_coding/acm2/acm_receive_test_oldapi.cc b/webrtc/modules/audio_coding/acm2/acm_receive_test_oldapi.cc
index ec3f4fc..e4a34a7 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receive_test_oldapi.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receive_test_oldapi.cc
@@ -159,7 +159,7 @@
     while (clock_.TimeInMilliseconds() < packet->time_ms()) {
       AudioFrame output_frame;
       EXPECT_EQ(0, acm_->PlayoutData10Ms(output_freq_hz_, &output_frame));
-      EXPECT_EQ(output_freq_hz_, output_frame.sample_rate_hz_);
+      ASSERT_EQ(output_freq_hz_, output_frame.sample_rate_hz_);
       const size_t samples_per_block =
           static_cast<size_t>(output_freq_hz_ * 10 / 1000);
       EXPECT_EQ(samples_per_block, output_frame.samples_per_channel_);
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
index 39d8a0d..02d165a 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@@ -18,6 +18,7 @@
 #include "webrtc/base/checks.h"
 #include "webrtc/base/format_macros.h"
 #include "webrtc/base/logging.h"
+#include "webrtc/base/safe_conversions.h"
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 #include "webrtc/common_types.h"
 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
@@ -120,14 +121,12 @@
 AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
     : last_audio_decoder_(nullptr),
       previous_audio_activity_(AudioFrame::kVadPassive),
-      audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
       last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
       neteq_(NetEq::Create(config.neteq_config)),
       vad_enabled_(config.neteq_config.enable_post_decode_vad),
       clock_(config.clock),
       resampled_last_output_frame_(true) {
   assert(clock_);
-  memset(audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
   memset(last_audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
 }
 
@@ -208,19 +207,11 @@
 }
 
 int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
-  enum NetEqOutputType type;
-  size_t samples_per_channel;
-  size_t num_channels;
-
   // Accessing members, take the lock.
   rtc::CritScope lock(&crit_sect_);
 
-  // Always write the output to |audio_buffer_| first.
-  if (neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples,
-                       audio_buffer_.get(),
-                       &samples_per_channel,
-                       &num_channels,
-                       &type) != NetEq::kOK) {
+  enum NetEqOutputType type;
+  if (neteq_->GetAudio(audio_frame, &type) != NetEq::kOK) {
     LOG(LERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
     return -1;
   }
@@ -236,44 +227,42 @@
     int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
     int samples_per_channel_int = resampler_.Resample10Msec(
         last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
-        num_channels, AudioFrame::kMaxDataSizeSamples, temp_output);
+        audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
+        temp_output);
     if (samples_per_channel_int < 0) {
       LOG(LERROR) << "AcmReceiver::GetAudio - "
                      "Resampling last_audio_buffer_ failed.";
       return -1;
     }
-    samples_per_channel = static_cast<size_t>(samples_per_channel_int);
   }
 
-  // The audio in |audio_buffer_| is tansferred to |audio_frame_| below, either
-  // through resampling, or through straight memcpy.
   // TODO(henrik.lundin) Glitches in the output may appear if the output rate
   // from NetEq changes. See WebRTC issue 3923.
   if (need_resampling) {
     int samples_per_channel_int = resampler_.Resample10Msec(
-        audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
-        num_channels, AudioFrame::kMaxDataSizeSamples, audio_frame->data_);
+        audio_frame->data_, current_sample_rate_hz, desired_freq_hz,
+        audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
+        audio_frame->data_);
     if (samples_per_channel_int < 0) {
       LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
       return -1;
     }
-    samples_per_channel = static_cast<size_t>(samples_per_channel_int);
+    audio_frame->samples_per_channel_ =
+        static_cast<size_t>(samples_per_channel_int);
+    audio_frame->sample_rate_hz_ = desired_freq_hz;
+    RTC_DCHECK_EQ(
+        audio_frame->sample_rate_hz_,
+        rtc::checked_cast<int>(audio_frame->samples_per_channel_ * 100));
     resampled_last_output_frame_ = true;
   } else {
     resampled_last_output_frame_ = false;
     // We might end up here ONLY if codec is changed.
-    memcpy(audio_frame->data_,
-           audio_buffer_.get(),
-           samples_per_channel * num_channels * sizeof(int16_t));
   }
 
-  // Swap buffers, so that the current audio is stored in |last_audio_buffer_|
-  // for next time.
-  audio_buffer_.swap(last_audio_buffer_);
-
-  audio_frame->num_channels_ = num_channels;
-  audio_frame->samples_per_channel_ = samples_per_channel;
-  audio_frame->sample_rate_hz_ = static_cast<int>(samples_per_channel * 100);
+  // Store current audio in |last_audio_buffer_| for next time.
+  memcpy(last_audio_buffer_.get(), audio_frame->data_,
+         sizeof(int16_t) * audio_frame->samples_per_channel_ *
+             audio_frame->num_channels_);
 
   // Should set |vad_activity| before calling SetAudioFrameActivityAndType().
   audio_frame->vad_activity_ = previous_audio_activity_;
@@ -284,6 +273,7 @@
   // Computes the RTP timestamp of the first sample in |audio_frame| from
   // |GetPlayoutTimestamp|, which is the timestamp of the last sample of
   // |audio_frame|.
+  // TODO(henrik.lundin) Move setting of audio_frame->timestamp_ inside NetEq.
   uint32_t playout_timestamp = 0;
   if (GetPlayoutTimestamp(&playout_timestamp)) {
     audio_frame->timestamp_ = playout_timestamp -
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.h b/webrtc/modules/audio_coding/acm2/acm_receiver.h
index 476f29d..ae3969b 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.h
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.h
@@ -280,9 +280,6 @@
   const Decoder* last_audio_decoder_ GUARDED_BY(crit_sect_);
   AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
   ACMResampler resampler_ GUARDED_BY(crit_sect_);
-  // Used in GetAudio, declared as member to avoid allocating every 10ms.
-  // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
-  std::unique_ptr<int16_t[]> audio_buffer_ GUARDED_BY(crit_sect_);
   std::unique_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
   CallStatistics call_stats_ GUARDED_BY(crit_sect_);
   NetEq* neteq_;