APM: Replace most usages of AudioFrame with a stream interface

This CL creates a new stream interface and uses it to replace
most of the usage of AudioFrame in the non-test code.

The CL also changes some of the test code, since the other
changes required it.

The CL will be followed by 2 more related CLs.

Bug: webrtc:5298
Change-Id: I5cfbe6079f30fc3fbf35b35fd077b6fb49c7def0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170040
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30799}
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index b63fa70..6f85aa9 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -372,6 +372,8 @@
     kStereoAndKeyboard
   };
 
+  enum class VoiceDetectionResult { kNotAvailable, kDetected, kNotDetected };
+
   // Specifies the properties of a setting to be passed to AudioProcessing at
   // runtime.
   class RuntimeSetting {
@@ -538,6 +540,15 @@
   // method, it will trigger an initialization.
   virtual int ProcessStream(AudioFrame* frame) = 0;
 
+  // Accepts and produces a 10 ms frame of interleaved 16 bit integer audio as
+  // specified in |input_config| and |output_config|. |src| and |dest| may use
+  // the same memory, if desired.
+  virtual int ProcessStream(const int16_t* const src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            int16_t* const dest,
+                            VoiceDetectionResult* vad_result) = 0;
+
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
   // |src| points to a channel buffer, arranged according to |input_stream|. At
   // output, the channels will be arranged according to |output_stream| in
@@ -564,6 +575,14 @@
   // members of |frame| must be valid.
   virtual int ProcessReverseStream(AudioFrame* frame) = 0;
 
+  // Accepts and produces a 10 ms frame of interleaved 16 bit integer audio for
+  // the reverse direction audio stream as specified in |input_config| and
+  // |output_config|. |src| and |dest| may use the same memory, if desired.
+  virtual int ProcessReverseStream(const int16_t* const src,
+                                   const StreamConfig& input_config,
+                                   const StreamConfig& output_config,
+                                   int16_t* const dest) = 0;
+
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
   // |data| points to a channel buffer, arranged according to |reverse_config|.
   virtual int ProcessReverseStream(const float* const* src,