Add a deinterleaved float interface to AudioProcessing.
This is mainly to support the native audio format in Chrome. Although
this implementation just moves the float->int conversion under the hood,
we will transition AudioProcessing towards supporting this format
throughout.
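For illustration, here is a caller-side sketch of the new interface. Only
the ProcessStream() signature comes from this CL; the initialized |apm|
instance and the fill step are assumed.

  // Hypothetical usage; assumes an initialized AudioProcessing* |apm|.
  // Deinterleaved float input, one pointer per channel, in 10 ms chunks.
  const int kSampleRateHz = 16000;
  const int kSamplesPerChannel = kSampleRateHz / 100;  // 10 ms.
  float left[kSamplesPerChannel] = {0};
  float right[kSamplesPerChannel] = {0};
  float* channels[] = {left, right};
  // ... fill |left| and |right| with one chunk of audio ...
  int err = apm->ProcessStream(channels, kSamplesPerChannel, kSampleRateHz,
                               AudioProcessing::kStereo,
                               AudioProcessing::kStereo);
  // On success (err == AudioProcessing::kNoError), the processed audio has
  // been written back into |channels|.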
- Add a test that verifies we get identical output from the float and
int interfaces (a sketch follows this list).
- The float and int wrappers are responsible for conversion to the
AudioBuffer format; new shared ProcessStreamLocked() and
AnalyzeReverseStreamLocked() methods do most of the work.
- Add new repeated fields to debug.proto to hold deinterleaved data
(sketched after this list).
- Add helpers to audio_utils.cc, and start using numeric_limits (see the
conversion sketch after this list).
- Note that there was no measurable performance difference between
numeric_limits and a literal value on Linux with either gcc or clang.
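A minimal sketch of the equivalence test, assuming hypothetical |ToFloat|
and |ToInt16| stand-ins for the audio_util conversion helpers; instance
configuration is elided and the names are illustrative, not the CL's:

  TEST(ApmTest, Int16AndFloatInterfacesGiveIdenticalOutput) {
    scoped_ptr<AudioProcessing> int_ap(AudioProcessing::Create(0));
    scoped_ptr<AudioProcessing> float_ap(AudioProcessing::Create(0));
    const int kRate = 16000;
    const int kSamples = kRate / 100;  // One 10 ms chunk.
    AudioFrame frame;
    // ... configure both instances identically; fill |frame| with kSamples
    // mono int16 samples at kRate ...

    float float_data[kSamples];
    ToFloat(frame.data_, kSamples, float_data);  // int16 -> [-1, 1] floats.
    float* channels[] = {float_data};

    EXPECT_EQ(AudioProcessing::kNoError, int_ap->ProcessStream(&frame));
    EXPECT_EQ(AudioProcessing::kNoError,
              float_ap->ProcessStream(channels, kSamples, kRate,
                                      AudioProcessing::kMono,
                                      AudioProcessing::kMono));

    int16_t int_data[kSamples];
    ToInt16(float_data, kSamples, int_data);  // Back to int16 to compare.
    EXPECT_EQ(0, memcmp(frame.data_, int_data, sizeof(int_data)));
  }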
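The debug.proto addition, roughly; the field names follow the accessors
used in the dump code below, but the field numbers and the elided
neighboring fields are assumptions:

  // Sketch only; numbering is illustrative.
  message ReverseStream {
    optional bytes data = 1;     // int16 interleaved data.
    repeated bytes channel = 2;  // float data, one buffer per channel.
  }

  message Stream {
    optional bytes input_data = 1;   // int16 interleaved data.
    optional bytes output_data = 2;  // int16 interleaved data.
    // (delay, drift, level and keypress fields elided.)
    repeated bytes input_channel = 7;   // float, one buffer per channel.
    repeated bytes output_channel = 8;  // float, one buffer per channel.
  }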
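And a sketch of the float->int conversion style referenced above, using
numeric_limits in place of the literals 32767/-32768; the helper name and
signature are illustrative rather than exactly what landed in
audio_utils.cc:

  #include <limits>
  #include <stdint.h>

  // Scales [-1, 1] float samples to the full int16_t range, rounding and
  // saturating at the limits.
  void ScaleAndRoundToInt16(const float* src, int size, int16_t* dest) {
    typedef std::numeric_limits<int16_t> limits_int16;
    for (int i = 0; i < size; ++i) {
      float v = src[i];
      if (v > 0)
        v = v >= 1 ? limits_int16::max() : v * limits_int16::max() + 0.5f;
      else if (v < 0)
        v = v <= -1 ? limits_int16::min() : -v * limits_int16::min() - 0.5f;
      dest[i] = static_cast<int16_t>(v);
    }
  }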
BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org, henrikg@webrtc.org, tommi@webrtc.org, turaj@webrtc.org, xians@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/9179004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@5641 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 26b2020..272c786 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -12,6 +12,7 @@
#include <assert.h>
+#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
@@ -37,8 +38,6 @@
#endif
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
-static const int kChunkSizeMs = 10;
-
#define RETURN_ON_ERR(expr) \
do { \
int err = expr; \
@@ -48,6 +47,24 @@
} while (0)
namespace webrtc {
+namespace {
+
+const int kChunkSizeMs = 10;
+
+int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
+ switch (layout) {
+ case AudioProcessing::kMono:
+ case AudioProcessing::kMonoAndKeyboard:
+ return 1;
+ case AudioProcessing::kStereo:
+ case AudioProcessing::kStereoAndKeyboard:
+ return 2;
+ }
+ assert(false);
+ return -1;
+}
+
+} // namespace
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
@@ -299,6 +316,8 @@
return output_will_be_muted_;
}
+// Calls InitializeLocked() if any of the audio parameters have changed from
+// their current values.
int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
int num_input_channels, int num_output_channels, int num_reverse_channels) {
if (sample_rate_hz == sample_rate_hz_ &&
@@ -342,15 +361,62 @@
return InitializeLocked();
}
-int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
+int AudioProcessingImpl::ProcessStream(float* const* data,
+ int samples_per_channel,
+ int sample_rate_hz,
+ ChannelLayout input_layout,
+ ChannelLayout output_layout) {
CriticalSectionScoped crit_scoped(crit_);
- int err = kNoError;
-
- if (frame == NULL) {
+ if (!data) {
return kNullPointerError;
}
+
+ const int num_input_channels = ChannelsFromLayout(input_layout);
// TODO(ajm): We now always set the output channels equal to the input
- // channels here. Remove the ability to downmix entirely.
+ // channels here. Restore the ability to downmix.
+ RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz,
+ num_input_channels, num_input_channels, num_reverse_channels_));
+ if (samples_per_channel != samples_per_channel_) {
+ return kBadDataLengthError;
+ }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ event_msg_->set_type(audioproc::Event::STREAM);
+ audioproc::Stream* msg = event_msg_->mutable_stream();
+ const size_t channel_size = sizeof(float) * samples_per_channel;
+ for (int i = 0; i < num_input_channels; ++i)
+ msg->add_input_channel(data[i], channel_size);
+ }
+#endif
+
+ capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_);
+ RETURN_ON_ERR(ProcessStreamLocked());
+ if (output_copy_needed(is_data_processed())) {
+ capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data);
+ }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ audioproc::Stream* msg = event_msg_->mutable_stream();
+ const size_t channel_size = sizeof(float) * samples_per_channel;
+ for (int i = 0; i < num_output_channels_; ++i)
+ msg->add_output_channel(data[i], channel_size);
+ RETURN_ON_ERR(WriteMessageToDebugFile());
+ }
+#endif
+
+ return kNoError;
+}
+
+int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
+ CriticalSectionScoped crit_scoped(crit_);
+ if (!frame) {
+ return kNullPointerError;
+ }
+
+ // TODO(ajm): We now always set the output channels equal to the input
+ // channels here. Restore the ability to downmix.
RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
frame->num_channels_, frame->num_channels_, num_reverse_channels_));
if (frame->samples_per_channel_ != samples_per_channel_) {
@@ -365,6 +431,36 @@
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_input_data(frame->data_, data_size);
+ }
+#endif
+
+ capture_audio_->DeinterleaveFrom(frame);
+ if (num_output_channels_ < num_input_channels_) {
+ capture_audio_->Mix(num_output_channels_);
+ frame->num_channels_ = num_output_channels_;
+ }
+ RETURN_ON_ERR(ProcessStreamLocked());
+ capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed()));
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ audioproc::Stream* msg = event_msg_->mutable_stream();
+ const size_t data_size = sizeof(int16_t) *
+ frame->samples_per_channel_ *
+ frame->num_channels_;
+ msg->set_output_data(frame->data_, data_size);
+ RETURN_ON_ERR(WriteMessageToDebugFile());
+ }
+#endif
+
+ return kNoError;
+}
+
+int AudioProcessingImpl::ProcessStreamLocked() {
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ audioproc::Stream* msg = event_msg_->mutable_stream();
msg->set_delay(stream_delay_ms_);
msg->set_drift(echo_cancellation_->stream_drift_samples());
msg->set_level(gain_control_->stream_analog_level());
@@ -372,14 +468,6 @@
}
#endif
- capture_audio_->DeinterleaveFrom(frame);
-
- // TODO(ajm): experiment with mixing and AEC placement.
- if (num_output_channels_ < num_input_channels_) {
- capture_audio_->Mix(num_output_channels_);
- frame->num_channels_ = num_output_channels_;
- }
-
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -393,45 +481,18 @@
}
}
- err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_));
if (echo_control_mobile_->is_enabled() &&
noise_suppression_->is_enabled()) {
capture_audio_->CopyLowPassToReference();
}
-
- err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = voice_detection_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = gain_control_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_));
if (synthesis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -446,38 +507,48 @@
}
// The level estimator operates on the recombined data.
- err = level_estimator_->ProcessStream(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- capture_audio_->InterleaveTo(frame, interleave_needed(data_processed));
-
-#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
- if (debug_file_->Open()) {
- audioproc::Stream* msg = event_msg_->mutable_stream();
- const size_t data_size = sizeof(int16_t) *
- frame->samples_per_channel_ *
- frame->num_channels_;
- msg->set_output_data(frame->data_, data_size);
- err = WriteMessageToDebugFile();
- if (err != kNoError) {
- return err;
- }
- }
-#endif
+ RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_));
was_stream_delay_set_ = false;
return kNoError;
}
-// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
-// primary stream and convert ourselves rather than having the user manage it.
-// We can be smarter and use the splitting filter when appropriate. Similarly,
-// perform downmixing here.
+int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
+ int samples_per_channel,
+ int sample_rate_hz,
+ ChannelLayout layout) {
+ CriticalSectionScoped crit_scoped(crit_);
+ if (data == NULL) {
+ return kNullPointerError;
+ }
+ if (sample_rate_hz != sample_rate_hz_) {
+ return kBadSampleRateError;
+ }
+
+ const int num_channels = ChannelsFromLayout(layout);
+ RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
+ num_output_channels_, num_channels));
+ if (samples_per_channel != samples_per_channel_) {
+ return kBadDataLengthError;
+ }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
+ audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
+ const size_t channel_size = sizeof(float) * samples_per_channel;
+ for (int i = 0; i < num_channels; ++i)
+ msg->add_channel(data[i], channel_size);
+ RETURN_ON_ERR(WriteMessageToDebugFile());
+ }
+#endif
+
+ render_audio_->CopyFrom(data, samples_per_channel, num_channels);
+ return AnalyzeReverseStreamLocked();
+}
+
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
CriticalSectionScoped crit_scoped(crit_);
- int err = kNoError;
if (frame == NULL) {
return kNullPointerError;
}
@@ -486,6 +557,9 @@
}
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
num_output_channels_, frame->num_channels_));
+ if (frame->samples_per_channel_ != samples_per_channel_) {
+ return kBadDataLengthError;
+ }
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
@@ -495,15 +569,19 @@
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_data(frame->data_, data_size);
- err = WriteMessageToDebugFile();
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
render_audio_->DeinterleaveFrom(frame);
+ return AnalyzeReverseStreamLocked();
+}
+// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
+// primary stream and convert ourselves rather than having the user manage it.
+// We can be smarter and use the splitting filter when appropriate. Similarly,
+// perform downmixing here.
+int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
if (sample_rate_hz_ == kSampleRate32kHz) {
for (int i = 0; i < num_reverse_channels_; i++) {
// Split into low and high band.
@@ -516,23 +594,11 @@
}
}
- // TODO(ajm): warnings possible from components?
- err = echo_cancellation_->ProcessRenderAudio(render_audio_);
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_));
+ RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_));
+ RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_));
- err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = gain_control_->ProcessRenderAudio(render_audio_);
- if (err != kNoError) {
- return err;
- }
-
- return err; // TODO(ajm): this is for returning warnings; necessary?
+ return kNoError;
}
int AudioProcessingImpl::set_stream_delay_ms(int delay) {
@@ -563,6 +629,14 @@
return was_stream_delay_set_;
}
+void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
+ key_pressed_ = key_pressed;
+}
+
+bool AudioProcessingImpl::stream_key_pressed() const {
+ return key_pressed_;
+}
+
void AudioProcessingImpl::set_delay_offset_ms(int offset) {
CriticalSectionScoped crit_scoped(crit_);
delay_offset_ms_ = offset;
@@ -572,14 +646,6 @@
return delay_offset_ms_;
}
-void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
- key_pressed_ = key_pressed;
-}
-
-bool AudioProcessingImpl::stream_key_pressed() const {
- return key_pressed_;
-}
-
int AudioProcessingImpl::StartDebugRecording(
const char filename[AudioProcessing::kMaxFilenameSize]) {
CriticalSectionScoped crit_scoped(crit_);
@@ -710,7 +776,7 @@
return true;
}
-bool AudioProcessingImpl::interleave_needed(bool is_data_processed) const {
+bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return (num_output_channels_ != num_input_channels_ || is_data_processed);
}
@@ -755,7 +821,7 @@
event_msg_->Clear();
- return 0;
+ return kNoError;
}
int AudioProcessingImpl::WriteInitMessage() {