Add a deinterleaved float interface to AudioProcessing.
This is mainly to support the native audio format in Chrome. Although
this implementation just moves the float->int conversion under the hood,
we will transition AudioProcessing towards supporting this format
throughout.
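For illustration, here is a caller-side sketch of the new interface. Only
the ProcessStream() signature comes from this CL; the initialized |apm|
instance and the fill step are assumed.

  // Hypothetical usage; assumes an initialized AudioProcessing* |apm|.
  // Deinterleaved float input, one pointer per channel, in 10 ms chunks.
  const int kSampleRateHz = 16000;
  const int kSamplesPerChannel = kSampleRateHz / 100;  // 10 ms.
  float left[kSamplesPerChannel] = {0};
  float right[kSamplesPerChannel] = {0};
  float* channels[] = {left, right};
  // ... fill |left| and |right| with one chunk of audio ...
  int err = apm->ProcessStream(channels, kSamplesPerChannel, kSampleRateHz,
                               AudioProcessing::kStereo,
                               AudioProcessing::kStereo);
  // On success (err == AudioProcessing::kNoError), the processed audio has
  // been written back into |channels|.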
- Add a test that verifies we get identical output from the float and
int interfaces (a sketch follows this list).
- The float and int wrappers are responsible for conversion to the
AudioBuffer format; new shared ProcessStreamLocked() and
AnalyzeReverseStreamLocked() methods do most of the work.
- Add new repeated fields to debug.proto to hold deinterleaved data
(sketched after this list).
- Add helpers to audio_utils.cc, and start using numeric_limits (see the
conversion sketch after this list).
- Note that there was no measurable performance difference between
numeric_limits and a literal value on Linux with either gcc or clang.
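A minimal sketch of the equivalence test, assuming hypothetical |ToFloat|
and |ToInt16| stand-ins for the audio_util conversion helpers; instance
configuration is elided and the names are illustrative, not the CL's:

  TEST(ApmTest, Int16AndFloatInterfacesGiveIdenticalOutput) {
    scoped_ptr<AudioProcessing> int_ap(AudioProcessing::Create(0));
    scoped_ptr<AudioProcessing> float_ap(AudioProcessing::Create(0));
    const int kRate = 16000;
    const int kSamples = kRate / 100;  // One 10 ms chunk.
    AudioFrame frame;
    // ... configure both instances identically; fill |frame| with kSamples
    // mono int16 samples at kRate ...

    float float_data[kSamples];
    ToFloat(frame.data_, kSamples, float_data);  // int16 -> [-1, 1] floats.
    float* channels[] = {float_data};

    EXPECT_EQ(AudioProcessing::kNoError, int_ap->ProcessStream(&frame));
    EXPECT_EQ(AudioProcessing::kNoError,
              float_ap->ProcessStream(channels, kSamples, kRate,
                                      AudioProcessing::kMono,
                                      AudioProcessing::kMono));

    int16_t int_data[kSamples];
    ToInt16(float_data, kSamples, int_data);  // Back to int16 to compare.
    EXPECT_EQ(0, memcmp(frame.data_, int_data, sizeof(int_data)));
  }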
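The debug.proto addition, roughly; the field names follow the accessors
used in the dump code below, but the field numbers and the elided
neighboring fields are assumptions:

  // Sketch only; numbering is illustrative.
  message ReverseStream {
    optional bytes data = 1;     // int16 interleaved data.
    repeated bytes channel = 2;  // float data, one buffer per channel.
  }

  message Stream {
    optional bytes input_data = 1;   // int16 interleaved data.
    optional bytes output_data = 2;  // int16 interleaved data.
    // (delay, drift, level and keypress fields elided.)
    repeated bytes input_channel = 7;   // float, one buffer per channel.
    repeated bytes output_channel = 8;  // float, one buffer per channel.
  }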
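And a sketch of the float->int conversion style referenced above, using
numeric_limits in place of the literals 32767/-32768; the helper name and
signature are illustrative rather than exactly what landed in
audio_utils.cc:

  #include <limits>
  #include <stdint.h>

  // Scales [-1, 1] float samples to the full int16_t range, rounding and
  // saturating at the limits.
  void ScaleAndRoundToInt16(const float* src, int size, int16_t* dest) {
    typedef std::numeric_limits<int16_t> limits_int16;
    for (int i = 0; i < size; ++i) {
      float v = src[i];
      if (v > 0)
        v = v >= 1 ? limits_int16::max() : v * limits_int16::max() + 0.5f;
      else if (v < 0)
        v = v <= -1 ? limits_int16::min() : -v * limits_int16::min() - 0.5f;
      dest[i] = static_cast<int16_t>(v);
    }
  }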
BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org, henrikg@webrtc.org, tommi@webrtc.org, turaj@webrtc.org, xians@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/9179004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@5641 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 26b2020..272c786 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -12,6 +12,7 @@
#include <assert.h>
+#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
@@ -37,8 +38,6 @@
#endif
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
-static const int kChunkSizeMs = 10;
-
#define RETURN_ON_ERR(expr) \
do { \
int err = expr; \
@@ -48,6 +47,24 @@
} while (0)
namespace webrtc {
+namespace {
+
+const int kChunkSizeMs = 10;
+
+int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
+ switch (layout) {
+ case AudioProcessing::kMono:
+ case AudioProcessing::kMonoAndKeyboard:
+ return 1;
+ case AudioProcessing::kStereo:
+ case AudioProcessing::kStereoAndKeyboard:
+ return 2;
+ }
+ assert(false);
+ return -1;
+}
+
+} // namespace
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
@@ -299,6 +316,8 @@
return output_will_be_muted_;
}
+// Calls InitializeLocked() if any of the audio parameters have changed from
+// their current values.
int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
int num_input_channels, int num_output_channels, int num_reverse_channels) {
if (sample_rate_hz == sample_rate_hz_ &&
@@ -342,15 +361,62 @@
return InitializeLocked();
}
-int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
+int AudioProcessingImpl::ProcessStream(float* const* data,
+ int samples_per_channel,
+ int sample_rate_hz,
+ ChannelLayout input_layout,
+ ChannelLayout output_layout) {
CriticalSectionScoped crit_scoped(crit_);
- int err = kNoError;
-
- if (frame == NULL) {
+ if (!data) {
return kNullPointerError;
}
+
+ const int num_input_channels = ChannelsFromLayout(input_layout);
// TODO(ajm): We now always set the output channels equal to the input
- // channels here. Remove the ability to downmix entirely.
+ // channels here. Restore the ability to downmix.
+ RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz,
+ num_input_channels, num_input_channels, num_reverse_channels_));
+ if (samples_per_channel != samples_per_channel_) {
+ return kBadDataLengthError;
+ }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ event_msg_->set_type(audioproc::Event::STREAM);
+ audioproc::Stream* msg = event_msg_->mutable_stream();
+ const size_t channel_size = sizeof(float) * samples_per_channel;
+ for (int i = 0; i < num_input_channels; ++i)
+ msg->add_input_channel(data[i], channel_size);
+ }
+#endif
+
+ capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_);
+ RETURN_ON_ERR(ProcessStreamLocked());
+ if (output_copy_needed(is_data_processed())) {
+ capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data);
+ }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ audioproc::Stream* msg = event_msg_->mutable_stream();
+ const size_t channel_size = sizeof(float) * samples_per_channel;
+ for (int i = 0; i < num_output_channels_; ++i)
+ msg->add_output_channel(data[i], channel_size);
+ RETURN_ON_ERR(WriteMessageToDebugFile());
+ }
+#endif
+
+ return kNoError;
+}
+
+int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
+ CriticalSectionScoped crit_scoped(crit_);
+ if (!frame) {
+ return kNullPointerError;
+ }
+
+ // TODO(ajm): We now always set the output channels equal to the input
+ // channels here. Restore the ability to downmix.
RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
frame->num_channels_, frame->num_channels_, num_reverse_channels_));
if (frame->samples_per_channel_ != samples_per_channel_) {
@@ -365,6 +431,36 @@
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_input_data(frame->data_, data_size);
+ }
+#endif
+
+ capture_audio_->DeinterleaveFrom(frame);
+ if (num_output_channels_ < num_input_channels_) {
+ capture_audio_->Mix(num_output_channels_);
+ frame->num_channels_ = num_output_channels_;
+ }
+ RETURN_ON_ERR(ProcessStreamLocked());
+ capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed()));
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ audioproc::Stream* msg = event_msg_->mutable_stream();
+ const size_t data_size = sizeof(int16_t) *
+ frame->samples_per_channel_ *
+ frame->num_channels_;
+ msg->set_output_data(frame->data_, data_size);
+ RETURN_ON_ERR(WriteMessageToDebugFile());
+ }
+#endif
+
+ return kNoError;
+}
+
+int AudioProcessingImpl::ProcessStreamLocked() {
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ audioproc::Stream* msg = event_msg_->mutable_stream();
msg->set_delay(stream_delay_ms_);
msg->set_drift(echo_cancellation_->stream_drift_samples());
msg->set_level(gain_control_->stream_analog_level());
@@ -372,14 +468,6 @@
}
#endif
- capture_audio_->DeinterleaveFrom(frame);
-
- // TODO(ajm): experiment with mixing and AEC placement.
- if (num_output_channels_ < num_input_channels_) {
- capture_audio_->Mix(num_output_channels_);
- frame->num_channels_ = num_output_channels_;
- }
-
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -393,45 +481,18 @@
}
}
- err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_));
if (echo_control_mobile_->is_enabled() &&
noise_suppression_->is_enabled()) {
capture_audio_->CopyLowPassToReference();
}
-
- err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = voice_detection_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = gain_control_->ProcessCaptureAudio(capture_audio_);
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_));
+ RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_));
if (synthesis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -446,38 +507,48 @@
}
// The level estimator operates on the recombined data.
- err = level_estimator_->ProcessStream(capture_audio_);
- if (err != kNoError) {
- return err;
- }
-
- capture_audio_->InterleaveTo(frame, interleave_needed(data_processed));
-
-#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
- if (debug_file_->Open()) {
- audioproc::Stream* msg = event_msg_->mutable_stream();
- const size_t data_size = sizeof(int16_t) *
- frame->samples_per_channel_ *
- frame->num_channels_;
- msg->set_output_data(frame->data_, data_size);
- err = WriteMessageToDebugFile();
- if (err != kNoError) {
- return err;
- }
- }
-#endif
+ RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_));
was_stream_delay_set_ = false;
return kNoError;
}
-// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
-// primary stream and convert ourselves rather than having the user manage it.
-// We can be smarter and use the splitting filter when appropriate. Similarly,
-// perform downmixing here.
+int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
+ int samples_per_channel,
+ int sample_rate_hz,
+ ChannelLayout layout) {
+ CriticalSectionScoped crit_scoped(crit_);
+ if (data == NULL) {
+ return kNullPointerError;
+ }
+ if (sample_rate_hz != sample_rate_hz_) {
+ return kBadSampleRateError;
+ }
+
+ const int num_channels = ChannelsFromLayout(layout);
+ RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
+ num_output_channels_, num_channels));
+ if (samples_per_channel != samples_per_channel_) {
+ return kBadDataLengthError;
+ }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ if (debug_file_->Open()) {
+ event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
+ audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
+ const size_t channel_size = sizeof(float) * samples_per_channel;
+ for (int i = 0; i < num_channels; ++i)
+ msg->add_channel(data[i], channel_size);
+ RETURN_ON_ERR(WriteMessageToDebugFile());
+ }
+#endif
+
+ render_audio_->CopyFrom(data, samples_per_channel, num_channels);
+ return AnalyzeReverseStreamLocked();
+}
+
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
CriticalSectionScoped crit_scoped(crit_);
- int err = kNoError;
if (frame == NULL) {
return kNullPointerError;
}
@@ -486,6 +557,9 @@
}
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
num_output_channels_, frame->num_channels_));
+ if (frame->samples_per_channel_ != samples_per_channel_) {
+ return kBadDataLengthError;
+ }
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
@@ -495,15 +569,19 @@
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_data(frame->data_, data_size);
- err = WriteMessageToDebugFile();
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
render_audio_->DeinterleaveFrom(frame);
+ return AnalyzeReverseStreamLocked();
+}
+// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
+// primary stream and convert ourselves rather than having the user manage it.
+// We can be smarter and use the splitting filter when appropriate. Similarly,
+// perform downmixing here.
+int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
if (sample_rate_hz_ == kSampleRate32kHz) {
for (int i = 0; i < num_reverse_channels_; i++) {
// Split into low and high band.
@@ -516,23 +594,11 @@
}
}
- // TODO(ajm): warnings possible from components?
- err = echo_cancellation_->ProcessRenderAudio(render_audio_);
- if (err != kNoError) {
- return err;
- }
+ RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_));
+ RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_));
+ RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_));
- err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
- if (err != kNoError) {
- return err;
- }
-
- err = gain_control_->ProcessRenderAudio(render_audio_);
- if (err != kNoError) {
- return err;
- }
-
- return err; // TODO(ajm): this is for returning warnings; necessary?
+ return kNoError;
}
int AudioProcessingImpl::set_stream_delay_ms(int delay) {
@@ -563,6 +629,14 @@
return was_stream_delay_set_;
}
+void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
+ key_pressed_ = key_pressed;
+}
+
+bool AudioProcessingImpl::stream_key_pressed() const {
+ return key_pressed_;
+}
+
void AudioProcessingImpl::set_delay_offset_ms(int offset) {
CriticalSectionScoped crit_scoped(crit_);
delay_offset_ms_ = offset;
@@ -572,14 +646,6 @@
return delay_offset_ms_;
}
-void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
- key_pressed_ = key_pressed;
-}
-
-bool AudioProcessingImpl::stream_key_pressed() const {
- return key_pressed_;
-}
-
int AudioProcessingImpl::StartDebugRecording(
const char filename[AudioProcessing::kMaxFilenameSize]) {
CriticalSectionScoped crit_scoped(crit_);
@@ -710,7 +776,7 @@
return true;
}
-bool AudioProcessingImpl::interleave_needed(bool is_data_processed) const {
+bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return (num_output_channels_ != num_input_channels_ || is_data_processed);
}
@@ -755,7 +821,7 @@
event_msg_->Clear();
- return 0;
+ return kNoError;
}
int AudioProcessingImpl::WriteInitMessage() {