Revert "Simplification and refactoring of the AudioBuffer code"
This reverts commit 81c0cf287c8514cb1cd6f3baca484d668c6eb128.
Reason for revert: internal test failures
Original change's description:
> Simplification and refactoring of the AudioBuffer code
>
> This CL performs a major refactoring and simplification
> of the AudioBuffer code that:
> -Removes 7 of the 9 internal buffers of the AudioBuffer.
> -Avoids the implicit copying required to keep the
> internal buffers in sync.
> -Removes all code relating to handling of fixed-point
> sample data in the AudioBuffer.
> -Changes the naming of the class methods to reflect
> that only floating point is handled.
> -Corrects some bugs in the code.
> -Extends the handling of internal downmixing to be
> more generic.
>
> Bug: webrtc:10882
> Change-Id: I12c8af156fbe366b154744a0a1b3d926bf7be572
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149828
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#28928}
TBR=gustaf@webrtc.org,peah@webrtc.org
Change-Id: I2729e3ad24b3a9b40b368b84cb565c859e79b51e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: webrtc:10882
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/150084
Reviewed-by: Steve Anton <steveanton@webrtc.org>
Commit-Queue: Steve Anton <steveanton@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28931}
diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc
index e1d5b3a..32668fa 100644
--- a/modules/audio_processing/audio_buffer.cc
+++ b/modules/audio_processing/audio_buffer.cc
@@ -23,169 +23,183 @@
namespace webrtc {
namespace {
-constexpr size_t kSamplesPer32kHzChannel = 320;
-constexpr size_t kSamplesPer48kHzChannel = 480;
-constexpr size_t kSamplesPer192kHzChannel = 1920;
-constexpr size_t kMaxSamplesPerChannel = kSamplesPer192kHzChannel;
+const size_t kSamplesPer16kHzChannel = 160;
+const size_t kSamplesPer32kHzChannel = 320;
+const size_t kSamplesPer48kHzChannel = 480;
-size_t NumBandsFromFramesPerChannel(size_t num_frames) {
- if (num_frames == kSamplesPer32kHzChannel) {
- return 2;
+size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
+ size_t num_bands = 1;
+ if (num_frames == kSamplesPer32kHzChannel ||
+ num_frames == kSamplesPer48kHzChannel) {
+ num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
}
- if (num_frames == kSamplesPer48kHzChannel) {
- return 3;
- }
- return 1;
+ return num_bands;
}
} // namespace
-AudioBuffer::AudioBuffer(size_t input_rate,
- size_t input_num_channels,
- size_t buffer_rate,
- size_t buffer_num_channels,
- size_t output_rate)
- : input_num_frames_(
- rtc::CheckedDivExact(static_cast<int>(input_rate), 100)),
- input_num_channels_(input_num_channels),
- buffer_num_frames_(
- rtc::CheckedDivExact(static_cast<int>(buffer_rate), 100)),
- buffer_num_channels_(buffer_num_channels),
- output_num_frames_(
- rtc::CheckedDivExact(static_cast<int>(output_rate), 100)),
- num_channels_(buffer_num_channels),
- num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)),
- num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)),
- data_(new ChannelBuffer<float>(buffer_num_frames_, buffer_num_channels_)),
- output_buffer_(
- new ChannelBuffer<float>(output_num_frames_, num_channels_)) {
+AudioBuffer::AudioBuffer(size_t input_num_frames,
+ size_t num_input_channels,
+ size_t process_num_frames,
+ size_t num_process_channels,
+ size_t output_num_frames)
+ : input_num_frames_(input_num_frames),
+ num_input_channels_(num_input_channels),
+ proc_num_frames_(process_num_frames),
+ num_proc_channels_(num_process_channels),
+ output_num_frames_(output_num_frames),
+ num_channels_(num_process_channels),
+ num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
+ num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
+ data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)),
+ output_buffer_(new IFChannelBuffer(output_num_frames_, num_channels_)) {
RTC_DCHECK_GT(input_num_frames_, 0);
- RTC_DCHECK_GT(buffer_num_frames_, 0);
+ RTC_DCHECK_GT(proc_num_frames_, 0);
RTC_DCHECK_GT(output_num_frames_, 0);
- RTC_DCHECK_GT(input_num_channels_, 0);
- RTC_DCHECK_GT(buffer_num_channels_, 0);
- RTC_DCHECK_LE(buffer_num_channels_, input_num_channels_);
+ RTC_DCHECK_GT(num_input_channels_, 0);
+ RTC_DCHECK_GT(num_proc_channels_, 0);
+ RTC_DCHECK_LE(num_proc_channels_, num_input_channels_);
- const bool input_resampling_needed = input_num_frames_ != buffer_num_frames_;
- const bool output_resampling_needed =
- output_num_frames_ != buffer_num_frames_;
- if (input_resampling_needed) {
- for (size_t i = 0; i < buffer_num_channels_; ++i) {
- input_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
- new PushSincResampler(input_num_frames_, buffer_num_frames_)));
+ if (input_num_frames_ != proc_num_frames_ ||
+ output_num_frames_ != proc_num_frames_) {
+ // Create an intermediate buffer for resampling.
+ process_buffer_.reset(
+ new ChannelBuffer<float>(proc_num_frames_, num_proc_channels_));
+
+ if (input_num_frames_ != proc_num_frames_) {
+ for (size_t i = 0; i < num_proc_channels_; ++i) {
+ input_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
+ new PushSincResampler(input_num_frames_, proc_num_frames_)));
+ }
}
- }
- if (output_resampling_needed) {
- for (size_t i = 0; i < buffer_num_channels_; ++i) {
- output_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
- new PushSincResampler(buffer_num_frames_, output_num_frames_)));
+ if (output_num_frames_ != proc_num_frames_) {
+ for (size_t i = 0; i < num_proc_channels_; ++i) {
+ output_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
+ new PushSincResampler(proc_num_frames_, output_num_frames_)));
+ }
}
}
if (num_bands_ > 1) {
- split_data_.reset(new ChannelBuffer<float>(
- buffer_num_frames_, buffer_num_channels_, num_bands_));
- splitting_filter_.reset(new SplittingFilter(
- buffer_num_channels_, num_bands_, buffer_num_frames_));
+ split_data_.reset(
+ new IFChannelBuffer(proc_num_frames_, num_proc_channels_, num_bands_));
+ splitting_filter_.reset(
+ new SplittingFilter(num_proc_channels_, num_bands_, proc_num_frames_));
}
}
AudioBuffer::~AudioBuffer() {}
-void AudioBuffer::set_downmixing_to_specific_channel(size_t channel) {
- downmix_by_averaging_ = false;
- RTC_DCHECK_GT(input_num_channels_, channel);
- channel_for_downmixing_ = std::min(channel, input_num_channels_ - 1);
-}
-
-void AudioBuffer::set_downmixing_by_averaging() {
- downmix_by_averaging_ = true;
-}
-
void AudioBuffer::CopyFrom(const float* const* data,
const StreamConfig& stream_config) {
RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
- RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_);
- RestoreNumChannels();
- const bool downmix_needed = input_num_channels_ > 1 && num_channels_ == 1;
+ RTC_DCHECK_EQ(stream_config.num_channels(), num_input_channels_);
+ InitForNewData();
+ // Initialized lazily because there's a different condition in
+ // DeinterleaveFrom.
+ const bool need_to_downmix =
+ num_input_channels_ > 1 && num_proc_channels_ == 1;
+ if (need_to_downmix && !input_buffer_) {
+ input_buffer_.reset(
+ new IFChannelBuffer(input_num_frames_, num_proc_channels_));
+ }
- const bool resampling_needed = input_num_frames_ != buffer_num_frames_;
+ // Downmix.
+ const float* const* data_ptr = data;
+ if (need_to_downmix) {
+ DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
+ input_buffer_->fbuf()->channels()[0]);
+ data_ptr = input_buffer_->fbuf_const()->channels();
+ }
- if (downmix_needed) {
- RTC_DCHECK_GT(kMaxSamplesPerChannel, input_num_frames_);
-
- std::array<float, kMaxSamplesPerChannel> downmix;
- if (downmix_by_averaging_) {
- const float kOneByNumChannels = 1.f / input_num_channels_;
- for (size_t i = 0; i < input_num_frames_; ++i) {
- float value = data[0][i];
- for (size_t j = 1; j < input_num_channels_; ++j) {
- value += data[j][i];
- }
- downmix[i] = value * kOneByNumChannels;
- }
+ // Resample.
+ if (input_num_frames_ != proc_num_frames_) {
+ for (size_t i = 0; i < num_proc_channels_; ++i) {
+ input_resamplers_[i]->Resample(data_ptr[i], input_num_frames_,
+ process_buffer_->channels()[i],
+ proc_num_frames_);
}
- const float* downmixed_data =
- downmix_by_averaging_ ? downmix.data() : data[channel_for_downmixing_];
+ data_ptr = process_buffer_->channels();
+ }
- if (resampling_needed) {
- input_resamplers_[0]->Resample(downmixed_data, input_num_frames_,
- data_->channels()[0], buffer_num_frames_);
- }
- const float* data_to_convert =
- resampling_needed ? data_->channels()[0] : downmixed_data;
- FloatToFloatS16(data_to_convert, buffer_num_frames_, data_->channels()[0]);
- } else {
- if (resampling_needed) {
- for (size_t i = 0; i < num_channels_; ++i) {
- input_resamplers_[i]->Resample(data[i], input_num_frames_,
- data_->channels()[i],
- buffer_num_frames_);
- FloatToFloatS16(data_->channels()[i], buffer_num_frames_,
- data_->channels()[i]);
- }
- } else {
- for (size_t i = 0; i < num_channels_; ++i) {
- FloatToFloatS16(data[i], buffer_num_frames_, data_->channels()[i]);
- }
- }
+ // Convert to the S16 range.
+ for (size_t i = 0; i < num_proc_channels_; ++i) {
+ FloatToFloatS16(data_ptr[i], proc_num_frames_,
+ data_->fbuf()->channels()[i]);
}
}
void AudioBuffer::CopyTo(const StreamConfig& stream_config,
float* const* data) {
RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_);
+ RTC_DCHECK(stream_config.num_channels() == num_channels_ ||
+ num_channels_ == 1);
- const bool resampling_needed = output_num_frames_ != buffer_num_frames_;
- if (resampling_needed) {
+ // Convert to the float range.
+ float* const* data_ptr = data;
+ if (output_num_frames_ != proc_num_frames_) {
+ // Convert to an intermediate buffer for subsequent resampling.
+ data_ptr = process_buffer_->channels();
+ }
+ for (size_t i = 0; i < num_channels_; ++i) {
+ FloatS16ToFloat(data_->fbuf()->channels()[i], proc_num_frames_,
+ data_ptr[i]);
+ }
+
+ // Resample.
+ if (output_num_frames_ != proc_num_frames_) {
for (size_t i = 0; i < num_channels_; ++i) {
- FloatS16ToFloat(data_->channels()[i], buffer_num_frames_,
- data_->channels()[i]);
- output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_,
- data[i], output_num_frames_);
- }
- } else {
- for (size_t i = 0; i < num_channels_; ++i) {
- FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, data[i]);
+ output_resamplers_[i]->Resample(data_ptr[i], proc_num_frames_, data[i],
+ output_num_frames_);
}
}
+ // Upmix.
for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
}
}
-void AudioBuffer::RestoreNumChannels() {
- num_channels_ = buffer_num_channels_;
- data_->set_num_channels(buffer_num_channels_);
+void AudioBuffer::InitForNewData() {
+ num_channels_ = num_proc_channels_;
+ data_->set_num_channels(num_proc_channels_);
if (split_data_.get()) {
- split_data_->set_num_channels(buffer_num_channels_);
+ split_data_->set_num_channels(num_proc_channels_);
}
}
+const float* const* AudioBuffer::split_channels_const_f(Band band) const {
+ if (split_data_.get()) {
+ return split_data_->fbuf_const()->channels(band);
+ } else {
+ return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
+ }
+}
+
+const float* const* AudioBuffer::channels_const_f() const {
+ return data_->fbuf_const()->channels();
+}
+
+float* const* AudioBuffer::channels_f() {
+ return data_->fbuf()->channels();
+}
+
+const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
+ return split_data_.get() ? split_data_->fbuf_const()->bands(channel)
+ : data_->fbuf_const()->bands(channel);
+}
+
+float* const* AudioBuffer::split_bands_f(size_t channel) {
+ return split_data_.get() ? split_data_->fbuf()->bands(channel)
+ : data_->fbuf()->bands(channel);
+}
+
+size_t AudioBuffer::num_channels() const {
+ return num_channels_;
+}
+
void AudioBuffer::set_num_channels(size_t num_channels) {
- RTC_DCHECK_GE(buffer_num_channels_, num_channels);
num_channels_ = num_channels;
data_->set_num_channels(num_channels);
if (split_data_.get()) {
@@ -193,140 +207,78 @@
}
}
+size_t AudioBuffer::num_frames() const {
+ return proc_num_frames_;
+}
+
+size_t AudioBuffer::num_frames_per_band() const {
+ return num_split_frames_;
+}
+
+size_t AudioBuffer::num_bands() const {
+ return num_bands_;
+}
+
// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
-void AudioBuffer::CopyFrom(const AudioFrame* frame) {
- RTC_DCHECK_EQ(frame->num_channels_, input_num_channels_);
+void AudioBuffer::DeinterleaveFrom(const AudioFrame* frame) {
+ RTC_DCHECK_EQ(frame->num_channels_, num_input_channels_);
RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_);
- RestoreNumChannels();
+ InitForNewData();
+ // Initialized lazily because there's a different condition in CopyFrom.
+ if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
+ input_buffer_.reset(
+ new IFChannelBuffer(input_num_frames_, num_proc_channels_));
+ }
- const bool resampling_required = input_num_frames_ != buffer_num_frames_;
-
- const int16_t* interleaved = frame->data();
- if (num_channels_ == 1) {
- if (input_num_channels_ == 1) {
- if (resampling_required) {
- std::array<float, kMaxSamplesPerChannel> float_buffer;
- S16ToFloatS16(interleaved, input_num_frames_, float_buffer.data());
- input_resamplers_[0]->Resample(float_buffer.data(), input_num_frames_,
- data_->channels()[0],
- buffer_num_frames_);
- } else {
- S16ToFloatS16(interleaved, input_num_frames_, data_->channels()[0]);
- }
- } else {
- std::array<float, kMaxSamplesPerChannel> float_buffer;
- float* downmixed_data =
- resampling_required ? float_buffer.data() : data_->channels()[0];
- if (downmix_by_averaging_) {
- for (size_t j = 0, k = 0; j < input_num_frames_; ++j) {
- int32_t sum = 0;
- for (size_t i = 0; i < input_num_channels_; ++i, ++k) {
- sum += interleaved[k];
- }
- downmixed_data[j] = sum / static_cast<int16_t>(input_num_channels_);
- }
- } else {
- for (size_t j = 0, k = channel_for_downmixing_; j < input_num_frames_;
- ++j, k += input_num_channels_) {
- downmixed_data[j] = interleaved[k];
- }
- }
-
- if (resampling_required) {
- input_resamplers_[0]->Resample(downmixed_data, input_num_frames_,
- data_->channels()[0],
- buffer_num_frames_);
- }
- }
+ int16_t* const* deinterleaved;
+ if (input_num_frames_ == proc_num_frames_) {
+ deinterleaved = data_->ibuf()->channels();
} else {
- auto deinterleave_channel = [](size_t channel, size_t num_channels,
- size_t samples_per_channel, const int16_t* x,
- float* y) {
- for (size_t j = 0, k = channel; j < samples_per_channel;
- ++j, k += num_channels) {
- y[j] = x[k];
- }
- };
+ deinterleaved = input_buffer_->ibuf()->channels();
+ }
+ // TODO(yujo): handle muted frames more efficiently.
+ if (num_proc_channels_ == 1) {
+ // Downmix and deinterleave simultaneously.
+ DownmixInterleavedToMono(frame->data(), input_num_frames_,
+ num_input_channels_, deinterleaved[0]);
+ } else {
+ RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_);
+ Deinterleave(frame->data(), input_num_frames_, num_proc_channels_,
+ deinterleaved);
+ }
- if (resampling_required) {
- std::array<float, kMaxSamplesPerChannel> float_buffer;
- for (size_t i = 0; i < num_channels_; ++i) {
- deinterleave_channel(i, num_channels_, input_num_frames_, interleaved,
- float_buffer.data());
- input_resamplers_[i]->Resample(float_buffer.data(), input_num_frames_,
- data_->channels()[i],
- buffer_num_frames_);
- }
- } else {
- for (size_t i = 0; i < num_channels_; ++i) {
- deinterleave_channel(i, num_channels_, input_num_frames_, interleaved,
- data_->channels()[i]);
- }
+ // Resample.
+ if (input_num_frames_ != proc_num_frames_) {
+ for (size_t i = 0; i < num_proc_channels_; ++i) {
+ input_resamplers_[i]->Resample(
+ input_buffer_->fbuf_const()->channels()[i], input_num_frames_,
+ data_->fbuf()->channels()[i], proc_num_frames_);
}
}
}
-void AudioBuffer::CopyTo(AudioFrame* frame) const {
+void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1);
RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_);
- const bool resampling_required = buffer_num_frames_ != output_num_frames_;
-
- int16_t* interleaved = frame->mutable_data();
- if (num_channels_ == 1) {
- std::array<float, kMaxSamplesPerChannel> float_buffer;
-
- if (resampling_required) {
- output_resamplers_[0]->Resample(data_->channels()[0], buffer_num_frames_,
- float_buffer.data(), output_num_frames_);
+ // Resample if necessary.
+ IFChannelBuffer* data_ptr = data_.get();
+ if (proc_num_frames_ != output_num_frames_) {
+ for (size_t i = 0; i < num_channels_; ++i) {
+ output_resamplers_[i]->Resample(
+ data_->fbuf()->channels()[i], proc_num_frames_,
+ output_buffer_->fbuf()->channels()[i], output_num_frames_);
}
- const float* deinterleaved =
- resampling_required ? float_buffer.data() : data_->channels()[0];
+ data_ptr = output_buffer_.get();
+ }
- if (frame->num_channels_ == 1) {
- for (size_t j = 0; j < output_num_frames_; ++j) {
- interleaved[j] = FloatS16ToS16(deinterleaved[j]);
- }
- } else {
- for (size_t i = 0, k = 0; i < output_num_frames_; ++i) {
- float tmp = FloatS16ToS16(deinterleaved[i]);
- for (size_t j = 0; j < frame->num_channels_; ++j, ++k) {
- interleaved[k] = tmp;
- }
- }
- }
+ // TODO(yujo): handle muted frames more efficiently.
+ if (frame->num_channels_ == num_channels_) {
+ Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_,
+ frame->mutable_data());
} else {
- auto interleave_channel = [](size_t channel, size_t num_channels,
- size_t samples_per_channel, const float* x,
- int16_t* y) {
- for (size_t k = 0, j = channel; k < samples_per_channel;
- ++k, j += num_channels) {
- y[j] = FloatS16ToS16(x[k]);
- }
- };
-
- if (resampling_required) {
- for (size_t i = 0; i < num_channels_; ++i) {
- std::array<float, kMaxSamplesPerChannel> float_buffer;
- output_resamplers_[i]->Resample(data_->channels()[i],
- buffer_num_frames_, float_buffer.data(),
- output_num_frames_);
- interleave_channel(i, frame->num_channels_, output_num_frames_,
- float_buffer.data(), interleaved);
- }
- } else {
- for (size_t i = 0; i < num_channels_; ++i) {
- interleave_channel(i, frame->num_channels_, output_num_frames_,
- data_->channels()[i], interleaved);
- }
- }
-
- for (size_t i = num_channels_; i < frame->num_channels_; ++i) {
- for (size_t j = 0, k = i, n = num_channels_; j < output_num_frames_;
- ++j, k += frame->num_channels_, n += frame->num_channels_) {
- interleaved[k] = interleaved[n];
- }
- }
+ UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_,
+ frame->num_channels_, frame->mutable_data());
}
}
@@ -338,11 +290,10 @@
splitting_filter_->Synthesis(split_data_.get(), data_.get());
}
-void AudioBuffer::ExportSplitChannelData(size_t channel,
+void AudioBuffer::CopySplitChannelDataTo(size_t channel,
int16_t* const* split_band_data) {
for (size_t k = 0; k < num_bands(); ++k) {
- const float* band_data = split_bands(channel)[k];
-
+ const float* band_data = split_bands_f(channel)[k];
RTC_DCHECK(split_band_data[k]);
RTC_DCHECK(band_data);
for (size_t i = 0; i < num_frames_per_band(); ++i) {
@@ -351,11 +302,11 @@
}
}
-void AudioBuffer::ImportSplitChannelData(
+void AudioBuffer::CopySplitChannelDataFrom(
size_t channel,
const int16_t* const* split_band_data) {
for (size_t k = 0; k < num_bands(); ++k) {
- float* band_data = split_bands(channel)[k];
+ float* band_data = split_bands_f(channel)[k];
RTC_DCHECK(split_band_data[k]);
RTC_DCHECK(band_data);
for (size_t i = 0; i < num_frames_per_band(); ++i) {