Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "audio/audio_transport_impl.h" |
| 12 | |
| 13 | #include <algorithm> |
| 14 | #include <memory> |
| 15 | #include <utility> |
| 16 | |
Fredrik Solenberg | a8b7c7f | 2018-01-17 11:18:31 +0100 | [diff] [blame] | 17 | #include "audio/remix_resample.h" |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 18 | #include "audio/utility/audio_frame_operations.h" |
Tim Na | b8c775a | 2020-01-10 10:33:05 -0800 | [diff] [blame] | 19 | #include "call/audio_sender.h" |
Olga Sharonova | 09ceed2 | 2020-09-30 18:27:39 +0200 | [diff] [blame] | 20 | #include "modules/async_audio_processing/async_audio_processing.h" |
Per Åhgren | 71652f4 | 2020-03-17 13:23:58 +0100 | [diff] [blame] | 21 | #include "modules/audio_processing/include/audio_frame_proxies.h" |
Yves Gerey | 988cc08 | 2018-10-23 12:03:01 +0200 | [diff] [blame] | 22 | #include "rtc_base/checks.h" |
Olga Sharonova | 2d0ba28 | 2022-09-27 15:22:34 +0200 | [diff] [blame] | 23 | #include "rtc_base/trace_event.h" |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 24 | |
| 25 | namespace webrtc { |
| 26 | |
| 27 | namespace { |
| 28 | |
| 29 | // We want to process at the lowest sample rate and channel count possible |
| 30 | // without losing information. Choose the lowest native rate at least equal to |
| 31 | // the minimum of input and codec rates, choose lowest channel count, and |
| 32 | // configure the audio frame. |
| 33 | void InitializeCaptureFrame(int input_sample_rate, |
| 34 | int send_sample_rate_hz, |
| 35 | size_t input_num_channels, |
| 36 | size_t send_num_channels, |
| 37 | AudioFrame* audio_frame) { |
| 38 | RTC_DCHECK(audio_frame); |
| 39 | int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz); |
| 40 | for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) { |
| 41 | audio_frame->sample_rate_hz_ = native_rate_hz; |
| 42 | if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) { |
| 43 | break; |
| 44 | } |
| 45 | } |
| 46 | audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels); |
| 47 | } |
| 48 | |
henrika | 649a385 | 2017-12-22 13:58:29 +0100 | [diff] [blame] | 49 | void ProcessCaptureFrame(uint32_t delay_ms, |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 50 | bool key_pressed, |
| 51 | bool swap_stereo_channels, |
| 52 | AudioProcessing* audio_processing, |
| 53 | AudioFrame* audio_frame) { |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 54 | RTC_DCHECK(audio_frame); |
Per Åhgren | cc73ed3 | 2020-04-26 23:56:17 +0200 | [diff] [blame] | 55 | if (audio_processing) { |
| 56 | audio_processing->set_stream_delay_ms(delay_ms); |
| 57 | audio_processing->set_stream_key_pressed(key_pressed); |
| 58 | int error = ProcessAudioFrame(audio_processing, audio_frame); |
Per Åhgren | 71652f4 | 2020-03-17 13:23:58 +0100 | [diff] [blame] | 59 | |
Per Åhgren | cc73ed3 | 2020-04-26 23:56:17 +0200 | [diff] [blame] | 60 | RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error; |
| 61 | } |
| 62 | |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 63 | if (swap_stereo_channels) { |
| 64 | AudioFrameOperations::SwapStereoChannels(audio_frame); |
| 65 | } |
| 66 | } |
| 67 | |
Artem Titov | b0ea637 | 2021-07-26 11:47:07 +0200 | [diff] [blame] | 68 | // Resample audio in `frame` to given sample rate preserving the |
| 69 | // channel count and place the result in `destination`. |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 70 | int Resample(const AudioFrame& frame, |
| 71 | const int destination_sample_rate, |
| 72 | PushResampler<int16_t>* resampler, |
| 73 | int16_t* destination) { |
Olga Sharonova | 8a31b75 | 2022-10-04 14:58:00 +0200 | [diff] [blame] | 74 | TRACE_EVENT2("webrtc", "Resample", "frame sample rate", frame.sample_rate_hz_, |
| 75 | "destination_sample_rate", destination_sample_rate); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 76 | const int number_of_channels = static_cast<int>(frame.num_channels_); |
| 77 | const int target_number_of_samples_per_channel = |
| 78 | destination_sample_rate / 100; |
| 79 | resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate, |
| 80 | number_of_channels); |
| 81 | |
| 82 | // TODO(yujo): make resampler take an AudioFrame, and add special case |
| 83 | // handling of muted frames. |
| 84 | return resampler->Resample( |
| 85 | frame.data(), frame.samples_per_channel_ * number_of_channels, |
| 86 | destination, number_of_channels * target_number_of_samples_per_channel); |
| 87 | } |
| 88 | } // namespace |
| 89 | |
Olga Sharonova | 09ceed2 | 2020-09-30 18:27:39 +0200 | [diff] [blame] | 90 | AudioTransportImpl::AudioTransportImpl( |
| 91 | AudioMixer* mixer, |
| 92 | AudioProcessing* audio_processing, |
| 93 | AsyncAudioProcessing::Factory* async_audio_processing_factory) |
| 94 | : audio_processing_(audio_processing), |
| 95 | async_audio_processing_( |
| 96 | async_audio_processing_factory |
| 97 | ? async_audio_processing_factory->CreateAsyncAudioProcessing( |
| 98 | [this](std::unique_ptr<AudioFrame> frame) { |
| 99 | this->SendProcessedData(std::move(frame)); |
| 100 | }) |
| 101 | : nullptr), |
| 102 | mixer_(mixer) { |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 103 | RTC_DCHECK(mixer); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 104 | } |
| 105 | |
| 106 | AudioTransportImpl::~AudioTransportImpl() {} |
| 107 | |
Olov Brändström | b732bd5 | 2022-01-28 15:07:39 +0100 | [diff] [blame] | 108 | int32_t AudioTransportImpl::RecordedDataIsAvailable( |
| 109 | const void* audio_data, |
Jakob Ivarsson | 22821de | 2023-01-20 22:09:29 +0100 | [diff] [blame] | 110 | size_t number_of_frames, |
| 111 | size_t bytes_per_sample, |
| 112 | size_t number_of_channels, |
| 113 | uint32_t sample_rate, |
| 114 | uint32_t audio_delay_milliseconds, |
| 115 | int32_t clock_drift, |
| 116 | uint32_t volume, |
| 117 | bool key_pressed, |
Olov Brändström | b732bd5 | 2022-01-28 15:07:39 +0100 | [diff] [blame] | 118 | uint32_t& new_mic_volume) { // NOLINT: to avoid changing APIs |
| 119 | return RecordedDataIsAvailable( |
| 120 | audio_data, number_of_frames, bytes_per_sample, number_of_channels, |
| 121 | sample_rate, audio_delay_milliseconds, clock_drift, volume, key_pressed, |
Jakob Ivarsson | 22821de | 2023-01-20 22:09:29 +0100 | [diff] [blame] | 122 | new_mic_volume, /*estimated_capture_time_ns=*/absl::nullopt); |
Olov Brändström | b732bd5 | 2022-01-28 15:07:39 +0100 | [diff] [blame] | 123 | } |
| 124 | |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 125 | // Not used in Chromium. Process captured audio and distribute to all sending |
| 126 | // streams, and try to do this at the lowest possible sample rate. |
| 127 | int32_t AudioTransportImpl::RecordedDataIsAvailable( |
| 128 | const void* audio_data, |
Jakob Ivarsson | 22821de | 2023-01-20 22:09:29 +0100 | [diff] [blame] | 129 | size_t number_of_frames, |
| 130 | size_t bytes_per_sample, |
| 131 | size_t number_of_channels, |
| 132 | uint32_t sample_rate, |
| 133 | uint32_t audio_delay_milliseconds, |
| 134 | int32_t /*clock_drift*/, |
| 135 | uint32_t /*volume*/, |
| 136 | bool key_pressed, |
Olov Brändström | b732bd5 | 2022-01-28 15:07:39 +0100 | [diff] [blame] | 137 | uint32_t& /*new_mic_volume*/, |
Jakob Ivarsson | 22821de | 2023-01-20 22:09:29 +0100 | [diff] [blame] | 138 | absl::optional<int64_t> |
Olov Brändström | b732bd5 | 2022-01-28 15:07:39 +0100 | [diff] [blame] | 139 | estimated_capture_time_ns) { // NOLINT: to avoid changing APIs |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 140 | RTC_DCHECK(audio_data); |
| 141 | RTC_DCHECK_GE(number_of_channels, 1); |
| 142 | RTC_DCHECK_LE(number_of_channels, 2); |
| 143 | RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample); |
| 144 | RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); |
| 145 | // 100 = 1 second / data duration (10 ms). |
| 146 | RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); |
| 147 | RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels, |
| 148 | AudioFrame::kMaxDataSizeBytes); |
| 149 | |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 150 | int send_sample_rate_hz = 0; |
| 151 | size_t send_num_channels = 0; |
| 152 | bool swap_stereo_channels = false; |
| 153 | { |
Markus Handell | 6287280 | 2020-07-06 15:15:07 +0200 | [diff] [blame] | 154 | MutexLock lock(&capture_lock_); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 155 | send_sample_rate_hz = send_sample_rate_hz_; |
| 156 | send_num_channels = send_num_channels_; |
| 157 | swap_stereo_channels = swap_stereo_channels_; |
| 158 | } |
| 159 | |
| 160 | std::unique_ptr<AudioFrame> audio_frame(new AudioFrame()); |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 161 | InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels, |
| 162 | send_num_channels, audio_frame.get()); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 163 | voe::RemixAndResample(static_cast<const int16_t*>(audio_data), |
| 164 | number_of_frames, number_of_channels, sample_rate, |
| 165 | &capture_resampler_, audio_frame.get()); |
henrika | 649a385 | 2017-12-22 13:58:29 +0100 | [diff] [blame] | 166 | ProcessCaptureFrame(audio_delay_milliseconds, key_pressed, |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 167 | swap_stereo_channels, audio_processing_, |
| 168 | audio_frame.get()); |
Jakob Ivarsson | 22821de | 2023-01-20 22:09:29 +0100 | [diff] [blame] | 169 | |
| 170 | if (estimated_capture_time_ns) { |
| 171 | audio_frame->set_absolute_capture_timestamp_ms(*estimated_capture_time_ns / |
| 172 | 1000000); |
| 173 | } |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 174 | |
Olga Sharonova | 09ceed2 | 2020-09-30 18:27:39 +0200 | [diff] [blame] | 175 | RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); |
| 176 | if (async_audio_processing_) |
| 177 | async_audio_processing_->Process(std::move(audio_frame)); |
| 178 | else |
| 179 | SendProcessedData(std::move(audio_frame)); |
| 180 | |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 181 | return 0; |
| 182 | } |
| 183 | |
Olga Sharonova | 09ceed2 | 2020-09-30 18:27:39 +0200 | [diff] [blame] | 184 | void AudioTransportImpl::SendProcessedData( |
| 185 | std::unique_ptr<AudioFrame> audio_frame) { |
Olga Sharonova | 2d0ba28 | 2022-09-27 15:22:34 +0200 | [diff] [blame] | 186 | TRACE_EVENT0("webrtc", "AudioTransportImpl::SendProcessedData"); |
Olga Sharonova | 09ceed2 | 2020-09-30 18:27:39 +0200 | [diff] [blame] | 187 | RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); |
| 188 | MutexLock lock(&capture_lock_); |
| 189 | if (audio_senders_.empty()) |
| 190 | return; |
| 191 | |
| 192 | auto it = audio_senders_.begin(); |
| 193 | while (++it != audio_senders_.end()) { |
| 194 | auto audio_frame_copy = std::make_unique<AudioFrame>(); |
| 195 | audio_frame_copy->CopyFrom(*audio_frame); |
| 196 | (*it)->SendAudioData(std::move(audio_frame_copy)); |
| 197 | } |
| 198 | // Send the original frame to the first stream w/o copying. |
| 199 | (*audio_senders_.begin())->SendAudioData(std::move(audio_frame)); |
| 200 | } |
| 201 | |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 202 | // Mix all received streams, feed the result to the AudioProcessing module, then |
| 203 | // resample the result to the requested output rate. |
| 204 | int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples, |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 205 | const size_t nBytesPerSample, |
| 206 | const size_t nChannels, |
| 207 | const uint32_t samplesPerSec, |
| 208 | void* audioSamples, |
| 209 | size_t& nSamplesOut, |
| 210 | int64_t* elapsed_time_ms, |
| 211 | int64_t* ntp_time_ms) { |
Olga Sharonova | 2d0ba28 | 2022-09-27 15:22:34 +0200 | [diff] [blame] | 212 | TRACE_EVENT0("webrtc", "AudioTransportImpl::SendProcessedData"); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 213 | RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample); |
| 214 | RTC_DCHECK_GE(nChannels, 1); |
| 215 | RTC_DCHECK_LE(nChannels, 2); |
| 216 | RTC_DCHECK_GE( |
| 217 | samplesPerSec, |
| 218 | static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz)); |
| 219 | |
| 220 | // 100 = 1 second / data duration (10 ms). |
| 221 | RTC_DCHECK_EQ(nSamples * 100, samplesPerSec); |
| 222 | RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels, |
| 223 | AudioFrame::kMaxDataSizeBytes); |
| 224 | |
| 225 | mixer_->Mix(nChannels, &mixed_frame_); |
| 226 | *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; |
| 227 | *ntp_time_ms = mixed_frame_.ntp_time_ms_; |
| 228 | |
Per Åhgren | cc73ed3 | 2020-04-26 23:56:17 +0200 | [diff] [blame] | 229 | if (audio_processing_) { |
| 230 | const auto error = |
| 231 | ProcessReverseAudioFrame(audio_processing_, &mixed_frame_); |
| 232 | RTC_DCHECK_EQ(error, AudioProcessing::kNoError); |
| 233 | } |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 234 | |
| 235 | nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_, |
| 236 | static_cast<int16_t*>(audioSamples)); |
| 237 | RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples); |
| 238 | return 0; |
| 239 | } |
| 240 | |
| 241 | // Used by Chromium - same as NeedMorePlayData() but because Chrome has its |
| 242 | // own APM instance, does not call audio_processing_->ProcessReverseStream(). |
| 243 | void AudioTransportImpl::PullRenderData(int bits_per_sample, |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 244 | int sample_rate, |
| 245 | size_t number_of_channels, |
| 246 | size_t number_of_frames, |
| 247 | void* audio_data, |
| 248 | int64_t* elapsed_time_ms, |
| 249 | int64_t* ntp_time_ms) { |
Olga Sharonova | 2d0ba28 | 2022-09-27 15:22:34 +0200 | [diff] [blame] | 250 | TRACE_EVENT2("webrtc", "AudioTransportImpl::PullRenderData", "sample_rate", |
| 251 | sample_rate, "number_of_frames", number_of_frames); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 252 | RTC_DCHECK_EQ(bits_per_sample, 16); |
| 253 | RTC_DCHECK_GE(number_of_channels, 1); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 254 | RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); |
| 255 | |
| 256 | // 100 = 1 second / data duration (10 ms). |
| 257 | RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); |
| 258 | |
| 259 | // 8 = bits per byte. |
| 260 | RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels, |
| 261 | AudioFrame::kMaxDataSizeBytes); |
| 262 | mixer_->Mix(number_of_channels, &mixed_frame_); |
| 263 | *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; |
| 264 | *ntp_time_ms = mixed_frame_.ntp_time_ms_; |
| 265 | |
| 266 | auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_, |
| 267 | static_cast<int16_t*>(audio_data)); |
| 268 | RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames); |
| 269 | } |
| 270 | |
Tim Na | b8c775a | 2020-01-10 10:33:05 -0800 | [diff] [blame] | 271 | void AudioTransportImpl::UpdateAudioSenders(std::vector<AudioSender*> senders, |
| 272 | int send_sample_rate_hz, |
| 273 | size_t send_num_channels) { |
Markus Handell | 6287280 | 2020-07-06 15:15:07 +0200 | [diff] [blame] | 274 | MutexLock lock(&capture_lock_); |
Tim Na | b8c775a | 2020-01-10 10:33:05 -0800 | [diff] [blame] | 275 | audio_senders_ = std::move(senders); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 276 | send_sample_rate_hz_ = send_sample_rate_hz; |
| 277 | send_num_channels_ = send_num_channels; |
| 278 | } |
| 279 | |
| 280 | void AudioTransportImpl::SetStereoChannelSwapping(bool enable) { |
Markus Handell | 6287280 | 2020-07-06 15:15:07 +0200 | [diff] [blame] | 281 | MutexLock lock(&capture_lock_); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 282 | swap_stereo_channels_ = enable; |
| 283 | } |
| 284 | |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 285 | } // namespace webrtc |