blob: 4367ab08fac5e0f809a3ff15503c2a0ecc994e50 [file] [log] [blame]
turaj@webrtc.org7959e162013-09-12 18:30:26 +00001/*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/include/audio_coding_module.h"
turaj@webrtc.org7959e162013-09-12 18:30:26 +000012
Jonathan Yu36344a02017-07-30 01:55:34 -070013#include <algorithm>
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <cstdint>
Jonathan Yu36344a02017-07-30 01:55:34 -070015
Niels Möller2edab4c2018-10-22 09:48:08 +020016#include "absl/strings/match.h"
Ali Tofigh714e3cb2022-07-20 12:53:07 +020017#include "absl/strings/string_view.h"
Yves Gerey988cc082018-10-23 12:03:01 +020018#include "api/array_view.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/acm2/acm_receiver.h"
Per Åhgren4dd56a32019-11-19 21:00:59 +010020#include "modules/audio_coding/acm2/acm_remixing.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "modules/audio_coding/acm2/acm_resampler.h"
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +020022#include "modules/include/module_common_types.h"
Yves Gerey988cc082018-10-23 12:03:01 +020023#include "modules/include/module_common_types_public.h"
24#include "rtc_base/buffer.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020025#include "rtc_base/checks.h"
26#include "rtc_base/logging.h"
Karl Wiberge40468b2017-11-22 10:42:26 +010027#include "rtc_base/numerics/safe_conversions.h"
Markus Handell0df0fae2020-07-07 15:53:34 +020028#include "rtc_base/synchronization/mutex.h"
Yves Gerey988cc082018-10-23 12:03:01 +020029#include "rtc_base/thread_annotations.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020030#include "system_wrappers/include/metrics.h"
turaj@webrtc.org7959e162013-09-12 18:30:26 +000031
32namespace webrtc {
33
kwibergc13ded52016-06-17 06:00:45 -070034namespace {
35
// Number of samples initially allocated for the re-mix buffer owned by
// InputData: 6 channels times 480 samples (10 ms of audio at 48 kHz).
constexpr size_t kInitialInputDataBufferSize = 6 * 480;

// Upper bound, in Hz, on the sample rate of frames accepted by Add10MsData().
constexpr int32_t kMaxInputSampleRateHz = 192000;
41
kwibergc13ded52016-06-17 06:00:45 -070042class AudioCodingModuleImpl final : public AudioCodingModule {
43 public:
44 explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config);
45 ~AudioCodingModuleImpl() override;
46
47 /////////////////////////////////////////
48 // Sender
49 //
50
kwiberg24c7c122016-09-28 11:57:10 -070051 void ModifyEncoder(rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)>
52 modifier) override;
kwibergc13ded52016-06-17 06:00:45 -070053
kwibergc13ded52016-06-17 06:00:45 -070054 // Register a transport callback which will be
55 // called to deliver the encoded buffers.
56 int RegisterTransportCallback(AudioPacketizationCallback* transport) override;
57
58 // Add 10 ms of raw (PCM) audio data to the encoder.
59 int Add10MsData(const AudioFrame& audio_frame) override;
60
61 /////////////////////////////////////////
kwibergc13ded52016-06-17 06:00:45 -070062 // (FEC) Forward Error Correction (codec internal)
63 //
64
kwibergc13ded52016-06-17 06:00:45 -070065 // Set target packet loss rate
66 int SetPacketLossRate(int loss_rate) override;
67
68 /////////////////////////////////////////
kwibergc13ded52016-06-17 06:00:45 -070069 // Receiver
70 //
71
72 // Initialize receiver, resets codec database etc.
73 int InitializeReceiver() override;
74
kwiberg1c07c702017-03-27 07:15:49 -070075 void SetReceiveCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
76
kwibergc13ded52016-06-17 06:00:45 -070077 // Incoming packet from network parsed and ready for decode.
78 int IncomingPacket(const uint8_t* incoming_payload,
79 const size_t payload_length,
Niels Möllerafb5dbb2019-02-15 15:21:47 +010080 const RTPHeader& rtp_info) override;
kwibergc13ded52016-06-17 06:00:45 -070081
kwibergc13ded52016-06-17 06:00:45 -070082 // Get 10 milliseconds of raw audio data to play out, and
83 // automatic resample to the requested frequency if > 0.
84 int PlayoutData10Ms(int desired_freq_hz,
85 AudioFrame* audio_frame,
86 bool* muted) override;
kwibergc13ded52016-06-17 06:00:45 -070087
88 /////////////////////////////////////////
89 // Statistics
90 //
91
92 int GetNetworkStatistics(NetworkStatistics* statistics) override;
93
ivoce1198e02017-09-08 08:13:19 -070094 ANAStats GetANAStats() const override;
95
Jakob Ivarssonbf087452021-11-11 13:43:49 +010096 int GetTargetBitrate() const override;
97
kwibergc13ded52016-06-17 06:00:45 -070098 private:
99 struct InputData {
Per Åhgren4f2e9402019-10-04 11:06:15 +0200100 InputData() : buffer(kInitialInputDataBufferSize) {}
kwibergc13ded52016-06-17 06:00:45 -0700101 uint32_t input_timestamp;
102 const int16_t* audio;
103 size_t length_per_channel;
104 size_t audio_channel;
105 // If a re-mix is required (up or down), this buffer will store a re-mixed
106 // version of the input.
Per Åhgren4f2e9402019-10-04 11:06:15 +0200107 std::vector<int16_t> buffer;
kwibergc13ded52016-06-17 06:00:45 -0700108 };
109
Markus Handell0df0fae2020-07-07 15:53:34 +0200110 InputData input_data_ RTC_GUARDED_BY(acm_mutex_);
Per Åhgren4f2e9402019-10-04 11:06:15 +0200111
kwibergc13ded52016-06-17 06:00:45 -0700112 // This member class writes values to the named UMA histogram, but only if
113 // the value has changed since the last time (and always for the first call).
114 class ChangeLogger {
115 public:
Ali Tofigh714e3cb2022-07-20 12:53:07 +0200116 explicit ChangeLogger(absl::string_view histogram_name)
kwibergc13ded52016-06-17 06:00:45 -0700117 : histogram_name_(histogram_name) {}
118 // Logs the new value if it is different from the last logged value, or if
119 // this is the first call.
120 void MaybeLog(int value);
121
122 private:
123 int last_value_ = 0;
124 int first_time_ = true;
125 const std::string histogram_name_;
126 };
127
kwibergc13ded52016-06-17 06:00:45 -0700128 int Add10MsDataInternal(const AudioFrame& audio_frame, InputData* input_data)
Markus Handell0df0fae2020-07-07 15:53:34 +0200129 RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
Minyue Lidea73ee2020-02-18 15:45:41 +0100130
Artem Titovd00ce742021-07-28 20:00:17 +0200131 // TODO(bugs.webrtc.org/10739): change `absolute_capture_timestamp_ms` to
Minyue Lidea73ee2020-02-18 15:45:41 +0100132 // int64_t when it always receives a valid value.
133 int Encode(const InputData& input_data,
134 absl::optional<int64_t> absolute_capture_timestamp_ms)
Markus Handell0df0fae2020-07-07 15:53:34 +0200135 RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700136
Markus Handell0df0fae2020-07-07 15:53:34 +0200137 int InitializeReceiverSafe() RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700138
Ali Tofigh714e3cb2022-07-20 12:53:07 +0200139 bool HaveValidEncoder(absl::string_view caller_name) const
Markus Handell0df0fae2020-07-07 15:53:34 +0200140 RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700141
142 // Preprocessing of input audio, including resampling and down-mixing if
143 // required, before pushing audio into encoder's buffer.
144 //
145 // in_frame: input audio-frame
146 // ptr_out: pointer to output audio_frame. If no preprocessing is required
Artem Titovd00ce742021-07-28 20:00:17 +0200147 // `ptr_out` will be pointing to `in_frame`, otherwise pointing to
148 // `preprocess_frame_`.
kwibergc13ded52016-06-17 06:00:45 -0700149 //
150 // Return value:
151 // -1: if encountering an error.
152 // 0: otherwise.
153 int PreprocessToAddData(const AudioFrame& in_frame,
154 const AudioFrame** ptr_out)
Markus Handell0df0fae2020-07-07 15:53:34 +0200155 RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700156
157 // Change required states after starting to receive the codec corresponding
Artem Titovd00ce742021-07-28 20:00:17 +0200158 // to `index`.
kwibergc13ded52016-06-17 06:00:45 -0700159 int UpdateUponReceivingCodec(int index);
160
Markus Handell0df0fae2020-07-07 15:53:34 +0200161 mutable Mutex acm_mutex_;
162 rtc::Buffer encode_buffer_ RTC_GUARDED_BY(acm_mutex_);
163 uint32_t expected_codec_ts_ RTC_GUARDED_BY(acm_mutex_);
164 uint32_t expected_in_ts_ RTC_GUARDED_BY(acm_mutex_);
165 acm2::ACMResampler resampler_ RTC_GUARDED_BY(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700166 acm2::AcmReceiver receiver_; // AcmReceiver has it's own internal lock.
Markus Handell0df0fae2020-07-07 15:53:34 +0200167 ChangeLogger bitrate_logger_ RTC_GUARDED_BY(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700168
Karl Wiberg49c33ce2018-11-12 14:21:58 +0100169 // Current encoder stack, provided by a call to RegisterEncoder.
Markus Handell0df0fae2020-07-07 15:53:34 +0200170 std::unique_ptr<AudioEncoder> encoder_stack_ RTC_GUARDED_BY(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700171
kwibergc13ded52016-06-17 06:00:45 -0700172 // This is to keep track of CN instances where we can send DTMFs.
Markus Handell0df0fae2020-07-07 15:53:34 +0200173 uint8_t previous_pltype_ RTC_GUARDED_BY(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700174
Markus Handell0df0fae2020-07-07 15:53:34 +0200175 bool receiver_initialized_ RTC_GUARDED_BY(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700176
Markus Handell0df0fae2020-07-07 15:53:34 +0200177 AudioFrame preprocess_frame_ RTC_GUARDED_BY(acm_mutex_);
178 bool first_10ms_data_ RTC_GUARDED_BY(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700179
Markus Handell0df0fae2020-07-07 15:53:34 +0200180 bool first_frame_ RTC_GUARDED_BY(acm_mutex_);
181 uint32_t last_timestamp_ RTC_GUARDED_BY(acm_mutex_);
182 uint32_t last_rtp_timestamp_ RTC_GUARDED_BY(acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700183
Markus Handell0df0fae2020-07-07 15:53:34 +0200184 Mutex callback_mutex_;
kwibergc13ded52016-06-17 06:00:45 -0700185 AudioPacketizationCallback* packetization_callback_
Markus Handell0df0fae2020-07-07 15:53:34 +0200186 RTC_GUARDED_BY(callback_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700187
188 int codec_histogram_bins_log_[static_cast<size_t>(
189 AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes)];
190 int number_of_consecutive_empty_packets_;
191};
192
193// Adds a codec usage sample to the histogram.
194void UpdateCodecTypeHistogram(size_t codec_type) {
195 RTC_HISTOGRAM_ENUMERATION(
196 "WebRTC.Audio.Encoder.CodecType", static_cast<int>(codec_type),
197 static_cast<int>(
198 webrtc::AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes));
199}
200
kwibergc13ded52016-06-17 06:00:45 -0700201void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
202 if (value != last_value_ || first_time_) {
203 first_time_ = false;
204 last_value_ = value;
205 RTC_HISTOGRAM_COUNTS_SPARSE_100(histogram_name_, value);
206 }
207}
208
209AudioCodingModuleImpl::AudioCodingModuleImpl(
210 const AudioCodingModule::Config& config)
solenbergc7b4a452017-09-28 07:37:11 -0700211 : expected_codec_ts_(0xD87F3F9F),
kwibergc13ded52016-06-17 06:00:45 -0700212 expected_in_ts_(0xD87F3F9F),
213 receiver_(config),
214 bitrate_logger_("WebRTC.Audio.TargetBitrateInKbps"),
kwibergc13ded52016-06-17 06:00:45 -0700215 encoder_stack_(nullptr),
216 previous_pltype_(255),
217 receiver_initialized_(false),
218 first_10ms_data_(false),
219 first_frame_(true),
220 packetization_callback_(NULL),
kwibergc13ded52016-06-17 06:00:45 -0700221 codec_histogram_bins_log_(),
222 number_of_consecutive_empty_packets_(0) {
223 if (InitializeReceiverSafe() < 0) {
Mirko Bonadei675513b2017-11-09 11:09:25 +0100224 RTC_LOG(LS_ERROR) << "Cannot initialize receiver";
kwibergc13ded52016-06-17 06:00:45 -0700225 }
Mirko Bonadei675513b2017-11-09 11:09:25 +0100226 RTC_LOG(LS_INFO) << "Created";
kwibergc13ded52016-06-17 06:00:45 -0700227}
228
229AudioCodingModuleImpl::~AudioCodingModuleImpl() = default;
230
Minyue Lidea73ee2020-02-18 15:45:41 +0100231int32_t AudioCodingModuleImpl::Encode(
232 const InputData& input_data,
233 absl::optional<int64_t> absolute_capture_timestamp_ms) {
234 // TODO(bugs.webrtc.org/10739): add dcheck that
Artem Titovcfea2182021-08-10 01:22:31 +0200235 // `audio_frame.absolute_capture_timestamp_ms()` always has a value.
kwibergc13ded52016-06-17 06:00:45 -0700236 AudioEncoder::EncodedInfo encoded_info;
237 uint8_t previous_pltype;
238
239 // Check if there is an encoder before.
240 if (!HaveValidEncoder("Process"))
241 return -1;
242
Yves Gerey665174f2018-06-19 15:03:05 +0200243 if (!first_frame_) {
deadbeeffcada902016-08-24 12:45:13 -0700244 RTC_DCHECK(IsNewerTimestamp(input_data.input_timestamp, last_timestamp_))
ossu63fb95a2016-07-06 09:34:22 -0700245 << "Time should not move backwards";
246 }
247
kwibergc13ded52016-06-17 06:00:45 -0700248 // Scale the timestamp to the codec's RTP timestamp rate.
249 uint32_t rtp_timestamp =
Karl Wiberg053c3712019-05-16 15:24:17 +0200250 first_frame_
251 ? input_data.input_timestamp
252 : last_rtp_timestamp_ +
253 rtc::dchecked_cast<uint32_t>(rtc::CheckedDivExact(
254 int64_t{input_data.input_timestamp - last_timestamp_} *
255 encoder_stack_->RtpTimestampRateHz(),
256 int64_t{encoder_stack_->SampleRateHz()}));
Minyue Liff0e4db2020-01-23 13:45:50 +0100257
kwibergc13ded52016-06-17 06:00:45 -0700258 last_timestamp_ = input_data.input_timestamp;
259 last_rtp_timestamp_ = rtp_timestamp;
260 first_frame_ = false;
261
262 // Clear the buffer before reuse - encoded data will get appended.
263 encode_buffer_.Clear();
264 encoded_info = encoder_stack_->Encode(
Yves Gerey665174f2018-06-19 15:03:05 +0200265 rtp_timestamp,
266 rtc::ArrayView<const int16_t>(
267 input_data.audio,
268 input_data.audio_channel * input_data.length_per_channel),
kwibergc13ded52016-06-17 06:00:45 -0700269 &encode_buffer_);
270
271 bitrate_logger_.MaybeLog(encoder_stack_->GetTargetBitrate() / 1000);
272 if (encode_buffer_.size() == 0 && !encoded_info.send_even_if_empty) {
273 // Not enough data.
274 return 0;
275 }
276 previous_pltype = previous_pltype_; // Read it while we have the critsect.
277
278 // Log codec type to histogram once every 500 packets.
279 if (encoded_info.encoded_bytes == 0) {
280 ++number_of_consecutive_empty_packets_;
281 } else {
282 size_t codec_type = static_cast<size_t>(encoded_info.encoder_type);
283 codec_histogram_bins_log_[codec_type] +=
284 number_of_consecutive_empty_packets_ + 1;
285 number_of_consecutive_empty_packets_ = 0;
286 if (codec_histogram_bins_log_[codec_type] >= 500) {
287 codec_histogram_bins_log_[codec_type] -= 500;
288 UpdateCodecTypeHistogram(codec_type);
289 }
290 }
291
Niels Möller87e2d782019-03-07 10:18:23 +0100292 AudioFrameType frame_type;
kwibergc13ded52016-06-17 06:00:45 -0700293 if (encode_buffer_.size() == 0 && encoded_info.send_even_if_empty) {
Niels Möllerc936cb62019-03-19 14:10:16 +0100294 frame_type = AudioFrameType::kEmptyFrame;
kwibergc13ded52016-06-17 06:00:45 -0700295 encoded_info.payload_type = previous_pltype;
296 } else {
kwibergaf476c72016-11-28 15:21:39 -0800297 RTC_DCHECK_GT(encode_buffer_.size(), 0);
Niels Möllerc936cb62019-03-19 14:10:16 +0100298 frame_type = encoded_info.speech ? AudioFrameType::kAudioFrameSpeech
299 : AudioFrameType::kAudioFrameCN;
kwibergc13ded52016-06-17 06:00:45 -0700300 }
301
302 {
Markus Handell0df0fae2020-07-07 15:53:34 +0200303 MutexLock lock(&callback_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700304 if (packetization_callback_) {
305 packetization_callback_->SendData(
306 frame_type, encoded_info.payload_type, encoded_info.encoded_timestamp,
Minyue Liff0e4db2020-01-23 13:45:50 +0100307 encode_buffer_.data(), encode_buffer_.size(),
Minyue Lidea73ee2020-02-18 15:45:41 +0100308 absolute_capture_timestamp_ms.value_or(-1));
kwibergc13ded52016-06-17 06:00:45 -0700309 }
kwibergc13ded52016-06-17 06:00:45 -0700310 }
311 previous_pltype_ = encoded_info.payload_type;
312 return static_cast<int32_t>(encode_buffer_.size());
313}
314
315/////////////////////////////////////////
316// Sender
317//
318
kwibergc13ded52016-06-17 06:00:45 -0700319void AudioCodingModuleImpl::ModifyEncoder(
kwiberg24c7c122016-09-28 11:57:10 -0700320 rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) {
Markus Handell0df0fae2020-07-07 15:53:34 +0200321 MutexLock lock(&acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700322 modifier(&encoder_stack_);
323}
324
kwibergc13ded52016-06-17 06:00:45 -0700325// Register a transport callback which will be called to deliver
326// the encoded buffers.
327int AudioCodingModuleImpl::RegisterTransportCallback(
328 AudioPacketizationCallback* transport) {
Markus Handell0df0fae2020-07-07 15:53:34 +0200329 MutexLock lock(&callback_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700330 packetization_callback_ = transport;
331 return 0;
332}
333
334// Add 10MS of raw (PCM) audio data to the encoder.
335int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
Markus Handell0df0fae2020-07-07 15:53:34 +0200336 MutexLock lock(&acm_mutex_);
Per Åhgren4f2e9402019-10-04 11:06:15 +0200337 int r = Add10MsDataInternal(audio_frame, &input_data_);
Minyue Lidea73ee2020-02-18 15:45:41 +0100338 // TODO(bugs.webrtc.org/10739): add dcheck that
Artem Titovcfea2182021-08-10 01:22:31 +0200339 // `audio_frame.absolute_capture_timestamp_ms()` always has a value.
Minyue Lidea73ee2020-02-18 15:45:41 +0100340 return r < 0
341 ? r
342 : Encode(input_data_, audio_frame.absolute_capture_timestamp_ms());
kwibergc13ded52016-06-17 06:00:45 -0700343}
344
345int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
346 InputData* input_data) {
347 if (audio_frame.samples_per_channel_ == 0) {
Artem Titovd3251962021-11-15 16:57:07 +0100348 RTC_DCHECK_NOTREACHED();
Mirko Bonadei675513b2017-11-09 11:09:25 +0100349 RTC_LOG(LS_ERROR) << "Cannot Add 10 ms audio, payload length is zero";
kwibergc13ded52016-06-17 06:00:45 -0700350 return -1;
351 }
352
Per Åhgrend82a02c2020-03-12 11:53:30 +0100353 if (audio_frame.sample_rate_hz_ > kMaxInputSampleRateHz) {
Artem Titovd3251962021-11-15 16:57:07 +0100354 RTC_DCHECK_NOTREACHED();
Mirko Bonadei675513b2017-11-09 11:09:25 +0100355 RTC_LOG(LS_ERROR) << "Cannot Add 10 ms audio, input frequency not valid";
kwibergc13ded52016-06-17 06:00:45 -0700356 return -1;
357 }
358
359 // If the length and frequency matches. We currently just support raw PCM.
360 if (static_cast<size_t>(audio_frame.sample_rate_hz_ / 100) !=
361 audio_frame.samples_per_channel_) {
Mirko Bonadei675513b2017-11-09 11:09:25 +0100362 RTC_LOG(LS_ERROR)
Alex Loiko300ec8c2017-05-30 17:23:28 +0200363 << "Cannot Add 10 ms audio, input frequency and length doesn't match";
kwibergc13ded52016-06-17 06:00:45 -0700364 return -1;
365 }
366
Alex Loiko65438812019-02-22 10:13:44 +0100367 if (audio_frame.num_channels_ != 1 && audio_frame.num_channels_ != 2 &&
368 audio_frame.num_channels_ != 4 && audio_frame.num_channels_ != 6 &&
369 audio_frame.num_channels_ != 8) {
Mirko Bonadei675513b2017-11-09 11:09:25 +0100370 RTC_LOG(LS_ERROR) << "Cannot Add 10 ms audio, invalid number of channels.";
kwibergc13ded52016-06-17 06:00:45 -0700371 return -1;
372 }
373
374 // Do we have a codec registered?
375 if (!HaveValidEncoder("Add10MsData")) {
376 return -1;
377 }
378
379 const AudioFrame* ptr_frame;
380 // Perform a resampling, also down-mix if it is required and can be
381 // performed before resampling (a down mix prior to resampling will take
382 // place if both primary and secondary encoders are mono and input is in
383 // stereo).
384 if (PreprocessToAddData(audio_frame, &ptr_frame) < 0) {
385 return -1;
386 }
387
388 // Check whether we need an up-mix or down-mix?
389 const size_t current_num_channels = encoder_stack_->NumChannels();
390 const bool same_num_channels =
391 ptr_frame->num_channels_ == current_num_channels;
392
yujo36b1a5f2017-06-12 12:45:32 -0700393 // TODO(yujo): Skip encode of muted frames.
kwibergc13ded52016-06-17 06:00:45 -0700394 input_data->input_timestamp = ptr_frame->timestamp_;
kwibergc13ded52016-06-17 06:00:45 -0700395 input_data->length_per_channel = ptr_frame->samples_per_channel_;
396 input_data->audio_channel = current_num_channels;
397
Per Åhgren4f2e9402019-10-04 11:06:15 +0200398 if (!same_num_channels) {
399 // Remixes the input frame to the output data and in the process resize the
400 // output data if needed.
Per Åhgren4dd56a32019-11-19 21:00:59 +0100401 ReMixFrame(*ptr_frame, current_num_channels, &input_data->buffer);
Per Åhgren4f2e9402019-10-04 11:06:15 +0200402
Artem Titovd00ce742021-07-28 20:00:17 +0200403 // For pushing data to primary, point the `ptr_audio` to correct buffer.
Per Åhgren4f2e9402019-10-04 11:06:15 +0200404 input_data->audio = input_data->buffer.data();
405 RTC_DCHECK_GE(input_data->buffer.size(),
406 input_data->length_per_channel * input_data->audio_channel);
407 } else {
408 // When adding data to encoders this pointer is pointing to an audio buffer
409 // with correct number of channels.
410 input_data->audio = ptr_frame->data();
411 }
412
kwibergc13ded52016-06-17 06:00:45 -0700413 return 0;
414}
415
416// Perform a resampling and down-mix if required. We down-mix only if
417// encoder is mono and input is stereo. In case of dual-streaming, both
418// encoders has to be mono for down-mix to take place.
419// |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing
Artem Titovd00ce742021-07-28 20:00:17 +0200420// is required, |*ptr_out| points to `in_frame`.
yujo36b1a5f2017-06-12 12:45:32 -0700421// TODO(yujo): Make this more efficient for muted frames.
kwibergc13ded52016-06-17 06:00:45 -0700422int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
423 const AudioFrame** ptr_out) {
424 const bool resample =
425 in_frame.sample_rate_hz_ != encoder_stack_->SampleRateHz();
426
427 // This variable is true if primary codec and secondary codec (if exists)
428 // are both mono and input is stereo.
429 // TODO(henrik.lundin): This condition should probably be
430 // in_frame.num_channels_ > encoder_stack_->NumChannels()
431 const bool down_mix =
432 in_frame.num_channels_ == 2 && encoder_stack_->NumChannels() == 1;
433
434 if (!first_10ms_data_) {
435 expected_in_ts_ = in_frame.timestamp_;
436 expected_codec_ts_ = in_frame.timestamp_;
437 first_10ms_data_ = true;
438 } else if (in_frame.timestamp_ != expected_in_ts_) {
Mirko Bonadei675513b2017-11-09 11:09:25 +0100439 RTC_LOG(LS_WARNING) << "Unexpected input timestamp: " << in_frame.timestamp_
440 << ", expected: " << expected_in_ts_;
kwibergc13ded52016-06-17 06:00:45 -0700441 expected_codec_ts_ +=
442 (in_frame.timestamp_ - expected_in_ts_) *
443 static_cast<uint32_t>(
444 static_cast<double>(encoder_stack_->SampleRateHz()) /
445 static_cast<double>(in_frame.sample_rate_hz_));
446 expected_in_ts_ = in_frame.timestamp_;
447 }
448
kwibergc13ded52016-06-17 06:00:45 -0700449 if (!down_mix && !resample) {
450 // No pre-processing is required.
ossu63fb95a2016-07-06 09:34:22 -0700451 if (expected_in_ts_ == expected_codec_ts_) {
452 // If we've never resampled, we can use the input frame as-is
453 *ptr_out = &in_frame;
454 } else {
455 // Otherwise we'll need to alter the timestamp. Since in_frame is const,
456 // we'll have to make a copy of it.
457 preprocess_frame_.CopyFrom(in_frame);
458 preprocess_frame_.timestamp_ = expected_codec_ts_;
459 *ptr_out = &preprocess_frame_;
460 }
461
kwibergc13ded52016-06-17 06:00:45 -0700462 expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
463 expected_codec_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
kwibergc13ded52016-06-17 06:00:45 -0700464 return 0;
465 }
466
467 *ptr_out = &preprocess_frame_;
468 preprocess_frame_.num_channels_ = in_frame.num_channels_;
Per Åhgren4dd56a32019-11-19 21:00:59 +0100469 preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
Per Åhgrend82a02c2020-03-12 11:53:30 +0100470 std::array<int16_t, AudioFrame::kMaxDataSizeSamples> audio;
471 const int16_t* src_ptr_audio;
kwibergc13ded52016-06-17 06:00:45 -0700472 if (down_mix) {
Per Åhgrend82a02c2020-03-12 11:53:30 +0100473 // If a resampling is required, the output of a down-mix is written into a
kwibergc13ded52016-06-17 06:00:45 -0700474 // local buffer, otherwise, it will be written to the output frame.
Yves Gerey665174f2018-06-19 15:03:05 +0200475 int16_t* dest_ptr_audio =
Per Åhgren4dd56a32019-11-19 21:00:59 +0100476 resample ? audio.data() : preprocess_frame_.mutable_data();
Per Åhgrend82a02c2020-03-12 11:53:30 +0100477 RTC_DCHECK_GE(audio.size(), preprocess_frame_.samples_per_channel_);
Per Åhgren4dd56a32019-11-19 21:00:59 +0100478 RTC_DCHECK_GE(audio.size(), in_frame.samples_per_channel_);
479 DownMixFrame(in_frame,
480 rtc::ArrayView<int16_t>(
481 dest_ptr_audio, preprocess_frame_.samples_per_channel_));
kwibergc13ded52016-06-17 06:00:45 -0700482 preprocess_frame_.num_channels_ = 1;
Per Åhgrend82a02c2020-03-12 11:53:30 +0100483
484 // Set the input of the resampler to the down-mixed signal.
Per Åhgren4dd56a32019-11-19 21:00:59 +0100485 src_ptr_audio = audio.data();
Per Åhgrend82a02c2020-03-12 11:53:30 +0100486 } else {
487 // Set the input of the resampler to the original data.
488 src_ptr_audio = in_frame.data();
kwibergc13ded52016-06-17 06:00:45 -0700489 }
490
491 preprocess_frame_.timestamp_ = expected_codec_ts_;
kwibergc13ded52016-06-17 06:00:45 -0700492 preprocess_frame_.sample_rate_hz_ = in_frame.sample_rate_hz_;
493 // If it is required, we have to do a resampling.
494 if (resample) {
495 // The result of the resampler is written to output frame.
yujo36b1a5f2017-06-12 12:45:32 -0700496 int16_t* dest_ptr_audio = preprocess_frame_.mutable_data();
kwibergc13ded52016-06-17 06:00:45 -0700497
498 int samples_per_channel = resampler_.Resample10Msec(
499 src_ptr_audio, in_frame.sample_rate_hz_, encoder_stack_->SampleRateHz(),
500 preprocess_frame_.num_channels_, AudioFrame::kMaxDataSizeSamples,
501 dest_ptr_audio);
502
503 if (samples_per_channel < 0) {
Mirko Bonadei675513b2017-11-09 11:09:25 +0100504 RTC_LOG(LS_ERROR) << "Cannot add 10 ms audio, resampling failed";
kwibergc13ded52016-06-17 06:00:45 -0700505 return -1;
506 }
507 preprocess_frame_.samples_per_channel_ =
508 static_cast<size_t>(samples_per_channel);
509 preprocess_frame_.sample_rate_hz_ = encoder_stack_->SampleRateHz();
510 }
511
512 expected_codec_ts_ +=
513 static_cast<uint32_t>(preprocess_frame_.samples_per_channel_);
514 expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
515
516 return 0;
517}
518
519/////////////////////////////////////////
kwibergc13ded52016-06-17 06:00:45 -0700520// (FEC) Forward Error Correction (codec internal)
521//
522
kwibergc13ded52016-06-17 06:00:45 -0700523int AudioCodingModuleImpl::SetPacketLossRate(int loss_rate) {
Markus Handell0df0fae2020-07-07 15:53:34 +0200524 MutexLock lock(&acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700525 if (HaveValidEncoder("SetPacketLossRate")) {
minyue4b9a2cb2016-11-30 06:49:59 -0800526 encoder_stack_->OnReceivedUplinkPacketLossFraction(loss_rate / 100.0);
kwibergc13ded52016-06-17 06:00:45 -0700527 }
528 return 0;
529}
530
531/////////////////////////////////////////
kwibergc13ded52016-06-17 06:00:45 -0700532// Receiver
533//
534
535int AudioCodingModuleImpl::InitializeReceiver() {
Markus Handell0df0fae2020-07-07 15:53:34 +0200536 MutexLock lock(&acm_mutex_);
kwibergc13ded52016-06-17 06:00:45 -0700537 return InitializeReceiverSafe();
538}
539
540// Initialize receiver, resets codec database etc.
541int AudioCodingModuleImpl::InitializeReceiverSafe() {
542 // If the receiver is already initialized then we want to destroy any
543 // existing decoders. After a call to this function, we should have a clean
544 // start-up.
kwiberg6b19b562016-09-20 04:02:25 -0700545 if (receiver_initialized_)
546 receiver_.RemoveAllCodecs();
kwibergc13ded52016-06-17 06:00:45 -0700547 receiver_.FlushBuffers();
548
kwibergc13ded52016-06-17 06:00:45 -0700549 receiver_initialized_ = true;
550 return 0;
551}
552
kwiberg1c07c702017-03-27 07:15:49 -0700553void AudioCodingModuleImpl::SetReceiveCodecs(
554 const std::map<int, SdpAudioFormat>& codecs) {
Markus Handell0df0fae2020-07-07 15:53:34 +0200555 MutexLock lock(&acm_mutex_);
kwiberg1c07c702017-03-27 07:15:49 -0700556 receiver_.SetCodecs(codecs);
557}
558
kwibergc13ded52016-06-17 06:00:45 -0700559// Incoming packet from network parsed and ready for decode.
560int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
561 const size_t payload_length,
Niels Möllerafb5dbb2019-02-15 15:21:47 +0100562 const RTPHeader& rtp_header) {
henrik.lundinb8c55b12017-05-10 07:38:01 -0700563 RTC_DCHECK_EQ(payload_length == 0, incoming_payload == nullptr);
kwibergc13ded52016-06-17 06:00:45 -0700564 return receiver_.InsertPacket(
565 rtp_header,
566 rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
567}
568
kwibergc13ded52016-06-17 06:00:45 -0700569// Get 10 milliseconds of raw audio data to play out.
570// Automatic resample to the requested frequency.
571int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
572 AudioFrame* audio_frame,
573 bool* muted) {
574 // GetAudio always returns 10 ms, at the requested sample rate.
575 if (receiver_.GetAudio(desired_freq_hz, audio_frame, muted) != 0) {
Mirko Bonadei675513b2017-11-09 11:09:25 +0100576 RTC_LOG(LS_ERROR) << "PlayoutData failed, RecOut Failed";
kwibergc13ded52016-06-17 06:00:45 -0700577 return -1;
578 }
kwibergc13ded52016-06-17 06:00:45 -0700579 return 0;
580}
581
kwibergc13ded52016-06-17 06:00:45 -0700582/////////////////////////////////////////
583// Statistics
584//
585
586// TODO(turajs) change the return value to void. Also change the corresponding
587// NetEq function.
588int AudioCodingModuleImpl::GetNetworkStatistics(NetworkStatistics* statistics) {
589 receiver_.GetNetworkStatistics(statistics);
590 return 0;
591}
592
Ali Tofigh714e3cb2022-07-20 12:53:07 +0200593bool AudioCodingModuleImpl::HaveValidEncoder(
594 absl::string_view caller_name) const {
kwibergc13ded52016-06-17 06:00:45 -0700595 if (!encoder_stack_) {
Mirko Bonadei675513b2017-11-09 11:09:25 +0100596 RTC_LOG(LS_ERROR) << caller_name << " failed: No send codec is registered.";
kwibergc13ded52016-06-17 06:00:45 -0700597 return false;
598 }
599 return true;
600}
601
ivoce1198e02017-09-08 08:13:19 -0700602ANAStats AudioCodingModuleImpl::GetANAStats() const {
Markus Handell0df0fae2020-07-07 15:53:34 +0200603 MutexLock lock(&acm_mutex_);
ivoce1198e02017-09-08 08:13:19 -0700604 if (encoder_stack_)
605 return encoder_stack_->GetANAStats();
606 // If no encoder is set, return default stats.
607 return ANAStats();
608}
609
Jakob Ivarssonbf087452021-11-11 13:43:49 +0100610int AudioCodingModuleImpl::GetTargetBitrate() const {
611 MutexLock lock(&acm_mutex_);
612 if (!encoder_stack_) {
613 return -1;
614 }
615 return encoder_stack_->GetTargetBitrate();
616}
617
kwibergc13ded52016-06-17 06:00:45 -0700618} // namespace
619
Karl Wiberg5817d3d2018-04-06 10:06:42 +0200620AudioCodingModule::Config::Config(
621 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory)
622 : neteq_config(),
623 clock(Clock::GetRealTimeClock()),
624 decoder_factory(decoder_factory) {
kwiberg36a43882016-08-29 05:33:32 -0700625 // Post-decode VAD is disabled by default in NetEq, however, Audio
626 // Conference Mixer relies on VAD decisions and fails without them.
627 neteq_config.enable_post_decode_vad = true;
628}
629
630AudioCodingModule::Config::Config(const Config&) = default;
631AudioCodingModule::Config::~Config() = default;
632
Henrik Lundin64dad832015-05-11 12:44:23 +0200633AudioCodingModule* AudioCodingModule::Create(const Config& config) {
kwibergc13ded52016-06-17 06:00:45 -0700634 return new AudioCodingModuleImpl(config);
turaj@webrtc.org7959e162013-09-12 18:30:26 +0000635}
636
turaj@webrtc.org7959e162013-09-12 18:30:26 +0000637} // namespace webrtc