blob: 17ad71d53ae91889cde07e44a6e706ef57584e19 [file] [log] [blame]
kjellander3e6db232015-11-26 04:44:54 -08001/*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
12#define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
kjellander3e6db232015-11-26 04:44:54 -080013
kwiberg84be5112016-04-27 01:19:58 -070014#include <memory>
henrik.lundin4cf61dd2015-12-09 06:20:58 -080015#include <string>
Fredrik Solenbergf693bfa2018-12-11 12:22:10 +010016#include <utility>
kjellander3e6db232015-11-26 04:44:54 -080017#include <vector>
18
Danil Chapovalovb6021232018-06-19 13:26:36 +020019#include "absl/types/optional.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020020#include "api/audio_codecs/audio_decoder_factory.h"
21#include "api/audio_codecs/audio_encoder.h"
Artem Titov741daaf2019-03-21 14:37:36 +010022#include "api/function_view.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020023#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
24#include "modules/audio_coding/neteq/include/neteq.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020025#include "system_wrappers/include/clock.h"
kjellander3e6db232015-11-26 04:44:54 -080026
27namespace webrtc {
28
// Forward declarations of types that this header only refers to by pointer or
// reference.
class AudioDecoder;
class AudioEncoder;
class AudioFrame;
class RTPFragmentationHeader;
struct RTPHeader;

// Size constant for 10 ms of 16-bit super-wideband (48 kHz) PCM audio.
// NOTE(review): 960 equals 480 samples x 2 bytes per sample, so this is
// presumably a byte count -- confirm against the users of this macro.
#define WEBRTC_10MS_PCM_AUDIO 960
37
38// Callback class used for sending data ready to be packetized
39class AudioPacketizationCallback {
40 public:
41 virtual ~AudioPacketizationCallback() {}
42
Niels Möller87e2d782019-03-07 10:18:23 +010043 virtual int32_t SendData(AudioFrameType frame_type,
kjellander3e6db232015-11-26 04:44:54 -080044 uint8_t payload_type,
45 uint32_t timestamp,
46 const uint8_t* payload_data,
Niels Möllerc35b6e62019-04-25 16:31:18 +020047 size_t payload_len_bytes) {
48 return SendData(frame_type, payload_type, timestamp, payload_data,
49 payload_len_bytes, nullptr);
50 }
51
52 // TODO(bugs.webrtc.org/6471) Deprecated, delete as soon as downstream
53 // implementations are updated. Then make above method pure virtual, and
54 // delete forward declaration of RTPFragmentationHeader.
55 virtual int32_t SendData(AudioFrameType frame_type,
56 uint8_t payload_type,
57 uint32_t timestamp,
58 const uint8_t* payload_data,
kjellander3e6db232015-11-26 04:44:54 -080059 size_t payload_len_bytes,
Niels Möllerc35b6e62019-04-25 16:31:18 +020060 const RTPFragmentationHeader* fragmentation) {
61 return SendData(frame_type, payload_type, timestamp, payload_data,
62 payload_len_bytes);
63 }
kjellander3e6db232015-11-26 04:44:54 -080064};
65
66// Callback class used for reporting VAD decision
67class ACMVADCallback {
68 public:
69 virtual ~ACMVADCallback() {}
70
Niels Möller87e2d782019-03-07 10:18:23 +010071 virtual int32_t InFrameType(AudioFrameType frame_type) = 0;
kjellander3e6db232015-11-26 04:44:54 -080072};
73
74class AudioCodingModule {
75 protected:
76 AudioCodingModule() {}
77
78 public:
79 struct Config {
Karl Wiberg5817d3d2018-04-06 10:06:42 +020080 explicit Config(
81 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr);
kwiberg36a43882016-08-29 05:33:32 -070082 Config(const Config&);
83 ~Config();
kjellander3e6db232015-11-26 04:44:54 -080084
kjellander3e6db232015-11-26 04:44:54 -080085 NetEq::Config neteq_config;
86 Clock* clock;
ossue3525782016-05-25 07:37:43 -070087 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory;
kjellander3e6db232015-11-26 04:44:54 -080088 };
89
kjellander3e6db232015-11-26 04:44:54 -080090 static AudioCodingModule* Create(const Config& config);
91 virtual ~AudioCodingModule() = default;
92
93 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -080094 // Sender
95 //
96
kwiberg4cdbd572016-03-30 03:10:05 -070097 // |modifier| is called exactly once with one argument: a pointer to the
98 // unique_ptr that holds the current encoder (which is null if there is no
99 // current encoder). For the duration of the call, |modifier| has exclusive
100 // access to the unique_ptr; it may call the encoder, steal the encoder and
101 // replace it with another encoder or with nullptr, etc.
102 virtual void ModifyEncoder(
kwiberg24c7c122016-09-28 11:57:10 -0700103 rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
kwiberg4cdbd572016-03-30 03:10:05 -0700104
105 // Utility method for simply replacing the existing encoder with a new one.
106 void SetEncoder(std::unique_ptr<AudioEncoder> new_encoder) {
107 ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder) {
108 *encoder = std::move(new_encoder);
109 });
110 }
111
kjellander3e6db232015-11-26 04:44:54 -0800112 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800113 // Sets the bitrate to the specified value in bits/sec. If the value is not
114 // supported by the codec, it will choose another appropriate value.
minyue7e304322016-10-12 05:00:55 -0700115 //
116 // This is only used in test code that rely on old ACM APIs.
117 // TODO(minyue): Remove it when possible.
kjellander3e6db232015-11-26 04:44:54 -0800118 virtual void SetBitRate(int bitrate_bps) = 0;
119
120 // int32_t RegisterTransportCallback()
121 // Register a transport callback which will be called to deliver
122 // the encoded buffers whenever Process() is called and a
123 // bit-stream is ready.
124 //
125 // Input:
126 // -transport : pointer to the callback class
127 // transport->SendData() is called whenever
128 // Process() is called and bit-stream is ready
129 // to deliver.
130 //
131 // Return value:
132 // -1 if the transport callback could not be registered
133 // 0 if registration is successful.
134 //
135 virtual int32_t RegisterTransportCallback(
136 AudioPacketizationCallback* transport) = 0;
137
138 ///////////////////////////////////////////////////////////////////////////
139 // int32_t Add10MsData()
140 // Add 10MS of raw (PCM) audio data and encode it. If the sampling
141 // frequency of the audio does not match the sampling frequency of the
142 // current encoder ACM will resample the audio. If an encoded packet was
143 // produced, it will be delivered via the callback object registered using
144 // RegisterTransportCallback, and the return value from this function will
145 // be the number of bytes encoded.
146 //
147 // Input:
148 // -audio_frame : the input audio frame, containing raw audio
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +0200149 // sampling frequency etc.
kjellander3e6db232015-11-26 04:44:54 -0800150 //
151 // Return value:
152 // >= 0 number of bytes encoded.
153 // -1 some error occurred.
154 //
155 virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
156
157 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800158 // int SetPacketLossRate()
159 // Sets expected packet loss rate for encoding. Some encoders provide packet
160 // loss gnostic encoding to make stream less sensitive to packet losses,
161 // through e.g., FEC. No effects on codecs that do not provide such encoding.
162 //
163 // Input:
164 // -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive).
165 //
166 // Return value
167 // -1 if failed to set packet loss rate,
168 // 0 if succeeded.
169 //
minyue7e304322016-10-12 05:00:55 -0700170 // This is only used in test code that rely on old ACM APIs.
171 // TODO(minyue): Remove it when possible.
kjellander3e6db232015-11-26 04:44:54 -0800172 virtual int SetPacketLossRate(int packet_loss_rate) = 0;
173
174 ///////////////////////////////////////////////////////////////////////////
175 // (VAD) Voice Activity Detection
176 //
177
178 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800179 // int32_t RegisterVADCallback()
180 // Call this method to register a callback function which is called
181 // any time that ACM encounters an empty frame. That is a frame which is
182 // recognized inactive. Depending on the codec WebRtc VAD or internal codec
183 // VAD is employed to identify a frame as active/inactive.
184 //
185 // Input:
186 // -vad_callback : pointer to a callback function.
187 //
188 // Return value:
189 // -1 if failed to register the callback function.
190 // 0 if the callback function is registered successfully.
191 //
192 virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
193
194 ///////////////////////////////////////////////////////////////////////////
195 // Receiver
196 //
197
198 ///////////////////////////////////////////////////////////////////////////
199 // int32_t InitializeReceiver()
200 // Any decoder-related state of ACM will be initialized to the
201 // same state when ACM is created. This will not interrupt or
202 // effect encoding functionality of ACM. ACM would lose all the
203 // decoding-related settings by calling this function.
204 // For instance, all registered codecs are deleted and have to be
205 // registered again.
206 //
207 // Return value:
208 // -1 if failed to initialize,
209 // 0 if succeeded.
210 //
211 virtual int32_t InitializeReceiver() = 0;
212
213 ///////////////////////////////////////////////////////////////////////////
214 // int32_t ReceiveFrequency()
215 // Get sampling frequency of the last received payload.
216 //
217 // Return value:
218 // non-negative the sampling frequency in Hertz.
219 // -1 if an error has occurred.
220 //
221 virtual int32_t ReceiveFrequency() const = 0;
222
223 ///////////////////////////////////////////////////////////////////////////
224 // int32_t PlayoutFrequency()
225 // Get sampling frequency of audio played out.
226 //
227 // Return value:
228 // the sampling frequency in Hertz.
229 //
230 virtual int32_t PlayoutFrequency() const = 0;
231
kwiberg1c07c702017-03-27 07:15:49 -0700232 // Replace any existing decoders with the given payload type -> decoder map.
233 virtual void SetReceiveCodecs(
234 const std::map<int, SdpAudioFormat>& codecs) = 0;
235
kjellander3e6db232015-11-26 04:44:54 -0800236 ///////////////////////////////////////////////////////////////////////////
Fredrik Solenbergf693bfa2018-12-11 12:22:10 +0100237 // absl::optional<std::pair<int, SdpAudioFormat>> ReceiveCodec()
238 // Get the codec info associated with last received payload.
kjellander3e6db232015-11-26 04:44:54 -0800239 //
240 // Return value:
Fredrik Solenbergf693bfa2018-12-11 12:22:10 +0100241 // A payload type and SdpAudioFormat describing the format associated with
242 // the last received payload.
ossue280cde2016-10-12 11:04:10 -0700243 // An empty Optional if no payload has yet been received.
244 //
Fredrik Solenbergf693bfa2018-12-11 12:22:10 +0100245 virtual absl::optional<std::pair<int, SdpAudioFormat>>
246 ReceiveCodec() const = 0;
ossue280cde2016-10-12 11:04:10 -0700247
248 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800249 // int32_t IncomingPacket()
250 // Call this function to insert a parsed RTP packet into ACM.
251 //
252 // Inputs:
253 // -incoming_payload : received payload.
254 // -payload_len_bytes : the length of payload in bytes.
255 // -rtp_info : the relevant information retrieved from RTP
256 // header.
257 //
258 // Return value:
259 // -1 if failed to push in the payload
260 // 0 if payload is successfully pushed in.
261 //
262 virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
263 const size_t payload_len_bytes,
Niels Möllerafb5dbb2019-02-15 15:21:47 +0100264 const RTPHeader& rtp_header) = 0;
kjellander3e6db232015-11-26 04:44:54 -0800265
266 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800267 // int SetMinimumPlayoutDelay()
268 // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
269 // such a delay unless channel condition yields to a higher delay.
270 //
271 // Input:
272 // -time_ms : minimum delay in milliseconds.
273 //
274 // Return value:
275 // -1 if failed to set the delay,
276 // 0 if the minimum delay is set.
277 //
278 virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
279
280 ///////////////////////////////////////////////////////////////////////////
281 // int SetMaximumPlayoutDelay()
282 // Set a maximum for the playout delay
283 //
284 // Input:
285 // -time_ms : maximum delay in milliseconds.
286 //
287 // Return value:
288 // -1 if failed to set the delay,
289 // 0 if the maximum delay is set.
290 //
291 virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
292
Ruslan Burakov3b50f9f2019-02-06 09:45:56 +0100293 // Sets a base minimum for the playout delay. Base minimum delay sets lower
294 // bound minimum delay value which is set via SetMinimumPlayoutDelay.
295 //
296 // Returns true if value was successfully set, false overwise.
297 virtual bool SetBaseMinimumPlayoutDelayMs(int delay_ms) = 0;
298
299 // Returns current value of base minimum delay in milliseconds.
300 virtual int GetBaseMinimumPlayoutDelayMs() const = 0;
301
henrik.lundin9a410dd2016-04-06 01:39:22 -0700302 ///////////////////////////////////////////////////////////////////////////
303 // int32_t PlayoutTimestamp()
304 // The send timestamp of an RTP packet is associated with the decoded
305 // audio of the packet in question. This function returns the timestamp of
306 // the latest audio obtained by calling PlayoutData10ms(), or empty if no
307 // valid timestamp is available.
308 //
Danil Chapovalovb6021232018-06-19 13:26:36 +0200309 virtual absl::optional<uint32_t> PlayoutTimestamp() = 0;
kjellander3e6db232015-11-26 04:44:54 -0800310
311 ///////////////////////////////////////////////////////////////////////////
henrik.lundinb3f1c5d2016-08-22 15:39:53 -0700312 // int FilteredCurrentDelayMs()
313 // Returns the current total delay from NetEq (packet buffer and sync buffer)
314 // in ms, with smoothing applied to even out short-time fluctuations due to
315 // jitter. The packet buffer part of the delay is not updated during DTX/CNG
316 // periods.
317 //
318 virtual int FilteredCurrentDelayMs() const = 0;
319
320 ///////////////////////////////////////////////////////////////////////////
Henrik Lundinabbff892017-11-29 09:14:04 +0100321 // int FilteredCurrentDelayMs()
322 // Returns the current target delay for NetEq in ms.
323 //
324 virtual int TargetDelayMs() const = 0;
325
326 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800327 // int32_t PlayoutData10Ms(
328 // Get 10 milliseconds of raw audio data for playout, at the given sampling
329 // frequency. ACM will perform a resampling if required.
330 //
331 // Input:
332 // -desired_freq_hz : the desired sampling frequency, in Hertz, of the
333 // output audio. If set to -1, the function returns
334 // the audio at the current sampling frequency.
335 //
336 // Output:
337 // -audio_frame : output audio frame which contains raw audio data
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +0200338 // and other relevant parameters.
henrik.lundin834a6ea2016-05-13 03:45:24 -0700339 // -muted : if true, the sample data in audio_frame is not
340 // populated, and must be interpreted as all zero.
kjellander3e6db232015-11-26 04:44:54 -0800341 //
342 // Return value:
343 // -1 if the function fails,
344 // 0 if the function succeeds.
345 //
346 virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
henrik.lundin834a6ea2016-05-13 03:45:24 -0700347 AudioFrame* audio_frame,
348 bool* muted) = 0;
349
kjellander3e6db232015-11-26 04:44:54 -0800350 ///////////////////////////////////////////////////////////////////////////
351 // Codec specific
352 //
353
354 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800355 // int SetOpusMaxPlaybackRate()
356 // If current send codec is Opus, informs it about maximum playback rate the
357 // receiver will render. Opus can use this information to optimize the bit
358 // rate and increase the computation efficiency.
359 //
360 // Input:
361 // -frequency_hz : maximum playback rate in Hz.
362 //
363 // Return value:
364 // -1 if current send codec is not Opus or
365 // error occurred in setting the maximum playback rate,
366 // 0 if maximum bandwidth is set successfully.
367 //
368 virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0;
369
370 ///////////////////////////////////////////////////////////////////////////
371 // EnableOpusDtx()
372 // Enable the DTX, if current send codec is Opus.
373 //
374 // Return value:
375 // -1 if current send codec is not Opus or error occurred in enabling the
376 // Opus DTX.
377 // 0 if Opus DTX is enabled successfully.
378 //
379 virtual int EnableOpusDtx() = 0;
380
381 ///////////////////////////////////////////////////////////////////////////
382 // int DisableOpusDtx()
383 // If current send codec is Opus, disables its internal DTX.
384 //
385 // Return value:
386 // -1 if current send codec is not Opus or error occurred in disabling DTX.
387 // 0 if Opus DTX is disabled successfully.
388 //
389 virtual int DisableOpusDtx() = 0;
390
391 ///////////////////////////////////////////////////////////////////////////
392 // statistics
393 //
394
395 ///////////////////////////////////////////////////////////////////////////
396 // int32_t GetNetworkStatistics()
397 // Get network statistics. Note that the internal statistics of NetEq are
398 // reset by this call.
399 //
400 // Input:
401 // -network_statistics : a structure that contains network statistics.
402 //
403 // Return value:
404 // -1 if failed to set the network statistics,
405 // 0 if statistics are set successfully.
406 //
407 virtual int32_t GetNetworkStatistics(
408 NetworkStatistics* network_statistics) = 0;
409
410 //
411 // Enable NACK and set the maximum size of the NACK list. If NACK is already
412 // enable then the maximum NACK list size is modified accordingly.
413 //
414 // If the sequence number of last received packet is N, the sequence numbers
415 // of NACK list are in the range of [N - |max_nack_list_size|, N).
416 //
417 // |max_nack_list_size| should be positive (none zero) and less than or
418 // equal to |Nack::kNackListSizeLimit|. Otherwise, No change is applied and -1
419 // is returned. 0 is returned at success.
420 //
421 virtual int EnableNack(size_t max_nack_list_size) = 0;
422
423 // Disable NACK.
424 virtual void DisableNack() = 0;
425
426 //
427 // Get a list of packets to be retransmitted. |round_trip_time_ms| is an
428 // estimate of the round-trip-time (in milliseconds). Missing packets which
429 // will be playout in a shorter time than the round-trip-time (with respect
430 // to the time this API is called) will not be included in the list.
431 //
432 // Negative |round_trip_time_ms| results is an error message and empty list
433 // is returned.
434 //
435 virtual std::vector<uint16_t> GetNackList(
436 int64_t round_trip_time_ms) const = 0;
437
438 virtual void GetDecodingCallStatistics(
439 AudioDecodingCallStats* call_stats) const = 0;
ivoce1198e02017-09-08 08:13:19 -0700440
441 virtual ANAStats GetANAStats() const = 0;
kjellander3e6db232015-11-26 04:44:54 -0800442};
443
444} // namespace webrtc
445
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200446#endif // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_