blob: f9fdba5f511b33d0ab902aa9b2ec0dbd081a58b6 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
12#define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
kjellander3e6db232015-11-26 04:44:54 -080013
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/types/optional.h"
#include "api/audio_codecs/audio_decoder_factory.h"
#include "api/audio_codecs/audio_encoder.h"
#include "common_types.h"  // NOLINT(build/include)
#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "modules/audio_coding/neteq/include/neteq.h"
#include "rtc_base/function_view.h"
#include "system_wrappers/include/clock.h"
kjellander3e6db232015-11-26 04:44:54 -080026
27namespace webrtc {
28
// Forward declarations of types that are only used by pointer or reference in
// this header.
struct CodecInst;
struct WebRtcRTPHeader;
class AudioDecoder;
class AudioEncoder;
class AudioFrame;
class RTPFragmentationHeader;

#define WEBRTC_10MS_PCM_AUDIO 960  // 16 bits super wideband 48 kHz
38
39// Callback class used for sending data ready to be packetized
40class AudioPacketizationCallback {
41 public:
42 virtual ~AudioPacketizationCallback() {}
43
44 virtual int32_t SendData(FrameType frame_type,
45 uint8_t payload_type,
46 uint32_t timestamp,
47 const uint8_t* payload_data,
48 size_t payload_len_bytes,
49 const RTPFragmentationHeader* fragmentation) = 0;
50};
51
52// Callback class used for reporting VAD decision
53class ACMVADCallback {
54 public:
55 virtual ~ACMVADCallback() {}
56
57 virtual int32_t InFrameType(FrameType frame_type) = 0;
58};
59
60class AudioCodingModule {
61 protected:
62 AudioCodingModule() {}
63
64 public:
65 struct Config {
Karl Wiberg5817d3d2018-04-06 10:06:42 +020066 explicit Config(
67 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr);
kwiberg36a43882016-08-29 05:33:32 -070068 Config(const Config&);
69 ~Config();
kjellander3e6db232015-11-26 04:44:54 -080070
kjellander3e6db232015-11-26 04:44:54 -080071 NetEq::Config neteq_config;
72 Clock* clock;
ossue3525782016-05-25 07:37:43 -070073 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory;
kjellander3e6db232015-11-26 04:44:54 -080074 };
75
kjellander3e6db232015-11-26 04:44:54 -080076 static AudioCodingModule* Create(const Config& config);
77 virtual ~AudioCodingModule() = default;
78
79 ///////////////////////////////////////////////////////////////////////////
80 // Utility functions
81 //
82
83 ///////////////////////////////////////////////////////////////////////////
84 // uint8_t NumberOfCodecs()
85 // Returns number of supported codecs.
86 //
87 // Return value:
88 // number of supported codecs.
89 ///
90 static int NumberOfCodecs();
91
92 ///////////////////////////////////////////////////////////////////////////
93 // int32_t Codec()
94 // Get supported codec with list number.
95 //
96 // Input:
97 // -list_id : list number.
98 //
99 // Output:
100 // -codec : a structure where the parameters of the codec,
101 // given by list number is written to.
102 //
103 // Return value:
104 // -1 if the list number (list_id) is invalid.
105 // 0 if succeeded.
106 //
107 static int Codec(int list_id, CodecInst* codec);
108
109 ///////////////////////////////////////////////////////////////////////////
110 // int32_t Codec()
111 // Get supported codec with the given codec name, sampling frequency, and
112 // a given number of channels.
113 //
114 // Input:
115 // -payload_name : name of the codec.
116 // -sampling_freq_hz : sampling frequency of the codec. Note! for RED
117 // a sampling frequency of -1 is a valid input.
118 // -channels : number of channels ( 1 - mono, 2 - stereo).
119 //
120 // Output:
121 // -codec : a structure where the function returns the
122 // default parameters of the codec.
123 //
124 // Return value:
125 // -1 if no codec matches the given parameters.
126 // 0 if succeeded.
127 //
Yves Gerey665174f2018-06-19 15:03:05 +0200128 static int Codec(const char* payload_name,
129 CodecInst* codec,
130 int sampling_freq_hz,
131 size_t channels);
kjellander3e6db232015-11-26 04:44:54 -0800132
133 ///////////////////////////////////////////////////////////////////////////
134 // int32_t Codec()
135 //
136 // Returns the list number of the given codec name, sampling frequency, and
137 // a given number of channels.
138 //
139 // Input:
140 // -payload_name : name of the codec.
141 // -sampling_freq_hz : sampling frequency of the codec. Note! for RED
142 // a sampling frequency of -1 is a valid input.
143 // -channels : number of channels ( 1 - mono, 2 - stereo).
144 //
145 // Return value:
146 // if the codec is found, the index of the codec in the list,
147 // -1 if the codec is not found.
148 //
Yves Gerey665174f2018-06-19 15:03:05 +0200149 static int Codec(const char* payload_name,
150 int sampling_freq_hz,
Peter Kasting69558702016-01-12 16:26:35 -0800151 size_t channels);
kjellander3e6db232015-11-26 04:44:54 -0800152
153 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800154 // Sender
155 //
156
kwiberg4cdbd572016-03-30 03:10:05 -0700157 // |modifier| is called exactly once with one argument: a pointer to the
158 // unique_ptr that holds the current encoder (which is null if there is no
159 // current encoder). For the duration of the call, |modifier| has exclusive
160 // access to the unique_ptr; it may call the encoder, steal the encoder and
161 // replace it with another encoder or with nullptr, etc.
162 virtual void ModifyEncoder(
kwiberg24c7c122016-09-28 11:57:10 -0700163 rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
kwiberg4cdbd572016-03-30 03:10:05 -0700164
165 // Utility method for simply replacing the existing encoder with a new one.
166 void SetEncoder(std::unique_ptr<AudioEncoder> new_encoder) {
167 ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder) {
168 *encoder = std::move(new_encoder);
169 });
170 }
171
kjellander3e6db232015-11-26 04:44:54 -0800172 ///////////////////////////////////////////////////////////////////////////
173 // int32_t SendCodec()
174 // Get parameters for the codec currently registered as send codec.
175 //
176 // Return value:
177 // The send codec, or nothing if we don't have one
178 //
Danil Chapovalovb6021232018-06-19 13:26:36 +0200179 virtual absl::optional<CodecInst> SendCodec() const = 0;
kjellander3e6db232015-11-26 04:44:54 -0800180
181 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800182 // Sets the bitrate to the specified value in bits/sec. If the value is not
183 // supported by the codec, it will choose another appropriate value.
minyue7e304322016-10-12 05:00:55 -0700184 //
185 // This is only used in test code that rely on old ACM APIs.
186 // TODO(minyue): Remove it when possible.
kjellander3e6db232015-11-26 04:44:54 -0800187 virtual void SetBitRate(int bitrate_bps) = 0;
188
189 // int32_t RegisterTransportCallback()
190 // Register a transport callback which will be called to deliver
191 // the encoded buffers whenever Process() is called and a
192 // bit-stream is ready.
193 //
194 // Input:
195 // -transport : pointer to the callback class
196 // transport->SendData() is called whenever
197 // Process() is called and bit-stream is ready
198 // to deliver.
199 //
200 // Return value:
201 // -1 if the transport callback could not be registered
202 // 0 if registration is successful.
203 //
204 virtual int32_t RegisterTransportCallback(
205 AudioPacketizationCallback* transport) = 0;
206
207 ///////////////////////////////////////////////////////////////////////////
208 // int32_t Add10MsData()
209 // Add 10MS of raw (PCM) audio data and encode it. If the sampling
210 // frequency of the audio does not match the sampling frequency of the
211 // current encoder ACM will resample the audio. If an encoded packet was
212 // produced, it will be delivered via the callback object registered using
213 // RegisterTransportCallback, and the return value from this function will
214 // be the number of bytes encoded.
215 //
216 // Input:
217 // -audio_frame : the input audio frame, containing raw audio
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +0200218 // sampling frequency etc.
kjellander3e6db232015-11-26 04:44:54 -0800219 //
220 // Return value:
221 // >= 0 number of bytes encoded.
222 // -1 some error occurred.
223 //
224 virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
225
226 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800227 // int SetPacketLossRate()
228 // Sets expected packet loss rate for encoding. Some encoders provide packet
229 // loss gnostic encoding to make stream less sensitive to packet losses,
230 // through e.g., FEC. No effects on codecs that do not provide such encoding.
231 //
232 // Input:
233 // -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive).
234 //
235 // Return value
236 // -1 if failed to set packet loss rate,
237 // 0 if succeeded.
238 //
minyue7e304322016-10-12 05:00:55 -0700239 // This is only used in test code that rely on old ACM APIs.
240 // TODO(minyue): Remove it when possible.
kjellander3e6db232015-11-26 04:44:54 -0800241 virtual int SetPacketLossRate(int packet_loss_rate) = 0;
242
243 ///////////////////////////////////////////////////////////////////////////
244 // (VAD) Voice Activity Detection
245 //
246
247 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800248 // int32_t RegisterVADCallback()
249 // Call this method to register a callback function which is called
250 // any time that ACM encounters an empty frame. That is a frame which is
251 // recognized inactive. Depending on the codec WebRtc VAD or internal codec
252 // VAD is employed to identify a frame as active/inactive.
253 //
254 // Input:
255 // -vad_callback : pointer to a callback function.
256 //
257 // Return value:
258 // -1 if failed to register the callback function.
259 // 0 if the callback function is registered successfully.
260 //
261 virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
262
263 ///////////////////////////////////////////////////////////////////////////
264 // Receiver
265 //
266
267 ///////////////////////////////////////////////////////////////////////////
268 // int32_t InitializeReceiver()
269 // Any decoder-related state of ACM will be initialized to the
270 // same state when ACM is created. This will not interrupt or
271 // effect encoding functionality of ACM. ACM would lose all the
272 // decoding-related settings by calling this function.
273 // For instance, all registered codecs are deleted and have to be
274 // registered again.
275 //
276 // Return value:
277 // -1 if failed to initialize,
278 // 0 if succeeded.
279 //
280 virtual int32_t InitializeReceiver() = 0;
281
282 ///////////////////////////////////////////////////////////////////////////
283 // int32_t ReceiveFrequency()
284 // Get sampling frequency of the last received payload.
285 //
286 // Return value:
287 // non-negative the sampling frequency in Hertz.
288 // -1 if an error has occurred.
289 //
290 virtual int32_t ReceiveFrequency() const = 0;
291
292 ///////////////////////////////////////////////////////////////////////////
293 // int32_t PlayoutFrequency()
294 // Get sampling frequency of audio played out.
295 //
296 // Return value:
297 // the sampling frequency in Hertz.
298 //
299 virtual int32_t PlayoutFrequency() const = 0;
300
kwiberg1c07c702017-03-27 07:15:49 -0700301 // Replace any existing decoders with the given payload type -> decoder map.
302 virtual void SetReceiveCodecs(
303 const std::map<int, SdpAudioFormat>& codecs) = 0;
304
kwiberg5adaf732016-10-04 09:33:27 -0700305 // Registers a decoder for the given payload type. Returns true iff
306 // successful.
307 virtual bool RegisterReceiveCodec(int rtp_payload_type,
308 const SdpAudioFormat& audio_format) = 0;
309
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800310 // Registers an external decoder. The name is only used to provide information
311 // back to the caller about the decoder. Hence, the name is arbitrary, and may
312 // be empty.
kjellander3e6db232015-11-26 04:44:54 -0800313 virtual int RegisterExternalReceiveCodec(int rtp_payload_type,
314 AudioDecoder* external_decoder,
315 int sample_rate_hz,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800316 int num_channels,
317 const std::string& name) = 0;
kjellander3e6db232015-11-26 04:44:54 -0800318
319 ///////////////////////////////////////////////////////////////////////////
320 // int32_t UnregisterReceiveCodec()
321 // Unregister the codec currently registered with a specific payload type
322 // from the list of possible receive codecs.
323 //
324 // Input:
325 // -payload_type : The number representing the payload type to
326 // unregister.
327 //
328 // Output:
329 // -1 if fails to unregister.
330 // 0 if the given codec is successfully unregistered.
331 //
Yves Gerey665174f2018-06-19 15:03:05 +0200332 virtual int UnregisterReceiveCodec(uint8_t payload_type) = 0;
kjellander3e6db232015-11-26 04:44:54 -0800333
334 ///////////////////////////////////////////////////////////////////////////
335 // int32_t ReceiveCodec()
336 // Get the codec associated with last received payload.
337 //
338 // Output:
339 // -curr_receive_codec : parameters of the codec associated with the last
340 // received payload, c.f. common_types.h for
341 // the definition of CodecInst.
342 //
343 // Return value:
344 // -1 if failed to retrieve the codec,
345 // 0 if the codec is successfully retrieved.
346 //
347 virtual int32_t ReceiveCodec(CodecInst* curr_receive_codec) const = 0;
348
349 ///////////////////////////////////////////////////////////////////////////
Danil Chapovalovb6021232018-06-19 13:26:36 +0200350 // absl::optional<SdpAudioFormat> ReceiveFormat()
ossue280cde2016-10-12 11:04:10 -0700351 // Get the format associated with last received payload.
352 //
353 // Return value:
354 // An SdpAudioFormat describing the format associated with the last
355 // received payload.
356 // An empty Optional if no payload has yet been received.
357 //
Danil Chapovalovb6021232018-06-19 13:26:36 +0200358 virtual absl::optional<SdpAudioFormat> ReceiveFormat() const = 0;
ossue280cde2016-10-12 11:04:10 -0700359
360 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800361 // int32_t IncomingPacket()
362 // Call this function to insert a parsed RTP packet into ACM.
363 //
364 // Inputs:
365 // -incoming_payload : received payload.
366 // -payload_len_bytes : the length of payload in bytes.
367 // -rtp_info : the relevant information retrieved from RTP
368 // header.
369 //
370 // Return value:
371 // -1 if failed to push in the payload
372 // 0 if payload is successfully pushed in.
373 //
374 virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
375 const size_t payload_len_bytes,
376 const WebRtcRTPHeader& rtp_info) = 0;
377
378 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800379 // int SetMinimumPlayoutDelay()
380 // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
381 // such a delay unless channel condition yields to a higher delay.
382 //
383 // Input:
384 // -time_ms : minimum delay in milliseconds.
385 //
386 // Return value:
387 // -1 if failed to set the delay,
388 // 0 if the minimum delay is set.
389 //
390 virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
391
392 ///////////////////////////////////////////////////////////////////////////
393 // int SetMaximumPlayoutDelay()
394 // Set a maximum for the playout delay
395 //
396 // Input:
397 // -time_ms : maximum delay in milliseconds.
398 //
399 // Return value:
400 // -1 if failed to set the delay,
401 // 0 if the maximum delay is set.
402 //
403 virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
404
henrik.lundin9a410dd2016-04-06 01:39:22 -0700405 ///////////////////////////////////////////////////////////////////////////
406 // int32_t PlayoutTimestamp()
407 // The send timestamp of an RTP packet is associated with the decoded
408 // audio of the packet in question. This function returns the timestamp of
409 // the latest audio obtained by calling PlayoutData10ms(), or empty if no
410 // valid timestamp is available.
411 //
Danil Chapovalovb6021232018-06-19 13:26:36 +0200412 virtual absl::optional<uint32_t> PlayoutTimestamp() = 0;
kjellander3e6db232015-11-26 04:44:54 -0800413
414 ///////////////////////////////////////////////////////////////////////////
henrik.lundinb3f1c5d2016-08-22 15:39:53 -0700415 // int FilteredCurrentDelayMs()
416 // Returns the current total delay from NetEq (packet buffer and sync buffer)
417 // in ms, with smoothing applied to even out short-time fluctuations due to
418 // jitter. The packet buffer part of the delay is not updated during DTX/CNG
419 // periods.
420 //
421 virtual int FilteredCurrentDelayMs() const = 0;
422
423 ///////////////////////////////////////////////////////////////////////////
Henrik Lundinabbff892017-11-29 09:14:04 +0100424 // int FilteredCurrentDelayMs()
425 // Returns the current target delay for NetEq in ms.
426 //
427 virtual int TargetDelayMs() const = 0;
428
429 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800430 // int32_t PlayoutData10Ms(
431 // Get 10 milliseconds of raw audio data for playout, at the given sampling
432 // frequency. ACM will perform a resampling if required.
433 //
434 // Input:
435 // -desired_freq_hz : the desired sampling frequency, in Hertz, of the
436 // output audio. If set to -1, the function returns
437 // the audio at the current sampling frequency.
438 //
439 // Output:
440 // -audio_frame : output audio frame which contains raw audio data
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +0200441 // and other relevant parameters.
henrik.lundin834a6ea2016-05-13 03:45:24 -0700442 // -muted : if true, the sample data in audio_frame is not
443 // populated, and must be interpreted as all zero.
kjellander3e6db232015-11-26 04:44:54 -0800444 //
445 // Return value:
446 // -1 if the function fails,
447 // 0 if the function succeeds.
448 //
449 virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
henrik.lundin834a6ea2016-05-13 03:45:24 -0700450 AudioFrame* audio_frame,
451 bool* muted) = 0;
452
kjellander3e6db232015-11-26 04:44:54 -0800453 ///////////////////////////////////////////////////////////////////////////
454 // Codec specific
455 //
456
457 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800458 // int SetOpusMaxPlaybackRate()
459 // If current send codec is Opus, informs it about maximum playback rate the
460 // receiver will render. Opus can use this information to optimize the bit
461 // rate and increase the computation efficiency.
462 //
463 // Input:
464 // -frequency_hz : maximum playback rate in Hz.
465 //
466 // Return value:
467 // -1 if current send codec is not Opus or
468 // error occurred in setting the maximum playback rate,
469 // 0 if maximum bandwidth is set successfully.
470 //
471 virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0;
472
473 ///////////////////////////////////////////////////////////////////////////
474 // EnableOpusDtx()
475 // Enable the DTX, if current send codec is Opus.
476 //
477 // Return value:
478 // -1 if current send codec is not Opus or error occurred in enabling the
479 // Opus DTX.
480 // 0 if Opus DTX is enabled successfully.
481 //
482 virtual int EnableOpusDtx() = 0;
483
484 ///////////////////////////////////////////////////////////////////////////
485 // int DisableOpusDtx()
486 // If current send codec is Opus, disables its internal DTX.
487 //
488 // Return value:
489 // -1 if current send codec is not Opus or error occurred in disabling DTX.
490 // 0 if Opus DTX is disabled successfully.
491 //
492 virtual int DisableOpusDtx() = 0;
493
494 ///////////////////////////////////////////////////////////////////////////
495 // statistics
496 //
497
498 ///////////////////////////////////////////////////////////////////////////
499 // int32_t GetNetworkStatistics()
500 // Get network statistics. Note that the internal statistics of NetEq are
501 // reset by this call.
502 //
503 // Input:
504 // -network_statistics : a structure that contains network statistics.
505 //
506 // Return value:
507 // -1 if failed to set the network statistics,
508 // 0 if statistics are set successfully.
509 //
510 virtual int32_t GetNetworkStatistics(
511 NetworkStatistics* network_statistics) = 0;
512
513 //
514 // Enable NACK and set the maximum size of the NACK list. If NACK is already
515 // enable then the maximum NACK list size is modified accordingly.
516 //
517 // If the sequence number of last received packet is N, the sequence numbers
518 // of NACK list are in the range of [N - |max_nack_list_size|, N).
519 //
520 // |max_nack_list_size| should be positive (none zero) and less than or
521 // equal to |Nack::kNackListSizeLimit|. Otherwise, No change is applied and -1
522 // is returned. 0 is returned at success.
523 //
524 virtual int EnableNack(size_t max_nack_list_size) = 0;
525
526 // Disable NACK.
527 virtual void DisableNack() = 0;
528
529 //
530 // Get a list of packets to be retransmitted. |round_trip_time_ms| is an
531 // estimate of the round-trip-time (in milliseconds). Missing packets which
532 // will be playout in a shorter time than the round-trip-time (with respect
533 // to the time this API is called) will not be included in the list.
534 //
535 // Negative |round_trip_time_ms| results is an error message and empty list
536 // is returned.
537 //
538 virtual std::vector<uint16_t> GetNackList(
539 int64_t round_trip_time_ms) const = 0;
540
541 virtual void GetDecodingCallStatistics(
542 AudioDecodingCallStats* call_stats) const = 0;
ivoce1198e02017-09-08 08:13:19 -0700543
544 virtual ANAStats GetANAStats() const = 0;
kjellander3e6db232015-11-26 04:44:54 -0800545};
546
547} // namespace webrtc
548
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200549#endif // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_