blob: 177ce3bc3080e3ff18ae02d8162623d7faaffc5c [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
12#define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
kjellander3e6db232015-11-26 04:44:54 -080013
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/types/optional.h"
#include "api/audio_codecs/audio_decoder_factory.h"
#include "api/audio_codecs/audio_encoder.h"
#include "api/function_view.h"
#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "modules/audio_coding/neteq/include/neteq.h"
#include "system_wrappers/include/clock.h"
kjellander3e6db232015-11-26 04:44:54 -080026
27namespace webrtc {
28
// Forward declarations of types that this header only refers to by pointer or
// reference.
class AudioDecoder;
class AudioEncoder;
class AudioFrame;
struct RTPHeader;

// Size constant for 10 ms of 16-bit super-wideband (48 kHz) PCM audio.
// NOTE(review): 10 ms at 48 kHz is 480 samples per channel, so 960 presumably
// either covers two channels or counts bytes -- confirm against the users of
// this macro before relying on the unit.
#define WEBRTC_10MS_PCM_AUDIO 960  // 16 bits super wideband 48 kHz
36
37// Callback class used for sending data ready to be packetized
38class AudioPacketizationCallback {
39 public:
40 virtual ~AudioPacketizationCallback() {}
41
Niels Möller87e2d782019-03-07 10:18:23 +010042 virtual int32_t SendData(AudioFrameType frame_type,
kjellander3e6db232015-11-26 04:44:54 -080043 uint8_t payload_type,
44 uint32_t timestamp,
45 const uint8_t* payload_data,
Niels Möller4babc682019-04-26 15:46:12 +020046 size_t payload_len_bytes) = 0;
kjellander3e6db232015-11-26 04:44:54 -080047};
48
49// Callback class used for reporting VAD decision
50class ACMVADCallback {
51 public:
52 virtual ~ACMVADCallback() {}
53
Niels Möller87e2d782019-03-07 10:18:23 +010054 virtual int32_t InFrameType(AudioFrameType frame_type) = 0;
kjellander3e6db232015-11-26 04:44:54 -080055};
56
57class AudioCodingModule {
58 protected:
59 AudioCodingModule() {}
60
61 public:
62 struct Config {
Karl Wiberg5817d3d2018-04-06 10:06:42 +020063 explicit Config(
64 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr);
kwiberg36a43882016-08-29 05:33:32 -070065 Config(const Config&);
66 ~Config();
kjellander3e6db232015-11-26 04:44:54 -080067
kjellander3e6db232015-11-26 04:44:54 -080068 NetEq::Config neteq_config;
69 Clock* clock;
ossue3525782016-05-25 07:37:43 -070070 rtc::scoped_refptr<AudioDecoderFactory> decoder_factory;
kjellander3e6db232015-11-26 04:44:54 -080071 };
72
kjellander3e6db232015-11-26 04:44:54 -080073 static AudioCodingModule* Create(const Config& config);
74 virtual ~AudioCodingModule() = default;
75
76 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -080077 // Sender
78 //
79
kwiberg4cdbd572016-03-30 03:10:05 -070080 // |modifier| is called exactly once with one argument: a pointer to the
81 // unique_ptr that holds the current encoder (which is null if there is no
82 // current encoder). For the duration of the call, |modifier| has exclusive
83 // access to the unique_ptr; it may call the encoder, steal the encoder and
84 // replace it with another encoder or with nullptr, etc.
85 virtual void ModifyEncoder(
kwiberg24c7c122016-09-28 11:57:10 -070086 rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
kwiberg4cdbd572016-03-30 03:10:05 -070087
88 // Utility method for simply replacing the existing encoder with a new one.
89 void SetEncoder(std::unique_ptr<AudioEncoder> new_encoder) {
90 ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder) {
91 *encoder = std::move(new_encoder);
92 });
93 }
94
kjellander3e6db232015-11-26 04:44:54 -080095 // int32_t RegisterTransportCallback()
96 // Register a transport callback which will be called to deliver
97 // the encoded buffers whenever Process() is called and a
98 // bit-stream is ready.
99 //
100 // Input:
101 // -transport : pointer to the callback class
102 // transport->SendData() is called whenever
103 // Process() is called and bit-stream is ready
104 // to deliver.
105 //
106 // Return value:
107 // -1 if the transport callback could not be registered
108 // 0 if registration is successful.
109 //
110 virtual int32_t RegisterTransportCallback(
111 AudioPacketizationCallback* transport) = 0;
112
113 ///////////////////////////////////////////////////////////////////////////
114 // int32_t Add10MsData()
115 // Add 10MS of raw (PCM) audio data and encode it. If the sampling
116 // frequency of the audio does not match the sampling frequency of the
117 // current encoder ACM will resample the audio. If an encoded packet was
118 // produced, it will be delivered via the callback object registered using
119 // RegisterTransportCallback, and the return value from this function will
120 // be the number of bytes encoded.
121 //
122 // Input:
123 // -audio_frame : the input audio frame, containing raw audio
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +0200124 // sampling frequency etc.
kjellander3e6db232015-11-26 04:44:54 -0800125 //
126 // Return value:
127 // >= 0 number of bytes encoded.
128 // -1 some error occurred.
129 //
130 virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
131
132 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800133 // int SetPacketLossRate()
134 // Sets expected packet loss rate for encoding. Some encoders provide packet
135 // loss gnostic encoding to make stream less sensitive to packet losses,
136 // through e.g., FEC. No effects on codecs that do not provide such encoding.
137 //
138 // Input:
139 // -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive).
140 //
141 // Return value
142 // -1 if failed to set packet loss rate,
143 // 0 if succeeded.
144 //
minyue7e304322016-10-12 05:00:55 -0700145 // This is only used in test code that rely on old ACM APIs.
146 // TODO(minyue): Remove it when possible.
kjellander3e6db232015-11-26 04:44:54 -0800147 virtual int SetPacketLossRate(int packet_loss_rate) = 0;
148
149 ///////////////////////////////////////////////////////////////////////////
150 // (VAD) Voice Activity Detection
151 //
152
153 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800154 // int32_t RegisterVADCallback()
155 // Call this method to register a callback function which is called
156 // any time that ACM encounters an empty frame. That is a frame which is
157 // recognized inactive. Depending on the codec WebRtc VAD or internal codec
158 // VAD is employed to identify a frame as active/inactive.
159 //
160 // Input:
161 // -vad_callback : pointer to a callback function.
162 //
163 // Return value:
164 // -1 if failed to register the callback function.
165 // 0 if the callback function is registered successfully.
166 //
167 virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
168
169 ///////////////////////////////////////////////////////////////////////////
170 // Receiver
171 //
172
173 ///////////////////////////////////////////////////////////////////////////
174 // int32_t InitializeReceiver()
175 // Any decoder-related state of ACM will be initialized to the
176 // same state when ACM is created. This will not interrupt or
177 // effect encoding functionality of ACM. ACM would lose all the
178 // decoding-related settings by calling this function.
179 // For instance, all registered codecs are deleted and have to be
180 // registered again.
181 //
182 // Return value:
183 // -1 if failed to initialize,
184 // 0 if succeeded.
185 //
186 virtual int32_t InitializeReceiver() = 0;
187
188 ///////////////////////////////////////////////////////////////////////////
189 // int32_t ReceiveFrequency()
190 // Get sampling frequency of the last received payload.
191 //
192 // Return value:
193 // non-negative the sampling frequency in Hertz.
194 // -1 if an error has occurred.
195 //
196 virtual int32_t ReceiveFrequency() const = 0;
197
198 ///////////////////////////////////////////////////////////////////////////
199 // int32_t PlayoutFrequency()
200 // Get sampling frequency of audio played out.
201 //
202 // Return value:
203 // the sampling frequency in Hertz.
204 //
205 virtual int32_t PlayoutFrequency() const = 0;
206
kwiberg1c07c702017-03-27 07:15:49 -0700207 // Replace any existing decoders with the given payload type -> decoder map.
208 virtual void SetReceiveCodecs(
209 const std::map<int, SdpAudioFormat>& codecs) = 0;
210
kjellander3e6db232015-11-26 04:44:54 -0800211 ///////////////////////////////////////////////////////////////////////////
Fredrik Solenbergf693bfa2018-12-11 12:22:10 +0100212 // absl::optional<std::pair<int, SdpAudioFormat>> ReceiveCodec()
213 // Get the codec info associated with last received payload.
kjellander3e6db232015-11-26 04:44:54 -0800214 //
215 // Return value:
Fredrik Solenbergf693bfa2018-12-11 12:22:10 +0100216 // A payload type and SdpAudioFormat describing the format associated with
217 // the last received payload.
ossue280cde2016-10-12 11:04:10 -0700218 // An empty Optional if no payload has yet been received.
219 //
Jonas Olssona4d87372019-07-05 19:08:33 +0200220 virtual absl::optional<std::pair<int, SdpAudioFormat>> ReceiveCodec()
221 const = 0;
ossue280cde2016-10-12 11:04:10 -0700222
223 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800224 // int32_t IncomingPacket()
225 // Call this function to insert a parsed RTP packet into ACM.
226 //
227 // Inputs:
228 // -incoming_payload : received payload.
229 // -payload_len_bytes : the length of payload in bytes.
230 // -rtp_info : the relevant information retrieved from RTP
231 // header.
232 //
233 // Return value:
234 // -1 if failed to push in the payload
235 // 0 if payload is successfully pushed in.
236 //
237 virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
238 const size_t payload_len_bytes,
Niels Möllerafb5dbb2019-02-15 15:21:47 +0100239 const RTPHeader& rtp_header) = 0;
kjellander3e6db232015-11-26 04:44:54 -0800240
241 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800242 // int SetMinimumPlayoutDelay()
243 // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
244 // such a delay unless channel condition yields to a higher delay.
245 //
246 // Input:
247 // -time_ms : minimum delay in milliseconds.
248 //
249 // Return value:
250 // -1 if failed to set the delay,
251 // 0 if the minimum delay is set.
252 //
253 virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
254
255 ///////////////////////////////////////////////////////////////////////////
256 // int SetMaximumPlayoutDelay()
257 // Set a maximum for the playout delay
258 //
259 // Input:
260 // -time_ms : maximum delay in milliseconds.
261 //
262 // Return value:
263 // -1 if failed to set the delay,
264 // 0 if the maximum delay is set.
265 //
266 virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
267
Ruslan Burakov3b50f9f2019-02-06 09:45:56 +0100268 // Sets a base minimum for the playout delay. Base minimum delay sets lower
269 // bound minimum delay value which is set via SetMinimumPlayoutDelay.
270 //
271 // Returns true if value was successfully set, false overwise.
272 virtual bool SetBaseMinimumPlayoutDelayMs(int delay_ms) = 0;
273
274 // Returns current value of base minimum delay in milliseconds.
275 virtual int GetBaseMinimumPlayoutDelayMs() const = 0;
276
henrik.lundin9a410dd2016-04-06 01:39:22 -0700277 ///////////////////////////////////////////////////////////////////////////
278 // int32_t PlayoutTimestamp()
279 // The send timestamp of an RTP packet is associated with the decoded
280 // audio of the packet in question. This function returns the timestamp of
281 // the latest audio obtained by calling PlayoutData10ms(), or empty if no
282 // valid timestamp is available.
283 //
Danil Chapovalovb6021232018-06-19 13:26:36 +0200284 virtual absl::optional<uint32_t> PlayoutTimestamp() = 0;
kjellander3e6db232015-11-26 04:44:54 -0800285
286 ///////////////////////////////////////////////////////////////////////////
henrik.lundinb3f1c5d2016-08-22 15:39:53 -0700287 // int FilteredCurrentDelayMs()
288 // Returns the current total delay from NetEq (packet buffer and sync buffer)
289 // in ms, with smoothing applied to even out short-time fluctuations due to
290 // jitter. The packet buffer part of the delay is not updated during DTX/CNG
291 // periods.
292 //
293 virtual int FilteredCurrentDelayMs() const = 0;
294
295 ///////////////////////////////////////////////////////////////////////////
Henrik Lundinabbff892017-11-29 09:14:04 +0100296 // int FilteredCurrentDelayMs()
297 // Returns the current target delay for NetEq in ms.
298 //
299 virtual int TargetDelayMs() const = 0;
300
301 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800302 // int32_t PlayoutData10Ms(
303 // Get 10 milliseconds of raw audio data for playout, at the given sampling
304 // frequency. ACM will perform a resampling if required.
305 //
306 // Input:
307 // -desired_freq_hz : the desired sampling frequency, in Hertz, of the
308 // output audio. If set to -1, the function returns
309 // the audio at the current sampling frequency.
310 //
311 // Output:
312 // -audio_frame : output audio frame which contains raw audio data
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +0200313 // and other relevant parameters.
henrik.lundin834a6ea2016-05-13 03:45:24 -0700314 // -muted : if true, the sample data in audio_frame is not
315 // populated, and must be interpreted as all zero.
kjellander3e6db232015-11-26 04:44:54 -0800316 //
317 // Return value:
318 // -1 if the function fails,
319 // 0 if the function succeeds.
320 //
321 virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
henrik.lundin834a6ea2016-05-13 03:45:24 -0700322 AudioFrame* audio_frame,
323 bool* muted) = 0;
324
kjellander3e6db232015-11-26 04:44:54 -0800325 ///////////////////////////////////////////////////////////////////////////
kjellander3e6db232015-11-26 04:44:54 -0800326 // statistics
327 //
328
329 ///////////////////////////////////////////////////////////////////////////
330 // int32_t GetNetworkStatistics()
331 // Get network statistics. Note that the internal statistics of NetEq are
332 // reset by this call.
333 //
334 // Input:
335 // -network_statistics : a structure that contains network statistics.
336 //
337 // Return value:
338 // -1 if failed to set the network statistics,
339 // 0 if statistics are set successfully.
340 //
341 virtual int32_t GetNetworkStatistics(
342 NetworkStatistics* network_statistics) = 0;
343
kjellander3e6db232015-11-26 04:44:54 -0800344 virtual void GetDecodingCallStatistics(
345 AudioDecodingCallStats* call_stats) const = 0;
ivoce1198e02017-09-08 08:13:19 -0700346
347 virtual ANAStats GetANAStats() const = 0;
kjellander3e6db232015-11-26 04:44:54 -0800348};
349
350} // namespace webrtc
351
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200352#endif // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_