Blame - modules/audio_coding/include/audio_coding_module.h - webrtc.googlesource.com/src

blob: 177ce3bc3080e3ff18ae02d8162623d7faaffc5c [file] [log] [blame]

kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	11	#ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
				12	#define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	13
#include <stddef.h>
#include <stdint.h>

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/types/optional.h"
#include "api/audio_codecs/audio_decoder_factory.h"
#include "api/audio_codecs/audio_encoder.h"
#include "api/function_view.h"
#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "modules/audio_coding/neteq/include/neteq.h"
#include "system_wrappers/include/clock.h"
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	26
				27	namespace webrtc {
				28
				29	// forward declarations
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	30	class AudioDecoder;
				31	class AudioEncoder;
				32	class AudioFrame;
Niels Möller	afb5dbb	2019-02-15 15:21:47 +0100	[diff] [blame]	33	struct RTPHeader;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	34
				35	#define WEBRTC_10MS_PCM_AUDIO 960 // 16 bits super wideband 48 kHz
				36
				37	// Callback class used for sending data ready to be packetized
				38	class AudioPacketizationCallback {
				39	public:
				40	virtual ~AudioPacketizationCallback() {}
				41
Niels Möller	87e2d78	2019-03-07 10:18:23 +0100	[diff] [blame]	42	virtual int32_t SendData(AudioFrameType frame_type,
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	43	uint8_t payload_type,
				44	uint32_t timestamp,
				45	const uint8_t* payload_data,
Niels Möller	4babc68	2019-04-26 15:46:12 +0200	[diff] [blame]	46	size_t payload_len_bytes) = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	47	};
				48
				49	// Callback class used for reporting VAD decision
				50	class ACMVADCallback {
				51	public:
				52	virtual ~ACMVADCallback() {}
				53
Niels Möller	87e2d78	2019-03-07 10:18:23 +0100	[diff] [blame]	54	virtual int32_t InFrameType(AudioFrameType frame_type) = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	55	};
				56
				57	class AudioCodingModule {
				58	protected:
				59	AudioCodingModule() {}
				60
				61	public:
				62	struct Config {
Karl Wiberg	5817d3d	2018-04-06 10:06:42 +0200	[diff] [blame]	63	explicit Config(
				64	rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr);
kwiberg	36a4388	2016-08-29 05:33:32 -0700	[diff] [blame]	65	Config(const Config&);
				66	~Config();
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	67
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	68	NetEq::Config neteq_config;
				69	Clock* clock;
ossu	e352578	2016-05-25 07:37:43 -0700	[diff] [blame]	70	rtc::scoped_refptr<AudioDecoderFactory> decoder_factory;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	71	};
				72
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	73	static AudioCodingModule* Create(const Config& config);
				74	virtual ~AudioCodingModule() = default;
				75
				76	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	77	// Sender
				78	//
				79
kwiberg	4cdbd57	2016-03-30 03:10:05 -0700	[diff] [blame]	80	// \|modifier\| is called exactly once with one argument: a pointer to the
				81	// unique_ptr that holds the current encoder (which is null if there is no
				82	// current encoder). For the duration of the call, \|modifier\| has exclusive
				83	// access to the unique_ptr; it may call the encoder, steal the encoder and
				84	// replace it with another encoder or with nullptr, etc.
				85	virtual void ModifyEncoder(
kwiberg	24c7c12	2016-09-28 11:57:10 -0700	[diff] [blame]	86	rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
kwiberg	4cdbd57	2016-03-30 03:10:05 -0700	[diff] [blame]	87
				88	// Utility method for simply replacing the existing encoder with a new one.
				89	void SetEncoder(std::unique_ptr<AudioEncoder> new_encoder) {
				90	ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder) {
				91	*encoder = std::move(new_encoder);
				92	});
				93	}
				94
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	95	// int32_t RegisterTransportCallback()
				96	// Register a transport callback which will be called to deliver
				97	// the encoded buffers whenever Process() is called and a
				98	// bit-stream is ready.
				99	//
				100	// Input:
				101	// -transport : pointer to the callback class
				102	// transport->SendData() is called whenever
				103	// Process() is called and bit-stream is ready
				104	// to deliver.
				105	//
				106	// Return value:
				107	// -1 if the transport callback could not be registered
				108	// 0 if registration is successful.
				109	//
				110	virtual int32_t RegisterTransportCallback(
				111	AudioPacketizationCallback* transport) = 0;
				112
				113	///////////////////////////////////////////////////////////////////////////
				114	// int32_t Add10MsData()
				115	// Add 10MS of raw (PCM) audio data and encode it. If the sampling
				116	// frequency of the audio does not match the sampling frequency of the
				117	// current encoder ACM will resample the audio. If an encoded packet was
				118	// produced, it will be delivered via the callback object registered using
				119	// RegisterTransportCallback, and the return value from this function will
				120	// be the number of bytes encoded.
				121	//
				122	// Input:
				123	// -audio_frame : the input audio frame, containing raw audio
Fredrik Solenberg	bbf21a3	2018-04-12 22:44:09 +0200	[diff] [blame]	124	// sampling frequency etc.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	125	//
				126	// Return value:
				127	// >= 0 number of bytes encoded.
				128	// -1 some error occurred.
				129	//
				130	virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
				131
				132	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	133	// int SetPacketLossRate()
				134	// Sets expected packet loss rate for encoding. Some encoders provide packet
				135	// loss gnostic encoding to make stream less sensitive to packet losses,
				136	// through e.g., FEC. No effects on codecs that do not provide such encoding.
				137	//
				138	// Input:
				139	// -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive).
				140	//
				141	// Return value
				142	// -1 if failed to set packet loss rate,
				143	// 0 if succeeded.
				144	//
minyue	7e30432	2016-10-12 05:00:55 -0700	[diff] [blame]	145	// This is only used in test code that rely on old ACM APIs.
				146	// TODO(minyue): Remove it when possible.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	147	virtual int SetPacketLossRate(int packet_loss_rate) = 0;
				148
				149	///////////////////////////////////////////////////////////////////////////
				150	// (VAD) Voice Activity Detection
				151	//
				152
				153	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	154	// int32_t RegisterVADCallback()
				155	// Call this method to register a callback function which is called
				156	// any time that ACM encounters an empty frame. That is a frame which is
				157	// recognized inactive. Depending on the codec WebRtc VAD or internal codec
				158	// VAD is employed to identify a frame as active/inactive.
				159	//
				160	// Input:
				161	// -vad_callback : pointer to a callback function.
				162	//
				163	// Return value:
				164	// -1 if failed to register the callback function.
				165	// 0 if the callback function is registered successfully.
				166	//
				167	virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
				168
				169	///////////////////////////////////////////////////////////////////////////
				170	// Receiver
				171	//
				172
				173	///////////////////////////////////////////////////////////////////////////
				174	// int32_t InitializeReceiver()
				175	// Any decoder-related state of ACM will be initialized to the
				176	// same state when ACM is created. This will not interrupt or
				177	// effect encoding functionality of ACM. ACM would lose all the
				178	// decoding-related settings by calling this function.
				179	// For instance, all registered codecs are deleted and have to be
				180	// registered again.
				181	//
				182	// Return value:
				183	// -1 if failed to initialize,
				184	// 0 if succeeded.
				185	//
				186	virtual int32_t InitializeReceiver() = 0;
				187
				188	///////////////////////////////////////////////////////////////////////////
				189	// int32_t ReceiveFrequency()
				190	// Get sampling frequency of the last received payload.
				191	//
				192	// Return value:
				193	// non-negative the sampling frequency in Hertz.
				194	// -1 if an error has occurred.
				195	//
				196	virtual int32_t ReceiveFrequency() const = 0;
				197
				198	///////////////////////////////////////////////////////////////////////////
				199	// int32_t PlayoutFrequency()
				200	// Get sampling frequency of audio played out.
				201	//
				202	// Return value:
				203	// the sampling frequency in Hertz.
				204	//
				205	virtual int32_t PlayoutFrequency() const = 0;
				206
kwiberg	1c07c70	2017-03-27 07:15:49 -0700	[diff] [blame]	207	// Replace any existing decoders with the given payload type -> decoder map.
				208	virtual void SetReceiveCodecs(
				209	const std::map<int, SdpAudioFormat>& codecs) = 0;
				210
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	211	///////////////////////////////////////////////////////////////////////////
Fredrik Solenberg	f693bfa	2018-12-11 12:22:10 +0100	[diff] [blame]	212	// absl::optional<std::pair<int, SdpAudioFormat>> ReceiveCodec()
				213	// Get the codec info associated with last received payload.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	214	//
				215	// Return value:
Fredrik Solenberg	f693bfa	2018-12-11 12:22:10 +0100	[diff] [blame]	216	// A payload type and SdpAudioFormat describing the format associated with
				217	// the last received payload.
ossu	e280cde	2016-10-12 11:04:10 -0700	[diff] [blame]	218	// An empty Optional if no payload has yet been received.
				219	//
Jonas Olsson	a4d8737	2019-07-05 19:08:33 +0200	[diff] [blame]	220	virtual absl::optional<std::pair<int, SdpAudioFormat>> ReceiveCodec()
				221	const = 0;
ossu	e280cde	2016-10-12 11:04:10 -0700	[diff] [blame]	222
				223	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	224	// int32_t IncomingPacket()
				225	// Call this function to insert a parsed RTP packet into ACM.
				226	//
				227	// Inputs:
				228	// -incoming_payload : received payload.
				229	// -payload_len_bytes : the length of payload in bytes.
				230	// -rtp_info : the relevant information retrieved from RTP
				231	// header.
				232	//
				233	// Return value:
				234	// -1 if failed to push in the payload
				235	// 0 if payload is successfully pushed in.
				236	//
				237	virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
				238	const size_t payload_len_bytes,
Niels Möller	afb5dbb	2019-02-15 15:21:47 +0100	[diff] [blame]	239	const RTPHeader& rtp_header) = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	240
				241	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	242	// int SetMinimumPlayoutDelay()
				243	// Set a minimum for the playout delay, used for lip-sync. NetEq maintains
				244	// such a delay unless channel condition yields to a higher delay.
				245	//
				246	// Input:
				247	// -time_ms : minimum delay in milliseconds.
				248	//
				249	// Return value:
				250	// -1 if failed to set the delay,
				251	// 0 if the minimum delay is set.
				252	//
				253	virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
				254
				255	///////////////////////////////////////////////////////////////////////////
				256	// int SetMaximumPlayoutDelay()
				257	// Set a maximum for the playout delay
				258	//
				259	// Input:
				260	// -time_ms : maximum delay in milliseconds.
				261	//
				262	// Return value:
				263	// -1 if failed to set the delay,
				264	// 0 if the maximum delay is set.
				265	//
				266	virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
				267
Ruslan Burakov	3b50f9f	2019-02-06 09:45:56 +0100	[diff] [blame]	268	// Sets a base minimum for the playout delay. Base minimum delay sets lower
				269	// bound minimum delay value which is set via SetMinimumPlayoutDelay.
				270	//
				271	// Returns true if value was successfully set, false overwise.
				272	virtual bool SetBaseMinimumPlayoutDelayMs(int delay_ms) = 0;
				273
				274	// Returns current value of base minimum delay in milliseconds.
				275	virtual int GetBaseMinimumPlayoutDelayMs() const = 0;
				276
henrik.lundin	9a410dd	2016-04-06 01:39:22 -0700	[diff] [blame]	277	///////////////////////////////////////////////////////////////////////////
				278	// int32_t PlayoutTimestamp()
				279	// The send timestamp of an RTP packet is associated with the decoded
				280	// audio of the packet in question. This function returns the timestamp of
				281	// the latest audio obtained by calling PlayoutData10ms(), or empty if no
				282	// valid timestamp is available.
				283	//
Danil Chapovalov	b602123	2018-06-19 13:26:36 +0200	[diff] [blame]	284	virtual absl::optional<uint32_t> PlayoutTimestamp() = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	285
				286	///////////////////////////////////////////////////////////////////////////
henrik.lundin	b3f1c5d	2016-08-22 15:39:53 -0700	[diff] [blame]	287	// int FilteredCurrentDelayMs()
				288	// Returns the current total delay from NetEq (packet buffer and sync buffer)
				289	// in ms, with smoothing applied to even out short-time fluctuations due to
				290	// jitter. The packet buffer part of the delay is not updated during DTX/CNG
				291	// periods.
				292	//
				293	virtual int FilteredCurrentDelayMs() const = 0;
				294
				295	///////////////////////////////////////////////////////////////////////////
Henrik Lundin	abbff89	2017-11-29 09:14:04 +0100	[diff] [blame]	296	// int FilteredCurrentDelayMs()
				297	// Returns the current target delay for NetEq in ms.
				298	//
				299	virtual int TargetDelayMs() const = 0;
				300
				301	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	302	// int32_t PlayoutData10Ms(
				303	// Get 10 milliseconds of raw audio data for playout, at the given sampling
				304	// frequency. ACM will perform a resampling if required.
				305	//
				306	// Input:
				307	// -desired_freq_hz : the desired sampling frequency, in Hertz, of the
				308	// output audio. If set to -1, the function returns
				309	// the audio at the current sampling frequency.
				310	//
				311	// Output:
				312	// -audio_frame : output audio frame which contains raw audio data
Fredrik Solenberg	bbf21a3	2018-04-12 22:44:09 +0200	[diff] [blame]	313	// and other relevant parameters.
henrik.lundin	834a6ea	2016-05-13 03:45:24 -0700	[diff] [blame]	314	// -muted : if true, the sample data in audio_frame is not
				315	// populated, and must be interpreted as all zero.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	316	//
				317	// Return value:
				318	// -1 if the function fails,
				319	// 0 if the function succeeds.
				320	//
				321	virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
henrik.lundin	834a6ea	2016-05-13 03:45:24 -0700	[diff] [blame]	322	AudioFrame* audio_frame,
				323	bool* muted) = 0;
				324
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	325	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	326	// statistics
				327	//
				328
				329	///////////////////////////////////////////////////////////////////////////
				330	// int32_t GetNetworkStatistics()
				331	// Get network statistics. Note that the internal statistics of NetEq are
				332	// reset by this call.
				333	//
				334	// Input:
				335	// -network_statistics : a structure that contains network statistics.
				336	//
				337	// Return value:
				338	// -1 if failed to set the network statistics,
				339	// 0 if statistics are set successfully.
				340	//
				341	virtual int32_t GetNetworkStatistics(
				342	NetworkStatistics* network_statistics) = 0;
				343
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	344	virtual void GetDecodingCallStatistics(
				345	AudioDecodingCallStats* call_stats) const = 0;
ivoc	e1198e0	2017-09-08 08:13:19 -0700	[diff] [blame]	346
				347	virtual ANAStats GetANAStats() const = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	348	};
				349
				350	} // namespace webrtc
				351
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	352	#endif // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_