Blame - modules/audio_coding/include/audio_coding_module.h - webrtc.googlesource.com/src

blob: 17ad71d53ae91889cde07e44a6e706ef57584e19 [file] [log] [blame]

kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	11	#ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
				12	#define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	13
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
				18
Danil Chapovalov	b602123	2018-06-19 13:26:36 +0200	[diff] [blame]	19	#include "absl/types/optional.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	20	#include "api/audio_codecs/audio_decoder_factory.h"
				21	#include "api/audio_codecs/audio_encoder.h"
Artem Titov	741daaf	2019-03-21 14:37:36 +0100	[diff] [blame]	22	#include "api/function_view.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	23	#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
				24	#include "modules/audio_coding/neteq/include/neteq.h"
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	25	#include "system_wrappers/include/clock.h"
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	26
				27	namespace webrtc {
				28
				29	// forward declarations
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	30	class AudioDecoder;
				31	class AudioEncoder;
				32	class AudioFrame;
				33	class RTPFragmentationHeader;
Niels Möller	afb5dbb	2019-02-15 15:21:47 +0100	[diff] [blame]	34	struct RTPHeader;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	35
				36	#define WEBRTC_10MS_PCM_AUDIO 960 // 16 bits super wideband 48 kHz
				37
				38	// Callback class used for sending data ready to be packetized
				39	class AudioPacketizationCallback {
				40	public:
				41	virtual ~AudioPacketizationCallback() {}
				42
Niels Möller	87e2d78	2019-03-07 10:18:23 +0100	[diff] [blame]	43	virtual int32_t SendData(AudioFrameType frame_type,
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	44	uint8_t payload_type,
				45	uint32_t timestamp,
				46	const uint8_t* payload_data,
Niels Möller	c35b6e6	2019-04-25 16:31:18 +0200	[diff] [blame^]	47	size_t payload_len_bytes) {
				48	return SendData(frame_type, payload_type, timestamp, payload_data,
				49	payload_len_bytes, nullptr);
				50	}
				51
				52	// TODO(bugs.webrtc.org/6471) Deprecated, delete as soon as downstream
				53	// implementations are updated. Then make above method pure virtual, and
				54	// delete forward declaration of RTPFragmentationHeader.
				55	virtual int32_t SendData(AudioFrameType frame_type,
				56	uint8_t payload_type,
				57	uint32_t timestamp,
				58	const uint8_t* payload_data,
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	59	size_t payload_len_bytes,
Niels Möller	c35b6e6	2019-04-25 16:31:18 +0200	[diff] [blame^]	60	const RTPFragmentationHeader* fragmentation) {
				61	return SendData(frame_type, payload_type, timestamp, payload_data,
				62	payload_len_bytes);
				63	}
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	64	};
				65
				66	// Callback class used for reporting VAD decision
				67	class ACMVADCallback {
				68	public:
				69	virtual ~ACMVADCallback() {}
				70
Niels Möller	87e2d78	2019-03-07 10:18:23 +0100	[diff] [blame]	71	virtual int32_t InFrameType(AudioFrameType frame_type) = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	72	};
				73
				74	class AudioCodingModule {
				75	protected:
				76	AudioCodingModule() {}
				77
				78	public:
				79	struct Config {
Karl Wiberg	5817d3d	2018-04-06 10:06:42 +0200	[diff] [blame]	80	explicit Config(
				81	rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr);
kwiberg	36a4388	2016-08-29 05:33:32 -0700	[diff] [blame]	82	Config(const Config&);
				83	~Config();
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	84
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	85	NetEq::Config neteq_config;
				86	Clock* clock;
ossu	e352578	2016-05-25 07:37:43 -0700	[diff] [blame]	87	rtc::scoped_refptr<AudioDecoderFactory> decoder_factory;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	88	};
				89
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	90	static AudioCodingModule* Create(const Config& config);
				91	virtual ~AudioCodingModule() = default;
				92
				93	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	94	// Sender
				95	//
				96
kwiberg	4cdbd57	2016-03-30 03:10:05 -0700	[diff] [blame]	97	// \|modifier\| is called exactly once with one argument: a pointer to the
				98	// unique_ptr that holds the current encoder (which is null if there is no
				99	// current encoder). For the duration of the call, \|modifier\| has exclusive
				100	// access to the unique_ptr; it may call the encoder, steal the encoder and
				101	// replace it with another encoder or with nullptr, etc.
				102	virtual void ModifyEncoder(
kwiberg	24c7c12	2016-09-28 11:57:10 -0700	[diff] [blame]	103	rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
kwiberg	4cdbd57	2016-03-30 03:10:05 -0700	[diff] [blame]	104
				105	// Utility method for simply replacing the existing encoder with a new one.
				106	void SetEncoder(std::unique_ptr<AudioEncoder> new_encoder) {
				107	ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder) {
				108	*encoder = std::move(new_encoder);
				109	});
				110	}
				111
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	112	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	113	// Sets the bitrate to the specified value in bits/sec. If the value is not
				114	// supported by the codec, it will choose another appropriate value.
minyue	7e30432	2016-10-12 05:00:55 -0700	[diff] [blame]	115	//
				116	// This is only used in test code that rely on old ACM APIs.
				117	// TODO(minyue): Remove it when possible.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	118	virtual void SetBitRate(int bitrate_bps) = 0;
				119
				120	// int32_t RegisterTransportCallback()
				121	// Register a transport callback which will be called to deliver
				122	// the encoded buffers whenever Process() is called and a
				123	// bit-stream is ready.
				124	//
				125	// Input:
				126	// -transport : pointer to the callback class
				127	// transport->SendData() is called whenever
				128	// Process() is called and bit-stream is ready
				129	// to deliver.
				130	//
				131	// Return value:
				132	// -1 if the transport callback could not be registered
				133	// 0 if registration is successful.
				134	//
				135	virtual int32_t RegisterTransportCallback(
				136	AudioPacketizationCallback* transport) = 0;
				137
				138	///////////////////////////////////////////////////////////////////////////
				139	// int32_t Add10MsData()
				140	// Add 10MS of raw (PCM) audio data and encode it. If the sampling
				141	// frequency of the audio does not match the sampling frequency of the
				142	// current encoder ACM will resample the audio. If an encoded packet was
				143	// produced, it will be delivered via the callback object registered using
				144	// RegisterTransportCallback, and the return value from this function will
				145	// be the number of bytes encoded.
				146	//
				147	// Input:
				148	// -audio_frame : the input audio frame, containing raw audio
Fredrik Solenberg	bbf21a3	2018-04-12 22:44:09 +0200	[diff] [blame]	149	// sampling frequency etc.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	150	//
				151	// Return value:
				152	// >= 0 number of bytes encoded.
				153	// -1 some error occurred.
				154	//
				155	virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
				156
				157	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	158	// int SetPacketLossRate()
				159	// Sets expected packet loss rate for encoding. Some encoders provide packet
				160	// loss gnostic encoding to make stream less sensitive to packet losses,
				161	// through e.g., FEC. No effects on codecs that do not provide such encoding.
				162	//
				163	// Input:
				164	// -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive).
				165	//
				166	// Return value
				167	// -1 if failed to set packet loss rate,
				168	// 0 if succeeded.
				169	//
minyue	7e30432	2016-10-12 05:00:55 -0700	[diff] [blame]	170	// This is only used in test code that rely on old ACM APIs.
				171	// TODO(minyue): Remove it when possible.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	172	virtual int SetPacketLossRate(int packet_loss_rate) = 0;
				173
				174	///////////////////////////////////////////////////////////////////////////
				175	// (VAD) Voice Activity Detection
				176	//
				177
				178	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	179	// int32_t RegisterVADCallback()
				180	// Call this method to register a callback function which is called
				181	// any time that ACM encounters an empty frame. That is a frame which is
				182	// recognized inactive. Depending on the codec WebRtc VAD or internal codec
				183	// VAD is employed to identify a frame as active/inactive.
				184	//
				185	// Input:
				186	// -vad_callback : pointer to a callback function.
				187	//
				188	// Return value:
				189	// -1 if failed to register the callback function.
				190	// 0 if the callback function is registered successfully.
				191	//
				192	virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
				193
				194	///////////////////////////////////////////////////////////////////////////
				195	// Receiver
				196	//
				197
				198	///////////////////////////////////////////////////////////////////////////
				199	// int32_t InitializeReceiver()
				200	// Any decoder-related state of ACM will be initialized to the
				201	// same state when ACM is created. This will not interrupt or
				202	// effect encoding functionality of ACM. ACM would lose all the
				203	// decoding-related settings by calling this function.
				204	// For instance, all registered codecs are deleted and have to be
				205	// registered again.
				206	//
				207	// Return value:
				208	// -1 if failed to initialize,
				209	// 0 if succeeded.
				210	//
				211	virtual int32_t InitializeReceiver() = 0;
				212
				213	///////////////////////////////////////////////////////////////////////////
				214	// int32_t ReceiveFrequency()
				215	// Get sampling frequency of the last received payload.
				216	//
				217	// Return value:
				218	// non-negative the sampling frequency in Hertz.
				219	// -1 if an error has occurred.
				220	//
				221	virtual int32_t ReceiveFrequency() const = 0;
				222
				223	///////////////////////////////////////////////////////////////////////////
				224	// int32_t PlayoutFrequency()
				225	// Get sampling frequency of audio played out.
				226	//
				227	// Return value:
				228	// the sampling frequency in Hertz.
				229	//
				230	virtual int32_t PlayoutFrequency() const = 0;
				231
kwiberg	1c07c70	2017-03-27 07:15:49 -0700	[diff] [blame]	232	// Replace any existing decoders with the given payload type -> decoder map.
				233	virtual void SetReceiveCodecs(
				234	const std::map<int, SdpAudioFormat>& codecs) = 0;
				235
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	236	///////////////////////////////////////////////////////////////////////////
Fredrik Solenberg	f693bfa	2018-12-11 12:22:10 +0100	[diff] [blame]	237	// absl::optional<std::pair<int, SdpAudioFormat>> ReceiveCodec()
				238	// Get the codec info associated with last received payload.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	239	//
				240	// Return value:
Fredrik Solenberg	f693bfa	2018-12-11 12:22:10 +0100	[diff] [blame]	241	// A payload type and SdpAudioFormat describing the format associated with
				242	// the last received payload.
ossu	e280cde	2016-10-12 11:04:10 -0700	[diff] [blame]	243	// An empty Optional if no payload has yet been received.
				244	//
Fredrik Solenberg	f693bfa	2018-12-11 12:22:10 +0100	[diff] [blame]	245	virtual absl::optional<std::pair<int, SdpAudioFormat>>
				246	ReceiveCodec() const = 0;
ossu	e280cde	2016-10-12 11:04:10 -0700	[diff] [blame]	247
				248	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	249	// int32_t IncomingPacket()
				250	// Call this function to insert a parsed RTP packet into ACM.
				251	//
				252	// Inputs:
				253	// -incoming_payload : received payload.
				254	// -payload_len_bytes : the length of payload in bytes.
				255	// -rtp_info : the relevant information retrieved from RTP
				256	// header.
				257	//
				258	// Return value:
				259	// -1 if failed to push in the payload
				260	// 0 if payload is successfully pushed in.
				261	//
				262	virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
				263	const size_t payload_len_bytes,
Niels Möller	afb5dbb	2019-02-15 15:21:47 +0100	[diff] [blame]	264	const RTPHeader& rtp_header) = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	265
				266	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	267	// int SetMinimumPlayoutDelay()
				268	// Set a minimum for the playout delay, used for lip-sync. NetEq maintains
				269	// such a delay unless channel condition yields to a higher delay.
				270	//
				271	// Input:
				272	// -time_ms : minimum delay in milliseconds.
				273	//
				274	// Return value:
				275	// -1 if failed to set the delay,
				276	// 0 if the minimum delay is set.
				277	//
				278	virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
				279
				280	///////////////////////////////////////////////////////////////////////////
				281	// int SetMaximumPlayoutDelay()
				282	// Set a maximum for the playout delay
				283	//
				284	// Input:
				285	// -time_ms : maximum delay in milliseconds.
				286	//
				287	// Return value:
				288	// -1 if failed to set the delay,
				289	// 0 if the maximum delay is set.
				290	//
				291	virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
				292
Ruslan Burakov	3b50f9f	2019-02-06 09:45:56 +0100	[diff] [blame]	293	// Sets a base minimum for the playout delay. Base minimum delay sets lower
				294	// bound minimum delay value which is set via SetMinimumPlayoutDelay.
				295	//
				296	// Returns true if value was successfully set, false overwise.
				297	virtual bool SetBaseMinimumPlayoutDelayMs(int delay_ms) = 0;
				298
				299	// Returns current value of base minimum delay in milliseconds.
				300	virtual int GetBaseMinimumPlayoutDelayMs() const = 0;
				301
henrik.lundin	9a410dd	2016-04-06 01:39:22 -0700	[diff] [blame]	302	///////////////////////////////////////////////////////////////////////////
				303	// int32_t PlayoutTimestamp()
				304	// The send timestamp of an RTP packet is associated with the decoded
				305	// audio of the packet in question. This function returns the timestamp of
				306	// the latest audio obtained by calling PlayoutData10ms(), or empty if no
				307	// valid timestamp is available.
				308	//
Danil Chapovalov	b602123	2018-06-19 13:26:36 +0200	[diff] [blame]	309	virtual absl::optional<uint32_t> PlayoutTimestamp() = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	310
				311	///////////////////////////////////////////////////////////////////////////
henrik.lundin	b3f1c5d	2016-08-22 15:39:53 -0700	[diff] [blame]	312	// int FilteredCurrentDelayMs()
				313	// Returns the current total delay from NetEq (packet buffer and sync buffer)
				314	// in ms, with smoothing applied to even out short-time fluctuations due to
				315	// jitter. The packet buffer part of the delay is not updated during DTX/CNG
				316	// periods.
				317	//
				318	virtual int FilteredCurrentDelayMs() const = 0;
				319
				320	///////////////////////////////////////////////////////////////////////////
Henrik Lundin	abbff89	2017-11-29 09:14:04 +0100	[diff] [blame]	321	// int FilteredCurrentDelayMs()
				322	// Returns the current target delay for NetEq in ms.
				323	//
				324	virtual int TargetDelayMs() const = 0;
				325
				326	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	327	// int32_t PlayoutData10Ms(
				328	// Get 10 milliseconds of raw audio data for playout, at the given sampling
				329	// frequency. ACM will perform a resampling if required.
				330	//
				331	// Input:
				332	// -desired_freq_hz : the desired sampling frequency, in Hertz, of the
				333	// output audio. If set to -1, the function returns
				334	// the audio at the current sampling frequency.
				335	//
				336	// Output:
				337	// -audio_frame : output audio frame which contains raw audio data
Fredrik Solenberg	bbf21a3	2018-04-12 22:44:09 +0200	[diff] [blame]	338	// and other relevant parameters.
henrik.lundin	834a6ea	2016-05-13 03:45:24 -0700	[diff] [blame]	339	// -muted : if true, the sample data in audio_frame is not
				340	// populated, and must be interpreted as all zero.
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	341	//
				342	// Return value:
				343	// -1 if the function fails,
				344	// 0 if the function succeeds.
				345	//
				346	virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
henrik.lundin	834a6ea	2016-05-13 03:45:24 -0700	[diff] [blame]	347	AudioFrame* audio_frame,
				348	bool* muted) = 0;
				349
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	350	///////////////////////////////////////////////////////////////////////////
				351	// Codec specific
				352	//
				353
				354	///////////////////////////////////////////////////////////////////////////
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	355	// int SetOpusMaxPlaybackRate()
				356	// If current send codec is Opus, informs it about maximum playback rate the
				357	// receiver will render. Opus can use this information to optimize the bit
				358	// rate and increase the computation efficiency.
				359	//
				360	// Input:
				361	// -frequency_hz : maximum playback rate in Hz.
				362	//
				363	// Return value:
				364	// -1 if current send codec is not Opus or
				365	// error occurred in setting the maximum playback rate,
				366	// 0 if maximum bandwidth is set successfully.
				367	//
				368	virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0;
				369
				370	///////////////////////////////////////////////////////////////////////////
				371	// EnableOpusDtx()
				372	// Enable the DTX, if current send codec is Opus.
				373	//
				374	// Return value:
				375	// -1 if current send codec is not Opus or error occurred in enabling the
				376	// Opus DTX.
				377	// 0 if Opus DTX is enabled successfully.
				378	//
				379	virtual int EnableOpusDtx() = 0;
				380
				381	///////////////////////////////////////////////////////////////////////////
				382	// int DisableOpusDtx()
				383	// If current send codec is Opus, disables its internal DTX.
				384	//
				385	// Return value:
				386	// -1 if current send codec is not Opus or error occurred in disabling DTX.
				387	// 0 if Opus DTX is disabled successfully.
				388	//
				389	virtual int DisableOpusDtx() = 0;
				390
				391	///////////////////////////////////////////////////////////////////////////
				392	// statistics
				393	//
				394
				395	///////////////////////////////////////////////////////////////////////////
				396	// int32_t GetNetworkStatistics()
				397	// Get network statistics. Note that the internal statistics of NetEq are
				398	// reset by this call.
				399	//
				400	// Input:
				401	// -network_statistics : a structure that contains network statistics.
				402	//
				403	// Return value:
				404	// -1 if failed to set the network statistics,
				405	// 0 if statistics are set successfully.
				406	//
				407	virtual int32_t GetNetworkStatistics(
				408	NetworkStatistics* network_statistics) = 0;
				409
				410	//
				411	// Enable NACK and set the maximum size of the NACK list. If NACK is already
				412	// enable then the maximum NACK list size is modified accordingly.
				413	//
				414	// If the sequence number of last received packet is N, the sequence numbers
				415	// of NACK list are in the range of [N - \|max_nack_list_size\|, N).
				416	//
				417	// \|max_nack_list_size\| should be positive (none zero) and less than or
				418	// equal to \|Nack::kNackListSizeLimit\|. Otherwise, No change is applied and -1
				419	// is returned. 0 is returned at success.
				420	//
				421	virtual int EnableNack(size_t max_nack_list_size) = 0;
				422
				423	// Disable NACK.
				424	virtual void DisableNack() = 0;
				425
				426	//
				427	// Get a list of packets to be retransmitted. \|round_trip_time_ms\| is an
				428	// estimate of the round-trip-time (in milliseconds). Missing packets which
				429	// will be playout in a shorter time than the round-trip-time (with respect
				430	// to the time this API is called) will not be included in the list.
				431	//
				432	// Negative \|round_trip_time_ms\| results is an error message and empty list
				433	// is returned.
				434	//
				435	virtual std::vector<uint16_t> GetNackList(
				436	int64_t round_trip_time_ms) const = 0;
				437
				438	virtual void GetDecodingCallStatistics(
				439	AudioDecodingCallStats* call_stats) const = 0;
ivoc	e1198e0	2017-09-08 08:13:19 -0700	[diff] [blame]	440
				441	virtual ANAStats GetANAStats() const = 0;
kjellander	3e6db23	2015-11-26 04:44:54 -0800	[diff] [blame]	442	};
				443
				444	} // namespace webrtc
				445
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	446	#endif // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_