Blame - webrtc/api/audio_codecs/audio_encoder.h - webrtc.googlesource.com/src

blob: a77894b7daedd36c7686093e00dbaa2c30c7ce75 [file] [log] [blame]

ossu	eb1fde4	2017-05-02 06:46:30 -0700	[diff] [blame]	1	/*
				2	* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#ifndef WEBRTC_API_AUDIO_CODECS_AUDIO_ENCODER_H_
				12	#define WEBRTC_API_AUDIO_CODECS_AUDIO_ENCODER_H_
				13
				14	#include <algorithm>
				15	#include <memory>
				16	#include <string>
				17	#include <vector>
				18
kwiberg	529662a	2017-09-04 05:43:17 -0700	[diff] [blame]	19	#include "webrtc/api/array_view.h"
kwiberg	84f6a3f	2017-09-05 08:43:13 -0700	[diff] [blame]	20	#include "webrtc/api/optional.h"
Edward Lemur	c20978e	2017-07-06 19:44:34 +0200	[diff] [blame]	21	#include "webrtc/rtc_base/buffer.h"
				22	#include "webrtc/rtc_base/deprecation.h"
ossu	eb1fde4	2017-05-02 06:46:30 -0700	[diff] [blame]	23	#include "webrtc/typedefs.h"
				24
				25	namespace webrtc {
				26
				27	class Clock;
				28	class RtcEventLog;
				29
ivoc	e1198e0	2017-09-08 08:13:19 -0700	[diff] [blame]	30	// Statistics related to Audio Network Adaptation (ANA). Each member counts the
					// actions taken by one ANA controller. Every counter is optional: an unset
					// counter indicates that the corresponding controller is disabled.
				31	struct ANAStats {
					// The special members below are only declared here; their definitions are
					// not part of this header.
				32	ANAStats();
				33	ANAStats(const ANAStats&);
				34	~ANAStats();
				35	// Number of actions taken by the ANA bitrate controller since the start of
				36	// the call. If this value is not set, it indicates that the bitrate
				37	// controller is disabled.
				38	rtc::Optional<uint32_t> bitrate_action_counter;
				39	// Number of actions taken by the ANA channel controller since the start of
				40	// the call. If this value is not set, it indicates that the channel
				41	// controller is disabled.
				42	rtc::Optional<uint32_t> channel_action_counter;
				43	// Number of actions taken by the ANA DTX controller since the start of the
				44	// call. If this value is not set, it indicates that the DTX controller is
				45	// disabled.
				46	rtc::Optional<uint32_t> dtx_action_counter;
				47	// Number of actions taken by the ANA FEC controller since the start of the
				48	// call. If this value is not set, it indicates that the FEC controller is
				49	// disabled.
				50	rtc::Optional<uint32_t> fec_action_counter;
				51	// Number of actions taken by the ANA frame length controller since the start
				52	// of the call. If this value is not set, it indicates that the frame length
				53	// controller is disabled.
				54	rtc::Optional<uint32_t> frame_length_action_counter;
				55	};
				56
ossu	eb1fde4	2017-05-02 06:46:30 -0700	[diff] [blame]	57	// This is the interface class for encoders in AudioCoding module. Each codec
				58	// type must have an implementation of this class.
					// Callers drive encoding through the non-virtual Encode(); subclasses provide
					// EncodeImpl() and the other pure virtual members declared below.
				59	class AudioEncoder {
				60	public:
				61	// Used for UMA logging of codec usage. The same codecs, with the
				62	// same values, must be listed in
				63	// src/tools/metrics/histograms/histograms.xml in chromium to log
				64	// correct values.
				65	enum class CodecType {
				66	kOther = 0, // Codec not specified, and/or not listed in this enum
				67	kOpus = 1,
				68	kIsac = 2,
				69	kPcmA = 3,
				70	kPcmU = 4,
				71	kG722 = 5,
				72	kIlbc = 6,
				73
				74	// Number of histogram bins in the UMA logging of codec types. The
				75	// total number of different codecs that are logged cannot exceed this
				76	// number.
					// It has no explicit initializer, so its value is one past kIlbc (i.e. 7);
					// it must stay the last enumerator for that to hold.
				77	kMaxLoggedAudioCodecTypes
				78	};
				79
					// Describes a single encoding. EncodedInfo derives from this struct and also
					// stores a vector of these in |redundant|. Every field has an in-class
					// default value.
					// NOTE(review): the exact meaning of |send_even_if_empty| and |speech| is not
					// documented in this header - confirm against the encoder implementations.
				80	struct EncodedInfoLeaf {
				81	size_t encoded_bytes = 0;
				82	uint32_t encoded_timestamp = 0;
				83	int payload_type = 0;
				84	bool send_even_if_empty = false;
				85	bool speech = true;
				86	CodecType encoder_type = CodecType::kOther;
				87	};
				88
				89	// This is the main struct for auxiliary encoding information. Each encoded
				90	// packet should be accompanied by one EncodedInfo struct, containing the
				91	// total number of |encoded_bytes|, the |encoded_timestamp| and the
				92	// |payload_type|. If the packet contains redundant encodings, the |redundant|
				93	// vector will be populated with EncodedInfoLeaf structs. Each struct in the
				94	// vector represents one encoding; the order of structs in the vector is the
				95	// same as the order in which the actual payloads are written to the byte
				96	// stream. When EncodedInfoLeaf structs are present in the vector, the main
				97	// struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the
				98	// vector.
				99	struct EncodedInfo : public EncodedInfoLeaf {
					// All special member functions are declared here and defined outside this
					// header.
				100	EncodedInfo();
				101	EncodedInfo(const EncodedInfo&);
				102	EncodedInfo(EncodedInfo&&);
				103	~EncodedInfo();
				104	EncodedInfo& operator=(const EncodedInfo&);
				105	EncodedInfo& operator=(EncodedInfo&&);
				106
					// One entry per redundant encoding contained in the packet; see the comment
					// on this struct for ordering and size semantics.
				107	std::vector<EncodedInfoLeaf> redundant;
				108	};
				109
				110	virtual ~AudioEncoder() = default;
				111
				112	// Returns the input sample rate in Hz and the number of input channels.
				113	// These are constants set at instantiation time.
				114	virtual int SampleRateHz() const = 0;
				115	virtual size_t NumChannels() const = 0;
				116
				117	// Returns the rate at which the RTP timestamps are updated. The default
				118	// implementation returns SampleRateHz().
				119	virtual int RtpTimestampRateHz() const;
				120
				121	// Returns the number of 10 ms frames the encoder will put in the next
				122	// packet. This value may only change when Encode() outputs a packet; i.e.,
				123	// the encoder may vary the number of 10 ms frames from packet to packet, but
				124	// it must decide the length of the next packet no later than when outputting
				125	// the preceding packet.
				126	virtual size_t Num10MsFramesInNextPacket() const = 0;
				127
				128	// Returns the maximum value that can be returned by
				129	// Num10MsFramesInNextPacket().
				130	virtual size_t Max10MsFramesInAPacket() const = 0;
				131
				132	// Returns the current target bitrate in bits/s. The value -1 means that the
				133	// codec adapts the target automatically, and a current target cannot be
				134	// provided.
				135	virtual int GetTargetBitrate() const = 0;
				136
				137	// Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 *
				138	// NumChannels() samples). Multi-channel audio must be sample-interleaved.
				139	// The encoder appends zero or more bytes of output to |encoded| and returns
				140	// additional encoding information. Encode() checks some preconditions, calls
				141	// EncodeImpl() which does the actual work, and then checks some
				142	// postconditions.
					// This method is intentionally non-virtual; codec-specific behavior belongs
					// in EncodeImpl().
				143	EncodedInfo Encode(uint32_t rtp_timestamp,
				144	rtc::ArrayView<const int16_t> audio,
				145	rtc::Buffer* encoded);
				146
				147	// Resets the encoder to its starting state, discarding any input that has
				148	// been fed to the encoder but not yet emitted in a packet.
				149	virtual void Reset() = 0;
				150
				151	// Enables or disables codec-internal FEC (forward error correction). Returns
				152	// true if the codec was able to comply. The default implementation returns
				153	// true when asked to disable FEC and false when asked to enable it (meaning
				154	// that FEC isn't supported).
				155	virtual bool SetFec(bool enable);
				156
				157	// Enables or disables codec-internal VAD/DTX. Returns true if the codec was
				158	// able to comply. The default implementation returns true when asked to
				159	// disable DTX and false when asked to enable it (meaning that DTX isn't
				160	// supported).
				161	virtual bool SetDtx(bool enable);
				162
				163	// Returns the status of codec-internal DTX. The default implementation always
				164	// returns false.
				165	virtual bool GetDtx() const;
				166
				167	// Sets the application mode. Returns true if the codec was able to comply.
				168	// The default implementation just returns false.
				169	enum class Application { kSpeech, kAudio };
				170	virtual bool SetApplication(Application application);
				171
				172	// Tells the encoder about the highest sample rate the decoder is expected to
				173	// use when decoding the bitstream. The encoder would typically use this
				174	// information to adjust the quality of the encoding. The default
				175	// implementation does nothing.
				176	virtual void SetMaxPlaybackRate(int frequency_hz);
				177
				178	// Deprecated (marked RTC_DEPRECATED). Please use |OnReceivedTargetAudioBitrate|
				179	// instead.
				180	// Tells the encoder what average bitrate we'd like it to produce. The
				181	// encoder is free to adjust or disregard the given bitrate (the default
				182	// implementation does the latter).
				183	RTC_DEPRECATED virtual void SetTargetBitrate(int target_bps);
				184
				185	// Causes this encoder to let go of any other encoders it contains, and
				186	// returns a pointer to an array where they are stored (which is required to
				187	// live as long as this encoder). Unless the returned array is empty, you may
				188	// not call any methods on this encoder afterwards, except for the
				189	// destructor. The default implementation just returns an empty array.
				190	// NOTE: This method is subject to change. Do not call or override it.
				191	virtual rtc::ArrayView<std::unique_ptr<AudioEncoder>>
				192	ReclaimContainedEncoders();
				193
				194	// Enables audio network adaptor. Returns true if successful.
					// NOTE(review): the default implementation is not visible in this header;
					// confirm what an encoder without ANA support returns.
				195	virtual bool EnableAudioNetworkAdaptor(const std::string& config_string,
				196	RtcEventLog* event_log);
				197
				198	// Disables audio network adaptor.
				199	virtual void DisableAudioNetworkAdaptor();
				200
				201	// Provides uplink packet loss fraction to this encoder to allow it to adapt.
				202	// |uplink_packet_loss_fraction| is in the range [0.0, 1.0].
				203	virtual void OnReceivedUplinkPacketLossFraction(
				204	float uplink_packet_loss_fraction);
				205
				206	// Provides 1st-order-FEC-recoverable uplink packet loss rate to this encoder
				207	// to allow it to adapt.
				208	// |uplink_recoverable_packet_loss_fraction| is in the range [0.0, 1.0].
				209	virtual void OnReceivedUplinkRecoverablePacketLossFraction(
				210	float uplink_recoverable_packet_loss_fraction);
				211
				212	// Provides target audio bitrate to this encoder to allow it to adapt.
				213	virtual void OnReceivedTargetAudioBitrate(int target_bps);
				214
				215	// Provides target audio bitrate and corresponding probing interval of
				216	// the bandwidth estimator to this encoder to allow it to adapt.
					// |bwe_period_ms| is optional and may be left unset by the caller.
				217	virtual void OnReceivedUplinkBandwidth(
				218	int target_audio_bitrate_bps,
minyue	93e4522	2017-05-18 14:32:41 -0700	[diff] [blame]	219	rtc::Optional<int64_t> bwe_period_ms);
ossu	eb1fde4	2017-05-02 06:46:30 -0700	[diff] [blame]	220
				221	// Provides RTT to this encoder to allow it to adapt.
				222	virtual void OnReceivedRtt(int rtt_ms);
				223
				224	// Provides overhead to this encoder to adapt. The overhead is the number of
				225	// bytes that will be added to each packet the encoder generates.
				226	virtual void OnReceivedOverhead(size_t overhead_bytes_per_packet);
				227
				228	// To allow encoder to adapt its frame length, it must be provided the frame
				229	// length range that receivers can accept.
				230	virtual void SetReceiverFrameLengthRange(int min_frame_length_ms,
				231	int max_frame_length_ms);
				232
ivoc	e1198e0	2017-09-08 08:13:19 -0700	[diff] [blame]	233	// Get statistics related to audio network adaptation.
				234	virtual ANAStats GetANAStats() const;
				235
ossu	eb1fde4	2017-05-02 06:46:30 -0700	[diff] [blame]	236	protected:
				237	// Subclasses implement this to perform the actual encoding. Called by
				238	// Encode().
					// Takes the same arguments as Encode(); it is pure virtual, so every concrete
					// encoder must override it.
				239	virtual EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
				240	rtc::ArrayView<const int16_t> audio,
				241	rtc::Buffer* encoded) = 0;
				242	};
				243	} // namespace webrtc
				244	#endif // WEBRTC_API_AUDIO_CODECS_AUDIO_ENCODER_H_