blob: 322a86fe7c5786241f1a08193d27b5c18b8dc0a2 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Henrik Kjellander74640892015-10-29 11:31:02 +010011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
#include <string.h>  // Provide access to size_t.

#include <map>
#include <string>
#include <vector>

#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
#include "webrtc/base/scoped_ref_ptr.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/typedefs.h"
24
25namespace webrtc {
26
// Forward declarations. Both types are only used by pointer or reference in
// this header, so their full definitions are not needed here.
class AudioFrame;
class AudioDecoderFactory;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000030
// Snapshot of NetEq's network-related statistics, filled in by
// NetEq::NetworkStatistics(). Rates marked "Q14" are fixed-point fractions
// scaled by 2^14 (i.e., 16384 represents 1.0).
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;    // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;        // 1 if adding extra delay due to peaky
                                      // jitter; 0 otherwise.
  uint16_t packet_loss_rate;          // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;       // Late loss rate in Q14.
  uint16_t expand_rate;         // Fraction (of original stream) of synthesized
                                // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;     // Fraction of data inserted through pre-emptive
                                // expansion (in Q14).
  uint16_t accelerate_rate;     // Fraction of data removed through acceleration
                                // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;     // Average clock-drift in parts-per-million
                              // (positive or negative).
  size_t added_zero_samples;  // Number of zero samples added in "off" mode.
  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
58
// Playout modes selectable through NetEq::Config::playout_mode (and through
// the deprecated NetEq::SetPlayoutMode()). The enumerators keep their implicit
// values 0..3; do not reorder them.
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
65
66// This is the interface class for NetEq.
67class NetEq {
68 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000069 enum BackgroundNoiseMode {
70 kBgnOn, // Default behavior with eternal noise.
71 kBgnFade, // Noise fades to zero after some time.
72 kBgnOff // Background noise is always zero.
73 };
74
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000075 struct Config {
76 Config()
77 : sample_rate_hz(16000),
henrik.lundin9bc26672015-11-02 03:25:57 -080078 enable_post_decode_vad(false),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000079 max_packets_in_buffer(50),
80 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000081 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000082 background_noise_mode(kBgnOff),
Henrik Lundincf808d22015-05-27 14:33:29 +020083 playout_mode(kPlayoutOn),
84 enable_fast_accelerate(false) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000085
Henrik Lundin905495c2015-05-25 16:58:41 +020086 std::string ToString() const;
87
Henrik Lundin83b5c052015-05-08 10:33:57 +020088 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin9bc26672015-11-02 03:25:57 -080089 bool enable_post_decode_vad;
Peter Kastingdce40cf2015-08-24 14:52:23 -070090 size_t max_packets_in_buffer;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000091 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000092 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000093 NetEqPlayoutMode playout_mode;
Henrik Lundincf808d22015-05-27 14:33:29 +020094 bool enable_fast_accelerate;
henrik.lundin7a926812016-05-12 13:51:28 -070095 bool enable_muted_state = false;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000096 };
97
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000098 enum ReturnCodes {
99 kOK = 0,
100 kFail = -1,
101 kNotImplemented = -2
102 };
103
104 enum ErrorCodes {
105 kNoError = 0,
106 kOtherError,
107 kInvalidRtpPayloadType,
108 kUnknownRtpPayloadType,
109 kCodecNotSupported,
110 kDecoderExists,
111 kDecoderNotFound,
112 kInvalidSampleRate,
113 kInvalidPointer,
114 kAccelerateError,
115 kPreemptiveExpandError,
116 kComfortNoiseErrorCode,
117 kDecoderErrorCode,
118 kOtherDecoderError,
119 kInvalidOperation,
120 kDtmfParameterError,
121 kDtmfParsingError,
122 kDtmfInsertError,
123 kStereoNotSupported,
124 kSampleUnderrun,
125 kDecodedTooMuch,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000126 kRedundancySplitError,
ossu17e3fa12016-09-08 04:52:55 -0700127 kPacketBufferCorruption
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000128 };
129
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000130 // Creates a new NetEq object, with parameters set in |config|. The |config|
131 // object will only have to be valid for the duration of the call to this
132 // method.
ossue3525782016-05-25 07:37:43 -0700133 static NetEq* Create(
134 const NetEq::Config& config,
135 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000136
137 virtual ~NetEq() {}
138
139 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
140 // of the time when the packet was received, and should be measured with
141 // the same tick rate as the RTP timestamp of the current payload.
142 // Returns 0 on success, -1 on failure.
Henrik Lundin70c09bd2017-04-24 15:56:56 +0200143 virtual int InsertPacket(const RTPHeader& rtp_header,
kwibergee2bac22015-11-11 10:34:00 -0800144 rtc::ArrayView<const uint8_t> payload,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000145 uint32_t receive_timestamp) = 0;
146
147 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
henrik.lundin7dc68892016-04-06 01:03:02 -0700148 // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
149 // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
henrik.lundin55480f52016-03-08 02:37:57 -0800150 // |vad_activity_| are updated upon success. If an error is returned, some
henrik.lundin5fac3f02016-08-24 11:18:49 -0700151 // fields may not have been updated, or may contain inconsistent values.
henrik.lundin7a926812016-05-12 13:51:28 -0700152 // If muted state is enabled (through Config::enable_muted_state), |muted|
153 // may be set to true after a prolonged expand period. When this happens, the
154 // |data_| in |audio_frame| is not written, but should be interpreted as being
155 // all zeros.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000156 // Returns kOK on success, or kFail in case of an error.
henrik.lundin7a926812016-05-12 13:51:28 -0700157 virtual int GetAudio(AudioFrame* audio_frame, bool* muted) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000158
kwiberg1c07c702017-03-27 07:15:49 -0700159 // Replaces the current set of decoders with the given one.
160 virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;
161
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800162 // Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
163 // information in the codec database. Returns 0 on success, -1 on failure.
164 // The name is only used to provide information back to the caller about the
165 // decoders. Hence, the name is arbitrary, and may be empty.
kwibergee1879c2015-10-29 06:20:28 -0700166 virtual int RegisterPayloadType(NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800167 const std::string& codec_name,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000168 uint8_t rtp_payload_type) = 0;
169
170 // Provides an externally created decoder object |decoder| to insert in the
171 // decoder database. The decoder implements a decoder of type |codec| and
kwiberg342f7402016-06-16 03:18:00 -0700172 // associates it with |rtp_payload_type| and |codec_name|. Returns kOK on
173 // success, kFail on failure. The name is only used to provide information
174 // back to the caller about the decoders. Hence, the name is arbitrary, and
175 // may be empty.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000176 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
kwibergee1879c2015-10-29 06:20:28 -0700177 NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800178 const std::string& codec_name,
kwiberg342f7402016-06-16 03:18:00 -0700179 uint8_t rtp_payload_type) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000180
kwiberg5adaf732016-10-04 09:33:27 -0700181 // Associates |rtp_payload_type| with the given codec, which NetEq will
182 // instantiate when it needs it. Returns true iff successful.
183 virtual bool RegisterPayloadType(int rtp_payload_type,
184 const SdpAudioFormat& audio_format) = 0;
185
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000186 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
187 // -1 on failure.
188 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
189
kwiberg6b19b562016-09-20 04:02:25 -0700190 // Removes all payload types from the codec database.
191 virtual void RemoveAllPayloadTypes() = 0;
192
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000193 // Sets a minimum delay in millisecond for packet buffer. The minimum is
194 // maintained unless a higher latency is dictated by channel condition.
195 // Returns true if the minimum is successfully applied, otherwise false is
196 // returned.
197 virtual bool SetMinimumDelay(int delay_ms) = 0;
198
199 // Sets a maximum delay in milliseconds for packet buffer. The latency will
200 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000201 // conditions) is higher. Calling this method has the same effect as setting
202 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000203 virtual bool SetMaximumDelay(int delay_ms) = 0;
204
205 // The smallest latency required. This is computed bases on inter-arrival
206 // time and internal NetEq logic. Note that in computing this latency none of
207 // the user defined limits (applied by calling setMinimumDelay() and/or
208 // SetMaximumDelay()) are applied.
209 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000210
211 // Not implemented.
212 virtual int SetTargetDelay() = 0;
213
214 // Not implemented.
215 virtual int TargetDelay() = 0;
216
henrik.lundin9c3efd02015-08-27 13:12:22 -0700217 // Returns the current total delay (packet buffer and sync buffer) in ms.
218 virtual int CurrentDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000219
henrik.lundinb3f1c5d2016-08-22 15:39:53 -0700220 // Returns the current total delay (packet buffer and sync buffer) in ms,
221 // with smoothing applied to even out short-time fluctuations due to jitter.
222 // The packet buffer part of the delay is not updated during DTX/CNG periods.
223 virtual int FilteredCurrentDelayMs() const = 0;
224
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000225 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000226 // Deprecated. Set the mode in the Config struct passed to the constructor.
227 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000228 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
229
230 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000231 // Deprecated.
232 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000233 virtual NetEqPlayoutMode PlayoutMode() const = 0;
234
235 // Writes the current network statistics to |stats|. The statistics are reset
236 // after the call.
237 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
238
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000239 // Writes the current RTCP statistics to |stats|. The statistics are reset
240 // and a new report period is started with the call.
241 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
242
243 // Same as RtcpStatistics(), but does not reset anything.
244 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
245
246 // Enables post-decode VAD. When enabled, GetAudio() will return
247 // kOutputVADPassive when the signal contains no speech.
248 virtual void EnableVad() = 0;
249
250 // Disables post-decode VAD.
251 virtual void DisableVad() = 0;
252
henrik.lundin9a410dd2016-04-06 01:39:22 -0700253 // Returns the RTP timestamp for the last sample delivered by GetAudio().
254 // The return value will be empty if no valid timestamp is available.
henrik.lundin15c51e32016-04-06 08:38:56 -0700255 virtual rtc::Optional<uint32_t> GetPlayoutTimestamp() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000256
henrik.lundind89814b2015-11-23 06:49:25 -0800257 // Returns the sample rate in Hz of the audio produced in the last GetAudio
258 // call. If GetAudio has not been called yet, the configured sample rate
259 // (Config::sample_rate_hz) is returned.
260 virtual int last_output_sample_rate_hz() const = 0;
261
kwiberg6f0f6162016-09-20 03:07:46 -0700262 // Returns info about the decoder for the given payload type, or an empty
263 // value if we have no decoder for that payload type.
264 virtual rtc::Optional<CodecInst> GetDecoder(int payload_type) const = 0;
265
ossuf1b08da2016-09-23 02:19:43 -0700266 // Returns the decoder format for the given payload type. Returns empty if no
267 // such payload type was registered.
268 virtual rtc::Optional<SdpAudioFormat> GetDecoderFormat(
269 int payload_type) const = 0;
kwibergc4ccd4d2016-09-21 10:55:15 -0700270
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000271 // Not implemented.
272 virtual int SetTargetNumberOfChannels() = 0;
273
274 // Not implemented.
275 virtual int SetTargetSampleRate() = 0;
276
277 // Returns the error code for the last occurred error. If no error has
278 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000279 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000280
281 // Returns the error code last returned by a decoder (audio or comfort noise).
282 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
283 // this method to get the decoder's error code.
284 virtual int LastDecoderError() = 0;
285
286 // Flushes both the packet buffer and the sync buffer.
287 virtual void FlushBuffers() = 0;
288
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000289 // Current usage of packet-buffer and it's limits.
290 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000291 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000292
henrik.lundin48ed9302015-10-29 05:36:24 -0700293 // Enables NACK and sets the maximum size of the NACK list, which should be
294 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
295 // enabled then the maximum NACK list size is modified accordingly.
296 virtual void EnableNack(size_t max_nack_list_size) = 0;
297
298 virtual void DisableNack() = 0;
299
300 // Returns a list of RTP sequence numbers corresponding to packets to be
301 // retransmitted, given an estimate of the round-trip time in milliseconds.
302 virtual std::vector<uint16_t> GetNackList(
303 int64_t round_trip_time_ms) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000304
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000305 protected:
306 NetEq() {}
307
308 private:
henrikg3c089d72015-09-16 05:37:44 -0700309 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000310};
311
312} // namespace webrtc
Henrik Kjellander74640892015-10-29 11:31:02 +0100313#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_