blob: f03458092eb266d3c383d9c4144bb45cfe436df9 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Henrik Kjellander74640892015-10-29 11:31:02 +010011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000014#include <string.h> // Provide access to size_t.
15
Henrik Lundin905495c2015-05-25 16:58:41 +020016#include <string>
henrik.lundin114c1b32017-04-26 07:47:32 -070017#include <vector>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000018
henrike@webrtc.org88fbb2d2014-05-21 21:18:46 +000019#include "webrtc/base/constructormagic.h"
henrik.lundin9a410dd2016-04-06 01:39:22 -070020#include "webrtc/base/optional.h"
ossue3525782016-05-25 07:37:43 -070021#include "webrtc/base/scoped_ref_ptr.h"
sprang@webrtc.orgfe5d36b2013-10-28 09:21:07 +000022#include "webrtc/common_types.h"
kwiberg@webrtc.orge04a93b2014-12-09 10:12:53 +000023#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000024#include "webrtc/typedefs.h"
25
26namespace webrtc {
27
28// Forward declarations.
henrik.lundin6d8e0112016-03-04 10:34:21 -080029class AudioFrame;
ossue3525782016-05-25 07:37:43 -070030class AudioDecoderFactory;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000031
// Snapshot of NetEq network statistics, filled in by
// NetEq::NetworkStatistics(). Rates marked "Q14" are fixed-point values as
// stated by the original field comments.
struct NetEqNetworkStatistics {
  // Current jitter buffer size in ms.
  uint16_t current_buffer_size_ms;
  // Target buffer size in ms.
  uint16_t preferred_buffer_size_ms;
  // 1 if adding extra delay due to peaky jitter; 0 otherwise.
  uint16_t jitter_peaks_found;
  // Loss rate (network + late) in Q14.
  uint16_t packet_loss_rate;
  // Late loss rate in Q14.
  uint16_t packet_discard_rate;
  // Fraction (of original stream) of synthesized audio inserted through
  // expansion (in Q14).
  uint16_t expand_rate;
  // Fraction (of original stream) of synthesized speech inserted through
  // expansion (in Q14).
  uint16_t speech_expand_rate;
  // Fraction of data inserted through pre-emptive expansion (in Q14).
  uint16_t preemptive_rate;
  // Fraction of data removed through acceleration (in Q14).
  uint16_t accelerate_rate;
  // Fraction of data coming from secondary decoding (in Q14).
  uint16_t secondary_decoded_rate;
  // Average clock-drift in parts-per-million (positive or negative).
  int32_t clockdrift_ppm;
  // Number of zero samples added in "off" mode.
  size_t added_zero_samples;

  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
59
// Playout modes that can be selected for NetEq, either through
// NetEq::Config::playout_mode or the deprecated NetEq::SetPlayoutMode().
// The numeric values are the ones the compiler assigned implicitly before;
// they are only spelled out here for readability.
enum NetEqPlayoutMode {
  kPlayoutOn = 0,
  kPlayoutOff = 1,
  kPlayoutFax = 2,
  kPlayoutStreaming = 3
};
66
67// This is the interface class for NetEq.
68class NetEq {
69 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000070 enum BackgroundNoiseMode {
71 kBgnOn, // Default behavior with eternal noise.
72 kBgnFade, // Noise fades to zero after some time.
73 kBgnOff // Background noise is always zero.
74 };
75
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000076 struct Config {
77 Config()
78 : sample_rate_hz(16000),
henrik.lundin9bc26672015-11-02 03:25:57 -080079 enable_post_decode_vad(false),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000080 max_packets_in_buffer(50),
81 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000082 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000083 background_noise_mode(kBgnOff),
Henrik Lundincf808d22015-05-27 14:33:29 +020084 playout_mode(kPlayoutOn),
85 enable_fast_accelerate(false) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000086
Henrik Lundin905495c2015-05-25 16:58:41 +020087 std::string ToString() const;
88
Henrik Lundin83b5c052015-05-08 10:33:57 +020089 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin9bc26672015-11-02 03:25:57 -080090 bool enable_post_decode_vad;
Peter Kastingdce40cf2015-08-24 14:52:23 -070091 size_t max_packets_in_buffer;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000092 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000093 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000094 NetEqPlayoutMode playout_mode;
Henrik Lundincf808d22015-05-27 14:33:29 +020095 bool enable_fast_accelerate;
henrik.lundin7a926812016-05-12 13:51:28 -070096 bool enable_muted_state = false;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000097 };
98
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000099 enum ReturnCodes {
100 kOK = 0,
101 kFail = -1,
102 kNotImplemented = -2
103 };
104
105 enum ErrorCodes {
106 kNoError = 0,
107 kOtherError,
108 kInvalidRtpPayloadType,
109 kUnknownRtpPayloadType,
110 kCodecNotSupported,
111 kDecoderExists,
112 kDecoderNotFound,
113 kInvalidSampleRate,
114 kInvalidPointer,
115 kAccelerateError,
116 kPreemptiveExpandError,
117 kComfortNoiseErrorCode,
118 kDecoderErrorCode,
119 kOtherDecoderError,
120 kInvalidOperation,
121 kDtmfParameterError,
122 kDtmfParsingError,
123 kDtmfInsertError,
124 kStereoNotSupported,
125 kSampleUnderrun,
126 kDecodedTooMuch,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000127 kRedundancySplitError,
ossu17e3fa12016-09-08 04:52:55 -0700128 kPacketBufferCorruption
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000129 };
130
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000131 // Creates a new NetEq object, with parameters set in |config|. The |config|
132 // object will only have to be valid for the duration of the call to this
133 // method.
ossue3525782016-05-25 07:37:43 -0700134 static NetEq* Create(
135 const NetEq::Config& config,
136 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000137
138 virtual ~NetEq() {}
139
140 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
141 // of the time when the packet was received, and should be measured with
142 // the same tick rate as the RTP timestamp of the current payload.
143 // Returns 0 on success, -1 on failure.
Henrik Lundin70c09bd2017-04-24 15:56:56 +0200144 virtual int InsertPacket(const RTPHeader& rtp_header,
kwibergee2bac22015-11-11 10:34:00 -0800145 rtc::ArrayView<const uint8_t> payload,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000146 uint32_t receive_timestamp) = 0;
147
148 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
henrik.lundin7dc68892016-04-06 01:03:02 -0700149 // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
150 // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
henrik.lundin55480f52016-03-08 02:37:57 -0800151 // |vad_activity_| are updated upon success. If an error is returned, some
henrik.lundin5fac3f02016-08-24 11:18:49 -0700152 // fields may not have been updated, or may contain inconsistent values.
henrik.lundin7a926812016-05-12 13:51:28 -0700153 // If muted state is enabled (through Config::enable_muted_state), |muted|
154 // may be set to true after a prolonged expand period. When this happens, the
155 // |data_| in |audio_frame| is not written, but should be interpreted as being
156 // all zeros.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000157 // Returns kOK on success, or kFail in case of an error.
henrik.lundin7a926812016-05-12 13:51:28 -0700158 virtual int GetAudio(AudioFrame* audio_frame, bool* muted) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000159
kwiberg1c07c702017-03-27 07:15:49 -0700160 // Replaces the current set of decoders with the given one.
161 virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;
162
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800163 // Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
164 // information in the codec database. Returns 0 on success, -1 on failure.
165 // The name is only used to provide information back to the caller about the
166 // decoders. Hence, the name is arbitrary, and may be empty.
kwibergee1879c2015-10-29 06:20:28 -0700167 virtual int RegisterPayloadType(NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800168 const std::string& codec_name,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000169 uint8_t rtp_payload_type) = 0;
170
171 // Provides an externally created decoder object |decoder| to insert in the
172 // decoder database. The decoder implements a decoder of type |codec| and
kwiberg342f7402016-06-16 03:18:00 -0700173 // associates it with |rtp_payload_type| and |codec_name|. Returns kOK on
174 // success, kFail on failure. The name is only used to provide information
175 // back to the caller about the decoders. Hence, the name is arbitrary, and
176 // may be empty.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000177 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
kwibergee1879c2015-10-29 06:20:28 -0700178 NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800179 const std::string& codec_name,
kwiberg342f7402016-06-16 03:18:00 -0700180 uint8_t rtp_payload_type) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000181
kwiberg5adaf732016-10-04 09:33:27 -0700182 // Associates |rtp_payload_type| with the given codec, which NetEq will
183 // instantiate when it needs it. Returns true iff successful.
184 virtual bool RegisterPayloadType(int rtp_payload_type,
185 const SdpAudioFormat& audio_format) = 0;
186
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000187 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
188 // -1 on failure.
189 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
190
kwiberg6b19b562016-09-20 04:02:25 -0700191 // Removes all payload types from the codec database.
192 virtual void RemoveAllPayloadTypes() = 0;
193
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000194 // Sets a minimum delay in millisecond for packet buffer. The minimum is
195 // maintained unless a higher latency is dictated by channel condition.
196 // Returns true if the minimum is successfully applied, otherwise false is
197 // returned.
198 virtual bool SetMinimumDelay(int delay_ms) = 0;
199
200 // Sets a maximum delay in milliseconds for packet buffer. The latency will
201 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000202 // conditions) is higher. Calling this method has the same effect as setting
203 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000204 virtual bool SetMaximumDelay(int delay_ms) = 0;
205
206 // The smallest latency required. This is computed bases on inter-arrival
207 // time and internal NetEq logic. Note that in computing this latency none of
208 // the user defined limits (applied by calling setMinimumDelay() and/or
209 // SetMaximumDelay()) are applied.
210 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000211
212 // Not implemented.
213 virtual int SetTargetDelay() = 0;
214
henrik.lundin114c1b32017-04-26 07:47:32 -0700215 // Returns the current target delay in ms. This includes any extra delay
216 // requested through SetMinimumDelay.
217 virtual int TargetDelayMs() = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000218
henrik.lundin9c3efd02015-08-27 13:12:22 -0700219 // Returns the current total delay (packet buffer and sync buffer) in ms.
220 virtual int CurrentDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000221
henrik.lundinb3f1c5d2016-08-22 15:39:53 -0700222 // Returns the current total delay (packet buffer and sync buffer) in ms,
223 // with smoothing applied to even out short-time fluctuations due to jitter.
224 // The packet buffer part of the delay is not updated during DTX/CNG periods.
225 virtual int FilteredCurrentDelayMs() const = 0;
226
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000227 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000228 // Deprecated. Set the mode in the Config struct passed to the constructor.
229 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000230 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
231
232 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000233 // Deprecated.
234 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000235 virtual NetEqPlayoutMode PlayoutMode() const = 0;
236
237 // Writes the current network statistics to |stats|. The statistics are reset
238 // after the call.
239 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
240
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000241 // Writes the current RTCP statistics to |stats|. The statistics are reset
242 // and a new report period is started with the call.
243 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
244
245 // Same as RtcpStatistics(), but does not reset anything.
246 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
247
248 // Enables post-decode VAD. When enabled, GetAudio() will return
249 // kOutputVADPassive when the signal contains no speech.
250 virtual void EnableVad() = 0;
251
252 // Disables post-decode VAD.
253 virtual void DisableVad() = 0;
254
henrik.lundin9a410dd2016-04-06 01:39:22 -0700255 // Returns the RTP timestamp for the last sample delivered by GetAudio().
256 // The return value will be empty if no valid timestamp is available.
henrik.lundin15c51e32016-04-06 08:38:56 -0700257 virtual rtc::Optional<uint32_t> GetPlayoutTimestamp() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000258
henrik.lundind89814b2015-11-23 06:49:25 -0800259 // Returns the sample rate in Hz of the audio produced in the last GetAudio
260 // call. If GetAudio has not been called yet, the configured sample rate
261 // (Config::sample_rate_hz) is returned.
262 virtual int last_output_sample_rate_hz() const = 0;
263
kwiberg6f0f6162016-09-20 03:07:46 -0700264 // Returns info about the decoder for the given payload type, or an empty
265 // value if we have no decoder for that payload type.
266 virtual rtc::Optional<CodecInst> GetDecoder(int payload_type) const = 0;
267
ossuf1b08da2016-09-23 02:19:43 -0700268 // Returns the decoder format for the given payload type. Returns empty if no
269 // such payload type was registered.
270 virtual rtc::Optional<SdpAudioFormat> GetDecoderFormat(
271 int payload_type) const = 0;
kwibergc4ccd4d2016-09-21 10:55:15 -0700272
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000273 // Not implemented.
274 virtual int SetTargetNumberOfChannels() = 0;
275
276 // Not implemented.
277 virtual int SetTargetSampleRate() = 0;
278
279 // Returns the error code for the last occurred error. If no error has
280 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000281 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000282
283 // Returns the error code last returned by a decoder (audio or comfort noise).
284 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
285 // this method to get the decoder's error code.
286 virtual int LastDecoderError() = 0;
287
288 // Flushes both the packet buffer and the sync buffer.
289 virtual void FlushBuffers() = 0;
290
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000291 // Current usage of packet-buffer and it's limits.
292 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000293 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000294
henrik.lundin48ed9302015-10-29 05:36:24 -0700295 // Enables NACK and sets the maximum size of the NACK list, which should be
296 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
297 // enabled then the maximum NACK list size is modified accordingly.
298 virtual void EnableNack(size_t max_nack_list_size) = 0;
299
300 virtual void DisableNack() = 0;
301
302 // Returns a list of RTP sequence numbers corresponding to packets to be
303 // retransmitted, given an estimate of the round-trip time in milliseconds.
304 virtual std::vector<uint16_t> GetNackList(
305 int64_t round_trip_time_ms) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000306
henrik.lundin114c1b32017-04-26 07:47:32 -0700307 // Returns a vector containing the timestamps of the packets that were decoded
308 // in the last GetAudio call. If no packets were decoded in the last call, the
309 // vector is empty.
310 // Mainly intended for testing.
311 virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
312
313 // Returns the length of the audio yet to play in the sync buffer.
314 // Mainly intended for testing.
315 virtual int SyncBufferSizeMs() const = 0;
316
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000317 protected:
318 NetEq() {}
319
320 private:
henrikg3c089d72015-09-16 05:37:44 -0700321 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000322};
323
324} // namespace webrtc
Henrik Kjellander74640892015-10-29 11:31:02 +0100325#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_