blob: 9420cdbac936b5ec98dfa89b7581513ecae5e020 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Henrik Kjellander74640892015-10-29 11:31:02 +010011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
#include <string.h>  // Provide access to size_t.

#include <string>
#include <vector>

#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
#include "webrtc/base/scoped_ref_ptr.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/typedefs.h"
24
25namespace webrtc {
26
// Forward declarations. These types are only used by pointer or reference in
// this header, so their full definitions are not needed here.
class AudioFrame;
struct WebRtcRTPHeader;
class AudioDecoderFactory;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000031
// Snapshot of NetEq network statistics. Rates marked "Q14" are fixed-point
// fractions with 14 fractional bits (i.e., 1 << 14 represents 1.0).
// The struct is a plain aggregate; field order is part of its interface.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;    // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;        // 1 if adding extra delay due to peaky
                                      // jitter; 0 otherwise.
  uint16_t packet_loss_rate;          // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;       // Late loss rate in Q14.
  uint16_t expand_rate;         // Fraction (of original stream) of synthesized
                                // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;     // Fraction of data inserted through pre-emptive
                                // expansion (in Q14).
  uint16_t accelerate_rate;     // Fraction of data removed through acceleration
                                // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;     // Average clock-drift in parts-per-million
                              // (positive or negative).
  size_t added_zero_samples;  // Number of zero samples added in "off" mode.
  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
59
// Playout modes. The mode is selected through NetEq::Config::playout_mode
// (or the deprecated NetEq::SetPlayoutMode()). Kept as an unscoped enum so
// that existing code can refer to the enumerators without qualification.
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
66
67// This is the interface class for NetEq.
68class NetEq {
69 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000070 enum BackgroundNoiseMode {
71 kBgnOn, // Default behavior with eternal noise.
72 kBgnFade, // Noise fades to zero after some time.
73 kBgnOff // Background noise is always zero.
74 };
75
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000076 struct Config {
77 Config()
78 : sample_rate_hz(16000),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000079 enable_audio_classifier(false),
henrik.lundin9bc26672015-11-02 03:25:57 -080080 enable_post_decode_vad(false),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000081 max_packets_in_buffer(50),
82 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000083 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000084 background_noise_mode(kBgnOff),
Henrik Lundincf808d22015-05-27 14:33:29 +020085 playout_mode(kPlayoutOn),
86 enable_fast_accelerate(false) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000087
Henrik Lundin905495c2015-05-25 16:58:41 +020088 std::string ToString() const;
89
Henrik Lundin83b5c052015-05-08 10:33:57 +020090 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000091 bool enable_audio_classifier;
henrik.lundin9bc26672015-11-02 03:25:57 -080092 bool enable_post_decode_vad;
Peter Kastingdce40cf2015-08-24 14:52:23 -070093 size_t max_packets_in_buffer;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000094 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000095 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000096 NetEqPlayoutMode playout_mode;
Henrik Lundincf808d22015-05-27 14:33:29 +020097 bool enable_fast_accelerate;
henrik.lundin7a926812016-05-12 13:51:28 -070098 bool enable_muted_state = false;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000099 };
100
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000101 enum ReturnCodes {
102 kOK = 0,
103 kFail = -1,
104 kNotImplemented = -2
105 };
106
107 enum ErrorCodes {
108 kNoError = 0,
109 kOtherError,
110 kInvalidRtpPayloadType,
111 kUnknownRtpPayloadType,
112 kCodecNotSupported,
113 kDecoderExists,
114 kDecoderNotFound,
115 kInvalidSampleRate,
116 kInvalidPointer,
117 kAccelerateError,
118 kPreemptiveExpandError,
119 kComfortNoiseErrorCode,
120 kDecoderErrorCode,
121 kOtherDecoderError,
122 kInvalidOperation,
123 kDtmfParameterError,
124 kDtmfParsingError,
125 kDtmfInsertError,
126 kStereoNotSupported,
127 kSampleUnderrun,
128 kDecodedTooMuch,
129 kFrameSplitError,
130 kRedundancySplitError,
minyue@webrtc.org7bb54362013-08-06 05:40:57 +0000131 kPacketBufferCorruption,
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000132 kSyncPacketNotAccepted
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000133 };
134
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000135 // Creates a new NetEq object, with parameters set in |config|. The |config|
136 // object will only have to be valid for the duration of the call to this
137 // method.
ossue3525782016-05-25 07:37:43 -0700138 static NetEq* Create(
139 const NetEq::Config& config,
140 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000141
142 virtual ~NetEq() {}
143
144 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
145 // of the time when the packet was received, and should be measured with
146 // the same tick rate as the RTP timestamp of the current payload.
147 // Returns 0 on success, -1 on failure.
148 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
kwibergee2bac22015-11-11 10:34:00 -0800149 rtc::ArrayView<const uint8_t> payload,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000150 uint32_t receive_timestamp) = 0;
151
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000152 // Inserts a sync-packet into packet queue. Sync-packets are decoded to
153 // silence and are intended to keep AV-sync intact in an event of long packet
154 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
155 // might insert sync-packet when they observe that buffer level of NetEq is
156 // decreasing below a certain threshold, defined by the application.
157 // Sync-packets should have the same payload type as the last audio payload
158 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
159 // can be implied by inserting a sync-packet.
160 // Returns kOk on success, kFail on failure.
161 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
162 uint32_t receive_timestamp) = 0;
163
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000164 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
henrik.lundin7dc68892016-04-06 01:03:02 -0700165 // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
166 // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
henrik.lundin55480f52016-03-08 02:37:57 -0800167 // |vad_activity_| are updated upon success. If an error is returned, some
henrik.lundin5fac3f02016-08-24 11:18:49 -0700168 // fields may not have been updated, or may contain inconsistent values.
henrik.lundin7a926812016-05-12 13:51:28 -0700169 // If muted state is enabled (through Config::enable_muted_state), |muted|
170 // may be set to true after a prolonged expand period. When this happens, the
171 // |data_| in |audio_frame| is not written, but should be interpreted as being
172 // all zeros.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000173 // Returns kOK on success, or kFail in case of an error.
henrik.lundin7a926812016-05-12 13:51:28 -0700174 virtual int GetAudio(AudioFrame* audio_frame, bool* muted) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000175
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800176 // Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
177 // information in the codec database. Returns 0 on success, -1 on failure.
178 // The name is only used to provide information back to the caller about the
179 // decoders. Hence, the name is arbitrary, and may be empty.
kwibergee1879c2015-10-29 06:20:28 -0700180 virtual int RegisterPayloadType(NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800181 const std::string& codec_name,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000182 uint8_t rtp_payload_type) = 0;
183
184 // Provides an externally created decoder object |decoder| to insert in the
185 // decoder database. The decoder implements a decoder of type |codec| and
kwiberg342f7402016-06-16 03:18:00 -0700186 // associates it with |rtp_payload_type| and |codec_name|. Returns kOK on
187 // success, kFail on failure. The name is only used to provide information
188 // back to the caller about the decoders. Hence, the name is arbitrary, and
189 // may be empty.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000190 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
kwibergee1879c2015-10-29 06:20:28 -0700191 NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800192 const std::string& codec_name,
kwiberg342f7402016-06-16 03:18:00 -0700193 uint8_t rtp_payload_type) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000194
195 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
196 // -1 on failure.
197 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
198
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000199 // Sets a minimum delay in millisecond for packet buffer. The minimum is
200 // maintained unless a higher latency is dictated by channel condition.
201 // Returns true if the minimum is successfully applied, otherwise false is
202 // returned.
203 virtual bool SetMinimumDelay(int delay_ms) = 0;
204
205 // Sets a maximum delay in milliseconds for packet buffer. The latency will
206 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000207 // conditions) is higher. Calling this method has the same effect as setting
208 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000209 virtual bool SetMaximumDelay(int delay_ms) = 0;
210
211 // The smallest latency required. This is computed bases on inter-arrival
212 // time and internal NetEq logic. Note that in computing this latency none of
213 // the user defined limits (applied by calling setMinimumDelay() and/or
214 // SetMaximumDelay()) are applied.
215 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000216
217 // Not implemented.
218 virtual int SetTargetDelay() = 0;
219
220 // Not implemented.
221 virtual int TargetDelay() = 0;
222
henrik.lundin9c3efd02015-08-27 13:12:22 -0700223 // Returns the current total delay (packet buffer and sync buffer) in ms.
224 virtual int CurrentDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000225
henrik.lundinb3f1c5d2016-08-22 15:39:53 -0700226 // Returns the current total delay (packet buffer and sync buffer) in ms,
227 // with smoothing applied to even out short-time fluctuations due to jitter.
228 // The packet buffer part of the delay is not updated during DTX/CNG periods.
229 virtual int FilteredCurrentDelayMs() const = 0;
230
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000231 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000232 // Deprecated. Set the mode in the Config struct passed to the constructor.
233 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000234 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
235
236 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000237 // Deprecated.
238 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000239 virtual NetEqPlayoutMode PlayoutMode() const = 0;
240
241 // Writes the current network statistics to |stats|. The statistics are reset
242 // after the call.
243 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
244
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000245 // Writes the current RTCP statistics to |stats|. The statistics are reset
246 // and a new report period is started with the call.
247 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
248
249 // Same as RtcpStatistics(), but does not reset anything.
250 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
251
252 // Enables post-decode VAD. When enabled, GetAudio() will return
253 // kOutputVADPassive when the signal contains no speech.
254 virtual void EnableVad() = 0;
255
256 // Disables post-decode VAD.
257 virtual void DisableVad() = 0;
258
henrik.lundin9a410dd2016-04-06 01:39:22 -0700259 // Returns the RTP timestamp for the last sample delivered by GetAudio().
260 // The return value will be empty if no valid timestamp is available.
henrik.lundin15c51e32016-04-06 08:38:56 -0700261 virtual rtc::Optional<uint32_t> GetPlayoutTimestamp() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000262
henrik.lundind89814b2015-11-23 06:49:25 -0800263 // Returns the sample rate in Hz of the audio produced in the last GetAudio
264 // call. If GetAudio has not been called yet, the configured sample rate
265 // (Config::sample_rate_hz) is returned.
266 virtual int last_output_sample_rate_hz() const = 0;
267
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000268 // Not implemented.
269 virtual int SetTargetNumberOfChannels() = 0;
270
271 // Not implemented.
272 virtual int SetTargetSampleRate() = 0;
273
274 // Returns the error code for the last occurred error. If no error has
275 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000276 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000277
278 // Returns the error code last returned by a decoder (audio or comfort noise).
279 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
280 // this method to get the decoder's error code.
281 virtual int LastDecoderError() = 0;
282
283 // Flushes both the packet buffer and the sync buffer.
284 virtual void FlushBuffers() = 0;
285
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000286 // Current usage of packet-buffer and it's limits.
287 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000288 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000289
henrik.lundin48ed9302015-10-29 05:36:24 -0700290 // Enables NACK and sets the maximum size of the NACK list, which should be
291 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
292 // enabled then the maximum NACK list size is modified accordingly.
293 virtual void EnableNack(size_t max_nack_list_size) = 0;
294
295 virtual void DisableNack() = 0;
296
297 // Returns a list of RTP sequence numbers corresponding to packets to be
298 // retransmitted, given an estimate of the round-trip time in milliseconds.
299 virtual std::vector<uint16_t> GetNackList(
300 int64_t round_trip_time_ms) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000301
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000302 protected:
303 NetEq() {}
304
305 private:
henrikg3c089d72015-09-16 05:37:44 -0700306 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000307};
308
309} // namespace webrtc
Henrik Kjellander74640892015-10-29 11:31:02 +0100310#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_