blob: dff09db3db9ae568751c0fd5a4a83ee5958edb65 [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Henrik Kjellander74640892015-10-29 11:31:02 +010011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
#include <string.h>  // Provide access to size_t.

#include <string>
#include <vector>

#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/typedefs.h"
22
23namespace webrtc {
24
// Forward declarations. Only pointers/references to these types appear in this
// header (AudioFrame in NetEq::GetAudio(), WebRtcRTPHeader in
// NetEq::InsertPacket() and NetEq::InsertSyncPacket()), so the full
// definitions are not needed here.
class AudioFrame;
struct WebRtcRTPHeader;
28
// Network-related statistics reported by NetEq; filled in through
// NetEq::NetworkStatistics(). Fields marked "Q14" are fixed-point fractions
// (presumably scaled by 2^14, so that 16384 corresponds to 1.0 -- confirm
// against the implementation).
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;  // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;  // 1 if adding extra delay due to peaky
                                // jitter; 0 otherwise.
  uint16_t packet_loss_rate;  // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;  // Late loss rate in Q14.
  uint16_t expand_rate;  // Fraction (of original stream) of synthesized
                         // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;  // Fraction of data inserted through pre-emptive
                             // expansion (in Q14).
  uint16_t accelerate_rate;  // Fraction of data removed through acceleration
                             // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
                           // (positive or negative).
  size_t added_zero_samples;  // Number of zero samples added in "off" mode.
  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
56
// Classification of the audio delivered by NetEq::GetAudio(), written to that
// method's |type| argument. The enumerator values are implicit (0, 1, 2, ...),
// so the order must not be changed.
// NOTE(review): PLC and CNG presumably stand for packet-loss concealment and
// comfort-noise generation -- confirm against the implementation.
enum NetEqOutputType {
  kOutputNormal,
  kOutputPLC,
  kOutputCNG,
  kOutputPLCtoCNG,
  kOutputVADPassive  // Returned when post-decode VAD is enabled and the signal
                     // contains no speech (see NetEq::EnableVad()).
};
64
// Playout mode of NetEq. Selected through NetEq::Config::playout_mode (which
// defaults to kPlayoutOn) or through the deprecated NetEq::SetPlayoutMode().
// The enumerator values are implicit (0, 1, 2, ...), so the order must not be
// changed.
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
71
72// This is the interface class for NetEq.
73class NetEq {
74 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000075 enum BackgroundNoiseMode {
76 kBgnOn, // Default behavior with eternal noise.
77 kBgnFade, // Noise fades to zero after some time.
78 kBgnOff // Background noise is always zero.
79 };
80
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000081 struct Config {
82 Config()
83 : sample_rate_hz(16000),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000084 enable_audio_classifier(false),
henrik.lundin9bc26672015-11-02 03:25:57 -080085 enable_post_decode_vad(false),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000086 max_packets_in_buffer(50),
87 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000088 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000089 background_noise_mode(kBgnOff),
Henrik Lundincf808d22015-05-27 14:33:29 +020090 playout_mode(kPlayoutOn),
91 enable_fast_accelerate(false) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000092
Henrik Lundin905495c2015-05-25 16:58:41 +020093 std::string ToString() const;
94
Henrik Lundin83b5c052015-05-08 10:33:57 +020095 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000096 bool enable_audio_classifier;
henrik.lundin9bc26672015-11-02 03:25:57 -080097 bool enable_post_decode_vad;
Peter Kastingdce40cf2015-08-24 14:52:23 -070098 size_t max_packets_in_buffer;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000099 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +0000100 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000101 NetEqPlayoutMode playout_mode;
Henrik Lundincf808d22015-05-27 14:33:29 +0200102 bool enable_fast_accelerate;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000103 };
104
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000105 enum ReturnCodes {
106 kOK = 0,
107 kFail = -1,
108 kNotImplemented = -2
109 };
110
111 enum ErrorCodes {
112 kNoError = 0,
113 kOtherError,
114 kInvalidRtpPayloadType,
115 kUnknownRtpPayloadType,
116 kCodecNotSupported,
117 kDecoderExists,
118 kDecoderNotFound,
119 kInvalidSampleRate,
120 kInvalidPointer,
121 kAccelerateError,
122 kPreemptiveExpandError,
123 kComfortNoiseErrorCode,
124 kDecoderErrorCode,
125 kOtherDecoderError,
126 kInvalidOperation,
127 kDtmfParameterError,
128 kDtmfParsingError,
129 kDtmfInsertError,
130 kStereoNotSupported,
131 kSampleUnderrun,
132 kDecodedTooMuch,
133 kFrameSplitError,
134 kRedundancySplitError,
minyue@webrtc.org7bb54362013-08-06 05:40:57 +0000135 kPacketBufferCorruption,
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000136 kSyncPacketNotAccepted
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000137 };
138
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000139 // Creates a new NetEq object, with parameters set in |config|. The |config|
140 // object will only have to be valid for the duration of the call to this
141 // method.
142 static NetEq* Create(const NetEq::Config& config);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000143
144 virtual ~NetEq() {}
145
146 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
147 // of the time when the packet was received, and should be measured with
148 // the same tick rate as the RTP timestamp of the current payload.
149 // Returns 0 on success, -1 on failure.
150 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
kwibergee2bac22015-11-11 10:34:00 -0800151 rtc::ArrayView<const uint8_t> payload,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000152 uint32_t receive_timestamp) = 0;
153
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000154 // Inserts a sync-packet into packet queue. Sync-packets are decoded to
155 // silence and are intended to keep AV-sync intact in an event of long packet
156 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
157 // might insert sync-packet when they observe that buffer level of NetEq is
158 // decreasing below a certain threshold, defined by the application.
159 // Sync-packets should have the same payload type as the last audio payload
160 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
161 // can be implied by inserting a sync-packet.
162 // Returns kOk on success, kFail on failure.
163 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
164 uint32_t receive_timestamp) = 0;
165
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000166 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
henrik.lundin6d8e0112016-03-04 10:34:21 -0800167 // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |interleaved_|,
168 // |num_channels_|, and |samples_per_channel_| are updated upon success. If
169 // an error is returned, some fields may not have been updated.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000170 // The speech type is written to |type|, if |type| is not NULL.
171 // Returns kOK on success, or kFail in case of an error.
henrik.lundin6d8e0112016-03-04 10:34:21 -0800172 virtual int GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000173
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800174 // Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
175 // information in the codec database. Returns 0 on success, -1 on failure.
176 // The name is only used to provide information back to the caller about the
177 // decoders. Hence, the name is arbitrary, and may be empty.
kwibergee1879c2015-10-29 06:20:28 -0700178 virtual int RegisterPayloadType(NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800179 const std::string& codec_name,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000180 uint8_t rtp_payload_type) = 0;
181
182 // Provides an externally created decoder object |decoder| to insert in the
183 // decoder database. The decoder implements a decoder of type |codec| and
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800184 // associates it with |rtp_payload_type| and |codec_name|. The decoder will
185 // produce samples at the rate |sample_rate_hz|. Returns kOK on success, kFail
186 // on failure.
187 // The name is only used to provide information back to the caller about the
188 // decoders. Hence, the name is arbitrary, and may be empty.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000189 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
kwibergee1879c2015-10-29 06:20:28 -0700190 NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800191 const std::string& codec_name,
Karl Wibergd8399e62015-05-25 14:39:56 +0200192 uint8_t rtp_payload_type,
193 int sample_rate_hz) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000194
195 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
196 // -1 on failure.
197 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
198
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000199 // Sets a minimum delay in millisecond for packet buffer. The minimum is
200 // maintained unless a higher latency is dictated by channel condition.
201 // Returns true if the minimum is successfully applied, otherwise false is
202 // returned.
203 virtual bool SetMinimumDelay(int delay_ms) = 0;
204
205 // Sets a maximum delay in milliseconds for packet buffer. The latency will
206 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000207 // conditions) is higher. Calling this method has the same effect as setting
208 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000209 virtual bool SetMaximumDelay(int delay_ms) = 0;
210
211 // The smallest latency required. This is computed bases on inter-arrival
212 // time and internal NetEq logic. Note that in computing this latency none of
213 // the user defined limits (applied by calling setMinimumDelay() and/or
214 // SetMaximumDelay()) are applied.
215 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000216
217 // Not implemented.
218 virtual int SetTargetDelay() = 0;
219
220 // Not implemented.
221 virtual int TargetDelay() = 0;
222
henrik.lundin9c3efd02015-08-27 13:12:22 -0700223 // Returns the current total delay (packet buffer and sync buffer) in ms.
224 virtual int CurrentDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000225
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000226 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000227 // Deprecated. Set the mode in the Config struct passed to the constructor.
228 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000229 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
230
231 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000232 // Deprecated.
233 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000234 virtual NetEqPlayoutMode PlayoutMode() const = 0;
235
236 // Writes the current network statistics to |stats|. The statistics are reset
237 // after the call.
238 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
239
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000240 // Writes the current RTCP statistics to |stats|. The statistics are reset
241 // and a new report period is started with the call.
242 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
243
244 // Same as RtcpStatistics(), but does not reset anything.
245 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
246
247 // Enables post-decode VAD. When enabled, GetAudio() will return
248 // kOutputVADPassive when the signal contains no speech.
249 virtual void EnableVad() = 0;
250
251 // Disables post-decode VAD.
252 virtual void DisableVad() = 0;
253
wu@webrtc.org94454b72014-06-05 20:34:08 +0000254 // Gets the RTP timestamp for the last sample delivered by GetAudio().
255 // Returns true if the RTP timestamp is valid, otherwise false.
256 virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000257
henrik.lundind89814b2015-11-23 06:49:25 -0800258 // Returns the sample rate in Hz of the audio produced in the last GetAudio
259 // call. If GetAudio has not been called yet, the configured sample rate
260 // (Config::sample_rate_hz) is returned.
261 virtual int last_output_sample_rate_hz() const = 0;
262
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000263 // Not implemented.
264 virtual int SetTargetNumberOfChannels() = 0;
265
266 // Not implemented.
267 virtual int SetTargetSampleRate() = 0;
268
269 // Returns the error code for the last occurred error. If no error has
270 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000271 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000272
273 // Returns the error code last returned by a decoder (audio or comfort noise).
274 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
275 // this method to get the decoder's error code.
276 virtual int LastDecoderError() = 0;
277
278 // Flushes both the packet buffer and the sync buffer.
279 virtual void FlushBuffers() = 0;
280
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000281 // Current usage of packet-buffer and it's limits.
282 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000283 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000284
henrik.lundin48ed9302015-10-29 05:36:24 -0700285 // Enables NACK and sets the maximum size of the NACK list, which should be
286 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
287 // enabled then the maximum NACK list size is modified accordingly.
288 virtual void EnableNack(size_t max_nack_list_size) = 0;
289
290 virtual void DisableNack() = 0;
291
292 // Returns a list of RTP sequence numbers corresponding to packets to be
293 // retransmitted, given an estimate of the round-trip time in milliseconds.
294 virtual std::vector<uint16_t> GetNackList(
295 int64_t round_trip_time_ms) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000296
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000297 protected:
298 NetEq() {}
299
300 private:
henrikg3c089d72015-09-16 05:37:44 -0700301 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000302};
303
304} // namespace webrtc
Henrik Kjellander74640892015-10-29 11:31:02 +0100305#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_