blob: 88677d83558735a7643e57f89d9971ec16537b9b [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Henrik Kjellander74640892015-10-29 11:31:02 +010011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
#include <string.h>  // Provide access to size_t.

#include <string>
#include <vector>

#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/typedefs.h"
22
23namespace webrtc {
24
// Forward declarations.
// WebRtcRTPHeader is only used by reference in this header, so an incomplete
// type is sufficient here.
struct WebRtcRTPHeader;
27
// Snapshot of NetEq's network statistics, filled in by
// NetEq::NetworkStatistics(). All "rate" fields are fixed-point fractions in
// Q14 format, as stated on each field.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;  // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;  // 1 if adding extra delay due to peaky
                                // jitter; 0 otherwise.
  uint16_t packet_loss_rate;  // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;  // Late loss rate in Q14.
  uint16_t expand_rate;  // Fraction (of original stream) of synthesized
                         // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;  // Fraction of data inserted through pre-emptive
                             // expansion (in Q14).
  uint16_t accelerate_rate;  // Fraction of data removed through acceleration
                             // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
                           // (positive or negative).
  size_t added_zero_samples;  // Number of zero samples added in "off" mode.
  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
55
// Speech type of the audio delivered by NetEq::GetAudio(), reported through
// that method's |type| output argument.
enum NetEqOutputType {
  kOutputNormal,
  kOutputPLC,
  kOutputCNG,
  kOutputPLCtoCNG,
  kOutputVADPassive  // Returned when post-decode VAD is enabled and the
                     // signal contains no speech.
};
63
// Playout mode of a NetEq instance. Selected through
// NetEq::Config::playout_mode (or the deprecated NetEq::SetPlayoutMode()).
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
70
71// This is the interface class for NetEq.
72class NetEq {
73 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000074 enum BackgroundNoiseMode {
75 kBgnOn, // Default behavior with eternal noise.
76 kBgnFade, // Noise fades to zero after some time.
77 kBgnOff // Background noise is always zero.
78 };
79
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000080 struct Config {
81 Config()
82 : sample_rate_hz(16000),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000083 enable_audio_classifier(false),
henrik.lundin9bc26672015-11-02 03:25:57 -080084 enable_post_decode_vad(false),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000085 max_packets_in_buffer(50),
86 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000087 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000088 background_noise_mode(kBgnOff),
Henrik Lundincf808d22015-05-27 14:33:29 +020089 playout_mode(kPlayoutOn),
90 enable_fast_accelerate(false) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000091
Henrik Lundin905495c2015-05-25 16:58:41 +020092 std::string ToString() const;
93
Henrik Lundin83b5c052015-05-08 10:33:57 +020094 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000095 bool enable_audio_classifier;
henrik.lundin9bc26672015-11-02 03:25:57 -080096 bool enable_post_decode_vad;
Peter Kastingdce40cf2015-08-24 14:52:23 -070097 size_t max_packets_in_buffer;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000098 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000099 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000100 NetEqPlayoutMode playout_mode;
Henrik Lundincf808d22015-05-27 14:33:29 +0200101 bool enable_fast_accelerate;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000102 };
103
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000104 enum ReturnCodes {
105 kOK = 0,
106 kFail = -1,
107 kNotImplemented = -2
108 };
109
110 enum ErrorCodes {
111 kNoError = 0,
112 kOtherError,
113 kInvalidRtpPayloadType,
114 kUnknownRtpPayloadType,
115 kCodecNotSupported,
116 kDecoderExists,
117 kDecoderNotFound,
118 kInvalidSampleRate,
119 kInvalidPointer,
120 kAccelerateError,
121 kPreemptiveExpandError,
122 kComfortNoiseErrorCode,
123 kDecoderErrorCode,
124 kOtherDecoderError,
125 kInvalidOperation,
126 kDtmfParameterError,
127 kDtmfParsingError,
128 kDtmfInsertError,
129 kStereoNotSupported,
130 kSampleUnderrun,
131 kDecodedTooMuch,
132 kFrameSplitError,
133 kRedundancySplitError,
minyue@webrtc.org7bb54362013-08-06 05:40:57 +0000134 kPacketBufferCorruption,
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000135 kSyncPacketNotAccepted
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000136 };
137
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000138 // Creates a new NetEq object, with parameters set in |config|. The |config|
139 // object will only have to be valid for the duration of the call to this
140 // method.
141 static NetEq* Create(const NetEq::Config& config);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000142
143 virtual ~NetEq() {}
144
145 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
146 // of the time when the packet was received, and should be measured with
147 // the same tick rate as the RTP timestamp of the current payload.
148 // Returns 0 on success, -1 on failure.
149 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
kwibergee2bac22015-11-11 10:34:00 -0800150 rtc::ArrayView<const uint8_t> payload,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000151 uint32_t receive_timestamp) = 0;
152
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000153 // Inserts a sync-packet into packet queue. Sync-packets are decoded to
154 // silence and are intended to keep AV-sync intact in an event of long packet
155 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
156 // might insert sync-packet when they observe that buffer level of NetEq is
157 // decreasing below a certain threshold, defined by the application.
158 // Sync-packets should have the same payload type as the last audio payload
159 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
160 // can be implied by inserting a sync-packet.
161 // Returns kOk on success, kFail on failure.
162 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
163 uint32_t receive_timestamp) = 0;
164
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000165 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
166 // |output_audio|, which can hold (at least) |max_length| elements.
167 // The number of channels that were written to the output is provided in
168 // the output variable |num_channels|, and each channel contains
169 // |samples_per_channel| elements. If more than one channel is written,
170 // the samples are interleaved.
171 // The speech type is written to |type|, if |type| is not NULL.
172 // Returns kOK on success, or kFail in case of an error.
173 virtual int GetAudio(size_t max_length, int16_t* output_audio,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700174 size_t* samples_per_channel, int* num_channels,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000175 NetEqOutputType* type) = 0;
176
177 // Associates |rtp_payload_type| with |codec| and stores the information in
178 // the codec database. Returns 0 on success, -1 on failure.
kwibergee1879c2015-10-29 06:20:28 -0700179 virtual int RegisterPayloadType(NetEqDecoder codec,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000180 uint8_t rtp_payload_type) = 0;
181
182 // Provides an externally created decoder object |decoder| to insert in the
183 // decoder database. The decoder implements a decoder of type |codec| and
Karl Wibergd8399e62015-05-25 14:39:56 +0200184 // associates it with |rtp_payload_type|. The decoder will produce samples
185 // at the rate |sample_rate_hz|. Returns kOK on success, kFail on failure.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000186 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
kwibergee1879c2015-10-29 06:20:28 -0700187 NetEqDecoder codec,
Karl Wibergd8399e62015-05-25 14:39:56 +0200188 uint8_t rtp_payload_type,
189 int sample_rate_hz) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000190
191 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
192 // -1 on failure.
193 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
194
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000195 // Sets a minimum delay in millisecond for packet buffer. The minimum is
196 // maintained unless a higher latency is dictated by channel condition.
197 // Returns true if the minimum is successfully applied, otherwise false is
198 // returned.
199 virtual bool SetMinimumDelay(int delay_ms) = 0;
200
201 // Sets a maximum delay in milliseconds for packet buffer. The latency will
202 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000203 // conditions) is higher. Calling this method has the same effect as setting
204 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000205 virtual bool SetMaximumDelay(int delay_ms) = 0;
206
207 // The smallest latency required. This is computed bases on inter-arrival
208 // time and internal NetEq logic. Note that in computing this latency none of
209 // the user defined limits (applied by calling setMinimumDelay() and/or
210 // SetMaximumDelay()) are applied.
211 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000212
213 // Not implemented.
214 virtual int SetTargetDelay() = 0;
215
216 // Not implemented.
217 virtual int TargetDelay() = 0;
218
henrik.lundin9c3efd02015-08-27 13:12:22 -0700219 // Returns the current total delay (packet buffer and sync buffer) in ms.
220 virtual int CurrentDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000221
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000222 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000223 // Deprecated. Set the mode in the Config struct passed to the constructor.
224 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000225 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
226
227 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000228 // Deprecated.
229 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000230 virtual NetEqPlayoutMode PlayoutMode() const = 0;
231
232 // Writes the current network statistics to |stats|. The statistics are reset
233 // after the call.
234 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
235
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000236 // Writes the current RTCP statistics to |stats|. The statistics are reset
237 // and a new report period is started with the call.
238 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
239
240 // Same as RtcpStatistics(), but does not reset anything.
241 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
242
243 // Enables post-decode VAD. When enabled, GetAudio() will return
244 // kOutputVADPassive when the signal contains no speech.
245 virtual void EnableVad() = 0;
246
247 // Disables post-decode VAD.
248 virtual void DisableVad() = 0;
249
wu@webrtc.org94454b72014-06-05 20:34:08 +0000250 // Gets the RTP timestamp for the last sample delivered by GetAudio().
251 // Returns true if the RTP timestamp is valid, otherwise false.
252 virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000253
henrik.lundind89814b2015-11-23 06:49:25 -0800254 // Returns the sample rate in Hz of the audio produced in the last GetAudio
255 // call. If GetAudio has not been called yet, the configured sample rate
256 // (Config::sample_rate_hz) is returned.
257 virtual int last_output_sample_rate_hz() const = 0;
258
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000259 // Not implemented.
260 virtual int SetTargetNumberOfChannels() = 0;
261
262 // Not implemented.
263 virtual int SetTargetSampleRate() = 0;
264
265 // Returns the error code for the last occurred error. If no error has
266 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000267 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000268
269 // Returns the error code last returned by a decoder (audio or comfort noise).
270 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
271 // this method to get the decoder's error code.
272 virtual int LastDecoderError() = 0;
273
274 // Flushes both the packet buffer and the sync buffer.
275 virtual void FlushBuffers() = 0;
276
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000277 // Current usage of packet-buffer and it's limits.
278 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000279 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000280
henrik.lundin48ed9302015-10-29 05:36:24 -0700281 // Enables NACK and sets the maximum size of the NACK list, which should be
282 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
283 // enabled then the maximum NACK list size is modified accordingly.
284 virtual void EnableNack(size_t max_nack_list_size) = 0;
285
286 virtual void DisableNack() = 0;
287
288 // Returns a list of RTP sequence numbers corresponding to packets to be
289 // retransmitted, given an estimate of the round-trip time in milliseconds.
290 virtual std::vector<uint16_t> GetNackList(
291 int64_t round_trip_time_ms) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000292
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000293 protected:
294 NetEq() {}
295
296 private:
henrikg3c089d72015-09-16 05:37:44 -0700297 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000298};
299
300} // namespace webrtc
Henrik Kjellander74640892015-10-29 11:31:02 +0100301#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_