blob: 9cd4b57a1f78ee78c914bbb97611a6f196844dd8 [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Henrik Kjellander74640892015-10-29 11:31:02 +010011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000014#include <string.h> // Provide access to size_t.
15
Henrik Lundin905495c2015-05-25 16:58:41 +020016#include <string>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000017
henrike@webrtc.org88fbb2d2014-05-21 21:18:46 +000018#include "webrtc/base/constructormagic.h"
sprang@webrtc.orgfe5d36b2013-10-28 09:21:07 +000019#include "webrtc/common_types.h"
kwiberg@webrtc.orge04a93b2014-12-09 10:12:53 +000020#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000021#include "webrtc/typedefs.h"
22
23namespace webrtc {
24
25// Forward declarations.
26struct WebRtcRTPHeader;
27
// Snapshot of NetEq network statistics. An instance is filled in by
// NetEq::NetworkStatistics(). Fields marked "Q14" use the Q14 representation.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;    // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;        // 1 if adding extra delay due to peaky
                                      // jitter; 0 otherwise.
  uint16_t packet_loss_rate;          // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;       // Late loss rate in Q14.
  uint16_t expand_rate;         // Fraction (of original stream) of synthesized
                                // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;     // Fraction of data inserted through pre-emptive
                                // expansion (in Q14).
  uint16_t accelerate_rate;     // Fraction of data removed through acceleration
                                // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;     // Average clock-drift in parts-per-million
                              // (positive or negative).
  size_t added_zero_samples;  // Number of zero samples added in "off" mode.

  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded. All values are in milliseconds.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
55
// Type of the audio delivered by NetEq::GetAudio(), reported through that
// method's |type| output argument.
enum NetEqOutputType {
  kOutputNormal,
  kOutputPLC,
  kOutputCNG,
  kOutputPLCtoCNG,
  // Reported when post-decode VAD is enabled and the signal contains no speech.
  kOutputVADPassive
};
63
// Playout mode of a NetEq instance. Selected through
// NetEq::Config::playout_mode (default kPlayoutOn) or the deprecated
// NetEq::SetPlayoutMode().
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
70
71// This is the interface class for NetEq.
72class NetEq {
73 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000074 enum BackgroundNoiseMode {
75 kBgnOn, // Default behavior with eternal noise.
76 kBgnFade, // Noise fades to zero after some time.
77 kBgnOff // Background noise is always zero.
78 };
79
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000080 struct Config {
81 Config()
82 : sample_rate_hz(16000),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000083 enable_audio_classifier(false),
84 max_packets_in_buffer(50),
85 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000086 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000087 background_noise_mode(kBgnOff),
Henrik Lundincf808d22015-05-27 14:33:29 +020088 playout_mode(kPlayoutOn),
89 enable_fast_accelerate(false) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000090
Henrik Lundin905495c2015-05-25 16:58:41 +020091 std::string ToString() const;
92
Henrik Lundin83b5c052015-05-08 10:33:57 +020093 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000094 bool enable_audio_classifier;
Peter Kastingdce40cf2015-08-24 14:52:23 -070095 size_t max_packets_in_buffer;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000096 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000097 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000098 NetEqPlayoutMode playout_mode;
Henrik Lundincf808d22015-05-27 14:33:29 +020099 bool enable_fast_accelerate;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000100 };
101
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000102 enum ReturnCodes {
103 kOK = 0,
104 kFail = -1,
105 kNotImplemented = -2
106 };
107
108 enum ErrorCodes {
109 kNoError = 0,
110 kOtherError,
111 kInvalidRtpPayloadType,
112 kUnknownRtpPayloadType,
113 kCodecNotSupported,
114 kDecoderExists,
115 kDecoderNotFound,
116 kInvalidSampleRate,
117 kInvalidPointer,
118 kAccelerateError,
119 kPreemptiveExpandError,
120 kComfortNoiseErrorCode,
121 kDecoderErrorCode,
122 kOtherDecoderError,
123 kInvalidOperation,
124 kDtmfParameterError,
125 kDtmfParsingError,
126 kDtmfInsertError,
127 kStereoNotSupported,
128 kSampleUnderrun,
129 kDecodedTooMuch,
130 kFrameSplitError,
131 kRedundancySplitError,
minyue@webrtc.org7bb54362013-08-06 05:40:57 +0000132 kPacketBufferCorruption,
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000133 kSyncPacketNotAccepted
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000134 };
135
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000136 // Creates a new NetEq object, with parameters set in |config|. The |config|
137 // object will only have to be valid for the duration of the call to this
138 // method.
139 static NetEq* Create(const NetEq::Config& config);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000140
141 virtual ~NetEq() {}
142
143 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
144 // of the time when the packet was received, and should be measured with
145 // the same tick rate as the RTP timestamp of the current payload.
146 // Returns 0 on success, -1 on failure.
147 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
148 const uint8_t* payload,
pkasting@chromium.org4591fbd2014-11-20 22:28:14 +0000149 size_t length_bytes,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000150 uint32_t receive_timestamp) = 0;
151
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000152 // Inserts a sync-packet into packet queue. Sync-packets are decoded to
153 // silence and are intended to keep AV-sync intact in an event of long packet
154 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
155 // might insert sync-packet when they observe that buffer level of NetEq is
156 // decreasing below a certain threshold, defined by the application.
157 // Sync-packets should have the same payload type as the last audio payload
158 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
159 // can be implied by inserting a sync-packet.
160 // Returns kOk on success, kFail on failure.
161 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
162 uint32_t receive_timestamp) = 0;
163
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000164 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
165 // |output_audio|, which can hold (at least) |max_length| elements.
166 // The number of channels that were written to the output is provided in
167 // the output variable |num_channels|, and each channel contains
168 // |samples_per_channel| elements. If more than one channel is written,
169 // the samples are interleaved.
170 // The speech type is written to |type|, if |type| is not NULL.
171 // Returns kOK on success, or kFail in case of an error.
172 virtual int GetAudio(size_t max_length, int16_t* output_audio,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700173 size_t* samples_per_channel, int* num_channels,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000174 NetEqOutputType* type) = 0;
175
176 // Associates |rtp_payload_type| with |codec| and stores the information in
177 // the codec database. Returns 0 on success, -1 on failure.
178 virtual int RegisterPayloadType(enum NetEqDecoder codec,
179 uint8_t rtp_payload_type) = 0;
180
181 // Provides an externally created decoder object |decoder| to insert in the
182 // decoder database. The decoder implements a decoder of type |codec| and
Karl Wibergd8399e62015-05-25 14:39:56 +0200183 // associates it with |rtp_payload_type|. The decoder will produce samples
184 // at the rate |sample_rate_hz|. Returns kOK on success, kFail on failure.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000185 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
186 enum NetEqDecoder codec,
Karl Wibergd8399e62015-05-25 14:39:56 +0200187 uint8_t rtp_payload_type,
188 int sample_rate_hz) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000189
190 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
191 // -1 on failure.
192 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
193
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000194 // Sets a minimum delay in millisecond for packet buffer. The minimum is
195 // maintained unless a higher latency is dictated by channel condition.
196 // Returns true if the minimum is successfully applied, otherwise false is
197 // returned.
198 virtual bool SetMinimumDelay(int delay_ms) = 0;
199
200 // Sets a maximum delay in milliseconds for packet buffer. The latency will
201 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000202 // conditions) is higher. Calling this method has the same effect as setting
203 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000204 virtual bool SetMaximumDelay(int delay_ms) = 0;
205
206 // The smallest latency required. This is computed bases on inter-arrival
207 // time and internal NetEq logic. Note that in computing this latency none of
208 // the user defined limits (applied by calling setMinimumDelay() and/or
209 // SetMaximumDelay()) are applied.
210 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000211
212 // Not implemented.
213 virtual int SetTargetDelay() = 0;
214
215 // Not implemented.
216 virtual int TargetDelay() = 0;
217
henrik.lundin9c3efd02015-08-27 13:12:22 -0700218 // Returns the current total delay (packet buffer and sync buffer) in ms.
219 virtual int CurrentDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000220
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000221 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000222 // Deprecated. Set the mode in the Config struct passed to the constructor.
223 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000224 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
225
226 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000227 // Deprecated.
228 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000229 virtual NetEqPlayoutMode PlayoutMode() const = 0;
230
231 // Writes the current network statistics to |stats|. The statistics are reset
232 // after the call.
233 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
234
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000235 // Writes the current RTCP statistics to |stats|. The statistics are reset
236 // and a new report period is started with the call.
237 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
238
239 // Same as RtcpStatistics(), but does not reset anything.
240 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
241
242 // Enables post-decode VAD. When enabled, GetAudio() will return
243 // kOutputVADPassive when the signal contains no speech.
244 virtual void EnableVad() = 0;
245
246 // Disables post-decode VAD.
247 virtual void DisableVad() = 0;
248
wu@webrtc.org94454b72014-06-05 20:34:08 +0000249 // Gets the RTP timestamp for the last sample delivered by GetAudio().
250 // Returns true if the RTP timestamp is valid, otherwise false.
251 virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000252
253 // Not implemented.
254 virtual int SetTargetNumberOfChannels() = 0;
255
256 // Not implemented.
257 virtual int SetTargetSampleRate() = 0;
258
259 // Returns the error code for the last occurred error. If no error has
260 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000261 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000262
263 // Returns the error code last returned by a decoder (audio or comfort noise).
264 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
265 // this method to get the decoder's error code.
266 virtual int LastDecoderError() = 0;
267
268 // Flushes both the packet buffer and the sync buffer.
269 virtual void FlushBuffers() = 0;
270
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000271 // Current usage of packet-buffer and it's limits.
272 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000273 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000274
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000275 // Get sequence number and timestamp of the latest RTP.
276 // This method is to facilitate NACK.
turaj@webrtc.orgff43c852013-09-25 00:07:27 +0000277 virtual int DecodedRtpInfo(int* sequence_number,
278 uint32_t* timestamp) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000279
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000280 protected:
281 NetEq() {}
282
283 private:
henrikg3c089d72015-09-16 05:37:44 -0700284 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000285};
286
287} // namespace webrtc
Henrik Kjellander74640892015-10-29 11:31:02 +0100288#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_