/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_

#include <string.h>  // Provide access to size_t.

#include <vector>

#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/typedefs.h"

namespace webrtc {

// Forward declarations.
// WebRtcRTPHeader is only used by reference in the NetEq interface below, so
// an incomplete type is sufficient here.
struct WebRtcRTPHeader;

// Network statistics as written by NetEq::NetworkStatistics().
// Fields documented as "Q14" are fixed-point values scaled by 2^14.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;    // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;        // 1 if adding extra delay due to peaky
                                      // jitter; 0 otherwise.
  uint16_t packet_loss_rate;          // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;       // Late loss rate in Q14.
  uint16_t expand_rate;         // Fraction (of original stream) of synthesized
                                // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;     // Fraction of data inserted through pre-emptive
                                // expansion (in Q14).
  uint16_t accelerate_rate;     // Fraction of data removed through acceleration
                                // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
                           // (positive or negative).
  int added_zero_samples;  // Number of zero samples added in "off" mode.
};

// Speech type of the audio delivered by NetEq::GetAudio(); written to that
// method's |type| argument when it is not NULL.
enum NetEqOutputType {
  kOutputNormal,
  kOutputPLC,
  kOutputCNG,
  kOutputPLCtoCNG,
  kOutputVADPassive  // Reported when post-decode VAD is enabled (see
                     // NetEq::EnableVad()) and the signal contains no speech.
};

// Playout mode of a NetEq instance. Selected through
// NetEq::Config::playout_mode, or through the deprecated
// NetEq::SetPlayoutMode().
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};

65// This is the interface class for NetEq.
66class NetEq {
67 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000068 enum BackgroundNoiseMode {
69 kBgnOn, // Default behavior with eternal noise.
70 kBgnFade, // Noise fades to zero after some time.
71 kBgnOff // Background noise is always zero.
72 };
73
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000074 struct Config {
75 Config()
76 : sample_rate_hz(16000),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000077 enable_audio_classifier(false),
78 max_packets_in_buffer(50),
79 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000080 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000081 background_noise_mode(kBgnOff),
82 playout_mode(kPlayoutOn) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000083
Henrik Lundin83b5c052015-05-08 10:33:57 +020084 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000085 bool enable_audio_classifier;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000086 int max_packets_in_buffer;
87 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000088 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000089 NetEqPlayoutMode playout_mode;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000090 };
91
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000092 enum ReturnCodes {
93 kOK = 0,
94 kFail = -1,
95 kNotImplemented = -2
96 };
97
98 enum ErrorCodes {
99 kNoError = 0,
100 kOtherError,
101 kInvalidRtpPayloadType,
102 kUnknownRtpPayloadType,
103 kCodecNotSupported,
104 kDecoderExists,
105 kDecoderNotFound,
106 kInvalidSampleRate,
107 kInvalidPointer,
108 kAccelerateError,
109 kPreemptiveExpandError,
110 kComfortNoiseErrorCode,
111 kDecoderErrorCode,
112 kOtherDecoderError,
113 kInvalidOperation,
114 kDtmfParameterError,
115 kDtmfParsingError,
116 kDtmfInsertError,
117 kStereoNotSupported,
118 kSampleUnderrun,
119 kDecodedTooMuch,
120 kFrameSplitError,
121 kRedundancySplitError,
minyue@webrtc.org7bb54362013-08-06 05:40:57 +0000122 kPacketBufferCorruption,
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000123 kSyncPacketNotAccepted
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000124 };
125
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000126 // Creates a new NetEq object, with parameters set in |config|. The |config|
127 // object will only have to be valid for the duration of the call to this
128 // method.
129 static NetEq* Create(const NetEq::Config& config);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000130
131 virtual ~NetEq() {}
132
133 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
134 // of the time when the packet was received, and should be measured with
135 // the same tick rate as the RTP timestamp of the current payload.
136 // Returns 0 on success, -1 on failure.
137 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
138 const uint8_t* payload,
pkasting@chromium.org4591fbd2014-11-20 22:28:14 +0000139 size_t length_bytes,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000140 uint32_t receive_timestamp) = 0;
141
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000142 // Inserts a sync-packet into packet queue. Sync-packets are decoded to
143 // silence and are intended to keep AV-sync intact in an event of long packet
144 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
145 // might insert sync-packet when they observe that buffer level of NetEq is
146 // decreasing below a certain threshold, defined by the application.
147 // Sync-packets should have the same payload type as the last audio payload
148 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
149 // can be implied by inserting a sync-packet.
150 // Returns kOk on success, kFail on failure.
151 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
152 uint32_t receive_timestamp) = 0;
153
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000154 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
155 // |output_audio|, which can hold (at least) |max_length| elements.
156 // The number of channels that were written to the output is provided in
157 // the output variable |num_channels|, and each channel contains
158 // |samples_per_channel| elements. If more than one channel is written,
159 // the samples are interleaved.
160 // The speech type is written to |type|, if |type| is not NULL.
161 // Returns kOK on success, or kFail in case of an error.
162 virtual int GetAudio(size_t max_length, int16_t* output_audio,
163 int* samples_per_channel, int* num_channels,
164 NetEqOutputType* type) = 0;
165
166 // Associates |rtp_payload_type| with |codec| and stores the information in
167 // the codec database. Returns 0 on success, -1 on failure.
168 virtual int RegisterPayloadType(enum NetEqDecoder codec,
169 uint8_t rtp_payload_type) = 0;
170
171 // Provides an externally created decoder object |decoder| to insert in the
172 // decoder database. The decoder implements a decoder of type |codec| and
Karl Wibergd8399e62015-05-25 14:39:56 +0200173 // associates it with |rtp_payload_type|. The decoder will produce samples
174 // at the rate |sample_rate_hz|. Returns kOK on success, kFail on failure.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000175 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
176 enum NetEqDecoder codec,
Karl Wibergd8399e62015-05-25 14:39:56 +0200177 uint8_t rtp_payload_type,
178 int sample_rate_hz) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000179
180 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
181 // -1 on failure.
182 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
183
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000184 // Sets a minimum delay in millisecond for packet buffer. The minimum is
185 // maintained unless a higher latency is dictated by channel condition.
186 // Returns true if the minimum is successfully applied, otherwise false is
187 // returned.
188 virtual bool SetMinimumDelay(int delay_ms) = 0;
189
190 // Sets a maximum delay in milliseconds for packet buffer. The latency will
191 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000192 // conditions) is higher. Calling this method has the same effect as setting
193 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000194 virtual bool SetMaximumDelay(int delay_ms) = 0;
195
196 // The smallest latency required. This is computed bases on inter-arrival
197 // time and internal NetEq logic. Note that in computing this latency none of
198 // the user defined limits (applied by calling setMinimumDelay() and/or
199 // SetMaximumDelay()) are applied.
200 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000201
202 // Not implemented.
203 virtual int SetTargetDelay() = 0;
204
205 // Not implemented.
206 virtual int TargetDelay() = 0;
207
208 // Not implemented.
209 virtual int CurrentDelay() = 0;
210
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000211 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000212 // Deprecated. Set the mode in the Config struct passed to the constructor.
213 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000214 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
215
216 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000217 // Deprecated.
218 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000219 virtual NetEqPlayoutMode PlayoutMode() const = 0;
220
221 // Writes the current network statistics to |stats|. The statistics are reset
222 // after the call.
223 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
224
225 // Writes the last packet waiting times (in ms) to |waiting_times|. The number
226 // of values written is no more than 100, but may be smaller if the interface
227 // is polled again before 100 packets has arrived.
228 virtual void WaitingTimes(std::vector<int>* waiting_times) = 0;
229
230 // Writes the current RTCP statistics to |stats|. The statistics are reset
231 // and a new report period is started with the call.
232 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
233
234 // Same as RtcpStatistics(), but does not reset anything.
235 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
236
237 // Enables post-decode VAD. When enabled, GetAudio() will return
238 // kOutputVADPassive when the signal contains no speech.
239 virtual void EnableVad() = 0;
240
241 // Disables post-decode VAD.
242 virtual void DisableVad() = 0;
243
wu@webrtc.org94454b72014-06-05 20:34:08 +0000244 // Gets the RTP timestamp for the last sample delivered by GetAudio().
245 // Returns true if the RTP timestamp is valid, otherwise false.
246 virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000247
248 // Not implemented.
249 virtual int SetTargetNumberOfChannels() = 0;
250
251 // Not implemented.
252 virtual int SetTargetSampleRate() = 0;
253
254 // Returns the error code for the last occurred error. If no error has
255 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000256 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000257
258 // Returns the error code last returned by a decoder (audio or comfort noise).
259 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
260 // this method to get the decoder's error code.
261 virtual int LastDecoderError() = 0;
262
263 // Flushes both the packet buffer and the sync buffer.
264 virtual void FlushBuffers() = 0;
265
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000266 // Current usage of packet-buffer and it's limits.
267 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000268 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000269
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000270 // Get sequence number and timestamp of the latest RTP.
271 // This method is to facilitate NACK.
turaj@webrtc.orgff43c852013-09-25 00:07:27 +0000272 virtual int DecodedRtpInfo(int* sequence_number,
273 uint32_t* timestamp) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000274
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000275 protected:
276 NetEq() {}
277
278 private:
279 DISALLOW_COPY_AND_ASSIGN(NetEq);
280};

}  // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_