blob: 439f04926936fadd832ba77af9bb99c675882c96 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
henrik.lundin@webrtc.org9c55f0f2014-06-09 08:10:28 +000011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000014#include <string.h> // Provide access to size_t.
15
Henrik Lundin905495c2015-05-25 16:58:41 +020016#include <string>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000017#include <vector>
18
henrike@webrtc.org88fbb2d2014-05-21 21:18:46 +000019#include "webrtc/base/constructormagic.h"
sprang@webrtc.orgfe5d36b2013-10-28 09:21:07 +000020#include "webrtc/common_types.h"
kwiberg@webrtc.orge04a93b2014-12-09 10:12:53 +000021#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000022#include "webrtc/typedefs.h"
23
24namespace webrtc {
25
// Forward declarations.
// Only references to WebRtcRTPHeader appear in this header, so the full
// definition is not needed here.
struct WebRtcRTPHeader;
28
// Network and jitter-buffer statistics, as written by
// NetEq::NetworkStatistics(). Rates are expressed in Q14 unless noted
// otherwise.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;    // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;        // 1 if adding extra delay due to peaky
                                      // jitter; 0 otherwise.
  uint16_t packet_loss_rate;          // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;       // Late loss rate in Q14.
  uint16_t expand_rate;         // Fraction (of original stream) of synthesized
                                // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;     // Fraction of data inserted through pre-emptive
                                // expansion (in Q14).
  uint16_t accelerate_rate;     // Fraction of data removed through acceleration
                                // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
                           // (positive or negative).
  int added_zero_samples;  // Number of zero samples added in "off" mode.
};
50
// Type of the audio produced by a NetEq::GetAudio() call; reported through
// that method's |type| output parameter.
// NOTE(review): PLC/CNG presumably stand for packet-loss concealment and
// comfort-noise generation -- confirm against the implementation.
enum NetEqOutputType {
  kOutputNormal,
  kOutputPLC,
  kOutputCNG,
  kOutputPLCtoCNG,
  kOutputVADPassive  // Returned when post-decode VAD is enabled and the signal
                     // contains no speech.
};
58
// Playout mode selector, used by NetEq::Config::playout_mode and by the
// deprecated NetEq::SetPlayoutMode() / NetEq::PlayoutMode() accessors.
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
65
66// This is the interface class for NetEq.
67class NetEq {
68 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000069 enum BackgroundNoiseMode {
70 kBgnOn, // Default behavior with eternal noise.
71 kBgnFade, // Noise fades to zero after some time.
72 kBgnOff // Background noise is always zero.
73 };
74
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000075 struct Config {
76 Config()
77 : sample_rate_hz(16000),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000078 enable_audio_classifier(false),
79 max_packets_in_buffer(50),
80 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000081 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000082 background_noise_mode(kBgnOff),
83 playout_mode(kPlayoutOn) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000084
Henrik Lundin905495c2015-05-25 16:58:41 +020085 std::string ToString() const;
86
Henrik Lundin83b5c052015-05-08 10:33:57 +020087 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000088 bool enable_audio_classifier;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000089 int max_packets_in_buffer;
90 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000091 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000092 NetEqPlayoutMode playout_mode;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000093 };
94
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000095 enum ReturnCodes {
96 kOK = 0,
97 kFail = -1,
98 kNotImplemented = -2
99 };
100
101 enum ErrorCodes {
102 kNoError = 0,
103 kOtherError,
104 kInvalidRtpPayloadType,
105 kUnknownRtpPayloadType,
106 kCodecNotSupported,
107 kDecoderExists,
108 kDecoderNotFound,
109 kInvalidSampleRate,
110 kInvalidPointer,
111 kAccelerateError,
112 kPreemptiveExpandError,
113 kComfortNoiseErrorCode,
114 kDecoderErrorCode,
115 kOtherDecoderError,
116 kInvalidOperation,
117 kDtmfParameterError,
118 kDtmfParsingError,
119 kDtmfInsertError,
120 kStereoNotSupported,
121 kSampleUnderrun,
122 kDecodedTooMuch,
123 kFrameSplitError,
124 kRedundancySplitError,
minyue@webrtc.org7bb54362013-08-06 05:40:57 +0000125 kPacketBufferCorruption,
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000126 kSyncPacketNotAccepted
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000127 };
128
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000129 // Creates a new NetEq object, with parameters set in |config|. The |config|
130 // object will only have to be valid for the duration of the call to this
131 // method.
132 static NetEq* Create(const NetEq::Config& config);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000133
134 virtual ~NetEq() {}
135
136 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
137 // of the time when the packet was received, and should be measured with
138 // the same tick rate as the RTP timestamp of the current payload.
139 // Returns 0 on success, -1 on failure.
140 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
141 const uint8_t* payload,
pkasting@chromium.org4591fbd2014-11-20 22:28:14 +0000142 size_t length_bytes,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000143 uint32_t receive_timestamp) = 0;
144
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000145 // Inserts a sync-packet into packet queue. Sync-packets are decoded to
146 // silence and are intended to keep AV-sync intact in an event of long packet
147 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
148 // might insert sync-packet when they observe that buffer level of NetEq is
149 // decreasing below a certain threshold, defined by the application.
150 // Sync-packets should have the same payload type as the last audio payload
151 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
152 // can be implied by inserting a sync-packet.
153 // Returns kOk on success, kFail on failure.
154 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
155 uint32_t receive_timestamp) = 0;
156
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000157 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
158 // |output_audio|, which can hold (at least) |max_length| elements.
159 // The number of channels that were written to the output is provided in
160 // the output variable |num_channels|, and each channel contains
161 // |samples_per_channel| elements. If more than one channel is written,
162 // the samples are interleaved.
163 // The speech type is written to |type|, if |type| is not NULL.
164 // Returns kOK on success, or kFail in case of an error.
165 virtual int GetAudio(size_t max_length, int16_t* output_audio,
166 int* samples_per_channel, int* num_channels,
167 NetEqOutputType* type) = 0;
168
169 // Associates |rtp_payload_type| with |codec| and stores the information in
170 // the codec database. Returns 0 on success, -1 on failure.
171 virtual int RegisterPayloadType(enum NetEqDecoder codec,
172 uint8_t rtp_payload_type) = 0;
173
174 // Provides an externally created decoder object |decoder| to insert in the
175 // decoder database. The decoder implements a decoder of type |codec| and
Karl Wibergd8399e62015-05-25 14:39:56 +0200176 // associates it with |rtp_payload_type|. The decoder will produce samples
177 // at the rate |sample_rate_hz|. Returns kOK on success, kFail on failure.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000178 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
179 enum NetEqDecoder codec,
Karl Wibergd8399e62015-05-25 14:39:56 +0200180 uint8_t rtp_payload_type,
181 int sample_rate_hz) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000182
183 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
184 // -1 on failure.
185 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
186
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000187 // Sets a minimum delay in millisecond for packet buffer. The minimum is
188 // maintained unless a higher latency is dictated by channel condition.
189 // Returns true if the minimum is successfully applied, otherwise false is
190 // returned.
191 virtual bool SetMinimumDelay(int delay_ms) = 0;
192
193 // Sets a maximum delay in milliseconds for packet buffer. The latency will
194 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000195 // conditions) is higher. Calling this method has the same effect as setting
196 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000197 virtual bool SetMaximumDelay(int delay_ms) = 0;
198
199 // The smallest latency required. This is computed bases on inter-arrival
200 // time and internal NetEq logic. Note that in computing this latency none of
201 // the user defined limits (applied by calling setMinimumDelay() and/or
202 // SetMaximumDelay()) are applied.
203 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000204
205 // Not implemented.
206 virtual int SetTargetDelay() = 0;
207
208 // Not implemented.
209 virtual int TargetDelay() = 0;
210
211 // Not implemented.
212 virtual int CurrentDelay() = 0;
213
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000214 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000215 // Deprecated. Set the mode in the Config struct passed to the constructor.
216 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000217 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
218
219 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000220 // Deprecated.
221 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000222 virtual NetEqPlayoutMode PlayoutMode() const = 0;
223
224 // Writes the current network statistics to |stats|. The statistics are reset
225 // after the call.
226 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
227
228 // Writes the last packet waiting times (in ms) to |waiting_times|. The number
229 // of values written is no more than 100, but may be smaller if the interface
230 // is polled again before 100 packets has arrived.
231 virtual void WaitingTimes(std::vector<int>* waiting_times) = 0;
232
233 // Writes the current RTCP statistics to |stats|. The statistics are reset
234 // and a new report period is started with the call.
235 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
236
237 // Same as RtcpStatistics(), but does not reset anything.
238 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
239
240 // Enables post-decode VAD. When enabled, GetAudio() will return
241 // kOutputVADPassive when the signal contains no speech.
242 virtual void EnableVad() = 0;
243
244 // Disables post-decode VAD.
245 virtual void DisableVad() = 0;
246
wu@webrtc.org94454b72014-06-05 20:34:08 +0000247 // Gets the RTP timestamp for the last sample delivered by GetAudio().
248 // Returns true if the RTP timestamp is valid, otherwise false.
249 virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000250
251 // Not implemented.
252 virtual int SetTargetNumberOfChannels() = 0;
253
254 // Not implemented.
255 virtual int SetTargetSampleRate() = 0;
256
257 // Returns the error code for the last occurred error. If no error has
258 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000259 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000260
261 // Returns the error code last returned by a decoder (audio or comfort noise).
262 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
263 // this method to get the decoder's error code.
264 virtual int LastDecoderError() = 0;
265
266 // Flushes both the packet buffer and the sync buffer.
267 virtual void FlushBuffers() = 0;
268
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000269 // Current usage of packet-buffer and it's limits.
270 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000271 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000272
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000273 // Get sequence number and timestamp of the latest RTP.
274 // This method is to facilitate NACK.
turaj@webrtc.orgff43c852013-09-25 00:07:27 +0000275 virtual int DecodedRtpInfo(int* sequence_number,
276 uint32_t* timestamp) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000277
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000278 protected:
279 NetEq() {}
280
281 private:
282 DISALLOW_COPY_AND_ASSIGN(NetEq);
283};
284
285} // namespace webrtc
henrik.lundin@webrtc.org9c55f0f2014-06-09 08:10:28 +0000286#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_