blob: 5b52424bee71609d2b34e9d8f20ff27e8495f842 [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Henrik Kjellander74640892015-10-29 11:31:02 +010011#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
#include <string.h>  // Provide access to size_t.

#include <string>
#include <vector>

#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/typedefs.h"
23
24namespace webrtc {
25
// Forward declarations. Only pointers and references to these types are used
// in this header, so their full definitions are not needed here.
class AudioFrame;
struct WebRtcRTPHeader;
29
// Snapshot of NetEq's network statistics, as written by
// NetEq::NetworkStatistics(). Rates marked "Q14" are fixed-point fractions
// scaled by 2^14 (i.e., 16384 corresponds to 1.0).
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;  // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;  // 1 if adding extra delay due to peaky
                                // jitter; 0 otherwise.
  uint16_t packet_loss_rate;  // Loss rate (network + late) in Q14.
  uint16_t packet_discard_rate;  // Late loss rate in Q14.
  uint16_t expand_rate;  // Fraction (of original stream) of synthesized
                         // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;  // Fraction of data inserted through pre-emptive
                             // expansion (in Q14).
  uint16_t accelerate_rate;  // Fraction of data removed through acceleration
                             // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
                                    // decoding (in Q14).
  int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
                           // (positive or negative).
  size_t added_zero_samples;  // Number of zero samples added in "off" mode.
  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;    // Mean waiting time in ms.
  int median_waiting_time_ms;  // Median waiting time in ms.
  int min_waiting_time_ms;     // Shortest waiting time in ms.
  int max_waiting_time_ms;     // Longest waiting time in ms.
};
57
// Playout modes selectable through NetEq::Config::playout_mode (and the
// deprecated NetEq::SetPlayoutMode()). Enumerator order is part of the
// interface; do not reorder.
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
64
65// This is the interface class for NetEq.
66class NetEq {
67 public:
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000068 enum BackgroundNoiseMode {
69 kBgnOn, // Default behavior with eternal noise.
70 kBgnFade, // Noise fades to zero after some time.
71 kBgnOff // Background noise is always zero.
72 };
73
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000074 struct Config {
75 Config()
76 : sample_rate_hz(16000),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000077 enable_audio_classifier(false),
henrik.lundin9bc26672015-11-02 03:25:57 -080078 enable_post_decode_vad(false),
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000079 max_packets_in_buffer(50),
80 // |max_delay_ms| has the same effect as calling SetMaximumDelay().
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000081 max_delay_ms(2000),
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000082 background_noise_mode(kBgnOff),
Henrik Lundincf808d22015-05-27 14:33:29 +020083 playout_mode(kPlayoutOn),
84 enable_fast_accelerate(false) {}
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000085
Henrik Lundin905495c2015-05-25 16:58:41 +020086 std::string ToString() const;
87
Henrik Lundin83b5c052015-05-08 10:33:57 +020088 int sample_rate_hz; // Initial value. Will change with input data.
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000089 bool enable_audio_classifier;
henrik.lundin9bc26672015-11-02 03:25:57 -080090 bool enable_post_decode_vad;
Peter Kastingdce40cf2015-08-24 14:52:23 -070091 size_t max_packets_in_buffer;
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +000092 int max_delay_ms;
henrik.lundin@webrtc.orgea257842014-08-07 12:27:37 +000093 BackgroundNoiseMode background_noise_mode;
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +000094 NetEqPlayoutMode playout_mode;
Henrik Lundincf808d22015-05-27 14:33:29 +020095 bool enable_fast_accelerate;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +000096 };
97
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000098 enum ReturnCodes {
99 kOK = 0,
100 kFail = -1,
101 kNotImplemented = -2
102 };
103
104 enum ErrorCodes {
105 kNoError = 0,
106 kOtherError,
107 kInvalidRtpPayloadType,
108 kUnknownRtpPayloadType,
109 kCodecNotSupported,
110 kDecoderExists,
111 kDecoderNotFound,
112 kInvalidSampleRate,
113 kInvalidPointer,
114 kAccelerateError,
115 kPreemptiveExpandError,
116 kComfortNoiseErrorCode,
117 kDecoderErrorCode,
118 kOtherDecoderError,
119 kInvalidOperation,
120 kDtmfParameterError,
121 kDtmfParsingError,
122 kDtmfInsertError,
123 kStereoNotSupported,
124 kSampleUnderrun,
125 kDecodedTooMuch,
126 kFrameSplitError,
127 kRedundancySplitError,
minyue@webrtc.org7bb54362013-08-06 05:40:57 +0000128 kPacketBufferCorruption,
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000129 kSyncPacketNotAccepted
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000130 };
131
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000132 // Creates a new NetEq object, with parameters set in |config|. The |config|
133 // object will only have to be valid for the duration of the call to this
134 // method.
135 static NetEq* Create(const NetEq::Config& config);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000136
137 virtual ~NetEq() {}
138
139 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
140 // of the time when the packet was received, and should be measured with
141 // the same tick rate as the RTP timestamp of the current payload.
142 // Returns 0 on success, -1 on failure.
143 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
kwibergee2bac22015-11-11 10:34:00 -0800144 rtc::ArrayView<const uint8_t> payload,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000145 uint32_t receive_timestamp) = 0;
146
turaj@webrtc.org7b75ac62013-09-26 00:27:56 +0000147 // Inserts a sync-packet into packet queue. Sync-packets are decoded to
148 // silence and are intended to keep AV-sync intact in an event of long packet
149 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
150 // might insert sync-packet when they observe that buffer level of NetEq is
151 // decreasing below a certain threshold, defined by the application.
152 // Sync-packets should have the same payload type as the last audio payload
153 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
154 // can be implied by inserting a sync-packet.
155 // Returns kOk on success, kFail on failure.
156 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
157 uint32_t receive_timestamp) = 0;
158
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000159 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
henrik.lundin7dc68892016-04-06 01:03:02 -0700160 // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
161 // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
henrik.lundin55480f52016-03-08 02:37:57 -0800162 // |vad_activity_| are updated upon success. If an error is returned, some
163 // fields may not have been updated.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000164 // Returns kOK on success, or kFail in case of an error.
henrik.lundin55480f52016-03-08 02:37:57 -0800165 virtual int GetAudio(AudioFrame* audio_frame) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000166
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800167 // Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
168 // information in the codec database. Returns 0 on success, -1 on failure.
169 // The name is only used to provide information back to the caller about the
170 // decoders. Hence, the name is arbitrary, and may be empty.
kwibergee1879c2015-10-29 06:20:28 -0700171 virtual int RegisterPayloadType(NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800172 const std::string& codec_name,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000173 uint8_t rtp_payload_type) = 0;
174
175 // Provides an externally created decoder object |decoder| to insert in the
176 // decoder database. The decoder implements a decoder of type |codec| and
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800177 // associates it with |rtp_payload_type| and |codec_name|. The decoder will
178 // produce samples at the rate |sample_rate_hz|. Returns kOK on success, kFail
179 // on failure.
180 // The name is only used to provide information back to the caller about the
181 // decoders. Hence, the name is arbitrary, and may be empty.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000182 virtual int RegisterExternalDecoder(AudioDecoder* decoder,
kwibergee1879c2015-10-29 06:20:28 -0700183 NetEqDecoder codec,
henrik.lundin4cf61dd2015-12-09 06:20:58 -0800184 const std::string& codec_name,
Karl Wibergd8399e62015-05-25 14:39:56 +0200185 uint8_t rtp_payload_type,
186 int sample_rate_hz) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000187
188 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
189 // -1 on failure.
190 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
191
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000192 // Sets a minimum delay in millisecond for packet buffer. The minimum is
193 // maintained unless a higher latency is dictated by channel condition.
194 // Returns true if the minimum is successfully applied, otherwise false is
195 // returned.
196 virtual bool SetMinimumDelay(int delay_ms) = 0;
197
198 // Sets a maximum delay in milliseconds for packet buffer. The latency will
199 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000200 // conditions) is higher. Calling this method has the same effect as setting
201 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000202 virtual bool SetMaximumDelay(int delay_ms) = 0;
203
204 // The smallest latency required. This is computed bases on inter-arrival
205 // time and internal NetEq logic. Note that in computing this latency none of
206 // the user defined limits (applied by calling setMinimumDelay() and/or
207 // SetMaximumDelay()) are applied.
208 virtual int LeastRequiredDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000209
210 // Not implemented.
211 virtual int SetTargetDelay() = 0;
212
213 // Not implemented.
214 virtual int TargetDelay() = 0;
215
henrik.lundin9c3efd02015-08-27 13:12:22 -0700216 // Returns the current total delay (packet buffer and sync buffer) in ms.
217 virtual int CurrentDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000218
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000219 // Sets the playout mode to |mode|.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000220 // Deprecated. Set the mode in the Config struct passed to the constructor.
221 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000222 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
223
224 // Returns the current playout mode.
henrik.lundin@webrtc.org7cbc4f92014-10-07 06:37:39 +0000225 // Deprecated.
226 // TODO(henrik.lundin) Delete.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000227 virtual NetEqPlayoutMode PlayoutMode() const = 0;
228
229 // Writes the current network statistics to |stats|. The statistics are reset
230 // after the call.
231 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
232
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000233 // Writes the current RTCP statistics to |stats|. The statistics are reset
234 // and a new report period is started with the call.
235 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
236
237 // Same as RtcpStatistics(), but does not reset anything.
238 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
239
240 // Enables post-decode VAD. When enabled, GetAudio() will return
241 // kOutputVADPassive when the signal contains no speech.
242 virtual void EnableVad() = 0;
243
244 // Disables post-decode VAD.
245 virtual void DisableVad() = 0;
246
henrik.lundin9a410dd2016-04-06 01:39:22 -0700247 // Returns the RTP timestamp for the last sample delivered by GetAudio().
248 // The return value will be empty if no valid timestamp is available.
249 virtual rtc::Optional<uint32_t> GetPlayoutTimestamp() = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000250
henrik.lundind89814b2015-11-23 06:49:25 -0800251 // Returns the sample rate in Hz of the audio produced in the last GetAudio
252 // call. If GetAudio has not been called yet, the configured sample rate
253 // (Config::sample_rate_hz) is returned.
254 virtual int last_output_sample_rate_hz() const = 0;
255
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000256 // Not implemented.
257 virtual int SetTargetNumberOfChannels() = 0;
258
259 // Not implemented.
260 virtual int SetTargetSampleRate() = 0;
261
262 // Returns the error code for the last occurred error. If no error has
263 // occurred, 0 is returned.
henrik.lundin@webrtc.orgb0f4b3d2014-11-04 08:53:10 +0000264 virtual int LastError() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000265
266 // Returns the error code last returned by a decoder (audio or comfort noise).
267 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
268 // this method to get the decoder's error code.
269 virtual int LastDecoderError() = 0;
270
271 // Flushes both the packet buffer and the sync buffer.
272 virtual void FlushBuffers() = 0;
273
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000274 // Current usage of packet-buffer and it's limits.
275 virtual void PacketBufferStatistics(int* current_num_packets,
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000276 int* max_num_packets) const = 0;
turaj@webrtc.org7df97062013-08-02 18:07:13 +0000277
henrik.lundin48ed9302015-10-29 05:36:24 -0700278 // Enables NACK and sets the maximum size of the NACK list, which should be
279 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
280 // enabled then the maximum NACK list size is modified accordingly.
281 virtual void EnableNack(size_t max_nack_list_size) = 0;
282
283 virtual void DisableNack() = 0;
284
285 // Returns a list of RTP sequence numbers corresponding to packets to be
286 // retransmitted, given an estimate of the round-trip time in milliseconds.
287 virtual std::vector<uint16_t> GetNackList(
288 int64_t round_trip_time_ms) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000289
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000290 protected:
291 NetEq() {}
292
293 private:
henrikg3c089d72015-09-16 05:37:44 -0700294 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000295};
296
297} // namespace webrtc
Henrik Kjellander74640892015-10-29 11:31:02 +0100298#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_