blob: ea7079e369589a1b23dac0c500cd7c8b318a1387 [file] [log] [blame]
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +00001/*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Ivo Creusen3ce44a32019-10-31 14:38:11 +010011#ifndef API_NETEQ_NETEQ_H_
12#define API_NETEQ_NETEQ_H_
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000013
Ivo Creusen3ce44a32019-10-31 14:38:11 +010014#include <stddef.h> // Provide access to size_t.
pbos@webrtc.org12dc1a32013-08-05 16:22:53 +000015
Niels Möller72899062019-01-11 09:36:13 +010016#include <map>
Henrik Lundin905495c2015-05-25 16:58:41 +020017#include <string>
henrik.lundin114c1b32017-04-26 07:47:32 -070018#include <vector>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000019
Danil Chapovalovb6021232018-06-19 13:26:36 +020020#include "absl/types/optional.h"
Karl Wiberg08126342018-03-20 19:18:55 +010021#include "api/audio_codecs/audio_codec_pair_id.h"
Karl Wiberg31fbb542017-10-16 12:42:38 +020022#include "api/audio_codecs/audio_decoder.h"
Niels Möller72899062019-01-11 09:36:13 +010023#include "api/audio_codecs/audio_format.h"
Patrik Höglund3e113432017-12-15 14:40:10 +010024#include "api/rtp_headers.h"
Mirko Bonadeid9708072019-01-25 20:26:48 +010025#include "api/scoped_refptr.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000026
27namespace webrtc {
28
// Forward declarations of types that this header only refers to through
// pointers or as template arguments.
class AudioFrame;
class AudioDecoderFactory;
class Clock;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000033
// Network statistics reported by NetEq. Every field is zero-initialized so
// that a default-constructed instance never exposes indeterminate values.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms = 0;    // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms = 0;  // Target buffer size in ms.
  uint16_t jitter_peaks_found = 0;  // 1 if adding extra delay due to peaky
                                    // jitter; 0 otherwise.
  uint16_t expand_rate = 0;  // Fraction (of original stream) of synthesized
                             // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate = 0;  // Fraction (of original stream) of
                                    // synthesized speech inserted through
                                    // expansion (in Q14).
  uint16_t preemptive_rate = 0;  // Fraction of data inserted through
                                 // pre-emptive expansion (in Q14).
  uint16_t accelerate_rate = 0;  // Fraction of data removed through
                                 // acceleration (in Q14).
  uint16_t secondary_decoded_rate = 0;  // Fraction of data coming from FEC/RED
                                        // decoding (in Q14).
  uint16_t secondary_discarded_rate = 0;  // Fraction of discarded FEC/RED data
                                          // (in Q14).
  // Statistics for packet waiting times, i.e., the time between a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms = 0;
  int median_waiting_time_ms = 0;
  int min_waiting_time_ms = 0;
  int max_waiting_time_ms = 0;
};
58
// NetEq statistics that persist over the lifetime of the class.
// These metrics are never reset.
struct NetEqLifetimeStatistics {
  // Stats below correspond to similarly-named fields in the WebRTC stats spec.
  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats
  uint64_t total_samples_received = 0;
  uint64_t concealed_samples = 0;
  uint64_t concealment_events = 0;
  uint64_t jitter_buffer_delay_ms = 0;
  uint64_t jitter_buffer_emitted_count = 0;
  uint64_t jitter_buffer_target_delay_ms = 0;
  uint64_t inserted_samples_for_deceleration = 0;
  uint64_t removed_samples_for_acceleration = 0;
  uint64_t silent_concealed_samples = 0;
  uint64_t fec_packets_received = 0;
  uint64_t fec_packets_discarded = 0;

  // Below stats are not part of the spec.
  uint64_t delayed_packet_outage_samples = 0;
  // This is the sum of relative packet arrival delays of received packets so
  // far. Since end-to-end delay of a packet is difficult to measure and is not
  // necessarily useful for measuring jitter buffer performance, we report a
  // relative packet arrival delay. The relative packet arrival delay of a
  // packet is defined as the arrival delay compared to the first packet
  // received, given that it had zero delay. To avoid clock drift, the "first"
  // packet can be made dynamic.
  uint64_t relative_packet_arrival_delay_ms = 0;
  uint64_t jitter_buffer_packets_received = 0;
  // An interruption is a loss-concealment event lasting at least 150 ms. The
  // two stats below count the number of such events and the total duration of
  // these events.
  int32_t interruption_count = 0;
  int32_t total_interruption_duration_ms = 0;
};
92
// Metrics that describe the operations performed in NetEq, and the internal
// state.
struct NetEqOperationsAndState {
  // These sample counters are cumulative, and don't reset. As a reference, the
  // total number of output samples can be found in
  // NetEqLifetimeStatistics::total_samples_received.
  uint64_t preemptive_samples = 0;
  uint64_t accelerate_samples = 0;
  // Count of the number of buffer flushes.
  uint64_t packet_buffer_flushes = 0;
  // The number of primary packets that were discarded.
  uint64_t discarded_primary_packets = 0;

  // The statistics below are not cumulative.
  // The waiting time of the last decoded packet.
  uint64_t last_waiting_time_ms = 0;
  // The sum of the packet and jitter buffer size in ms.
  uint64_t current_buffer_size_ms = 0;
  // The current frame size in ms.
  uint64_t current_frame_size_ms = 0;
  // Flag to indicate that the next packet is available.
  bool next_packet_available = false;
};
115
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000116// This is the interface class for NetEq.
117class NetEq {
118 public:
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000119 struct Config {
Karl Wiberg08126342018-03-20 19:18:55 +0100120 Config();
121 Config(const Config&);
122 Config(Config&&);
123 ~Config();
124 Config& operator=(const Config&);
125 Config& operator=(Config&&);
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000126
Henrik Lundin905495c2015-05-25 16:58:41 +0200127 std::string ToString() const;
128
Karl Wiberg08126342018-03-20 19:18:55 +0100129 int sample_rate_hz = 16000; // Initial value. Will change with input data.
130 bool enable_post_decode_vad = false;
Jakob Ivarsson647d5e62019-03-15 10:37:31 +0100131 size_t max_packets_in_buffer = 200;
Ruslan Burakovb35bacc2019-02-20 13:41:59 +0100132 int max_delay_ms = 0;
Jakob Ivarsson10403ae2018-11-27 15:45:20 +0100133 int min_delay_ms = 0;
Karl Wiberg08126342018-03-20 19:18:55 +0100134 bool enable_fast_accelerate = false;
henrik.lundin7a926812016-05-12 13:51:28 -0700135 bool enable_muted_state = false;
Jakob Ivarsson39b934b2019-01-10 10:28:23 +0100136 bool enable_rtx_handling = false;
Danil Chapovalovb6021232018-06-19 13:26:36 +0200137 absl::optional<AudioCodecPairId> codec_pair_id;
Henrik Lundin7687ad52018-07-02 10:14:46 +0200138 bool for_test_no_time_stretching = false; // Use only for testing.
Henrik Lundinc49e9c22020-05-25 11:26:15 +0200139 // Adds extra delay to the output of NetEq, without affecting jitter or
140 // loss behavior. This is mainly for testing. Value must be a non-negative
141 // multiple of 10 ms.
142 int extra_output_delay_ms = 0;
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000143 };
144
Niels Möllerd941c092018-08-27 12:44:08 +0200145 enum ReturnCodes { kOK = 0, kFail = -1 };
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000146
Ivo Creusen3ce44a32019-10-31 14:38:11 +0100147 enum class Operation {
148 kNormal,
149 kMerge,
150 kExpand,
151 kAccelerate,
152 kFastAccelerate,
153 kPreemptiveExpand,
154 kRfc3389Cng,
155 kRfc3389CngNoPacket,
156 kCodecInternalCng,
157 kDtmf,
158 kUndefined,
159 };
160
161 enum class Mode {
162 kNormal,
163 kExpand,
164 kMerge,
165 kAccelerateSuccess,
166 kAccelerateLowEnergy,
167 kAccelerateFail,
168 kPreemptiveExpandSuccess,
169 kPreemptiveExpandLowEnergy,
170 kPreemptiveExpandFail,
171 kRfc3389Cng,
172 kCodecInternalCng,
173 kCodecPlc,
174 kDtmf,
175 kError,
176 kUndefined,
177 };
178
Karl Wiberg4b644112019-10-11 09:37:42 +0200179 // Return type for GetDecoderFormat.
180 struct DecoderFormat {
181 int sample_rate_hz;
182 int num_channels;
183 SdpAudioFormat sdp_format;
184 };
185
henrik.lundin@webrtc.org35ead382014-04-14 18:49:17 +0000186 // Creates a new NetEq object, with parameters set in |config|. The |config|
187 // object will only have to be valid for the duration of the call to this
188 // method.
ossue3525782016-05-25 07:37:43 -0700189 static NetEq* Create(
190 const NetEq::Config& config,
Alessio Bazzica8f319a32019-07-24 16:47:02 +0000191 Clock* clock,
ossue3525782016-05-25 07:37:43 -0700192 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000193
194 virtual ~NetEq() {}
195
Karl Wiberg45eb1352019-10-10 14:23:00 +0200196 // Inserts a new packet into NetEq.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000197 // Returns 0 on success, -1 on failure.
Henrik Lundin70c09bd2017-04-24 15:56:56 +0200198 virtual int InsertPacket(const RTPHeader& rtp_header,
Karl Wiberg45eb1352019-10-10 14:23:00 +0200199 rtc::ArrayView<const uint8_t> payload) = 0;
200
henrik.lundinb8c55b12017-05-10 07:38:01 -0700201 // Lets NetEq know that a packet arrived with an empty payload. This typically
202 // happens when empty packets are used for probing the network channel, and
203 // these packets use RTP sequence numbers from the same series as the actual
204 // audio packets.
205 virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
206
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000207 // Instructs NetEq to deliver 10 ms of audio data. The data is written to
henrik.lundin7dc68892016-04-06 01:03:02 -0700208 // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
209 // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
henrik.lundin55480f52016-03-08 02:37:57 -0800210 // |vad_activity_| are updated upon success. If an error is returned, some
henrik.lundin5fac3f02016-08-24 11:18:49 -0700211 // fields may not have been updated, or may contain inconsistent values.
henrik.lundin7a926812016-05-12 13:51:28 -0700212 // If muted state is enabled (through Config::enable_muted_state), |muted|
213 // may be set to true after a prolonged expand period. When this happens, the
214 // |data_| in |audio_frame| is not written, but should be interpreted as being
Ivo Creusen55de08e2018-09-03 11:49:27 +0200215 // all zeros. For testing purposes, an override can be supplied in the
216 // |action_override| argument, which will cause NetEq to take this action
Tommi3cc68ec2021-06-09 19:30:41 +0200217 // next, instead of the action it would normally choose. An optional output
218 // argument for fetching the current sample rate can be provided, which
219 // will return the same value as last_output_sample_rate_hz() but will avoid
220 // additional synchronization.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000221 // Returns kOK on success, or kFail in case of an error.
Ivo Creusen55de08e2018-09-03 11:49:27 +0200222 virtual int GetAudio(
223 AudioFrame* audio_frame,
224 bool* muted,
Tommi3cc68ec2021-06-09 19:30:41 +0200225 int* current_sample_rate_hz = nullptr,
Ivo Creusen3ce44a32019-10-31 14:38:11 +0100226 absl::optional<Operation> action_override = absl::nullopt) = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000227
kwiberg1c07c702017-03-27 07:15:49 -0700228 // Replaces the current set of decoders with the given one.
229 virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;
230
kwiberg5adaf732016-10-04 09:33:27 -0700231 // Associates |rtp_payload_type| with the given codec, which NetEq will
232 // instantiate when it needs it. Returns true iff successful.
233 virtual bool RegisterPayloadType(int rtp_payload_type,
234 const SdpAudioFormat& audio_format) = 0;
235
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000236 // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
Henrik Lundinc417d9e2017-06-14 12:29:03 +0200237 // -1 on failure. Removing a payload type that is not registered is ok and
238 // will not result in an error.
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000239 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
240
kwiberg6b19b562016-09-20 04:02:25 -0700241 // Removes all payload types from the codec database.
242 virtual void RemoveAllPayloadTypes() = 0;
243
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000244 // Sets a minimum delay in millisecond for packet buffer. The minimum is
245 // maintained unless a higher latency is dictated by channel condition.
246 // Returns true if the minimum is successfully applied, otherwise false is
247 // returned.
248 virtual bool SetMinimumDelay(int delay_ms) = 0;
249
250 // Sets a maximum delay in milliseconds for packet buffer. The latency will
251 // not exceed the given value, even required delay (given the channel
henrik.lundin@webrtc.org116ed1d2014-04-28 08:20:04 +0000252 // conditions) is higher. Calling this method has the same effect as setting
253 // the |max_delay_ms| value in the NetEq::Config struct.
turaj@webrtc.orgf1efc572013-08-16 23:44:24 +0000254 virtual bool SetMaximumDelay(int delay_ms) = 0;
255
Ruslan Burakov9bee67c2019-02-05 13:49:26 +0100256 // Sets a base minimum delay in milliseconds for packet buffer. The minimum
257 // delay which is set via |SetMinimumDelay| can't be lower than base minimum
258 // delay. Calling this method is similar to setting the |min_delay_ms| value
259 // in the NetEq::Config struct. Returns true if the base minimum is
260 // successfully applied, otherwise false is returned.
261 virtual bool SetBaseMinimumDelayMs(int delay_ms) = 0;
262
263 // Returns current value of base minimum delay in milliseconds.
264 virtual int GetBaseMinimumDelayMs() const = 0;
265
henrik.lundin114c1b32017-04-26 07:47:32 -0700266 // Returns the current target delay in ms. This includes any extra delay
267 // requested through SetMinimumDelay.
Henrik Lundinabbff892017-11-29 09:14:04 +0100268 virtual int TargetDelayMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000269
henrik.lundinb3f1c5d2016-08-22 15:39:53 -0700270 // Returns the current total delay (packet buffer and sync buffer) in ms,
271 // with smoothing applied to even out short-time fluctuations due to jitter.
272 // The packet buffer part of the delay is not updated during DTX/CNG periods.
273 virtual int FilteredCurrentDelayMs() const = 0;
274
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000275 // Writes the current network statistics to |stats|. The statistics are reset
276 // after the call.
277 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
278
Niels Möller6b4d9622020-09-14 10:47:50 +0200279 // Current values only, not resetting any state.
280 virtual NetEqNetworkStatistics CurrentNetworkStatistics() const = 0;
281
Steve Anton2dbc69f2017-08-24 17:15:13 -0700282 // Returns a copy of this class's lifetime statistics. These statistics are
283 // never reset.
284 virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0;
285
Ivo Creusend1c2f782018-09-13 14:39:55 +0200286 // Returns statistics about the performed operations and internal state. These
287 // statistics are never reset.
288 virtual NetEqOperationsAndState GetOperationsAndState() const = 0;
289
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000290 // Enables post-decode VAD. When enabled, GetAudio() will return
291 // kOutputVADPassive when the signal contains no speech.
292 virtual void EnableVad() = 0;
293
294 // Disables post-decode VAD.
295 virtual void DisableVad() = 0;
296
henrik.lundin9a410dd2016-04-06 01:39:22 -0700297 // Returns the RTP timestamp for the last sample delivered by GetAudio().
298 // The return value will be empty if no valid timestamp is available.
Danil Chapovalovb6021232018-06-19 13:26:36 +0200299 virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000300
henrik.lundind89814b2015-11-23 06:49:25 -0800301 // Returns the sample rate in Hz of the audio produced in the last GetAudio
302 // call. If GetAudio has not been called yet, the configured sample rate
303 // (Config::sample_rate_hz) is returned.
304 virtual int last_output_sample_rate_hz() const = 0;
305
Fredrik Solenbergf693bfa2018-12-11 12:22:10 +0100306 // Returns the decoder info for the given payload type. Returns empty if no
ossuf1b08da2016-09-23 02:19:43 -0700307 // such payload type was registered.
Karl Wiberg4b644112019-10-11 09:37:42 +0200308 virtual absl::optional<DecoderFormat> GetDecoderFormat(
ossuf1b08da2016-09-23 02:19:43 -0700309 int payload_type) const = 0;
kwibergc4ccd4d2016-09-21 10:55:15 -0700310
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000311 // Flushes both the packet buffer and the sync buffer.
312 virtual void FlushBuffers() = 0;
313
henrik.lundin48ed9302015-10-29 05:36:24 -0700314 // Enables NACK and sets the maximum size of the NACK list, which should be
315 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
316 // enabled then the maximum NACK list size is modified accordingly.
317 virtual void EnableNack(size_t max_nack_list_size) = 0;
318
319 virtual void DisableNack() = 0;
320
321 // Returns a list of RTP sequence numbers corresponding to packets to be
322 // retransmitted, given an estimate of the round-trip time in milliseconds.
323 virtual std::vector<uint16_t> GetNackList(
324 int64_t round_trip_time_ms) const = 0;
minyue@webrtc.orgd7301772013-08-29 00:58:14 +0000325
henrik.lundin114c1b32017-04-26 07:47:32 -0700326 // Returns a vector containing the timestamps of the packets that were decoded
327 // in the last GetAudio call. If no packets were decoded in the last call, the
328 // vector is empty.
329 // Mainly intended for testing.
330 virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
331
332 // Returns the length of the audio yet to play in the sync buffer.
333 // Mainly intended for testing.
334 virtual int SyncBufferSizeMs() const = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000335};
336
337} // namespace webrtc
Ivo Creusen3ce44a32019-10-31 14:38:11 +0100338#endif // API_NETEQ_NETEQ_H_