blob: 5976410aac5565af88bd313f79b37f1c6019cb87 [file] [log] [blame]
/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin7687ad52018-07-02 10:14:46 +020013#include <assert.h>
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stdio.h>
15#include <string>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Yves Gerey988cc082018-10-23 12:03:01 +020023#include "rtc_base/checks.h"
Minyue Li7f6417f2018-10-03 21:19:08 +020024#include "rtc_base/logging.h"
Yves Gerey988cc082018-10-23 12:03:01 +020025#include "rtc_base/numerics/safe_conversions.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020026#include "system_wrappers/include/field_trial.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000027
namespace {

// Percentage applied to the target buffer level (target level times packet
// length) when DecisionLogic::GetDecision() decides whether to keep expanding
// instead of resuming decoding right after an expand period.
constexpr int kPostponeDecodingLevel{50};

}  // namespace
33
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000034namespace webrtc {
35
36DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070037 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020038 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000039 DecoderDatabase* decoder_database,
40 const PacketBuffer& packet_buffer,
41 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020042 BufferLevelFilter* buffer_level_filter,
43 const TickTimer* tick_timer) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020044 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
45 decoder_database, packet_buffer, delay_manager,
46 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000047}
48
49DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070050 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020051 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000052 DecoderDatabase* decoder_database,
53 const PacketBuffer& packet_buffer,
54 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020055 BufferLevelFilter* buffer_level_filter,
56 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000057 : decoder_database_(decoder_database),
58 packet_buffer_(packet_buffer),
59 delay_manager_(delay_manager),
60 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020061 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000062 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000063 packet_length_samples_(0),
64 sample_memory_(0),
65 prev_time_scale_(false),
Henrik Lundin7687ad52018-07-02 10:14:46 +020066 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020067 timescale_countdown_(
68 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
Jakob Ivarssond3a780b2019-02-28 14:30:21 +010069 num_consecutive_expands_(0) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020070 delay_manager_->set_streaming_mode(false);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000071 SetSampleRate(fs_hz, output_size_samples);
72}
73
Henrik Lundin47b17dc2016-05-10 10:20:59 +020074DecisionLogic::~DecisionLogic() = default;
75
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000076void DecisionLogic::Reset() {
77 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -070078 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000079 packet_length_samples_ = 0;
80 sample_memory_ = 0;
81 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020082 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000083 num_consecutive_expands_ = 0;
84}
85
86void DecisionLogic::SoftReset() {
87 packet_length_samples_ = 0;
88 sample_memory_ = 0;
89 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020090 timescale_countdown_ =
91 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000092}
93
Peter Kastingdce40cf2015-08-24 14:52:23 -070094void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000095 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +020096 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000097 fs_mult_ = fs_hz / 8000;
98 output_size_samples_ = output_size_samples;
99}
100
101Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
102 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700103 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -0700104 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000105 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700106 bool play_dtmf,
107 size_t generated_noise_samples,
108 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700109 // If last mode was CNG (or Expand, since this could be covering up for
110 // a lost CNG packet), remember that CNG is on. This is needed if comfort
111 // noise is interrupted by DTMF.
112 if (prev_mode == kModeRfc3389Cng) {
113 cng_state_ = kCngRfc3389On;
114 } else if (prev_mode == kModeCodecInternalCng) {
115 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000116 }
117
Peter Kastingdce40cf2015-08-24 14:52:23 -0700118 const size_t samples_left =
119 sync_buffer.FutureLength() - expand.overlap_length();
120 const size_t cur_size_samples =
ossu61a208b2016-09-20 01:38:00 -0700121 samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000122
Yves Gerey665174f2018-06-19 15:03:05 +0200123 prev_time_scale_ =
124 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
125 prev_mode == kModeAccelerateLowEnergy ||
126 prev_mode == kModePreemptiveExpandSuccess ||
127 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000128
129 FilterBufferLevel(cur_size_samples, prev_mode);
130
Henrik Lundin7687ad52018-07-02 10:14:46 +0200131 // Guard for errors, to avoid getting stuck in error mode.
132 if (prev_mode == kModeError) {
133 if (!next_packet) {
134 return kExpand;
135 } else {
136 return kUndefined; // Use kUndefined to flag for a reset.
137 }
138 }
139
140 uint32_t target_timestamp = sync_buffer.end_timestamp();
141 uint32_t available_timestamp = 0;
142 bool is_cng_packet = false;
143 if (next_packet) {
144 available_timestamp = next_packet->timestamp;
145 is_cng_packet =
146 decoder_database_->IsComfortNoise(next_packet->payload_type);
147 }
148
149 if (is_cng_packet) {
150 return CngOperation(prev_mode, target_timestamp, available_timestamp,
151 generated_noise_samples);
152 }
153
154 // Handle the case with no packet at all available (except maybe DTMF).
155 if (!next_packet) {
156 return NoPacket(play_dtmf);
157 }
158
159 // If the expand period was very long, reset NetEQ since it is likely that the
160 // sender was restarted.
161 if (num_consecutive_expands_ > kReinitAfterExpands) {
162 *reset_decoder = true;
163 return kNormal;
164 }
165
166 // Make sure we don't restart audio too soon after an expansion to avoid
167 // running out of data right away again. We should only wait if there are no
168 // DTX or CNG packets in the buffer (otherwise we should just play out what we
169 // have, since we cannot know the exact duration of DTX or CNG packets), and
170 // if the mute factor is low enough (otherwise the expansion was short enough
171 // to not be noticable).
172 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
Minyue Li7f6417f2018-10-03 21:19:08 +0200173 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
174 expand.MuteFactor(0) < 16384 / 2 &&
175 cur_size_samples < static_cast<size_t>(
176 delay_manager_->TargetLevel() * packet_length_samples_ *
Jakob Ivarssond3a780b2019-02-28 14:30:21 +0100177 kPostponeDecodingLevel / 100) >> 8 &&
Minyue Li7f6417f2018-10-03 21:19:08 +0200178 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200179 return kExpand;
180 }
181
182 const uint32_t five_seconds_samples =
183 static_cast<uint32_t>(5 * 8000 * fs_mult_);
184 // Check if the required packet is available.
185 if (target_timestamp == available_timestamp) {
186 return ExpectedPacketAvailable(prev_mode, play_dtmf);
187 } else if (!PacketBuffer::IsObsoleteTimestamp(
188 available_timestamp, target_timestamp, five_seconds_samples)) {
189 return FuturePacketAvailable(
190 sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
191 available_timestamp, play_dtmf, generated_noise_samples);
192 } else {
193 // This implies that available_timestamp < target_timestamp, which can
194 // happen when a new stream or codec is received. Signal for a reset.
195 return kUndefined;
196 }
197}
198
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200199void DecisionLogic::ExpandDecision(Operations operation) {
200 if (operation == kExpand) {
201 num_consecutive_expands_++;
202 } else {
203 num_consecutive_expands_ = 0;
204 }
205}
206
207void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
208 Modes prev_mode) {
209 // Do not update buffer history if currently playing CNG since it will bias
210 // the filtered buffer level.
211 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
212 buffer_level_filter_->SetTargetBufferLevel(
213 delay_manager_->base_target_level());
214
215 size_t buffer_size_packets = 0;
216 if (packet_length_samples_ > 0) {
217 // Calculate size in packets.
218 buffer_size_packets = buffer_size_samples / packet_length_samples_;
219 }
220 int sample_memory_local = 0;
221 if (prev_time_scale_) {
222 sample_memory_local = sample_memory_;
223 timescale_countdown_ =
224 tick_timer_->GetNewCountdown(kMinTimescaleInterval);
225 }
226 buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
227 packet_length_samples_);
228 prev_time_scale_ = false;
229 }
230}
231
Henrik Lundin7687ad52018-07-02 10:14:46 +0200232Operations DecisionLogic::CngOperation(Modes prev_mode,
233 uint32_t target_timestamp,
234 uint32_t available_timestamp,
235 size_t generated_noise_samples) {
236 // Signed difference between target and available timestamp.
237 int32_t timestamp_diff = static_cast<int32_t>(
238 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
239 available_timestamp);
240 int32_t optimal_level_samp = static_cast<int32_t>(
241 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
242 const int64_t excess_waiting_time_samp =
243 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
244
245 if (excess_waiting_time_samp > optimal_level_samp / 2) {
246 // The waiting time for this packet will be longer than 1.5
247 // times the wanted buffer delay. Apply fast-forward to cut the
248 // waiting time down to the optimal.
249 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
250 excess_waiting_time_samp);
251 timestamp_diff =
252 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
253 }
254
255 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
256 // Not time to play this packet yet. Wait another round before using this
257 // packet. Keep on playing CNG from previous CNG parameters.
258 return kRfc3389CngNoPacket;
259 } else {
260 // Otherwise, go for the CNG packet now.
261 noise_fast_forward_ = 0;
262 return kRfc3389Cng;
263 }
264}
265
266Operations DecisionLogic::NoPacket(bool play_dtmf) {
267 if (cng_state_ == kCngRfc3389On) {
268 // Keep on playing comfort noise.
269 return kRfc3389CngNoPacket;
270 } else if (cng_state_ == kCngInternalOn) {
271 // Keep on playing codec internal comfort noise.
272 return kCodecInternalCng;
273 } else if (play_dtmf) {
274 return kDtmf;
275 } else {
276 // Nothing to play, do expand.
277 return kExpand;
278 }
279}
280
281Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
282 bool play_dtmf) {
283 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
284 // Check criterion for time-stretching.
285 int low_limit, high_limit;
286 delay_manager_->BufferLimits(&low_limit, &high_limit);
287 if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
288 return kFastAccelerate;
289 if (TimescaleAllowed()) {
290 if (buffer_level_filter_->filtered_current_level() >= high_limit)
291 return kAccelerate;
292 if (buffer_level_filter_->filtered_current_level() < low_limit)
293 return kPreemptiveExpand;
294 }
295 }
296 return kNormal;
297}
298
299Operations DecisionLogic::FuturePacketAvailable(
300 const SyncBuffer& sync_buffer,
301 const Expand& expand,
302 size_t decoder_frame_length,
303 Modes prev_mode,
304 uint32_t target_timestamp,
305 uint32_t available_timestamp,
306 bool play_dtmf,
307 size_t generated_noise_samples) {
308 // Required packet is not available, but a future packet is.
309 // Check if we should continue with an ongoing expand because the new packet
310 // is too far into the future.
311 uint32_t timestamp_leap = available_timestamp - target_timestamp;
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200312 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
313 !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
314 PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200315 if (play_dtmf) {
316 // Still have DTMF to play, so do not do expand.
317 return kDtmf;
318 } else {
319 // Nothing to play.
320 return kExpand;
321 }
322 }
323
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200324 if (prev_mode == kModeCodecPlc) {
325 return kNormal;
326 }
327
Henrik Lundin7687ad52018-07-02 10:14:46 +0200328 const size_t samples_left =
329 sync_buffer.FutureLength() - expand.overlap_length();
330 const size_t cur_size_samples =
331 samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
332
333 // If previous was comfort noise, then no merge is needed.
334 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
335 // Keep the same delay as before the CNG, but make sure that the number of
336 // samples in buffer is no higher than 4 times the optimal level. (Note that
337 // TargetLevel() is in Q8.)
338 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
339 available_timestamp ||
340 cur_size_samples >
341 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
342 4) {
343 // Time to play this new packet.
344 return kNormal;
345 } else {
346 // Too early to play this new packet; keep on playing comfort noise.
347 if (prev_mode == kModeRfc3389Cng) {
348 return kRfc3389CngNoPacket;
349 } else { // prevPlayMode == kModeCodecInternalCng.
350 return kCodecInternalCng;
351 }
352 }
353 }
354 // Do not merge unless we have done an expand before.
355 if (prev_mode == kModeExpand) {
356 return kMerge;
357 } else if (play_dtmf) {
358 // Play DTMF instead of expand.
359 return kDtmf;
360 } else {
361 return kExpand;
362 }
363}
364
365bool DecisionLogic::UnderTargetLevel() const {
366 return buffer_level_filter_->filtered_current_level() <=
367 delay_manager_->TargetLevel();
368}
369
370bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
371 return timestamp_leap >=
372 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
373}
374
375bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
376 return timestamp_leap >
377 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
378}
379
380bool DecisionLogic::MaxWaitForPacket() const {
381 return num_consecutive_expands_ >= kMaxWaitForPacket;
382}
383
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000384} // namespace webrtc