blob: f9f420af0ef778c63a589f12d7e96cac3d7be75b [file] [log] [blame]
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +00001/*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin7687ad52018-07-02 10:14:46 +020013#include <assert.h>
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stdio.h>
15#include <string>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Yves Gerey988cc082018-10-23 12:03:01 +020023#include "rtc_base/checks.h"
Minyue Li7f6417f2018-10-03 21:19:08 +020024#include "rtc_base/logging.h"
Yves Gerey988cc082018-10-23 12:03:01 +020025#include "rtc_base/numerics/safe_conversions.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000026
namespace {

// Percentage of the target buffer level used as the threshold below which
// decoding keeps being postponed (by continuing to expand) after an expand
// period; see DecisionLogic::GetDecision().
constexpr int kPostponeDecodingLevel = 50;

}  // namespace
32
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000033namespace webrtc {
34
35DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070036 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020037 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000038 DecoderDatabase* decoder_database,
39 const PacketBuffer& packet_buffer,
40 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020041 BufferLevelFilter* buffer_level_filter,
42 const TickTimer* tick_timer) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020043 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
44 decoder_database, packet_buffer, delay_manager,
45 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000046}
47
48DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070049 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020050 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000051 DecoderDatabase* decoder_database,
52 const PacketBuffer& packet_buffer,
53 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020054 BufferLevelFilter* buffer_level_filter,
55 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000056 : decoder_database_(decoder_database),
57 packet_buffer_(packet_buffer),
58 delay_manager_(delay_manager),
59 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020060 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000061 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000062 packet_length_samples_(0),
63 sample_memory_(0),
64 prev_time_scale_(false),
Henrik Lundin7687ad52018-07-02 10:14:46 +020065 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020066 timescale_countdown_(
67 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
Jakob Ivarssond3a780b2019-02-28 14:30:21 +010068 num_consecutive_expands_(0) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000069 SetSampleRate(fs_hz, output_size_samples);
70}
71
Henrik Lundin47b17dc2016-05-10 10:20:59 +020072DecisionLogic::~DecisionLogic() = default;
73
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000074void DecisionLogic::Reset() {
75 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -070076 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000077 packet_length_samples_ = 0;
78 sample_memory_ = 0;
79 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020080 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000081 num_consecutive_expands_ = 0;
82}
83
84void DecisionLogic::SoftReset() {
85 packet_length_samples_ = 0;
86 sample_memory_ = 0;
87 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020088 timescale_countdown_ =
89 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000090}
91
Peter Kastingdce40cf2015-08-24 14:52:23 -070092void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000093 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +020094 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000095 fs_mult_ = fs_hz / 8000;
96 output_size_samples_ = output_size_samples;
97}
98
99Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
100 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700101 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -0700102 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000103 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700104 bool play_dtmf,
105 size_t generated_noise_samples,
106 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700107 // If last mode was CNG (or Expand, since this could be covering up for
108 // a lost CNG packet), remember that CNG is on. This is needed if comfort
109 // noise is interrupted by DTMF.
110 if (prev_mode == kModeRfc3389Cng) {
111 cng_state_ = kCngRfc3389On;
112 } else if (prev_mode == kModeCodecInternalCng) {
113 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000114 }
115
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100116 // TODO(jakobi): Use buffer span instead of num samples.
Peter Kastingdce40cf2015-08-24 14:52:23 -0700117 const size_t cur_size_samples =
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200118 packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000119
Yves Gerey665174f2018-06-19 15:03:05 +0200120 prev_time_scale_ =
121 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
122 prev_mode == kModeAccelerateLowEnergy ||
123 prev_mode == kModePreemptiveExpandSuccess ||
124 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000125
Minyue Li7d204d52019-04-16 11:44:49 +0200126 // Do not update buffer history if currently playing CNG since it will bias
127 // the filtered buffer level.
128 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng) &&
129 !(next_packet && next_packet->frame &&
130 next_packet->frame->IsDtxPacket())) {
131 FilterBufferLevel(cur_size_samples);
132 }
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000133
Henrik Lundin7687ad52018-07-02 10:14:46 +0200134 // Guard for errors, to avoid getting stuck in error mode.
135 if (prev_mode == kModeError) {
136 if (!next_packet) {
137 return kExpand;
138 } else {
139 return kUndefined; // Use kUndefined to flag for a reset.
140 }
141 }
142
143 uint32_t target_timestamp = sync_buffer.end_timestamp();
144 uint32_t available_timestamp = 0;
145 bool is_cng_packet = false;
146 if (next_packet) {
147 available_timestamp = next_packet->timestamp;
148 is_cng_packet =
149 decoder_database_->IsComfortNoise(next_packet->payload_type);
150 }
151
152 if (is_cng_packet) {
153 return CngOperation(prev_mode, target_timestamp, available_timestamp,
154 generated_noise_samples);
155 }
156
157 // Handle the case with no packet at all available (except maybe DTMF).
158 if (!next_packet) {
159 return NoPacket(play_dtmf);
160 }
161
162 // If the expand period was very long, reset NetEQ since it is likely that the
163 // sender was restarted.
164 if (num_consecutive_expands_ > kReinitAfterExpands) {
165 *reset_decoder = true;
166 return kNormal;
167 }
168
169 // Make sure we don't restart audio too soon after an expansion to avoid
170 // running out of data right away again. We should only wait if there are no
171 // DTX or CNG packets in the buffer (otherwise we should just play out what we
172 // have, since we cannot know the exact duration of DTX or CNG packets), and
173 // if the mute factor is low enough (otherwise the expansion was short enough
174 // to not be noticable).
175 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200176 size_t current_span = packet_buffer_.GetSpanSamples(decoder_frame_length);
Minyue Li7f6417f2018-10-03 21:19:08 +0200177 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
178 expand.MuteFactor(0) < 16384 / 2 &&
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100179 current_span < static_cast<size_t>(delay_manager_->TargetLevel() *
180 packet_length_samples_ *
181 kPostponeDecodingLevel / 100)>> 8 &&
Minyue Li7f6417f2018-10-03 21:19:08 +0200182 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200183 return kExpand;
184 }
185
186 const uint32_t five_seconds_samples =
187 static_cast<uint32_t>(5 * 8000 * fs_mult_);
188 // Check if the required packet is available.
189 if (target_timestamp == available_timestamp) {
190 return ExpectedPacketAvailable(prev_mode, play_dtmf);
191 } else if (!PacketBuffer::IsObsoleteTimestamp(
192 available_timestamp, target_timestamp, five_seconds_samples)) {
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200193 return FuturePacketAvailable(decoder_frame_length, prev_mode,
194 target_timestamp, available_timestamp,
195 play_dtmf, generated_noise_samples);
Henrik Lundin7687ad52018-07-02 10:14:46 +0200196 } else {
197 // This implies that available_timestamp < target_timestamp, which can
198 // happen when a new stream or codec is received. Signal for a reset.
199 return kUndefined;
200 }
201}
202
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200203void DecisionLogic::ExpandDecision(Operations operation) {
204 if (operation == kExpand) {
205 num_consecutive_expands_++;
206 } else {
207 num_consecutive_expands_ = 0;
208 }
209}
210
Minyue Li7d204d52019-04-16 11:44:49 +0200211void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
212 buffer_level_filter_->SetTargetBufferLevel(
213 delay_manager_->base_target_level());
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200214
Minyue Li7d204d52019-04-16 11:44:49 +0200215 int sample_memory_local = 0;
216 if (prev_time_scale_) {
217 sample_memory_local = sample_memory_;
218 timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
219 }
220
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200221 buffer_level_filter_->Update(buffer_size_samples, sample_memory_local);
Minyue Li7d204d52019-04-16 11:44:49 +0200222 prev_time_scale_ = false;
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200223}
224
Henrik Lundin7687ad52018-07-02 10:14:46 +0200225Operations DecisionLogic::CngOperation(Modes prev_mode,
226 uint32_t target_timestamp,
227 uint32_t available_timestamp,
228 size_t generated_noise_samples) {
229 // Signed difference between target and available timestamp.
230 int32_t timestamp_diff = static_cast<int32_t>(
231 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
232 available_timestamp);
233 int32_t optimal_level_samp = static_cast<int32_t>(
234 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
235 const int64_t excess_waiting_time_samp =
236 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
237
238 if (excess_waiting_time_samp > optimal_level_samp / 2) {
239 // The waiting time for this packet will be longer than 1.5
240 // times the wanted buffer delay. Apply fast-forward to cut the
241 // waiting time down to the optimal.
242 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
243 excess_waiting_time_samp);
244 timestamp_diff =
245 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
246 }
247
248 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
249 // Not time to play this packet yet. Wait another round before using this
250 // packet. Keep on playing CNG from previous CNG parameters.
251 return kRfc3389CngNoPacket;
252 } else {
253 // Otherwise, go for the CNG packet now.
254 noise_fast_forward_ = 0;
255 return kRfc3389Cng;
256 }
257}
258
259Operations DecisionLogic::NoPacket(bool play_dtmf) {
260 if (cng_state_ == kCngRfc3389On) {
261 // Keep on playing comfort noise.
262 return kRfc3389CngNoPacket;
263 } else if (cng_state_ == kCngInternalOn) {
264 // Keep on playing codec internal comfort noise.
265 return kCodecInternalCng;
266 } else if (play_dtmf) {
267 return kDtmf;
268 } else {
269 // Nothing to play, do expand.
270 return kExpand;
271 }
272}
273
274Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
275 bool play_dtmf) {
276 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200277 // Check criterion for time-stretching. The values are in number of packets
278 // in Q8.
Henrik Lundin7687ad52018-07-02 10:14:46 +0200279 int low_limit, high_limit;
280 delay_manager_->BufferLimits(&low_limit, &high_limit);
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200281 int buffer_level_packets = 0;
282 if (packet_length_samples_ > 0) {
283 buffer_level_packets =
284 ((1 << 8) * buffer_level_filter_->filtered_current_level()) /
285 packet_length_samples_;
286 }
287 if (buffer_level_packets >= high_limit << 2)
Henrik Lundin7687ad52018-07-02 10:14:46 +0200288 return kFastAccelerate;
289 if (TimescaleAllowed()) {
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200290 if (buffer_level_packets >= high_limit)
Henrik Lundin7687ad52018-07-02 10:14:46 +0200291 return kAccelerate;
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200292 if (buffer_level_packets < low_limit)
Henrik Lundin7687ad52018-07-02 10:14:46 +0200293 return kPreemptiveExpand;
294 }
295 }
296 return kNormal;
297}
298
299Operations DecisionLogic::FuturePacketAvailable(
Henrik Lundin7687ad52018-07-02 10:14:46 +0200300 size_t decoder_frame_length,
301 Modes prev_mode,
302 uint32_t target_timestamp,
303 uint32_t available_timestamp,
304 bool play_dtmf,
305 size_t generated_noise_samples) {
306 // Required packet is not available, but a future packet is.
307 // Check if we should continue with an ongoing expand because the new packet
308 // is too far into the future.
309 uint32_t timestamp_leap = available_timestamp - target_timestamp;
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200310 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
311 !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
312 PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200313 if (play_dtmf) {
314 // Still have DTMF to play, so do not do expand.
315 return kDtmf;
316 } else {
317 // Nothing to play.
318 return kExpand;
319 }
320 }
321
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200322 if (prev_mode == kModeCodecPlc) {
323 return kNormal;
324 }
325
Henrik Lundin7687ad52018-07-02 10:14:46 +0200326 const size_t cur_size_samples =
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200327 packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
Henrik Lundin7687ad52018-07-02 10:14:46 +0200328
329 // If previous was comfort noise, then no merge is needed.
330 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
331 // Keep the same delay as before the CNG, but make sure that the number of
332 // samples in buffer is no higher than 4 times the optimal level. (Note that
333 // TargetLevel() is in Q8.)
334 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
335 available_timestamp ||
336 cur_size_samples >
337 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
338 4) {
339 // Time to play this new packet.
340 return kNormal;
341 } else {
342 // Too early to play this new packet; keep on playing comfort noise.
343 if (prev_mode == kModeRfc3389Cng) {
344 return kRfc3389CngNoPacket;
345 } else { // prevPlayMode == kModeCodecInternalCng.
346 return kCodecInternalCng;
347 }
348 }
349 }
350 // Do not merge unless we have done an expand before.
351 if (prev_mode == kModeExpand) {
352 return kMerge;
353 } else if (play_dtmf) {
354 // Play DTMF instead of expand.
355 return kDtmf;
356 } else {
357 return kExpand;
358 }
359}
360
361bool DecisionLogic::UnderTargetLevel() const {
Jakob Ivarssona36c5912019-06-27 10:12:02 +0200362 int buffer_level_packets = 0;
363 if (packet_length_samples_ > 0) {
364 buffer_level_packets =
365 ((1 << 8) * buffer_level_filter_->filtered_current_level()) /
366 packet_length_samples_;
367 }
368 return buffer_level_packets <= delay_manager_->TargetLevel();
Henrik Lundin7687ad52018-07-02 10:14:46 +0200369}
370
371bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
372 return timestamp_leap >=
373 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
374}
375
376bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
377 return timestamp_leap >
378 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
379}
380
381bool DecisionLogic::MaxWaitForPacket() const {
382 return num_consecutive_expands_ >= kMaxWaitForPacket;
383}
384
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000385} // namespace webrtc