blob: 91e828792ed4dbdcba981818a836fb3f05574454 [file] [log] [blame]
/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin7687ad52018-07-02 10:14:46 +020013#include <assert.h>
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stdio.h>
15#include <string>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Yves Gerey988cc082018-10-23 12:03:01 +020023#include "rtc_base/checks.h"
Minyue Li7f6417f2018-10-03 21:19:08 +020024#include "rtc_base/logging.h"
Yves Gerey988cc082018-10-23 12:03:01 +020025#include "rtc_base/numerics/safe_conversions.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000026
namespace {

// Percentage (the value is divided by 100 where it is used) of the target
// buffer level. After an expand period, decoding is postponed while the
// buffered span is below this fraction of the target level.
constexpr int kPostponeDecodingLevel = 50;

}  // namespace
32
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000033namespace webrtc {
34
35DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070036 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020037 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000038 DecoderDatabase* decoder_database,
39 const PacketBuffer& packet_buffer,
40 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020041 BufferLevelFilter* buffer_level_filter,
42 const TickTimer* tick_timer) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020043 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
44 decoder_database, packet_buffer, delay_manager,
45 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000046}
47
48DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070049 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020050 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000051 DecoderDatabase* decoder_database,
52 const PacketBuffer& packet_buffer,
53 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020054 BufferLevelFilter* buffer_level_filter,
55 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000056 : decoder_database_(decoder_database),
57 packet_buffer_(packet_buffer),
58 delay_manager_(delay_manager),
59 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020060 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000061 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000062 packet_length_samples_(0),
63 sample_memory_(0),
64 prev_time_scale_(false),
Henrik Lundin7687ad52018-07-02 10:14:46 +020065 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020066 timescale_countdown_(
67 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
Jakob Ivarssond3a780b2019-02-28 14:30:21 +010068 num_consecutive_expands_(0) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020069 delay_manager_->set_streaming_mode(false);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000070 SetSampleRate(fs_hz, output_size_samples);
71}
72
Henrik Lundin47b17dc2016-05-10 10:20:59 +020073DecisionLogic::~DecisionLogic() = default;
74
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000075void DecisionLogic::Reset() {
76 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -070077 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000078 packet_length_samples_ = 0;
79 sample_memory_ = 0;
80 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020081 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000082 num_consecutive_expands_ = 0;
83}
84
85void DecisionLogic::SoftReset() {
86 packet_length_samples_ = 0;
87 sample_memory_ = 0;
88 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020089 timescale_countdown_ =
90 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000091}
92
Peter Kastingdce40cf2015-08-24 14:52:23 -070093void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000094 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +020095 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000096 fs_mult_ = fs_hz / 8000;
97 output_size_samples_ = output_size_samples;
98}
99
100Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
101 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700102 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -0700103 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000104 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700105 bool play_dtmf,
106 size_t generated_noise_samples,
107 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700108 // If last mode was CNG (or Expand, since this could be covering up for
109 // a lost CNG packet), remember that CNG is on. This is needed if comfort
110 // noise is interrupted by DTMF.
111 if (prev_mode == kModeRfc3389Cng) {
112 cng_state_ = kCngRfc3389On;
113 } else if (prev_mode == kModeCodecInternalCng) {
114 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000115 }
116
Peter Kastingdce40cf2015-08-24 14:52:23 -0700117 const size_t samples_left =
118 sync_buffer.FutureLength() - expand.overlap_length();
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100119 // TODO(jakobi): Use buffer span instead of num samples.
Peter Kastingdce40cf2015-08-24 14:52:23 -0700120 const size_t cur_size_samples =
ossu61a208b2016-09-20 01:38:00 -0700121 samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000122
Yves Gerey665174f2018-06-19 15:03:05 +0200123 prev_time_scale_ =
124 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
125 prev_mode == kModeAccelerateLowEnergy ||
126 prev_mode == kModePreemptiveExpandSuccess ||
127 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000128
Minyue Li7d204d52019-04-16 11:44:49 +0200129 // Do not update buffer history if currently playing CNG since it will bias
130 // the filtered buffer level.
131 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng) &&
132 !(next_packet && next_packet->frame &&
133 next_packet->frame->IsDtxPacket())) {
134 FilterBufferLevel(cur_size_samples);
135 }
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000136
Henrik Lundin7687ad52018-07-02 10:14:46 +0200137 // Guard for errors, to avoid getting stuck in error mode.
138 if (prev_mode == kModeError) {
139 if (!next_packet) {
140 return kExpand;
141 } else {
142 return kUndefined; // Use kUndefined to flag for a reset.
143 }
144 }
145
146 uint32_t target_timestamp = sync_buffer.end_timestamp();
147 uint32_t available_timestamp = 0;
148 bool is_cng_packet = false;
149 if (next_packet) {
150 available_timestamp = next_packet->timestamp;
151 is_cng_packet =
152 decoder_database_->IsComfortNoise(next_packet->payload_type);
153 }
154
155 if (is_cng_packet) {
156 return CngOperation(prev_mode, target_timestamp, available_timestamp,
157 generated_noise_samples);
158 }
159
160 // Handle the case with no packet at all available (except maybe DTMF).
161 if (!next_packet) {
162 return NoPacket(play_dtmf);
163 }
164
165 // If the expand period was very long, reset NetEQ since it is likely that the
166 // sender was restarted.
167 if (num_consecutive_expands_ > kReinitAfterExpands) {
168 *reset_decoder = true;
169 return kNormal;
170 }
171
172 // Make sure we don't restart audio too soon after an expansion to avoid
173 // running out of data right away again. We should only wait if there are no
174 // DTX or CNG packets in the buffer (otherwise we should just play out what we
175 // have, since we cannot know the exact duration of DTX or CNG packets), and
176 // if the mute factor is low enough (otherwise the expansion was short enough
177 // to not be noticable).
178 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100179 size_t current_span =
180 samples_left + packet_buffer_.GetSpanSamples(decoder_frame_length);
Minyue Li7f6417f2018-10-03 21:19:08 +0200181 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
182 expand.MuteFactor(0) < 16384 / 2 &&
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100183 current_span < static_cast<size_t>(delay_manager_->TargetLevel() *
184 packet_length_samples_ *
185 kPostponeDecodingLevel / 100)>> 8 &&
Minyue Li7f6417f2018-10-03 21:19:08 +0200186 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200187 return kExpand;
188 }
189
190 const uint32_t five_seconds_samples =
191 static_cast<uint32_t>(5 * 8000 * fs_mult_);
192 // Check if the required packet is available.
193 if (target_timestamp == available_timestamp) {
194 return ExpectedPacketAvailable(prev_mode, play_dtmf);
195 } else if (!PacketBuffer::IsObsoleteTimestamp(
196 available_timestamp, target_timestamp, five_seconds_samples)) {
197 return FuturePacketAvailable(
198 sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
199 available_timestamp, play_dtmf, generated_noise_samples);
200 } else {
201 // This implies that available_timestamp < target_timestamp, which can
202 // happen when a new stream or codec is received. Signal for a reset.
203 return kUndefined;
204 }
205}
206
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200207void DecisionLogic::ExpandDecision(Operations operation) {
208 if (operation == kExpand) {
209 num_consecutive_expands_++;
210 } else {
211 num_consecutive_expands_ = 0;
212 }
213}
214
Minyue Li7d204d52019-04-16 11:44:49 +0200215void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
216 buffer_level_filter_->SetTargetBufferLevel(
217 delay_manager_->base_target_level());
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200218
Minyue Li7d204d52019-04-16 11:44:49 +0200219 size_t buffer_size_packets = 0;
220 if (packet_length_samples_ > 0) {
221 // Calculate size in packets.
222 buffer_size_packets = buffer_size_samples / packet_length_samples_;
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200223 }
Minyue Li7d204d52019-04-16 11:44:49 +0200224 int sample_memory_local = 0;
225 if (prev_time_scale_) {
226 sample_memory_local = sample_memory_;
227 timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
228 }
229
230 buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
231 packet_length_samples_);
232 prev_time_scale_ = false;
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200233}
234
Henrik Lundin7687ad52018-07-02 10:14:46 +0200235Operations DecisionLogic::CngOperation(Modes prev_mode,
236 uint32_t target_timestamp,
237 uint32_t available_timestamp,
238 size_t generated_noise_samples) {
239 // Signed difference between target and available timestamp.
240 int32_t timestamp_diff = static_cast<int32_t>(
241 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
242 available_timestamp);
243 int32_t optimal_level_samp = static_cast<int32_t>(
244 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
245 const int64_t excess_waiting_time_samp =
246 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
247
248 if (excess_waiting_time_samp > optimal_level_samp / 2) {
249 // The waiting time for this packet will be longer than 1.5
250 // times the wanted buffer delay. Apply fast-forward to cut the
251 // waiting time down to the optimal.
252 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
253 excess_waiting_time_samp);
254 timestamp_diff =
255 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
256 }
257
258 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
259 // Not time to play this packet yet. Wait another round before using this
260 // packet. Keep on playing CNG from previous CNG parameters.
261 return kRfc3389CngNoPacket;
262 } else {
263 // Otherwise, go for the CNG packet now.
264 noise_fast_forward_ = 0;
265 return kRfc3389Cng;
266 }
267}
268
269Operations DecisionLogic::NoPacket(bool play_dtmf) {
270 if (cng_state_ == kCngRfc3389On) {
271 // Keep on playing comfort noise.
272 return kRfc3389CngNoPacket;
273 } else if (cng_state_ == kCngInternalOn) {
274 // Keep on playing codec internal comfort noise.
275 return kCodecInternalCng;
276 } else if (play_dtmf) {
277 return kDtmf;
278 } else {
279 // Nothing to play, do expand.
280 return kExpand;
281 }
282}
283
284Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
285 bool play_dtmf) {
286 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
287 // Check criterion for time-stretching.
288 int low_limit, high_limit;
289 delay_manager_->BufferLimits(&low_limit, &high_limit);
290 if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
291 return kFastAccelerate;
292 if (TimescaleAllowed()) {
293 if (buffer_level_filter_->filtered_current_level() >= high_limit)
294 return kAccelerate;
295 if (buffer_level_filter_->filtered_current_level() < low_limit)
296 return kPreemptiveExpand;
297 }
298 }
299 return kNormal;
300}
301
302Operations DecisionLogic::FuturePacketAvailable(
303 const SyncBuffer& sync_buffer,
304 const Expand& expand,
305 size_t decoder_frame_length,
306 Modes prev_mode,
307 uint32_t target_timestamp,
308 uint32_t available_timestamp,
309 bool play_dtmf,
310 size_t generated_noise_samples) {
311 // Required packet is not available, but a future packet is.
312 // Check if we should continue with an ongoing expand because the new packet
313 // is too far into the future.
314 uint32_t timestamp_leap = available_timestamp - target_timestamp;
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200315 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
316 !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
317 PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200318 if (play_dtmf) {
319 // Still have DTMF to play, so do not do expand.
320 return kDtmf;
321 } else {
322 // Nothing to play.
323 return kExpand;
324 }
325 }
326
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200327 if (prev_mode == kModeCodecPlc) {
328 return kNormal;
329 }
330
Henrik Lundin7687ad52018-07-02 10:14:46 +0200331 const size_t samples_left =
332 sync_buffer.FutureLength() - expand.overlap_length();
333 const size_t cur_size_samples =
334 samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
335
336 // If previous was comfort noise, then no merge is needed.
337 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
338 // Keep the same delay as before the CNG, but make sure that the number of
339 // samples in buffer is no higher than 4 times the optimal level. (Note that
340 // TargetLevel() is in Q8.)
341 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
342 available_timestamp ||
343 cur_size_samples >
344 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
345 4) {
346 // Time to play this new packet.
347 return kNormal;
348 } else {
349 // Too early to play this new packet; keep on playing comfort noise.
350 if (prev_mode == kModeRfc3389Cng) {
351 return kRfc3389CngNoPacket;
352 } else { // prevPlayMode == kModeCodecInternalCng.
353 return kCodecInternalCng;
354 }
355 }
356 }
357 // Do not merge unless we have done an expand before.
358 if (prev_mode == kModeExpand) {
359 return kMerge;
360 } else if (play_dtmf) {
361 // Play DTMF instead of expand.
362 return kDtmf;
363 } else {
364 return kExpand;
365 }
366}
367
368bool DecisionLogic::UnderTargetLevel() const {
369 return buffer_level_filter_->filtered_current_level() <=
370 delay_manager_->TargetLevel();
371}
372
373bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
374 return timestamp_leap >=
375 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
376}
377
378bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
379 return timestamp_leap >
380 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
381}
382
383bool DecisionLogic::MaxWaitForPacket() const {
384 return num_consecutive_expands_ >= kMaxWaitForPacket;
385}
386
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000387} // namespace webrtc