blob: fda12c3ad3e747e8e7b475508a8dd8226bcba42d [file] [log] [blame]
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin7687ad52018-07-02 10:14:46 +020013#include <assert.h>
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stdio.h>
15#include <string>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Yves Gerey988cc082018-10-23 12:03:01 +020023#include "rtc_base/checks.h"
Minyue Li7f6417f2018-10-03 21:19:08 +020024#include "rtc_base/logging.h"
Yves Gerey988cc082018-10-23 12:03:01 +020025#include "rtc_base/numerics/safe_conversions.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000026
namespace {

// Percentage applied to the target buffer level (TargetLevel() scaled by the
// packet length) in GetDecision(): after an expand/PLC period, decoding is
// postponed while the buffered span is below this fraction of the target.
constexpr int kPostponeDecodingLevel{50};

}  // namespace
32
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000033namespace webrtc {
34
35DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070036 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020037 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000038 DecoderDatabase* decoder_database,
39 const PacketBuffer& packet_buffer,
40 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020041 BufferLevelFilter* buffer_level_filter,
42 const TickTimer* tick_timer) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020043 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
44 decoder_database, packet_buffer, delay_manager,
45 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000046}
47
48DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070049 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020050 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000051 DecoderDatabase* decoder_database,
52 const PacketBuffer& packet_buffer,
53 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020054 BufferLevelFilter* buffer_level_filter,
55 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000056 : decoder_database_(decoder_database),
57 packet_buffer_(packet_buffer),
58 delay_manager_(delay_manager),
59 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020060 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000061 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000062 packet_length_samples_(0),
63 sample_memory_(0),
64 prev_time_scale_(false),
Henrik Lundin7687ad52018-07-02 10:14:46 +020065 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020066 timescale_countdown_(
67 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
Jakob Ivarssond3a780b2019-02-28 14:30:21 +010068 num_consecutive_expands_(0) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020069 delay_manager_->set_streaming_mode(false);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000070 SetSampleRate(fs_hz, output_size_samples);
71}
72
Henrik Lundin47b17dc2016-05-10 10:20:59 +020073DecisionLogic::~DecisionLogic() = default;
74
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000075void DecisionLogic::Reset() {
76 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -070077 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000078 packet_length_samples_ = 0;
79 sample_memory_ = 0;
80 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020081 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000082 num_consecutive_expands_ = 0;
83}
84
85void DecisionLogic::SoftReset() {
86 packet_length_samples_ = 0;
87 sample_memory_ = 0;
88 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020089 timescale_countdown_ =
90 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000091}
92
Peter Kastingdce40cf2015-08-24 14:52:23 -070093void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000094 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +020095 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000096 fs_mult_ = fs_hz / 8000;
97 output_size_samples_ = output_size_samples;
98}
99
100Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
101 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700102 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -0700103 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000104 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700105 bool play_dtmf,
106 size_t generated_noise_samples,
107 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700108 // If last mode was CNG (or Expand, since this could be covering up for
109 // a lost CNG packet), remember that CNG is on. This is needed if comfort
110 // noise is interrupted by DTMF.
111 if (prev_mode == kModeRfc3389Cng) {
112 cng_state_ = kCngRfc3389On;
113 } else if (prev_mode == kModeCodecInternalCng) {
114 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000115 }
116
Peter Kastingdce40cf2015-08-24 14:52:23 -0700117 const size_t samples_left =
118 sync_buffer.FutureLength() - expand.overlap_length();
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100119 // TODO(jakobi): Use buffer span instead of num samples.
Peter Kastingdce40cf2015-08-24 14:52:23 -0700120 const size_t cur_size_samples =
ossu61a208b2016-09-20 01:38:00 -0700121 samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000122
Yves Gerey665174f2018-06-19 15:03:05 +0200123 prev_time_scale_ =
124 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
125 prev_mode == kModeAccelerateLowEnergy ||
126 prev_mode == kModePreemptiveExpandSuccess ||
127 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000128
129 FilterBufferLevel(cur_size_samples, prev_mode);
130
Henrik Lundin7687ad52018-07-02 10:14:46 +0200131 // Guard for errors, to avoid getting stuck in error mode.
132 if (prev_mode == kModeError) {
133 if (!next_packet) {
134 return kExpand;
135 } else {
136 return kUndefined; // Use kUndefined to flag for a reset.
137 }
138 }
139
140 uint32_t target_timestamp = sync_buffer.end_timestamp();
141 uint32_t available_timestamp = 0;
142 bool is_cng_packet = false;
143 if (next_packet) {
144 available_timestamp = next_packet->timestamp;
145 is_cng_packet =
146 decoder_database_->IsComfortNoise(next_packet->payload_type);
147 }
148
149 if (is_cng_packet) {
150 return CngOperation(prev_mode, target_timestamp, available_timestamp,
151 generated_noise_samples);
152 }
153
154 // Handle the case with no packet at all available (except maybe DTMF).
155 if (!next_packet) {
156 return NoPacket(play_dtmf);
157 }
158
159 // If the expand period was very long, reset NetEQ since it is likely that the
160 // sender was restarted.
161 if (num_consecutive_expands_ > kReinitAfterExpands) {
162 *reset_decoder = true;
163 return kNormal;
164 }
165
166 // Make sure we don't restart audio too soon after an expansion to avoid
167 // running out of data right away again. We should only wait if there are no
168 // DTX or CNG packets in the buffer (otherwise we should just play out what we
169 // have, since we cannot know the exact duration of DTX or CNG packets), and
170 // if the mute factor is low enough (otherwise the expansion was short enough
171 // to not be noticable).
172 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100173 size_t current_span =
174 samples_left + packet_buffer_.GetSpanSamples(decoder_frame_length);
Minyue Li7f6417f2018-10-03 21:19:08 +0200175 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
176 expand.MuteFactor(0) < 16384 / 2 &&
Jakob Ivarsson1b4254a2019-03-12 15:12:08 +0100177 current_span < static_cast<size_t>(delay_manager_->TargetLevel() *
178 packet_length_samples_ *
179 kPostponeDecodingLevel / 100)>> 8 &&
Minyue Li7f6417f2018-10-03 21:19:08 +0200180 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200181 return kExpand;
182 }
183
184 const uint32_t five_seconds_samples =
185 static_cast<uint32_t>(5 * 8000 * fs_mult_);
186 // Check if the required packet is available.
187 if (target_timestamp == available_timestamp) {
188 return ExpectedPacketAvailable(prev_mode, play_dtmf);
189 } else if (!PacketBuffer::IsObsoleteTimestamp(
190 available_timestamp, target_timestamp, five_seconds_samples)) {
191 return FuturePacketAvailable(
192 sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
193 available_timestamp, play_dtmf, generated_noise_samples);
194 } else {
195 // This implies that available_timestamp < target_timestamp, which can
196 // happen when a new stream or codec is received. Signal for a reset.
197 return kUndefined;
198 }
199}
200
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200201void DecisionLogic::ExpandDecision(Operations operation) {
202 if (operation == kExpand) {
203 num_consecutive_expands_++;
204 } else {
205 num_consecutive_expands_ = 0;
206 }
207}
208
209void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
210 Modes prev_mode) {
211 // Do not update buffer history if currently playing CNG since it will bias
212 // the filtered buffer level.
213 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
214 buffer_level_filter_->SetTargetBufferLevel(
215 delay_manager_->base_target_level());
216
217 size_t buffer_size_packets = 0;
218 if (packet_length_samples_ > 0) {
219 // Calculate size in packets.
220 buffer_size_packets = buffer_size_samples / packet_length_samples_;
221 }
222 int sample_memory_local = 0;
223 if (prev_time_scale_) {
224 sample_memory_local = sample_memory_;
225 timescale_countdown_ =
226 tick_timer_->GetNewCountdown(kMinTimescaleInterval);
227 }
228 buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
229 packet_length_samples_);
230 prev_time_scale_ = false;
231 }
232}
233
Henrik Lundin7687ad52018-07-02 10:14:46 +0200234Operations DecisionLogic::CngOperation(Modes prev_mode,
235 uint32_t target_timestamp,
236 uint32_t available_timestamp,
237 size_t generated_noise_samples) {
238 // Signed difference between target and available timestamp.
239 int32_t timestamp_diff = static_cast<int32_t>(
240 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
241 available_timestamp);
242 int32_t optimal_level_samp = static_cast<int32_t>(
243 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
244 const int64_t excess_waiting_time_samp =
245 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
246
247 if (excess_waiting_time_samp > optimal_level_samp / 2) {
248 // The waiting time for this packet will be longer than 1.5
249 // times the wanted buffer delay. Apply fast-forward to cut the
250 // waiting time down to the optimal.
251 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
252 excess_waiting_time_samp);
253 timestamp_diff =
254 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
255 }
256
257 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
258 // Not time to play this packet yet. Wait another round before using this
259 // packet. Keep on playing CNG from previous CNG parameters.
260 return kRfc3389CngNoPacket;
261 } else {
262 // Otherwise, go for the CNG packet now.
263 noise_fast_forward_ = 0;
264 return kRfc3389Cng;
265 }
266}
267
268Operations DecisionLogic::NoPacket(bool play_dtmf) {
269 if (cng_state_ == kCngRfc3389On) {
270 // Keep on playing comfort noise.
271 return kRfc3389CngNoPacket;
272 } else if (cng_state_ == kCngInternalOn) {
273 // Keep on playing codec internal comfort noise.
274 return kCodecInternalCng;
275 } else if (play_dtmf) {
276 return kDtmf;
277 } else {
278 // Nothing to play, do expand.
279 return kExpand;
280 }
281}
282
283Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
284 bool play_dtmf) {
285 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
286 // Check criterion for time-stretching.
287 int low_limit, high_limit;
288 delay_manager_->BufferLimits(&low_limit, &high_limit);
289 if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
290 return kFastAccelerate;
291 if (TimescaleAllowed()) {
292 if (buffer_level_filter_->filtered_current_level() >= high_limit)
293 return kAccelerate;
294 if (buffer_level_filter_->filtered_current_level() < low_limit)
295 return kPreemptiveExpand;
296 }
297 }
298 return kNormal;
299}
300
301Operations DecisionLogic::FuturePacketAvailable(
302 const SyncBuffer& sync_buffer,
303 const Expand& expand,
304 size_t decoder_frame_length,
305 Modes prev_mode,
306 uint32_t target_timestamp,
307 uint32_t available_timestamp,
308 bool play_dtmf,
309 size_t generated_noise_samples) {
310 // Required packet is not available, but a future packet is.
311 // Check if we should continue with an ongoing expand because the new packet
312 // is too far into the future.
313 uint32_t timestamp_leap = available_timestamp - target_timestamp;
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200314 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
315 !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
316 PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200317 if (play_dtmf) {
318 // Still have DTMF to play, so do not do expand.
319 return kDtmf;
320 } else {
321 // Nothing to play.
322 return kExpand;
323 }
324 }
325
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200326 if (prev_mode == kModeCodecPlc) {
327 return kNormal;
328 }
329
Henrik Lundin7687ad52018-07-02 10:14:46 +0200330 const size_t samples_left =
331 sync_buffer.FutureLength() - expand.overlap_length();
332 const size_t cur_size_samples =
333 samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
334
335 // If previous was comfort noise, then no merge is needed.
336 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
337 // Keep the same delay as before the CNG, but make sure that the number of
338 // samples in buffer is no higher than 4 times the optimal level. (Note that
339 // TargetLevel() is in Q8.)
340 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
341 available_timestamp ||
342 cur_size_samples >
343 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
344 4) {
345 // Time to play this new packet.
346 return kNormal;
347 } else {
348 // Too early to play this new packet; keep on playing comfort noise.
349 if (prev_mode == kModeRfc3389Cng) {
350 return kRfc3389CngNoPacket;
351 } else { // prevPlayMode == kModeCodecInternalCng.
352 return kCodecInternalCng;
353 }
354 }
355 }
356 // Do not merge unless we have done an expand before.
357 if (prev_mode == kModeExpand) {
358 return kMerge;
359 } else if (play_dtmf) {
360 // Play DTMF instead of expand.
361 return kDtmf;
362 } else {
363 return kExpand;
364 }
365}
366
367bool DecisionLogic::UnderTargetLevel() const {
368 return buffer_level_filter_->filtered_current_level() <=
369 delay_manager_->TargetLevel();
370}
371
372bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
373 return timestamp_leap >=
374 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
375}
376
377bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
378 return timestamp_leap >
379 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
380}
381
382bool DecisionLogic::MaxWaitForPacket() const {
383 return num_consecutive_expands_ >= kMaxWaitForPacket;
384}
385
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000386} // namespace webrtc