blob: 83c2b3b90c569eab3cc11d491971ceb06932d3f8 [file] [log] [blame]
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +00001/*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin7687ad52018-07-02 10:14:46 +020013#include <assert.h>
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stdio.h>
15#include <string>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Yves Gerey988cc082018-10-23 12:03:01 +020023#include "rtc_base/checks.h"
Minyue Li7f6417f2018-10-03 21:19:08 +020024#include "rtc_base/logging.h"
Yves Gerey988cc082018-10-23 12:03:01 +020025#include "rtc_base/numerics/safe_conversions.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020026#include "system_wrappers/include/field_trial.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000027
Minyue Li7f6417f2018-10-03 21:19:08 +020028namespace {
29constexpr char kPostponeDecodingFieldTrial[] =
30 "WebRTC-Audio-NetEqPostponeDecodingAfterExpand";
31
32int GetPostponeDecodingLevel() {
33 const bool enabled =
34 webrtc::field_trial::IsEnabled(kPostponeDecodingFieldTrial);
35 if (!enabled)
36 return 0;
37
38 constexpr int kDefaultPostponeDecodingLevel = 50;
39 const std::string field_trial_string =
40 webrtc::field_trial::FindFullName(kPostponeDecodingFieldTrial);
41 int value = -1;
42 if (sscanf(field_trial_string.c_str(), "Enabled-%d", &value) == 1) {
43 if (value >= 0 && value <= 100) {
44 return value;
45 } else {
46 RTC_LOG(LS_WARNING)
47 << "Wrong value (" << value
48 << ") for postpone decoding after expand, using default ("
49 << kDefaultPostponeDecodingLevel << ")";
50 }
51 }
52 return kDefaultPostponeDecodingLevel;
53}
54
55} // namespace
56
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000057namespace webrtc {
58
59DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070060 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020061 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000062 DecoderDatabase* decoder_database,
63 const PacketBuffer& packet_buffer,
64 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020065 BufferLevelFilter* buffer_level_filter,
66 const TickTimer* tick_timer) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020067 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
68 decoder_database, packet_buffer, delay_manager,
69 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000070}
71
72DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070073 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020074 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000075 DecoderDatabase* decoder_database,
76 const PacketBuffer& packet_buffer,
77 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020078 BufferLevelFilter* buffer_level_filter,
79 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000080 : decoder_database_(decoder_database),
81 packet_buffer_(packet_buffer),
82 delay_manager_(delay_manager),
83 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020084 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000085 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000086 packet_length_samples_(0),
87 sample_memory_(0),
88 prev_time_scale_(false),
Henrik Lundin7687ad52018-07-02 10:14:46 +020089 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020090 timescale_countdown_(
91 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000092 num_consecutive_expands_(0),
Minyue Li7f6417f2018-10-03 21:19:08 +020093 postpone_decoding_level_(GetPostponeDecodingLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020094 delay_manager_->set_streaming_mode(false);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000095 SetSampleRate(fs_hz, output_size_samples);
96}
97
Henrik Lundin47b17dc2016-05-10 10:20:59 +020098DecisionLogic::~DecisionLogic() = default;
99
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000100void DecisionLogic::Reset() {
101 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700102 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000103 packet_length_samples_ = 0;
104 sample_memory_ = 0;
105 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +0200106 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000107 num_consecutive_expands_ = 0;
108}
109
110void DecisionLogic::SoftReset() {
111 packet_length_samples_ = 0;
112 sample_memory_ = 0;
113 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +0200114 timescale_countdown_ =
115 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000116}
117
Peter Kastingdce40cf2015-08-24 14:52:23 -0700118void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000119 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +0200120 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000121 fs_mult_ = fs_hz / 8000;
122 output_size_samples_ = output_size_samples;
123}
124
125Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
126 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700127 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -0700128 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000129 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700130 bool play_dtmf,
131 size_t generated_noise_samples,
132 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700133 // If last mode was CNG (or Expand, since this could be covering up for
134 // a lost CNG packet), remember that CNG is on. This is needed if comfort
135 // noise is interrupted by DTMF.
136 if (prev_mode == kModeRfc3389Cng) {
137 cng_state_ = kCngRfc3389On;
138 } else if (prev_mode == kModeCodecInternalCng) {
139 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000140 }
141
Peter Kastingdce40cf2015-08-24 14:52:23 -0700142 const size_t samples_left =
143 sync_buffer.FutureLength() - expand.overlap_length();
144 const size_t cur_size_samples =
ossu61a208b2016-09-20 01:38:00 -0700145 samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000146
Yves Gerey665174f2018-06-19 15:03:05 +0200147 prev_time_scale_ =
148 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
149 prev_mode == kModeAccelerateLowEnergy ||
150 prev_mode == kModePreemptiveExpandSuccess ||
151 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000152
153 FilterBufferLevel(cur_size_samples, prev_mode);
154
Henrik Lundin7687ad52018-07-02 10:14:46 +0200155 // Guard for errors, to avoid getting stuck in error mode.
156 if (prev_mode == kModeError) {
157 if (!next_packet) {
158 return kExpand;
159 } else {
160 return kUndefined; // Use kUndefined to flag for a reset.
161 }
162 }
163
164 uint32_t target_timestamp = sync_buffer.end_timestamp();
165 uint32_t available_timestamp = 0;
166 bool is_cng_packet = false;
167 if (next_packet) {
168 available_timestamp = next_packet->timestamp;
169 is_cng_packet =
170 decoder_database_->IsComfortNoise(next_packet->payload_type);
171 }
172
173 if (is_cng_packet) {
174 return CngOperation(prev_mode, target_timestamp, available_timestamp,
175 generated_noise_samples);
176 }
177
178 // Handle the case with no packet at all available (except maybe DTMF).
179 if (!next_packet) {
180 return NoPacket(play_dtmf);
181 }
182
183 // If the expand period was very long, reset NetEQ since it is likely that the
184 // sender was restarted.
185 if (num_consecutive_expands_ > kReinitAfterExpands) {
186 *reset_decoder = true;
187 return kNormal;
188 }
189
190 // Make sure we don't restart audio too soon after an expansion to avoid
191 // running out of data right away again. We should only wait if there are no
192 // DTX or CNG packets in the buffer (otherwise we should just play out what we
193 // have, since we cannot know the exact duration of DTX or CNG packets), and
194 // if the mute factor is low enough (otherwise the expansion was short enough
195 // to not be noticable).
196 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
Minyue Li7f6417f2018-10-03 21:19:08 +0200197 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
198 expand.MuteFactor(0) < 16384 / 2 &&
199 cur_size_samples < static_cast<size_t>(
200 delay_manager_->TargetLevel() * packet_length_samples_ *
201 postpone_decoding_level_ / 100) >> 8 &&
202 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) {
203 RTC_DCHECK(webrtc::field_trial::IsEnabled(kPostponeDecodingFieldTrial));
Henrik Lundin7687ad52018-07-02 10:14:46 +0200204 return kExpand;
205 }
206
207 const uint32_t five_seconds_samples =
208 static_cast<uint32_t>(5 * 8000 * fs_mult_);
209 // Check if the required packet is available.
210 if (target_timestamp == available_timestamp) {
211 return ExpectedPacketAvailable(prev_mode, play_dtmf);
212 } else if (!PacketBuffer::IsObsoleteTimestamp(
213 available_timestamp, target_timestamp, five_seconds_samples)) {
214 return FuturePacketAvailable(
215 sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
216 available_timestamp, play_dtmf, generated_noise_samples);
217 } else {
218 // This implies that available_timestamp < target_timestamp, which can
219 // happen when a new stream or codec is received. Signal for a reset.
220 return kUndefined;
221 }
222}
223
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200224void DecisionLogic::ExpandDecision(Operations operation) {
225 if (operation == kExpand) {
226 num_consecutive_expands_++;
227 } else {
228 num_consecutive_expands_ = 0;
229 }
230}
231
232void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
233 Modes prev_mode) {
234 // Do not update buffer history if currently playing CNG since it will bias
235 // the filtered buffer level.
236 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
237 buffer_level_filter_->SetTargetBufferLevel(
238 delay_manager_->base_target_level());
239
240 size_t buffer_size_packets = 0;
241 if (packet_length_samples_ > 0) {
242 // Calculate size in packets.
243 buffer_size_packets = buffer_size_samples / packet_length_samples_;
244 }
245 int sample_memory_local = 0;
246 if (prev_time_scale_) {
247 sample_memory_local = sample_memory_;
248 timescale_countdown_ =
249 tick_timer_->GetNewCountdown(kMinTimescaleInterval);
250 }
251 buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
252 packet_length_samples_);
253 prev_time_scale_ = false;
254 }
255}
256
Henrik Lundin7687ad52018-07-02 10:14:46 +0200257Operations DecisionLogic::CngOperation(Modes prev_mode,
258 uint32_t target_timestamp,
259 uint32_t available_timestamp,
260 size_t generated_noise_samples) {
261 // Signed difference between target and available timestamp.
262 int32_t timestamp_diff = static_cast<int32_t>(
263 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
264 available_timestamp);
265 int32_t optimal_level_samp = static_cast<int32_t>(
266 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
267 const int64_t excess_waiting_time_samp =
268 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
269
270 if (excess_waiting_time_samp > optimal_level_samp / 2) {
271 // The waiting time for this packet will be longer than 1.5
272 // times the wanted buffer delay. Apply fast-forward to cut the
273 // waiting time down to the optimal.
274 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
275 excess_waiting_time_samp);
276 timestamp_diff =
277 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
278 }
279
280 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
281 // Not time to play this packet yet. Wait another round before using this
282 // packet. Keep on playing CNG from previous CNG parameters.
283 return kRfc3389CngNoPacket;
284 } else {
285 // Otherwise, go for the CNG packet now.
286 noise_fast_forward_ = 0;
287 return kRfc3389Cng;
288 }
289}
290
291Operations DecisionLogic::NoPacket(bool play_dtmf) {
292 if (cng_state_ == kCngRfc3389On) {
293 // Keep on playing comfort noise.
294 return kRfc3389CngNoPacket;
295 } else if (cng_state_ == kCngInternalOn) {
296 // Keep on playing codec internal comfort noise.
297 return kCodecInternalCng;
298 } else if (play_dtmf) {
299 return kDtmf;
300 } else {
301 // Nothing to play, do expand.
302 return kExpand;
303 }
304}
305
306Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
307 bool play_dtmf) {
308 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
309 // Check criterion for time-stretching.
310 int low_limit, high_limit;
311 delay_manager_->BufferLimits(&low_limit, &high_limit);
312 if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
313 return kFastAccelerate;
314 if (TimescaleAllowed()) {
315 if (buffer_level_filter_->filtered_current_level() >= high_limit)
316 return kAccelerate;
317 if (buffer_level_filter_->filtered_current_level() < low_limit)
318 return kPreemptiveExpand;
319 }
320 }
321 return kNormal;
322}
323
324Operations DecisionLogic::FuturePacketAvailable(
325 const SyncBuffer& sync_buffer,
326 const Expand& expand,
327 size_t decoder_frame_length,
328 Modes prev_mode,
329 uint32_t target_timestamp,
330 uint32_t available_timestamp,
331 bool play_dtmf,
332 size_t generated_noise_samples) {
333 // Required packet is not available, but a future packet is.
334 // Check if we should continue with an ongoing expand because the new packet
335 // is too far into the future.
336 uint32_t timestamp_leap = available_timestamp - target_timestamp;
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200337 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
338 !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
339 PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200340 if (play_dtmf) {
341 // Still have DTMF to play, so do not do expand.
342 return kDtmf;
343 } else {
344 // Nothing to play.
345 return kExpand;
346 }
347 }
348
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200349 if (prev_mode == kModeCodecPlc) {
350 return kNormal;
351 }
352
Henrik Lundin7687ad52018-07-02 10:14:46 +0200353 const size_t samples_left =
354 sync_buffer.FutureLength() - expand.overlap_length();
355 const size_t cur_size_samples =
356 samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
357
358 // If previous was comfort noise, then no merge is needed.
359 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
360 // Keep the same delay as before the CNG, but make sure that the number of
361 // samples in buffer is no higher than 4 times the optimal level. (Note that
362 // TargetLevel() is in Q8.)
363 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
364 available_timestamp ||
365 cur_size_samples >
366 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
367 4) {
368 // Time to play this new packet.
369 return kNormal;
370 } else {
371 // Too early to play this new packet; keep on playing comfort noise.
372 if (prev_mode == kModeRfc3389Cng) {
373 return kRfc3389CngNoPacket;
374 } else { // prevPlayMode == kModeCodecInternalCng.
375 return kCodecInternalCng;
376 }
377 }
378 }
379 // Do not merge unless we have done an expand before.
380 if (prev_mode == kModeExpand) {
381 return kMerge;
382 } else if (play_dtmf) {
383 // Play DTMF instead of expand.
384 return kDtmf;
385 } else {
386 return kExpand;
387 }
388}
389
390bool DecisionLogic::UnderTargetLevel() const {
391 return buffer_level_filter_->filtered_current_level() <=
392 delay_manager_->TargetLevel();
393}
394
395bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
396 return timestamp_leap >=
397 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
398}
399
400bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
401 return timestamp_leap >
402 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
403}
404
405bool DecisionLogic::MaxWaitForPacket() const {
406 return num_consecutive_expands_ >= kMaxWaitForPacket;
407}
408
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000409} // namespace webrtc