blob: 349fdab9f15d0973e9f8c393edd56f43f88fb712 [file] [log] [blame]
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +00001/*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin7687ad52018-07-02 10:14:46 +020013#include <assert.h>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000014#include <algorithm>
Henrik Lundin7687ad52018-07-02 10:14:46 +020015#include <limits>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +020023#include "modules/include/module_common_types.h"
Minyue Li7f6417f2018-10-03 21:19:08 +020024#include "rtc_base/logging.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020025#include "system_wrappers/include/field_trial.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000026
Minyue Li7f6417f2018-10-03 21:19:08 +020027namespace {
28constexpr char kPostponeDecodingFieldTrial[] =
29 "WebRTC-Audio-NetEqPostponeDecodingAfterExpand";
30
31int GetPostponeDecodingLevel() {
32 const bool enabled =
33 webrtc::field_trial::IsEnabled(kPostponeDecodingFieldTrial);
34 if (!enabled)
35 return 0;
36
37 constexpr int kDefaultPostponeDecodingLevel = 50;
38 const std::string field_trial_string =
39 webrtc::field_trial::FindFullName(kPostponeDecodingFieldTrial);
40 int value = -1;
41 if (sscanf(field_trial_string.c_str(), "Enabled-%d", &value) == 1) {
42 if (value >= 0 && value <= 100) {
43 return value;
44 } else {
45 RTC_LOG(LS_WARNING)
46 << "Wrong value (" << value
47 << ") for postpone decoding after expand, using default ("
48 << kDefaultPostponeDecodingLevel << ")";
49 }
50 }
51 return kDefaultPostponeDecodingLevel;
52}
53
54} // namespace
55
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000056namespace webrtc {
57
58DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070059 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020060 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000061 DecoderDatabase* decoder_database,
62 const PacketBuffer& packet_buffer,
63 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020064 BufferLevelFilter* buffer_level_filter,
65 const TickTimer* tick_timer) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020066 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
67 decoder_database, packet_buffer, delay_manager,
68 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000069}
70
71DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070072 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020073 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000074 DecoderDatabase* decoder_database,
75 const PacketBuffer& packet_buffer,
76 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020077 BufferLevelFilter* buffer_level_filter,
78 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000079 : decoder_database_(decoder_database),
80 packet_buffer_(packet_buffer),
81 delay_manager_(delay_manager),
82 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020083 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000084 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000085 packet_length_samples_(0),
86 sample_memory_(0),
87 prev_time_scale_(false),
Henrik Lundin7687ad52018-07-02 10:14:46 +020088 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020089 timescale_countdown_(
90 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000091 num_consecutive_expands_(0),
Minyue Li7f6417f2018-10-03 21:19:08 +020092 postpone_decoding_level_(GetPostponeDecodingLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020093 delay_manager_->set_streaming_mode(false);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000094 SetSampleRate(fs_hz, output_size_samples);
95}
96
Henrik Lundin47b17dc2016-05-10 10:20:59 +020097DecisionLogic::~DecisionLogic() = default;
98
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000099void DecisionLogic::Reset() {
100 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700101 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000102 packet_length_samples_ = 0;
103 sample_memory_ = 0;
104 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +0200105 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000106 num_consecutive_expands_ = 0;
107}
108
109void DecisionLogic::SoftReset() {
110 packet_length_samples_ = 0;
111 sample_memory_ = 0;
112 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +0200113 timescale_countdown_ =
114 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000115}
116
Peter Kastingdce40cf2015-08-24 14:52:23 -0700117void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000118 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +0200119 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000120 fs_mult_ = fs_hz / 8000;
121 output_size_samples_ = output_size_samples;
122}
123
124Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
125 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -0700126 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -0700127 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000128 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700129 bool play_dtmf,
130 size_t generated_noise_samples,
131 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700132 // If last mode was CNG (or Expand, since this could be covering up for
133 // a lost CNG packet), remember that CNG is on. This is needed if comfort
134 // noise is interrupted by DTMF.
135 if (prev_mode == kModeRfc3389Cng) {
136 cng_state_ = kCngRfc3389On;
137 } else if (prev_mode == kModeCodecInternalCng) {
138 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000139 }
140
Peter Kastingdce40cf2015-08-24 14:52:23 -0700141 const size_t samples_left =
142 sync_buffer.FutureLength() - expand.overlap_length();
143 const size_t cur_size_samples =
ossu61a208b2016-09-20 01:38:00 -0700144 samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000145
Yves Gerey665174f2018-06-19 15:03:05 +0200146 prev_time_scale_ =
147 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
148 prev_mode == kModeAccelerateLowEnergy ||
149 prev_mode == kModePreemptiveExpandSuccess ||
150 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000151
152 FilterBufferLevel(cur_size_samples, prev_mode);
153
Henrik Lundin7687ad52018-07-02 10:14:46 +0200154 // Guard for errors, to avoid getting stuck in error mode.
155 if (prev_mode == kModeError) {
156 if (!next_packet) {
157 return kExpand;
158 } else {
159 return kUndefined; // Use kUndefined to flag for a reset.
160 }
161 }
162
163 uint32_t target_timestamp = sync_buffer.end_timestamp();
164 uint32_t available_timestamp = 0;
165 bool is_cng_packet = false;
166 if (next_packet) {
167 available_timestamp = next_packet->timestamp;
168 is_cng_packet =
169 decoder_database_->IsComfortNoise(next_packet->payload_type);
170 }
171
172 if (is_cng_packet) {
173 return CngOperation(prev_mode, target_timestamp, available_timestamp,
174 generated_noise_samples);
175 }
176
177 // Handle the case with no packet at all available (except maybe DTMF).
178 if (!next_packet) {
179 return NoPacket(play_dtmf);
180 }
181
182 // If the expand period was very long, reset NetEQ since it is likely that the
183 // sender was restarted.
184 if (num_consecutive_expands_ > kReinitAfterExpands) {
185 *reset_decoder = true;
186 return kNormal;
187 }
188
189 // Make sure we don't restart audio too soon after an expansion to avoid
190 // running out of data right away again. We should only wait if there are no
191 // DTX or CNG packets in the buffer (otherwise we should just play out what we
192 // have, since we cannot know the exact duration of DTX or CNG packets), and
193 // if the mute factor is low enough (otherwise the expansion was short enough
194 // to not be noticable).
195 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
Minyue Li7f6417f2018-10-03 21:19:08 +0200196 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
197 expand.MuteFactor(0) < 16384 / 2 &&
198 cur_size_samples < static_cast<size_t>(
199 delay_manager_->TargetLevel() * packet_length_samples_ *
200 postpone_decoding_level_ / 100) >> 8 &&
201 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) {
202 RTC_DCHECK(webrtc::field_trial::IsEnabled(kPostponeDecodingFieldTrial));
Henrik Lundin7687ad52018-07-02 10:14:46 +0200203 return kExpand;
204 }
205
206 const uint32_t five_seconds_samples =
207 static_cast<uint32_t>(5 * 8000 * fs_mult_);
208 // Check if the required packet is available.
209 if (target_timestamp == available_timestamp) {
210 return ExpectedPacketAvailable(prev_mode, play_dtmf);
211 } else if (!PacketBuffer::IsObsoleteTimestamp(
212 available_timestamp, target_timestamp, five_seconds_samples)) {
213 return FuturePacketAvailable(
214 sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
215 available_timestamp, play_dtmf, generated_noise_samples);
216 } else {
217 // This implies that available_timestamp < target_timestamp, which can
218 // happen when a new stream or codec is received. Signal for a reset.
219 return kUndefined;
220 }
221}
222
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200223void DecisionLogic::ExpandDecision(Operations operation) {
224 if (operation == kExpand) {
225 num_consecutive_expands_++;
226 } else {
227 num_consecutive_expands_ = 0;
228 }
229}
230
231void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
232 Modes prev_mode) {
233 // Do not update buffer history if currently playing CNG since it will bias
234 // the filtered buffer level.
235 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
236 buffer_level_filter_->SetTargetBufferLevel(
237 delay_manager_->base_target_level());
238
239 size_t buffer_size_packets = 0;
240 if (packet_length_samples_ > 0) {
241 // Calculate size in packets.
242 buffer_size_packets = buffer_size_samples / packet_length_samples_;
243 }
244 int sample_memory_local = 0;
245 if (prev_time_scale_) {
246 sample_memory_local = sample_memory_;
247 timescale_countdown_ =
248 tick_timer_->GetNewCountdown(kMinTimescaleInterval);
249 }
250 buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
251 packet_length_samples_);
252 prev_time_scale_ = false;
253 }
254}
255
Henrik Lundin7687ad52018-07-02 10:14:46 +0200256Operations DecisionLogic::CngOperation(Modes prev_mode,
257 uint32_t target_timestamp,
258 uint32_t available_timestamp,
259 size_t generated_noise_samples) {
260 // Signed difference between target and available timestamp.
261 int32_t timestamp_diff = static_cast<int32_t>(
262 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
263 available_timestamp);
264 int32_t optimal_level_samp = static_cast<int32_t>(
265 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
266 const int64_t excess_waiting_time_samp =
267 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
268
269 if (excess_waiting_time_samp > optimal_level_samp / 2) {
270 // The waiting time for this packet will be longer than 1.5
271 // times the wanted buffer delay. Apply fast-forward to cut the
272 // waiting time down to the optimal.
273 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
274 excess_waiting_time_samp);
275 timestamp_diff =
276 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
277 }
278
279 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
280 // Not time to play this packet yet. Wait another round before using this
281 // packet. Keep on playing CNG from previous CNG parameters.
282 return kRfc3389CngNoPacket;
283 } else {
284 // Otherwise, go for the CNG packet now.
285 noise_fast_forward_ = 0;
286 return kRfc3389Cng;
287 }
288}
289
290Operations DecisionLogic::NoPacket(bool play_dtmf) {
291 if (cng_state_ == kCngRfc3389On) {
292 // Keep on playing comfort noise.
293 return kRfc3389CngNoPacket;
294 } else if (cng_state_ == kCngInternalOn) {
295 // Keep on playing codec internal comfort noise.
296 return kCodecInternalCng;
297 } else if (play_dtmf) {
298 return kDtmf;
299 } else {
300 // Nothing to play, do expand.
301 return kExpand;
302 }
303}
304
305Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
306 bool play_dtmf) {
307 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
308 // Check criterion for time-stretching.
309 int low_limit, high_limit;
310 delay_manager_->BufferLimits(&low_limit, &high_limit);
311 if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
312 return kFastAccelerate;
313 if (TimescaleAllowed()) {
314 if (buffer_level_filter_->filtered_current_level() >= high_limit)
315 return kAccelerate;
316 if (buffer_level_filter_->filtered_current_level() < low_limit)
317 return kPreemptiveExpand;
318 }
319 }
320 return kNormal;
321}
322
323Operations DecisionLogic::FuturePacketAvailable(
324 const SyncBuffer& sync_buffer,
325 const Expand& expand,
326 size_t decoder_frame_length,
327 Modes prev_mode,
328 uint32_t target_timestamp,
329 uint32_t available_timestamp,
330 bool play_dtmf,
331 size_t generated_noise_samples) {
332 // Required packet is not available, but a future packet is.
333 // Check if we should continue with an ongoing expand because the new packet
334 // is too far into the future.
335 uint32_t timestamp_leap = available_timestamp - target_timestamp;
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200336 if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
337 !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
338 PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
Henrik Lundin7687ad52018-07-02 10:14:46 +0200339 if (play_dtmf) {
340 // Still have DTMF to play, so do not do expand.
341 return kDtmf;
342 } else {
343 // Nothing to play.
344 return kExpand;
345 }
346 }
347
Henrik Lundin00eb12a2018-09-05 18:14:52 +0200348 if (prev_mode == kModeCodecPlc) {
349 return kNormal;
350 }
351
Henrik Lundin7687ad52018-07-02 10:14:46 +0200352 const size_t samples_left =
353 sync_buffer.FutureLength() - expand.overlap_length();
354 const size_t cur_size_samples =
355 samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
356
357 // If previous was comfort noise, then no merge is needed.
358 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
359 // Keep the same delay as before the CNG, but make sure that the number of
360 // samples in buffer is no higher than 4 times the optimal level. (Note that
361 // TargetLevel() is in Q8.)
362 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
363 available_timestamp ||
364 cur_size_samples >
365 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
366 4) {
367 // Time to play this new packet.
368 return kNormal;
369 } else {
370 // Too early to play this new packet; keep on playing comfort noise.
371 if (prev_mode == kModeRfc3389Cng) {
372 return kRfc3389CngNoPacket;
373 } else { // prevPlayMode == kModeCodecInternalCng.
374 return kCodecInternalCng;
375 }
376 }
377 }
378 // Do not merge unless we have done an expand before.
379 if (prev_mode == kModeExpand) {
380 return kMerge;
381 } else if (play_dtmf) {
382 // Play DTMF instead of expand.
383 return kDtmf;
384 } else {
385 return kExpand;
386 }
387}
388
389bool DecisionLogic::UnderTargetLevel() const {
390 return buffer_level_filter_->filtered_current_level() <=
391 delay_manager_->TargetLevel();
392}
393
394bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
395 return timestamp_leap >=
396 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
397}
398
399bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
400 return timestamp_leap >
401 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
402}
403
404bool DecisionLogic::MaxWaitForPacket() const {
405 return num_consecutive_expands_ >= kMaxWaitForPacket;
406}
407
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000408} // namespace webrtc