blob: afe25e1a20edbe87d29bc7177f38678cbfc65da9 [file] [log] [blame]
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin80c4cca2018-06-21 11:13:07 +020013#include <assert.h>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000014#include <algorithm>
Henrik Lundin80c4cca2018-06-21 11:13:07 +020015#include <limits>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin80c4cca2018-06-21 11:13:07 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +020023#include "modules/include/module_common_types.h"
Henrik Lundin80c4cca2018-06-21 11:13:07 +020024#include "system_wrappers/include/field_trial.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000025
26namespace webrtc {
27
28DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070029 size_t output_size_samples,
Henrik Lundin80c4cca2018-06-21 11:13:07 +020030 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000031 DecoderDatabase* decoder_database,
32 const PacketBuffer& packet_buffer,
33 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020034 BufferLevelFilter* buffer_level_filter,
35 const TickTimer* tick_timer) {
Henrik Lundin80c4cca2018-06-21 11:13:07 +020036 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
37 decoder_database, packet_buffer, delay_manager,
38 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000039}
40
41DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070042 size_t output_size_samples,
Henrik Lundin80c4cca2018-06-21 11:13:07 +020043 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000044 DecoderDatabase* decoder_database,
45 const PacketBuffer& packet_buffer,
46 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020047 BufferLevelFilter* buffer_level_filter,
48 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000049 : decoder_database_(decoder_database),
50 packet_buffer_(packet_buffer),
51 delay_manager_(delay_manager),
52 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020053 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000054 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000055 packet_length_samples_(0),
56 sample_memory_(0),
57 prev_time_scale_(false),
Henrik Lundin80c4cca2018-06-21 11:13:07 +020058 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020059 timescale_countdown_(
60 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000061 num_consecutive_expands_(0),
Henrik Lundin80c4cca2018-06-21 11:13:07 +020062 postpone_decoding_after_expand_(field_trial::IsEnabled(
63 "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {
64 delay_manager_->set_streaming_mode(false);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000065 SetSampleRate(fs_hz, output_size_samples);
66}
67
Henrik Lundin47b17dc2016-05-10 10:20:59 +020068DecisionLogic::~DecisionLogic() = default;
69
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000070void DecisionLogic::Reset() {
71 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -070072 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000073 packet_length_samples_ = 0;
74 sample_memory_ = 0;
75 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020076 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000077 num_consecutive_expands_ = 0;
78}
79
80void DecisionLogic::SoftReset() {
81 packet_length_samples_ = 0;
82 sample_memory_ = 0;
83 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020084 timescale_countdown_ =
85 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000086}
87
Peter Kastingdce40cf2015-08-24 14:52:23 -070088void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000089 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +020090 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000091 fs_mult_ = fs_hz / 8000;
92 output_size_samples_ = output_size_samples;
93}
94
95Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
96 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -070097 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -070098 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000099 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700100 bool play_dtmf,
101 size_t generated_noise_samples,
102 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700103 // If last mode was CNG (or Expand, since this could be covering up for
104 // a lost CNG packet), remember that CNG is on. This is needed if comfort
105 // noise is interrupted by DTMF.
106 if (prev_mode == kModeRfc3389Cng) {
107 cng_state_ = kCngRfc3389On;
108 } else if (prev_mode == kModeCodecInternalCng) {
109 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000110 }
111
Peter Kastingdce40cf2015-08-24 14:52:23 -0700112 const size_t samples_left =
113 sync_buffer.FutureLength() - expand.overlap_length();
114 const size_t cur_size_samples =
ossu61a208b2016-09-20 01:38:00 -0700115 samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000116
Yves Gerey665174f2018-06-19 15:03:05 +0200117 prev_time_scale_ =
118 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
119 prev_mode == kModeAccelerateLowEnergy ||
120 prev_mode == kModePreemptiveExpandSuccess ||
121 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000122
123 FilterBufferLevel(cur_size_samples, prev_mode);
124
Ivo Creusenc7f09ad2018-05-22 13:21:01 +0200125 return GetDecisionSpecialized(
126 sync_buffer, expand, decoder_frame_length, next_packet, prev_mode,
127 play_dtmf, reset_decoder, generated_noise_samples, cur_size_samples);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000128}
129
turaj@webrtc.org8d1cdaa2014-04-11 18:47:55 +0000130void DecisionLogic::ExpandDecision(Operations operation) {
131 if (operation == kExpand) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000132 num_consecutive_expands_++;
133 } else {
134 num_consecutive_expands_ = 0;
135 }
136}
137
Peter Kastingdce40cf2015-08-24 14:52:23 -0700138void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000139 Modes prev_mode) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000140 // Do not update buffer history if currently playing CNG since it will bias
141 // the filtered buffer level.
142 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
143 buffer_level_filter_->SetTargetBufferLevel(
144 delay_manager_->base_target_level());
145
Peter Kastingdce40cf2015-08-24 14:52:23 -0700146 size_t buffer_size_packets = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000147 if (packet_length_samples_ > 0) {
148 // Calculate size in packets.
149 buffer_size_packets = buffer_size_samples / packet_length_samples_;
150 }
151 int sample_memory_local = 0;
152 if (prev_time_scale_) {
153 sample_memory_local = sample_memory_;
Henrik Lundin47b17dc2016-05-10 10:20:59 +0200154 timescale_countdown_ =
155 tick_timer_->GetNewCountdown(kMinTimescaleInterval);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000156 }
157 buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
158 packet_length_samples_);
159 prev_time_scale_ = false;
160 }
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000161}
162
Henrik Lundin80c4cca2018-06-21 11:13:07 +0200163Operations DecisionLogic::GetDecisionSpecialized(const SyncBuffer& sync_buffer,
164 const Expand& expand,
165 size_t decoder_frame_length,
166 const Packet* next_packet,
167 Modes prev_mode,
168 bool play_dtmf,
169 bool* reset_decoder,
170 size_t generated_noise_samples,
171 size_t cur_size_samples) {
172 // Guard for errors, to avoid getting stuck in error mode.
173 if (prev_mode == kModeError) {
174 if (!next_packet) {
175 return kExpand;
176 } else {
177 return kUndefined; // Use kUndefined to flag for a reset.
178 }
179 }
180
181 uint32_t target_timestamp = sync_buffer.end_timestamp();
182 uint32_t available_timestamp = 0;
183 bool is_cng_packet = false;
184 if (next_packet) {
185 available_timestamp = next_packet->timestamp;
186 is_cng_packet =
187 decoder_database_->IsComfortNoise(next_packet->payload_type);
188 }
189
190 if (is_cng_packet) {
191 return CngOperation(prev_mode, target_timestamp, available_timestamp,
192 generated_noise_samples);
193 }
194
195 // Handle the case with no packet at all available (except maybe DTMF).
196 if (!next_packet) {
197 return NoPacket(play_dtmf);
198 }
199
200 // If the expand period was very long, reset NetEQ since it is likely that the
201 // sender was restarted.
202 if (num_consecutive_expands_ > kReinitAfterExpands) {
203 *reset_decoder = true;
204 return kNormal;
205 }
206
207 // Make sure we don't restart audio too soon after an expansion to avoid
208 // running out of data right away again. We should only wait if there are no
209 // DTX or CNG packets in the buffer (otherwise we should just play out what we
210 // have, since we cannot know the exact duration of DTX or CNG packets), and
211 // if the mute factor is low enough (otherwise the expansion was short enough
212 // to not be noticable).
213 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
214 if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
215 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
216 cur_size_samples<static_cast<size_t>(delay_manager_->TargetLevel() *
217 packet_length_samples_)>> 8 &&
218 expand.MuteFactor(0) < 16384 / 2) {
219 return kExpand;
220 }
221
222 const uint32_t five_seconds_samples =
223 static_cast<uint32_t>(5 * 8000 * fs_mult_);
224 // Check if the required packet is available.
225 if (target_timestamp == available_timestamp) {
226 return ExpectedPacketAvailable(prev_mode, play_dtmf);
227 } else if (!PacketBuffer::IsObsoleteTimestamp(
228 available_timestamp, target_timestamp, five_seconds_samples)) {
229 return FuturePacketAvailable(
230 sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
231 available_timestamp, play_dtmf, generated_noise_samples);
232 } else {
233 // This implies that available_timestamp < target_timestamp, which can
234 // happen when a new stream or codec is received. Signal for a reset.
235 return kUndefined;
236 }
237}
238
239Operations DecisionLogic::CngOperation(Modes prev_mode,
240 uint32_t target_timestamp,
241 uint32_t available_timestamp,
242 size_t generated_noise_samples) {
243 // Signed difference between target and available timestamp.
244 int32_t timestamp_diff = static_cast<int32_t>(
245 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
246 available_timestamp);
247 int32_t optimal_level_samp = static_cast<int32_t>(
248 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
249 const int64_t excess_waiting_time_samp =
250 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
251
252 if (excess_waiting_time_samp > optimal_level_samp / 2) {
253 // The waiting time for this packet will be longer than 1.5
254 // times the wanted buffer delay. Apply fast-forward to cut the
255 // waiting time down to the optimal.
256 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
257 excess_waiting_time_samp);
258 timestamp_diff =
259 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
260 }
261
262 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
263 // Not time to play this packet yet. Wait another round before using this
264 // packet. Keep on playing CNG from previous CNG parameters.
265 return kRfc3389CngNoPacket;
266 } else {
267 // Otherwise, go for the CNG packet now.
268 noise_fast_forward_ = 0;
269 return kRfc3389Cng;
270 }
271}
272
273Operations DecisionLogic::NoPacket(bool play_dtmf) {
274 if (cng_state_ == kCngRfc3389On) {
275 // Keep on playing comfort noise.
276 return kRfc3389CngNoPacket;
277 } else if (cng_state_ == kCngInternalOn) {
278 // Keep on playing codec internal comfort noise.
279 return kCodecInternalCng;
280 } else if (play_dtmf) {
281 return kDtmf;
282 } else {
283 // Nothing to play, do expand.
284 return kExpand;
285 }
286}
287
288Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
289 bool play_dtmf) {
290 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
291 // Check criterion for time-stretching.
292 int low_limit, high_limit;
293 delay_manager_->BufferLimits(&low_limit, &high_limit);
294 if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
295 return kFastAccelerate;
296 if (TimescaleAllowed()) {
297 if (buffer_level_filter_->filtered_current_level() >= high_limit)
298 return kAccelerate;
299 if (buffer_level_filter_->filtered_current_level() < low_limit)
300 return kPreemptiveExpand;
301 }
302 }
303 return kNormal;
304}
305
306Operations DecisionLogic::FuturePacketAvailable(
307 const SyncBuffer& sync_buffer,
308 const Expand& expand,
309 size_t decoder_frame_length,
310 Modes prev_mode,
311 uint32_t target_timestamp,
312 uint32_t available_timestamp,
313 bool play_dtmf,
314 size_t generated_noise_samples) {
315 // Required packet is not available, but a future packet is.
316 // Check if we should continue with an ongoing expand because the new packet
317 // is too far into the future.
318 uint32_t timestamp_leap = available_timestamp - target_timestamp;
319 if ((prev_mode == kModeExpand) && !ReinitAfterExpands(timestamp_leap) &&
320 !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) &&
321 UnderTargetLevel()) {
322 if (play_dtmf) {
323 // Still have DTMF to play, so do not do expand.
324 return kDtmf;
325 } else {
326 // Nothing to play.
327 return kExpand;
328 }
329 }
330
331 const size_t samples_left =
332 sync_buffer.FutureLength() - expand.overlap_length();
333 const size_t cur_size_samples =
334 samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
335
336 // If previous was comfort noise, then no merge is needed.
337 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
338 // Keep the same delay as before the CNG, but make sure that the number of
339 // samples in buffer is no higher than 4 times the optimal level. (Note that
340 // TargetLevel() is in Q8.)
341 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
342 available_timestamp ||
343 cur_size_samples >
344 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
345 4) {
346 // Time to play this new packet.
347 return kNormal;
348 } else {
349 // Too early to play this new packet; keep on playing comfort noise.
350 if (prev_mode == kModeRfc3389Cng) {
351 return kRfc3389CngNoPacket;
352 } else { // prevPlayMode == kModeCodecInternalCng.
353 return kCodecInternalCng;
354 }
355 }
356 }
357 // Do not merge unless we have done an expand before.
358 if (prev_mode == kModeExpand) {
359 return kMerge;
360 } else if (play_dtmf) {
361 // Play DTMF instead of expand.
362 return kDtmf;
363 } else {
364 return kExpand;
365 }
366}
367
368bool DecisionLogic::UnderTargetLevel() const {
369 return buffer_level_filter_->filtered_current_level() <=
370 delay_manager_->TargetLevel();
371}
372
373bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
374 return timestamp_leap >=
375 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
376}
377
378bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
379 return timestamp_leap >
380 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
381}
382
383bool DecisionLogic::MaxWaitForPacket() const {
384 return num_consecutive_expands_ >= kMaxWaitForPacket;
385}
386
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000387} // namespace webrtc