blob: e24ca6283a0c68dc926c736a45c786d764b4234b [file] [log] [blame]
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +00001/*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_coding/neteq/decision_logic.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000012
Henrik Lundin7687ad52018-07-02 10:14:46 +020013#include <assert.h>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000014#include <algorithm>
Henrik Lundin7687ad52018-07-02 10:14:46 +020015#include <limits>
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000016
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020017#include "modules/audio_coding/neteq/buffer_level_filter.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020018#include "modules/audio_coding/neteq/decoder_database.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "modules/audio_coding/neteq/delay_manager.h"
20#include "modules/audio_coding/neteq/expand.h"
21#include "modules/audio_coding/neteq/packet_buffer.h"
22#include "modules/audio_coding/neteq/sync_buffer.h"
Fredrik Solenbergbbf21a32018-04-12 22:44:09 +020023#include "modules/include/module_common_types.h"
Henrik Lundin7687ad52018-07-02 10:14:46 +020024#include "system_wrappers/include/field_trial.h"
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000025
26namespace webrtc {
27
28DecisionLogic* DecisionLogic::Create(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070029 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020030 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000031 DecoderDatabase* decoder_database,
32 const PacketBuffer& packet_buffer,
33 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020034 BufferLevelFilter* buffer_level_filter,
35 const TickTimer* tick_timer) {
Henrik Lundin7687ad52018-07-02 10:14:46 +020036 return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
37 decoder_database, packet_buffer, delay_manager,
38 buffer_level_filter, tick_timer);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000039}
40
41DecisionLogic::DecisionLogic(int fs_hz,
Peter Kastingdce40cf2015-08-24 14:52:23 -070042 size_t output_size_samples,
Henrik Lundin7687ad52018-07-02 10:14:46 +020043 bool disallow_time_stretching,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000044 DecoderDatabase* decoder_database,
45 const PacketBuffer& packet_buffer,
46 DelayManager* delay_manager,
Henrik Lundin47b17dc2016-05-10 10:20:59 +020047 BufferLevelFilter* buffer_level_filter,
48 const TickTimer* tick_timer)
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000049 : decoder_database_(decoder_database),
50 packet_buffer_(packet_buffer),
51 delay_manager_(delay_manager),
52 buffer_level_filter_(buffer_level_filter),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020053 tick_timer_(tick_timer),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000054 cng_state_(kCngOff),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000055 packet_length_samples_(0),
56 sample_memory_(0),
57 prev_time_scale_(false),
Henrik Lundin7687ad52018-07-02 10:14:46 +020058 disallow_time_stretching_(disallow_time_stretching),
Henrik Lundin47b17dc2016-05-10 10:20:59 +020059 timescale_countdown_(
60 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000061 num_consecutive_expands_(0),
Henrik Lundin7687ad52018-07-02 10:14:46 +020062 postpone_decoding_after_expand_(field_trial::IsEnabled(
63 "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {
64 delay_manager_->set_streaming_mode(false);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000065 SetSampleRate(fs_hz, output_size_samples);
66}
67
Henrik Lundin47b17dc2016-05-10 10:20:59 +020068DecisionLogic::~DecisionLogic() = default;
69
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000070void DecisionLogic::Reset() {
71 cng_state_ = kCngOff;
henrik.lundinb1fb72b2016-05-03 08:18:47 -070072 noise_fast_forward_ = 0;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000073 packet_length_samples_ = 0;
74 sample_memory_ = 0;
75 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020076 timescale_countdown_.reset();
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000077 num_consecutive_expands_ = 0;
78}
79
80void DecisionLogic::SoftReset() {
81 packet_length_samples_ = 0;
82 sample_memory_ = 0;
83 prev_time_scale_ = false;
Henrik Lundin47b17dc2016-05-10 10:20:59 +020084 timescale_countdown_ =
85 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000086}
87
Peter Kastingdce40cf2015-08-24 14:52:23 -070088void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000089 // TODO(hlundin): Change to an enumerator and skip assert.
Yves Gerey665174f2018-06-19 15:03:05 +020090 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000091 fs_mult_ = fs_hz / 8000;
92 output_size_samples_ = output_size_samples;
93}
94
95Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
96 const Expand& expand,
Peter Kastingdce40cf2015-08-24 14:52:23 -070097 size_t decoder_frame_length,
ossu7a377612016-10-18 04:06:13 -070098 const Packet* next_packet,
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +000099 Modes prev_mode,
henrik.lundinb1fb72b2016-05-03 08:18:47 -0700100 bool play_dtmf,
101 size_t generated_noise_samples,
102 bool* reset_decoder) {
ossu61a208b2016-09-20 01:38:00 -0700103 // If last mode was CNG (or Expand, since this could be covering up for
104 // a lost CNG packet), remember that CNG is on. This is needed if comfort
105 // noise is interrupted by DTMF.
106 if (prev_mode == kModeRfc3389Cng) {
107 cng_state_ = kCngRfc3389On;
108 } else if (prev_mode == kModeCodecInternalCng) {
109 cng_state_ = kCngInternalOn;
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000110 }
111
Peter Kastingdce40cf2015-08-24 14:52:23 -0700112 const size_t samples_left =
113 sync_buffer.FutureLength() - expand.overlap_length();
114 const size_t cur_size_samples =
ossu61a208b2016-09-20 01:38:00 -0700115 samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000116
Yves Gerey665174f2018-06-19 15:03:05 +0200117 prev_time_scale_ =
118 prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
119 prev_mode == kModeAccelerateLowEnergy ||
120 prev_mode == kModePreemptiveExpandSuccess ||
121 prev_mode == kModePreemptiveExpandLowEnergy);
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000122
123 FilterBufferLevel(cur_size_samples, prev_mode);
124
Henrik Lundin7687ad52018-07-02 10:14:46 +0200125 // Guard for errors, to avoid getting stuck in error mode.
126 if (prev_mode == kModeError) {
127 if (!next_packet) {
128 return kExpand;
129 } else {
130 return kUndefined; // Use kUndefined to flag for a reset.
131 }
132 }
133
134 uint32_t target_timestamp = sync_buffer.end_timestamp();
135 uint32_t available_timestamp = 0;
136 bool is_cng_packet = false;
137 if (next_packet) {
138 available_timestamp = next_packet->timestamp;
139 is_cng_packet =
140 decoder_database_->IsComfortNoise(next_packet->payload_type);
141 }
142
143 if (is_cng_packet) {
144 return CngOperation(prev_mode, target_timestamp, available_timestamp,
145 generated_noise_samples);
146 }
147
148 // Handle the case with no packet at all available (except maybe DTMF).
149 if (!next_packet) {
150 return NoPacket(play_dtmf);
151 }
152
153 // If the expand period was very long, reset NetEQ since it is likely that the
154 // sender was restarted.
155 if (num_consecutive_expands_ > kReinitAfterExpands) {
156 *reset_decoder = true;
157 return kNormal;
158 }
159
160 // Make sure we don't restart audio too soon after an expansion to avoid
161 // running out of data right away again. We should only wait if there are no
162 // DTX or CNG packets in the buffer (otherwise we should just play out what we
163 // have, since we cannot know the exact duration of DTX or CNG packets), and
164 // if the mute factor is low enough (otherwise the expansion was short enough
165 // to not be noticable).
166 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
167 if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
168 !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
169 cur_size_samples<static_cast<size_t>(delay_manager_->TargetLevel() *
170 packet_length_samples_)>> 8 &&
171 expand.MuteFactor(0) < 16384 / 2) {
172 return kExpand;
173 }
174
175 const uint32_t five_seconds_samples =
176 static_cast<uint32_t>(5 * 8000 * fs_mult_);
177 // Check if the required packet is available.
178 if (target_timestamp == available_timestamp) {
179 return ExpectedPacketAvailable(prev_mode, play_dtmf);
180 } else if (!PacketBuffer::IsObsoleteTimestamp(
181 available_timestamp, target_timestamp, five_seconds_samples)) {
182 return FuturePacketAvailable(
183 sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
184 available_timestamp, play_dtmf, generated_noise_samples);
185 } else {
186 // This implies that available_timestamp < target_timestamp, which can
187 // happen when a new stream or codec is received. Signal for a reset.
188 return kUndefined;
189 }
190}
191
Henrik Lundin5afa61c2018-07-02 14:53:24 +0200192void DecisionLogic::ExpandDecision(Operations operation) {
193 if (operation == kExpand) {
194 num_consecutive_expands_++;
195 } else {
196 num_consecutive_expands_ = 0;
197 }
198}
199
200void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
201 Modes prev_mode) {
202 // Do not update buffer history if currently playing CNG since it will bias
203 // the filtered buffer level.
204 if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
205 buffer_level_filter_->SetTargetBufferLevel(
206 delay_manager_->base_target_level());
207
208 size_t buffer_size_packets = 0;
209 if (packet_length_samples_ > 0) {
210 // Calculate size in packets.
211 buffer_size_packets = buffer_size_samples / packet_length_samples_;
212 }
213 int sample_memory_local = 0;
214 if (prev_time_scale_) {
215 sample_memory_local = sample_memory_;
216 timescale_countdown_ =
217 tick_timer_->GetNewCountdown(kMinTimescaleInterval);
218 }
219 buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
220 packet_length_samples_);
221 prev_time_scale_ = false;
222 }
223}
224
Henrik Lundin7687ad52018-07-02 10:14:46 +0200225Operations DecisionLogic::CngOperation(Modes prev_mode,
226 uint32_t target_timestamp,
227 uint32_t available_timestamp,
228 size_t generated_noise_samples) {
229 // Signed difference between target and available timestamp.
230 int32_t timestamp_diff = static_cast<int32_t>(
231 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
232 available_timestamp);
233 int32_t optimal_level_samp = static_cast<int32_t>(
234 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
235 const int64_t excess_waiting_time_samp =
236 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
237
238 if (excess_waiting_time_samp > optimal_level_samp / 2) {
239 // The waiting time for this packet will be longer than 1.5
240 // times the wanted buffer delay. Apply fast-forward to cut the
241 // waiting time down to the optimal.
242 noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
243 excess_waiting_time_samp);
244 timestamp_diff =
245 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
246 }
247
248 if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
249 // Not time to play this packet yet. Wait another round before using this
250 // packet. Keep on playing CNG from previous CNG parameters.
251 return kRfc3389CngNoPacket;
252 } else {
253 // Otherwise, go for the CNG packet now.
254 noise_fast_forward_ = 0;
255 return kRfc3389Cng;
256 }
257}
258
259Operations DecisionLogic::NoPacket(bool play_dtmf) {
260 if (cng_state_ == kCngRfc3389On) {
261 // Keep on playing comfort noise.
262 return kRfc3389CngNoPacket;
263 } else if (cng_state_ == kCngInternalOn) {
264 // Keep on playing codec internal comfort noise.
265 return kCodecInternalCng;
266 } else if (play_dtmf) {
267 return kDtmf;
268 } else {
269 // Nothing to play, do expand.
270 return kExpand;
271 }
272}
273
274Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
275 bool play_dtmf) {
276 if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
277 // Check criterion for time-stretching.
278 int low_limit, high_limit;
279 delay_manager_->BufferLimits(&low_limit, &high_limit);
280 if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
281 return kFastAccelerate;
282 if (TimescaleAllowed()) {
283 if (buffer_level_filter_->filtered_current_level() >= high_limit)
284 return kAccelerate;
285 if (buffer_level_filter_->filtered_current_level() < low_limit)
286 return kPreemptiveExpand;
287 }
288 }
289 return kNormal;
290}
291
292Operations DecisionLogic::FuturePacketAvailable(
293 const SyncBuffer& sync_buffer,
294 const Expand& expand,
295 size_t decoder_frame_length,
296 Modes prev_mode,
297 uint32_t target_timestamp,
298 uint32_t available_timestamp,
299 bool play_dtmf,
300 size_t generated_noise_samples) {
301 // Required packet is not available, but a future packet is.
302 // Check if we should continue with an ongoing expand because the new packet
303 // is too far into the future.
304 uint32_t timestamp_leap = available_timestamp - target_timestamp;
305 if ((prev_mode == kModeExpand) && !ReinitAfterExpands(timestamp_leap) &&
306 !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) &&
307 UnderTargetLevel()) {
308 if (play_dtmf) {
309 // Still have DTMF to play, so do not do expand.
310 return kDtmf;
311 } else {
312 // Nothing to play.
313 return kExpand;
314 }
315 }
316
317 const size_t samples_left =
318 sync_buffer.FutureLength() - expand.overlap_length();
319 const size_t cur_size_samples =
320 samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
321
322 // If previous was comfort noise, then no merge is needed.
323 if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
324 // Keep the same delay as before the CNG, but make sure that the number of
325 // samples in buffer is no higher than 4 times the optimal level. (Note that
326 // TargetLevel() is in Q8.)
327 if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
328 available_timestamp ||
329 cur_size_samples >
330 ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
331 4) {
332 // Time to play this new packet.
333 return kNormal;
334 } else {
335 // Too early to play this new packet; keep on playing comfort noise.
336 if (prev_mode == kModeRfc3389Cng) {
337 return kRfc3389CngNoPacket;
338 } else { // prevPlayMode == kModeCodecInternalCng.
339 return kCodecInternalCng;
340 }
341 }
342 }
343 // Do not merge unless we have done an expand before.
344 if (prev_mode == kModeExpand) {
345 return kMerge;
346 } else if (play_dtmf) {
347 // Play DTMF instead of expand.
348 return kDtmf;
349 } else {
350 return kExpand;
351 }
352}
353
354bool DecisionLogic::UnderTargetLevel() const {
355 return buffer_level_filter_->filtered_current_level() <=
356 delay_manager_->TargetLevel();
357}
358
359bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
360 return timestamp_leap >=
361 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
362}
363
364bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
365 return timestamp_leap >
366 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
367}
368
369bool DecisionLogic::MaxWaitForPacket() const {
370 return num_consecutive_expands_ >= kMaxWaitForPacket;
371}
372
henrik.lundin@webrtc.orgd94659d2013-01-29 12:09:21 +0000373} // namespace webrtc