NetEq: Fix a problem with too large delay during codec-internal DTX/CNG
The length of the generated comfort noise is measured with a
counter. A bug in the implementation caused the counter to be reset
not only when a new packet was decoded, but also when NetEq asked the
decoder for more comfort noise without giving it a new packet to
decode. This means that the counter was reset once every 20 ms (in the
case of Opus), and it would never match the gap in timestamps that is
the exit criterion for CNG. This would have resulted in perpetual CNG,
but there is a stop-gap in NetEq. If the buffer level exceeds 4 times
the target level, CNG mode is exited anyway. This is what happens at
the end of every silence period.
With this CL, the bug should be fixed. The fix is wrapped in an
experiment, to allow verifying the fix and the impact of it with real
world data.
Bug: webrtc:8488
Change-Id: Idfc24df780eb2c55dbf08de840e6644e8557a0af
Reviewed-on: https://webrtc-review.googlesource.com/18181
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20551}
diff --git a/modules/audio_coding/neteq/decision_logic_normal.cc b/modules/audio_coding/neteq/decision_logic_normal.cc
index c78063c..10f501a 100644
--- a/modules/audio_coding/neteq/decision_logic_normal.cc
+++ b/modules/audio_coding/neteq/decision_logic_normal.cc
@@ -189,10 +189,9 @@
// If previous was comfort noise, then no merge is needed.
if (prev_mode == kModeRfc3389Cng ||
prev_mode == kModeCodecInternalCng) {
- // Keep the same delay as before the CNG (or maximum 70 ms in buffer as
- // safety precaution), but make sure that the number of samples in buffer
- // is no higher than 4 times the optimal level. (Note that TargetLevel()
- // is in Q8.)
+ // Keep the same delay as before the CNG, but make sure that the number of
+ // samples in buffer is no higher than 4 times the optimal level. (Note that
+ // TargetLevel() is in Q8.)
if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
available_timestamp ||
cur_size_samples >
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index e7757ae..b7bc0da 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -47,6 +47,7 @@
#include "rtc_base/safe_conversions.h"
#include "rtc_base/sanitizer.h"
#include "rtc_base/trace_event.h"
+#include "system_wrappers/include/field_trial.h"
namespace webrtc {
@@ -102,7 +103,9 @@
playout_mode_(config.playout_mode),
enable_fast_accelerate_(config.enable_fast_accelerate),
nack_enabled_(false),
- enable_muted_state_(config.enable_muted_state) {
+ enable_muted_state_(config.enable_muted_state),
+ use_dtx_delay_fix_(
+ field_trial::IsEnabled("WebRTC-NetEqOpusDtxDelayFix")) {
LOG(LS_INFO) << "NetEq config: " << config.ToString();
int fs = config.sample_rate_hz;
if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
@@ -862,6 +865,7 @@
AudioDecoder::SpeechType speech_type;
int length = 0;
+ const size_t start_num_packets = packet_list.size();
int decode_return_value = Decode(&packet_list, &operation,
&length, &speech_type);
@@ -871,7 +875,14 @@
vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
sid_frame_available, fs_hz_);
- if (sid_frame_available || speech_type == AudioDecoder::kComfortNoise) {
+ // This is the criterion that we did decode some data through the speech
+ // decoder, and the operation resulted in comfort noise.
+ const bool codec_internal_sid_frame =
+ use_dtx_delay_fix_ ? (speech_type == AudioDecoder::kComfortNoise &&
+ start_num_packets > packet_list.size())
+ : (speech_type == AudioDecoder::kComfortNoise);
+
+ if (sid_frame_available || codec_internal_sid_frame) {
// Start a new stopwatch since we are decoding a new CNG packet.
generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
}
diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h
index be986fb..8dca6a5 100644
--- a/modules/audio_coding/neteq/neteq_impl.h
+++ b/modules/audio_coding/neteq/neteq_impl.h
@@ -440,6 +440,7 @@
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
RTC_GUARDED_BY(crit_sect_);
std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_);
+ const bool use_dtx_delay_fix_ RTC_GUARDED_BY(crit_sect_);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);