NetEq fix for repeated audio issue.
This CL implements a fix, behind a field trial, for a NetEq issue. NetEq restarts audio too soon after a buffer underrun, which can quickly lead to another underrun in some circumstances. The fix changes NetEq's behavior to postpone restarting playback until sufficient audio is buffered.
Bug: webrtc:9289
Change-Id: I5968c9478ce8d84caf77f00b8d0a39156b47fc8d
Reviewed-on: https://webrtc-review.googlesource.com/77423
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23347}
diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
index 3d10b6f..c784a68 100644
--- a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
@@ -36,6 +36,8 @@
return (ret < 0) ? 0 : static_cast<size_t>(ret);
}
+ bool IsDtxPacket() const override { return payload_.size() <= 2; }
+
rtc::Optional<DecodeResult> Decode(
rtc::ArrayView<int16_t> decoded) const override {
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc
index 6ab2716..279a9e6 100644
--- a/modules/audio_coding/neteq/decision_logic.cc
+++ b/modules/audio_coding/neteq/decision_logic.cc
@@ -130,9 +130,9 @@
FilterBufferLevel(cur_size_samples, prev_mode);
- return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
- next_packet, prev_mode, play_dtmf,
- reset_decoder, generated_noise_samples);
+ return GetDecisionSpecialized(
+ sync_buffer, expand, decoder_frame_length, next_packet, prev_mode,
+ play_dtmf, reset_decoder, generated_noise_samples, cur_size_samples);
}
void DecisionLogic::ExpandDecision(Operations operation) {
diff --git a/modules/audio_coding/neteq/decision_logic.h b/modules/audio_coding/neteq/decision_logic.h
index 5b67196..9d88c4d 100644
--- a/modules/audio_coding/neteq/decision_logic.h
+++ b/modules/audio_coding/neteq/decision_logic.h
@@ -137,7 +137,8 @@
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
- size_t generated_noise_samples) = 0;
+ size_t generated_noise_samples,
+ size_t cur_size_samples) = 0;
// Updates the |buffer_level_filter_| with the current buffer level
// |buffer_size_packets|.
diff --git a/modules/audio_coding/neteq/decision_logic_fax.cc b/modules/audio_coding/neteq/decision_logic_fax.cc
index cc21ee9..22d36ce 100644
--- a/modules/audio_coding/neteq/decision_logic_fax.cc
+++ b/modules/audio_coding/neteq/decision_logic_fax.cc
@@ -27,7 +27,8 @@
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
- size_t generated_noise_samples) {
+ size_t generated_noise_samples,
+ size_t /*cur_size_samples*/) {
assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
uint32_t target_timestamp = sync_buffer.end_timestamp();
uint32_t available_timestamp = 0;
diff --git a/modules/audio_coding/neteq/decision_logic_fax.h b/modules/audio_coding/neteq/decision_logic_fax.h
index cefd8e4..1436f99 100644
--- a/modules/audio_coding/neteq/decision_logic_fax.h
+++ b/modules/audio_coding/neteq/decision_logic_fax.h
@@ -47,7 +47,8 @@
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
- size_t generated_noise_samples) override;
+ size_t generated_noise_samples,
+ size_t cur_size_samples) override;
private:
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
diff --git a/modules/audio_coding/neteq/decision_logic_normal.cc b/modules/audio_coding/neteq/decision_logic_normal.cc
index 1429bb7..c163999 100644
--- a/modules/audio_coding/neteq/decision_logic_normal.cc
+++ b/modules/audio_coding/neteq/decision_logic_normal.cc
@@ -13,6 +13,7 @@
#include <assert.h>
#include <algorithm>
+#include <limits>
#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include "modules/audio_coding/neteq/decoder_database.h"
@@ -31,7 +32,8 @@
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
- size_t generated_noise_samples) {
+ size_t generated_noise_samples,
+ size_t cur_size_samples) {
assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
// Guard for errors, to avoid getting stuck in error mode.
if (prev_mode == kModeError) {
@@ -68,6 +70,21 @@
return kNormal;
}
+ // Make sure we don't restart audio too soon after an expansion to avoid
+ // running out of data right away again. We should only wait if there are no
+ // DTX or CNG packets in the buffer (otherwise we should just play out what we
+ // have, since we cannot know the exact duration of DTX or CNG packets), and
+ // if the mute factor is low enough (otherwise the expansion was short enough
+ // to not be noticeable).
+ // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
+ if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
+ !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
+ cur_size_samples < static_cast<size_t>(delay_manager_->TargetLevel() *
+ packet_length_samples_) >> 8 &&
+ expand.MuteFactor(0) < 16384 / 2) {
+ return kExpand;
+ }
+
const uint32_t five_seconds_samples =
static_cast<uint32_t>(5 * 8000 * fs_mult_);
// Check if the required packet is available.
diff --git a/modules/audio_coding/neteq/decision_logic_normal.h b/modules/audio_coding/neteq/decision_logic_normal.h
index 366d103..a718f99 100644
--- a/modules/audio_coding/neteq/decision_logic_normal.h
+++ b/modules/audio_coding/neteq/decision_logic_normal.h
@@ -13,6 +13,7 @@
#include "modules/audio_coding/neteq/decision_logic.h"
#include "rtc_base/constructormagic.h"
+#include "system_wrappers/include/field_trial.h"
#include "typedefs.h" // NOLINT(build/include)
namespace webrtc {
@@ -37,7 +38,9 @@
packet_buffer,
delay_manager,
buffer_level_filter,
- tick_timer) {}
+ tick_timer),
+ postpone_decoding_after_expand_(field_trial::IsEnabled(
+ "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {}
protected:
static const int kReinitAfterExpands = 100;
@@ -50,7 +53,8 @@
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
- size_t generated_noise_samples) override;
+ size_t generated_noise_samples,
+ size_t cur_size_samples) override;
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
@@ -100,6 +104,8 @@
// Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
bool MaxWaitForPacket() const;
+ const bool postpone_decoding_after_expand_;
+
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
};
diff --git a/modules/audio_coding/neteq/expand.h b/modules/audio_coding/neteq/expand.h
index 39249f1..4060bd7 100644
--- a/modules/audio_coding/neteq/expand.h
+++ b/modules/audio_coding/neteq/expand.h
@@ -57,7 +57,7 @@
virtual void SetParametersForMergeAfterExpand();
// Returns the mute factor for |channel|.
- int16_t MuteFactor(size_t channel) {
+ int16_t MuteFactor(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].mute_factor;
}
diff --git a/modules/audio_coding/neteq/packet_buffer.cc b/modules/audio_coding/neteq/packet_buffer.cc
index dfffebd..9752ec6 100644
--- a/modules/audio_coding/neteq/packet_buffer.cc
+++ b/modules/audio_coding/neteq/packet_buffer.cc
@@ -285,6 +285,18 @@
return num_samples;
}
+bool PacketBuffer::ContainsDtxOrCngPacket(
+ const DecoderDatabase* decoder_database) const {
+ RTC_DCHECK(decoder_database);
+ for (const Packet& packet : buffer_) {
+ if ((packet.frame && packet.frame->IsDtxPacket()) ||
+ decoder_database->IsComfortNoise(packet.payload_type)) {
+ return true;
+ }
+ }
+ return false;
+}
+
void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const {
*num_packets = static_cast<int>(buffer_.size());
*max_num_packets = static_cast<int>(max_number_of_packets_);
diff --git a/modules/audio_coding/neteq/packet_buffer.h b/modules/audio_coding/neteq/packet_buffer.h
index c83bf89..c646626 100644
--- a/modules/audio_coding/neteq/packet_buffer.h
+++ b/modules/audio_coding/neteq/packet_buffer.h
@@ -12,6 +12,7 @@
#define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
#include "api/optional.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/packet.h"
#include "modules/include/module_common_types.h"
#include "rtc_base/constructormagic.h"
@@ -121,6 +122,10 @@
// duplicate and redundant packets.
virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;
+ // Returns true if the packet buffer contains any DTX or CNG packets.
+ virtual bool ContainsDtxOrCngPacket(
+ const DecoderDatabase* decoder_database) const;
+
virtual void BufferStat(int* num_packets, int* max_num_packets) const;
// Static method returning true if |timestamp| is older than |timestamp_limit|