Adding a delay line to NetEq's output
This change adds an optional delay to NetEq's output. Note, this is not
equivalent to increasing the jitter buffer with the same extra length.
Bug: b/156734419
Change-Id: I8b70b6b3bffcfd3da296ccf29853864baa03d6bb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/175110
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31343}
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index 0b7510d..163a287 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -140,7 +140,10 @@
10, // Report once every 10 s.
tick_timer_.get()),
no_time_stretching_(config.for_test_no_time_stretching),
- enable_rtx_handling_(config.enable_rtx_handling) {
+ enable_rtx_handling_(config.enable_rtx_handling),
+ output_delay_chain_(
+ rtc::CheckedDivExact(config.extra_output_delay_ms, 10)),
+ output_delay_chain_ms_(config.extra_output_delay_ms) {
RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString();
int fs = config.sample_rate_hz;
if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
@@ -255,6 +258,25 @@
last_output_sample_rate_hz_ == 32000 ||
last_output_sample_rate_hz_ == 48000)
<< "Unexpected sample rate " << last_output_sample_rate_hz_;
+
+ if (!output_delay_chain_.empty()) {
+ if (output_delay_chain_empty_) {
+ for (auto& f : output_delay_chain_) {
+ f.CopyFrom(*audio_frame);
+ }
+ output_delay_chain_empty_ = false;
+ delayed_last_output_sample_rate_hz_ = last_output_sample_rate_hz_;
+ } else {
+ RTC_DCHECK_GE(output_delay_chain_ix_, 0);
+ RTC_DCHECK_LT(output_delay_chain_ix_, output_delay_chain_.size());
+ swap(output_delay_chain_[output_delay_chain_ix_], *audio_frame);
+ *muted = audio_frame->muted();
+ output_delay_chain_ix_ =
+ (output_delay_chain_ix_ + 1) % output_delay_chain_.size();
+ delayed_last_output_sample_rate_hz_ = audio_frame->sample_rate_hz();
+ }
+ }
+
return kOK;
}
@@ -297,7 +319,8 @@
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
assert(controller_.get());
- return controller_->SetMinimumDelay(delay_ms);
+ return controller_->SetMinimumDelay(
+ std::max(delay_ms - output_delay_chain_ms_, 0));
}
return false;
}
@@ -306,7 +329,8 @@
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
assert(controller_.get());
- return controller_->SetMaximumDelay(delay_ms);
+ return controller_->SetMaximumDelay(
+ std::max(delay_ms - output_delay_chain_ms_, 0));
}
return false;
}
@@ -327,7 +351,7 @@
int NetEqImpl::TargetDelayMs() const {
rtc::CritScope lock(&crit_sect_);
RTC_DCHECK(controller_.get());
- return controller_->TargetLevelMs();
+ return controller_->TargetLevelMs() + output_delay_chain_ms_;
}
int NetEqImpl::FilteredCurrentDelayMs() const {
@@ -337,7 +361,8 @@
const int delay_samples =
controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength();
// The division below will truncate. The return value is in ms.
- return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
+ return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000) +
+ output_delay_chain_ms_;
}
int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
@@ -351,6 +376,13 @@
stats->jitter_peaks_found = controller_->PeakFound();
stats_->GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
decoder_frame_length_, stats);
+ // Compensate for output delay chain.
+ stats->current_buffer_size_ms += output_delay_chain_ms_;
+ stats->preferred_buffer_size_ms += output_delay_chain_ms_;
+ stats->mean_waiting_time_ms += output_delay_chain_ms_;
+ stats->median_waiting_time_ms += output_delay_chain_ms_;
+ stats->min_waiting_time_ms += output_delay_chain_ms_;
+ stats->max_waiting_time_ms += output_delay_chain_ms_;
return 0;
}
@@ -394,12 +426,19 @@
// which is indicated by returning an empty value.
return absl::nullopt;
}
- return timestamp_scaler_->ToExternal(playout_timestamp_);
+ size_t sum_samples_in_output_delay_chain = 0;
+ for (const auto& audio_frame : output_delay_chain_) {
+ sum_samples_in_output_delay_chain += audio_frame.samples_per_channel();
+ }
+ return timestamp_scaler_->ToExternal(
+ playout_timestamp_ -
+ static_cast<uint32_t>(sum_samples_in_output_delay_chain));
}
int NetEqImpl::last_output_sample_rate_hz() const {
rtc::CritScope lock(&crit_sect_);
- return last_output_sample_rate_hz_;
+ return delayed_last_output_sample_rate_hz_.value_or(
+ last_output_sample_rate_hz_);
}
absl::optional<NetEq::DecoderFormat> NetEqImpl::GetDecoderFormat(
@@ -1988,8 +2027,9 @@
extracted_samples = packet->timestamp - first_timestamp + packet_duration;
RTC_DCHECK(controller_);
- stats_->JitterBufferDelay(packet_duration, waiting_time_ms,
- controller_->TargetLevelMs());
+ stats_->JitterBufferDelay(
+ packet_duration, waiting_time_ms + output_delay_chain_ms_,
+ controller_->TargetLevelMs() + output_delay_chain_ms_);
packet_list->push_back(std::move(*packet)); // Store packet in list.
packet = absl::nullopt; // Ensure it's never used after the move.