Introduce injectable NetEqController interface.
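This interface is implemented by the DecisionLogic class, which now contains the DelayManager and DelayPeakDetector.
NetEqImpl no longer owns these objects (or the BufferLevelFilter) itself; it reaches the delay and buffer-level state through the controller instead.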
Bug: webrtc:11005
Change-Id: I4fb69fa359e60831cf153e41f101d5b623749380
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155176
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29613}
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index 37036e3..7290e93 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -25,13 +25,10 @@
#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "modules/audio_coding/neteq/accelerate.h"
#include "modules/audio_coding/neteq/background_noise.h"
-#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include "modules/audio_coding/neteq/comfort_noise.h"
#include "modules/audio_coding/neteq/decision_logic.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/defines.h"
-#include "modules/audio_coding/neteq/delay_manager.h"
-#include "modules/audio_coding/neteq/delay_peak_detector.h"
#include "modules/audio_coding/neteq/dtmf_buffer.h"
#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
#include "modules/audio_coding/neteq/expand.h"
@@ -57,6 +54,24 @@
#include "system_wrappers/include/clock.h"
namespace webrtc {
+namespace {
+
+std::unique_ptr<NetEqController> CreateNetEqController(
+ int base_min_delay,
+ int max_packets_in_buffer,
+ bool enable_rtx_handling,
+ bool allow_time_stretching,
+ TickTimer* tick_timer) {
+ NetEqController::Config config;
+ config.base_min_delay_ms = base_min_delay;
+ config.max_packets_in_buffer = max_packets_in_buffer;
+ config.enable_rtx_handling = enable_rtx_handling;
+ config.allow_time_stretching = allow_time_stretching;
+ config.tick_timer = tick_timer;
+ return std::make_unique<DecisionLogic>(std::move(config));
+}
+
+} // namespace
NetEqImpl::Dependencies::Dependencies(
const NetEq::Config& config,
@@ -65,21 +80,18 @@
: clock(clock),
tick_timer(new TickTimer),
stats(new StatisticsCalculator),
- buffer_level_filter(new BufferLevelFilter),
decoder_database(
new DecoderDatabase(decoder_factory, config.codec_pair_id)),
- delay_peak_detector(
- new DelayPeakDetector(tick_timer.get(), config.enable_rtx_handling)),
- delay_manager(DelayManager::Create(config.max_packets_in_buffer,
- config.min_delay_ms,
- config.enable_rtx_handling,
- delay_peak_detector.get(),
- tick_timer.get(),
- stats.get())),
dtmf_buffer(new DtmfBuffer(config.sample_rate_hz)),
dtmf_tone_generator(new DtmfToneGenerator),
packet_buffer(
new PacketBuffer(config.max_packets_in_buffer, tick_timer.get())),
+ neteq_controller(
+ CreateNetEqController(config.min_delay_ms,
+ config.max_packets_in_buffer,
+ config.enable_rtx_handling,
+ !config.for_test_no_time_stretching,
+ tick_timer.get())),
red_payload_splitter(new RedPayloadSplitter),
timestamp_scaler(new TimestampScaler(*decoder_database)),
accelerate_factory(new AccelerateFactory),
@@ -93,10 +105,7 @@
bool create_components)
: clock_(deps.clock),
tick_timer_(std::move(deps.tick_timer)),
- buffer_level_filter_(std::move(deps.buffer_level_filter)),
decoder_database_(std::move(deps.decoder_database)),
- delay_manager_(std::move(deps.delay_manager)),
- delay_peak_detector_(std::move(deps.delay_peak_detector)),
dtmf_buffer_(std::move(deps.dtmf_buffer)),
dtmf_tone_generator_(std::move(deps.dtmf_tone_generator)),
packet_buffer_(std::move(deps.packet_buffer)),
@@ -107,6 +116,7 @@
accelerate_factory_(std::move(deps.accelerate_factory)),
preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
stats_(std::move(deps.stats)),
+ controller_(std::move(deps.neteq_controller)),
last_mode_(kModeNormal),
decoded_buffer_length_(kMaxFrameSize),
decoded_buffer_(new int16_t[decoded_buffer_length_]),
@@ -133,11 +143,12 @@
<< "Changing to 8000 Hz.";
fs = 8000;
}
- delay_manager_->SetMaximumDelay(config.max_delay_ms);
+ controller_->SetMaximumDelay(config.max_delay_ms);
fs_hz_ = fs;
fs_mult_ = fs / 8000;
last_output_sample_rate_hz_ = fs;
output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
+ controller_->SetSampleRate(fs_hz_, output_size_samples_);
decoder_frame_length_ = 3 * output_size_samples_;
if (create_components) {
SetSampleRateAndChannels(fs, 1); // Default is 1 channel.
@@ -166,7 +177,7 @@
// rtp_header parameter.
// https://bugs.chromium.org/p/webrtc/issues/detail?id=7611
rtc::CritScope lock(&crit_sect_);
- delay_manager_->RegisterEmptyPacket();
+ controller_->RegisterEmptyPacket();
}
namespace {
@@ -279,8 +290,8 @@
bool NetEqImpl::SetMinimumDelay(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
- assert(delay_manager_.get());
- return delay_manager_->SetMinimumDelay(delay_ms);
+ assert(controller_.get());
+ return controller_->SetMinimumDelay(delay_ms);
}
return false;
}
@@ -288,8 +299,8 @@
bool NetEqImpl::SetMaximumDelay(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
- assert(delay_manager_.get());
- return delay_manager_->SetMaximumDelay(delay_ms);
+ assert(controller_.get());
+ return controller_->SetMaximumDelay(delay_ms);
}
return false;
}
@@ -297,32 +308,28 @@
bool NetEqImpl::SetBaseMinimumDelayMs(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
- return delay_manager_->SetBaseMinimumDelay(delay_ms);
+ return controller_->SetBaseMinimumDelay(delay_ms);
}
return false;
}
int NetEqImpl::GetBaseMinimumDelayMs() const {
rtc::CritScope lock(&crit_sect_);
- return delay_manager_->GetBaseMinimumDelay();
+ return controller_->GetBaseMinimumDelay();
}
int NetEqImpl::TargetDelayMs() const {
rtc::CritScope lock(&crit_sect_);
- RTC_DCHECK(delay_manager_.get());
- // The value from TargetLevel() is in number of packets, represented in Q8.
- const size_t target_delay_samples =
- (delay_manager_->TargetLevel() * decoder_frame_length_) >> 8;
- return static_cast<int>(target_delay_samples) /
- rtc::CheckedDivExact(fs_hz_, 1000);
+ RTC_DCHECK(controller_.get());
+ return controller_->TargetLevelMs();
}
int NetEqImpl::FilteredCurrentDelayMs() const {
rtc::CritScope lock(&crit_sect_);
// Sum up the filtered packet buffer level with the future length of the sync
// buffer.
- const int delay_samples = buffer_level_filter_->filtered_current_level() +
- sync_buffer_->FutureLength();
+ const int delay_samples =
+ controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength();
// The division below will truncate. The return value is in ms.
return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
}
@@ -333,12 +340,9 @@
const size_t total_samples_in_buffers =
packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) +
sync_buffer_->FutureLength();
- assert(delay_manager_.get());
- assert(decision_logic_.get());
- const int ms_per_packet = rtc::dchecked_cast<int>(
- decision_logic_->packet_length_samples() / (fs_hz_ / 1000));
- stats_->PopulateDelayManagerStats(ms_per_packet, *delay_manager_.get(),
- stats);
+ assert(controller_.get());
+ stats->preferred_buffer_size_ms = controller_->TargetLevelMs();
+ stats->jitter_peaks_found = controller_->PeakFound();
stats_->GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
decoder_frame_length_, stats);
return 0;
@@ -712,38 +716,27 @@
}
}
- // TODO(hlundin): Move this code to DelayManager class.
const DecoderDatabase::DecoderInfo* dec_info =
decoder_database_->GetDecoderInfo(main_payload_type);
assert(dec_info); // Already checked that the payload type is known.
- delay_manager_->LastDecodedWasCngOrDtmf(dec_info->IsComfortNoise() ||
- dec_info->IsDtmf());
- if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
- // Calculate the total speech length carried in each packet.
- if (number_of_primary_packets > 0) {
- const size_t packet_length_samples =
- number_of_primary_packets * decoder_frame_length_;
- if (packet_length_samples != decision_logic_->packet_length_samples()) {
- decision_logic_->set_packet_length_samples(packet_length_samples);
- delay_manager_->SetPacketAudioLength(
- rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz_));
- }
- }
- // Update statistics.
- if ((enable_rtx_handling_ || (int32_t)(main_timestamp - timestamp_) >= 0) &&
- !new_codec_) {
- // Only update statistics if incoming packet is not older than last played
- // out packet or RTX handling is enabled, and if new codec flag is not
- // set.
- delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz_);
- }
- } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
- // This is first "normal" packet after CNG or DTMF.
- // Reset packet time counter and measure time until next packet,
- // but don't update statistics.
- delay_manager_->set_last_pack_cng_or_dtmf(0);
- delay_manager_->ResetPacketIatCount();
+ const bool last_cng_or_dtmf =
+ dec_info->IsComfortNoise() || dec_info->IsDtmf();
+ const size_t packet_length_samples =
+ number_of_primary_packets * decoder_frame_length_;
+ // Only update statistics if incoming packet is not older than last played
+ // out packet or RTX handling is enabled, and if new codec flag is not
+ // set.
+ const bool should_update_stats =
+ (enable_rtx_handling_ ||
+ static_cast<int32_t>(main_timestamp - timestamp_) >= 0) &&
+ !new_codec_;
+
+ auto relative_delay = controller_->PacketArrived(
+ last_cng_or_dtmf, packet_length_samples, should_update_stats,
+ main_sequence_number, main_timestamp, fs_hz_);
+ if (relative_delay) {
+ stats_->RelativePacketArrivalDelay(relative_delay.value());
}
return 0;
}
@@ -1018,10 +1011,10 @@
uint64_t generated_noise_samples =
generated_noise_stopwatch_ ? (generated_noise_stopwatch_->ElapsedTicks() -
1) * output_size_samples_ +
- decision_logic_->noise_fast_forward()
+ controller_->noise_fast_forward()
: 0;
- if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
+ if (controller_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
// Because of timestamp peculiarities, we have to "manually" disallow using
// a CNG packet with the same timestamp as the one that was last played.
// This can happen when using redundancy and will cause the timing to shift.
@@ -1050,7 +1043,7 @@
last_mode_ == kModePreemptiveExpandSuccess ||
last_mode_ == kModePreemptiveExpandLowEnergy) {
// Subtract (samples_left + output_size_samples_) from sampleMemory.
- decision_logic_->AddSampleMemory(
+ controller_->AddSampleMemory(
-(samples_left + rtc::dchecked_cast<int>(output_size_samples_)));
}
@@ -1067,11 +1060,31 @@
generated_noise_samples =
generated_noise_stopwatch_
? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ +
- decision_logic_->noise_fast_forward()
+ controller_->noise_fast_forward()
: 0;
- *operation = decision_logic_->GetDecision(
- *sync_buffer_, *expand_, decoder_frame_length_, packet, last_mode_,
- *play_dtmf, generated_noise_samples, &reset_decoder_);
+ NetEqController::NetEqStatus status;
+ status.packet_buffer_info.dtx_or_cng =
+ packet_buffer_->ContainsDtxOrCngPacket(decoder_database_.get());
+ status.packet_buffer_info.num_samples =
+ packet_buffer_->NumSamplesInBuffer(decoder_frame_length_);
+ status.packet_buffer_info.span_samples = packet_buffer_->GetSpanSamples(
+ decoder_frame_length_, last_output_sample_rate_hz_, true);
+ status.packet_buffer_info.span_samples_no_dtx =
+ packet_buffer_->GetSpanSamples(decoder_frame_length_,
+ last_output_sample_rate_hz_, false);
+ status.packet_buffer_info.num_packets = packet_buffer_->NumPacketsInBuffer();
+ status.target_timestamp = sync_buffer_->end_timestamp();
+ status.expand_mutefactor = expand_->MuteFactor(0);
+ status.last_packet_samples = decoder_frame_length_;
+ status.last_mode = last_mode_;
+ status.play_dtmf = *play_dtmf;
+ status.generated_noise_samples = generated_noise_samples;
+ if (packet) {
+ status.next_packet = {
+ packet->timestamp, packet->frame && packet->frame->IsDtxPacket(),
+ decoder_database_->IsComfortNoise(packet->payload_type)};
+ }
+ *operation = controller_->GetDecision(status, &reset_decoder_);
// Disallow time stretching if this packet is DTX, because such a decision may
// be based on earlier buffer level estimate, as we do not update buffer level
@@ -1097,7 +1110,7 @@
return 0;
}
- decision_logic_->ExpandDecision(*operation);
+ controller_->ExpandDecision(*operation);
// Check conditions for reset.
if (new_codec_ || *operation == kUndefined) {
@@ -1125,9 +1138,7 @@
sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp);
end_timestamp = timestamp_;
new_codec_ = false;
- decision_logic_->SoftReset();
- buffer_level_filter_->Reset();
- delay_manager_->Reset();
+ controller_->SoftReset();
stats_->ResetMcu();
}
@@ -1153,7 +1164,7 @@
generated_noise_stopwatch_
? generated_noise_stopwatch_->ElapsedTicks() *
output_size_samples_ +
- decision_logic_->noise_fast_forward()
+ controller_->noise_fast_forward()
: 0;
if (generated_noise_samples > 0 && last_mode_ != kModeDtmf) {
// Make a jump in timestamp due to the recently played comfort noise.
@@ -1169,8 +1180,8 @@
// In order to do an accelerate we need at least 30 ms of audio data.
if (samples_left >= static_cast<int>(samples_30_ms)) {
// Already have enough data, so we do not need to extract any more.
- decision_logic_->set_sample_memory(samples_left);
- decision_logic_->set_prev_time_scale(true);
+ controller_->set_sample_memory(samples_left);
+ controller_->set_prev_time_scale(true);
return 0;
} else if (samples_left >= static_cast<int>(samples_10_ms) &&
decoder_frame_length_ >= samples_30_ms) {
@@ -1201,8 +1212,8 @@
// Already have enough data, so we do not need to extract any more.
// Or, avoid decoding more data as it might overflow the playout buffer.
// Still try preemptive expand, though.
- decision_logic_->set_sample_memory(samples_left);
- decision_logic_->set_prev_time_scale(true);
+ controller_->set_sample_memory(samples_left);
+ controller_->set_prev_time_scale(true);
return 0;
}
if (samples_left < static_cast<int>(samples_20_ms) &&
@@ -1228,7 +1239,7 @@
int extracted_samples = 0;
if (packet) {
sync_buffer_->IncreaseEndTimestamp(packet->timestamp - end_timestamp);
- if (decision_logic_->CngOff()) {
+ if (controller_->CngOff()) {
// Adjustment of timestamp only corresponds to an actual packet loss
// if comfort noise is not played. If comfort noise was just played,
// this adjustment of timestamp is only done to get back in sync with the
@@ -1238,7 +1249,7 @@
if (*operation != kRfc3389Cng) {
// We are about to decode and use a non-CNG packet.
- decision_logic_->SetCngOff();
+ controller_->SetCngOff();
}
extracted_samples = ExtractPackets(required_samples, packet_list);
@@ -1249,8 +1260,8 @@
if (*operation == kAccelerate || *operation == kFastAccelerate ||
*operation == kPreemptiveExpand) {
- decision_logic_->set_sample_memory(samples_left + extracted_samples);
- decision_logic_->set_prev_time_scale(true);
+ controller_->set_sample_memory(samples_left + extracted_samples);
+ controller_->set_prev_time_scale(true);
}
if (*operation == kAccelerate || *operation == kFastAccelerate) {
@@ -2058,13 +2069,8 @@
decoded_buffer_length_ = kMaxFrameSize * channels;
decoded_buffer_.reset(new int16_t[decoded_buffer_length_]);
}
-
- // Create DecisionLogic if it is not created yet, then communicate new sample
- // rate and output size to DecisionLogic object.
- if (!decision_logic_.get()) {
- CreateDecisionLogic();
- }
- decision_logic_->SetSampleRate(fs_hz_, output_size_samples_);
+ RTC_CHECK(controller_) << "Unexpectedly found no NetEqController";
+ controller_->SetSampleRate(fs_hz_, output_size_samples_);
}
NetEqImpl::OutputType NetEqImpl::LastOutputType() {
@@ -2085,11 +2091,4 @@
return OutputType::kNormalSpeech;
}
}
-
-void NetEqImpl::CreateDecisionLogic() {
- decision_logic_.reset(DecisionLogic::Create(
- fs_hz_, output_size_samples_, no_time_stretching_,
- decoder_database_.get(), *packet_buffer_.get(), delay_manager_.get(),
- buffer_level_filter_.get(), tick_timer_.get()));
-}
} // namespace webrtc