Introduce injectable NetEqController interface.
This interface is implemented by the DecisionLogic class, which now contains the DelayManager and DelayPeakDetector.
Bug: webrtc:11005
Change-Id: I4fb69fa359e60831cf153e41f101d5b623749380
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155176
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29613}
diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc
index fefad68..1c887b1 100644
--- a/modules/audio_coding/neteq/decision_logic.cc
+++ b/modules/audio_coding/neteq/decision_logic.cc
@@ -16,12 +16,7 @@
#include <string>
#include "absl/types/optional.h"
-#include "modules/audio_coding/neteq/buffer_level_filter.h"
-#include "modules/audio_coding/neteq/decoder_database.h"
-#include "modules/audio_coding/neteq/delay_manager.h"
-#include "modules/audio_coding/neteq/expand.h"
#include "modules/audio_coding/neteq/packet_buffer.h"
-#include "modules/audio_coding/neteq/sync_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/field_trial_parser.h"
#include "rtc_base/logging.h"
@@ -37,48 +32,23 @@
namespace webrtc {
-DecisionLogic* DecisionLogic::Create(int fs_hz,
- size_t output_size_samples,
- bool disallow_time_stretching,
- DecoderDatabase* decoder_database,
- const PacketBuffer& packet_buffer,
- DelayManager* delay_manager,
- BufferLevelFilter* buffer_level_filter,
- const TickTimer* tick_timer) {
- return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching,
- decoder_database, packet_buffer, delay_manager,
- buffer_level_filter, tick_timer);
-}
-
-DecisionLogic::DecisionLogic(int fs_hz,
- size_t output_size_samples,
- bool disallow_time_stretching,
- DecoderDatabase* decoder_database,
- const PacketBuffer& packet_buffer,
- DelayManager* delay_manager,
- BufferLevelFilter* buffer_level_filter,
- const TickTimer* tick_timer)
- : decoder_database_(decoder_database),
- packet_buffer_(packet_buffer),
- delay_manager_(delay_manager),
- buffer_level_filter_(buffer_level_filter),
- tick_timer_(tick_timer),
- cng_state_(kCngOff),
- packet_length_samples_(0),
- sample_memory_(0),
- prev_time_scale_(false),
- disallow_time_stretching_(disallow_time_stretching),
+DecisionLogic::DecisionLogic(NetEqController::Config config)
+ : delay_peak_detector_(config.tick_timer, config.enable_rtx_handling),
+ delay_manager_(DelayManager::Create(config.max_packets_in_buffer,
+ config.base_min_delay_ms,
+ config.enable_rtx_handling,
+ &delay_peak_detector_,
+ config.tick_timer)),
+ tick_timer_(config.tick_timer),
+ disallow_time_stretching_(!config.allow_time_stretching),
timescale_countdown_(
tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
- num_consecutive_expands_(0),
- time_stretched_cn_samples_(0),
estimate_dtx_delay_("estimate_dtx_delay", false),
time_stretch_cn_("time_stretch_cn", false),
target_level_window_ms_("target_level_window",
kDefaultTargetLevelWindowMs,
0,
absl::nullopt) {
- SetSampleRate(fs_hz, output_size_samples);
const std::string field_trial_name =
field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings");
ParseFieldTrial(
@@ -110,6 +80,8 @@
timescale_countdown_ =
tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
time_stretched_cn_samples_ = 0;
+ delay_manager_->Reset();
+ buffer_level_filter_.Reset();
}
void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
@@ -119,69 +91,54 @@
output_size_samples_ = output_size_samples;
}
-Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
- const Expand& expand,
- size_t decoder_frame_length,
- const Packet* next_packet,
- Modes prev_mode,
- bool play_dtmf,
- size_t generated_noise_samples,
+Operations DecisionLogic::GetDecision(const NetEqStatus& status,
bool* reset_decoder) {
// If last mode was CNG (or Expand, since this could be covering up for
// a lost CNG packet), remember that CNG is on. This is needed if comfort
// noise is interrupted by DTMF.
- if (prev_mode == kModeRfc3389Cng) {
+ if (status.last_mode == kModeRfc3389Cng) {
cng_state_ = kCngRfc3389On;
- } else if (prev_mode == kModeCodecInternalCng) {
+ } else if (status.last_mode == kModeCodecInternalCng) {
cng_state_ = kCngInternalOn;
}
- size_t cur_size_samples =
- estimate_dtx_delay_
- ? packet_buffer_.GetSpanSamples(decoder_frame_length, sample_rate_,
- true)
- : packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
+ size_t cur_size_samples = estimate_dtx_delay_
+ ? status.packet_buffer_info.span_samples
+ : status.packet_buffer_info.num_samples;
prev_time_scale_ =
- prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
- prev_mode == kModeAccelerateLowEnergy ||
- prev_mode == kModePreemptiveExpandSuccess ||
- prev_mode == kModePreemptiveExpandLowEnergy);
+ prev_time_scale_ && (status.last_mode == kModeAccelerateSuccess ||
+ status.last_mode == kModeAccelerateLowEnergy ||
+ status.last_mode == kModePreemptiveExpandSuccess ||
+ status.last_mode == kModePreemptiveExpandLowEnergy);
// Do not update buffer history if currently playing CNG since it will bias
// the filtered buffer level.
- if (prev_mode != kModeRfc3389Cng && prev_mode != kModeCodecInternalCng &&
- !(next_packet && next_packet->frame &&
- next_packet->frame->IsDtxPacket() && !estimate_dtx_delay_)) {
+ if (status.last_mode != kModeRfc3389Cng &&
+ status.last_mode != kModeCodecInternalCng &&
+ !(status.next_packet && status.next_packet->is_dtx &&
+ !estimate_dtx_delay_)) {
FilterBufferLevel(cur_size_samples);
}
// Guard for errors, to avoid getting stuck in error mode.
- if (prev_mode == kModeError) {
- if (!next_packet) {
+ if (status.last_mode == kModeError) {
+ if (!status.next_packet) {
return kExpand;
} else {
return kUndefined; // Use kUndefined to flag for a reset.
}
}
- uint32_t target_timestamp = sync_buffer.end_timestamp();
- uint32_t available_timestamp = 0;
- bool is_cng_packet = false;
- if (next_packet) {
- available_timestamp = next_packet->timestamp;
- is_cng_packet =
- decoder_database_->IsComfortNoise(next_packet->payload_type);
- }
-
- if (is_cng_packet) {
- return CngOperation(prev_mode, target_timestamp, available_timestamp,
- generated_noise_samples);
+ if (status.next_packet && status.next_packet->is_cng) {
+ return CngOperation(status.last_mode, status.target_timestamp,
+ status.next_packet->timestamp,
+ status.generated_noise_samples);
}
// Handle the case with no packet at all available (except maybe DTMF).
- if (!next_packet) {
- return NoPacket(play_dtmf);
+ if (!status.next_packet) {
+ return NoPacket(status.play_dtmf);
}
// If the expand period was very long, reset NetEQ since it is likely that the
@@ -198,26 +155,30 @@
// if the mute factor is low enough (otherwise the expansion was short enough
// to not be noticable).
// Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
- size_t current_span = packet_buffer_.GetSpanSamples(
- decoder_frame_length, sample_rate_, estimate_dtx_delay_);
- if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
- expand.MuteFactor(0) < 16384 / 2 &&
+ const size_t current_span =
+ estimate_dtx_delay_ ? status.packet_buffer_info.span_samples
+ : status.packet_buffer_info.span_samples_no_dtx;
+ if ((status.last_mode == kModeExpand || status.last_mode == kModeCodecPlc) &&
+ status.expand_mutefactor < 16384 / 2 &&
current_span<static_cast<size_t>(delay_manager_->TargetLevel() *
packet_length_samples_ *
kPostponeDecodingLevel / 100)>> 8 &&
- !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) {
+ !status.packet_buffer_info.dtx_or_cng) {
return kExpand;
}
const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_);
// Check if the required packet is available.
- if (target_timestamp == available_timestamp) {
- return ExpectedPacketAvailable(prev_mode, play_dtmf);
- } else if (!PacketBuffer::IsObsoleteTimestamp(
- available_timestamp, target_timestamp, five_seconds_samples)) {
- return FuturePacketAvailable(decoder_frame_length, prev_mode,
- target_timestamp, available_timestamp,
- play_dtmf, generated_noise_samples);
+ if (status.target_timestamp == status.next_packet->timestamp) {
+ return ExpectedPacketAvailable(status.last_mode, status.play_dtmf);
+ } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
+ status.target_timestamp,
+ five_seconds_samples)) {
+ return FuturePacketAvailable(
+ status.last_packet_samples, status.last_mode, status.target_timestamp,
+ status.next_packet->timestamp, status.play_dtmf,
+ status.generated_noise_samples, status.packet_buffer_info.span_samples,
+ status.packet_buffer_info.num_packets);
} else {
// This implies that available_timestamp < target_timestamp, which can
// happen when a new stream or codec is received. Signal for a reset.
@@ -233,8 +194,40 @@
}
}
+absl::optional<int> DecisionLogic::PacketArrived(bool last_cng_or_dtmf,
+ size_t packet_length_samples,
+ bool should_update_stats,
+ uint16_t main_sequence_number,
+ uint32_t main_timestamp,
+ int fs_hz) {
+ delay_manager_->LastDecodedWasCngOrDtmf(last_cng_or_dtmf);
+ absl::optional<int> relative_delay;
+ if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
+ // Calculate the total speech length carried in each packet.
+ if (packet_length_samples > 0 &&
+ packet_length_samples != packet_length_samples_) {
+ packet_length_samples_ = packet_length_samples;
+ delay_manager_->SetPacketAudioLength(
+ rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz));
+ }
+
+ // Update statistics.
+ if (should_update_stats) {
+ relative_delay =
+ delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz);
+ }
+ } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
+ // This is first "normal" packet after CNG or DTMF.
+ // Reset packet time counter and measure time until next packet,
+ // but don't update statistics.
+ delay_manager_->set_last_pack_cng_or_dtmf(0);
+ delay_manager_->ResetPacketIatCount();
+ }
+ return relative_delay;
+}
+
void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
- buffer_level_filter_->SetTargetBufferLevel(
+ buffer_level_filter_.SetTargetBufferLevel(
delay_manager_->base_target_level());
int time_stretched_samples = time_stretched_cn_samples_;
@@ -243,7 +236,7 @@
timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
}
- buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples);
+ buffer_level_filter_.Update(buffer_size_samples, time_stretched_samples);
prev_time_scale_ = false;
time_stretched_cn_samples_ = 0;
}
@@ -307,7 +300,7 @@
int buffer_level_packets = 0;
if (packet_length_samples_ > 0) {
buffer_level_packets =
- ((1 << 8) * buffer_level_filter_->filtered_current_level()) /
+ ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
packet_length_samples_;
}
if (buffer_level_packets >= high_limit << 2)
@@ -328,7 +321,9 @@
uint32_t target_timestamp,
uint32_t available_timestamp,
bool play_dtmf,
- size_t generated_noise_samples) {
+ size_t generated_noise_samples,
+ size_t span_samples_in_packet_buffer,
+ size_t num_packets_in_packet_buffer) {
// Required packet is not available, but a future packet is.
// Check if we should continue with an ongoing expand because the new packet
// is too far into the future.
@@ -353,9 +348,8 @@
if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
size_t cur_size_samples =
estimate_dtx_delay_
- ? cur_size_samples = packet_buffer_.GetSpanSamples(
- decoder_frame_length, sample_rate_, true)
- : packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
+ ? cur_size_samples = span_samples_in_packet_buffer
+ : num_packets_in_packet_buffer * decoder_frame_length;
// Target level is in number of packets in Q8.
const size_t target_level_samples =
(delay_manager_->TargetLevel() * packet_length_samples_) >> 8;
@@ -411,7 +405,7 @@
int buffer_level_packets = 0;
if (packet_length_samples_ > 0) {
buffer_level_packets =
- ((1 << 8) * buffer_level_filter_->filtered_current_level()) /
+ ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
packet_length_samples_;
}
return buffer_level_packets <= delay_manager_->TargetLevel();