NetEq: Move counting of generated CNG samples from DecisionLogic
The counting is moved to NetEqImpl, and the new counter is realized as a
Stopwatch object. The DecisionLogic class still has to maintain record
of when the CNG period is shortened, in order to reduce the delay. This
is recorded in a new noise_fast_forward_ member in DecisionLogic.
BUG=webrtc:5608
Review-Url: https://codereview.webrtc.org/1914303004
Cr-Commit-Position: refs/heads/master@{#12608}
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic.cc b/webrtc/modules/audio_coding/neteq/decision_logic.cc
index b702e6d..8cef2c9 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic.cc
+++ b/webrtc/modules/audio_coding/neteq/decision_logic.cc
@@ -67,7 +67,6 @@
delay_manager_(delay_manager),
buffer_level_filter_(buffer_level_filter),
cng_state_(kCngOff),
- generated_noise_samples_(0),
packet_length_samples_(0),
sample_memory_(0),
prev_time_scale_(false),
@@ -80,7 +79,7 @@
void DecisionLogic::Reset() {
cng_state_ = kCngOff;
- generated_noise_samples_ = 0;
+ noise_fast_forward_ = 0;
packet_length_samples_ = 0;
sample_memory_ = 0;
prev_time_scale_ = false;
@@ -107,15 +106,15 @@
size_t decoder_frame_length,
const RTPHeader* packet_header,
Modes prev_mode,
- bool play_dtmf, bool* reset_decoder) {
+ bool play_dtmf,
+ size_t generated_noise_samples,
+ bool* reset_decoder) {
if (prev_mode == kModeRfc3389Cng ||
prev_mode == kModeCodecInternalCng ||
prev_mode == kModeExpand) {
// If last mode was CNG (or Expand, since this could be covering up for
- // a lost CNG packet), increase the |generated_noise_samples_| counter.
- generated_noise_samples_ += output_size_samples_;
- // Remember that CNG is on. This is needed if comfort noise is interrupted
- // by DTMF.
+ // a lost CNG packet), remember that CNG is on. This is needed if comfort
+ // noise is interrupted by DTMF.
if (prev_mode == kModeRfc3389Cng) {
cng_state_ = kCngRfc3389On;
} else if (prev_mode == kModeCodecInternalCng) {
@@ -139,7 +138,7 @@
return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
packet_header, prev_mode, play_dtmf,
- reset_decoder);
+ reset_decoder, generated_noise_samples);
}
void DecisionLogic::ExpandDecision(Operations operation) {
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic.h b/webrtc/modules/audio_coding/neteq/decision_logic.h
index 72121b7..18f424a 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic.h
+++ b/webrtc/modules/audio_coding/neteq/decision_logic.h
@@ -79,6 +79,7 @@
const RTPHeader* packet_header,
Modes prev_mode,
bool play_dtmf,
+ size_t generated_noise_samples,
bool* reset_decoder);
// These methods test the |cng_state_| for different conditions.
@@ -101,10 +102,7 @@
// Accessors and mutators.
void set_sample_memory(int32_t value) { sample_memory_ = value; }
- size_t generated_noise_samples() const { return generated_noise_samples_; }
- void set_generated_noise_samples(size_t value) {
- generated_noise_samples_ = value;
- }
+ size_t noise_fast_forward() const { return noise_fast_forward_; }
size_t packet_length_samples() const { return packet_length_samples_; }
void set_packet_length_samples(size_t value) {
packet_length_samples_ = value;
@@ -138,7 +136,8 @@
const RTPHeader* packet_header,
Modes prev_mode,
bool play_dtmf,
- bool* reset_decoder) = 0;
+ bool* reset_decoder,
+ size_t generated_noise_samples) = 0;
// Updates the |buffer_level_filter_| with the current buffer level
// |buffer_size_packets|.
@@ -152,7 +151,7 @@
size_t output_size_samples_;
CngState cng_state_; // Remember if comfort noise is interrupted by other
// event (e.g., DTMF).
- size_t generated_noise_samples_;
+ size_t noise_fast_forward_ = 0;
size_t packet_length_samples_;
int sample_memory_;
bool prev_time_scale_;
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc b/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc
index ddea644..aace402 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc
@@ -26,7 +26,8 @@
const RTPHeader* packet_header,
Modes prev_mode,
bool play_dtmf,
- bool* reset_decoder) {
+ bool* reset_decoder,
+ size_t generated_noise_samples) {
assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
uint32_t target_timestamp = sync_buffer.end_timestamp();
uint32_t available_timestamp = 0;
@@ -37,7 +38,7 @@
decoder_database_->IsComfortNoise(packet_header->payloadType);
}
if (is_cng_packet) {
- if (static_cast<int32_t>((generated_noise_samples_ + target_timestamp)
+ if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
- available_timestamp) >= 0) {
// Time to play this packet now.
return kRfc3389Cng;
@@ -70,13 +71,13 @@
} else if (target_timestamp == available_timestamp) {
return kNormal;
} else {
- if (static_cast<int32_t>((generated_noise_samples_ + target_timestamp)
+ if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
- available_timestamp) >= 0) {
return kNormal;
} else {
// If currently playing comfort noise, continue with that. Do not
- // increase the timestamp counter since generated_noise_samples_ will
- // be increased.
+ // increase the timestamp counter since generated_noise_stopwatch_ in
+ // NetEqImpl will take care of the time-keeping.
if (cng_state_ == kCngRfc3389On) {
return kRfc3389CngNoPacket;
} else if (cng_state_ == kCngInternalOn) {
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_fax.h b/webrtc/modules/audio_coding/neteq/decision_logic_fax.h
index 204dcc1..fbb0cc0 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_fax.h
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_fax.h
@@ -50,7 +50,8 @@
const RTPHeader* packet_header,
Modes prev_mode,
bool play_dtmf,
- bool* reset_decoder) override;
+ bool* reset_decoder,
+ size_t generated_noise_samples) override;
private:
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc b/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc
index 0252d1c..37a75d7 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc
@@ -31,7 +31,8 @@
const RTPHeader* packet_header,
Modes prev_mode,
bool play_dtmf,
- bool* reset_decoder) {
+ bool* reset_decoder,
+ size_t generated_noise_samples) {
assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
// Guard for errors, to avoid getting stuck in error mode.
if (prev_mode == kModeError) {
@@ -52,7 +53,8 @@
}
if (is_cng_packet) {
- return CngOperation(prev_mode, target_timestamp, available_timestamp);
+ return CngOperation(prev_mode, target_timestamp, available_timestamp,
+ generated_noise_samples);
}
// Handle the case with no packet at all available (except maybe DTMF).
@@ -76,7 +78,8 @@
available_timestamp, target_timestamp, five_seconds_samples)) {
return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length,
prev_mode, target_timestamp,
- available_timestamp, play_dtmf);
+ available_timestamp, play_dtmf,
+ generated_noise_samples);
} else {
// This implies that available_timestamp < target_timestamp, which can
// happen when a new stream or codec is received. Signal for a reset.
@@ -86,10 +89,11 @@
Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
uint32_t target_timestamp,
- uint32_t available_timestamp) {
+ uint32_t available_timestamp,
+ size_t generated_noise_samples) {
// Signed difference between target and available timestamp.
int32_t timestamp_diff = static_cast<int32_t>(
- static_cast<uint32_t>(generated_noise_samples_ + target_timestamp) -
+ static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
available_timestamp);
int32_t optimal_level_samp = static_cast<int32_t>(
(delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
@@ -97,9 +101,9 @@
if (excess_waiting_time_samp > optimal_level_samp / 2) {
// The waiting time for this packet will be longer than 1.5
- // times the wanted buffer delay. Advance the clock to cut
+ // times the wanted buffer delay. Apply fast-forward to cut the
// waiting time down to the optimal.
- generated_noise_samples_ += excess_waiting_time_samp;
+ noise_fast_forward_ += excess_waiting_time_samp;
timestamp_diff += excess_waiting_time_samp;
}
@@ -109,6 +113,7 @@
return kRfc3389CngNoPacket;
} else {
// Otherwise, go for the CNG packet now.
+ noise_fast_forward_ = 0;
return kRfc3389Cng;
}
}
@@ -153,7 +158,8 @@
Modes prev_mode,
uint32_t target_timestamp,
uint32_t available_timestamp,
- bool play_dtmf) {
+ bool play_dtmf,
+ size_t generated_noise_samples) {
// Required packet is not available, but a future packet is.
// Check if we should continue with an ongoing expand because the new packet
// is too far into the future.
@@ -184,7 +190,7 @@
// safety precaution), but make sure that the number of samples in buffer
// is no higher than 4 times the optimal level. (Note that TargetLevel()
// is in Q8.)
- if (static_cast<uint32_t>(generated_noise_samples_ + target_timestamp) >=
+ if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
available_timestamp ||
cur_size_samples >
((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_normal.h b/webrtc/modules/audio_coding/neteq/decision_logic_normal.h
index 7465906..57aa13b 100644
--- a/webrtc/modules/audio_coding/neteq/decision_logic_normal.h
+++ b/webrtc/modules/audio_coding/neteq/decision_logic_normal.h
@@ -54,7 +54,8 @@
const RTPHeader* packet_header,
Modes prev_mode,
bool play_dtmf,
- bool* reset_decoder) override;
+ bool* reset_decoder,
+ size_t generated_noise_samples) override;
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
@@ -65,7 +66,8 @@
Modes prev_mode,
uint32_t target_timestamp,
uint32_t available_timestamp,
- bool play_dtmf);
+ bool play_dtmf,
+ size_t generated_noise_samples);
// Returns the operation to do given that the expected packet is available.
virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
@@ -77,8 +79,10 @@
private:
// Returns the operation given that the next available packet is a comfort
// noise payload (RFC 3389 only, not codec-internal).
- Operations CngOperation(Modes prev_mode, uint32_t target_timestamp,
- uint32_t available_timestamp);
+ Operations CngOperation(Modes prev_mode,
+ uint32_t target_timestamp,
+ uint32_t available_timestamp,
+ size_t generated_noise_samples);
// Checks if enough time has elapsed since the last successful timescale
// operation was done (i.e., accelerate or preemptive expand).
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index b4ba650..6d705e1 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -833,6 +833,11 @@
vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
sid_frame_available, fs_hz_);
+ if (sid_frame_available || speech_type == AudioDecoder::kComfortNoise) {
+ // Start a new stopwatch since we are decoding a new CNG packet.
+ generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
+ }
+
algorithm_buffer_->Clear();
switch (operation) {
case kNormal: {
@@ -1006,6 +1011,12 @@
: timestamp_scaler_->ToExternal(playout_timestamp_) -
static_cast<uint32_t>(audio_frame->samples_per_channel_);
+ if (!(last_mode_ == kModeRfc3389Cng ||
+ last_mode_ == kModeCodecInternalCng ||
+ last_mode_ == kModeExpand)) {
+ generated_noise_stopwatch_.reset();
+ }
+
if (decode_return_value) return decode_return_value;
return return_value;
}
@@ -1029,14 +1040,22 @@
}
const RTPHeader* header = packet_buffer_->NextRtpHeader();
+ RTC_DCHECK(!generated_noise_stopwatch_ ||
+ generated_noise_stopwatch_->ElapsedTicks() >= 1);
+ uint64_t generated_noise_samples =
+ generated_noise_stopwatch_
+ ? (generated_noise_stopwatch_->ElapsedTicks() - 1) *
+ output_size_samples_ +
+ decision_logic_->noise_fast_forward()
+ : 0;
+
if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
// Because of timestamp peculiarities, we have to "manually" disallow using
// a CNG packet with the same timestamp as the one that was last played.
// This can happen when using redundancy and will cause the timing to shift.
while (header && decoder_database_->IsComfortNoise(header->payloadType) &&
(end_timestamp >= header->timestamp ||
- end_timestamp + decision_logic_->generated_noise_samples() >
- header->timestamp)) {
+ end_timestamp + generated_noise_samples > header->timestamp)) {
// Don't use this packet, discard it.
if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
assert(false); // Must be ok by design.
@@ -1064,7 +1083,7 @@
// Check if it is time to play a DTMF event.
if (dtmf_buffer_->GetEvent(
static_cast<uint32_t>(
- end_timestamp + decision_logic_->generated_noise_samples()),
+ end_timestamp + generated_noise_samples),
dtmf_event)) {
*play_dtmf = true;
}
@@ -1072,13 +1091,14 @@
// Get instruction.
assert(sync_buffer_.get());
assert(expand_.get());
- *operation = decision_logic_->GetDecision(*sync_buffer_,
- *expand_,
- decoder_frame_length_,
- header,
- last_mode_,
- *play_dtmf,
- &reset_decoder_);
+ generated_noise_samples =
+ generated_noise_stopwatch_
+ ? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ +
+ decision_logic_->noise_fast_forward()
+ : 0;
+ *operation = decision_logic_->GetDecision(
+ *sync_buffer_, *expand_, decoder_frame_length_, header, last_mode_,
+ *play_dtmf, generated_noise_samples, &reset_decoder_);
// Check if we already have enough samples in the |sync_buffer_|. If so,
// change decision to normal, unless the decision was merge, accelerate, or
@@ -1151,15 +1171,19 @@
// TODO(hlundin): Write test for this.
// Update timestamp.
timestamp_ = end_timestamp;
- if (decision_logic_->generated_noise_samples() > 0 &&
- last_mode_ != kModeDtmf) {
+ const uint64_t generated_noise_samples =
+ generated_noise_stopwatch_
+ ? generated_noise_stopwatch_->ElapsedTicks() *
+ output_size_samples_ +
+ decision_logic_->noise_fast_forward()
+ : 0;
+ if (generated_noise_samples > 0 && last_mode_ != kModeDtmf) {
// Make a jump in timestamp due to the recently played comfort noise.
uint32_t timestamp_jump =
- static_cast<uint32_t>(decision_logic_->generated_noise_samples());
+ static_cast<uint32_t>(generated_noise_samples);
sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
timestamp_ += timestamp_jump;
}
- decision_logic_->set_generated_noise_samples(0);
return 0;
}
case kAccelerate:
@@ -1242,9 +1266,6 @@
// We are about to decode and use a non-CNG packet.
decision_logic_->SetCngOff();
}
- // Reset CNG timestamp as a new packet will be delivered.
- // (Also if this is a CNG packet, since playedOutTS is updated.)
- decision_logic_->set_generated_noise_samples(0);
extracted_samples = ExtractPackets(required_samples, packet_list);
if (extracted_samples < 0) {
@@ -1577,6 +1598,12 @@
if (!play_dtmf) {
dtmf_tone_generator_->Reset();
}
+
+ if (!generated_noise_stopwatch_) {
+ // Start a new stopwatch since we may be covering for a lost CNG packet.
+ generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
+ }
+
return 0;
}
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index a707f25..2203de1 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -405,6 +405,8 @@
bool nack_enabled_ GUARDED_BY(crit_sect_);
AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) =
AudioFrame::kVadPassive;
+ std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
+ GUARDED_BY(crit_sect_);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);