NetEq: Add functionality to assist with delay analysis and tooling
This CL adds a few methods to the NetEq API (TargetDelayMs,
LastDecodedTimestamps and SyncBufferSizeMs) that will be used for delay
analysis and plotting; a short usage sketch follows below.
BUG=webrtc:7467
Review-Url: https://codereview.webrtc.org/2839163002
Cr-Commit-Position: refs/heads/master@{#17889}
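
As a rough sketch of how these getters are meant to be consumed (illustrative
only, not part of this CL: the include paths, the printf-based logging and the
fixed 1 s / 10 ms loop are assumptions), an analysis tool could poll them after
every GetAudio call:

  // A minimal sketch (not part of this CL): poll the new getters once per
  // GetAudio call and print the values for offline plotting. Assumes a fully
  // configured NetEq instance; packet insertion is handled elsewhere.
  #include <cstdio>

  #include "webrtc/modules/audio_coding/neteq/include/neteq.h"
  #include "webrtc/modules/include/module_common_types.h"  // AudioFrame.

  void LogDelaysForOneSecond(webrtc::NetEq* neteq) {
    webrtc::AudioFrame frame;
    bool muted = false;
    for (int t_ms = 0; t_ms < 1000; t_ms += 10) {
      if (neteq->GetAudio(&frame, &muted) != 0)
        break;
      std::printf("%d ms: target=%d current=%d sync_buffer=%d decoded=%zu\n",
                  t_ms, neteq->TargetDelayMs(), neteq->CurrentDelayMs(),
                  neteq->SyncBufferSizeMs(),
                  neteq->LastDecodedTimestamps().size());
    }
  }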
diff --git a/webrtc/modules/audio_coding/neteq/include/neteq.h b/webrtc/modules/audio_coding/neteq/include/neteq.h
index 322a86f..f034580 100644
--- a/webrtc/modules/audio_coding/neteq/include/neteq.h
+++ b/webrtc/modules/audio_coding/neteq/include/neteq.h
@@ -14,6 +14,7 @@
#include <string.h> // Provide access to size_t.
#include <string>
+#include <vector>
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
@@ -211,8 +212,9 @@
// Not implemented.
virtual int SetTargetDelay() = 0;
- // Not implemented.
- virtual int TargetDelay() = 0;
+ // Returns the current target delay in ms. This includes any extra delay
+ // requested through SetMinimumDelay.
+ virtual int TargetDelayMs() = 0;
// Returns the current total delay (packet buffer and sync buffer) in ms.
virtual int CurrentDelayMs() const = 0;
@@ -302,6 +304,16 @@
virtual std::vector<uint16_t> GetNackList(
int64_t round_trip_time_ms) const = 0;
+ // Returns a vector containing the timestamps of the packets that were decoded
+ // in the last GetAudio call. If no packets were decoded in the last call, the
+ // vector is empty.
+ // Mainly intended for testing.
+ virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
+
+ // Returns the length in ms of the audio yet to play in the sync buffer.
+ // Mainly intended for testing.
+ virtual int SyncBufferSizeMs() const = 0;
+
protected:
NetEq() {}
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index 89bddec..e119d94 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -364,8 +364,14 @@
return kNotImplemented;
}
-int NetEqImpl::TargetDelay() {
- return kNotImplemented;
+int NetEqImpl::TargetDelayMs() {
+ rtc::CritScope lock(&crit_sect_);
+ RTC_DCHECK(delay_manager_.get());
+ // The value from TargetLevel() is in number of packets, represented in Q8.
+ const size_t target_delay_samples =
+ (delay_manager_->TargetLevel() * decoder_frame_length_) >> 8;
+ return static_cast<int>(target_delay_samples) /
+ rtc::CheckedDivExact(fs_hz_, 1000);
}
int NetEqImpl::CurrentDelayMs() const {
@@ -569,6 +575,17 @@
return nack_->GetNackList(round_trip_time_ms);
}
+std::vector<uint32_t> NetEqImpl::LastDecodedTimestamps() const {
+ rtc::CritScope lock(&crit_sect_);
+ return last_decoded_timestamps_;
+}
+
+int NetEqImpl::SyncBufferSizeMs() const {
+ rtc::CritScope lock(&crit_sect_);
+ return rtc::dchecked_cast<int>(sync_buffer_->FutureLength() /
+ rtc::CheckedDivExact(fs_hz_, 1000));
+}
+
const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
rtc::CritScope lock(&crit_sect_);
return sync_buffer_.get();
@@ -873,6 +890,7 @@
Operations operation;
bool play_dtmf;
*muted = false;
+ last_decoded_timestamps_.clear();
tick_timer_->Increment();
stats_.IncreaseCounter(output_size_samples_, fs_hz_);
@@ -1498,6 +1516,8 @@
int NetEqImpl::DecodeLoop(PacketList* packet_list, const Operations& operation,
AudioDecoder* decoder, int* decoded_length,
AudioDecoder::SpeechType* speech_type) {
+ RTC_DCHECK(last_decoded_timestamps_.empty());
+
// Do decoding.
while (
!packet_list->empty() &&
@@ -1514,6 +1534,7 @@
auto opt_result = packet_list->front().frame->Decode(
rtc::ArrayView<int16_t>(&decoded_buffer_[*decoded_length],
decoded_buffer_length_ - *decoded_length));
+ last_decoded_timestamps_.push_back(packet_list->front().timestamp);
packet_list->pop_front();
if (opt_result) {
const auto& result = *opt_result;
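
For reference, the Q8 arithmetic in NetEqImpl::TargetDelayMs() above can be
checked with example numbers (illustrative values, not defaults from this CL):
a 48 kHz codec with 20 ms frames (960 samples) and a target level of 4 packets
gives 4 << 8 = 1024 in Q8, and the conversion yields 80 ms:

  // Illustrative check of the TargetDelayMs() conversion; all constants are
  // example values, not taken from the CL.
  constexpr int kTargetLevelQ8 = 4 << 8;      // Target level: 4 packets in Q8.
  constexpr int kFrameLengthSamples = 960;    // 20 ms frames at 48 kHz.
  constexpr int kFsHz = 48000;
  constexpr int kTargetDelaySamples =
      (kTargetLevelQ8 * kFrameLengthSamples) >> 8;  // 4 * 960 = 3840 samples.
  static_assert(kTargetDelaySamples / (kFsHz / 1000) == 80,
                "4 packets of 20 ms should give an 80 ms target delay");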
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index 863bfbb..8a789bc 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -139,7 +139,7 @@
int SetTargetDelay() override;
- int TargetDelay() override;
+ int TargetDelayMs() override;
int CurrentDelayMs() const override;
@@ -207,6 +207,10 @@
std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
+ std::vector<uint32_t> LastDecodedTimestamps() const override;
+
+ int SyncBufferSizeMs() const override;
+
// This accessor method is only intended for testing purposes.
const SyncBuffer* sync_buffer_for_test() const;
Operations last_operation_for_test() const;
@@ -414,6 +418,7 @@
AudioFrame::kVadPassive;
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
GUARDED_BY(crit_sect_);
+ std::vector<uint32_t> last_decoded_timestamps_ GUARDED_BY(crit_sect_);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
index 4e110a6..fb87111 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -1271,6 +1271,19 @@
EXPECT_EQ(1u, tick_timer_->ticks());
}
+TEST_F(NetEqImplTest, TargetDelayMs) {
+ UseNoMocks();
+ use_mock_delay_manager_ = true;
+ CreateInstance();
+ // Let the dummy target delay be 17 packets.
+ constexpr int kTargetLevelPacketsQ8 = 17 << 8;
+ EXPECT_CALL(*mock_delay_manager_, TargetLevel())
+ .WillOnce(Return(kTargetLevelPacketsQ8));
+ // Default packet size before any packet has been decoded is 30 ms, so we are
+ // expecting 17 * 30 = 510 ms target delay.
+ EXPECT_EQ(17 * 30, neteq_->TargetDelayMs());
+}
+
class Decoder120ms : public AudioDecoder {
public:
Decoder120ms(int sample_rate_hz, SpeechType speech_type)
diff --git a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
index 68a4921..03a788e 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
@@ -270,8 +270,6 @@
void DuplicateCng();
- rtc::Optional<uint32_t> PlayoutTimestamp();
-
NetEq* neteq_;
NetEq::Config config_;
std::unique_ptr<test::RtpFileSource> rtp_source_;
@@ -644,7 +642,7 @@
}
EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
- rtc::Optional<uint32_t> playout_timestamp = PlayoutTimestamp();
+ rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
ASSERT_TRUE(playout_timestamp);
int32_t delay_before = timestamp - *playout_timestamp;
@@ -736,7 +734,7 @@
// Check that the speech starts again within reasonable time.
double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
- playout_timestamp = PlayoutTimestamp();
+ playout_timestamp = neteq_->GetPlayoutTimestamp();
ASSERT_TRUE(playout_timestamp);
int32_t delay_after = timestamp - *playout_timestamp;
// Compare delay before and after, and make sure it differs less than 20 ms.
@@ -1128,7 +1126,7 @@
ASSERT_EQ(1u, output.num_channels_);
// Expect delay (in samples) to be less than 2 packets.
- rtc::Optional<uint32_t> playout_timestamp = PlayoutTimestamp();
+ rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
ASSERT_TRUE(playout_timestamp);
EXPECT_LE(timestamp - *playout_timestamp,
static_cast<uint32_t>(kSamples * 2));
@@ -1207,7 +1205,8 @@
ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
- EXPECT_FALSE(PlayoutTimestamp()); // Returns empty value during CNG.
+ EXPECT_FALSE(
+ neteq_->GetPlayoutTimestamp()); // Returns empty value during CNG.
EXPECT_EQ(timestamp - algorithmic_delay_samples,
out_frame_.timestamp_ + out_frame_.samples_per_channel_);
@@ -1223,7 +1222,8 @@
ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
- EXPECT_FALSE(PlayoutTimestamp()); // Returns empty value during CNG.
+ EXPECT_FALSE(
+ neteq_->GetPlayoutTimestamp()); // Returns empty value during CNG.
EXPECT_EQ(timestamp - algorithmic_delay_samples,
out_frame_.timestamp_ + out_frame_.samples_per_channel_);
}
@@ -1238,16 +1238,12 @@
ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
- rtc::Optional<uint32_t> playout_timestamp = PlayoutTimestamp();
+ rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
ASSERT_TRUE(playout_timestamp);
EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
*playout_timestamp);
}
-rtc::Optional<uint32_t> NetEqDecodingTest::PlayoutTimestamp() {
- return neteq_->GetPlayoutTimestamp();
-}
-
TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); }
TEST_F(NetEqDecodingTest, CngFirst) {
@@ -1590,4 +1586,65 @@
EXPECT_FALSE(muted);
}
+TEST_F(NetEqDecodingTest, LastDecodedTimestampsEmpty) {
+ EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
+
+ // Pull out data once.
+ AudioFrame output;
+ bool muted;
+ ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+
+ EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
+}
+
+TEST_F(NetEqDecodingTest, LastDecodedTimestampsOneDecoded) {
+ // Insert one packet with PCM16b WB data (this is what PopulateRtpInfo does by
+ // default). Make the length 10 ms.
+ constexpr size_t kPayloadSamples = 16 * 10;
+ constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
+ uint8_t payload[kPayloadBytes] = {0};
+
+ RTPHeader rtp_info;
+ constexpr uint32_t kRtpTimestamp = 0x1234;
+ PopulateRtpInfo(0, kRtpTimestamp, &rtp_info);
+ EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
+
+ // Pull out data once.
+ AudioFrame output;
+ bool muted;
+ ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+
+ EXPECT_EQ(std::vector<uint32_t>({kRtpTimestamp}),
+ neteq_->LastDecodedTimestamps());
+
+ // Nothing decoded on the second call.
+ ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+ EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
+}
+
+TEST_F(NetEqDecodingTest, LastDecodedTimestampsTwoDecoded) {
+ // Insert two packets with PCM16b WB data (this is what PopulateRtpInfo does
+ // by default). Make the length 5 ms so that NetEq must decode them both in
+ // the same GetAudio call.
+ constexpr size_t kPayloadSamples = 16 * 5;
+ constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
+ uint8_t payload[kPayloadBytes] = {0};
+
+ RTPHeader rtp_info;
+ constexpr uint32_t kRtpTimestamp1 = 0x1234;
+ PopulateRtpInfo(0, kRtpTimestamp1, &rtp_info);
+ EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
+ constexpr uint32_t kRtpTimestamp2 = kRtpTimestamp1 + kPayloadSamples;
+ PopulateRtpInfo(1, kRtpTimestamp2, &rtp_info);
+ EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
+
+ // Pull out data once.
+ AudioFrame output;
+ bool muted;
+ ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+
+ EXPECT_EQ(std::vector<uint32_t>({kRtpTimestamp1, kRtpTimestamp2}),
+ neteq_->LastDecodedTimestamps());
+}
+
} // namespace webrtc
diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer.h b/webrtc/modules/audio_coding/neteq/sync_buffer.h
index 5eae4bf..eb90034 100644
--- a/webrtc/modules/audio_coding/neteq/sync_buffer.h
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer.h
@@ -26,7 +26,7 @@
end_timestamp_(0),
dtmf_index_(0) {}
- // Returns the number of samples yet to play out form the buffer.
+ // Returns the number of samples yet to play out from the buffer.
size_t FutureLength() const;
// Adds the contents of |append_this| to the back of the SyncBuffer. Removes