Added RTCMediaStreamTrackStats.jitterBufferDelay for audio
Description of this stat can be found here:
https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-jitterbufferdelay
Bug: webrtc:8281
Change-Id: Ib2e8174f3449e68ad419ae2d58d5565fc9854023
Reviewed-on: https://webrtc-review.googlesource.com/3381
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Taylor Brandstetter <deadbeef@webrtc.org>
Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20069}
diff --git a/api/stats/rtcstats_objects.h b/api/stats/rtcstats_objects.h
index 903d266..97a7d3c 100644
--- a/api/stats/rtcstats_objects.h
+++ b/api/stats/rtcstats_objects.h
@@ -255,6 +255,10 @@
RTCStatsMember<bool> detached;
// See |RTCMediaStreamTrackKind| for valid values.
RTCStatsMember<std::string> kind;
+ // TODO(gustaf): Implement jitter_buffer_delay for video (currently
+ // implemented for audio only).
+ // https://crbug.com/webrtc/8318
+ RTCStatsMember<double> jitter_buffer_delay;
// Video-only members
RTCStatsMember<uint32_t> frame_width;
RTCStatsMember<uint32_t> frame_height;
diff --git a/api/statstypes.cc b/api/statstypes.cc
index 37e8aac..f1a1130 100644
--- a/api/statstypes.cc
+++ b/api/statstypes.cc
@@ -403,6 +403,8 @@
return "framesDecoded";
case kStatsValueNameFramesEncoded:
return "framesEncoded";
+ case kStatsValueNameJitterBufferDelay:
+ return "jitterBufferDelay";
case kStatsValueNameCodecImplementationName:
return "codecImplementationName";
case kStatsValueNameMediaType:
diff --git a/api/statstypes.h b/api/statstypes.h
index 7f69b02..9e7f08c 100644
--- a/api/statstypes.h
+++ b/api/statstypes.h
@@ -109,6 +109,7 @@
kStatsValueNameDataChannelId,
kStatsValueNameFramesDecoded,
kStatsValueNameFramesEncoded,
+ kStatsValueNameJitterBufferDelay,
kStatsValueNameMediaType,
kStatsValueNamePacketsLost,
kStatsValueNamePacketsReceived,
diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc
index 704c86e..2a57551 100644
--- a/audio/audio_receive_stream.cc
+++ b/audio/audio_receive_stream.cc
@@ -197,6 +197,9 @@
stats.total_samples_received = ns.totalSamplesReceived;
stats.concealed_samples = ns.concealedSamples;
stats.concealment_events = ns.concealmentEvents;
+ stats.jitter_buffer_delay_seconds =
+ static_cast<double>(ns.jitterBufferDelayMs) /
+ static_cast<double>(rtc::kNumMillisecsPerSec);
stats.expand_rate = Q14ToFloat(ns.currentExpandRate);
stats.speech_expand_rate = Q14ToFloat(ns.currentSpeechExpandRate);
stats.secondary_decoded_rate = Q14ToFloat(ns.currentSecondaryDecodedRate);
diff --git a/audio/audio_receive_stream_unittest.cc b/audio/audio_receive_stream_unittest.cc
index 1ceaaab..4fdb68c 100644
--- a/audio/audio_receive_stream_unittest.cc
+++ b/audio/audio_receive_stream_unittest.cc
@@ -64,9 +64,9 @@
345, 678, 901, 234, -12, 3456, 7890, 567, 890, 123};
const CodecInst kCodecInst = {
123, "codec_name_recv", 96000, -187, 0, -103};
-const NetworkStatistics kNetworkStats = {123, 456, false, 789012, 3456, 123, 0,
- {}, 789, 12, 345, 678, 901, 0,
- -1, -1, -1, -1, -1, 0};
+// Aggregate-initialized positionally, so the values must follow the member
+// order of NetworkStatistics. The seventh value (the second 456) is expected to
+// land in jitterBufferDelayMs, which is declared right after concealmentEvents;
+// re-check the order whenever a member is added to the struct.
+const NetworkStatistics kNetworkStats = {
+ 123, 456, false, 789012, 3456, 123, 456, 0, {}, 789, 12,
+ 345, 678, 901, 0, -1, -1, -1, -1, -1, 0};
const AudioDecodingCallStats kAudioDecodeStats = MakeAudioDecodeStatsForTest();
struct ConfigHelper {
@@ -316,6 +316,9 @@
EXPECT_EQ(kTotalOutputDuration, stats.total_output_duration);
EXPECT_EQ(kNetworkStats.concealedSamples, stats.concealed_samples);
EXPECT_EQ(kNetworkStats.concealmentEvents, stats.concealment_events);
+ EXPECT_EQ(static_cast<double>(kNetworkStats.jitterBufferDelayMs) /
+ static_cast<double>(rtc::kNumMillisecsPerSec),
+ stats.jitter_buffer_delay_seconds);
EXPECT_EQ(Q14ToFloat(kNetworkStats.currentExpandRate), stats.expand_rate);
EXPECT_EQ(Q14ToFloat(kNetworkStats.currentSpeechExpandRate),
stats.speech_expand_rate);
diff --git a/call/audio_receive_stream.h b/call/audio_receive_stream.h
index baf2b67..44f093c 100644
--- a/call/audio_receive_stream.h
+++ b/call/audio_receive_stream.h
@@ -57,6 +57,7 @@
double total_output_duration = 0.0;
uint64_t concealed_samples = 0;
uint64_t concealment_events = 0;
+ double jitter_buffer_delay_seconds = 0.0;
// Stats below DO NOT correspond directly to anything in the WebRTC stats
float expand_rate = 0.0f;
float speech_expand_rate = 0.0f;
diff --git a/common_types.h b/common_types.h
index 69fc761..207c81e 100644
--- a/common_types.h
+++ b/common_types.h
@@ -368,17 +368,13 @@
uint16_t preferredBufferSize;
// adding extra delay due to "peaky jitter"
bool jitterPeaksFound;
- // Total number of audio samples received, including synthesized samples.
- // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalsamplesreceived
+ // Stats below correspond to similarly-named fields in the WebRTC stats spec.
+ // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats
uint64_t totalSamplesReceived;
- // Total number of inbound audio samples that are based on synthesized data to
- // conceal packet loss.
- // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealedsamples
uint64_t concealedSamples;
- // Number of times a concealed sample is synthesized after a non-concealed
- // sample.
- // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealmentevents
uint64_t concealmentEvents;
+ uint64_t jitterBufferDelayMs;
+ // Stats below DO NOT correspond directly to anything in the WebRTC stats spec.
// Loss rate (network + late); fraction between 0 and 1, scaled to Q14.
uint16_t currentPacketLossRate;
// Late loss rate; fraction between 0 and 1, scaled to Q14.
diff --git a/media/base/mediachannel.h b/media/base/mediachannel.h
index 103240e..06766a8 100644
--- a/media/base/mediachannel.h
+++ b/media/base/mediachannel.h
@@ -658,6 +658,7 @@
total_output_duration(0.0),
concealed_samples(0),
concealment_events(0),
+ jitter_buffer_delay_seconds(0),
expand_rate(0),
speech_expand_rate(0),
secondary_decoded_rate(0),
@@ -686,6 +687,7 @@
double total_output_duration;
uint64_t concealed_samples;
uint64_t concealment_events;
+ double jitter_buffer_delay_seconds;
// Stats below DO NOT correspond directly to anything in the WebRTC stats
// fraction of synthesized audio inserted through expansion.
float expand_rate;
diff --git a/media/engine/webrtcvoiceengine.cc b/media/engine/webrtcvoiceengine.cc
index 881e8ec..467e38d 100644
--- a/media/engine/webrtcvoiceengine.cc
+++ b/media/engine/webrtcvoiceengine.cc
@@ -2302,6 +2302,7 @@
rinfo.total_output_duration = stats.total_output_duration;
rinfo.concealed_samples = stats.concealed_samples;
rinfo.concealment_events = stats.concealment_events;
+ rinfo.jitter_buffer_delay_seconds = stats.jitter_buffer_delay_seconds;
rinfo.expand_rate = stats.expand_rate;
rinfo.speech_expand_rate = stats.speech_expand_rate;
rinfo.secondary_decoded_rate = stats.secondary_decoded_rate;
diff --git a/media/engine/webrtcvoiceengine_unittest.cc b/media/engine/webrtcvoiceengine_unittest.cc
index 4e80788..30396d9 100644
--- a/media/engine/webrtcvoiceengine_unittest.cc
+++ b/media/engine/webrtcvoiceengine_unittest.cc
@@ -623,6 +623,7 @@
stats.total_samples_received = 5678901;
stats.concealed_samples = 234;
stats.concealment_events = 12;
+ stats.jitter_buffer_delay_seconds = 34;
stats.expand_rate = 5.67f;
stats.speech_expand_rate = 8.90f;
stats.secondary_decoded_rate = 1.23f;
@@ -663,6 +664,8 @@
EXPECT_EQ(info.total_samples_received, stats.total_samples_received);
EXPECT_EQ(info.concealed_samples, stats.concealed_samples);
EXPECT_EQ(info.concealment_events, stats.concealment_events);
+ EXPECT_EQ(info.jitter_buffer_delay_seconds,
+ stats.jitter_buffer_delay_seconds);
EXPECT_EQ(info.expand_rate, stats.expand_rate);
EXPECT_EQ(info.speech_expand_rate, stats.speech_expand_rate);
EXPECT_EQ(info.secondary_decoded_rate, stats.secondary_decoded_rate);
diff --git a/modules/audio_coding/acm2/acm_receiver.cc b/modules/audio_coding/acm2/acm_receiver.cc
index d999df0..085e77a 100644
--- a/modules/audio_coding/acm2/acm_receiver.cc
+++ b/modules/audio_coding/acm2/acm_receiver.cc
@@ -337,6 +337,7 @@
acm_stat->totalSamplesReceived = neteq_lifetime_stat.total_samples_received;
acm_stat->concealedSamples = neteq_lifetime_stat.concealed_samples;
acm_stat->concealmentEvents = neteq_lifetime_stat.concealment_events;
+ acm_stat->jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms;
}
int AcmReceiver::DecoderByPayloadType(uint8_t payload_type,
diff --git a/modules/audio_coding/neteq/include/neteq.h b/modules/audio_coding/neteq/include/neteq.h
index b349f20..e6cafa8 100644
--- a/modules/audio_coding/neteq/include/neteq.h
+++ b/modules/audio_coding/neteq/include/neteq.h
@@ -66,6 +66,7 @@
uint64_t total_samples_received = 0;
uint64_t concealed_samples = 0;
uint64_t concealment_events = 0;
+ uint64_t jitter_buffer_delay_ms = 0;
};
enum NetEqPlayoutMode {
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index 2d50225..36d6b27 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -1950,7 +1950,8 @@
assert(false); // Should always be able to extract a packet here.
return -1;
}
- stats_.StoreWaitingTime(packet->waiting_time->ElapsedMs());
+ const uint64_t waiting_time_ms = packet->waiting_time->ElapsedMs();
+ stats_.StoreWaitingTime(waiting_time_ms);
RTC_DCHECK(!packet->empty());
if (first_packet) {
@@ -1990,6 +1991,8 @@
}
extracted_samples = packet->timestamp - first_timestamp + packet_duration;
+ stats_.JitterBufferDelay(extracted_samples, waiting_time_ms);
+
packet_list->push_back(std::move(*packet)); // Store packet in list.
packet = rtc::Optional<Packet>(); // Ensure it's never used after the move.
diff --git a/modules/audio_coding/neteq/neteq_unittest.cc b/modules/audio_coding/neteq/neteq_unittest.cc
index 5b92217..9dd60eb 100644
--- a/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_unittest.cc
@@ -522,6 +522,7 @@
NetEqDecodingTestFaxMode() : NetEqDecodingTest() {
config_.playout_mode = kPlayoutFax;
}
+ // Feeds packets through NetEq and checks the accumulated jitter buffer delay
+ // stat. When |apply_packet_loss| is true, one extra GetAudio call is made at
+ // the end to cause concealment.
+ void TestJitterBufferDelay(bool apply_packet_loss);
};
TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) {
@@ -1684,4 +1685,64 @@
EXPECT_EQ(kNumConcealmentEvents, static_cast<int>(stats.concealment_events));
}
+// Test that the jitter buffer delay stat is computed correctly.
+//
+// Inserts kNumPackets packets of kPacketLenMs each and starts pulling audio
+// with GetAudio once more than kDelayInNumPackets packets have been inserted.
+// The value of NetEqLifetimeStatistics::jitter_buffer_delay_ms is then compared
+// against an expectation accumulated locally. If |apply_packet_loss| is true,
+// one extra GetAudio call is made after all packets have been pulled in order
+// to cause concealment; the expected delay is the same in both cases.
+void NetEqDecodingTestFaxMode::TestJitterBufferDelay(bool apply_packet_loss) {
+  const int kNumPackets = 10;
+  const int kDelayInNumPackets = 2;
+  const int kPacketLenMs = 10;  // All packets are of 10 ms size.
+  const size_t kSamples = kPacketLenMs * 16;
+  const size_t kPayloadBytes = kSamples * 2;
+  RTPHeader rtp_info;
+  rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
+  rtp_info.payloadType = 94;  // PCM16b WB codec.
+  rtp_info.markerBit = 0;
+  const uint8_t payload[kPayloadBytes] = {0};
+  bool muted;
+  int packets_sent = 0;
+  int packets_received = 0;
+  // Kept in the same unsigned 64-bit type as the stat it is compared with, so
+  // that neither the accumulation nor the final check narrows the value.
+  uint64_t expected_delay = 0;
+  while (packets_received < kNumPackets) {
+    // Insert packet.
+    if (packets_sent < kNumPackets) {
+      rtp_info.sequenceNumber = packets_sent++;
+      rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
+      neteq_->InsertPacket(rtp_info, payload, 0);
+    }
+
+    // Get packet.
+    if (packets_sent > kDelayInNumPackets) {
+      neteq_->GetAudio(&out_frame_, &muted);
+      packets_received++;
+
+      // The delay reported by the jitter buffer never exceeds the amount of
+      // audio previously fetched with GetAudio, so the first packets report
+      // less than the steady-state delay of (kDelayInNumPackets + 1) packets
+      // (hence the min()).
+      const int packets_delay =
+          std::min(packets_received, kDelayInNumPackets + 1);
+
+      // The increase of the expected delay is the product of the current
+      // delay of the jitter buffer in ms and the number of samples that are
+      // sent for play out.
+      const uint64_t current_delay_ms =
+          static_cast<uint64_t>(packets_delay) * kPacketLenMs;
+      expected_delay += current_delay_ms * kSamples;
+    }
+  }
+
+  if (apply_packet_loss) {
+    // Extra call to GetAudio to cause concealment.
+    neteq_->GetAudio(&out_frame_, &muted);
+  }
+
+  // Check jitter buffer delay. Both operands are uint64_t, so no cast is
+  // needed and a mismatch in the upper bits cannot be hidden by truncation.
+  NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
+  EXPECT_EQ(expected_delay, stats.jitter_buffer_delay_ms);
+}
+
+// Checks the jitter buffer delay stat without the extra GetAudio call that
+// causes concealment.
+TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithoutLoss) {
+ TestJitterBufferDelay(false);
+}
+
+// Checks the jitter buffer delay stat when one extra GetAudio call is made at
+// the end to cause concealment.
+TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) {
+ TestJitterBufferDelay(true);
+}
+
} // namespace webrtc
diff --git a/modules/audio_coding/neteq/statistics_calculator.cc b/modules/audio_coding/neteq/statistics_calculator.cc
index 4e034e6..70a15ae 100644
--- a/modules/audio_coding/neteq/statistics_calculator.cc
+++ b/modules/audio_coding/neteq/statistics_calculator.cc
@@ -229,6 +229,11 @@
lifetime_stats_.total_samples_received += num_samples;
}
+// Adds |waiting_time_ms| multiplied by |num_samples| to the lifetime
+// jitter_buffer_delay_ms counter, i.e. the waiting time weighted by the number
+// of samples it applies to.
+void StatisticsCalculator::JitterBufferDelay(size_t num_samples,
+                                             uint64_t waiting_time_ms) {
+  const uint64_t weighted_delay_ms = waiting_time_ms * num_samples;
+  lifetime_stats_.jitter_buffer_delay_ms += weighted_delay_ms;
+}
+
void StatisticsCalculator::SecondaryDecodedSamples(int num_samples) {
secondary_decoded_samples_ += num_samples;
}
diff --git a/modules/audio_coding/neteq/statistics_calculator.h b/modules/audio_coding/neteq/statistics_calculator.h
index 5c2fbf3..c3d5c86 100644
--- a/modules/audio_coding/neteq/statistics_calculator.h
+++ b/modules/audio_coding/neteq/statistics_calculator.h
@@ -75,6 +75,9 @@
// time is increasing.
void IncreaseCounter(size_t num_samples, int fs_hz);
+ // Updates the lifetime jitter buffer delay counter by adding
+ // |waiting_time_ms| multiplied by |num_samples| to it.
+ void JitterBufferDelay(size_t num_samples, uint64_t waiting_time_ms);
+
// Stores new packet waiting time in waiting time statistics.
void StoreWaitingTime(int waiting_time_ms);
diff --git a/pc/rtcstats_integrationtest.cc b/pc/rtcstats_integrationtest.cc
index 62d316d..e0fb577 100644
--- a/pc/rtcstats_integrationtest.cc
+++ b/pc/rtcstats_integrationtest.cc
@@ -562,8 +562,11 @@
}
// totalSamplesReceived, concealedSamples and concealmentEvents are only
// present on inbound audio tracks.
+ // jitterBufferDelay is currently only implemented for audio.
if (*media_stream_track.kind == RTCMediaStreamTrackKind::kAudio &&
*media_stream_track.remote_source) {
+ verifier.TestMemberIsNonNegative<double>(
+ media_stream_track.jitter_buffer_delay);
verifier.TestMemberIsNonNegative<uint64_t>(
media_stream_track.total_samples_received);
verifier.TestMemberIsNonNegative<uint64_t>(
@@ -571,6 +574,7 @@
verifier.TestMemberIsNonNegative<uint64_t>(
media_stream_track.concealment_events);
} else {
+ verifier.TestMemberIsUndefined(media_stream_track.jitter_buffer_delay);
verifier.TestMemberIsUndefined(media_stream_track.total_samples_received);
verifier.TestMemberIsUndefined(media_stream_track.concealed_samples);
verifier.TestMemberIsUndefined(media_stream_track.concealment_events);
diff --git a/pc/rtcstatscollector.cc b/pc/rtcstatscollector.cc
index 161d224..2fcb754 100644
--- a/pc/rtcstatscollector.cc
+++ b/pc/rtcstatscollector.cc
@@ -410,6 +410,8 @@
audio_track_stats->audio_level = DoubleAudioLevelFromIntAudioLevel(
voice_receiver_info.audio_level);
}
+ audio_track_stats->jitter_buffer_delay =
+ voice_receiver_info.jitter_buffer_delay_seconds;
audio_track_stats->total_audio_energy =
voice_receiver_info.total_output_energy;
audio_track_stats->total_samples_received =
diff --git a/pc/rtcstatscollector_unittest.cc b/pc/rtcstatscollector_unittest.cc
index 14f669c..0e573b1 100644
--- a/pc/rtcstatscollector_unittest.cc
+++ b/pc/rtcstatscollector_unittest.cc
@@ -1556,6 +1556,7 @@
voice_receiver_info.total_output_duration = 0.25;
voice_receiver_info.concealed_samples = 123;
voice_receiver_info.concealment_events = 12;
+ voice_receiver_info.jitter_buffer_delay_seconds = 3456;
test_->CreateMockRtpSendersReceiversAndChannels(
{ std::make_pair(local_audio_track.get(), voice_sender_info_ssrc1),
@@ -1633,6 +1634,7 @@
expected_remote_audio_track.total_samples_duration = 0.25;
expected_remote_audio_track.concealed_samples = 123;
expected_remote_audio_track.concealment_events = 12;
+ expected_remote_audio_track.jitter_buffer_delay = 3456;
ASSERT_TRUE(report->Get(expected_remote_audio_track.id()));
EXPECT_EQ(expected_remote_audio_track,
report->Get(expected_remote_audio_track.id())->cast_to<
diff --git a/stats/rtcstats_objects.cc b/stats/rtcstats_objects.cc
index e643e12..b1698cf 100644
--- a/stats/rtcstats_objects.cc
+++ b/stats/rtcstats_objects.cc
@@ -367,6 +367,7 @@
&ended,
&detached,
&kind,
+ &jitter_buffer_delay,
&frame_width,
&frame_height,
&frames_per_second,
@@ -401,6 +402,7 @@
ended("ended"),
detached("detached"),
kind("kind", kind),
+ jitter_buffer_delay("jitterBufferDelay"),
frame_width("frameWidth"),
frame_height("frameHeight"),
frames_per_second("framesPerSecond"),
@@ -431,6 +433,7 @@
ended(other.ended),
detached(other.detached),
kind(other.kind),
+ jitter_buffer_delay(other.jitter_buffer_delay),
frame_width(other.frame_width),
frame_height(other.frame_height),
frames_per_second(other.frames_per_second),