NetEq: Add functionality to assist with delay analysis and tooling

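This CL adds a few methods to the NetEq API that will be used for
delay analysis and plotting: TargetDelayMs() (which replaces the
unimplemented TargetDelay()), LastDecodedTimestamps(), and
SyncBufferSizeMs().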

BUG=webrtc:7467

Review-Url: https://codereview.webrtc.org/2839163002
Cr-Commit-Position: refs/heads/master@{#17889}
diff --git a/webrtc/modules/audio_coding/neteq/include/neteq.h b/webrtc/modules/audio_coding/neteq/include/neteq.h
index 322a86f..f034580 100644
--- a/webrtc/modules/audio_coding/neteq/include/neteq.h
+++ b/webrtc/modules/audio_coding/neteq/include/neteq.h
@@ -14,6 +14,7 @@
 #include <string.h>  // Provide access to size_t.
 
 #include <string>
+#include <vector>
 
 #include "webrtc/base/constructormagic.h"
 #include "webrtc/base/optional.h"
@@ -211,8 +212,9 @@
   // Not implemented.
   virtual int SetTargetDelay() = 0;
 
-  // Not implemented.
-  virtual int TargetDelay() = 0;
+  // Returns the current target delay in ms. This includes any extra delay
+  // requested through SetMinimumDelay.
+  virtual int TargetDelayMs() = 0;
 
   // Returns the current total delay (packet buffer and sync buffer) in ms.
   virtual int CurrentDelayMs() const = 0;
@@ -302,6 +304,16 @@
   virtual std::vector<uint16_t> GetNackList(
       int64_t round_trip_time_ms) const = 0;
 
+  // Returns a vector containing the timestamps of the packets that were decoded
+  // in the last GetAudio call. If no packets were decoded in the last call, the
+  // vector is empty.
+  // Mainly intended for testing.
+  virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
+
+  // Returns the length (in ms) of the audio yet to play in the sync buffer.
+  // Mainly intended for testing.
+  virtual int SyncBufferSizeMs() const = 0;
+
  protected:
   NetEq() {}
 
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index 89bddec..e119d94 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -364,8 +364,14 @@
   return kNotImplemented;
 }
 
-int NetEqImpl::TargetDelay() {
-  return kNotImplemented;
+int NetEqImpl::TargetDelayMs() {
+  rtc::CritScope lock(&crit_sect_);
+  RTC_DCHECK(delay_manager_.get());
+  // The value from TargetLevel() is in number of packets, represented in Q8.
+  const size_t target_delay_samples =
+      (delay_manager_->TargetLevel() * decoder_frame_length_) >> 8;
+  return static_cast<int>(target_delay_samples) /
+         rtc::CheckedDivExact(fs_hz_, 1000);
 }
 
 int NetEqImpl::CurrentDelayMs() const {
@@ -569,6 +575,17 @@
   return nack_->GetNackList(round_trip_time_ms);
 }
 
+std::vector<uint32_t> NetEqImpl::LastDecodedTimestamps() const {
+  rtc::CritScope lock(&crit_sect_);
+  return last_decoded_timestamps_;
+}
+
+int NetEqImpl::SyncBufferSizeMs() const {
+  rtc::CritScope lock(&crit_sect_);
+  return rtc::dchecked_cast<int>(sync_buffer_->FutureLength() /
+                                 rtc::CheckedDivExact(fs_hz_, 1000));
+}
+
 const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
   rtc::CritScope lock(&crit_sect_);
   return sync_buffer_.get();
@@ -873,6 +890,7 @@
   Operations operation;
   bool play_dtmf;
   *muted = false;
+  last_decoded_timestamps_.clear();
   tick_timer_->Increment();
   stats_.IncreaseCounter(output_size_samples_, fs_hz_);
 
@@ -1498,6 +1516,8 @@
 int NetEqImpl::DecodeLoop(PacketList* packet_list, const Operations& operation,
                           AudioDecoder* decoder, int* decoded_length,
                           AudioDecoder::SpeechType* speech_type) {
+  RTC_DCHECK(last_decoded_timestamps_.empty());
+
   // Do decoding.
   while (
       !packet_list->empty() &&
@@ -1514,6 +1534,7 @@
     auto opt_result = packet_list->front().frame->Decode(
         rtc::ArrayView<int16_t>(&decoded_buffer_[*decoded_length],
                                 decoded_buffer_length_ - *decoded_length));
+    last_decoded_timestamps_.push_back(packet_list->front().timestamp);
     packet_list->pop_front();
     if (opt_result) {
       const auto& result = *opt_result;
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index 863bfbb..8a789bc 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -139,7 +139,7 @@
 
   int SetTargetDelay() override;
 
-  int TargetDelay() override;
+  int TargetDelayMs() override;
 
   int CurrentDelayMs() const override;
 
@@ -207,6 +207,10 @@
 
   std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
 
+  std::vector<uint32_t> LastDecodedTimestamps() const override;
+
+  int SyncBufferSizeMs() const override;
+
   // This accessor method is only intended for testing purposes.
   const SyncBuffer* sync_buffer_for_test() const;
   Operations last_operation_for_test() const;
@@ -414,6 +418,7 @@
       AudioFrame::kVadPassive;
   std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
       GUARDED_BY(crit_sect_);
+  std::vector<uint32_t> last_decoded_timestamps_ GUARDED_BY(crit_sect_);
 
  private:
   RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
index 4e110a6..fb87111 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -1271,6 +1271,19 @@
   EXPECT_EQ(1u, tick_timer_->ticks());
 }
 
+TEST_F(NetEqImplTest, TargetDelayMs) {
+  UseNoMocks();
+  use_mock_delay_manager_ = true;
+  CreateInstance();
+  // Let the dummy target delay be 17 packets.
+  constexpr int kTargetLevelPacketsQ8 = 17 << 8;
+  EXPECT_CALL(*mock_delay_manager_, TargetLevel())
+      .WillOnce(Return(kTargetLevelPacketsQ8));
+  // Default packet size before any packet has been decoded is 30 ms, so we are
+  // expecting 17 * 30 = 510 ms target delay.
+  EXPECT_EQ(17 * 30, neteq_->TargetDelayMs());
+}
+
 class Decoder120ms : public AudioDecoder {
  public:
   Decoder120ms(int sample_rate_hz, SpeechType speech_type)
diff --git a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
index 68a4921..03a788e 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
@@ -270,8 +270,6 @@
 
   void DuplicateCng();
 
-  rtc::Optional<uint32_t> PlayoutTimestamp();
-
   NetEq* neteq_;
   NetEq::Config config_;
   std::unique_ptr<test::RtpFileSource> rtp_source_;
@@ -644,7 +642,7 @@
   }
 
   EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
-  rtc::Optional<uint32_t> playout_timestamp = PlayoutTimestamp();
+  rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
   ASSERT_TRUE(playout_timestamp);
   int32_t delay_before = timestamp - *playout_timestamp;
 
@@ -736,7 +734,7 @@
   // Check that the speech starts again within reasonable time.
   double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
   EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
-  playout_timestamp = PlayoutTimestamp();
+  playout_timestamp = neteq_->GetPlayoutTimestamp();
   ASSERT_TRUE(playout_timestamp);
   int32_t delay_after = timestamp - *playout_timestamp;
   // Compare delay before and after, and make sure it differs less than 20 ms.
@@ -1128,7 +1126,7 @@
     ASSERT_EQ(1u, output.num_channels_);
 
     // Expect delay (in samples) to be less than 2 packets.
-    rtc::Optional<uint32_t> playout_timestamp = PlayoutTimestamp();
+    rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
     ASSERT_TRUE(playout_timestamp);
     EXPECT_LE(timestamp - *playout_timestamp,
               static_cast<uint32_t>(kSamples * 2));
@@ -1207,7 +1205,8 @@
   ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
   ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
   EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
-  EXPECT_FALSE(PlayoutTimestamp());  // Returns empty value during CNG.
+  EXPECT_FALSE(
+      neteq_->GetPlayoutTimestamp());  // Returns empty value during CNG.
   EXPECT_EQ(timestamp - algorithmic_delay_samples,
             out_frame_.timestamp_ + out_frame_.samples_per_channel_);
 
@@ -1223,7 +1222,8 @@
     ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
     ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
     EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
-    EXPECT_FALSE(PlayoutTimestamp());  // Returns empty value during CNG.
+    EXPECT_FALSE(
+        neteq_->GetPlayoutTimestamp());  // Returns empty value during CNG.
     EXPECT_EQ(timestamp - algorithmic_delay_samples,
               out_frame_.timestamp_ + out_frame_.samples_per_channel_);
   }
@@ -1238,16 +1238,12 @@
   ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
   ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
   EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
-  rtc::Optional<uint32_t> playout_timestamp = PlayoutTimestamp();
+  rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
   ASSERT_TRUE(playout_timestamp);
   EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
             *playout_timestamp);
 }
 
-rtc::Optional<uint32_t> NetEqDecodingTest::PlayoutTimestamp() {
-  return neteq_->GetPlayoutTimestamp();
-}
-
 TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); }
 
 TEST_F(NetEqDecodingTest, CngFirst) {
@@ -1590,4 +1586,65 @@
   EXPECT_FALSE(muted);
 }
 
+TEST_F(NetEqDecodingTest, LastDecodedTimestampsEmpty) {
+  EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
+
+  // Pull out data once.
+  AudioFrame output;
+  bool muted;
+  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+
+  EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
+}
+
+TEST_F(NetEqDecodingTest, LastDecodedTimestampsOneDecoded) {
+  // Insert one packet with PCM16b WB data (this is what PopulateRtpInfo does by
+  // default). Make the length 10 ms.
+  constexpr size_t kPayloadSamples = 16 * 10;
+  constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
+  uint8_t payload[kPayloadBytes] = {0};
+
+  RTPHeader rtp_info;
+  constexpr uint32_t kRtpTimestamp = 0x1234;
+  PopulateRtpInfo(0, kRtpTimestamp, &rtp_info);
+  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
+
+  // Pull out data once.
+  AudioFrame output;
+  bool muted;
+  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+
+  EXPECT_EQ(std::vector<uint32_t>({kRtpTimestamp}),
+            neteq_->LastDecodedTimestamps());
+
+  // Nothing decoded on the second call.
+  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+  EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
+}
+
+TEST_F(NetEqDecodingTest, LastDecodedTimestampsTwoDecoded) {
+  // Insert two packets with PCM16b WB data (this is what PopulateRtpInfo does
+  // by default). Make the length 5 ms so that NetEq must decode them both in
+  // the same GetAudio call.
+  constexpr size_t kPayloadSamples = 16 * 5;
+  constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
+  uint8_t payload[kPayloadBytes] = {0};
+
+  RTPHeader rtp_info;
+  constexpr uint32_t kRtpTimestamp1 = 0x1234;
+  PopulateRtpInfo(0, kRtpTimestamp1, &rtp_info);
+  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
+  constexpr uint32_t kRtpTimestamp2 = kRtpTimestamp1 + kPayloadSamples;
+  PopulateRtpInfo(1, kRtpTimestamp2, &rtp_info);
+  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
+
+  // Pull out data once.
+  AudioFrame output;
+  bool muted;
+  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+
+  EXPECT_EQ(std::vector<uint32_t>({kRtpTimestamp1, kRtpTimestamp2}),
+            neteq_->LastDecodedTimestamps());
+}
+
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer.h b/webrtc/modules/audio_coding/neteq/sync_buffer.h
index 5eae4bf..eb90034 100644
--- a/webrtc/modules/audio_coding/neteq/sync_buffer.h
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer.h
@@ -26,7 +26,7 @@
         end_timestamp_(0),
         dtmf_index_(0) {}
 
-  // Returns the number of samples yet to play out form the buffer.
+  // Returns the number of samples yet to play out from the buffer.
   size_t FutureLength() const;
 
   // Adds the contents of |append_this| to the back of the SyncBuffer. Removes