Replace timestamp map with a deque in generic decoder

* Add a test to the generic decoder unittests to ensure drop behaviour
  is covered (see the sketch below).
* Use simulated time in the generic decoder unittests (see the
  SimulatedClock snippet below).
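
To make the intended behaviour easier to review, below is a simplified,
standalone sketch of the semantics the new deque is meant to have. It is
illustrative only, not the code in this CL or the new unittest: Entry,
Add, Pop and kMaxEntries are made-up names, and the wraparound test
stands in for IsNewerTimestamp().

  // deque_drop_sketch.cc -- illustrative only, not part of this CL.
  #include <cassert>
  #include <cstdint>
  #include <deque>
  #include <utility>

  struct Entry {                      // stand-in for FrameInfo
    uint32_t rtp_timestamp;
  };

  constexpr size_t kMaxEntries = 10;  // mirrors kDecoderFrameMemoryLength

  // Map(): append, evicting (and counting) the oldest entry when full.
  int Add(std::deque<Entry>& q, Entry e) {
    int dropped = 0;
    if (q.size() == kMaxEntries) {
      q.pop_front();
      dropped = 1;
    }
    q.push_back(e);
    return dropped;
  }

  // FindFrameInfo(): drop every entry older than `ts`, report whether a
  // matching entry existed and how many older entries were skipped.
  std::pair<bool, size_t> Pop(std::deque<Entry>& q, uint32_t ts) {
    size_t dropped = 0;
    while (!q.empty()) {
      uint32_t front = q.front().rtp_timestamp;
      if (front == ts) {
        q.pop_front();
        return {true, dropped};
      }
      // Wraparound-aware "front is older than ts" check.
      if (static_cast<uint32_t>(ts - front) < 0x80000000u) {
        q.pop_front();
        ++dropped;
      } else {
        break;  // Front is newer; ts is unknown to the queue.
      }
    }
    return {false, dropped};
  }

  int main() {
    std::deque<Entry> q;
    for (uint32_t ts = 0; ts < 11 * 3000; ts += 3000)
      Add(q, {ts});                   // 11th frame evicts the oldest one
    assert(q.size() == kMaxEntries);
    auto [found, dropped] = Pop(q, /*ts=*/4 * 3000);
    assert(found && dropped == 3);    // frames 1..3 were skipped
    return 0;
  }

In the real code the skipped count is reported to the application via
VCMReceiveCallback::OnDroppedFrames(), both from Decoded() and when the
deque overflows in Map().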

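The unittests drive time through a simulated clock rather than the wall
clock, so measured decode times are deterministic. Conceptually (a
sketch only; the actual fixture in generic_decoder_unittest.cc may wire
this up differently, e.g. through a time controller):

  #include "system_wrappers/include/clock.h"

  void ExampleOfSimulatedTime() {
    // A SimulatedClock starts at a fixed point and only advances when
    // the test asks it to, so durations measured through Clock* are
    // fully reproducible.
    webrtc::SimulatedClock clock(/*initial_time_us=*/1'000'000);

    // Pass &clock wherever a webrtc::Clock* is expected (e.g. to
    // VCMDecodedFrameCallback), then step time explicitly between
    // feeding a frame to the decoder and observing the callback:
    clock.AdvanceTimeMilliseconds(10);  // simulates a 10 ms decode
  }
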
Bug: webrtc:14324
Change-Id: I10b28b45c434f92d5344683fb9ca6676efe0e08c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/270662
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Evan Shrubsole <eshr@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#37710}
diff --git a/modules/video_coding/generic_decoder.cc b/modules/video_coding/generic_decoder.cc
index dd8f52a..dac8f2c 100644
--- a/modules/video_coding/generic_decoder.cc
+++ b/modules/video_coding/generic_decoder.cc
@@ -14,9 +14,13 @@
 
 #include <algorithm>
 #include <cmath>
+#include <iterator>
+#include <utility>
 
+#include "absl/algorithm/container.h"
 #include "absl/types/optional.h"
 #include "api/video/video_timing.h"
+#include "modules/include/module_common_types_public.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
@@ -25,11 +29,17 @@
 
 namespace webrtc {
 
+namespace {
+
+constexpr size_t kDecoderFrameMemoryLength = 10;
+
+}  // namespace
+
 VCMDecodedFrameCallback::VCMDecodedFrameCallback(
     VCMTiming* timing,
     Clock* clock,
     const FieldTrialsView& field_trials)
-    : _clock(clock), _timing(timing), _timestampMap(kDecoderFrameMemoryLength) {
+    : _clock(clock), _timing(timing) {
   ntp_offset_ =
       _clock->CurrentNtpInMilliseconds() - _clock->TimeInMilliseconds();
 }
@@ -66,6 +76,26 @@
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
+std::pair<absl::optional<FrameInfo>, size_t>
+VCMDecodedFrameCallback::FindFrameInfo(uint32_t rtp_timestamp) {
+  absl::optional<FrameInfo> frame_info;
+
+  auto it = absl::c_find_if(frame_infos_, [rtp_timestamp](const auto& entry) {
+    return entry.rtp_timestamp == rtp_timestamp ||
+           IsNewerTimestamp(entry.rtp_timestamp, rtp_timestamp);
+  });
+  size_t dropped_frames = std::distance(frame_infos_.begin(), it);
+
+  if (it != frame_infos_.end() && it->rtp_timestamp == rtp_timestamp) {
+    // Frame was found and should also be removed from the queue.
+    frame_info = std::move(*it);
+    ++it;
+  }
+
+  frame_infos_.erase(frame_infos_.begin(), it);
+  return std::make_pair(std::move(frame_info), dropped_frames);
+}
+
 void VCMDecodedFrameCallback::Decoded(VideoFrame& decodedImage,
                                       absl::optional<int32_t> decode_time_ms,
                                       absl::optional<uint8_t> qp) {
@@ -74,36 +104,29 @@
                        "timestamp", decodedImage.timestamp());
   // TODO(holmer): We should improve this so that we can handle multiple
   // callbacks from one call to Decode().
-  absl::optional<FrameInformation> frameInfo;
+  absl::optional<FrameInfo> frame_info;
   int timestamp_map_size = 0;
   int dropped_frames = 0;
   {
     MutexLock lock(&lock_);
-    int initial_timestamp_map_size = _timestampMap.Size();
-    frameInfo = _timestampMap.Pop(decodedImage.timestamp());
-    timestamp_map_size = _timestampMap.Size();
-    // _timestampMap.Pop() erases all frame upto the specified timestamp and
-    // return the frame info for this timestamp if it exists. Thus, the
-    // difference in the _timestampMap size before and after Pop() will show
-    // internally dropped frames.
-    dropped_frames =
-        initial_timestamp_map_size - timestamp_map_size - (frameInfo ? 1 : 0);
+    std::tie(frame_info, dropped_frames) =
+        FindFrameInfo(decodedImage.timestamp());
+    timestamp_map_size = frame_infos_.size();
   }
-
   if (dropped_frames > 0) {
     _receiveCallback->OnDroppedFrames(dropped_frames);
   }
 
-  if (!frameInfo) {
+  if (!frame_info) {
     RTC_LOG(LS_WARNING) << "Too many frames backed up in the decoder, dropping "
                            "frame with timestamp "
                         << decodedImage.timestamp();
     return;
   }
 
-  decodedImage.set_ntp_time_ms(frameInfo->ntp_time_ms);
-  decodedImage.set_packet_infos(frameInfo->packet_infos);
-  decodedImage.set_rotation(frameInfo->rotation);
+  decodedImage.set_ntp_time_ms(frame_info->ntp_time_ms);
+  decodedImage.set_packet_infos(frame_info->packet_infos);
+  decodedImage.set_rotation(frame_info->rotation);
   VideoFrame::RenderParameters render_parameters = _timing->RenderParameters();
   if (render_parameters.max_composition_delay_in_frames) {
     // Subtract frames that are in flight.
@@ -113,70 +136,70 @@
   }
   decodedImage.set_render_parameters(render_parameters);
 
-  RTC_DCHECK(frameInfo->decode_start);
+  RTC_DCHECK(frame_info->decode_start);
   const Timestamp now = _clock->CurrentTime();
   const TimeDelta decode_time = decode_time_ms
                                     ? TimeDelta::Millis(*decode_time_ms)
-                                    : now - *frameInfo->decode_start;
+                                    : now - *frame_info->decode_start;
   _timing->StopDecodeTimer(decode_time, now);
   decodedImage.set_processing_time(
-      {*frameInfo->decode_start, *frameInfo->decode_start + decode_time});
+      {*frame_info->decode_start, *frame_info->decode_start + decode_time});
 
   // Report timing information.
   TimingFrameInfo timing_frame_info;
-  if (frameInfo->timing.flags != VideoSendTiming::kInvalid) {
+  if (frame_info->timing.flags != VideoSendTiming::kInvalid) {
     int64_t capture_time_ms = decodedImage.ntp_time_ms() - ntp_offset_;
     // Convert remote timestamps to local time from ntp timestamps.
-    frameInfo->timing.encode_start_ms -= ntp_offset_;
-    frameInfo->timing.encode_finish_ms -= ntp_offset_;
-    frameInfo->timing.packetization_finish_ms -= ntp_offset_;
-    frameInfo->timing.pacer_exit_ms -= ntp_offset_;
-    frameInfo->timing.network_timestamp_ms -= ntp_offset_;
-    frameInfo->timing.network2_timestamp_ms -= ntp_offset_;
+    frame_info->timing.encode_start_ms -= ntp_offset_;
+    frame_info->timing.encode_finish_ms -= ntp_offset_;
+    frame_info->timing.packetization_finish_ms -= ntp_offset_;
+    frame_info->timing.pacer_exit_ms -= ntp_offset_;
+    frame_info->timing.network_timestamp_ms -= ntp_offset_;
+    frame_info->timing.network2_timestamp_ms -= ntp_offset_;
 
     int64_t sender_delta_ms = 0;
     if (decodedImage.ntp_time_ms() < 0) {
       // Sender clock is not estimated yet. Make sure that sender times are all
       // negative to indicate that. Yet they still should be relatively correct.
       sender_delta_ms =
-          std::max({capture_time_ms, frameInfo->timing.encode_start_ms,
-                    frameInfo->timing.encode_finish_ms,
-                    frameInfo->timing.packetization_finish_ms,
-                    frameInfo->timing.pacer_exit_ms,
-                    frameInfo->timing.network_timestamp_ms,
-                    frameInfo->timing.network2_timestamp_ms}) +
+          std::max({capture_time_ms, frame_info->timing.encode_start_ms,
+                    frame_info->timing.encode_finish_ms,
+                    frame_info->timing.packetization_finish_ms,
+                    frame_info->timing.pacer_exit_ms,
+                    frame_info->timing.network_timestamp_ms,
+                    frame_info->timing.network2_timestamp_ms}) +
           1;
     }
 
     timing_frame_info.capture_time_ms = capture_time_ms - sender_delta_ms;
     timing_frame_info.encode_start_ms =
-        frameInfo->timing.encode_start_ms - sender_delta_ms;
+        frame_info->timing.encode_start_ms - sender_delta_ms;
     timing_frame_info.encode_finish_ms =
-        frameInfo->timing.encode_finish_ms - sender_delta_ms;
+        frame_info->timing.encode_finish_ms - sender_delta_ms;
     timing_frame_info.packetization_finish_ms =
-        frameInfo->timing.packetization_finish_ms - sender_delta_ms;
+        frame_info->timing.packetization_finish_ms - sender_delta_ms;
     timing_frame_info.pacer_exit_ms =
-        frameInfo->timing.pacer_exit_ms - sender_delta_ms;
+        frame_info->timing.pacer_exit_ms - sender_delta_ms;
     timing_frame_info.network_timestamp_ms =
-        frameInfo->timing.network_timestamp_ms - sender_delta_ms;
+        frame_info->timing.network_timestamp_ms - sender_delta_ms;
     timing_frame_info.network2_timestamp_ms =
-        frameInfo->timing.network2_timestamp_ms - sender_delta_ms;
+        frame_info->timing.network2_timestamp_ms - sender_delta_ms;
   }
 
-  timing_frame_info.flags = frameInfo->timing.flags;
-  timing_frame_info.decode_start_ms = frameInfo->decode_start->ms();
+  timing_frame_info.flags = frame_info->timing.flags;
+  timing_frame_info.decode_start_ms = frame_info->decode_start->ms();
   timing_frame_info.decode_finish_ms = now.ms();
   timing_frame_info.render_time_ms =
-      frameInfo->render_time ? frameInfo->render_time->ms() : -1;
+      frame_info->render_time ? frame_info->render_time->ms() : -1;
   timing_frame_info.rtp_timestamp = decodedImage.timestamp();
-  timing_frame_info.receive_start_ms = frameInfo->timing.receive_start_ms;
-  timing_frame_info.receive_finish_ms = frameInfo->timing.receive_finish_ms;
+  timing_frame_info.receive_start_ms = frame_info->timing.receive_start_ms;
+  timing_frame_info.receive_finish_ms = frame_info->timing.receive_finish_ms;
   _timing->SetTimingFrameInfo(timing_frame_info);
 
   decodedImage.set_timestamp_us(
-      frameInfo->render_time ? frameInfo->render_time->us() : -1);
+      frame_info->render_time ? frame_info->render_time->us() : -1);
   _receiveCallback->FrameToRender(decodedImage, qp, decode_time,
-                                  frameInfo->content_type);
+                                  frame_info->content_type);
 }
 
 void VCMDecodedFrameCallback::OnDecoderImplementationName(
@@ -184,15 +207,17 @@
   _receiveCallback->OnDecoderImplementationName(implementation_name);
 }
 
-void VCMDecodedFrameCallback::Map(uint32_t timestamp,
-                                  const FrameInformation& frameInfo) {
+void VCMDecodedFrameCallback::Map(FrameInfo frameInfo) {
   int dropped_frames = 0;
   {
     MutexLock lock(&lock_);
-    int initial_size = _timestampMap.Size();
-    _timestampMap.Add(timestamp, frameInfo);
+    int initial_size = frame_infos_.size();
+    if (initial_size == kDecoderFrameMemoryLength) {
+      frame_infos_.pop_front();
+      dropped_frames = 1;
+    }
+    frame_infos_.push_back(std::move(frameInfo));
     // If no frame is dropped, the new size should be `initial_size` + 1
-    dropped_frames = (initial_size + 1) - _timestampMap.Size();
   }
   if (dropped_frames > 0) {
     _receiveCallback->OnDroppedFrames(dropped_frames);
@@ -203,8 +228,8 @@
   int dropped_frames = 0;
   {
     MutexLock lock(&lock_);
-    dropped_frames = _timestampMap.Size();
-    _timestampMap.Clear();
+    dropped_frames = frame_infos_.size();
+    frame_infos_.clear();
   }
   if (dropped_frames > 0) {
     _receiveCallback->OnDroppedFrames(dropped_frames);
@@ -238,7 +263,8 @@
 int32_t VCMGenericDecoder::Decode(const VCMEncodedFrame& frame, Timestamp now) {
   TRACE_EVENT1("webrtc", "VCMGenericDecoder::Decode", "timestamp",
                frame.Timestamp());
-  FrameInformation frame_info;
+  FrameInfo frame_info;
+  frame_info.rtp_timestamp = frame.Timestamp();
   frame_info.decode_start = now;
   frame_info.render_time =
       frame.RenderTimeMs() >= 0
@@ -258,7 +284,7 @@
   } else {
     frame_info.content_type = _last_keyframe_content_type;
   }
-  _callback->Map(frame.Timestamp(), frame_info);
+  _callback->Map(std::move(frame_info));
 
   int32_t ret = decoder_->Decode(frame.EncodedImage(), frame.MissingFrame(),
                                  frame.RenderTimeMs());