In generic encoder ensure capture timestamp is preserved.

Because some hardware encoders may not preserve capture timestamp, use
rtp timestamp as a key to capture time storage and restore corrupted
capture timestamps whenever possible.

Bug: webrtc:8497
Change-Id: Ib3449313c65e864a89cd645a3f5dec99aa9cf8e4
Reviewed-on: https://webrtc-review.googlesource.com/23620
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20732}
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 01ecaa8..9d22006 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -97,6 +97,7 @@
     ":webrtc_vp8",
     ":webrtc_vp9",
     "..:module_api",
+    "..:module_api_public",
     "../..:webrtc_common",
     "../../api:optional",
     "../../api:video_frame_api_i420",
diff --git a/modules/video_coding/generic_encoder.cc b/modules/video_coding/generic_encoder.cc
index f1e1dfb..184cb50 100644
--- a/modules/video_coding/generic_encoder.cc
+++ b/modules/video_coding/generic_encoder.cc
@@ -14,6 +14,7 @@
 
 #include "api/optional.h"
 #include "api/video/i420_buffer.h"
+#include "modules/include/module_common_types_public.h"
 #include "modules/pacing/alr_detector.h"
 #include "modules/video_coding/encoded_frame.h"
 #include "modules/video_coding/media_optimization.h"
@@ -25,6 +26,11 @@
 
 namespace webrtc {
 
+namespace {
+const int kMessagesThrottlingThreshold = 100;
+const int kThrottleRatio = 1000;
+}  // namespace
+
 VCMGenericEncoder::VCMGenericEncoder(
     VideoEncoder* encoder,
     VCMEncodedFrameCallback* encoded_frame_callback,
@@ -82,7 +88,8 @@
     RTC_DCHECK(frame_type == kVideoFrameKey || frame_type == kVideoFrameDelta);
 
   for (size_t i = 0; i < streams_or_svc_num_; ++i)
-    vcm_encoded_frame_callback_->OnEncodeStarted(frame.render_time_ms(), i);
+    vcm_encoded_frame_callback_->OnEncodeStarted(frame.timestamp(),
+                                                 frame.render_time_ms(), i);
 
   return encoder_->Encode(frame, codec_specific, &frame_types);
 }
@@ -184,6 +191,7 @@
       framerate_(1),
       last_timing_frame_time_ms_(-1),
       timing_frames_thresholds_({-1, 0}),
+      incorrect_capture_time_logged_messages_(0),
       reordered_frames_logged_messages_(0),
       stalled_encoder_logged_messages_(0) {
   rtc::Optional<AlrDetector::AlrExperimentSettings> experiment_settings =
@@ -220,7 +228,8 @@
   framerate_ = framerate;
 }
 
-void VCMEncodedFrameCallback::OnEncodeStarted(int64_t capture_time_ms,
+void VCMEncodedFrameCallback::OnEncodeStarted(uint32_t rtp_timestamp,
+                                              int64_t capture_time_ms,
                                               size_t simulcast_svc_idx) {
   if (internal_source_) {
     return;
@@ -240,19 +249,20 @@
   if (timing_frames_info_[simulcast_svc_idx].encode_start_list.size() ==
       kMaxEncodeStartTimeListSize) {
     ++stalled_encoder_logged_messages_;
-    if (stalled_encoder_logged_messages_ <= 100) {
+    if (stalled_encoder_logged_messages_ <= kMessagesThrottlingThreshold ||
+        stalled_encoder_logged_messages_ % kThrottleRatio == 0) {
       RTC_LOG(LS_WARNING) << "Too many frames in the encode_start_list."
                              " Did encoder stall?";
-      if (stalled_encoder_logged_messages_ == 100) {
+      if (stalled_encoder_logged_messages_ == kMessagesThrottlingThreshold) {
         RTC_LOG(LS_WARNING) << "Too many log messages. Further stalled encoder"
-                               "warnings will not be printed.";
+                               " warnings will be throttled.";
       }
     }
     post_encode_callback_->OnDroppedFrame(DropReason::kDroppedByEncoder);
     timing_frames_info_[simulcast_svc_idx].encode_start_list.pop_front();
   }
   timing_frames_info_[simulcast_svc_idx].encode_start_list.emplace_back(
-      capture_time_ms, rtc::TimeMillis());
+      rtp_timestamp, capture_time_ms, rtc::TimeMillis());
 }
 
 EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage(
@@ -273,6 +283,8 @@
     // TODO(ilnik): When h264 simulcast is landed, extract simulcast idx here.
   }
 
+  EncodedImage image_copy(encoded_image);
+
   rtc::Optional<size_t> outlier_frame_size;
   rtc::Optional<int64_t> encode_start_ms;
   size_t num_simulcast_svc_streams = 1;
@@ -288,27 +300,49 @@
           &timing_frames_info_[simulcast_svc_idx].encode_start_list;
       // Skip frames for which there was OnEncodeStarted but no OnEncodedImage
       // call. These are dropped by encoder internally.
+      // Because some hardware encoders don't preserve capture timestamp we use
+      // RTP timestamps here.
       while (!encode_start_list->empty() &&
-             encode_start_list->front().capture_time_ms <
-                 encoded_image.capture_time_ms_) {
+             IsNewerTimestamp(image_copy._timeStamp,
+                              encode_start_list->front().rtp_timestamp)) {
         post_encode_callback_->OnDroppedFrame(DropReason::kDroppedByEncoder);
         encode_start_list->pop_front();
       }
       if (encode_start_list->size() > 0 &&
-          encode_start_list->front().capture_time_ms ==
-              encoded_image.capture_time_ms_) {
+          encode_start_list->front().rtp_timestamp == image_copy._timeStamp) {
         encode_start_ms.emplace(
             encode_start_list->front().encode_start_time_ms);
+        if (image_copy.capture_time_ms_ !=
+            encode_start_list->front().capture_time_ms) {
+          // Force correct capture timestamp.
+          image_copy.capture_time_ms_ =
+              encode_start_list->front().capture_time_ms;
+          ++incorrect_capture_time_logged_messages_;
+          if (incorrect_capture_time_logged_messages_ <=
+                  kMessagesThrottlingThreshold ||
+              incorrect_capture_time_logged_messages_ % kThrottleRatio == 0) {
+            RTC_LOG(LS_WARNING)
+                << "Encoder is not preserving capture timestamps.";
+            if (incorrect_capture_time_logged_messages_ ==
+                kMessagesThrottlingThreshold) {
+              RTC_LOG(LS_WARNING) << "Too many log messages. Further incorrect "
+                                     "timestamps warnings will be throttled.";
+            }
+          }
+        }
         encode_start_list->pop_front();
       } else {
         ++reordered_frames_logged_messages_;
-        if (reordered_frames_logged_messages_ <= 100) {
+        if (reordered_frames_logged_messages_ <= kMessagesThrottlingThreshold ||
+            reordered_frames_logged_messages_ % kThrottleRatio == 0) {
           RTC_LOG(LS_WARNING)
               << "Frame with no encode started time recordings. "
-                 "Encoder may be reordering frames.";
-          if (reordered_frames_logged_messages_ == 100) {
+                 "Encoder may be reordering frames "
+                 "or not preserving RTP timestamps.";
+          if (reordered_frames_logged_messages_ ==
+              kMessagesThrottlingThreshold) {
             RTC_LOG(LS_WARNING) << "Too many log messages. Further frames "
-                                   "reordering warnings will not be printed.";
+                                   "reordering warnings will be throttled.";
           }
         }
       }
@@ -326,7 +360,7 @@
 
     // Check if it's time to send a timing frame.
     int64_t timing_frame_delay_ms =
-        encoded_image.capture_time_ms_ - last_timing_frame_time_ms_;
+        image_copy.capture_time_ms_ - last_timing_frame_time_ms_;
     // Trigger threshold if it's a first frame, too long passed since the last
     // timing frame, or we already sent timing frame on a different simulcast
     // stream with the same capture time.
@@ -334,12 +368,12 @@
         timing_frame_delay_ms >= timing_frames_thresholds_.delay_ms ||
         timing_frame_delay_ms == 0) {
       timing_flags = TimingFrameFlags::kTriggeredByTimer;
-      last_timing_frame_time_ms_ = encoded_image.capture_time_ms_;
+      last_timing_frame_time_ms_ = image_copy.capture_time_ms_;
     }
 
     // Outliers trigger timing frames, but do not affect scheduled timing
     // frames.
-    if (outlier_frame_size && encoded_image._length >= *outlier_frame_size) {
+    if (outlier_frame_size && image_copy._length >= *outlier_frame_size) {
       timing_flags |= TimingFrameFlags::kTriggeredBySize;
     }
   }
@@ -350,36 +384,35 @@
   // because to being sent in the network capture time required to be less than
   // all the other timestamps.
   if (encode_start_ms) {
-    encoded_image.SetEncodeTime(*encode_start_ms, rtc::TimeMillis());
-    encoded_image.timing_.flags = timing_flags;
+    image_copy.SetEncodeTime(*encode_start_ms, rtc::TimeMillis());
+    image_copy.timing_.flags = timing_flags;
   } else {
-    encoded_image.timing_.flags = TimingFrameFlags::kInvalid;
+    image_copy.timing_.flags = TimingFrameFlags::kInvalid;
   }
 
   // Piggyback ALR experiment group id and simulcast id into the content type.
   uint8_t experiment_id =
       experiment_groups_[videocontenttypehelpers::IsScreenshare(
-          encoded_image.content_type_)];
+          image_copy.content_type_)];
 
   // TODO(ilnik): This will force content type extension to be present even
   // for realtime video. At the expense of miniscule overhead we will get
   // sliced receive statistics.
-  RTC_CHECK(videocontenttypehelpers::SetExperimentId(
-      &encoded_image.content_type_, experiment_id));
+  RTC_CHECK(videocontenttypehelpers::SetExperimentId(&image_copy.content_type_,
+                                                     experiment_id));
   // We count simulcast streams from 1 on the wire. That's why we set simulcast
   // id in content type to +1 of that is actual simulcast index. This is because
   // value 0 on the wire is reserved for 'no simulcast stream specified'.
   RTC_CHECK(videocontenttypehelpers::SetSimulcastId(
-      &encoded_image.content_type_,
-      static_cast<uint8_t>(simulcast_svc_idx + 1)));
+      &image_copy.content_type_, static_cast<uint8_t>(simulcast_svc_idx + 1)));
 
   Result result = post_encode_callback_->OnEncodedImage(
-      encoded_image, codec_specific, fragmentation_header);
+      image_copy, codec_specific, fragmentation_header);
   if (result.error != Result::OK)
     return result;
 
   if (media_opt_) {
-    media_opt_->UpdateWithEncodedData(encoded_image);
+    media_opt_->UpdateWithEncodedData(image_copy);
     if (internal_source_) {
       // Signal to encoder to drop next frame.
       result.drop_next_frame = media_opt_->DropFrame();
diff --git a/modules/video_coding/generic_encoder.h b/modules/video_coding/generic_encoder.h
index f8ffc48..244c614 100644
--- a/modules/video_coding/generic_encoder.h
+++ b/modules/video_coding/generic_encoder.h
@@ -57,7 +57,9 @@
 
   void OnFrameRateChanged(size_t framerate);
 
-  void OnEncodeStarted(int64_t capture_time_ms, size_t simulcast_svc_idx);
+  void OnEncodeStarted(uint32_t rtp_timestamp,
+                       int64_t capture_time_ms,
+                       size_t simulcast_svc_idx);
 
   void SetTimingFramesThresholds(
       const VideoCodec::TimingFrameTriggerThresholds& thresholds) {
@@ -81,9 +83,13 @@
   media_optimization::MediaOptimization* const media_opt_;
 
   struct EncodeStartTimeRecord {
-    EncodeStartTimeRecord(int64_t capture_time, int64_t encode_start_time)
-        : capture_time_ms(capture_time),
+    EncodeStartTimeRecord(uint32_t timestamp,
+                          int64_t capture_time,
+                          int64_t encode_start_time)
+        : rtp_timestamp(timestamp),
+          capture_time_ms(capture_time),
           encode_start_time_ms(encode_start_time) {}
+    uint32_t rtp_timestamp;
     int64_t capture_time_ms;
     int64_t encode_start_time_ms;
   };
@@ -98,6 +104,8 @@
   int64_t last_timing_frame_time_ms_ RTC_GUARDED_BY(timing_params_lock_);
   VideoCodec::TimingFrameTriggerThresholds timing_frames_thresholds_
       RTC_GUARDED_BY(timing_params_lock_);
+  size_t incorrect_capture_time_logged_messages_
+      RTC_GUARDED_BY(timing_params_lock_);
   size_t reordered_frames_logged_messages_ RTC_GUARDED_BY(timing_params_lock_);
   size_t stalled_encoder_logged_messages_ RTC_GUARDED_BY(timing_params_lock_);
 
diff --git a/modules/video_coding/generic_encoder_unittest.cc b/modules/video_coding/generic_encoder_unittest.cc
index 48c152f..2b788c5 100644
--- a/modules/video_coding/generic_encoder_unittest.cc
+++ b/modules/video_coding/generic_encoder_unittest.cc
@@ -28,13 +28,16 @@
 class FakeEncodedImageCallback : public EncodedImageCallback {
  public:
   FakeEncodedImageCallback()
-      : last_frame_was_timing_(false), num_frames_dropped_(0) {}
+      : last_frame_was_timing_(false),
+        num_frames_dropped_(0),
+        last_capture_timestamp_(-1) {}
   Result OnEncodedImage(const EncodedImage& encoded_image,
                         const CodecSpecificInfo* codec_specific_info,
                         const RTPFragmentationHeader* fragmentation) override {
     last_frame_was_timing_ =
         encoded_image.timing_.flags != TimingFrameFlags::kInvalid &&
         encoded_image.timing_.flags != TimingFrameFlags::kNotTriggered;
+    last_capture_timestamp_ = encoded_image.capture_time_ms_;
     return Result(Result::OK);
   };
 
@@ -44,9 +47,12 @@
 
   size_t GetNumFramesDropped() { return num_frames_dropped_; }
 
+  int64_t GetLastCaptureTimestamp() { return last_capture_timestamp_; }
+
  private:
   bool last_frame_was_timing_;
   size_t num_frames_dropped_;
+  int64_t last_capture_timestamp_;
 };
 
 enum class FrameType {
@@ -87,9 +93,11 @@
       CodecSpecificInfo codec_specific;
       image._length = FrameSize(min_frame_size, max_frame_size, s, i);
       image.capture_time_ms_ = current_timestamp;
+      image._timeStamp = static_cast<uint32_t>(current_timestamp * 90);
       codec_specific.codecType = kVideoCodecGeneric;
       codec_specific.codecSpecific.generic.simulcast_idx = s;
-      callback.OnEncodeStarted(current_timestamp, s);
+      callback.OnEncodeStarted(static_cast<uint32_t>(current_timestamp * 90),
+                               current_timestamp, s);
       if (dropped) {
         result[s].push_back(FrameType::kDropped);
         continue;
@@ -178,6 +186,7 @@
   int64_t timestamp = 1;
   image._length = 500;
   image.capture_time_ms_ = timestamp;
+  image._timeStamp = static_cast<uint32_t>(timestamp * 90);
   codec_specific.codecType = kVideoCodecGeneric;
   codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
@@ -188,12 +197,13 @@
   callback.OnTargetBitrateChanged(500, 0);
 
   // Verify a single frame works with encode start time set.
-  callback.OnEncodeStarted(timestamp, 0);
+  callback.OnEncodeStarted(static_cast<uint32_t>(timestamp * 90), timestamp, 0);
   callback.OnEncodedImage(image, &codec_specific, nullptr);
   EXPECT_TRUE(sink.WasTimingFrame());
 
   // New frame, now skip OnEncodeStarted. Should not result in timing frame.
   image.capture_time_ms_ = ++timestamp;
+  image._timeStamp = static_cast<uint32_t>(timestamp * 90);
   callback.OnEncodedImage(image, &codec_specific, nullptr);
   EXPECT_FALSE(sink.WasTimingFrame());
 }
@@ -201,33 +211,59 @@
 TEST(TestVCMEncodedFrameCallback, NotifiesAboutDroppedFrames) {
   EncodedImage image;
   CodecSpecificInfo codec_specific;
-  const int64_t timestamp1 = 100;
-  const int64_t timestamp2 = 110;
-  const int64_t timestamp3 = 120;
-  const int64_t timestamp4 = 130;
+  const int64_t kTimestampMs1 = 47721840;
+  const int64_t kTimestampMs2 = 47721850;
+  const int64_t kTimestampMs3 = 47721860;
+  const int64_t kTimestampMs4 = 47721870;
   codec_specific.codecType = kVideoCodecGeneric;
   codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   // Any non-zero bitrate needed to be set before the first frame.
   callback.OnTargetBitrateChanged(500, 0);
-  callback.OnEncodeStarted(timestamp1, 0);
+  image.capture_time_ms_ = kTimestampMs1;
+  image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90);
+  callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0);
   EXPECT_EQ(0u, sink.GetNumFramesDropped());
-  image.capture_time_ms_ = timestamp1;
   callback.OnEncodedImage(image, &codec_specific, nullptr);
-  callback.OnEncodeStarted(timestamp2, 0);
+
+  image.capture_time_ms_ = kTimestampMs2;
+  image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90);
+  callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0);
   // No OnEncodedImageCall for timestamp2. Yet, at this moment it's not known
   // that frame with timestamp2 was dropped.
   EXPECT_EQ(0u, sink.GetNumFramesDropped());
-  callback.OnEncodeStarted(timestamp3, 0);
-  image.capture_time_ms_ = timestamp3;
+
+  image.capture_time_ms_ = kTimestampMs3;
+  image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90);
+  callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0);
   callback.OnEncodedImage(image, &codec_specific, nullptr);
   EXPECT_EQ(1u, sink.GetNumFramesDropped());
-  callback.OnEncodeStarted(timestamp4, 0);
-  image.capture_time_ms_ = timestamp4;
+
+  image.capture_time_ms_ = kTimestampMs4;
+  image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90);
+  callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0);
   callback.OnEncodedImage(image, &codec_specific, nullptr);
   EXPECT_EQ(1u, sink.GetNumFramesDropped());
 }
 
+TEST(TestVCMEncodedFrameCallback, RestoresCaptureTimestamps) {
+  EncodedImage image;
+  CodecSpecificInfo codec_specific;
+  const int64_t kTimestampMs = 123456;
+  codec_specific.codecType = kVideoCodecGeneric;
+  codec_specific.codecSpecific.generic.simulcast_idx = 0;
+  FakeEncodedImageCallback sink;
+  VCMEncodedFrameCallback callback(&sink, nullptr);
+  // Any non-zero bitrate needed to be set before the first frame.
+  callback.OnTargetBitrateChanged(500, 0);
+  image.capture_time_ms_ = kTimestampMs;  // Correct timestamp.
+  image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90);
+  callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0);
+  image.capture_time_ms_ = 0;  // Incorrect timestamp.
+  callback.OnEncodedImage(image, &codec_specific, nullptr);
+  EXPECT_EQ(kTimestampMs, sink.GetLastCaptureTimestamp());
+}
+
 }  // namespace test
 }  // namespace webrtc