Implement timestamp translation/filter in VideoCapturer.

Use in AndroidVideoCapturer.

BUG=webrtc:5740

Review-Url: https://codereview.webrtc.org/2017443003
Cr-Commit-Position: refs/heads/master@{#13254}
diff --git a/webrtc/base/timestampaligner.cc b/webrtc/base/timestampaligner.cc
new file mode 100644
index 0000000..e03e4fb
--- /dev/null
+++ b/webrtc/base/timestampaligner.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/base/logging.h"
+#include "webrtc/base/timestampaligner.h"
+
+namespace rtc {
+
+TimestampAligner::TimestampAligner() : frames_seen_(0), offset_us_(0) {}
+TimestampAligner::~TimestampAligner() {}
+
+int64_t TimestampAligner::UpdateOffset(int64_t camera_time_us,
+                                       int64_t system_time_us) {
+  // Estimate the offset between system monotonic time and the capture
+  // time from the camera. The camera is assumed to provide more
+  // accurate timestamps than we get from the system time. But the
+  // camera may use its own free-running clock with a large offset and
+  // a small drift compared to the system clock. So the model is
+  // basically
+  //
+  //   y_k = c_0 + c_1 * x_k + v_k
+  //
+  // where x_k is the camera timestamp, believed to be accurate in its
+  // own scale. y_k is our reading of the system clock. v_k is the
+  // measurement noise, i.e., the delay from frame capture until the
+  // system clock was read.
+  //
+  // It's possible to do (weighted) least-squares estimation of both
+  // c_0 and c_1. Then we get the constants as c_1 = Cov(x,y) /
+  // Var(x), and c_0 = mean(y) - c_1 * mean(x). Substituting this c_0,
+  // we can rearrange the model as
+  //
+  //   y_k = mean(y) + (x_k - mean(x)) + (c_1 - 1) * (x_k - mean(x)) + v_k
+  //
+  // Now if we use a weighted average which gradually forgets old
+  // values, x_k - mean(x) is bounded, of the same order as the time
+  // constant (and close to constant for a steady frame rate). In
+  // addition, the frequency error |c_1 - 1| should be small. Cameras
+  // with a frequency error up to 3000 ppm (3 ms drift per second)
+  // have been observed, but frequency errors below 100 ppm could be
+  // expected of any cheap crystal.
+  //
+  // Bottom line is that we ignore the c_1 term, and use only the estimator
+  //
+  //    x_k + mean(y-x)
+  //
+  // where mean is plain averaging for initial samples, followed by
+  // exponential averaging.
+
+  // The input for averaging, y_k - x_k in the above notation.
+  int64_t diff_us = system_time_us - camera_time_us;
+  // The deviation from the current average.
+  int64_t error_us = diff_us - offset_us_;
+
+  // If the current difference is far from the currently estimated
+  // offset, the filter is reset. This could happen, e.g., if the
+  // camera clock is reset, or cameras are plugged in and out, or if
+  // the application process is temporarily suspended. The limit of
+  // 300 ms should make this unlikely in normal operation, and at the
+  // same time, converging gradually rather than resetting the filter
+  // should be tolerable for jumps in camera time below this
+  // threshold.
+  static const int64_t kResetLimitUs = 300000;
+  if (std::abs(error_us) > kResetLimitUs) {
+    LOG(LS_INFO) << "Resetting timestamp translation after averaging "
+                 << frames_seen_ << " frames. Old offset: " << offset_us_
+                 << ", new offset: " << diff_us;
+    frames_seen_ = 0;
+    prev_translated_time_us_ = rtc::Optional<int64_t>();
+  }
+
+  static const int kWindowSize = 100;
+  if (frames_seen_ < kWindowSize) {
+    ++frames_seen_;
+  }
+  offset_us_ += error_us / frames_seen_;
+  return offset_us_;
+}
+
+int64_t TimestampAligner::ClipTimestamp(int64_t time_us,
+                                        int64_t system_time_us) {
+  // Make timestamps monotonic.
+  if (!prev_translated_time_us_) {
+    // Initialize.
+    clip_bias_us_ = 0;
+  } else if (time_us < *prev_translated_time_us_) {
+    time_us = *prev_translated_time_us_;
+  }
+
+  // Clip to make sure we don't produce time stamps in the future.
+  time_us -= clip_bias_us_;
+  if (time_us > system_time_us) {
+    clip_bias_us_ += time_us - system_time_us;
+    time_us = system_time_us;
+  }
+  prev_translated_time_us_ = rtc::Optional<int64_t>(time_us);
+  return time_us;
+}
+
+}  // namespace rtc