Android MediaCodecVideoDecoder: Manage lifetime of texture frames

This CL should be the last one in a series to finally unblock camera texture capture.

The SurfaceTexture.updateTexImage() calls are moved from the video renderers into MediaCodecVideoDecoder, and the destructor of each texture frame signals MediaCodecVideoDecoder that the frame has been returned. This CL also removes the SurfaceTexture from the native handle and instead exposes only the OES texture ID and the texture transform matrix, because only the video source should access the SurfaceTexture.

BUG=webrtc:4993
R=glaznev@webrtc.org, perkj@webrtc.org

Review URL: https://codereview.webrtc.org/1378033003 .

Cr-Commit-Position: refs/heads/master@{#10203}
diff --git a/talk/app/webrtc/java/jni/androidmediadecoder_jni.cc b/talk/app/webrtc/java/jni/androidmediadecoder_jni.cc
index 1f63131..6012e18 100644
--- a/talk/app/webrtc/java/jni/androidmediadecoder_jni.cc
+++ b/talk/app/webrtc/java/jni/androidmediadecoder_jni.cc
@@ -32,6 +32,7 @@
 #include "talk/app/webrtc/java/jni/androidmediacodeccommon.h"
 #include "talk/app/webrtc/java/jni/classreferenceholder.h"
 #include "talk/app/webrtc/java/jni/native_handle_impl.h"
+#include "talk/app/webrtc/java/jni/surfacetexturehelper_jni.h"
 #include "webrtc/base/bind.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
@@ -110,7 +111,7 @@
   bool use_surface_;
   VideoCodec codec_;
   webrtc::I420BufferPool decoded_frame_pool_;
-  NativeHandleImpl native_handle_;
+  rtc::scoped_refptr<SurfaceTextureHelper> surface_texture_helper_;
   DecodedImageCallback* callback_;
   int frames_received_;  // Number of frames received by decoder.
   int frames_decoded_;  // Number of frames decoded by decoder.
@@ -143,10 +144,10 @@
   jfieldID j_height_field_;
   jfieldID j_stride_field_;
   jfieldID j_slice_height_field_;
-  jfieldID j_surface_texture_field_;
   // MediaCodecVideoDecoder.DecodedTextureBuffer fields.
   jfieldID j_textureID_field_;
-  jfieldID j_texture_presentation_timestamp_us_field_;
+  jfieldID j_transform_matrix_field_;
+  jfieldID j_texture_timestamp_ns_field_;
   // MediaCodecVideoDecoder.DecodedByteBuffer fields.
   jfieldID j_info_index_field_;
   jfieldID j_info_offset_field_;
@@ -155,8 +156,6 @@
 
   // Global references; must be deleted in Release().
   std::vector<jobject> input_buffers_;
-  jobject surface_texture_;
-  jobject previous_surface_texture_;
 
   // Render EGL context - owned by factory, should not be allocated/destroyed
   // by VideoDecoder.
@@ -170,8 +169,6 @@
     key_frame_required_(true),
     inited_(false),
     sw_fallback_required_(false),
-    surface_texture_(NULL),
-    previous_surface_texture_(NULL),
     codec_thread_(new Thread()),
     j_media_codec_video_decoder_class_(
         jni,
@@ -190,7 +187,7 @@
   j_init_decode_method_ = GetMethodID(
       jni, *j_media_codec_video_decoder_class_, "initDecode",
       "(Lorg/webrtc/MediaCodecVideoDecoder$VideoCodecType;"
-      "IILandroid/opengl/EGLContext;)Z");
+      "IILorg/webrtc/SurfaceTextureHelper;)Z");
   j_release_method_ =
       GetMethodID(jni, *j_media_codec_video_decoder_class_, "release", "()V");
   j_dequeue_input_buffer_method_ = GetMethodID(
@@ -220,17 +217,15 @@
       jni, *j_media_codec_video_decoder_class_, "stride", "I");
   j_slice_height_field_ = GetFieldID(
       jni, *j_media_codec_video_decoder_class_, "sliceHeight", "I");
-  j_surface_texture_field_ = GetFieldID(
-      jni, *j_media_codec_video_decoder_class_, "surfaceTexture",
-      "Landroid/graphics/SurfaceTexture;");
 
   jclass j_decoder_decoded_texture_buffer_class = FindClass(jni,
       "org/webrtc/MediaCodecVideoDecoder$DecodedTextureBuffer");
   j_textureID_field_ = GetFieldID(
       jni, j_decoder_decoded_texture_buffer_class, "textureID", "I");
-  j_texture_presentation_timestamp_us_field_ =
-      GetFieldID(jni, j_decoder_decoded_texture_buffer_class,
-                 "presentationTimestampUs", "J");
+  j_transform_matrix_field_ = GetFieldID(
+      jni, j_decoder_decoded_texture_buffer_class, "transformMatrix", "[F");
+  j_texture_timestamp_ns_field_ = GetFieldID(
+      jni, j_decoder_decoded_texture_buffer_class, "timestampNs", "J");
 
   jclass j_decoder_decoded_byte_buffer_class = FindClass(jni,
       "org/webrtc/MediaCodecVideoDecoder$DecodedByteBuffer");
@@ -253,14 +248,6 @@
 MediaCodecVideoDecoder::~MediaCodecVideoDecoder() {
   // Call Release() to ensure no more callbacks to us after we are deleted.
   Release();
-  // Delete global references.
-  JNIEnv* jni = AttachCurrentThreadIfNeeded();
-  if (previous_surface_texture_ != NULL) {
-    jni->DeleteGlobalRef(previous_surface_texture_);
-  }
-  if (surface_texture_ != NULL) {
-    jni->DeleteGlobalRef(surface_texture_);
-  }
 }
 
 int32_t MediaCodecVideoDecoder::InitDecode(const VideoCodec* inst,
@@ -310,6 +297,11 @@
   frames_received_ = 0;
   frames_decoded_ = 0;
 
+  if (use_surface_) {
+    surface_texture_helper_ = new rtc::RefCountedObject<SurfaceTextureHelper>(
+        jni, render_egl_context_);
+  }
+
   jobject j_video_codec_enum = JavaEnumFromIndex(
       jni, "MediaCodecVideoDecoder$VideoCodecType", codecType_);
   bool success = jni->CallBooleanMethod(
@@ -318,7 +310,8 @@
       j_video_codec_enum,
       codec_.width,
       codec_.height,
-      use_surface_ ? render_egl_context_ : nullptr);
+      use_surface_ ? surface_texture_helper_->GetJavaSurfaceTextureHelper()
+                   : nullptr);
   if (CheckException(jni) || !success) {
     ALOGE("Codec initialization error - fallback to SW codec.");
     sw_fallback_required_ = true;
@@ -358,15 +351,6 @@
     }
   }
 
-  if (use_surface_) {
-    jobject surface_texture = GetObjectField(
-        jni, *j_media_codec_video_decoder_, j_surface_texture_field_);
-    if (previous_surface_texture_ != NULL) {
-      jni->DeleteGlobalRef(previous_surface_texture_);
-    }
-    previous_surface_texture_ = surface_texture_;
-    surface_texture_ = jni->NewGlobalRef(surface_texture);
-  }
   codec_thread_->PostDelayed(kMediaCodecPollMs, this);
 
   return WEBRTC_VIDEO_CODEC_OK;
@@ -391,6 +375,7 @@
   }
   input_buffers_.clear();
   jni->CallVoidMethod(*j_media_codec_video_decoder_, j_release_method_);
+  surface_texture_helper_ = nullptr;
   inited_ = false;
   rtc::MessageQueueManager::Clear(this);
   if (CheckException(jni)) {
@@ -499,7 +484,7 @@
   if (frames_received_ > frames_decoded_ + max_pending_frames_) {
     ALOGV("Received: %d. Decoded: %d. Wait for output...",
         frames_received_, frames_decoded_);
-    if (!DeliverPendingOutputs(jni, kMediaCodecTimeoutMs * 1000)) {
+    if (!DeliverPendingOutputs(jni, kMediaCodecTimeoutMs)) {
       ALOGE("DeliverPendingOutputs error");
       return ProcessHWErrorOnCodecThread();
     }
@@ -562,7 +547,7 @@
 }
 
 bool MediaCodecVideoDecoder::DeliverPendingOutputs(
-    JNIEnv* jni, int dequeue_timeout_us) {
+    JNIEnv* jni, int dequeue_timeout_ms) {
   if (frames_received_ <= frames_decoded_) {
     // No need to query for output buffers - decoder is drained.
     return true;
@@ -571,7 +556,7 @@
   jobject j_decoder_output_buffer = jni->CallObjectMethod(
       *j_media_codec_video_decoder_,
       j_dequeue_output_buffer_method_,
-      dequeue_timeout_us);
+      dequeue_timeout_ms);
   if (CheckException(jni)) {
     ALOGE("dequeueOutputBuffer() error");
     return false;
@@ -596,14 +581,15 @@
     // Extract data from Java DecodedTextureBuffer.
     const int texture_id =
         GetIntField(jni, j_decoder_output_buffer, j_textureID_field_);
-    const int64_t timestamp_us =
-        GetLongField(jni, j_decoder_output_buffer,
-                     j_texture_presentation_timestamp_us_field_);
-    output_timestamps_ms = timestamp_us / rtc::kNumMicrosecsPerMillisec;
+    const jfloatArray j_transform_matrix =
+        reinterpret_cast<jfloatArray>(GetObjectField(
+            jni, j_decoder_output_buffer, j_transform_matrix_field_));
+    const int64_t timestamp_ns = GetLongField(jni, j_decoder_output_buffer,
+                                              j_texture_timestamp_ns_field_);
+    output_timestamps_ms = timestamp_ns / rtc::kNumNanosecsPerMillisec;
     // Create webrtc::VideoFrameBuffer with native texture handle.
-    native_handle_.SetTextureObject(surface_texture_, texture_id);
-    frame_buffer = new rtc::RefCountedObject<JniNativeHandleBuffer>(
-        &native_handle_, width, height);
+    frame_buffer = surface_texture_helper_->CreateTextureFrame(
+        width, height, NativeHandleImpl(jni, texture_id, j_transform_matrix));
   } else {
     // Extract data from Java ByteBuffer and create output yuv420 frame -
     // for non surface decoding only.
diff --git a/talk/app/webrtc/java/jni/native_handle_impl.cc b/talk/app/webrtc/java/jni/native_handle_impl.cc
index 37f5489..98af4d8 100644
--- a/talk/app/webrtc/java/jni/native_handle_impl.cc
+++ b/talk/app/webrtc/java/jni/native_handle_impl.cc
@@ -31,32 +31,17 @@
 
 namespace webrtc_jni {
 
-NativeHandleImpl::NativeHandleImpl() : texture_object_(NULL), texture_id_(-1) {}
-
-void* NativeHandleImpl::GetHandle() {
-  return texture_object_;
-}
-
-int NativeHandleImpl::GetTextureId() {
-  return texture_id_;
-}
-
-void NativeHandleImpl::SetTextureObject(void* texture_object, int texture_id) {
-  texture_object_ = reinterpret_cast<jobject>(texture_object);
-  texture_id_ = texture_id;
-}
-
-JniNativeHandleBuffer::JniNativeHandleBuffer(void* native_handle,
-                                             int width,
-                                             int height)
-    : NativeHandleBuffer(native_handle, width, height) {}
-
-rtc::scoped_refptr<webrtc::VideoFrameBuffer>
-JniNativeHandleBuffer::NativeToI420Buffer() {
-  // TODO(pbos): Implement before using this in the encoder pipeline (or
-  // remove the RTC_CHECK() in VideoCapture).
-  RTC_NOTREACHED();
-  return nullptr;
+NativeHandleImpl::NativeHandleImpl(JNIEnv* jni,
+                                   jint j_oes_texture_id,
+                                   jfloatArray j_transform_matrix)
+    : oes_texture_id(j_oes_texture_id) {
+  RTC_CHECK_EQ(16, jni->GetArrayLength(j_transform_matrix));
+  jfloat* transform_matrix_ptr =
+      jni->GetFloatArrayElements(j_transform_matrix, nullptr);
+  for (int i = 0; i < 16; ++i) {
+    sampling_matrix[i] = transform_matrix_ptr[i];
+  }
+  jni->ReleaseFloatArrayElements(j_transform_matrix, transform_matrix_ptr, 0);
 }
 
 }  // namespace webrtc_jni
diff --git a/talk/app/webrtc/java/jni/native_handle_impl.h b/talk/app/webrtc/java/jni/native_handle_impl.h
index 2ce2b73..370039e 100644
--- a/talk/app/webrtc/java/jni/native_handle_impl.h
+++ b/talk/app/webrtc/java/jni/native_handle_impl.h
@@ -31,33 +31,16 @@
 
 #include <jni.h>
 
-#include "webrtc/common_video/interface/video_frame_buffer.h"
-
 namespace webrtc_jni {
 
 // Wrapper for texture object.
-class NativeHandleImpl {
- public:
-  NativeHandleImpl();
+struct NativeHandleImpl {
+  NativeHandleImpl(JNIEnv* jni,
+                   jint j_oes_texture_id,
+                   jfloatArray j_transform_matrix);
 
-  void* GetHandle();
-  int GetTextureId();
-  void SetTextureObject(void* texture_object, int texture_id);
-
- private:
-  jobject texture_object_;
-  int32_t texture_id_;
-};
-
-class JniNativeHandleBuffer : public webrtc::NativeHandleBuffer {
- public:
-  JniNativeHandleBuffer(void* native_handle, int width, int height);
-
-  // TODO(pbos): Override destructor to release native handle, at the moment the
-  // native handle is not released based on refcount.
-
- private:
-  rtc::scoped_refptr<webrtc::VideoFrameBuffer> NativeToI420Buffer() override;
+  const int oes_texture_id;
+  float sampling_matrix[16];
 };
 
 }  // namespace webrtc_jni
diff --git a/talk/app/webrtc/java/jni/peerconnection_jni.cc b/talk/app/webrtc/java/jni/peerconnection_jni.cc
index fc6ce50c..2d14dee 100644
--- a/talk/app/webrtc/java/jni/peerconnection_jni.cc
+++ b/talk/app/webrtc/java/jni/peerconnection_jni.cc
@@ -771,7 +771,7 @@
             jni, *j_frame_class_, "<init>", "(III[I[Ljava/nio/ByteBuffer;J)V")),
         j_texture_frame_ctor_id_(GetMethodID(
             jni, *j_frame_class_, "<init>",
-            "(IIILjava/lang/Object;IJ)V")),
+            "(IIII[FJ)V")),
         j_byte_buffer_class_(jni, FindClass(jni, "java/nio/ByteBuffer")) {
     CHECK_EXCEPTION(jni);
   }
@@ -827,13 +827,13 @@
   jobject CricketToJavaTextureFrame(const cricket::VideoFrame* frame) {
     NativeHandleImpl* handle =
         reinterpret_cast<NativeHandleImpl*>(frame->GetNativeHandle());
-    jobject texture_object = reinterpret_cast<jobject>(handle->GetHandle());
-    int texture_id = handle->GetTextureId();
+    jfloatArray sampling_matrix = jni()->NewFloatArray(16);
+    jni()->SetFloatArrayRegion(sampling_matrix, 0, 16, handle->sampling_matrix);
     return jni()->NewObject(
         *j_frame_class_, j_texture_frame_ctor_id_,
         frame->GetWidth(), frame->GetHeight(),
         static_cast<int>(frame->GetVideoRotation()),
-        texture_object, texture_id, javaShallowCopy(frame));
+        handle->oes_texture_id, sampling_matrix, javaShallowCopy(frame));
   }
 
   JNIEnv* jni() {