Expose functionality to convert TextureBuffer to I420.

Bug: webrtc:8392
Change-Id: I79682efbef3aecbba904aa5047b229833fae25e8
Reviewed-on: https://webrtc-review.googlesource.com/8940
Commit-Queue: Sami Kalliomäki <sakal@webrtc.org>
Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20313}
diff --git a/sdk/android/api/org/webrtc/SurfaceTextureHelper.java b/sdk/android/api/org/webrtc/SurfaceTextureHelper.java
index 1fbc53f..8e26085 100644
--- a/sdk/android/api/org/webrtc/SurfaceTextureHelper.java
+++ b/sdk/android/api/org/webrtc/SurfaceTextureHelper.java
@@ -231,8 +231,11 @@
     });
   }
 
-  public void textureToYUV(final ByteBuffer buf, final int width, final int height,
-      final int stride, final int textureId, final float[] transformMatrix) {
+  /** Deprecated, use textureToYuv. */
+  @Deprecated
+  @SuppressWarnings("deprecation") // yuvConverter.convert is deprecated
+  void textureToYUV(final ByteBuffer buf, final int width, final int height, final int stride,
+      final int textureId, final float[] transformMatrix) {
     if (textureId != oesTextureId) {
       throw new IllegalStateException("textureToByteBuffer called with unexpected textureId");
     }
@@ -248,6 +251,25 @@
     });
   }
 
+  /**
+   * Posts to the correct thread to convert |textureBuffer| to I420. Must only be called with
+   * textures generated by this SurfaceTextureHelper.
+   */
+  public VideoFrame.I420Buffer textureToYuv(final TextureBuffer textureBuffer) {
+    if (textureBuffer.getTextureId() != oesTextureId) {
+      throw new IllegalStateException("textureToYuv called with unexpected textureId");
+    }
+
+    final VideoFrame.I420Buffer[] result = new VideoFrame.I420Buffer[1];
+    ThreadUtils.invokeAtFrontUninterruptibly(handler, () -> {
+      if (yuvConverter == null) {
+        yuvConverter = new YuvConverter();
+      }
+      result[0] = yuvConverter.convert(textureBuffer);
+    });
+    return result[0];
+  }
+
   private void updateTexImage() {
     // SurfaceTexture.updateTexImage apparently can compete and deadlock with eglSwapBuffers,
     // as observed on Nexus 5. Therefore, synchronize it with the EGL functions.
diff --git a/sdk/android/api/org/webrtc/VideoFileRenderer.java b/sdk/android/api/org/webrtc/VideoFileRenderer.java
index 17676c0..bb6ebaa 100644
--- a/sdk/android/api/org/webrtc/VideoFileRenderer.java
+++ b/sdk/android/api/org/webrtc/VideoFileRenderer.java
@@ -15,8 +15,8 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.util.concurrent.CountDownLatch;
 import java.util.ArrayList;
+import java.util.concurrent.CountDownLatch;
 
 /**
  * Can be used to save the video frames to file.
@@ -94,7 +94,7 @@
     final float[] texMatrix = RendererCommon.multiplyMatrices(rotatedSamplingMatrix, layoutMatrix);
 
     try {
-      ByteBuffer buffer = nativeCreateNativeByteBuffer(outputFrameSize);
+      ByteBuffer buffer = JniCommon.allocateNativeByteBuffer(outputFrameSize);
       if (!frame.yuvFrame) {
         yuvConverter.convert(outputFrameBuffer, outputFileWidth, outputFileHeight, outputFileWidth,
             frame.textureId, texMatrix);
@@ -153,7 +153,7 @@
 
         videoOutFile.write(data);
 
-        nativeFreeNativeByteBuffer(buffer);
+        JniCommon.freeNativeByteBuffer(buffer);
       }
       videoOutFile.close();
       Logging.d(TAG, "Video written to disk as " + outputFileName + ". Number frames are "
@@ -167,8 +167,4 @@
   public static native void nativeI420Scale(ByteBuffer srcY, int strideY, ByteBuffer srcU,
       int strideU, ByteBuffer srcV, int strideV, int width, int height, ByteBuffer dst,
       int dstWidth, int dstHeight);
-
-  public static native ByteBuffer nativeCreateNativeByteBuffer(int size);
-
-  public static native void nativeFreeNativeByteBuffer(ByteBuffer buffer);
 }
diff --git a/sdk/android/api/org/webrtc/VideoFrame.java b/sdk/android/api/org/webrtc/VideoFrame.java
index b49262b..da9542b 100644
--- a/sdk/android/api/org/webrtc/VideoFrame.java
+++ b/sdk/android/api/org/webrtc/VideoFrame.java
@@ -11,6 +11,8 @@
 package org.webrtc;
 
 import android.graphics.Matrix;
+import android.opengl.GLES11Ext;
+import android.opengl.GLES20;
 import java.nio.ByteBuffer;
 
 /**
@@ -88,7 +90,20 @@
    * Interface for buffers that are stored as a single texture, either in OES or RGB format.
    */
   public interface TextureBuffer extends Buffer {
-    enum Type { OES, RGB }
+    enum Type {
+      OES(GLES11Ext.GL_TEXTURE_EXTERNAL_OES),
+      RGB(GLES20.GL_TEXTURE_2D);
+
+      private final int glTarget;
+
+      private Type(final int glTarget) {
+        this.glTarget = glTarget;
+      }
+
+      public int getGlTarget() {
+        return glTarget;
+      }
+    }
 
     Type getType();
     int getTextureId();
diff --git a/sdk/android/api/org/webrtc/YuvConverter.java b/sdk/android/api/org/webrtc/YuvConverter.java
new file mode 100644
index 0000000..a6ea438
--- /dev/null
+++ b/sdk/android/api/org/webrtc/YuvConverter.java
@@ -0,0 +1,327 @@
+/*
+ *  Copyright 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+package org.webrtc;
+
+import android.opengl.GLES11Ext;
+import android.opengl.GLES20;
+import java.nio.ByteBuffer;
+import java.nio.FloatBuffer;
+import org.webrtc.VideoFrame.I420Buffer;
+import org.webrtc.VideoFrame.TextureBuffer;
+
+/**
+ * Class for converting OES textures to a YUV ByteBuffer. It should be constructed on a thread with
+ * an active EGL context, and only be used from that thread.
+ */
+class YuvConverter {
+  // Vertex coordinates in Normalized Device Coordinates, i.e.
+  // (-1, -1) is bottom-left and (1, 1) is top-right.
+  private static final FloatBuffer DEVICE_RECTANGLE = GlUtil.createFloatBuffer(new float[] {
+      -1.0f, -1.0f, // Bottom left.
+      1.0f, -1.0f, // Bottom right.
+      -1.0f, 1.0f, // Top left.
+      1.0f, 1.0f, // Top right.
+  });
+
+  // Texture coordinates - (0, 0) is bottom-left and (1, 1) is top-right.
+  private static final FloatBuffer TEXTURE_RECTANGLE = GlUtil.createFloatBuffer(new float[] {
+      0.0f, 0.0f, // Bottom left.
+      1.0f, 0.0f, // Bottom right.
+      0.0f, 1.0f, // Top left.
+      1.0f, 1.0f // Top right.
+  });
+
+  // clang-format off
+  private static final String VERTEX_SHADER =
+        "varying vec2 interp_tc;\n"
+      + "attribute vec4 in_pos;\n"
+      + "attribute vec4 in_tc;\n"
+      + "\n"
+      + "uniform mat4 texMatrix;\n"
+      + "\n"
+      + "void main() {\n"
+      + "    gl_Position = in_pos;\n"
+      + "    interp_tc = (texMatrix * in_tc).xy;\n"
+      + "}\n";
+
+  private static final String OES_FRAGMENT_SHADER =
+        "#extension GL_OES_EGL_image_external : require\n"
+      + "precision mediump float;\n"
+      + "varying vec2 interp_tc;\n"
+      + "\n"
+      + "uniform samplerExternalOES tex;\n"
+      // Difference in texture coordinate corresponding to one
+      // sub-pixel in the x direction.
+      + "uniform vec2 xUnit;\n"
+      // Color conversion coefficients, including constant term
+      + "uniform vec4 coeffs;\n"
+      + "\n"
+      + "void main() {\n"
+      // Since the alpha read from the texture is always 1, this could
+      // be written as a mat4 x vec4 multiply. However, that seems to
+      // give a worse framerate, possibly because the additional
+      // multiplies by 1.0 consume resources. TODO(nisse): Could also
+      // try to do it as a vec3 x mat3x4, followed by an add in of a
+      // constant vector.
+      + "  gl_FragColor.r = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc - 1.5 * xUnit).rgb);\n"
+      + "  gl_FragColor.g = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc - 0.5 * xUnit).rgb);\n"
+      + "  gl_FragColor.b = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc + 0.5 * xUnit).rgb);\n"
+      + "  gl_FragColor.a = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc + 1.5 * xUnit).rgb);\n"
+      + "}\n";
+
+  private static final String RGB_FRAGMENT_SHADER =
+        "precision mediump float;\n"
+      + "varying vec2 interp_tc;\n"
+      + "\n"
+      + "uniform sampler2D tex;\n"
+      // Difference in texture coordinate corresponding to one
+      // sub-pixel in the x direction.
+      + "uniform vec2 xUnit;\n"
+      // Color conversion coefficients, including constant term
+      + "uniform vec4 coeffs;\n"
+      + "\n"
+      + "void main() {\n"
+      // Since the alpha read from the texture is always 1, this could
+      // be written as a mat4 x vec4 multiply. However, that seems to
+      // give a worse framerate, possibly because the additional
+      // multiplies by 1.0 consume resources. TODO(nisse): Could also
+      // try to do it as a vec3 x mat3x4, followed by an add in of a
+      // constant vector.
+      + "  gl_FragColor.r = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc - 1.5 * xUnit).rgb);\n"
+      + "  gl_FragColor.g = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc - 0.5 * xUnit).rgb);\n"
+      + "  gl_FragColor.b = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc + 0.5 * xUnit).rgb);\n"
+      + "  gl_FragColor.a = coeffs.a + dot(coeffs.rgb,\n"
+      + "      texture2D(tex, interp_tc + 1.5 * xUnit).rgb);\n"
+      + "}\n";
+  // clang-format on
+
+  private final ThreadUtils.ThreadChecker threadChecker = new ThreadUtils.ThreadChecker();
+  private final GlTextureFrameBuffer textureFrameBuffer;
+  private TextureBuffer.Type shaderTextureType;
+  private GlShader shader;
+  private int texMatrixLoc;
+  private int xUnitLoc;
+  private int coeffsLoc;
+  private boolean released = false;
+
+  /**
+   * This class should be constructed on a thread that has an active EGL context.
+   */
+  public YuvConverter() {
+    threadChecker.checkIsOnValidThread();
+    textureFrameBuffer = new GlTextureFrameBuffer(GLES20.GL_RGBA);
+  }
+
+  /** Converts the texture buffer to I420. */
+  public I420Buffer convert(TextureBuffer textureBuffer) {
+    final int width = textureBuffer.getWidth();
+    final int height = textureBuffer.getHeight();
+
+    // SurfaceTextureHelper requires a stride that is divisible by 8.  Round width up.
+    // See SurfaceTextureHelper for details on the size and format.
+    final int stride = ((width + 7) / 8) * 8;
+    final int uvHeight = (height + 1) / 2;
+    // Due to the layout used by SurfaceTextureHelper, vPos + stride * uvHeight would overrun the
+    // buffer.  Add one row at the bottom to compensate for this.  There will never be data in the
+    // extra row, but now other code does not have to deal with v stride * v height exceeding the
+    // buffer's capacity.
+    final int size = stride * (height + uvHeight + 1);
+    ByteBuffer buffer = JniCommon.allocateNativeByteBuffer(size);
+    convert(buffer, width, height, stride, textureBuffer.getTextureId(),
+        RendererCommon.convertMatrixFromAndroidGraphicsMatrix(textureBuffer.getTransformMatrix()),
+        textureBuffer.getType());
+
+    final int yPos = 0;
+    final int uPos = yPos + stride * height;
+    // Rows of U and V alternate in the buffer, so V data starts after the first row of U.
+    final int vPos = uPos + stride / 2;
+
+    buffer.position(yPos);
+    buffer.limit(yPos + stride * height);
+    ByteBuffer dataY = buffer.slice();
+
+    buffer.position(uPos);
+    buffer.limit(uPos + stride * uvHeight);
+    ByteBuffer dataU = buffer.slice();
+
+    buffer.position(vPos);
+    buffer.limit(vPos + stride * uvHeight);
+    ByteBuffer dataV = buffer.slice();
+
+    // SurfaceTextureHelper uses the same stride for Y, U, and V data.
+    return JavaI420Buffer.wrap(width, height, dataY, stride, dataU, stride, dataV, stride,
+        () -> { JniCommon.freeNativeByteBuffer(buffer); });
+  }
+
+  /** Deprecated, use convert(TextureBuffer). */
+  @Deprecated
+  void convert(ByteBuffer buf, int width, int height, int stride, int srcTextureId,
+      float[] transformMatrix) {
+    convert(buf, width, height, stride, srcTextureId, transformMatrix, TextureBuffer.Type.OES);
+  }
+
+  private void initShader(TextureBuffer.Type textureType) {
+    if (shader != null) {
+      shader.release();
+    }
+
+    final String fragmentShader;
+    switch (textureType) {
+      case OES:
+        fragmentShader = OES_FRAGMENT_SHADER;
+        break;
+      case RGB:
+        fragmentShader = RGB_FRAGMENT_SHADER;
+        break;
+      default:
+        throw new IllegalArgumentException("Unsupported texture type.");
+    }
+
+    shaderTextureType = textureType;
+    shader = new GlShader(VERTEX_SHADER, fragmentShader);
+    shader.useProgram();
+    texMatrixLoc = shader.getUniformLocation("texMatrix");
+    xUnitLoc = shader.getUniformLocation("xUnit");
+    coeffsLoc = shader.getUniformLocation("coeffs");
+    GLES20.glUniform1i(shader.getUniformLocation("tex"), 0);
+    GlUtil.checkNoGLES2Error("Initialize fragment shader uniform values.");
+    // Initialize vertex shader attributes.
+    shader.setVertexAttribArray("in_pos", 2, DEVICE_RECTANGLE);
+    // If the width is not a multiple of 4 pixels, the texture
+    // will be scaled up slightly and clipped at the right border.
+    shader.setVertexAttribArray("in_tc", 2, TEXTURE_RECTANGLE);
+  }
+
+  private void convert(ByteBuffer buf, int width, int height, int stride, int srcTextureId,
+      float[] transformMatrix, TextureBuffer.Type textureType) {
+    threadChecker.checkIsOnValidThread();
+    if (released) {
+      throw new IllegalStateException("YuvConverter.convert called on released object");
+    }
+    if (textureType != shaderTextureType) {
+      initShader(textureType);
+    }
+
+    // We draw into a buffer laid out like
+    //
+    //    +---------+
+    //    |         |
+    //    |  Y      |
+    //    |         |
+    //    |         |
+    //    +----+----+
+    //    | U  | V  |
+    //    |    |    |
+    //    +----+----+
+    //
+    // In memory, we use the same stride for all of Y, U and V. The
+    // U data starts at offset |height| * |stride| from the Y data,
+    // and the V data starts at offset |stride/2| from the U
+    // data, with rows of U and V data alternating.
+    //
+    // Now, it would have made sense to allocate a pixel buffer with
+    // a single byte per pixel (EGL10.EGL_COLOR_BUFFER_TYPE,
+    // EGL10.EGL_LUMINANCE_BUFFER,), but that seems to be
+    // unsupported by devices. So do the following hack: Allocate an
+    // RGBA buffer, of width |stride|/4. To render each of these
+    // large pixels, sample the texture at 4 different x coordinates
+    // and store the results in the four components.
+    //
+    // Since the V data needs to start on a boundary of such a
+    // larger pixel, it is not sufficient that |stride| is even, it
+    // has to be a multiple of 8 pixels.
+
+    if (stride % 8 != 0) {
+      throw new IllegalArgumentException("Invalid stride, must be a multiple of 8");
+    }
+    if (stride < width) {
+      throw new IllegalArgumentException("Invalid stride, must >= width");
+    }
+
+    int y_width = (width + 3) / 4;
+    int uv_width = (width + 7) / 8;
+    int uv_height = (height + 1) / 2;
+    int total_height = height + uv_height;
+    int size = stride * total_height;
+
+    if (buf.capacity() < size) {
+      throw new IllegalArgumentException("YuvConverter.convert called with too small buffer");
+    }
+    // Produce a frame buffer starting at top-left corner, not
+    // bottom-left.
+    transformMatrix =
+        RendererCommon.multiplyMatrices(transformMatrix, RendererCommon.verticalFlipMatrix());
+
+    final int frameBufferWidth = stride / 4;
+    final int frameBufferHeight = total_height;
+    textureFrameBuffer.setSize(frameBufferWidth, frameBufferHeight);
+
+    // Bind our framebuffer.
+    GLES20.glBindFramebuffer(GLES20.GL_FRAMEBUFFER, textureFrameBuffer.getFrameBufferId());
+    GlUtil.checkNoGLES2Error("glBindFramebuffer");
+
+    GLES20.glActiveTexture(GLES20.GL_TEXTURE0);
+    GLES20.glBindTexture(textureType.getGlTarget(), srcTextureId);
+    GLES20.glUniformMatrix4fv(texMatrixLoc, 1, false, transformMatrix, 0);
+
+    // Draw Y
+    GLES20.glViewport(0, 0, y_width, height);
+    // Matrix * (1;0;0;0) / width. Note that opengl uses column major order.
+    GLES20.glUniform2f(xUnitLoc, transformMatrix[0] / width, transformMatrix[1] / width);
+    // Y'UV444 to RGB888, see
+    // https://en.wikipedia.org/wiki/YUV#Y.27UV444_to_RGB888_conversion.
+    // We use the ITU-R coefficients for U and V.
+    GLES20.glUniform4f(coeffsLoc, 0.299f, 0.587f, 0.114f, 0.0f);
+    GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
+
+    // Draw U
+    GLES20.glViewport(0, height, uv_width, uv_height);
+    // Matrix * (1;0;0;0) / (width / 2). Note that opengl uses column major order.
+    GLES20.glUniform2f(
+        xUnitLoc, 2.0f * transformMatrix[0] / width, 2.0f * transformMatrix[1] / width);
+    GLES20.glUniform4f(coeffsLoc, -0.169f, -0.331f, 0.499f, 0.5f);
+    GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
+
+    // Draw V
+    GLES20.glViewport(stride / 8, height, uv_width, uv_height);
+    GLES20.glUniform4f(coeffsLoc, 0.499f, -0.418f, -0.0813f, 0.5f);
+    GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
+
+    GLES20.glReadPixels(
+        0, 0, frameBufferWidth, frameBufferHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, buf);
+
+    GlUtil.checkNoGLES2Error("YuvConverter.convert");
+
+    // Restore normal framebuffer.
+    GLES20.glBindFramebuffer(GLES20.GL_FRAMEBUFFER, 0);
+    GLES20.glBindTexture(GLES20.GL_TEXTURE_2D, 0);
+
+    // Unbind texture. Reportedly needed on some devices to get
+    // the texture updated from the camera.
+    GLES20.glBindTexture(textureType.getGlTarget(), 0);
+  }
+
+  public void release() {
+    threadChecker.checkIsOnValidThread();
+    released = true;
+    if (shader != null) {
+      shader.release();
+    }
+    textureFrameBuffer.release();
+  }
+}