Add H.264 HW encoder and decoder support for Android.
- Allow configuring the MediaCodec Java wrapper to use either the VP8
or the H.264 codec.
- Save H.264 config frames with SPS and PPS NALUs and append them to every key frame.
- Correctly handle the case when one encoded frame may generate several output NALUs.
- Add code to find H.264 start codes.
- Add a flag (not yet configurable) to use H.264 in AppRTCDemo.
- Improve MediaCodec logging.
R=wzh@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/43379004
Cr-Commit-Position: refs/heads/master@{#8465}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8465 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/talk/app/webrtc/java/jni/androidmediaencoder_jni.cc b/talk/app/webrtc/java/jni/androidmediaencoder_jni.cc
index 7028431..edefe20 100644
--- a/talk/app/webrtc/java/jni/androidmediaencoder_jni.cc
+++ b/talk/app/webrtc/java/jni/androidmediaencoder_jni.cc
@@ -49,10 +49,22 @@
using webrtc::I420VideoFrame;
using webrtc::RTPFragmentationHeader;
using webrtc::VideoCodec;
+using webrtc::VideoCodecType;
+using webrtc::kVideoCodecH264;
using webrtc::kVideoCodecVP8;
namespace webrtc_jni {
+// H.264 start code length.
+#define H264_SC_LENGTH 4
+// Maximum allowed NALUs in one output frame.
+#define MAX_NALUS_PERFRAME 32
+// Maximum supported HW video encoder resolution.
+#define MAX_VIDEO_WIDTH 1280
+#define MAX_VIDEO_HEIGHT 1280
+// Maximum supported HW video encoder fps.
+#define MAX_VIDEO_FPS 30
+
// MediaCodecVideoEncoder is a webrtc::VideoEncoder implementation that uses
// Android's MediaCodec SDK API behind the scenes to implement (hopefully)
// HW-backed video encode. This C++ class is implemented as a very thin shim,
@@ -63,7 +75,7 @@
public rtc::MessageHandler {
public:
virtual ~MediaCodecVideoEncoder();
- explicit MediaCodecVideoEncoder(JNIEnv* jni);
+ explicit MediaCodecVideoEncoder(JNIEnv* jni, VideoCodecType codecType);
// webrtc::VideoEncoder implementation. Everything trampolines to
// |codec_thread_| for execution.
@@ -112,13 +124,18 @@
jobject GetOutputBufferInfoBuffer(JNIEnv* jni, jobject j_output_buffer_info);
bool GetOutputBufferInfoIsKeyFrame(JNIEnv* jni, jobject j_output_buffer_info);
jlong GetOutputBufferInfoPresentationTimestampUs(
- JNIEnv* jni,
- jobject j_output_buffer_info);
+ JNIEnv* jni, jobject j_output_buffer_info);
// Deliver any outputs pending in the MediaCodec to our |callback_| and return
// true on success.
bool DeliverPendingOutputs(JNIEnv* jni);
+ // Search for H.264 start codes.
+ int32_t NextNaluPosition(uint8_t *buffer, size_t buffer_size);
+
+ // Type of video codec.
+ VideoCodecType codecType_;
+
// Valid all the time since RegisterEncodeCompleteCallback() Invoke()s to
// |codec_thread_| synchronously.
webrtc::EncodedImageCallback* callback_;
@@ -152,6 +169,7 @@
int last_set_fps_; // Last-requested frame rate.
int64_t current_timestamp_us_; // Current frame timestamps in us.
int frames_received_; // Number of frames received by encoder.
+ int frames_encoded_; // Number of frames encoded by encoder.
int frames_dropped_; // Number of frames dropped by encoder.
int frames_resolution_update_; // Number of frames with new codec resolution.
int frames_in_queue_; // Number of frames in encoder queue.
@@ -165,6 +183,9 @@
std::vector<int64_t> render_times_ms_; // Video frames render time queue.
std::vector<int64_t> frame_rtc_times_ms_; // Time when video frame is sent to
// encoder input.
+ int32_t output_timestamp_; // Last output frame timestamp from timestamps_ Q.
+ int64_t output_render_time_ms_; // Last output frame render time from
+ // render_times_ms_ queue.
// Frame size in bytes fed to MediaCodec.
int yuv_size_;
// True only when between a callback_->Encoded() call return a positive value
@@ -179,8 +200,10 @@
Release();
}
-MediaCodecVideoEncoder::MediaCodecVideoEncoder(JNIEnv* jni)
- : callback_(NULL),
+MediaCodecVideoEncoder::MediaCodecVideoEncoder(
+ JNIEnv* jni, VideoCodecType codecType) :
+ codecType_(codecType),
+ callback_(NULL),
inited_(false),
picture_id_(0),
codec_thread_(new Thread()),
@@ -207,10 +230,12 @@
jclass j_output_buffer_info_class =
FindClass(jni, "org/webrtc/MediaCodecVideoEncoder$OutputBufferInfo");
- j_init_encode_method_ = GetMethodID(jni,
- *j_media_codec_video_encoder_class_,
- "initEncode",
- "(IIII)[Ljava/nio/ByteBuffer;");
+ j_init_encode_method_ = GetMethodID(
+ jni,
+ *j_media_codec_video_encoder_class_,
+ "initEncode",
+ "(Lorg/webrtc/MediaCodecVideoEncoder$VideoCodecType;IIII)"
+ "[Ljava/nio/ByteBuffer;");
j_dequeue_input_buffer_method_ = GetMethodID(
jni, *j_media_codec_video_encoder_class_, "dequeueInputBuffer", "()I");
j_encode_method_ = GetMethodID(
@@ -219,11 +244,11 @@
GetMethodID(jni, *j_media_codec_video_encoder_class_, "release", "()V");
j_set_rates_method_ = GetMethodID(
jni, *j_media_codec_video_encoder_class_, "setRates", "(II)Z");
- j_dequeue_output_buffer_method_ =
- GetMethodID(jni,
- *j_media_codec_video_encoder_class_,
- "dequeueOutputBuffer",
- "()Lorg/webrtc/MediaCodecVideoEncoder$OutputBufferInfo;");
+ j_dequeue_output_buffer_method_ = GetMethodID(
+ jni,
+ *j_media_codec_video_encoder_class_,
+ "dequeueOutputBuffer",
+ "()Lorg/webrtc/MediaCodecVideoEncoder$OutputBufferInfo;");
j_release_output_buffer_method_ = GetMethodID(
jni, *j_media_codec_video_encoder_class_, "releaseOutputBuffer", "(I)Z");
@@ -245,9 +270,15 @@
const webrtc::VideoCodec* codec_settings,
int32_t /* number_of_cores */,
size_t /* max_payload_size */) {
+ if (codec_settings == NULL) {
+ ALOGE("NULL VideoCodec instance");
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
// Factory should guard against other codecs being used with us.
- CHECK(codec_settings->codecType == kVideoCodecVP8) << "Unsupported codec";
+ CHECK(codec_settings->codecType == codecType_) << "Unsupported codec " <<
+ codec_settings->codecType << " for " << codecType_;
+ ALOGD("InitEncode request");
return codec_thread_->Invoke<int32_t>(
Bind(&MediaCodecVideoEncoder::InitEncodeOnCodecThread,
this,
@@ -274,6 +305,7 @@
}
int32_t MediaCodecVideoEncoder::Release() {
+ ALOGD("EncoderRelease request");
return codec_thread_->Invoke<int32_t>(
Bind(&MediaCodecVideoEncoder::ReleaseOnCodecThread, this));
}
@@ -335,8 +367,8 @@
JNIEnv* jni = AttachCurrentThreadIfNeeded();
ScopedLocalRefFrame local_ref_frame(jni);
- ALOGD("InitEncodeOnCodecThread %d x %d. Bitrate: %d kbps. Fps: %d",
- width, height, kbps, fps);
+ ALOGD("InitEncodeOnCodecThread Type: %d. %d x %d. Bitrate: %d kbps. Fps: %d",
+ (int)codecType_, width, height, kbps, fps);
if (kbps == 0) {
kbps = last_set_bitrate_kbps_;
}
@@ -350,6 +382,7 @@
last_set_fps_ = fps;
yuv_size_ = width_ * height_ * 3 / 2;
frames_received_ = 0;
+ frames_encoded_ = 0;
frames_dropped_ = 0;
frames_resolution_update_ = 0;
frames_in_queue_ = 0;
@@ -360,22 +393,28 @@
current_encoding_time_ms_ = 0;
last_input_timestamp_ms_ = -1;
last_output_timestamp_ms_ = -1;
+ output_timestamp_ = 0;
+ output_render_time_ms_ = 0;
timestamps_.clear();
render_times_ms_.clear();
frame_rtc_times_ms_.clear();
drop_next_input_frame_ = false;
picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;
// We enforce no extra stride/padding in the format creation step.
+ jobject j_video_codec_enum = JavaEnumFromIndex(
+ jni, "MediaCodecVideoEncoder$VideoCodecType", codecType_);
jobjectArray input_buffers = reinterpret_cast<jobjectArray>(
jni->CallObjectMethod(*j_media_codec_video_encoder_,
j_init_encode_method_,
+ j_video_codec_enum,
width_,
height_,
kbps,
fps));
CHECK_EXCEPTION(jni);
- if (IsNull(jni, input_buffers))
+ if (IsNull(jni, input_buffers)) {
return WEBRTC_VIDEO_CODEC_ERROR;
+ }
inited_ = true;
switch (GetIntField(jni, *j_media_codec_video_encoder_,
@@ -477,8 +516,8 @@
return WEBRTC_VIDEO_CODEC_ERROR;
}
- ALOGV("Encode frame # %d. Buffer # %d. TS: %lld.",
- frames_received_, j_input_buffer_index, current_timestamp_us_ / 1000);
+ ALOGV("Encoder frame in # %d. TS: %lld. Q: %d",
+ frames_received_ - 1, current_timestamp_us_ / 1000, frames_in_queue_);
jobject j_input_buffer = input_buffers_[j_input_buffer_index];
uint8* yuv_buffer =
@@ -533,8 +572,8 @@
}
CheckOnCodecThread();
JNIEnv* jni = AttachCurrentThreadIfNeeded();
- ALOGD("EncoderRelease: Frames received: %d. Frames dropped: %d.",
- frames_received_, frames_dropped_);
+ ALOGD("EncoderReleaseOnCodecThread: Frames received: %d. Encoded: %d. "
+ "Dropped: %d.", frames_received_, frames_encoded_, frames_dropped_);
ScopedLocalRefFrame local_ref_frame(jni);
for (size_t i = 0; i < input_buffers_.size(); ++i)
jni->DeleteGlobalRef(input_buffers_[i]);
@@ -614,36 +653,41 @@
return false;
}
- // Get frame timestamps from a queue.
- last_output_timestamp_ms_ =
- GetOutputBufferInfoPresentationTimestampUs(jni, j_output_buffer_info) /
- 1000;
- int32_t timestamp = timestamps_.front();
- timestamps_.erase(timestamps_.begin());
- int64_t render_time_ms = render_times_ms_.front();
- render_times_ms_.erase(render_times_ms_.begin());
- int64_t frame_encoding_time_ms = GetCurrentTimeMs() -
- frame_rtc_times_ms_.front();
- frame_rtc_times_ms_.erase(frame_rtc_times_ms_.begin());
- frames_in_queue_--;
-
- // Extract payload and key frame flag.
- int32_t callback_status = 0;
+ // Get key and config frame flags.
jobject j_output_buffer =
GetOutputBufferInfoBuffer(jni, j_output_buffer_info);
bool key_frame = GetOutputBufferInfoIsKeyFrame(jni, j_output_buffer_info);
+
+ // Get frame timestamps from a queue - for non config frames only.
+ int64_t frame_encoding_time_ms = 0;
+ last_output_timestamp_ms_ =
+ GetOutputBufferInfoPresentationTimestampUs(jni, j_output_buffer_info) /
+ 1000;
+ if (frames_in_queue_ > 0) {
+ output_timestamp_ = timestamps_.front();
+ timestamps_.erase(timestamps_.begin());
+ output_render_time_ms_ = render_times_ms_.front();
+ render_times_ms_.erase(render_times_ms_.begin());
+ frame_encoding_time_ms = GetCurrentTimeMs() - frame_rtc_times_ms_.front();
+ frame_rtc_times_ms_.erase(frame_rtc_times_ms_.begin());
+ frames_in_queue_--;
+ }
+
+ // Extract payload.
size_t payload_size = jni->GetDirectBufferCapacity(j_output_buffer);
uint8* payload = reinterpret_cast<uint8_t*>(
jni->GetDirectBufferAddress(j_output_buffer));
CHECK_EXCEPTION(jni);
- ALOGV("Encoder got output buffer # %d. Size: %d. TS: %lld. Latency: %lld."
- " EncTime: %lld",
- output_buffer_index, payload_size, last_output_timestamp_ms_,
+ ALOGV("Encoder frame out # %d. Key: %d. Size: %d. TS: %lld."
+ " Latency: %lld. EncTime: %lld",
+ frames_encoded_, key_frame, payload_size,
+ last_output_timestamp_ms_,
last_input_timestamp_ms_ - last_output_timestamp_ms_,
frame_encoding_time_ms);
// Calculate and print encoding statistics - every 3 seconds.
+ frames_encoded_++;
current_frames_++;
current_bytes_ += payload_size;
current_encoding_time_ms_ += frame_encoding_time_ms;
@@ -663,36 +707,73 @@
}
// Callback - return encoded frame.
+ int32_t callback_status = 0;
if (callback_) {
scoped_ptr<webrtc::EncodedImage> image(
new webrtc::EncodedImage(payload, payload_size, payload_size));
image->_encodedWidth = width_;
image->_encodedHeight = height_;
- image->_timeStamp = timestamp;
- image->capture_time_ms_ = render_time_ms;
+ image->_timeStamp = output_timestamp_;
+ image->capture_time_ms_ = output_render_time_ms_;
image->_frameType = (key_frame ? webrtc::kKeyFrame : webrtc::kDeltaFrame);
image->_completeFrame = true;
webrtc::CodecSpecificInfo info;
memset(&info, 0, sizeof(info));
- info.codecType = kVideoCodecVP8;
- info.codecSpecific.VP8.pictureId = picture_id_;
- info.codecSpecific.VP8.nonReference = false;
- info.codecSpecific.VP8.simulcastIdx = 0;
- info.codecSpecific.VP8.temporalIdx = webrtc::kNoTemporalIdx;
- info.codecSpecific.VP8.layerSync = false;
- info.codecSpecific.VP8.tl0PicIdx = webrtc::kNoTl0PicIdx;
- info.codecSpecific.VP8.keyIdx = webrtc::kNoKeyIdx;
- picture_id_ = (picture_id_ + 1) & 0x7FFF;
+ info.codecType = codecType_;
+ if (codecType_ == kVideoCodecVP8) {
+ info.codecSpecific.VP8.pictureId = picture_id_;
+ info.codecSpecific.VP8.nonReference = false;
+ info.codecSpecific.VP8.simulcastIdx = 0;
+ info.codecSpecific.VP8.temporalIdx = webrtc::kNoTemporalIdx;
+ info.codecSpecific.VP8.layerSync = false;
+ info.codecSpecific.VP8.tl0PicIdx = webrtc::kNoTl0PicIdx;
+ info.codecSpecific.VP8.keyIdx = webrtc::kNoKeyIdx;
+ picture_id_ = (picture_id_ + 1) & 0x7FFF;
+ }
// Generate a header describing a single fragment.
webrtc::RTPFragmentationHeader header;
memset(&header, 0, sizeof(header));
- header.VerifyAndAllocateFragmentationHeader(1);
- header.fragmentationOffset[0] = 0;
- header.fragmentationLength[0] = image->_length;
- header.fragmentationPlType[0] = 0;
- header.fragmentationTimeDiff[0] = 0;
+ if (codecType_ == kVideoCodecVP8) {
+ header.VerifyAndAllocateFragmentationHeader(1);
+ header.fragmentationOffset[0] = 0;
+ header.fragmentationLength[0] = image->_length;
+ header.fragmentationPlType[0] = 0;
+ header.fragmentationTimeDiff[0] = 0;
+ } else if (codecType_ == kVideoCodecH264) {
+ // For H.264 search for start codes.
+ int32_t scPositions[MAX_NALUS_PERFRAME + 1] = {};
+ int32_t scPositionsLength = 0;
+ int32_t scPosition = 0;
+ while (scPositionsLength < MAX_NALUS_PERFRAME) {
+ int32_t naluPosition = NextNaluPosition(
+ payload + scPosition, payload_size - scPosition);
+ if (naluPosition < 0) {
+ break;
+ }
+ scPosition += naluPosition;
+ scPositions[scPositionsLength++] = scPosition;
+ scPosition += H264_SC_LENGTH;
+ }
+ if (scPositionsLength == 0) {
+ ALOGE("Start code is not found!");
+ ALOGE("Data 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x",
+ image->_buffer[0], image->_buffer[1], image->_buffer[2],
+ image->_buffer[3], image->_buffer[4], image->_buffer[5]);
+ ResetCodec();
+ return false;
+ }
+ scPositions[scPositionsLength] = payload_size;
+ header.VerifyAndAllocateFragmentationHeader(scPositionsLength);
+ for (size_t i = 0; i < scPositionsLength; i++) {
+ header.fragmentationOffset[i] = scPositions[i] + H264_SC_LENGTH;
+ header.fragmentationLength[i] =
+ scPositions[i + 1] - header.fragmentationOffset[i];
+ header.fragmentationPlType[i] = 0;
+ header.fragmentationTimeDiff[i] = 0;
+ }
+ }
callback_status = callback_->Encoded(*image, &info, &header);
}
@@ -709,39 +790,90 @@
if (callback_status > 0) {
drop_next_input_frame_ = true;
- // Theoretically could handle callback_status<0 here, but unclear what that
- // would mean for us.
+ // Theoretically could handle callback_status<0 here, but unclear what
+ // that would mean for us.
}
}
return true;
}
+// Returns the offset, relative to |buffer|, of the first 4-byte H.264 start
+// code (00 00 00 01) found in the first |buffer_size| bytes, or -1 if there
+// is none. A start code occupying the very last four bytes of the buffer is
+// not reported, because the scan stops one position short of it.
+int32_t MediaCodecVideoEncoder::NextNaluPosition(
+    uint8_t *buffer, size_t buffer_size) {
+  if (buffer_size < H264_SC_LENGTH) {
+    return -1;
+  }
+  uint8_t *head = buffer;
+  // Set end buffer pointer to 4 bytes before actual buffer end so we can
+  // access head[1], head[2] and head[3] in a loop without buffer overrun.
+  uint8_t *end = buffer + buffer_size - H264_SC_LENGTH;
+
+  while (head < end) {
+    if (head[0]) {
+      head++;
+      continue;
+    }
+    if (head[1]) {  // got 00xx
+      head += 2;
+      continue;
+    }
+    if (head[2]) {  // got 0000xx
+      head += 3;
+      continue;
+    }
+    if (head[3] != 0x01) {  // got 000000xx
+      // xx may itself be 00, in which case a start code can begin at
+      // head + 1 (e.g. 00 00 00 00 01), so advance by one byte only.
+      head++;
+      continue;
+    }
+    return static_cast<int32_t>(head - buffer);
+  }
+  return -1;
+}
+
+
MediaCodecVideoEncoderFactory::MediaCodecVideoEncoderFactory() {
JNIEnv* jni = AttachCurrentThreadIfNeeded();
ScopedLocalRefFrame local_ref_frame(jni);
jclass j_encoder_class = FindClass(jni, "org/webrtc/MediaCodecVideoEncoder");
- bool is_platform_supported = jni->CallStaticBooleanMethod(
- j_encoder_class,
- GetStaticMethodID(jni, j_encoder_class, "isPlatformSupported", "()Z"));
- CHECK_EXCEPTION(jni);
- if (!is_platform_supported)
- return;
+ supported_codecs_.clear();
- // Wouldn't it be nice if MediaCodec exposed the maximum capabilities of the
- // encoder? Sure would be. Too bad it doesn't. So we hard-code some
- // reasonable defaults.
- supported_codecs_.push_back(
- VideoCodec(kVideoCodecVP8, "VP8", 1280, 1280, 30));
+ bool is_vp8_hw_supported = jni->CallStaticBooleanMethod(
+ j_encoder_class,
+ GetStaticMethodID(jni, j_encoder_class, "isVp8HwSupported", "()Z"));
+ CHECK_EXCEPTION(jni);
+ if (is_vp8_hw_supported) {
+ ALOGD("VP8 HW Encoder supported.");
+ supported_codecs_.push_back(VideoCodec(kVideoCodecVP8, "VP8",
+ MAX_VIDEO_WIDTH, MAX_VIDEO_HEIGHT, MAX_VIDEO_FPS));
+ }
+
+ bool is_h264_hw_supported = jni->CallStaticBooleanMethod(
+ j_encoder_class,
+ GetStaticMethodID(jni, j_encoder_class, "isH264HwSupported", "()Z"));
+ CHECK_EXCEPTION(jni);
+ if (is_h264_hw_supported) {
+ ALOGD("H.264 HW Encoder supported.");
+ supported_codecs_.push_back(VideoCodec(kVideoCodecH264, "H264",
+ MAX_VIDEO_WIDTH, MAX_VIDEO_HEIGHT, MAX_VIDEO_FPS));
+ }
}
MediaCodecVideoEncoderFactory::~MediaCodecVideoEncoderFactory() {}
webrtc::VideoEncoder* MediaCodecVideoEncoderFactory::CreateVideoEncoder(
- webrtc::VideoCodecType type) {
- if (type != kVideoCodecVP8 || supported_codecs_.empty())
+ VideoCodecType type) {
+ if (supported_codecs_.empty()) {
return NULL;
- return new MediaCodecVideoEncoder(AttachCurrentThreadIfNeeded());
+ }
+ for (std::vector<VideoCodec>::const_iterator it = supported_codecs_.begin();
+ it != supported_codecs_.end(); ++it) {
+ if (it->type == type) {
+ ALOGD("Create HW video encoder for type %d (%s).",
+ (int)type, it->name.c_str());
+ return new MediaCodecVideoEncoder(AttachCurrentThreadIfNeeded(), type);
+ }
+ }
+ return NULL;
}
const std::vector<MediaCodecVideoEncoderFactory::VideoCodec>&
@@ -751,6 +883,7 @@
void MediaCodecVideoEncoderFactory::DestroyVideoEncoder(
webrtc::VideoEncoder* encoder) {
+ ALOGD("Destroy video encoder.");
delete encoder;
}