Rockchip: add keyframe sps pps header

Change-Id: Ie2068260dfc0036065cd1a0b29a6eef430029205
Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Reviewed-on: https://chromium-review.googlesource.com/365489
Commit-Ready: Pawel Osciak <posciak@chromium.org>
Tested-by: Pawel Osciak <posciak@chromium.org>
Reviewed-by: Kuang-che Wu <kcwu@chromium.org>
diff --git a/libv4l-rockchip_v2/Makefile.am b/libv4l-rockchip_v2/Makefile.am
index 335e41d..1ae7d10 100644
--- a/libv4l-rockchip_v2/Makefile.am
+++ b/libv4l-rockchip_v2/Makefile.am
@@ -8,6 +8,7 @@
 
 libv4l_encplugin_la_SOURCES = \
 	libv4l-encplugin-rockchip.c \
+	libvepu/streams.c \
 	libvepu/rk_vepu.c \
 	libvepu/rk_vepu_debug.c \
 	libvepu/common/rk_venc_rate_control.c \
diff --git a/libv4l-rockchip_v2/libv4l-encplugin-rockchip.c b/libv4l-rockchip_v2/libv4l-encplugin-rockchip.c
index 56b828f..968b1e5 100644
--- a/libv4l-rockchip_v2/libv4l-encplugin-rockchip.c
+++ b/libv4l-rockchip_v2/libv4l-encplugin-rockchip.c
@@ -367,6 +367,8 @@
 	if (ret)
 		return ret;
 
+	rk_vepu_assemble_bitstream(ctx->enc, fd, buffer);
+
 	assert(!ctx->can_qbuf);
 
 	/* Get the encoder configuration and update the library. */
@@ -426,6 +428,11 @@
 		case V4L2_CID_MPEG_VIDEO_BITRATE:
 			runtime_param_ptr->bitrate = ext_ctrls->controls[i].value;
 			break;
+		case V4L2_CID_MPEG_VIDEO_H264_SPS_PPS_BEFORE_IDR:
+			/* Kept in init_param; read when the encoder is initialized. */
+			ctx->init_param.h264e.h264_sps_pps_before_idr =
+				ext_ctrls->controls[i].value;
+			break;
 		default:
 			break;
 		}
@@ -576,6 +581,8 @@
 	init_param.width = crop.c.width;
 	init_param.height = crop.c.height;
 
+	init_param.h264e = ctx->init_param.h264e;
+
 	/*
 	 * If the encoder library has initialized and parameters have not
 	 * changed, skip the initialization.
diff --git a/libv4l-rockchip_v2/libvepu/common/rk_venc.h b/libv4l-rockchip_v2/libvepu/common/rk_venc.h
index caaaf82..de25bdb 100644
--- a/libv4l-rockchip_v2/libvepu/common/rk_venc.h
+++ b/libv4l-rockchip_v2/libvepu/common/rk_venc.h
@@ -58,6 +58,8 @@
 	void (*apply_param)(struct rk_venc *enc);
 	void (*get_payloads)(struct rk_venc *enc, size_t *num, uint32_t **ids,
 			     void ***payloads, uint32_t **payload_sizes);
+	int (*assemble_bitstream)(struct rk_venc *enc, int fd,
+				  struct v4l2_buffer *buffer);
 };
 
 enum ENC_FORMAT {
diff --git a/libv4l-rockchip_v2/libvepu/h264e/h264e.c b/libv4l-rockchip_v2/libvepu/h264e/h264e.c
index d21ec96..2976513 100644
--- a/libv4l-rockchip_v2/libvepu/h264e/h264e.c
+++ b/libv4l-rockchip_v2/libvepu/h264e/h264e.c
@@ -18,11 +18,13 @@
 #include <malloc.h>
 #include <memory.h>
 #include <stdio.h>
+#include <sys/mman.h>
 
 #include "h264e.h"
 #include "h264e_rate_control.h"
 #include "../common/rk_venc_rate_control.h"
 #include "../rk_vepu_debug.h"
+#include "../streams.h"
 
 const int32_t h264e_qp_tbl[2][11] = {
 	{ 27, 44, 72, 119, 192, 314, 453, 653, 952, 1395, 0x7FFFFFFF },
@@ -112,16 +114,16 @@
 static void h264e_init_rc(struct rk_venc *ictx)
 {
 	struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx;
-	struct mb_qpctrl *qpCtrl = &ctx->mbrc.qp_ctrl;
+	struct mb_qpctrl *qp_ctrl = &ctx->mbrc.qp_ctrl;
 	struct v4l2_plugin_rate_control *rc = &ictx->rc;
 
-	memset(qpCtrl, 0, sizeof(*qpCtrl));
+	memset(qp_ctrl, 0, sizeof(*qp_ctrl));
 
 	ctx->mbrc.mb_rc_en = true;
-	qpCtrl->check_points = MIN(ctx->sps.pic_height_in_map_units - 1,
+	qp_ctrl->check_points = MIN(ctx->sps.pic_height_in_map_units - 1,
 				  CHECK_POINTS_MAX);
-	qpCtrl->chkptr_distance =
-		MB_PER_PIC(ctx) / (qpCtrl->check_points + 1);
+	qp_ctrl->chkptr_distance =
+		MB_PER_PIC(ctx) / (qp_ctrl->check_points + 1);
 
 	rc->pic_rc_en = true;
 	rc->fps_num = 30;
@@ -143,6 +145,250 @@
 	rk_venc_init_pic_rc(&ctx->venc.rc, h264e_qp_tbl);
 }
 
+static void h264e_assemble_sps(struct rk_venc *ictx, struct stream_s *sps)
+{
+	struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx;
+	/* Write an SPS NAL unit into *sps from the values held in ctx->sps. */
+	stream_buffer_init(sps);
+	stream_put_bits(sps, 0, 8, "start code");
+	stream_put_bits(sps, 0, 8, "start code");
+	stream_put_bits(sps, 0, 8, "start code");
+	stream_put_bits(sps, 1, 8, "start code");
+	/* NAL unit header byte; nal_unit_type 7 marks a sequence parameter set. */
+	stream_put_bits(sps, 0, 1, "forbidden_zero_bit");
+	stream_put_bits(sps, 1, 2, "nal_ref_idc");
+	stream_put_bits(sps, 7, 5, "nal_unit_type");
+
+	stream_put_bits(sps, ctx->sps.profile_idc, 8, "profile_idc");
+	stream_put_bits(sps, ctx->sps.constraint_set0_flag, 1,
+		"constraint_set0_flag");
+	stream_put_bits(sps, ctx->sps.constraint_set1_flag, 1,
+		"constraint_set1_flag");
+	stream_put_bits(sps, ctx->sps.constraint_set2_flag, 1,
+		"constraint_set2_flag");
+	stream_put_bits(sps, ctx->sps.constraint_set3_flag, 1,
+		"constraint_set3_flag");
+
+	stream_put_bits(sps, 0, 4, "reserved_zero_4bits");
+	stream_put_bits(sps, ctx->sps.level_idc, 8, "level_idc");
+
+	stream_write_ue(sps, ctx->sps.seq_parameter_set_id,
+		"seq_parameter_set_id");
+
+	if (ctx->sps.profile_idc >= 100) {
+		stream_write_ue(sps, ctx->sps.chroma_format_idc,
+			"chroma_format_idc");
+		stream_write_ue(sps, ctx->sps.bit_depth_luma_minus8,
+			"bit_depth_luma_minus8");
+		stream_write_ue(sps, ctx->sps.bit_depth_chroma_minus8,
+			"bit_depth_chroma_minus8");
+		stream_put_bits(sps,
+			ctx->sps.qpprime_y_zero_transform_bypass_flag, 1,
+			"qpprime_y_zero_transform_bypass_flag");
+		stream_put_bits(sps, 0, 1, "seq_scaling_matrix_present_flag");
+	}
+
+	stream_write_ue(sps, ctx->sps.log2_max_frame_num_minus4,
+		"log2_max_frame_num_minus4");
+
+	stream_write_ue(sps, ctx->sps.pic_order_cnt_type, "pic_order_cnt_type");
+	/* NOTE(review): no POC type 0/1 fields follow; presumably type is 2. */
+	stream_write_ue(sps, ctx->sps.max_num_ref_frames, "num_ref_frames");
+
+	stream_put_bits(sps, ctx->sps.gaps_in_frame_num_value_allowed_flag, 1,
+		"gaps_in_frame_num_value_allowed_flag");
+
+	stream_write_ue(sps, ctx->sps.pic_width_in_mbs - 1,
+			"pic_width_in_mbs_minus1");
+
+	stream_write_ue(sps, ctx->sps.pic_height_in_map_units - 1,
+			"pic_height_in_map_units_minus1");
+
+	stream_put_bits(sps, ctx->sps.frame_mbs_only_flag, 1,
+		"frame_mbs_only_flag");
+
+	stream_put_bits(sps, ctx->sps.direct_8x8_inference_flag, 1,
+		"direct_8x8_inference_flag");
+
+	stream_put_bits(sps, ctx->sps.frame_cropping_flag, 1,
+		"frame_cropping_flag");
+	if (ctx->sps.frame_cropping_flag) {
+		stream_write_ue(sps, ctx->sps.frame_crop_left_offset,
+			"frame_crop_left_offset");
+		stream_write_ue(sps, ctx->sps.frame_crop_right_offset,
+			"frame_crop_right_offset");
+		stream_write_ue(sps, ctx->sps.frame_crop_top_offset,
+			"frame_crop_top_offset");
+		stream_write_ue(sps, ctx->sps.frame_crop_bottom_offset,
+			"frame_crop_bottom_offset");
+	}
+	stream_put_bits(sps, ctx->sps.vui_parameters_present_flag, 1,
+		"vui_parameters_present_flag");
+	if (ctx->sps.vui_parameters_present_flag) {
+		/* no explicit sample aspect ratio is signalled */
+		stream_put_bits(sps, 0, 1, "aspect_ratio_info_present_flag");
+		stream_put_bits(sps, 0, 1, "overscan_info_present_flag");
+		stream_put_bits(sps, 0, 1, "video_signal_type_present_flag");
+		stream_put_bits(sps, 0, 1, "chroma_loc_info_present_flag");
+		/* Timing: tick is fps_denom, time_scale is fps_num * 2. */
+		if (ictx->rc.fps_num != 0) {
+			stream_put_bits(sps, 1, 1, "timing_info_present_flag");
+			stream_put_bits(sps, ictx->rc.fps_denom >> 16, 16,
+				"num_units_in_tick msb");
+			stream_put_bits(sps, ictx->rc.fps_denom & 0xFFFF, 16,
+				"num_units_in_tick lsb");
+			stream_put_bits(sps, (ictx->rc.fps_num * 2) >> 16, 16,
+				"time_scale msb");
+			stream_put_bits(sps, (ictx->rc.fps_num * 2) & 0xFFFF,
+				16, "time_scale lsb");
+			stream_put_bits(sps, 0, 1, "fixed_frame_rate_flag");
+		} else {
+			stream_put_bits(sps, 0, 1, "timing_info_present_flag");
+		}
+
+		stream_put_bits(sps, 0, 1, "nal_hrd_parameters_present_flag");
+		stream_put_bits(sps, 0, 1, "vcl_hrd_parameters_present_flag");
+		stream_put_bits(sps, 0, 1, "pic_struct_present_flag");
+		stream_put_bits(sps, 1, 1, "bit_stream_restriction_flag");
+
+		/* fields required because bit_stream_restriction_flag is 1 */
+		{
+			stream_put_bits(sps, 1, 1,
+				"motion_vectors_over_pic_boundaries");
+
+			stream_write_ue(sps, 0, "max_bytes_per_pic_denom");
+
+			stream_write_ue(sps, 0, "max_bits_per_mb_denom");
+
+			/* restricted by hardware */
+			stream_write_ue(sps, 9, "log2_mv_length_horizontal");
+			stream_write_ue(sps, 7, "log2_mv_length_vertical");
+
+			stream_write_ue(sps, 0, "num_reorder_frames");
+
+			stream_write_ue(sps, ctx->sps.max_num_ref_frames,
+				"max_dec_frame_buffering");
+		}
+
+	}
+	stream_put_bits(sps, 1, 1, "rbsp_stop_one_bit");
+	/* Round the bit count up to a whole number of bytes. */
+	stream_buffer_flush(sps);
+}
+
+static void h264e_assemble_pps(struct rk_venc *ictx, struct stream_s *pps)
+{
+	struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx;
+	/* Write a PPS NAL unit into *pps from the values held in ctx->pps. */
+	stream_buffer_init(pps);
+
+	stream_put_bits(pps, 0, 8, "start code");
+	stream_put_bits(pps, 0, 8, "start code");
+	stream_put_bits(pps, 0, 8, "start code");
+	stream_put_bits(pps, 1, 8, "start code");
+	/* NAL unit header byte; nal_unit_type 8 marks a picture parameter set. */
+	stream_put_bits(pps, 0, 1, "forbidden_zero_bit");
+	stream_put_bits(pps, 1, 2, "nal_ref_idc");
+	stream_put_bits(pps, 8, 5, "nal_unit_type");
+
+	stream_write_ue(pps, ctx->pps.pic_parameter_set_id,
+		"pic_parameter_set_id");
+	stream_write_ue(pps, ctx->pps.seq_parameter_set_id,
+		"seq_parameter_set_id");
+
+	stream_put_bits(pps, ctx->pps.entropy_coding_mode_flag, 1,
+		"entropy_coding_mode_flag");
+	stream_put_bits(pps, ctx->pps.pic_order_present_flag, 1,
+		"pic_order_present_flag");
+
+	stream_write_ue(pps, ctx->pps.num_slice_groups_minus_1,
+		"num_slice_groups_minus1");
+	stream_write_ue(pps, ctx->pps.num_ref_idx_l0_default_active_minus1,
+		"num_ref_idx_l0_active_minus1");
+	stream_write_ue(pps, ctx->pps.num_ref_idx_l1_default_active_minus1,
+		"num_ref_idx_l1_active_minus1");
+
+	stream_put_bits(pps, ctx->pps.weighted_pred_flag, 1,
+		"weighted_pred_flag");
+	stream_put_bits(pps, ctx->pps.weighted_bipred_idc, 2,
+		"weighted_bipred_idc");
+
+	stream_write_se(pps, ctx->pps.pic_init_qp_minus26,
+		"pic_init_qp_minus26");
+	stream_write_se(pps, ctx->pps.pic_init_qs_minus26,
+		"pic_init_qs_minus26");
+	stream_write_se(pps, ctx->pps.chroma_qp_index_offset,
+		"chroma_qp_index_offset");
+
+	stream_put_bits(pps, ctx->pps.deblocking_filter_control_present_flag, 1,
+		"deblocking_filter_control_present_flag");
+	stream_put_bits(pps, ctx->pps.constrained_intra_pred_flag, 1,
+		"constrained_intra_pred_flag");
+
+	stream_put_bits(pps, ctx->pps.redundant_pic_cnt_present_flag, 1,
+		"redundant_pic_cnt_present_flag");
+	/* The three fields below are written only with transform_8x8_mode_flag. */
+	if (ctx->pps.transform_8x8_mode_flag) {
+		stream_put_bits(pps, 1, 1, "transform_8x8_mode_flag");
+		stream_put_bits(pps, 0, 1, "pic_scaling_matrix_present_flag");
+		stream_write_se(pps, ctx->pps.chroma_qp_index_offset,
+				"chroma_qp_index_offset"); /* NOTE(review): second_ offset? */
+	}
+	stream_put_bits(pps, 1, 1, "rbsp_stop_one_bit");
+
+	stream_buffer_flush(pps);
+}
+
+/*
+ * Insert H.264 emulation prevention bytes in place, starting at byte 6:
+ * 0x03 goes before a byte <= 3 that follows two zero bytes; overflow is cut.
+ */
+static void h264e_nal_escape_c(struct stream_s *strm)
+{
+	uint8_t tmp[sizeof(strm->buffer)];
+	const size_t cap = sizeof(strm->buffer);
+	size_t len = strm->bits_cnt >> 3;
+	size_t i = 2 + 4, j = 2 + 4; /* skip nal prefix bytes */
+
+	if (len <= i || len > cap)
+		return; /* nothing to escape, or bits_cnt is out of range */
+	memcpy(tmp, strm->buffer, len);
+	for (; i < len; i++) {
+		size_t esc = !strm->buffer[j - 2] && !strm->buffer[j - 1] &&
+			     tmp[i] <= 3;
+		if (j + esc >= cap) {
+			VPU_PLG_ERR("escaped NAL exceeds stream buffer\n");
+			break;
+		}
+		if (esc)
+			strm->buffer[j++] = 3;
+		strm->buffer[j++] = tmp[i];
+	}
+	strm->bits_cnt = j << 3;
+}
+
+/* Build the escaped SPS and PPS and cache them back to back in the context. */
+static void h264e_build_stream_header(struct rk_venc *ictx)
+{
+	struct rk_h264_encoder *enc = (struct rk_h264_encoder *)ictx;
+	struct stream_s sps_strm, pps_strm;
+	int sps_len, pps_len;
+
+	h264e_assemble_sps(ictx, &sps_strm);
+	h264e_nal_escape_c(&sps_strm);
+	sps_len = stream_buffer_bytes(&sps_strm);
+
+	h264e_assemble_pps(ictx, &pps_strm);
+	h264e_nal_escape_c(&pps_strm);
+	pps_len = stream_buffer_bytes(&pps_strm);
+
+	enc->stream_header_size = sps_len + pps_len;
+	assert(enc->stream_header_size <= H264E_MAX_STREAM_HEADER_SIZE);
+
+	memcpy(enc->stream_header, sps_strm.buffer, sps_len);
+	memcpy(&enc->stream_header[sps_len], pps_strm.buffer, pps_len);
+}
+
 static int h264e_init(struct rk_venc *ictx,
 	struct rk_vepu_init_param *param)
 {
@@ -154,6 +400,8 @@
 	h264e_init_pps(&ctx->pps);
 	h264e_init_slice(&ctx->slice);
 
+	ctx->h264_sps_pps_before_idr = param->h264e.h264_sps_pps_before_idr;
+
 	ctx->width = param->width;
 	ctx->height = param->height;
 	ctx->slice_size_mb_rows = 0;
@@ -183,6 +431,8 @@
 
 	ctx->rk_ctrl_ids[0] = V4L2_CID_PRIVATE_ROCKCHIP_REG_PARAMS;
 
+	h264e_build_stream_header(ictx);
+
 	return 0;
 }
 
@@ -335,6 +585,44 @@
 	*payload_sizes = ctx->rk_payload_sizes;
 }
 
+/*
+ * Prepend the cached stream header to the frame in an mmap-ed buffer when
+ * h264_sps_pps_before_idr is set and hw_info.frame_coding_type is 1.
+ * Returns 0 on success or when nothing is to be done, -1 on failure.
+ */
+static int h264e_assemble_bitstream(struct rk_venc *ictx, int fd,
+	struct v4l2_buffer *buffer)
+{
+	struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx;
+	struct v4l2_plane *plane = buffer->m.planes;
+	uint8_t *map;
+
+	if (!ctx->h264_sps_pps_before_idr ||
+	    ctx->hw_info.frame_coding_type != 1)
+		return 0;
+	if (V4L2_TYPE_IS_OUTPUT(buffer->type) ||
+	    buffer->memory != V4L2_MEMORY_MMAP)
+		return -1;
+	/* The header must fit in the plane together with the frame. */
+	if (plane->bytesused + ctx->stream_header_size > plane->length)
+		return -1;
+
+	buffer->length = 1;
+
+	map = mmap(NULL, plane->length, PROT_READ | PROT_WRITE, MAP_SHARED,
+		   fd, plane->m.mem_offset);
+	if (map == MAP_FAILED)
+		return -1;
+
+	memmove(map + ctx->stream_header_size, map, plane->bytesused);
+	memcpy(map, ctx->stream_header, ctx->stream_header_size);
+	plane->bytesused += ctx->stream_header_size;
+	munmap(map, plane->length);
+
+	return 0;
+}
+
+
 static struct rk_venc_ops h264_enc_ops = {
 	.init = h264e_init,
 	.before_encode = h264e_begin_picture,
@@ -343,6 +631,7 @@
 	.update_priv = h264e_update_priv,
 	.apply_param = h264e_apply_param,
 	.get_payloads = h264e_get_payloads,
+	.assemble_bitstream = h264e_assemble_bitstream,
 };
 
 struct rk_venc* rk_h264_encoder_alloc_ctx(void)
diff --git a/libv4l-rockchip_v2/libvepu/h264e/h264e.h b/libv4l-rockchip_v2/libvepu/h264e/h264e.h
index 817bb4e..32b8bf7 100644
--- a/libv4l-rockchip_v2/libvepu/h264e/h264e.h
+++ b/libv4l-rockchip_v2/libvepu/h264e/h264e.h
@@ -28,6 +28,8 @@
 #include "../common/rk_venc.h"
 
 #define H264E_NUM_CTRLS	1
+#define H264E_MAX_STREAM_HEADER_SIZE 256
+
 struct rk_h264_encoder {
 	struct rk_venc venc;
 
@@ -39,6 +41,10 @@
 
 	struct h264_mb_rate_control mbrc;
 
+	char stream_header[H264E_MAX_STREAM_HEADER_SIZE];
+	int stream_header_size;
+	int h264_sps_pps_before_idr;
+
 	int width;
 	int height;
 
diff --git a/libv4l-rockchip_v2/libvepu/rk_vepu.c b/libv4l-rockchip_v2/libvepu/rk_vepu.c
index 7959184..911ee4e 100644
--- a/libv4l-rockchip_v2/libvepu/rk_vepu.c
+++ b/libv4l-rockchip_v2/libvepu/rk_vepu.c
@@ -119,3 +119,13 @@
 	ienc->ops->apply_param(ienc);
 	return 0;
 }
+
+/* Run the encoder's optional assemble_bitstream hook; 0 if it has none. */
+int rk_vepu_assemble_bitstream(void *enc, int fd, struct v4l2_buffer *buffer)
+{
+	struct rk_venc *venc = enc;
+
+	assert(enc != NULL && buffer != NULL);
+	return venc->ops->assemble_bitstream ?
+		venc->ops->assemble_bitstream(venc, fd, buffer) : 0;
+}
diff --git a/libv4l-rockchip_v2/libvepu/rk_vepu_interface.h b/libv4l-rockchip_v2/libvepu/rk_vepu_interface.h
index eb6e98e..1e84ddc 100644
--- a/libv4l-rockchip_v2/libvepu/rk_vepu_interface.h
+++ b/libv4l-rockchip_v2/libvepu/rk_vepu_interface.h
@@ -24,6 +24,10 @@
 #define V4L2_CID_PRIVATE_ROCKCHIP_HW_PARAMS	(V4L2_CID_CUSTOM_BASE + 2)
 #define V4L2_CID_PRIVATE_ROCKCHIP_GET_PARAMS	(V4L2_CID_CUSTOM_BASE + 3)
 
+#ifndef V4L2_CID_MPEG_VIDEO_H264_SPS_PPS_BEFORE_IDR
+#define V4L2_CID_MPEG_VIDEO_H264_SPS_PPS_BEFORE_IDR (V4L2_CID_MPEG_BASE+388)
+#endif
+
 /* The maximum number of controls returned by rk_vepu_get_config(). */
 #define MAX_NUM_GET_CONFIG_CTRLS 5
 
@@ -32,6 +36,11 @@
 	uint32_t height; /* video height */
 	uint32_t input_format; /* V4L2 fourcc pixel format */
 	uint32_t output_format; /* V4L2 fourcc pixel format */
+	union {
+		struct {
+			bool h264_sps_pps_before_idr;
+		} h264e;
+	};
 };
 
 struct rk_vepu_runtime_param {
@@ -109,5 +118,7 @@
 int rk_vepu_update_param(void *enc,
 			 struct rk_vepu_runtime_param *runtime_param);
 
+int rk_vepu_assemble_bitstream(void *enc, int fd, struct v4l2_buffer *buffer);
+
 #endif  // LIBVPU_RK_VEPU_INTERFACE_H_
 
diff --git a/libv4l-rockchip_v2/libvepu/streams.c b/libv4l-rockchip_v2/libvepu/streams.c
new file mode 100644
index 0000000..d05161b
--- /dev/null
+++ b/libv4l-rockchip_v2/libvepu/streams.c
@@ -0,0 +1,100 @@
+#include "streams.h"
+
+#include <assert.h>
+#include <memory.h>
+
+/*
+ * bit stream assembler
+ */
+
+/* Empty the stream; bytes are zeroed since bits are later OR-ed in. */
+void stream_buffer_init(struct stream_s *buffer)
+{
+	memset(buffer->buffer, 0, sizeof(buffer->buffer));
+	buffer->bits_cnt = 0;
+}
+
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define ROUND_UP(x, y) ((((x)-1) | __round_mask(x, y))+1)
+/* Per-stream helpers: current byte, free bits, bits left in this byte. */
+#define STREAM_BYTES(stream) ((stream)->bits_cnt >> 3)
+#define STREAM_LAST_BYTE(stream) ((stream)->buffer[STREAM_BYTES(stream)])
+#define STREAM_TAILROOM(stream) \
+	((sizeof((stream)->buffer) << 3) - (stream)->bits_cnt)
+#define STREAM_ALIGN_BITS(stream) (8 - ((stream)->bits_cnt % 8))
+
+void stream_buffer_flush(struct stream_s *stream)
+{
+	stream->bits_cnt = (stream->bits_cnt + 7) & ~(u32)7; /* byte-align */
+}
+
+int stream_buffer_bytes(struct stream_s *stream)
+{
+	return stream->bits_cnt / 8; /* whole bytes only */
+}
+
+/* Append the low 'bits' (1..32) bits of value, MSB first, if they fit. */
+void stream_put_bits(struct stream_s *stream, u32 value, int bits,
+		     const char *name)
+{
+	if (bits < 1 || bits > 32 || (size_t)bits > STREAM_TAILROOM(stream))
+		return;
+	if (bits < 32) /* a shift by the full width would be undefined */
+		value &= ((u32)1 << bits) - 1;
+	while (bits > 0) {
+		int align_bits = STREAM_ALIGN_BITS(stream);
+		int n = align_bits > bits ? bits : align_bits;
+		int shift = align_bits - bits;
+
+		STREAM_LAST_BYTE(stream)
+			|= (u8)(shift > 0 ? value << shift : value >> -shift);
+		bits -= n;
+		stream->bits_cnt += n;
+	}
+}
+
+void stream_write_ue(struct stream_s *fifo, u32 val, const char *name)
+{
+	u32 num_bits = 0;
+	/* Write val as an unsigned Exp-Golomb code, ue(v). */
+	assert(val < 0x7fffffff);
+	/* After the loop num_bits is the bit length of val + 1. */
+	val++;
+	while (val >> ++num_bits);
+	/* Codes longer than 23 bits are written in chunks of <= 24 bits. */
+	if (num_bits > 12) {
+		u32 tmp;
+
+		tmp = num_bits - 1;
+		/* prefix: num_bits - 1 zero bits */
+		if (tmp > 24) {
+			tmp -= 24;
+			stream_put_bits(fifo, 0, 24, name);
+		}
+
+		stream_put_bits(fifo, 0, tmp, name);
+		/* suffix: the num_bits significant bits of val + 1 */
+		if (num_bits > 24) {
+			num_bits -= 24;
+			stream_put_bits(fifo, val >> num_bits, 24, name);
+			val &= (1 << num_bits) - 1;
+		}
+
+		stream_put_bits(fifo, val, num_bits, name);
+	} else {
+		stream_put_bits(fifo, val, 2 * num_bits - 1, name);
+	}
+}
+
+/*
+ * Write val as a signed Exp-Golomb code, se(v): a positive k is mapped to
+ * code number 2k - 1 and a non-positive k to -2k, which is then written
+ * with stream_write_ue().
+ */
+void stream_write_se(struct stream_s *fifo, s32 val, const char *name)
+{
+	u32 code_num = (val > 0) ? (u32)(2 * val - 1) : (u32)(-2 * val);
+
+	stream_write_ue(fifo, code_num, name);
+}
+
diff --git a/libv4l-rockchip_v2/libvepu/streams.h b/libv4l-rockchip_v2/libvepu/streams.h
new file mode 100644
index 0000000..b5fea1c
--- /dev/null
+++ b/libv4l-rockchip_v2/libvepu/streams.h
@@ -0,0 +1,23 @@
+#ifndef STREAMS_H
+#define STREAMS_H
+
+#include "common/rk_venc.h"
+
+#define STREAM_BUFFER_SIZE 128
+
+/* Fixed-size buffer into which a bitstream is assembled bit by bit. */
+struct stream_s {
+	u8 buffer[STREAM_BUFFER_SIZE]; /* assembled bytes */
+	u32 bits_cnt; /* number of bits written so far */
+};
+
+void stream_buffer_init(struct stream_s *stream);
+void stream_buffer_flush(struct stream_s *stream);
+int stream_buffer_bytes(struct stream_s *stream);
+/* Bit writers; put_bits does not use name, the other two pass it on. */
+void stream_put_bits(struct stream_s *buffer, u32 value, int bits,
+		     const char *name);
+void stream_write_se(struct stream_s *fifo, s32 val, const char *name);
+void stream_write_ue(struct stream_s *fifo, u32 val, const char *name);
+
+#endif