diff -u -ruX ../difX ../x264.rcapi/common/cabac.c ./common/cabac.c --- ../x264.rcapi/common/cabac.c 2006-07-16 14:04:57.000000000 -0700 +++ ./common/cabac.c 2006-07-16 18:19:52.000000000 -0700 @@ -908,6 +908,7 @@ cb->i_low = 0; cb->i_range = 0x01FE; cb->i_bits_outstanding = 0; + cb->f8_bits_encoded = 0; cb->s = s; s->i_left++; // the first bit will be shifted away and not written } diff -u -ruX ../difX ../x264.rcapi/common/common.c ./common/common.c --- ../x264.rcapi/common/common.c 2006-07-16 14:28:28.000000000 -0700 +++ ./common/common.c 2006-07-16 18:19:52.000000000 -0700 @@ -93,6 +93,8 @@ param->rc.i_qp_step = 4; param->rc.f_ip_factor = 1.4; param->rc.f_pb_factor = 1.3; + param->rc.i_rcrd_range = 4; + param->rc.f_rcrd_lambda = 0.0; param->rc.b_stat_write = 0; param->rc.psz_stat_out = "x264_2pass.log"; @@ -132,6 +134,7 @@ memset( param->cqm_8iy, 16, 64 ); memset( param->cqm_8py, 16, 64 ); + param->b_write_bitstream = 1; param->b_repeat_headers = 1; param->b_aud = 0; } @@ -494,9 +497,10 @@ s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d", p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold ); - s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ? - ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" ) - : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" ); + s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_RD ? "rd" : + p->rc.i_rc_method == X264_RC_ABR ? + ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" ) : + p->rc.i_rc_method == X264_RC_CRF ? 
"crf" : "cqp" ); if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF ) { if( p->rc.i_rc_method == X264_RC_CRF ) @@ -514,6 +518,9 @@ s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d", p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size ); } + else if( p->rc.i_rc_method == X264_RC_RD ) + s += sprintf( s, " rc_range=%d rc_lambda=%.2f", + p->rc.i_rcrd_range, p->rc.f_rcrd_lambda ); else if( p->rc.i_rc_method == X264_RC_CQP ) s += sprintf( s, " qp=%d", p->rc.i_qp_constant ); if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) ) diff -u -ruX ../difX ../x264.rcapi/common/common.h ./common/common.h --- ../x264.rcapi/common/common.h 2006-07-16 14:04:57.000000000 -0700 +++ ./common/common.h 2006-07-16 18:19:52.000000000 -0700 @@ -90,6 +90,7 @@ #define X264_BFRAME_MAX 16 #define X264_SLICE_MAX 4 #define X264_NAL_MAX (4 + X264_SLICE_MAX) +#define X264_RDRC_MAX 32 /**************************************************************************** * Includes @@ -317,14 +318,14 @@ /* Frames to be encoded (whose types have been decided) */ x264_frame_t *current[X264_BFRAME_MAX+3]; /* Temporary buffer (frames types not yet decided) */ - x264_frame_t *next[X264_BFRAME_MAX+3]; + x264_frame_t *next[X264_BFRAME_MAX+X264_RDRC_MAX+3]; /* Unused frames */ - x264_frame_t *unused[X264_BFRAME_MAX+3]; + x264_frame_t *unused[X264_BFRAME_MAX+X264_RDRC_MAX+3]; /* For adaptive B decision */ x264_frame_t *last_nonb; /* frames used for reference +1 for decoding + sentinels */ - x264_frame_t *reference[16+2+1+2]; + x264_frame_t *reference[16+2+1+2+X264_RDRC_MAX]; int i_last_idr; /* Frame number of the last IDR */ @@ -345,9 +346,9 @@ /* references lists */ int i_ref0; - x264_frame_t *fref0[16+3]; /* ref list 0 */ + x264_frame_t *fref0[16+3+X264_RDRC_MAX]; /* ref list 0 */ int i_ref1; - x264_frame_t *fref1[16+3]; /* ref list 1 */ + x264_frame_t *fref1[16+3+X264_RDRC_MAX]; /* ref list 1 */ int b_ref_reorder[2]; diff -u -ruX ../difX ../x264.rcapi/encoder/encoder.c 
./encoder/encoder.c --- ../x264.rcapi/encoder/encoder.c 2006-07-16 14:11:47.000000000 -0700 +++ ./encoder/encoder.c 2006-07-16 18:19:52.000000000 -0700 @@ -341,7 +341,7 @@ } #endif - if( h->param.rc.i_rc_method < 0 || h->param.rc.i_rc_method > 2 ) + if( h->param.rc.i_rc_method < 0 || h->param.rc.i_rc_method > 3 ) { x264_log( h, X264_LOG_ERROR, "invalid RC method\n" ); return -1; @@ -350,6 +350,10 @@ h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 ); if( h->param.rc.i_rc_method == X264_RC_CRF ) h->param.rc.i_qp_constant = h->param.rc.i_rf_constant; + if( h->param.rc.i_rc_method == X264_RC_RD ) + h->param.rc.i_rcrd_range = x264_clip3( h->param.rc.i_rcrd_range, 1, X264_RDRC_MAX ); + else + h->param.rc.i_rcrd_range = 0; if( (h->param.rc.i_rc_method == X264_RC_CQP || h->param.rc.i_rc_method == X264_RC_CRF) && h->param.rc.i_qp_constant == 0 ) { @@ -547,10 +551,10 @@ h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height; /* Init frames. */ - h->frames.i_delay = h->param.i_bframe; + h->frames.i_delay = h->param.i_bframe + h->param.rc.i_rcrd_range; h->frames.i_max_ref0 = h->param.i_frame_reference; h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames; - h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering + 1; + h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering + 1 + h->param.rc.i_rcrd_range; h->frames.b_have_lowres = !h->param.rc.b_stat_read && ( h->param.rc.i_rc_method == X264_RC_ABR || h->param.rc.i_rc_method == X264_RC_CRF @@ -964,7 +968,7 @@ const int i_mb_y = mb_xy / h->sps->i_mb_width; const int i_mb_x = mb_xy % h->sps->i_mb_width; - int mb_spos = bs_pos(&h->out.bs); + int mb_spos = bs_pos(&h->out.bs) + (h->cabac.f8_bits_encoded >> 8); /* load cache */ x264_macroblock_cache_load( h, i_mb_x, i_mb_y ); @@ -994,7 +998,10 @@ { if( h->sh.i_type != SLICE_TYPE_I ) x264_cabac_mb_skip( h, 0 ); - x264_macroblock_write_cabac( h, &h->cabac ); + if( h->param.b_write_bitstream ) + x264_macroblock_write_cabac( h, &h->cabac ); + 
else + x264_macroblock_size_cabac( h, &h->cabac ); } } else @@ -1047,7 +1054,7 @@ } if( h->mb.b_variable_qp ) - x264_ratecontrol_mb(h, bs_pos(&h->out.bs) - mb_spos); + x264_ratecontrol_mb(h, bs_pos(&h->out.bs) + (h->cabac.f8_bits_encoded >> 8) - mb_spos); } if( h->param.b_cabac ) @@ -1080,13 +1087,14 @@ - h->stat.frame.i_ptex_bits - h->stat.frame.i_hdr_bits; + if( !h->param.b_write_bitstream ) + h->stat.frame.i_ptex_bits += (h->cabac.f8_bits_encoded + 128) >> 8; + return 0; } static inline int x264_slices_write( x264_t *h ) { - int i_frame_size; - #if VISUALIZE if( h->param.b_visualize ) x264_visualize_init( h ); @@ -1096,7 +1104,6 @@ { x264_ratecontrol_threads_start( h ); x264_slice_write( h ); - i_frame_size = h->out.nal[h->out.i_nal-1].i_payload; } else { @@ -1135,13 +1142,11 @@ #endif /* merge contexts */ - i_frame_size = h->out.nal[i_nal].i_payload; for( i = 1; i < h->param.i_threads; i++ ) { int j; x264_t *t = h->thread[i]; h->out.nal[i_nal+i] = t->out.nal[i_nal+i]; - i_frame_size += t->out.nal[i_nal+i].i_payload; // all entries in stat.frame are ints for( j = 0; j < sizeof(h->stat.frame) / sizeof(int); j++ ) ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j]; @@ -1157,7 +1162,10 @@ } #endif - return i_frame_size; + return ( h->stat.frame.i_hdr_bits + + h->stat.frame.i_itex_bits + + h->stat.frame.i_ptex_bits + + h->stat.frame.i_misc_bits + 7 ) >> 3; } /**************************************************************************** @@ -1189,10 +1197,16 @@ int i_global_qp; char psz_message[80]; + x264_picture_t dummy_pic_out; /* no data out */ - *pi_nal = 0; - *pp_nal = NULL; + if( pp_nal && pi_nal ) + { + *pi_nal = 0; + *pp_nal = NULL; + } + if( !pic_out ) + pic_out = &dummy_pic_out; /* ------------------- Setup new frame from picture -------------------- */ @@ -1223,6 +1237,8 @@ } } + x264_ratecontrol_pre( h ); + if( h->frames.current[0] == NULL ) { int bframes = 0; @@ -1487,8 +1503,11 @@ } /* End bitstream, set output */ - *pi_nal = h->out.i_nal; - *pp_nal 
= h->out.nal; + if( pp_nal && pi_nal ) + { + *pi_nal = h->out.i_nal; + *pp_nal = h->out.nal; + } /* Set output picture properties */ if( i_slice_type == SLICE_TYPE_I ) @@ -1533,7 +1552,7 @@ /* ---------------------- Compute/Print statistics --------------------- */ /* Slice stat */ h->stat.i_slice_count[i_slice_type]++; - h->stat.i_slice_size[i_slice_type] += i_frame_size + NALU_OVERHEAD; + h->stat.i_slice_size[i_slice_type] += i_frame_size + NALU_OVERHEAD * h->param.i_threads; h->stat.i_slice_qp[i_slice_type] += i_global_qp; for( i = 0; i < 19; i++ ) diff -u -ruX ../difX ../x264.rcapi/encoder/macroblock.h ./encoder/macroblock.h --- ../x264.rcapi/encoder/macroblock.h 2006-07-16 14:00:47.000000000 -0700 +++ ./encoder/macroblock.h 2006-07-16 18:19:52.000000000 -0700 @@ -38,6 +38,8 @@ void x264_macroblock_encode ( x264_t *h ); void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb ); void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s ); +void x264_macroblock_size_cabac ( x264_t *h, x264_cabac_t *cb ); +void x264_macroblock_size_cavlc ( x264_t *h, bs_t *s ); void x264_macroblock_encode_p8x8( x264_t *h, int i8 ); diff -u -ruX ../difX ../x264.rcapi/encoder/ratecontrol.c ./encoder/ratecontrol.c --- ../x264.rcapi/encoder/ratecontrol.c 2006-07-16 14:13:37.000000000 -0700 +++ ./encoder/ratecontrol.c 2006-07-16 20:51:26.000000000 -0700 @@ -150,14 +150,19 @@ int bframes; /* # consecutive B-frames before this P-frame */ int bframe_bits; /* total cost of those frames */ + /* RDRC stuff */ + x264_t *h_bak; /* backup from before processing the current frame */ + int rcrd_prev_dir[5]; + int i_zones; x264_zone_t *zones; }; static int parse_zones( x264_t *h ); -static int init_pass2(x264_t *); +static int init_pass2( x264_t *h ); static float rate_estimate_qscale( x264_t *h, int pict_type ); +static int rcrd_get_qp( x264_t *h ); static void update_vbv( x264_t *h, int bits ); static double predict_size( predictor_t *p, double q, double var ); static void update_predictor( 
predictor_t *p, double q, double var, double bits ); @@ -175,6 +180,10 @@ { return 12.0 + 6.0 * log(qscale/0.85) / log(2.0); } +static inline int qscale2iqp(double qscale) /* qscale -> integer QP: qscale2qp() plus 0.5, handed to x264_clip3() with bounds 0 and 51 */ +{ + return x264_clip3( qscale2qp(qscale) + 0.5, 0, 51 ); +} /* Texture bitrate is not quite inversely proportional to qscale, * probably due the the changing number of SKIP blocks. @@ -220,6 +229,11 @@ x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n"); return -1; } + if( h->param.rc.i_rc_method == X264_RC_RD && !h->param.rc.b_stat_read ) + { + x264_log(h, X264_LOG_ERROR, "rcrd requires 2pass.\n"); + return -1; + } if( h->param.rc.i_vbv_buffer_size ) { if( h->param.rc.i_rc_method == X264_RC_CQP ) @@ -463,6 +477,11 @@ x264_free( p ); } + if( h->param.rc.i_rc_method == X264_RC_RD ) + { + rc->h_bak = x264_malloc( sizeof(x264_t) ); + } + return 0; } @@ -552,6 +571,7 @@ } x264_free( rc->entry ); x264_free( rc->zones ); + x264_free( rc->h_bak ); x264_free( rc ); } @@ -599,16 +619,19 @@ { rc->qpm = rc->qp = i_force_qp - 1; } + else if( h->param.rc.i_rc_method == X264_RC_RD ) + { + rc->qpm = rc->qp = rcrd_get_qp( h ); + rc->slice_type = i_slice_type; // gets clobbered by the trial encodes + } else if( rc->b_abr ) { - rc->qpm = rc->qp = - x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 ); + rc->qpm = rc->qp = qscale2iqp( rate_estimate_qscale( h, i_slice_type ) ); } else if( rc->b_2pass ) { rce->new_qscale = rate_estimate_qscale( h, i_slice_type ); - rc->qpm = rc->qp = rce->new_qp = - x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 ); + rc->qpm = rc->qp = rce->new_qp = qscale2iqp( rce->new_qscale ); } else /* CQP */ { @@ -731,8 +754,8 @@ h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 
24 : 1 + h->stat.i_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P]; rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 ); - rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 ); - rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 ); + rc->qp_constant[SLICE_TYPE_I] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor ) ); + rc->qp_constant[SLICE_TYPE_B] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor ) ); x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries); x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant); @@ -1504,4 +1527,121 @@ return 0; } +void x264_ratecontrol_pre( x264_t *h ) /* Snapshot the encoder struct into rc->h_bak (RD ratecontrol only); rcrd_try_encode() starts every trial from this copy. */ +{ + if( h->param.rc.i_rc_method == X264_RC_RD ) + *h->rc->h_bak = *h; /* plain struct assignment: pointer members are copied, not what they point to */ +} + +static void rcrd_set_options( x264_t *t ) /* Switch context t to the settings used for trial encodes: constant QP, no stats output, no bitstream writing, and the analysis overrides listed below. */ +{ + x264_param_t *p = &t->param; + + p->rc.i_rc_method = X264_RC_CQP; + p->rc.b_stat_write = 0; + p->b_write_bitstream = 0; /* checked in encoder.c to pick x264_macroblock_size_cabac() over x264_macroblock_write_cabac() */ + p->i_log_level = X264_MIN( X264_LOG_ERROR, p->i_log_level ); /* log level capped at X264_LOG_ERROR */ + t->mb.b_direct_auto_write = 0; + + p->analyse.inter &= ~X264_ANALYSE_PSUB8x8; + p->analyse.i_me_method = X264_ME_DIA; + p->analyse.i_subpel_refine = X264_MIN( 3, p->analyse.i_subpel_refine ); + p->analyse.i_trellis = 0; + p->analyse.b_bidir_me = 0; + p->analyse.b_fast_pskip = 1; + p->analyse.b_mixed_references = 0; + p->analyse.b_transform_8x8 = 0; + t->frames.i_max_ref0 = + p->i_frame_reference = X264_MIN( 2, p->i_frame_reference ); /* at most 2, written to both the param and frames.i_max_ref0 */ +} + +static float rcrd_try_encode( x264_t *h, x264_t *t, int qp ) /* Run trial encodes at the candidate qp in scratch context t and return the cost: sum of i_sqe_global[] plus f_rcrd_lambda times i_slice_size[]. Returns 1e30 when qp is outside 0..51. */ +{ + /* TODO + * adapt range based on number of B-frames or % intra blocks? + * try a larger search range for I-frames? + * try stopping at all I-frames, not just IDR? 
+ * reuse motion vectors between candidate encodes? loses too much quality. just init the motion search? + * map lambda onto some intuitive scale, like crf? */ + float rd; + int i; + int range = (h->fenc->i_type == X264_TYPE_B) ? 1 : h->param.rc.i_rcrd_range; /* max number of x264_encoder_encode() calls per trial: 1 for B-frames, else rc.i_rcrd_range */ + + if( qp < 0 || qp > 51 ) + return 1e30; /* out-of-range QP: huge cost so the caller's comparison never selects it */ + + *t = *h->rc->h_bak; /* reset the scratch context to the saved state */ + h->fenc->i_qpplus1 = qp + 1; /* candidate is stored as qp+1; rcrd_get_qp() resets the field to 0 afterwards */ + for( i = 0; i < range; i++ ) + { + int next_type; + x264_encoder_encode( t, NULL, NULL, NULL, NULL ); /* no NAL or picture output requested; return value is ignored */ + next_type = t->frames.current[0] ? t->frames.current[0]->i_type : + t->frames.next[0] ? t->frames.next[0]->i_type : -1; + if( next_type == X264_TYPE_IDR ) + break; /* stop once the next frame in t's queues is an IDR */ + } + + rd = 0; + for( i = 0; i < 5; i++ ) + rd += t->stat.i_sqe_global[i] + t->stat.i_slice_size[i] * h->param.rc.f_rcrd_lambda; /* per-slot cost term: squared error + lambda * size */ + return rd; +} + +static int rcrd_get_qp( x264_t *h ) /* Choose the QP for h->fenc by trial encoding: start from the QP derived from rce->qscale, then step the QP one unit at a time while the cost from rcrd_try_encode() keeps dropping. Returns the best QP found. */ +{ + x264_ratecontrol_t *rcc = h->rc; + x264_frame_t **fp; + x264_t t; /* scratch context for trial encodes; a full x264_t as a local variable */ + int base_qp = qscale2iqp( rcc->rce->qscale ); + int best_qp; + float best_rd; + int i, dir; + int *p_dir; + + /* assume the following frames use the same qp as they did in the previous pass */ + for( fp = h->frames.current; *fp; fp++ ) + (*fp)->i_qpplus1 = qscale2iqp( rcc->entry[(*fp)->i_frame].qscale ) + 1; /* NOTE(review): entry[] is indexed by i_frame with no bounds check visible here -- confirm it cannot run past the table */ + for( fp = h->frames.next; *fp; fp++ ) + (*fp)->i_qpplus1 = qscale2iqp( rcc->entry[(*fp)->i_frame].qscale ) + 1; + + rcrd_set_options( rcc->h_bak ); /* trial settings go into the backup copy; h keeps its own params */ + memset( rcc->h_bak->stat.i_slice_size, 0, sizeof(rcc->h_bak->stat.i_slice_size) ); + memset( rcc->h_bak->stat.i_sqe_global, 0, sizeof(rcc->h_bak->stat.i_sqe_global) ); /* each trial copies h_bak, so both accumulators start at zero every time */ + + best_qp = base_qp; + best_rd = rcrd_try_encode( h, &t, base_qp ); + + /* not sure if this is actually better/faster than just searching one direction first always */ + p_dir = &rcc->rcrd_prev_dir[h->fenc->i_type-1]; /* remembered search direction per frame type; NOTE(review): needs i_type in 1..5 for the 5-entry array -- confirm */ + if( *p_dir == 0 ) + *p_dir = -1; /* 0 is treated as no remembered direction: try lower QPs first */ + dir = *p_dir; + for( i = 1; i < 20; i++ ) /* i is the distance from base_qp */ + { + int qp = base_qp + i*dir; + float rd = rcrd_try_encode( h, &t, qp ); + if( best_rd > rd ) + { + best_rd = rd; + best_qp = qp; + *p_dir = dir; + } + else if( i == 1 && dir == 
*p_dir ) + { + dir = -dir; + i = 0; /* first step in the remembered direction did not help: flip and restart (the loop increment brings i back to 1) */ + } + else + break; /* cost stopped improving */ + } + + for( fp = h->frames.current; *fp; fp++ ) + (*fp)->i_qpplus1 = 0; + for( fp = h->frames.next; *fp; fp++ ) + (*fp)->i_qpplus1 = 0; + h->fenc->i_qpplus1 = 0; /* all per-frame qp+1 values set during the search are cleared again */ + + return best_qp; +} diff -u -ruX ../difX ../x264.rcapi/encoder/ratecontrol.h ./encoder/ratecontrol.h --- ../x264.rcapi/encoder/ratecontrol.h 2006-07-16 14:00:47.000000000 -0700 +++ ./encoder/ratecontrol.h 2006-07-16 18:19:52.000000000 -0700 @@ -27,6 +27,7 @@ int x264_ratecontrol_new ( x264_t * ); void x264_ratecontrol_delete( x264_t * ); +void x264_ratecontrol_pre( x264_t * ); void x264_ratecontrol_start( x264_t *, int i_slice_type, int i_force_qp ); void x264_ratecontrol_threads_start( x264_t * ); int x264_ratecontrol_slice_type( x264_t *, int i_frame ); diff -u -ruX ../difX ../x264.rcapi/x264.c ./x264.c --- ../x264.rcapi/x264.c 2006-07-16 14:00:49.000000000 -0700 +++ ./x264.c 2006-07-16 18:19:52.000000000 -0700 @@ -65,6 +65,7 @@ int i_seek; hnd_t hin; hnd_t hout; + FILE *qpfile; } cli_opt_t; /* input file operation function pointers */ @@ -168,6 +169,8 @@ " --qpmax Set max QP [%d]\n" " --qpstep Set max QP step [%d]\n" " --ratetol Allowed variance of average bitrate [%.1f]\n" + " --rcrd-lambda Enable RD ratecontrol, and select quality [%.1f]\n" + " --rcrd-range RD search range [%d]\n" " --vbv-maxrate Max local bitrate [%d]\n" " --vbv-bufsize Size of VBV buffer [%d]\n" " --vbv-init Initial VBV buffer occupancy [%.1f]\n" @@ -193,6 +196,7 @@ " where