Index: encoder/ratecontrol.h =================================================================== --- encoder/ratecontrol.h (revision 534) +++ encoder/ratecontrol.h (working copy) @@ -27,6 +27,7 @@ int x264_ratecontrol_new ( x264_t * ); void x264_ratecontrol_delete( x264_t * ); +void x264_ratecontrol_pre( x264_t * ); void x264_ratecontrol_start( x264_t *, int i_slice_type, int i_force_qp ); void x264_ratecontrol_threads_start( x264_t * ); int x264_ratecontrol_slice_type( x264_t *, int i_frame ); Index: encoder/encoder.c =================================================================== --- encoder/encoder.c (revision 534) +++ encoder/encoder.c (working copy) @@ -347,7 +347,8 @@ h->param.rc.i_qp_constant = h->param.rc.i_rf_constant; h->param.rc.i_rf_constant = x264_clip3( h->param.rc.i_rf_constant, 0, 51 ); h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 ); - if( !h->param.rc.b_cbr && h->param.rc.i_qp_constant == 0 ) + h->param.rc.i_rdrc_range = x264_clip3( h->param.rc.i_rdrc_range, 0, X264_RDRC_MAX ); + if( !h->param.rc.b_cbr && !h->param.rc.i_rdrc_range && h->param.rc.i_qp_constant == 0 ) { h->mb.b_lossless = 1; h->param.analyse.b_transform_8x8 = 0; @@ -543,10 +544,10 @@ h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height; /* Init frames. 
*/ - h->frames.i_delay = h->param.i_bframe; + h->frames.i_delay = h->param.i_bframe + h->param.rc.i_rdrc_range; h->frames.i_max_ref0 = h->param.i_frame_reference; h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames; - h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering + 1; + h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering + 1 + h->param.rc.i_rdrc_range; h->frames.b_have_lowres = !h->param.rc.b_stat_read && ( h->param.rc.b_cbr || h->param.rc.i_rf_constant || h->param.b_bframe_adaptive ); @@ -958,7 +959,7 @@ const int i_mb_y = mb_xy / h->sps->i_mb_width; const int i_mb_x = mb_xy % h->sps->i_mb_width; - int mb_spos = bs_pos(&h->out.bs); + int mb_spos = bs_pos(&h->out.bs) + (h->cabac.f8_bits_encoded >> 8); /* load cache */ x264_macroblock_cache_load( h, i_mb_x, i_mb_y ); @@ -988,7 +989,10 @@ { if( h->sh.i_type != SLICE_TYPE_I ) x264_cabac_mb_skip( h, 0 ); - x264_macroblock_write_cabac( h, &h->cabac ); + if( h->param.b_write_bitstream ) + x264_macroblock_write_cabac( h, &h->cabac ); + else + x264_macroblock_size_cabac( h, &h->cabac ); } } else @@ -1041,7 +1045,7 @@ } if( h->mb.b_variable_qp ) - x264_ratecontrol_mb(h, bs_pos(&h->out.bs) - mb_spos); + x264_ratecontrol_mb(h, bs_pos(&h->out.bs) + (h->cabac.f8_bits_encoded >> 8) - mb_spos); } if( h->param.b_cabac ) @@ -1074,13 +1078,14 @@ - h->stat.frame.i_ptex_bits - h->stat.frame.i_hdr_bits; + if( !h->param.b_write_bitstream ) + h->stat.frame.i_ptex_bits += (h->cabac.f8_bits_encoded + 128) >> 8; + return 0; } static inline int x264_slices_write( x264_t *h ) { - int i_frame_size; - #if VISUALIZE if( h->param.b_visualize ) x264_visualize_init( h ); @@ -1090,7 +1095,6 @@ { x264_ratecontrol_threads_start( h ); x264_slice_write( h ); - i_frame_size = h->out.nal[h->out.i_nal-1].i_payload; } else { @@ -1129,13 +1133,11 @@ #endif /* merge contexts */ - i_frame_size = h->out.nal[i_nal].i_payload; for( i = 1; i < h->param.i_threads; i++ ) { int j; x264_t *t = h->thread[i]; h->out.nal[i_nal+i] = 
t->out.nal[i_nal+i]; - i_frame_size += t->out.nal[i_nal+i].i_payload; // all entries in stat.frame are ints for( j = 0; j < sizeof(h->stat.frame) / sizeof(int); j++ ) ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j]; @@ -1151,7 +1153,10 @@ } #endif - return i_frame_size; + return ( h->stat.frame.i_hdr_bits + + h->stat.frame.i_itex_bits + + h->stat.frame.i_ptex_bits + + h->stat.frame.i_misc_bits + 7 ) >> 3; } /**************************************************************************** @@ -1183,10 +1188,16 @@ int i_global_qp; char psz_message[80]; + x264_picture_t dummy_pic_out; /* no data out */ - *pi_nal = 0; - *pp_nal = NULL; + if( pp_nal && pi_nal ) + { + *pi_nal = 0; + *pp_nal = NULL; + } + if( !pic_out ) + pic_out = &dummy_pic_out; /* ------------------- Setup new frame from picture -------------------- */ @@ -1217,6 +1228,8 @@ } } + x264_ratecontrol_pre( h ); + if( h->frames.current[0] == NULL ) { int bframes = 0; @@ -1481,8 +1494,11 @@ } /* End bitstream, set output */ - *pi_nal = h->out.i_nal; - *pp_nal = h->out.nal; + if( pp_nal && pi_nal ) + { + *pi_nal = h->out.i_nal; + *pp_nal = h->out.nal; + } /* Set output picture properties */ if( i_slice_type == SLICE_TYPE_I ) @@ -1527,7 +1543,7 @@ /* ---------------------- Compute/Print statistics --------------------- */ /* Slice stat */ h->stat.i_slice_count[i_slice_type]++; - h->stat.i_slice_size[i_slice_type] += i_frame_size + NALU_OVERHEAD; + h->stat.i_slice_size[i_slice_type] += i_frame_size + NALU_OVERHEAD * h->param.i_threads; h->stat.i_slice_qp[i_slice_type] += i_global_qp; for( i = 0; i < 19; i++ ) Index: encoder/macroblock.h =================================================================== --- encoder/macroblock.h (revision 534) +++ encoder/macroblock.h (working copy) @@ -38,6 +38,8 @@ void x264_macroblock_encode ( x264_t *h ); void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb ); void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s ); +void x264_macroblock_size_cabac ( x264_t *h, 
x264_cabac_t *cb ); +void x264_macroblock_size_cavlc ( x264_t *h, bs_t *s ); void x264_macroblock_encode_p8x8( x264_t *h, int i8 ); Index: encoder/ratecontrol.c =================================================================== --- encoder/ratecontrol.c (revision 534) +++ encoder/ratecontrol.c (working copy) @@ -147,14 +147,19 @@ int bframes; /* # consecutive B-frames before this P-frame */ int bframe_bits; /* total cost of those frames */ + /* RDRC stuff */ + x264_t *h_bak; /* backup from before processing the current frame */ + int rdrc_prev_dir[5]; + int i_zones; x264_zone_t *zones; }; static int parse_zones( x264_t *h ); -static int init_pass2(x264_t *); +static int init_pass2( x264_t *h ); static float rate_estimate_qscale( x264_t *h, int pict_type ); +static int rdrc_get_qp( x264_t *h ); static void update_vbv( x264_t *h, int bits ); static double predict_size( predictor_t *p, double q, double var ); static void update_predictor( predictor_t *p, double q, double var, double bits ); @@ -172,6 +177,10 @@ { return 12.0 + 6.0 * log(qscale/0.85) / log(2.0); } +static inline int qscale2iqp(double qscale) /* Convert qscale to an integer QP: qscale2qp() plus 0.5, clipped to [0,51]. Presumably x264_clip3 takes int arguments, so this rounds to nearest -- confirm. */ +{ + return x264_clip3( qscale2qp(qscale) + 0.5, 0, 51 ); +} /* Texture bitrate is not quite inversely proportional to qscale, * probably due the the changing number of SKIP blocks. 
@@ -217,6 +226,11 @@ x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n"); return -1; } + if( h->param.rc.i_rdrc_range && !h->param.rc.b_stat_read ) + { + x264_log(h, X264_LOG_ERROR, "rdrc requires 2pass.\n"); + return -1; + } if( h->param.rc.i_vbv_buffer_size && !h->param.rc.b_cbr && !h->param.rc.i_rf_constant ) x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP.\n"); if( h->param.rc.i_vbv_buffer_size && h->param.rc.b_cbr @@ -458,6 +472,11 @@ x264_free( p ); } + if( h->param.rc.i_rdrc_range ) + { + rc->h_bak = x264_malloc( sizeof(x264_t) ); + } + return 0; } @@ -547,6 +566,7 @@ } x264_free( rc->entry ); x264_free( rc->zones ); + x264_free( rc->h_bak ); x264_free( rc ); } @@ -594,16 +614,19 @@ { rc->qpm = rc->qp = i_force_qp - 1; } + else if( h->param.rc.i_rdrc_range ) + { + rc->qpm = rc->qp = rdrc_get_qp( h ); + rc->slice_type = i_slice_type; // gets clobbered by the trial encodes + } else if( rc->b_abr ) { - rc->qpm = rc->qp = - x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 ); + rc->qpm = rc->qp = qscale2iqp( rate_estimate_qscale( h, i_slice_type ) ); } else if( rc->b_2pass ) { rce->new_qscale = rate_estimate_qscale( h, i_slice_type ); - rc->qpm = rc->qp = rce->new_qp = - x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 ); + rc->qpm = rc->qp = rce->new_qp = qscale2iqp( rce->new_qscale ); } else /* CQP */ { @@ -726,8 +749,8 @@ h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 
24 : 1 + h->stat.i_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P]; rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 ); - rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 ); - rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 ); + rc->qp_constant[SLICE_TYPE_I] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor ) ); + rc->qp_constant[SLICE_TYPE_B] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor ) ); x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries); x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant); @@ -1499,4 +1522,117 @@ return 0; } +void x264_ratecontrol_pre( x264_t *h ) /* When rdrc is enabled (i_rdrc_range != 0), copy the whole encoder struct into rc->h_bak by plain struct assignment; rdrc_try_encode() starts each trial from that copy. */ +{ + if( h->param.rc.i_rdrc_range ) + *h->rc->h_bak = *h; +} +static void rdrc_set_options( x264_param_t *p ) /* Rewrite the params used for trial encodes: clears i_rdrc_range (presumably so a trial does not re-enter rdrc_get_qp -- confirm), b_stat_write and b_write_bitstream, caps the log level at X264_LOG_WARNING, and restricts the analysis options listed below. */ +{ + p->rc.i_rdrc_range = 0; + p->rc.b_stat_write = 0; + p->b_write_bitstream = 0; + p->i_log_level = X264_MIN( X264_LOG_WARNING, p->i_log_level ); + + p->analyse.inter &= ~X264_ANALYSE_PSUB8x8; + p->analyse.i_me_method = X264_ME_DIA; + p->analyse.i_subpel_refine = X264_MIN( 3, p->analyse.i_subpel_refine ); + p->analyse.i_trellis = 0; + p->analyse.b_bidir_me = 0; + p->analyse.b_mixed_references = 0; + p->analyse.b_fast_pskip = 1; + p->analyse.b_transform_8x8 = 0; + p->i_frame_reference = X264_MIN( 3, p->i_frame_reference ); +} + +static float rdrc_try_encode( x264_t *h, x264_t *t, int qp ) /* Trial-encode with h->fenc->i_qpplus1 set to qp+1 and return a cost. t is scratch: it is overwritten with rc->h_bak, then x264_encoder_encode() is called on it with NULL in/out arguments up to `range` times (1 when fenc is X264_TYPE_B, else i_rdrc_range), stopping early when the next frame is IDR. The cost is the sum over 5 stat slots of i_sqe_global + i_slice_size * f_rdrc_lambda; 1e20 is returned when qp is outside [0,51]. */ +{ + /* TODO B-refs are searched fully. try searching just their immediate neighbors? */ + /* adapt range based on number of B-frames or % intra blocks? */ + /* try stopping at all I-frames, not just IDR? */ + /* try larger search range for I-frames? */ + /* reuse motion vectors between candidate encodes? 
*/ + /* do something about the "[warning]: specified frame type is not compatible with max B-frames" on ctrl-C */ + float rd; + int i; + int range = (h->fenc->i_type == X264_TYPE_B) ? 1 : h->param.rc.i_rdrc_range; + + if( qp < 0 || qp > 51 ) + return 1e20; + + *t = *h->rc->h_bak; /* restart the trial encoder from the copy stored in rc->h_bak */ + h->fenc->i_qpplus1 = qp + 1; + /* the next few pictures are already buffered in h->frames.next[] */ + for( i = 0; i < range; i++ ) + { + x264_encoder_encode( t, NULL, NULL, NULL, NULL ); + if( t->frames.current[0] ? (t->frames.current[0]->i_type == X264_TYPE_IDR) + : t->frames.next[0] ? (t->frames.next[0]->i_type == X264_TYPE_IDR) : 0) + break; + } + + rd = 0; + for( i = 0; i < 5; i++ ) + rd += t->stat.i_sqe_global[i] + t->stat.i_slice_size[i] * h->param.rc.f_rdrc_lambda; + return rd; +} + +static int rdrc_get_qp( x264_t *h ) /* Choose the QP for the current frame by trial encoding. Starts at the QP derived from rcc->rce->qscale, then steps one QP at a time (i < 20) in the direction remembered in rdrc_prev_dir for this frame type for as long as rdrc_try_encode() returns a lower cost. Returns the lowest-cost QP tried. */ +{ + x264_ratecontrol_t *rcc = h->rc; + x264_frame_t **fp; + x264_t t; /* NOTE(review): a complete x264_t is placed on the stack here -- confirm its size is acceptable */ + int base_qp = qscale2iqp( rcc->rce->qscale ); + int best_qp; + float best_rd; + int i, dir; + int *p_dir; + + /* assume the following frames use the same qp as they did in the previous pass */ + for( fp = h->frames.current; *fp; fp++ ) + (*fp)->i_qpplus1 = qscale2iqp( rcc->entry[(*fp)->i_frame].qscale ) + 1; + for( fp = h->frames.next; *fp; fp++ ) + (*fp)->i_qpplus1 = qscale2iqp( rcc->entry[(*fp)->i_frame].qscale ) + 1; + + rdrc_set_options( &rcc->h_bak->param ); + rcc->h_bak->mb.b_direct_auto_write = 0; + memset( rcc->h_bak->stat.i_slice_size, 0, sizeof(rcc->h_bak->stat.i_slice_size) ); /* zero the size and sqe totals in the backup so each trial's cost counts only the trial frames */ + memset( rcc->h_bak->stat.i_sqe_global, 0, sizeof(rcc->h_bak->stat.i_sqe_global) ); + + best_qp = base_qp; + best_rd = rdrc_try_encode( h, &t, base_qp ); + + /* not sure if this is actually better/faster than just searching one direction first always */ + p_dir = &rcc->rdrc_prev_dir[h->fenc->i_type-1]; /* NOTE(review): rdrc_prev_dir has 5 entries, so this assumes i_type is in 1..5 -- confirm */ + if( *p_dir == 0 ) + *p_dir = -1; + dir = *p_dir; + for( i = 1; i < 20; i++ ) + { + int qp = base_qp + i*dir; + float rd = rdrc_try_encode( h, &t, qp ); + if( best_rd > rd ) + { + best_rd = rd; + best_qp = qp; 
+ *p_dir = dir; + } + else if( i == 1 && dir == *p_dir ) /* the very first step in the remembered direction did not help: flip direction; i = 0 makes the loop retry from i == 1 */ + { + dir = -dir; + i = 0; + } + else + break; + } + + for( fp = h->frames.current; *fp; fp++ ) /* reset the i_qpplus1 values assigned before the trials */ + (*fp)->i_qpplus1 = 0; + for( fp = h->frames.next; *fp; fp++ ) + (*fp)->i_qpplus1 = 0; + h->fenc->i_qpplus1 = 0; + + return best_qp; +} + Index: x264.c =================================================================== --- x264.c (revision 534) +++ x264.c (working copy) @@ -65,6 +65,7 @@ int i_seek; hnd_t hin; hnd_t hout; + FILE *qpfile; } cli_opt_t; /* input file operation function pointers */ @@ -193,6 +194,7 @@ " where