RD-based ratecontrol ("rcrd") for x264, as a diff against revision 718.

What the hunks below do:
  * Add ratecontrol method X264_RC_RD (value 3). It is selected by the
    --rcrd-lambda option; --rcrd-window sets the lookahead range (default 4).
    The window is clipped to 1..X264_RCRD_MAX when RD is selected and forced
    to 0 otherwise, and it is added to h->frames.i_delay.
  * ratecontrol.c rejects RD unless rc.b_stat_read is set and rcrd-lambda > 0,
    and takes the "VBV ... ignored" warning branch for RD as it does for CQP.
  * rcrd_get_qp() chooses the frame QP by trial encodes: it starts from the
    previous pass's QP plus a clamped (-1..1) predicted offset, then steps
    upward and then downward (at most 19 steps each way), keeping the QP with
    the lowest cost and abandoning a direction once its score reaches 2.
  * rcrd_try_encode() copies the encoder context into a temporary x264_t,
    switches that copy to CQP with reduced analysis settings and
    b_write_bitstream = 0, encodes up to `range` frames, and returns
    sum(i_sqe_global[i] + i_slice_size[i] * lambda) over the 5 stat slots.
  * rc->ip_offset changes sign (now -6*log2(f_ip_factor)); every use visible
    in this diff is adjusted to match.
  * h->out.i_frame_size is derived from the per-frame bit counters instead of
    the last NAL payload size.
  * The encode entry point accepts NULL for pi_nal/pp_nal/pic_out
    (rcrd_try_encode calls x264_encoder_encode with NULL for all of them).
  * frames.b_have_integral is added; the lowres pointer swap is now guarded
    by frames.b_have_lowres.

NOTE(review): x264_macroblock_size_cabac/_cavlc are only declared here and
cabac.f8_bits_encoded is only used/initialised here; their definitions are
not part of the hunks in this file -- confirm they come from elsewhere.

Index: encoder/encoder.c
===================================================================
--- encoder/encoder.c	(revision 718)
+++ encoder/encoder.c	(working copy)
@@ -375,7 +375,7 @@
         }
     }
 
-    if( h->param.rc.i_rc_method < 0 || h->param.rc.i_rc_method > 2 )
+    if( h->param.rc.i_rc_method < 0 || h->param.rc.i_rc_method > 3 )
     {
         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
         return -1;
@@ -384,6 +384,10 @@
     h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
     if( h->param.rc.i_rc_method == X264_RC_CRF )
         h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
+    if( h->param.rc.i_rc_method == X264_RC_RD )
+        h->param.rc.i_rcrd_window = x264_clip3( h->param.rc.i_rcrd_window, 1, X264_RCRD_MAX );
+    else
+        h->param.rc.i_rcrd_window = 0;
     if( (h->param.rc.i_rc_method == X264_RC_CQP || h->param.rc.i_rc_method == X264_RC_CRF)
         && h->param.rc.i_qp_constant == 0 )
     {
@@ -636,7 +640,7 @@
     h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
 
     /* Init frames. */
-    h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
+    h->frames.i_delay = h->param.i_bframe + h->param.i_threads + h->param.rc.i_rcrd_window - 1;
     h->frames.i_max_ref0 = h->param.i_frame_reference;
     h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
     h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering;
@@ -645,6 +649,7 @@
           || h->param.rc.i_rc_method == X264_RC_CRF
           || h->param.b_bframe_adaptive
           || h->param.b_pre_scenecut );
+    h->frames.b_have_integral = h->param.analyse.i_me_method == X264_ME_ESA;
 
     h->frames.i_last_idr = - h->param.i_keyint_max;
     h->frames.i_input = 0;
@@ -956,11 +961,12 @@
     }
 
     /* move lowres copy of the image to the ref frame */
-    for( i = 0; i < 4; i++)
-    {
-        XCHG( uint8_t*, h->fdec->lowres[i], h->fenc->lowres[i] );
-        XCHG( uint8_t*, h->fdec->buffer_lowres[i], h->fenc->buffer_lowres[i] );
-    }
+    if( h->frames.b_have_lowres )
+        for( i = 0; i < 4; i++)
+        {
+            XCHG( uint8_t*, h->fdec->lowres[i], h->fenc->lowres[i] );
+            XCHG( uint8_t*, h->fdec->buffer_lowres[i], h->fenc->buffer_lowres[i] );
+        }
 
     /* adaptive B decision needs a pointer, since it can't use the ref lists */
     if( h->sh.i_type != SLICE_TYPE_B )
@@ -1050,7 +1056,7 @@
     {
         const int i_mb_y = mb_xy / h->sps->i_mb_width;
         const int i_mb_x = mb_xy % h->sps->i_mb_width;
-        int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
+        int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) + (h->cabac.f8_bits_encoded >> 8);
 
         if( i_mb_x == 0 )
             x264_fdec_filter_row( h, i_mb_y );
@@ -1083,7 +1089,10 @@
             {
                 if( h->sh.i_type != SLICE_TYPE_I )
                     x264_cabac_mb_skip( h, 0 );
-                x264_macroblock_write_cabac( h, &h->cabac );
+                if( h->param.b_write_bitstream )
+                    x264_macroblock_write_cabac( h, &h->cabac );
+                else
+                    x264_macroblock_size_cabac( h, &h->cabac );
             }
         }
         else
@@ -1136,7 +1145,7 @@
         }
 
         if( h->mb.b_variable_qp )
-            x264_ratecontrol_mb(h, bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) - mb_spos);
+            x264_ratecontrol_mb(h, bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) + (h->cabac.f8_bits_encoded >> 8) - mb_spos);
 
         if( h->sh.b_mbaff )
         {
@@ -1180,6 +1189,9 @@
                               - h->stat.frame.i_itex_bits
                               - h->stat.frame.i_ptex_bits
                               - h->stat.frame.i_hdr_bits;
+
+    if( !h->param.b_write_bitstream )
+        h->stat.frame.i_ptex_bits += h->cabac.f8_bits_encoded >> 8;
 }
 
 static void x264_thread_sync_context( x264_t *dst, x264_t *src )
@@ -1211,15 +1223,12 @@
 
 static int x264_slices_write( x264_t *h )
 {
-    int i_frame_size;
-
 #if VISUALIZE
     if( h->param.b_visualize )
         x264_visualize_init( h );
 #endif
 
     x264_stack_align( x264_slice_write, h );
-    i_frame_size = h->out.nal[h->out.i_nal-1].i_payload;
     x264_fdec_filter_row( h, h->sps->i_mb_height );
 
 #if VISUALIZE
@@ -1230,7 +1239,11 @@
     }
 #endif
 
-    h->out.i_frame_size = i_frame_size;
+    h->out.i_frame_size =
+        ( h->stat.frame.i_hdr_bits
+        + h->stat.frame.i_itex_bits
+        + h->stat.frame.i_ptex_bits
+        + h->stat.frame.i_misc_bits + 7 ) >> 3;
     return 0;
 }
 
@@ -1253,6 +1266,7 @@
                              x264_picture_t *pic_out )
 {
     x264_t *thread_current, *thread_prev, *thread_oldest;
+    x264_picture_t dummy_pic_out;
     int i_nal_type;
     int i_nal_ref_idc;
 
@@ -1282,9 +1296,15 @@
     h->fdec->i_lines_completed = -1;
 
     /* no data out */
-    *pi_nal = 0;
-    *pp_nal = NULL;
+    if( pi_nal && pp_nal )
+    {
+        *pi_nal = 0;
+        *pp_nal = NULL;
+    }
 
+    if( !pic_out )
+        pic_out = &dummy_pic_out;
+
     /* ------------------- Setup new frame from picture -------------------- */
     TIMER_START( i_mtime_encode_frame );
     if( pic_in != NULL )
@@ -1610,8 +1630,11 @@
     x264_frame_push_unused( thread_current, h->fenc );
 
     /* End bitstream, set output */
-    *pi_nal = h->out.i_nal;
-    *pp_nal = h->out.nal;
+    if( pi_nal && pp_nal )
+    {
+        *pi_nal = h->out.i_nal;
+        *pp_nal = h->out.nal;
+    }
     h->out.i_nal = 0;
 
     /* Set output picture properties */
Index: encoder/macroblock.h
===================================================================
--- encoder/macroblock.h	(revision 718)
+++ encoder/macroblock.h	(working copy)
@@ -38,6 +38,8 @@
 void x264_macroblock_encode ( x264_t *h );
 void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
 void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s );
+void x264_macroblock_size_cabac ( x264_t *h, x264_cabac_t *cb );
+void x264_macroblock_size_cavlc ( x264_t *h, bs_t *s );
 
 void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale );
Index: encoder/ratecontrol.c
===================================================================
--- encoder/ratecontrol.c	(revision 718)
+++ encoder/ratecontrol.c	(working copy)
@@ -98,6 +98,8 @@
     double rate_factor_constant;
     double ip_offset;
     double pb_offset;
+    double qp_type_offset[6];
+    predictor_t rcrd_qp_offset[5];
 
     /* 2pass stuff */
     FILE *p_stat_file_out;
@@ -134,8 +136,9 @@
 
 
 static int parse_zones( x264_t *h );
-static int init_pass2(x264_t *);
+static int init_pass2( x264_t *h );
 static float rate_estimate_qscale( x264_t *h );
+static int rcrd_get_qp( x264_t *h );
 static void update_vbv( x264_t *h, int bits );
 static void update_vbv_plan( x264_t *h );
 static double predict_size( predictor_t *p, double q, double var );
@@ -154,6 +157,10 @@
 {
     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 }
+static inline int qscale2iqp(double qscale)
+{
+    return x264_clip3( qscale2qp(qscale) + 0.5, 0, 51 );
+}
 
 /* Texture bitrate is not quite inversely proportional to qscale,
  * probably due the the changing number of SKIP blocks.
@@ -199,9 +206,19 @@
         x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
         return -1;
     }
+    if( h->param.rc.i_rc_method == X264_RC_RD && !h->param.rc.b_stat_read )
+    {
+        x264_log(h, X264_LOG_ERROR, "rcrd requires 2pass.\n");
+        return -1;
+    }
+    if( h->param.rc.i_rc_method == X264_RC_RD && h->param.rc.f_rcrd_lambda <= 0 )
+    {
+        x264_log(h, X264_LOG_ERROR, "rcrd-lambda must be > 0.\n");
+        return -1;
+    }
     if( h->param.rc.i_vbv_buffer_size )
     {
-        if( h->param.rc.i_rc_method == X264_RC_CQP )
+        if( h->param.rc.i_rc_method == X264_RC_CQP || h->param.rc.i_rc_method == X264_RC_RD )
             x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
         else if( h->param.rc.i_vbv_max_bitrate == 0 )
         {
@@ -265,11 +282,16 @@
                                  / qp2qscale( h->param.rc.f_rf_constant );
     }
 
-    rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
-    rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
+    rc->ip_offset = -6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
+    rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
-    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
+    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant + rc->ip_offset + 0.5, 0, 51 );
     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
+    rc->qp_type_offset[X264_TYPE_IDR] = rc->ip_offset;
+    rc->qp_type_offset[X264_TYPE_I] = rc->ip_offset;
+    rc->qp_type_offset[X264_TYPE_B] = rc->pb_offset;
+    rc->qp_type_offset[X264_TYPE_BREF] = rc->pb_offset / 2;
+    rc->qp_type_offset[X264_TYPE_P] = 0;
 
     rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
     rc->last_qscale = qp2qscale(26);
@@ -286,6 +308,9 @@
         rc->row_preds[i].coeff= .25;
         rc->row_preds[i].count= 1.0;
         rc->row_preds[i].decay= 0.5;
+        rc->rcrd_qp_offset[i].coeff= 0.0;
+        rc->rcrd_qp_offset[i].count= 1.0;
+        rc->rcrd_qp_offset[i].decay= 0.75;
     }
     *rc->pred_b_from_p = rc->pred[0];
 
@@ -632,7 +657,7 @@
             rc->accum_p_norm *= .95;
             rc->accum_p_norm += 1;
             if( h->sh.i_type == SLICE_TYPE_I )
-                rc->accum_p_qp += qp + rc->ip_offset;
+                rc->accum_p_qp += qp - rc->ip_offset;
             else
                 rc->accum_p_qp += qp;
         }
@@ -687,6 +712,10 @@
     {
         q = i_force_qp - 1;
     }
+    else if( h->param.rc.i_rc_method == X264_RC_RD )
+    {
+        q = rcrd_get_qp( h );
+    }
     else if( rc->b_abr )
     {
         q = qscale2qp( rate_estimate_qscale( h ) );
@@ -842,8 +871,8 @@
             h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
                                       : 1 + h->stat.i_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
             rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
-            rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
-            rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
+            rc->qp_constant[SLICE_TYPE_I] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) / h->param.rc.f_ip_factor );
+            rc->qp_constant[SLICE_TYPE_B] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) * h->param.rc.f_pb_factor );
 
             x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
             x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
@@ -1291,7 +1320,7 @@
             q1 -= rcc->pb_offset/2;
 
         if(i0 && i1)
-            q = (q0 + q1) / 2 + rcc->ip_offset;
+            q = (q0 + q1) / 2 - rcc->ip_offset;
         else if(i0)
             q = q1;
         else if(i1)
@@ -1632,4 +1661,190 @@
 
     return 0;
 }
 
+static void rcrd_set_options( x264_t *t )
+{
+    x264_param_t *p = &t->param;
+    p->rc.i_rc_method = X264_RC_CQP;
+    p->rc.b_stat_write = 0;
+    p->b_write_bitstream = 0;
+    p->i_log_level = X264_MIN( X264_LOG_ERROR, p->i_log_level );
+    t->mb.b_direct_auto_write = 0;
+
+    p->analyse.inter = 0;
+    p->analyse.i_me_method = X264_ME_DIA;
+    p->analyse.i_subpel_refine = X264_MIN( 2, p->analyse.i_subpel_refine );
+    p->analyse.i_trellis = 0;
+    p->analyse.b_bidir_me = 0;
+    p->analyse.b_fast_pskip = 1;
+    p->analyse.b_mixed_references = 0;
+    p->analyse.b_transform_8x8 = 0;
+    p->i_frame_reference = X264_MIN( 2, p->i_frame_reference );
+}
+
+static void rcrd_sync_context( x264_t *h, x264_t *t )
+{
+    x264_frame_t **f;
+
+    *t = *h;
+    t->thread[0] = t;
+    memset( t->stat.i_slice_size, 0, sizeof(t->stat.i_slice_size) );
+    memset( t->stat.i_sqe_global, 0, sizeof(t->stat.i_sqe_global) );
+    rcrd_set_options( t );
+
+    x264_frame_unshift( t->frames.current, t->fenc );
+    t->fenc = NULL;
+    t->fdec->b_kept_as_ref = 0; // it has already been put in the DPB
+
+    for( f = t->frames.reference; *f; f++ ) (*f)->i_reference_count++;
+    for( f = t->frames.current; *f; f++ ) (*f)->i_reference_count++;
+    for( f = t->frames.next; *f; f++ ) (*f)->i_reference_count++;
+    if( t->fdec ) t->fdec->i_reference_count++;
+}
+
+static void rcrd_desync_context( x264_t *h, x264_t *t )
+{
+    x264_frame_t **f;
+    for( f = t->frames.reference; *f; f++ ) x264_frame_push_unused( t, *f );
+    for( f = t->frames.current; *f; f++ ) x264_frame_push_unused( t, *f );
+    for( f = t->frames.next; *f; f++ ) x264_frame_push_unused( t, *f );
+    x264_frame_push_unused( t, t->fdec );
+    memcpy( h->frames.unused, t->frames.unused, sizeof(h->frames.unused) );
+    /* all frames and frame arrays are back to the state they were before
+     * rcrd_sync_context, unless new frames had to be allocated during the
+     * test encodes, in which case those are now in h->frames.unused */
+}
+
+static int64_t rcrd_try_encode( x264_t *h, int qp, float lambda )
+{
+    /* TODO
+     * adapt range based on number of B-frames or % intra blocks?
+     * try a larger search range for I-frames?
+     * try stopping at all I-frames, not just IDR?
+     * use rcrd_qp_offset[] for the dependent frames too?
+     * use different lambda for B-frames?
+     * decide which dependent frames to code? e.g. this + next P + 2 nearest Bs, instead of just the next N frames in encode order.
+     * reuse motion vectors between candidate encodes? loses too much quality. just init the motion search?
+     * map lambda onto some intuitive scale, like crf.
+     * plug lambda into the normal ratecontrol algo, to allow target bitrate.
+     */
+    // FIXME C99
+    x264_t t_buf, *t=&t_buf;
+    int64_t rd;
+    int i;
+    int range = (h->fenc->i_type == X264_TYPE_B) ? 1 : h->param.rc.i_rcrd_window;
+
+// fprintf(stderr, "rcrd_try_encode(%d+%d,%d) ... ", h->fenc->i_frame, range, qp);
+
+    if( qp < 0 || qp > 51 )
+        return INT64_MAX;
+
+    rcrd_sync_context( h, t );
+    h->fenc->i_qpplus1 = qp + 1;
+    int i_cutoff = h->mb.i_mb_count * 7 / 8;
+    for( i = 0; i < range; i++ )
+    {
+        x264_encoder_encode( t, NULL, NULL, NULL, NULL );
+        if(i == 0 && t->stat.frame.i_mb_count_i > i_cutoff)
+            range = X264_MIN( range*8, X264_RCRD_MAX );
+        x264_frame_t *next_frame = t->frames.current[0] ? t->frames.current[0] : t->frames.next[0];
+        if( next_frame && h->rc->entry[next_frame->i_frame].pict_type == SLICE_TYPE_I )
+            break;
+        if( i > 0 && t->stat.frame.i_mb_count_i > i_cutoff )
+            break;
+    }
+
+    rd = 0;
+    for( i = 0; i < 5; i++ )
+        rd += t->stat.i_sqe_global[i] + t->stat.i_slice_size[i] * (double)lambda; // multiply in double: a float product would round the whole sum to float precision
+
+// fprintf(stderr, "%"PRIu64"\n", rd);
+    rcrd_desync_context( h, t );
+    return rd;
+}
+
+static int rcrd_get_qp( x264_t *h )
+{
+    x264_ratecontrol_t *rcc = h->rc;
+    x264_frame_t **fp;
+    x264_zone_t *z;
+    int base_qp, best_qp, pred_qp;
+    int64_t best_rd;
+    int i, dir;
+    float lambda = h->param.rc.f_rcrd_lambda;
+
+    z = get_zone( h, h->fenc->i_frame );
+    if( z )
+    {
+        if( z->b_force_qp )
+            return x264_clip3( z->i_qp + rcc->qp_type_offset[h->fenc->i_type] + .5, 0, 51 );
+        else
+            lambda *= pow( z->f_bitrate_factor, -1.5 );
+    }
+
+    /* assume the following frames use the same qp as they did in the previous pass */
+    for( fp = h->frames.current; *fp; fp++ )
+        (*fp)->i_qpplus1 = qscale2iqp( rcc->entry[(*fp)->i_frame].qscale ) + 1;
+    for( fp = h->frames.next; *fp; fp++ )
+        (*fp)->i_qpplus1 = qscale2iqp( rcc->entry[(*fp)->i_frame].qscale ) + 1;
+
+    /* predict the qp of the current frame based on how much we have
+     * changed the qps of other frames of the same type.
+     * but limit the prediction, because otherwise there's feedback. */
+    base_qp = qscale2iqp( rcc->rce->qscale );
+    pred_qp = base_qp + x264_clip3( predict_size( &rcc->rcrd_qp_offset[h->fenc->i_type-1], 1, 1 ) + .5, -1, 1 );
+    best_qp = pred_qp;
+    best_rd = rcrd_try_encode( h, pred_qp, lambda );
+
+    dir = 1;
+    int score = 0;
+    int64_t diff = 0;
+    for( i = 1; i < 20; i++ )
+    {
+        int qp = pred_qp + i*dir;
+        int64_t rd = rcrd_try_encode( h, qp, lambda );
+        if( best_rd > rd )
+        {
+            best_rd = rd;
+            best_qp = qp;
+        }
+        else if(rd - best_rd >= diff)
+            score++;
+        diff = rd - best_rd;
+        if(score == 2) break;
+    }
+    dir = -1;
+    score = 0;
+    diff = 0;
+    for( i = 1; i < 20; i++ )
+    {
+        int qp = pred_qp + i*dir;
+        int64_t rd = rcrd_try_encode( h, qp, lambda );
+        if( best_rd > rd )
+        {
+            best_rd = rd;
+            best_qp = qp;
+        }
+        else if(rd - best_rd >= diff)
+            score++;
+        diff = rd - best_rd;
+        if(score == 2) break;
+    }
+
+    update_predictor( &rcc->rcrd_qp_offset[h->fenc->i_type-1], 1, 1, best_qp - base_qp );
+
+    for( fp = h->frames.current; *fp; fp++ )
+        (*fp)->i_qpplus1 = 0;
+    for( fp = h->frames.next; *fp; fp++ )
+        (*fp)->i_qpplus1 = 0;
+    h->fenc->i_qpplus1 = 0;
+
+    for( fp = h->frames.reference; *fp; fp++ )
+    {
+        if( (*fp)->i_frame_num > h->fdec->i_frame_num )
+            (*fp)->i_poc = -1;
+    }
+
+    return best_qp;
+}
+
Index: x264.c
===================================================================
--- x264.c	(revision 718)
+++ x264.c	(working copy)
@@ -177,6 +177,8 @@
     H0( " -q, --qp Set QP (0=lossless) [%d]\n", defaults->rc.i_qp_constant );
     H0( " -B, --bitrate Set bitrate (kbit/s)\n" );
     H0( " --crf Quality-based VBR (nominal QP)\n" );
+    H1( " --rcrd-lambda Enable RD ratecontrol, and select quality\n" );
+    H1( " --rcrd-window RD lookahead range [%d]\n", defaults->rc.i_rcrd_window );
     H1( " --vbv-maxrate Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
     H0( " --vbv-bufsize Enable CBR and set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
     H1( " --vbv-init Initial VBV buffer occupancy [%.1f]\n", defaults->rc.f_vbv_buffer_init );
@@ -381,6 +383,8 @@
             { "qpmax", required_argument, NULL, 0 },
             { "qpstep", required_argument, NULL, 0 },
             { "crf", required_argument, NULL, 0 },
+            { "rcrd-lambda", required_argument, NULL, 0 },
+            { "rcrd-window", required_argument, NULL, 0 },
             { "ref", required_argument, NULL, 'r' },
             { "no-asm", no_argument, NULL, 0 },
             { "sar", required_argument, NULL, 0 },
Index: common/common.c
===================================================================
--- common/common.c	(revision 718)
+++ common/common.c	(working copy)
@@ -92,6 +92,8 @@
     param->rc.i_qp_step = 4;
     param->rc.f_ip_factor = 1.4;
     param->rc.f_pb_factor = 1.3;
+    param->rc.f_rcrd_lambda = 0;
+    param->rc.i_rcrd_window = 4;
 
     param->rc.b_stat_write = 0;
     param->rc.psz_stat_out = "x264_2pass.log";
@@ -136,6 +138,7 @@
     memset( param->cqm_8iy, 16, 64 );
     memset( param->cqm_8py, 16, 64 );
 
+    param->b_write_bitstream = 1;
     param->b_repeat_headers = 1;
     param->b_aud = 0;
 }
@@ -476,6 +479,13 @@
         p->rc.f_rf_constant = atof(value);
         p->rc.i_rc_method = X264_RC_CRF;
     }
+    OPT("rcrd-lambda")
+    {
+        p->rc.f_rcrd_lambda = atof(value);
+        p->rc.i_rc_method = X264_RC_RD;
+    }
+    OPT("rcrd-window")
+        p->rc.i_rcrd_window = atoi(value);
     OPT2("qpmin", "qp-min")
         p->rc.i_qp_min = atoi(value);
     OPT2("qpmax", "qp-max")
@@ -908,6 +918,7 @@
 
     s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
                                ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
+                               : p->rc.i_rc_method == X264_RC_RD ? "rd"
                                : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
     if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
     {
@@ -928,6 +939,9 @@
     }
     else if( p->rc.i_rc_method == X264_RC_CQP )
         s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
+    else if( p->rc.i_rc_method == X264_RC_RD )
+        s += sprintf( s, " rc_lambda=%.2f rc_window=%d",
+                      p->rc.f_rcrd_lambda, p->rc.i_rcrd_window );
     if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
     {
         s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
Index: common/cabac.c
===================================================================
--- common/cabac.c	(revision 718)
+++ common/cabac.c	(working copy)
@@ -845,6 +845,7 @@
     cb->i_range = 0x01FE;
     cb->i_queue = -1; // the first bit will be shifted away and not written
     cb->i_bytes_outstanding = 0;
+    cb->f8_bits_encoded = 0;
     cb->p_start = p_data;
     cb->p = p_data;
     cb->p_end = p_end;
Index: common/frame.c
===================================================================
--- common/frame.c	(revision 718)
+++ common/frame.c	(working copy)
@@ -94,7 +94,7 @@
         }
     }
 
-    if( h->param.analyse.i_me_method == X264_ME_ESA )
+    if( h->frames.b_have_integral )
     {
         CHECKED_MALLOC( frame->buffer[7],
                         2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
Index: common/common.h
===================================================================
--- common/common.h	(revision 718)
+++ common/common.h	(working copy)
@@ -54,6 +54,7 @@
 #define X264_THREAD_MAX 128
 #define X264_SLICE_MAX 4
 #define X264_NAL_MAX (4 + X264_SLICE_MAX)
+#define X264_RCRD_MAX 256
 
 // number of pixels (per thread) in progress at any given time.
 // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
@@ -235,7 +236,7 @@
     /* encoder parameters */
     x264_param_t param;
 
-    x264_t *thread[X264_THREAD_MAX];
+    x264_t *thread[X264_THREAD_MAX+1];
     x264_pthread_t thread_handle;
     int b_thread_active;
     int i_thread_phase; /* which thread to use for the next frame */
@@ -298,9 +299,9 @@
         /* Frames to be encoded (whose types have been decided) */
         x264_frame_t *current[X264_BFRAME_MAX+3];
         /* Temporary buffer (frames types not yet decided) */
-        x264_frame_t *next[X264_BFRAME_MAX+3];
+        x264_frame_t *next[X264_BFRAME_MAX+X264_RCRD_MAX+3];
         /* Unused frames */
-        x264_frame_t *unused[X264_BFRAME_MAX + X264_THREAD_MAX*2 + 16+4];
+        x264_frame_t *unused[X264_BFRAME_MAX + X264_THREAD_MAX*2 + X264_RCRD_MAX + 16+4];
         /* For adaptive B decision */
         x264_frame_t *last_nonb;
 
@@ -315,7 +316,8 @@
         int i_max_ref0;
         int i_max_ref1;
         int i_delay; /* Number of frames buffered for B reordering */
-        int b_have_lowres; /* Whether 1/2 resolution luma planes are being used */
+        int b_have_lowres; /* Whether 1/2 resolution luma planes are allocated */
+        int b_have_integral; /* Whether the cached block sums are allocated */
     } frames;
 
     /* current frame being encoded */
Index: x264.h
===================================================================
--- x264.h	(revision 718)
+++ x264.h	(working copy)
@@ -81,6 +81,7 @@
 #define X264_RC_CQP 0
 #define X264_RC_CRF 1
 #define X264_RC_ABR 2
+#define X264_RC_RD 3
 
 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", 0 };
@@ -258,6 +259,9 @@
         float f_ip_factor;
         float f_pb_factor;
 
+        float f_rcrd_lambda;
+        int i_rcrd_window;
+
         /* 2pass */
         int b_stat_write; /* Enable stat writing in psz_stat_out */
         char *psz_stat_out;
@@ -275,6 +279,8 @@
     } rc;
 
     /* Muxing parameters */
+    int b_write_bitstream; /* if not set, then x264 will only analyse, not generate an output file.
+                            * doesn't yet work for normal 1st pass; internal use only. */
     int b_aud; /* generate access unit delimiters */
     int b_repeat_headers; /* put SPS/PPS before each keyframe */
     int i_sps_id; /* SPS and PPS id number */