Index: encoder/encoder.c
===================================================================
--- encoder/encoder.c	(revision 718)
+++ encoder/encoder.c	(working copy)
@@ -375,7 +375,7 @@
         }
     }
 
-    if( h->param.rc.i_rc_method < 0 || h->param.rc.i_rc_method > 2 )
+    if( h->param.rc.i_rc_method < 0 || h->param.rc.i_rc_method > 3 )
     {
         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
         return -1;
@@ -384,6 +384,10 @@
     h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
     if( h->param.rc.i_rc_method == X264_RC_CRF )
         h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
+    if( h->param.rc.i_rc_method == X264_RC_RD )
+        h->param.rc.i_rcrd_window = x264_clip3( h->param.rc.i_rcrd_window, 1, X264_RCRD_MAX );
+    else
+        h->param.rc.i_rcrd_window = 0;
     if( (h->param.rc.i_rc_method == X264_RC_CQP || h->param.rc.i_rc_method == X264_RC_CRF)
         && h->param.rc.i_qp_constant == 0 )
     {
@@ -636,7 +640,7 @@
     h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
 
     /* Init frames. */
-    h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
+    h->frames.i_delay = h->param.i_bframe + h->param.i_threads + h->param.rc.i_rcrd_window - 1;
     h->frames.i_max_ref0 = h->param.i_frame_reference;
     h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
     h->frames.i_max_dpb  = h->sps->vui.i_max_dec_frame_buffering;
@@ -645,6 +649,7 @@
           || h->param.rc.i_rc_method == X264_RC_CRF
           || h->param.b_bframe_adaptive
           || h->param.b_pre_scenecut );
+    h->frames.b_have_integral = h->param.analyse.i_me_method == X264_ME_ESA;
 
     h->frames.i_last_idr = - h->param.i_keyint_max;
     h->frames.i_input    = 0;
@@ -956,11 +961,12 @@
     }
 
     /* move lowres copy of the image to the ref frame */
-    for( i = 0; i < 4; i++)
-    {
-        XCHG( uint8_t*, h->fdec->lowres[i], h->fenc->lowres[i] );
-        XCHG( uint8_t*, h->fdec->buffer_lowres[i], h->fenc->buffer_lowres[i] );
-    }
+    if( h->frames.b_have_lowres )
+        for( i = 0; i < 4; i++)
+        {
+            XCHG( uint8_t*, h->fdec->lowres[i], h->fenc->lowres[i] );
+            XCHG( uint8_t*, h->fdec->buffer_lowres[i], h->fenc->buffer_lowres[i] );
+        }
 
     /* adaptive B decision needs a pointer, since it can't use the ref lists */
     if( h->sh.i_type != SLICE_TYPE_B )
@@ -1050,7 +1056,7 @@
     {
         const int i_mb_y = mb_xy / h->sps->i_mb_width;
         const int i_mb_x = mb_xy % h->sps->i_mb_width;
-        int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
+        int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) + (h->cabac.f8_bits_encoded >> 8);
 
         if( i_mb_x == 0 )
             x264_fdec_filter_row( h, i_mb_y );
@@ -1083,7 +1089,10 @@
             {
                 if( h->sh.i_type != SLICE_TYPE_I )
                     x264_cabac_mb_skip( h, 0 );
-                x264_macroblock_write_cabac( h, &h->cabac );
+                if( h->param.b_write_bitstream )
+                    x264_macroblock_write_cabac( h, &h->cabac );
+                else
+                    x264_macroblock_size_cabac( h, &h->cabac );
             }
         }
         else
@@ -1136,7 +1145,7 @@
         }
 
         if( h->mb.b_variable_qp )
-            x264_ratecontrol_mb(h, bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) - mb_spos);
+            x264_ratecontrol_mb(h, bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) + (h->cabac.f8_bits_encoded >> 8) - mb_spos);
 
         if( h->sh.b_mbaff )
         {
@@ -1180,6 +1189,9 @@
                               - h->stat.frame.i_itex_bits
                               - h->stat.frame.i_ptex_bits
                               - h->stat.frame.i_hdr_bits;
+
+    if( !h->param.b_write_bitstream )
+        h->stat.frame.i_ptex_bits += h->cabac.f8_bits_encoded >> 8;
 }
 
 static void x264_thread_sync_context( x264_t *dst, x264_t *src )
@@ -1211,15 +1223,12 @@
 
 static int x264_slices_write( x264_t *h )
 {
-    int i_frame_size;
-
 #if VISUALIZE
     if( h->param.b_visualize )
         x264_visualize_init( h );
 #endif
 
     x264_stack_align( x264_slice_write, h );
-    i_frame_size = h->out.nal[h->out.i_nal-1].i_payload;
     x264_fdec_filter_row( h, h->sps->i_mb_height );
 
 #if VISUALIZE
@@ -1230,7 +1239,11 @@
     }
 #endif
 
-    h->out.i_frame_size = i_frame_size;
+    h->out.i_frame_size =
+        ( h->stat.frame.i_hdr_bits
+        + h->stat.frame.i_itex_bits
+        + h->stat.frame.i_ptex_bits
+        + h->stat.frame.i_misc_bits + 7 ) >> 3;
     return 0;
 }
 
@@ -1253,6 +1266,7 @@
                              x264_picture_t *pic_out )
 {
     x264_t *thread_current, *thread_prev, *thread_oldest;
+    x264_picture_t dummy_pic_out;
     int     i_nal_type;
     int     i_nal_ref_idc;
 
@@ -1282,9 +1296,15 @@
     h->fdec->i_lines_completed = -1;
 
     /* no data out */
-    *pi_nal = 0;
-    *pp_nal = NULL;
+    if( pi_nal && pp_nal )
+    {
+        *pi_nal = 0;
+        *pp_nal = NULL;
+    }
 
+    if( !pic_out )
+        pic_out = &dummy_pic_out;
+
     /* ------------------- Setup new frame from picture -------------------- */
     TIMER_START( i_mtime_encode_frame );
     if( pic_in != NULL )
@@ -1610,8 +1630,11 @@
     x264_frame_push_unused( thread_current, h->fenc );
 
     /* End bitstream, set output  */
-    *pi_nal = h->out.i_nal;
-    *pp_nal = h->out.nal;
+    if( pi_nal && pp_nal )
+    {
+        *pi_nal = h->out.i_nal;
+        *pp_nal = h->out.nal;
+    }
     h->out.i_nal = 0;
 
     /* Set output picture properties */
Index: encoder/macroblock.h
===================================================================
--- encoder/macroblock.h	(revision 718)
+++ encoder/macroblock.h	(working copy)
@@ -38,6 +38,8 @@
 void x264_macroblock_encode      ( x264_t *h );
 void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
 void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s );
+void x264_macroblock_size_cabac  ( x264_t *h, x264_cabac_t *cb );
+void x264_macroblock_size_cavlc  ( x264_t *h, bs_t *s );
 
 void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale );
Index: encoder/ratecontrol.c
===================================================================
--- encoder/ratecontrol.c	(revision 718)
+++ encoder/ratecontrol.c	(working copy)
@@ -98,6 +98,8 @@
     double rate_factor_constant;
     double ip_offset;
     double pb_offset;
+    double qp_type_offset[6];
+    predictor_t rcrd_qp_offset[5];
 
     /* 2pass stuff */
     FILE *p_stat_file_out;
@@ -134,8 +136,9 @@
 
 
 static int parse_zones( x264_t *h );
-static int init_pass2(x264_t *);
+static int init_pass2( x264_t *h );
 static float rate_estimate_qscale( x264_t *h );
+static int rcrd_get_qp( x264_t *h );
 static void update_vbv( x264_t *h, int bits );
 static void update_vbv_plan( x264_t *h );
 static double predict_size( predictor_t *p, double q, double var );
@@ -154,6 +157,10 @@
 {
     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 }
+static inline int qscale2iqp(double qscale)
+{   /* Integer-QP variant of qscale2qp(): its result plus 0.5 is clipped to 0..51. */
+    return x264_clip3( qscale2qp(qscale) + 0.5, 0, 51 );
+}
 
 /* Texture bitrate is not quite inversely proportional to qscale,
  * probably due the the changing number of SKIP blocks.
@@ -199,9 +206,19 @@
         x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
         return -1;
     }
+    if( h->param.rc.i_rc_method == X264_RC_RD && !h->param.rc.b_stat_read )
+    {
+        x264_log(h, X264_LOG_ERROR, "rcrd requires 2pass.\n");
+        return -1;
+    }
+    if( h->param.rc.i_rc_method == X264_RC_RD && h->param.rc.f_rcrd_lambda <= 0 )
+    {
+        x264_log(h, X264_LOG_ERROR, "rcrd-lambda must be > 0.\n");
+        return -1;
+    }
     if( h->param.rc.i_vbv_buffer_size )
     {
-        if( h->param.rc.i_rc_method == X264_RC_CQP )
+        if( h->param.rc.i_rc_method == X264_RC_CQP || h->param.rc.i_rc_method == X264_RC_RD )
             x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
         else if( h->param.rc.i_vbv_max_bitrate == 0 )
         {
@@ -265,11 +282,16 @@
                                  / qp2qscale( h->param.rc.f_rf_constant );
     }
 
-    rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
-    rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
+    rc->ip_offset = -6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
+    rc->pb_offset =  6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
-    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
+    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant + rc->ip_offset + 0.5, 0, 51 );
     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
+    rc->qp_type_offset[X264_TYPE_IDR] = rc->ip_offset;
+    rc->qp_type_offset[X264_TYPE_I] = rc->ip_offset;
+    rc->qp_type_offset[X264_TYPE_B] = rc->pb_offset;
+    rc->qp_type_offset[X264_TYPE_BREF] = rc->pb_offset / 2;
+    rc->qp_type_offset[X264_TYPE_P] = 0;
 
     rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
     rc->last_qscale = qp2qscale(26);
@@ -286,6 +308,9 @@
         rc->row_preds[i].coeff= .25;
         rc->row_preds[i].count= 1.0;
         rc->row_preds[i].decay= 0.5;
+        rc->rcrd_qp_offset[i].coeff= 0.0;
+        rc->rcrd_qp_offset[i].count= 1.0;
+        rc->rcrd_qp_offset[i].decay= 0.75;
     }
     *rc->pred_b_from_p = rc->pred[0];
 
@@ -632,7 +657,7 @@
     rc->accum_p_norm *= .95;
     rc->accum_p_norm += 1;
     if( h->sh.i_type == SLICE_TYPE_I )
-        rc->accum_p_qp += qp + rc->ip_offset;
+        rc->accum_p_qp += qp - rc->ip_offset;
     else
         rc->accum_p_qp += qp;
 }
@@ -687,6 +712,10 @@
     {
         q = i_force_qp - 1;
     }
+    else if( h->param.rc.i_rc_method == X264_RC_RD )
+    {
+        q = rcrd_get_qp( h );
+    }
     else if( rc->b_abr )
     {
         q = qscale2qp( rate_estimate_qscale( h ) );
@@ -842,8 +871,8 @@
             h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
                                       : 1 + h->stat.i_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
             rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
-            rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
-            rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
+            rc->qp_constant[SLICE_TYPE_I] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) / h->param.rc.f_ip_factor );
+            rc->qp_constant[SLICE_TYPE_B] = qscale2iqp( qp2qscale( h->param.rc.i_qp_constant ) * h->param.rc.f_pb_factor );
 
             x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
             x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
@@ -1291,7 +1320,7 @@
             q1 -= rcc->pb_offset/2;
 
         if(i0 && i1)
-            q = (q0 + q1) / 2 + rcc->ip_offset;
+            q = (q0 + q1) / 2 - rcc->ip_offset;
         else if(i0)
             q = q1;
         else if(i1)
@@ -1632,4 +1661,190 @@
     return 0;
 }
 
+static void rcrd_set_options( x264_t *t )
+{   /* Overrides the parameters of t, the scratch encoder copy set up by rcrd_sync_context(). */
+    x264_param_t *p = &t->param;
 
+    p->rc.i_rc_method = X264_RC_CQP;   /* constant-QP ratecontrol for the copy */
+    p->rc.b_stat_write = 0;            /* no stats-file writing */
+    p->b_write_bitstream = 0;          /* analyse only, no output bitstream */
+    p->i_log_level = X264_MIN( X264_LOG_ERROR, p->i_log_level ); /* log level becomes at most X264_LOG_ERROR */
+    t->mb.b_direct_auto_write = 0;
+    /* Analysis options: fixed values below, presumably to keep each trial encode fast -- TODO confirm. */
+    p->analyse.inter = 0;
+    p->analyse.i_me_method = X264_ME_DIA;
+    p->analyse.i_subpel_refine = X264_MIN( 2, p->analyse.i_subpel_refine ); /* capped at 2, never raised */
+    p->analyse.i_trellis = 0;
+    p->analyse.b_bidir_me = 0;
+    p->analyse.b_fast_pskip = 1;
+    p->analyse.b_mixed_references = 0;
+    p->analyse.b_transform_8x8 = 0;
+    p->i_frame_reference = X264_MIN( 2, p->i_frame_reference ); /* capped at 2, never raised */
+}
+
+static void rcrd_sync_context( x264_t *h, x264_t *t )
+{   /* Turns *t into a scratch copy of encoder *h for a trial encode; paired with rcrd_desync_context(). */
+    x264_frame_t **f;
+    /* Plain struct assignment: pointer members (fenc, fdec, the frame lists' entries, ...) stay shared with h. */
+    *t = *h;
+    t->thread[0] = t;
+    memset( t->stat.i_slice_size, 0, sizeof(t->stat.i_slice_size) ); /* rcrd_try_encode() sums these two arrays */
+    memset( t->stat.i_sqe_global, 0, sizeof(t->stat.i_sqe_global) ); /* after the trial, so both start from zero */
+    rcrd_set_options( t );
+    /* Hand the current frame back to the copy's `current` list -- presumably so the copy encodes it again; TODO confirm. */
+    x264_frame_unshift( t->frames.current, t->fenc );
+    t->fenc = NULL;
+    t->fdec->b_kept_as_ref = 0; // it has already been put in the DPB
+    /* Bump the reference count of every frame the copy holds. NOTE(review): fdec is dereferenced above but NULL-checked below. */
+    for( f = t->frames.reference; *f; f++ ) (*f)->i_reference_count++;
+    for( f = t->frames.current; *f; f++ ) (*f)->i_reference_count++;
+    for( f = t->frames.next; *f; f++ ) (*f)->i_reference_count++;
+    if( t->fdec ) t->fdec->i_reference_count++;
+}
+
+static void rcrd_desync_context( x264_t *h, x264_t *t )
+{   /* Counterpart of rcrd_sync_context(): releases the frames held by scratch copy t after a trial encode. */
+    x264_frame_t **f;
+    for( f = t->frames.reference; *f; f++ ) x264_frame_push_unused( t, *f ); /* presumably drops the count taken in rcrd_sync_context() -- TODO confirm */
+    for( f = t->frames.current; *f; f++ ) x264_frame_push_unused( t, *f );
+    for( f = t->frames.next; *f; f++ ) x264_frame_push_unused( t, *f );
+    x264_frame_push_unused( t, t->fdec );
+    memcpy( h->frames.unused, t->frames.unused, sizeof(h->frames.unused) ); /* h takes over t's unused-frame list */
+    /* all frames and frame arrays are back in the state they were in before
+     * rcrd_sync_context, unless new frames had to be allocated during the
+     * test encodes, in which case those are now in h->frames.unused */
+}
+
+/* Trial-encodes the current frame at qp, and a number of following frames, in a scratch copy of h.
+ * Returns the summed stat.i_sqe_global[] + lambda * stat.i_slice_size[], or INT64_MAX if qp is outside 0..51. */
+static int64_t rcrd_try_encode( x264_t *h, int qp, float lambda )
+{
+    /* TODO
+     * adapt range based on number of B-frames or % intra blocks?
+     * try a larger search range for I-frames?
+     * try stopping at all I-frames, not just IDR?
+     * use rcrd_qp_offset[] for the dependent frames too?
+     * use different lambda for B-frames?
+     * decide which dependent frames to code? e.g. this + next P + 2 nearest Bs, instead of just the next N frames in encode order.
+     * reuse motion vectors between candidate encodes? loses too much quality. just init the motion search?
+     * map lambda onto some intuitive scale, like crf.
+     * plug lambda into the normal ratecontrol algo, to allow target bitrate.
+     */
+    // FIXME C99
+    x264_t t_buf, *t=&t_buf;
+    x264_frame_t *next_frame;
+    int64_t rd = 0;
+    int i;
+    int range = (h->fenc->i_type == X264_TYPE_B) ? 1 : h->param.rc.i_rcrd_window;
+    int i_cutoff = h->mb.i_mb_count * 7 / 8;
+
+    if( qp < 0 || qp > 51 )
+        return INT64_MAX;
+
+    rcrd_sync_context( h, t );
+    h->fenc->i_qpplus1 = qp + 1;
+    for( i = 0; i < range; i++ )
+    {
+        x264_encoder_encode( t, NULL, NULL, NULL, NULL );
+        if(i == 0 && t->stat.frame.i_mb_count_i > i_cutoff)
+            range = X264_MIN( range*8, X264_RCRD_MAX );
+        next_frame = t->frames.current[0] ? t->frames.current[0] : t->frames.next[0];
+        /* entry[] holds num_entries 1st-pass records; a longer 2nd pass must not index past them */
+        if( next_frame && next_frame->i_frame < h->rc->num_entries
+            && h->rc->entry[next_frame->i_frame].pict_type == SLICE_TYPE_I )
+            break;
+        if( i > 0 && t->stat.frame.i_mb_count_i > i_cutoff )
+            break;
+    }
+    /* integer sum: adding the float product directly would do the whole sum in float (assuming integer stat counters) and round rd to ~24 bits */
+    for( i = 0; i < 5; i++ )
+        rd += t->stat.i_sqe_global[i] + (int64_t)( t->stat.i_slice_size[i] * (double)lambda );
+    rcrd_desync_context( h, t );
+    return rd;
+}
+
+/* QP+1 the frame used in the previous pass, or 0 (the value rcrd_get_qp() resets
+ * i_qpplus1 to) when the frame lies beyond the rcc->num_entries 1st-pass records. */
+static int rcrd_prev_qpplus1( x264_ratecontrol_t *rcc, x264_frame_t *frame )
+{
+    if( frame->i_frame >= rcc->num_entries )
+        return 0;
+    return qscale2iqp( rcc->entry[frame->i_frame].qscale ) + 1;
+}
+
+/* Steps the QP away from pred_qp in direction dir (+1 or -1), trial-encoding each
+ * candidate and recording the cheapest in *best_qp / *best_rd. Gives up after 19
+ * steps, or once two candidates neither beat the best nor narrowed the gap to it. */
+static void rcrd_search_dir( x264_t *h, int pred_qp, int dir, float lambda, int *best_qp, int64_t *best_rd )
+{
+    int64_t diff = 0;
+    int score = 0;
+    int i;
+    for( i = 1; i < 20 && score < 2; i++ )
+    {
+        int qp = pred_qp + i*dir;
+        int64_t rd = rcrd_try_encode( h, qp, lambda );
+        if( *best_rd > rd )
+        {
+            *best_rd = rd;
+            *best_qp = qp;
+        }
+        else if( rd - *best_rd >= diff )
+            score++;
+        diff = rd - *best_rd;
+    }
+}
+
+/* Chooses the QP for h->fenc: the zone's forced QP plus the frame-type offset if one applies,
+ * otherwise the candidate near the QP derived from rcc->rce->qscale with the lowest trial-encode cost. */
+static int rcrd_get_qp( x264_t *h )
+{
+    x264_ratecontrol_t *rcc = h->rc;
+    x264_frame_t **fp;
+    x264_zone_t *z;
+    int base_qp, best_qp, pred_qp;
+    int64_t best_rd;
+    float lambda = h->param.rc.f_rcrd_lambda;
+
+    z = get_zone( h, h->fenc->i_frame );
+    if( z )
+    {
+        if( z->b_force_qp )
+            return x264_clip3( z->i_qp + rcc->qp_type_offset[h->fenc->i_type] + .5, 0, 51 );
+        else
+            lambda *= pow( z->f_bitrate_factor, -1.5 );
+    }
+
+    /* assume the following frames use the same qp as they did in the previous pass */
+    for( fp = h->frames.current; *fp; fp++ )
+        (*fp)->i_qpplus1 = rcrd_prev_qpplus1( rcc, *fp );
+    for( fp = h->frames.next; *fp; fp++ )
+        (*fp)->i_qpplus1 = rcrd_prev_qpplus1( rcc, *fp );
+
+    /* predict the qp of the current frame based on how much we have
+     * changed the qps of other frames of the same type.
+     * but limit the prediction, because otherwise there's feedback. */
+    base_qp = qscale2iqp( rcc->rce->qscale );
+    pred_qp = base_qp + x264_clip3( predict_size( &rcc->rcrd_qp_offset[h->fenc->i_type-1], 1, 1 ) + .5, -1, 1 );
+    best_qp = pred_qp;
+    best_rd = rcrd_try_encode( h, pred_qp, lambda );
+
+    rcrd_search_dir( h, pred_qp,  1, lambda, &best_qp, &best_rd );
+    rcrd_search_dir( h, pred_qp, -1, lambda, &best_qp, &best_rd );
+    update_predictor( &rcc->rcrd_qp_offset[h->fenc->i_type-1], 1, 1, best_qp - base_qp );
+
+    for( fp = h->frames.current; *fp; fp++ )
+        (*fp)->i_qpplus1 = 0;
+    for( fp = h->frames.next; *fp; fp++ )
+        (*fp)->i_qpplus1 = 0;
+    h->fenc->i_qpplus1 = 0;
+
+    for( fp = h->frames.reference; *fp; fp++ )
+    {
+        if( (*fp)->i_frame_num > h->fdec->i_frame_num )
+            (*fp)->i_poc = -1;
+    }
+
+    return best_qp;
+}
+
Index: x264.c
===================================================================
--- x264.c	(revision 718)
+++ x264.c	(working copy)
@@ -177,6 +177,8 @@
     H0( "  -q, --qp <integer>          Set QP (0=lossless) [%d]\n", defaults->rc.i_qp_constant );
     H0( "  -B, --bitrate <integer>     Set bitrate (kbit/s)\n" );
     H0( "      --crf <float>           Quality-based VBR (nominal QP)\n" );
+    H1( "      --rcrd-lambda <float>   Enable RD ratecontrol, and select quality\n" );
+    H1( "      --rcrd-window <int>     RD lookahead range [%d]\n", defaults->rc.i_rcrd_window );
     H1( "      --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
     H0( "      --vbv-bufsize <integer> Enable CBR and set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
     H1( "      --vbv-init <float>      Initial VBV buffer occupancy [%.1f]\n", defaults->rc.f_vbv_buffer_init );
@@ -381,6 +383,8 @@
             { "qpmax",   required_argument, NULL, 0 },
             { "qpstep",  required_argument, NULL, 0 },
             { "crf",     required_argument, NULL, 0 },
+            { "rcrd-lambda", required_argument, NULL, 0 },
+            { "rcrd-window", required_argument, NULL, 0 },
             { "ref",     required_argument, NULL, 'r' },
             { "no-asm",  no_argument,       NULL, 0 },
             { "sar",     required_argument, NULL, 0 },
Index: common/common.c
===================================================================
--- common/common.c	(revision 718)
+++ common/common.c	(working copy)
@@ -92,6 +92,8 @@
     param->rc.i_qp_step = 4;
     param->rc.f_ip_factor = 1.4;
     param->rc.f_pb_factor = 1.3;
+    param->rc.f_rcrd_lambda = 0;
+    param->rc.i_rcrd_window = 4;
 
     param->rc.b_stat_write = 0;
     param->rc.psz_stat_out = "x264_2pass.log";
@@ -136,6 +138,7 @@
     memset( param->cqm_8iy, 16, 64 );
     memset( param->cqm_8py, 16, 64 );
 
+    param->b_write_bitstream = 1;
     param->b_repeat_headers = 1;
     param->b_aud = 0;
 }
@@ -476,6 +479,13 @@
         p->rc.f_rf_constant = atof(value);
         p->rc.i_rc_method = X264_RC_CRF;
     }
+    OPT("rcrd-lambda")
+    {
+        p->rc.f_rcrd_lambda = atof(value);
+        p->rc.i_rc_method = X264_RC_RD;
+    }
+    OPT("rcrd-window")
+        p->rc.i_rcrd_window = atoi(value);
     OPT2("qpmin", "qp-min")
         p->rc.i_qp_min = atoi(value);
     OPT2("qpmax", "qp-max")
@@ -908,6 +918,7 @@
 
     s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
                                ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
+                               : p->rc.i_rc_method == X264_RC_RD ? "rd"
                                : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
     if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
     {
@@ -928,6 +939,9 @@
     }
     else if( p->rc.i_rc_method == X264_RC_CQP )
         s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
+    else if( p->rc.i_rc_method == X264_RC_RD )
+        s += sprintf( s, " rc_lambda=%.2f rc_window=%d",
+                      p->rc.f_rcrd_lambda, p->rc.i_rcrd_window );
     if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
     {
         s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
Index: common/cabac.c
===================================================================
--- common/cabac.c	(revision 718)
+++ common/cabac.c	(working copy)
@@ -845,6 +845,7 @@
     cb->i_range = 0x01FE;
     cb->i_queue = -1; // the first bit will be shifted away and not written
     cb->i_bytes_outstanding = 0;
+    cb->f8_bits_encoded = 0;
     cb->p_start = p_data;
     cb->p       = p_data;
     cb->p_end   = p_end;
Index: common/frame.c
===================================================================
--- common/frame.c	(revision 718)
+++ common/frame.c	(working copy)
@@ -94,7 +94,7 @@
         }
     }
 
-    if( h->param.analyse.i_me_method == X264_ME_ESA )
+    if( h->frames.b_have_integral )
     {
         CHECKED_MALLOC( frame->buffer[7],
                         2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
Index: common/common.h
===================================================================
--- common/common.h	(revision 718)
+++ common/common.h	(working copy)
@@ -54,6 +54,7 @@
 #define X264_THREAD_MAX 128
 #define X264_SLICE_MAX 4
 #define X264_NAL_MAX (4 + X264_SLICE_MAX)
+#define X264_RCRD_MAX 256
 
 // number of pixels (per thread) in progress at any given time.
 // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
@@ -235,7 +236,7 @@
     /* encoder parameters */
     x264_param_t    param;
 
-    x264_t          *thread[X264_THREAD_MAX];
+    x264_t          *thread[X264_THREAD_MAX+1];
     x264_pthread_t  thread_handle;
     int             b_thread_active;
     int             i_thread_phase; /* which thread to use for the next frame */
@@ -298,9 +299,9 @@
         /* Frames to be encoded (whose types have been decided) */
         x264_frame_t *current[X264_BFRAME_MAX+3];
         /* Temporary buffer (frames types not yet decided) */
-        x264_frame_t *next[X264_BFRAME_MAX+3];
+        x264_frame_t *next[X264_BFRAME_MAX+X264_RCRD_MAX+3];
         /* Unused frames */
-        x264_frame_t *unused[X264_BFRAME_MAX + X264_THREAD_MAX*2 + 16+4];
+        x264_frame_t *unused[X264_BFRAME_MAX + X264_THREAD_MAX*2 + X264_RCRD_MAX + 16+4];
         /* For adaptive B decision */
         x264_frame_t *last_nonb;
 
@@ -315,7 +316,8 @@
         int i_max_ref0;
         int i_max_ref1;
         int i_delay;    /* Number of frames buffered for B reordering */
-        int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
+        int b_have_lowres;  /* Whether 1/2 resolution luma planes are allocated */
+        int b_have_integral; /* Whether the cached block sums are allocated */
     } frames;
 
     /* current frame being encoded */
Index: x264.h
===================================================================
--- x264.h	(revision 718)
+++ x264.h	(working copy)
@@ -81,6 +81,7 @@
 #define X264_RC_CQP                  0
 #define X264_RC_CRF                  1
 #define X264_RC_ABR                  2
+#define X264_RC_RD                   3
 
 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", 0 };
@@ -258,6 +259,9 @@
         float       f_ip_factor;
         float       f_pb_factor;
 
+        float       f_rcrd_lambda;
+        int         i_rcrd_window;
+
         /* 2pass */
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
         char        *psz_stat_out;
@@ -275,6 +279,8 @@
     } rc;
 
     /* Muxing parameters */
+    int b_write_bitstream;      /* if not set, then x264 will only analyse, not generate an output file.
+                                 * doesn't yet work for normal 1st pass; internal use only. */
     int b_aud;                  /* generate access unit delimiters */
     int b_repeat_headers;       /* put SPS/PPS before each keyframe */
     int i_sps_id;               /* SPS and PPS id number */