Index: encoder/encoder.c =================================================================== --- encoder/encoder.c (revision 451) +++ encoder/encoder.c (working copy) @@ -341,7 +341,7 @@ return -1; } - h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_SLICE_MAX ); + h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_THREADS_MAX ); h->param.i_threads = X264_MIN( h->param.i_threads, (h->param.i_height + 15) / 16 ); #if !(HAVE_PTHREAD) if( h->param.i_threads > 1 ) @@ -536,7 +536,12 @@ /* Init x264_t */ + + /* We need 4 NALs for possible AUD, SPS, PPS and SEI and at least 1 for a slice */ + h->out.i_nal_bufsize = 5; h->out.i_nal = 0; + h->out.nal = x264_malloc( h->out.i_nal_bufsize * sizeof(x264_nal_t) ); + h->out.i_bitstream = X264_MAX( 1000000, h->param.i_width * h->param.i_height * 1.7 * ( h->param.rc.b_cbr ? pow( 0.5, h->param.rc.i_qp_min ) : pow( 0.5, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor ))); @@ -921,10 +926,8 @@ int i_skip; int mb_xy; int i; + int i_pos_max; /* bit position at which the current slice is closed */ - /* init stats */ - memset( &h->stat.frame, 0, sizeof(h->stat.frame) ); - /* Slice */ x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc ); @@ -942,7 +945,13 @@ h->mb.i_last_qp = h->sh.i_qp; h->mb.i_last_dqp = 0; - for( mb_xy = h->sh.i_first_mb, i_skip = 0; mb_xy < h->sh.i_last_mb; mb_xy++ ) + i_skip = 0; + i_pos_max = bs_pos(&h->out.bs) + 8 * h->param.i_slice_size_max; + + for( mb_xy = h->sh.i_first_mb; + mb_xy < h->sh.i_last_mb + && ( h->param.i_slice_size_max <= 0 || bs_pos( &h->out.bs ) < i_pos_max ); /* a non-positive limit never splits; otherwise a slice would get no MBs and the caller would loop forever */ + mb_xy++ ) { const int i_mb_y = mb_xy / h->sps->i_mb_width; const int i_mb_x = mb_xy % h->sps->i_mb_width; @@ -1057,19 +1066,41 @@ x264_nal_end( h ); /* Compute misc bits */ - h->stat.frame.i_misc_bits = bs_pos( &h->out.bs ) + h->stat.frame.i_misc_bits += bs_pos( &h->out.bs ) /* NOTE(review): bs_pos() looks cumulative over the whole buffer, so += may over-count from the second slice on -- confirm */ + NALU_OVERHEAD * 8 - h->stat.frame.i_itex_bits - h->stat.frame.i_ptex_bits - h->stat.frame.i_hdr_bits; + /* Needed to check if there is another slice possible in the current frame */ + 
h->sh.i_first_mb = mb_xy; + return 0; } +static int x264_slice_write_thread( x264_t *h ) +{ + /* init stats: reset once per frame so the counters accumulate over every slice written below */ + memset( &h->stat.frame, 0, sizeof(h->stat.frame) ); + + /* Run as long as the last MB isn't reached; each x264_slice_write() call advances h->sh.i_first_mb */ + while( h->sh.i_first_mb < h->sh.i_last_mb ) + { + /* Grow the NAL array before writing, so the slice NAL always has a slot and one spare entry remains afterwards */ + if( h->out.i_nal + 1 >= h->out.i_nal_bufsize ) + { + h->out.i_nal_bufsize *= 2; + h->out.nal = x264_realloc( h->out.nal, h->out.i_nal_bufsize * sizeof(x264_nal_t) ); + } + + x264_slice_write( h ); + } + + return 0; +} + static inline int x264_slices_write( x264_t *h ) { - int i_frame_size; - #if VISUALIZE if( h->param.b_visualize ) x264_visualize_init( h ); @@ -1077,12 +1108,10 @@ if( h->param.i_threads == 1 ) { - x264_slice_write( h ); - i_frame_size = h->out.nal[h->out.i_nal-1].i_payload; + x264_slice_write_thread( h ); } else { - int i_nal = h->out.i_nal; int i_bs_size = h->out.i_bitstream / h->param.i_threads; int i; /* duplicate contexts */ @@ -1092,42 +1121,55 @@ if( i > 0 ) { memcpy( t, h, sizeof(x264_t) ); + + /* Create a temporary place for the NALs; sized from the thread's own capacity so the allocation and t->out.i_nal_bufsize agree */ + t->out.i_nal_bufsize = 4; + t->out.i_nal = 0; + t->out.nal = x264_malloc( t->out.i_nal_bufsize * sizeof(x264_nal_t) ); + t->out.p_bitstream += i*i_bs_size; bs_init( &t->out.bs, t->out.p_bitstream, i_bs_size ); } t->sh.i_first_mb = (i * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width; t->sh.i_last_mb = ((i+1) * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width; - t->out.i_nal = i_nal + i; } /* dispatch */ #if HAVE_PTHREAD { - pthread_t handles[X264_SLICE_MAX]; + pthread_t handles[X264_THREADS_MAX]; void *status; for( i = 0; i < h->param.i_threads; i++ ) - pthread_create( &handles[i], NULL, (void*)x264_slice_write, (void*)h->thread[i] ); + pthread_create( &handles[i], NULL, (void*)x264_slice_write_thread, (void*)h->thread[i] ); for( i = 0; i < h->param.i_threads; i++ ) pthread_join( handles[i], &status ); } #else for( i = 0; i < h->param.i_threads; i++ ) - x264_slice_write( h->thread[i] ); + 
x264_slice_write_thread( h->thread[i] ); #endif /* merge contexts */ - i_frame_size = h->out.nal[i_nal].i_payload; for( i = 1; i < h->param.i_threads; i++ ) { int j; x264_t *t = h->thread[i]; - h->out.nal[i_nal+i] = t->out.nal[i_nal+i]; - i_frame_size += t->out.nal[i_nal+i].i_payload; + + /* Copy the thread's NALs back into the main thread, growing the main array first if they do not fit. */ + if ( h->out.i_nal + t->out.i_nal >= h->out.i_nal_bufsize ) + { + h->out.i_nal_bufsize = h->out.i_nal + t->out.i_nal + 1; + h->out.nal = x264_realloc( h->out.nal, h->out.i_nal_bufsize * sizeof(x264_nal_t) ); + } + memcpy( &h->out.nal[h->out.i_nal], t->out.nal, t->out.i_nal * sizeof(x264_nal_t) ); + h->out.i_nal += t->out.i_nal; + + x264_free( t->out.nal ); /* the thread's temporary NAL array is no longer needed once copied */ + // all entries in stat.frame are ints for( j = 0; j < sizeof(h->stat.frame) / sizeof(int); j++ ) ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j]; } - h->out.i_nal = i_nal + h->param.i_threads; } #if VISUALIZE @@ -1138,7 +1180,10 @@ } #endif - return i_frame_size; + return ( h->stat.frame.i_hdr_bits + + h->stat.frame.i_itex_bits + + h->stat.frame.i_ptex_bits + + h->stat.frame.i_misc_bits ) >> 3; /* frame size in bytes: header + texture + misc bits, each counted once */ } /**************************************************************************** @@ -1779,6 +1824,7 @@ x264_free( h->out.p_bitstream ); for( i = 1; i < h->param.i_threads; i++ ) x264_free( h->thread[i] ); + x264_free( h->out.nal ); x264_free( h ); } Index: encoder/analyse.c =================================================================== --- encoder/analyse.c (revision 451) +++ encoder/analyse.c (working copy) @@ -369,16 +369,20 @@ if( b_l && b_t ) { + *pi_count = 6; *mode++ = I_PRED_4x4_DC; *mode++ = I_PRED_4x4_H; *mode++ = I_PRED_4x4_V; *mode++ = I_PRED_4x4_DDL; - *mode++ = I_PRED_4x4_DDR; - *mode++ = I_PRED_4x4_VR; - *mode++ = I_PRED_4x4_HD; + if( i_neighbour & MB_TOPLEFT ) + { + *mode++ = I_PRED_4x4_DDR; + *mode++ = I_PRED_4x4_VR; + *mode++ = I_PRED_4x4_HD; + *pi_count += 3; + } *mode++ = I_PRED_4x4_VL; *mode++ = I_PRED_4x4_HU; - *pi_count = 9; } else if( b_l ) { 
Index: x264.c =================================================================== --- x264.c (revision 451) +++ x264.c (working copy) @@ -303,6 +303,13 @@ " smpte170m, smpte240m, GBR, YCgCo\n" " --chromaloc Specify chroma sample location (0 to 5) [%d]\n" "\n" + "Bitstream:\n" + "\n" + " --aud Use access unit delimiters\n" + " --max-slicesize Limit the size of each slice (in bytes),\n" + " splitting them if they get too big.\n" + " (0 => unlimited) [%d]\n" + "\n" "Input/Output:\n" "\n" " --level Specify level (as defined by Annex A)\n" @@ -318,7 +325,6 @@ " -v, --verbose Print stats for each frame\n" " --progress Show a progress indicator while encoding\n" " --visualize Show MB types overlayed on the encoded video\n" - " --aud Use access unit delimiters\n" "\n", X264_BUILD, X264_VERSION, #ifdef AVIS_INPUT @@ -367,7 +373,8 @@ strtable_lookup( colorprim_str, defaults->vui.i_colorprim ), strtable_lookup( transfer_str, defaults->vui.i_transfer ), strtable_lookup( colmatrix_str, defaults->vui.i_colmatrix ), - defaults->vui.i_chroma_loc + defaults->vui.i_chroma_loc, + defaults->i_slice_size_max /* fills the [%d] of the --max-slicesize help entry */ ); } @@ -488,6 +495,7 @@ #define OPT_NO_FAST_PSKIP 317 #define OPT_BIME 318 #define OPT_NR 319 +#define OPT_SLICE_MAX 320 /* --max-slicesize */ static struct option long_options[] = { @@ -544,6 +552,7 @@ { "cplxblur",required_argument, NULL, OPT_CPLXBLUR }, { "zones", required_argument, NULL, OPT_ZONES }, { "threads", required_argument, NULL, OPT_THREADS }, + { "max-slicesize", required_argument, NULL, OPT_SLICE_MAX }, { "no-psnr", no_argument, NULL, OPT_NOPSNR }, { "quiet", no_argument, NULL, OPT_QUIET }, { "verbose", no_argument, NULL, 'v' }, @@ -830,6 +839,9 @@ case OPT_THREADS: param->i_threads = atoi(optarg); break; + case OPT_SLICE_MAX: + param->i_slice_size_max = atoi(optarg); /* slice size limit in bytes; 0 => unlimited. NOTE(review): atoi() does not reject negative or non-numeric input */ + break; case OPT_NOPSNR: param->analyse.b_psnr = 0; break; Index: common/common.c =================================================================== --- common/common.c (revision 451) +++ common/common.c (working copy) @@ 
-62,6 +62,7 @@ param->i_fps_num = 25; param->i_fps_den = 1; param->i_level_idc = 51; /* as close to "unrestricted" as we can get */ + param->i_slice_size_max = 0; /* don't restrict by default */ /* Encoder parameters */ param->i_frame_reference = 1; Index: common/common.h =================================================================== --- common/common.h (revision 451) +++ common/common.h (working copy) @@ -218,8 +218,7 @@ */ #define X264_BFRAME_MAX 16 -#define X264_SLICE_MAX 4 -#define X264_NAL_MAX (4 + X264_SLICE_MAX) +#define X264_THREADS_MAX 4 typedef struct x264_ratecontrol_t x264_ratecontrol_t; typedef struct x264_vlc_table_t x264_vlc_table_t; @@ -229,13 +228,14 @@ /* encoder parameters */ x264_param_t param; - x264_t *thread[X264_SLICE_MAX]; + x264_t *thread[X264_THREADS_MAX]; /* bitstream output */ struct { int i_nal; - x264_nal_t nal[X264_NAL_MAX]; + int i_nal_bufsize; /* number of entries allocated in nal */ + x264_nal_t *nal; /* heap-allocated NAL array, grown on demand */ int i_bitstream; /* size of p_bitstream */ uint8_t *p_bitstream; /* will hold data for all nal */ bs_t bs; Index: x264.h =================================================================== --- x264.h (revision 451) +++ x264.h (working copy) @@ -35,7 +35,7 @@ #include <stdarg.h> -#define X264_BUILD 44 +#define X264_BUILD 45 /* x264_t: * opaque handler for decoder and encoder */ @@ -241,8 +241,10 @@ char *psz_zones; /* alternate method of specifying zones */ } rc; + /* Output format */ int b_aud; /* generate access unit delimiters */ int b_repeat_headers; /* put SPS/PPS before each keyframe */ + int i_slice_size_max; /* split slices if larger than this many bytes (0 => unlimited) */ } x264_param_t; typedef struct {