diff --git a/common/frame.c b/common/frame.c index ce58b34..9973b36 100644 --- a/common/frame.c +++ b/common/frame.c @@ -26,6 +26,21 @@ #define ALIGN(x,a) (((x)+((a)-1))&~((a)-1)) +static int align_stride( int x, int align, int disalign ) +{ + x = ALIGN( x, align ); + if( !(x&(disalign-1)) ) + x += align; + return x; +} + +static int align_plane( int x, int disalign ) +{ + if( !(x&(disalign-1)) ) + x += 128; + return x; +} + x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ) { x264_frame_t *frame; @@ -37,6 +52,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ) int luma_plane_size; int chroma_plane_size; int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16; + int disalign = 1024; CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) ); @@ -48,13 +64,13 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ) frame->i_plane = 3; for( i = 0; i < 3; i++ ) { - frame->i_stride[i] = ALIGN( i_stride >> !!i, align ); + frame->i_stride[i] = align_stride( i_stride >> !!i, align, disalign ); frame->i_width[i] = i_width >> !!i; frame->i_lines[i] = i_lines >> !!i; } - luma_plane_size = (frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv)); - chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*i_padv)); + luma_plane_size = align_plane( frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv), disalign ); + chroma_plane_size = align_plane( frame->i_stride[1] * (frame->i_lines[1] + 2*i_padv), disalign ); for( i = 1; i < 3; i++ ) { CHECKED_MALLOC( frame->buffer[i], chroma_plane_size ); @@ -121,10 +137,10 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ) if( h->frames.b_have_lowres ) { frame->i_width_lowres = frame->i_width[0]/2; - frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align ); + frame->i_stride_lowres = align_stride( frame->i_width_lowres + 2*PADH, align, disalign ); frame->i_lines_lowres = frame->i_lines[0]/2; - luma_plane_size = frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*i_padv); + luma_plane_size = align_plane( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*i_padv), disalign ); CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size ); for( i = 0; i < 4; i++ )