Index: encoder/encoder.c
===================================================================
--- encoder/encoder.c (revision 600)
+++ encoder/encoder.c (working copy)
@@ -38,7 +38,7 @@
 #endif
 
 //#define DEBUG_MB_TYPE
-//#define DEBUG_DUMP_FRAME
+#define DEBUG_DUMP_FRAME
 //#define DEBUG_BENCHMARK
 
 #ifdef DEBUG_BENCHMARK
@@ -76,22 +76,84 @@
 }
 
 #ifdef DEBUG_DUMP_FRAME
-static void x264_frame_dump( x264_t *h, x264_frame_t *fr, char *name )
+static void x264_frame_dump( x264_t *h, x264_frame_t *fr, char *name, float *map, float ssim_y )
 {
     FILE *f = fopen( name, "r+b" );
-    int i, y;
+    int i, y, x;
     if( !f )
         return;
 
     /* Write the frame in display order */
-    fseek( f, fr->i_frame * h->param.i_height * h->param.i_width * 3 / 2, SEEK_SET );
+    fseek( f, fr->i_frame * h->param.i_height * h->param.i_width * 3 >> !map, SEEK_SET );
 
     for( i = 0; i < fr->i_plane; i++ )
     {
-        for( y = 0; y < h->param.i_height / ( i == 0 ? 1 : 2 ); y++ )
+        for( y = 0; y < h->param.i_height >> !!i; y++ )
         {
-            fwrite( &fr->plane[i][y*fr->i_stride[i]], 1, h->param.i_width / ( i == 0 ? 1 : 2 ), f );
+            fwrite( &fr->plane[i][y*fr->i_stride[i]], 1, h->param.i_width >> !!i, f );
         }
+
+        if( map )
+        {
+            if( i == 0 )
+            {
+                int iw = h->param.i_width;
+                int ih = h->param.i_height;
+                int sw = (iw-2)/4-1;
+                int sh = (ih-2)/4-1;
+                float scale = 32 / (1 - ssim_y);
+#if 0
+                float fbuf[sw];
+                uint8_t buf[iw];
+                for( y=0; y>2 >= sh-1 )
+                        memcpy( fbuf, map+(sh-1)*sw, sw*sizeof(float) );
+                    else
+                    {
+                        float b = ((y-2)&3)*.25;
+                        float a = 1.0-b;
+                        float *row0 = map+((y-2)>>2)*sw;
+                        float *row1 = map+((y+2)>>2)*sw;
+                        for( x=0; x>2;
+                        buf[4*x+0] = (u+v+1)>>1;
+                        buf[4*x+1] = (u+v*3+2)>>2;
+                        buf[4*x+2] = v;
+                        u = v;
+                    }
+                    for( x=sw*4-1; xparam.i_width, f );
+                }
+#else
+                uint8_t buf[iw];
+                for(y=0; yparam.i_width >> 1];
+                memset( buf, 128, h->param.i_width >> 1 );
+                for( y = 0; y < h->param.i_height >> 1; y++ )
+                    fwrite( buf, 1, h->param.i_width >> 1, f );
+            }
+        }
     }
     fclose( f );
 }
@@ -1677,13 +1739,20 @@
                   x264_psnr( sqe[2], h->param.i_width * h->param.i_height / 4) );
     }
 
+    static float *ssim_map = NULL;
+    float ssim_y = 0;
+
     if( h->param.analyse.b_ssim )
     {
+#ifdef DEBUG_DUMP_FRAME
+        if( !ssim_map )
+            ssim_map = x264_malloc( ((h->param.i_width-2)/4-1) * ((h->param.i_height-2)/4-1) * sizeof(float) );
+#endif
         // offset by 2 pixels to avoid alignment of ssim blocks with dct blocks
-        float ssim_y = x264_pixel_ssim_wxh( &h->pixf,
+        ssim_y = x264_pixel_ssim_wxh( &h->pixf,
             frame_psnr->plane[0] + 2+2*frame_psnr->i_stride[0], frame_psnr->i_stride[0],
             h->fenc->plane[0] + 2+2*h->fenc->i_stride[0], h->fenc->i_stride[0],
-            h->param.i_width-2, h->param.i_height-2 );
+            h->param.i_width-2, h->param.i_height-2, ssim_map );
         h->stat.f_ssim_mean_y[i_slice_type] += ssim_y;
         snprintf( psz_message + strlen(psz_message), 80 - strlen(psz_message),
                   " SSIM Y:%.5f", ssim_y );
@@ -1724,7 +1793,7 @@
 
 #ifdef DEBUG_DUMP_FRAME
     /* Dump reconstructed frame */
-    x264_frame_dump( h, frame_psnr, "fdec.yuv" );
+    x264_frame_dump( h, frame_psnr, "fdec.yuv", ssim_map, ssim_y );
 #endif
     return 0;
 }
Index: common/pixel.c
===================================================================
--- common/pixel.c (revision 600)
+++ common/pixel.c (working copy)
@@ -378,10 +378,13 @@
     return ssim;
 }
 
+#include
+
 float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
                            uint8_t *pix1, int stride1,
                            uint8_t *pix2, int stride2,
-                           int width, int height )
+                           int width, int height,
+                           float *map )
 {
     int x, y, z;
     float ssim = 0.0;
@@ -392,14 +395,29 @@
     z = 0;
     for( y = 1; y < height; y++ )
     {
+        float *p_map = &map[(y-1)*(width-1)];
         for( ; z <= y; z++ )
         {
             XCHG( void*, sum0, sum1 );
             for( x = 0; x < width; x+=2 )
                 pf->ssim_4x4x2_core( &pix1[4*(x+z*stride1)], stride1, &pix2[4*(x+z*stride2)], stride2, &sum0[x] );
         }
-        for( x = 0; x < width-1; x += 4 )
-            ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
+        if( map )
+        {
+            for( x = 0; x < width-1; x++ )
+            {
+                ssim += p_map[x] =
+                    ssim_end1( sum0[x][0] + sum0[x+1][0] + sum1[x][0] + sum1[x+1][0],
+                               sum0[x][1] + sum0[x+1][1] + sum1[x][1] + sum1[x+1][1],
+                               sum0[x][2] + sum0[x+1][2] + sum1[x][2] + sum1[x+1][2],
+                               sum0[x][3] + sum0[x+1][3] + sum1[x][3] + sum1[x+1][3] );
+            }
+        }
+        else
+        {
+            for( x = 0; x < width-1; x += 4 )
+                ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
+        }
     }
     return ssim / ((width-1) * (height-1));
 }
Index: common/pixel.h
===================================================================
--- common/pixel.h (revision 600)
+++ common/pixel.h (working copy)
@@ -100,6 +100,6 @@
 
 void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
 int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
-float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
+float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height, float *map );
 
 #endif
Index: tools/checkasm.c
===================================================================
--- tools/checkasm.c (revision 600)
+++ tools/checkasm.c (working copy)
@@ -138,8 +138,8 @@
         float res_c, res_a;
         ok = 1;
         x264_cpu_restore( cpu_new );
-        res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28 );
-        res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
+        res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28, NULL );
+        res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28, NULL );
         if( fabs(res_c - res_a) > 1e-8 )
         {
             ok = 0;