Index: encoder/encoder.c =================================================================== --- encoder/encoder.c (revision 395) +++ encoder/encoder.c (working copy) @@ -1072,6 +1072,16 @@ { int i_frame_size; + if( h->param.analyse.b_ssim_rd ) + { + //FIXME not threadsafe + x264_frame_expand_border( h->fenc ); + memcpy( h->fdec->plane[0], h->fenc->plane[0], h->fenc->i_stride[0] * h->fenc->i_lines[0] ); + memcpy( h->fdec->plane[1], h->fenc->plane[1], h->fenc->i_stride[1] * h->fenc->i_lines[1] ); + memcpy( h->fdec->plane[2], h->fenc->plane[2], h->fenc->i_stride[2] * h->fenc->i_lines[2] ); + x264_frame_expand_border( h->fdec ); + } + #if VISUALIZE if( h->param.b_visualize ) x264_visualize_init( h ); @@ -1531,6 +1541,7 @@ if( h->param.analyse.b_psnr ) { int64_t i_sqe_y, i_sqe_u, i_sqe_v; + int64_t i_ssim_y, i_ssim_u, i_ssim_v; /* PSNR */ i_sqe_y = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[0], frame_psnr->i_stride[0], h->fenc->plane[0], h->fenc->i_stride[0], h->param.i_width, h->param.i_height ); @@ -1538,16 +1549,31 @@ i_sqe_v = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[2], frame_psnr->i_stride[2], h->fenc->plane[2], h->fenc->i_stride[2], h->param.i_width/2, h->param.i_height/2); x264_cpu_restore( h->param.cpu ); + i_ssim_y = x264_pixel_ssim( frame_psnr->plane[0]+4+4*frame_psnr->i_stride[0], frame_psnr->i_stride[0], + h->fenc->plane[0]+4+4*h->fenc->i_stride[0], h->fenc->i_stride[0], + h->param.i_width-8, h->param.i_height-8 ) >>8; + i_ssim_u = x264_pixel_ssim( frame_psnr->plane[1]+4+4*frame_psnr->i_stride[1], frame_psnr->i_stride[1], + h->fenc->plane[1]+4+4*h->fenc->i_stride[1], h->fenc->i_stride[1], + h->param.i_width/2-8, h->param.i_height/2-8 ) >>8; + i_ssim_v = x264_pixel_ssim( frame_psnr->plane[2]+4+4*frame_psnr->i_stride[2], frame_psnr->i_stride[2], + h->fenc->plane[2]+4+4*h->fenc->i_stride[2], h->fenc->i_stride[2], + h->param.i_width/2-8, h->param.i_height/2-8 ) >>8; + x264_cpu_restore( h->param.cpu ); + h->stat.i_sqe_global[i_slice_type] += i_sqe_y 
+ i_sqe_u + i_sqe_v; h->stat.f_psnr_average[i_slice_type] += x264_psnr( i_sqe_y + i_sqe_u + i_sqe_v, 3 * h->param.i_width * h->param.i_height / 2 ); h->stat.f_psnr_mean_y[i_slice_type] += x264_psnr( i_sqe_y, h->param.i_width * h->param.i_height ); h->stat.f_psnr_mean_u[i_slice_type] += x264_psnr( i_sqe_u, h->param.i_width * h->param.i_height / 4 ); h->stat.f_psnr_mean_v[i_slice_type] += x264_psnr( i_sqe_v, h->param.i_width * h->param.i_height / 4 ); - snprintf( psz_message, 80, " PSNR Y:%2.2f U:%2.2f V:%2.2f", + snprintf( psz_message, 80, " PSNR Y:%.2f U:%.2f V:%.2f SSIM Y:%.5f U:%.5f V:%.5f", x264_psnr( i_sqe_y, h->param.i_width * h->param.i_height ), x264_psnr( i_sqe_u, h->param.i_width * h->param.i_height / 4), - x264_psnr( i_sqe_v, h->param.i_width * h->param.i_height / 4) ); + x264_psnr( i_sqe_v, h->param.i_width * h->param.i_height / 4), + (double)i_ssim_y / ((h->param.i_width-8) * (h->param.i_height-8)), + (double)i_ssim_u / ((h->param.i_width/2-8) * (h->param.i_height/2-8)), + (double)i_ssim_v / ((h->param.i_width/2-8) * (h->param.i_height/2-8)) + ); psz_message[79] = '\0'; } else Index: encoder/rdo.c =================================================================== --- encoder/rdo.c (revision 395) +++ encoder/rdo.c (working copy) @@ -55,12 +55,24 @@ x264_macroblock_encode( h ); - i_ssd = h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], - h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] ) - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], h->mb.pic.i_stride[1], - h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] ) - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], h->mb.pic.i_stride[2], - h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] ); + if( h->param.analyse.b_ssim_rd ) + { + int i; + i_ssd = 0; + for( i = 0; i < 3; i++ ) + i_ssd += x264_pixel_ssim( h->mb.pic.p_fenc[i]-4-4*h->mb.pic.i_stride[i], h->mb.pic.i_stride[i], + h->mb.pic.p_fdec[i]-4-4*h->mb.pic.i_stride[i], h->mb.pic.i_stride[i], + i ? 16 : 24, i ? 
16 : 24 ); + } + else + { + i_ssd = h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], + h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] ) + + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], h->mb.pic.i_stride[1], + h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] ) + + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], h->mb.pic.i_stride[2], + h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] ); + } if( IS_SKIP( h->mb.i_type ) ) { Index: encoder/analyse.c =================================================================== --- encoder/analyse.c (revision 395) +++ encoder/analyse.c (working copy) @@ -143,6 +143,19 @@ 3686,4645,5852,7373 }; +// pc 'join ", ", map int(550*2**($_*0.928/6)), 0..51' +static const int i_qp0_cost_ssim_table[52] = { + 550, 612, 681, 758, 844, 940, + 1046, 1164, 1296, 1443, 1606, 1788, + 1991, 2216, 2467, 2746, 3057, 3403, + 3788, 4216, 4694, 5225, 5816, 6474, + 7207, 8023, 8931, 9941, 11066, 12319, + 13713, 15265, 16992, 18915, 21056, 23438, + 26091, 29044, 32330, 35989, 40062, 44595, + 49642, 55260, 61513, 68474, 76223, 84849, + 94451, 105140, 117038, 130282 +}; + /* TODO: calculate CABAC costs */ static const int i_mb_b_cost_table[19] = { 9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0 @@ -188,7 +201,7 @@ /* conduct the analysis using this lamda and QP */ a->i_qp = h->mb.i_qp = i_qp; a->i_lambda = i_qp0_cost_table[i_qp]; - a->i_lambda2 = i_qp0_cost2_table[i_qp]; + a->i_lambda2 = h->param.analyse.b_ssim_rd ? i_qp0_cost_ssim_table[i_qp] : i_qp0_cost2_table[i_qp]; a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 && ( h->sh.i_type != SLICE_TYPE_B || h->param.analyse.b_bframe_rdo ); Index: x264.c =================================================================== --- x264.c (revision 395) +++ x264.c (working copy) @@ -254,6 +254,7 @@ " -m, --subme Subpixel motion estimation and partition\n" " decision quality: 1=fast, 6=best. [%d]\n" " --b-rdo RD based mode decision for B-frames. 
Requires subme 6.\n" + " --ssim-rd Use SSIM instead of SSD in RDO.\n" " --mixed-refs Decide references on a per partition basis\n" " --no-chroma-me Ignore chroma in motion estimation\n" " --bime Jointly optimize both MVs in B-frames\n" @@ -482,6 +483,7 @@ #define OPT_B_RDO 316 #define OPT_NO_FAST_PSKIP 317 #define OPT_BIME 318 +#define OPT_SSIM_RD 319 static struct option long_options[] = { @@ -516,6 +518,7 @@ { "merange", required_argument, NULL, OPT_MERANGE }, { "subme", required_argument, NULL, 'm' }, { "b-rdo", no_argument, NULL, OPT_B_RDO }, + { "ssim-rd", no_argument, NULL, OPT_SSIM_RD }, { "mixed-refs", no_argument, NULL, OPT_MIXED_REFS }, { "no-chroma-me", no_argument, NULL, OPT_NO_CHROMA_ME }, { "bime", no_argument, NULL, OPT_BIME }, @@ -742,6 +745,9 @@ case OPT_B_RDO: param->analyse.b_bframe_rdo = 1; break; + case OPT_SSIM_RD: + param->analyse.b_ssim_rd = 1; + break; case OPT_MIXED_REFS: param->analyse.b_mixed_references = 1; break; Index: common/pixel.c =================================================================== --- common/pixel.c (revision 395) +++ common/pixel.c (working copy) @@ -29,9 +29,9 @@ #include #include #include +#include -#include "x264.h" -#include "pixel.h" +#include "common.h" #include "clip1.h" #ifdef HAVE_MMXEXT @@ -302,6 +302,103 @@ /**************************************************************************** + * visual structural similarity metric + * "Image Quality Assessment: From Error Visibility to Structural Similarity" + * Z.Wang, A.Bovik, et al. 
/****************************************************************************
 * x264_pixel_ssim: visual structural similarity metric
 * "Image Quality Assessment: From Error Visibility to Structural Similarity"
 * Z.Wang, A.Bovik, et al. IEEE 1057-7149/04
 *
 * Compares two 8-bit planes over an i_width x i_height area and returns a
 * distortion value: 256 * (i_width*i_height - sum of per-pixel SSIM terms),
 * truncated to an integer.  Identical inputs give 0; the value grows as the
 * planes become less similar.
 *
 *   pix1, i_pix1   first plane (pointer to the area's top-left pixel), stride
 *   pix2, i_pix2   second plane, stride
 *   i_width, i_height   size of the area; 0 is returned if either is <= 0
 *
 * The term for pixel (x,y) is computed from an unweighted 8x8 window covering
 * columns x-3..x+4 and rows y-3..y+4.  Window sums are kept as running
 * (prefix) sums, so the function reads rows [-4, i_height+3] and columns
 * [-4, i_width+3] relative to pix1/pix2; all of that must be readable.
 *
 * Scratch rows are variable-length automatic arrays (16 bytes per column).
 ****************************************************************************/
int64_t x264_pixel_ssim( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
{
    /* Block-scoped constants rather than #defines, so that short names such
     * as R/K1/C1 do not leak into the rest of the translation unit. */
    enum { SSIM_R = 4 };            /* window = 8x8 unweighted */
    const double k1 = .01;
    const double k2 = .03;
    /* Stabilising constants.  Window sums (64 pixels) are used in place of
     * means, so every term below carries a factor of 64*64. */
    const int c1 = (int)(k1*k1*255*255*64*64+.5);
    const int c2 = (int)(k2*k2*255*255*64*64+.5);

    double ssim = 0;
    int x, y, i;

    /* An empty area has no distortion.  This also keeps the array lengths
     * below strictly positive (a VLA of non-positive length is undefined). */
    if( i_width <= 0 || i_height <= 0 )
        return 0;

    /* One entry per column in [-SSIM_R, i_width+SSIM_R-1].  Entry k holds the
     * horizontal prefix sum, over columns -SSIM_R..k-SSIM_R, of the vertical
     * 8-row sums.  The narrow types wrap around; that is harmless because
     * only differences of two entries are used and a true window sum fits
     * (64*255 < 2^16, 64*255*255 < 2^32). */
    const int i_cols = i_width + 2*SSIM_R;
    uint16_t buf_m1[i_cols];
    uint16_t buf_m2[i_cols];
    uint32_t buf_m11[i_cols];
    uint32_t buf_m22[i_cols];
    uint32_t buf_m12[i_cols];

    /* Initial state: vertical sums over rows -SSIM_R..SSIM_R-1. */
    {
        uint8_t *p1 = pix1 - SSIM_R;
        uint8_t *p2 = pix2 - SSIM_R;
        uint32_t m1 = 0, m2 = 0, m11 = 0, m12 = 0, m22 = 0;
        for( x = 0; x < i_cols; x++, p1++, p2++ )
        {
            for( i = -SSIM_R; i < SSIM_R; i++ )
            {
                int v1 = p1[i*i_pix1];
                int v2 = p2[i*i_pix2];
                m1  += v1;
                m2  += v2;
                m11 += v1*v1;
                m12 += v1*v2;
                m22 += v2*v2;
            }
            buf_m1[x]  = m1;
            buf_m2[x]  = m2;
            buf_m11[x] = m11;
            buf_m12[x] = m12;
            buf_m22[x] = m22;
        }
    }

    for( y = 0; y < i_height; y++ )
    {
        /* Slide the window down one row: row y-SSIM_R leaves, row y+SSIM_R enters. */
        uint8_t *o1 = pix1 + (y-SSIM_R)*i_pix1;
        uint8_t *o2 = pix2 + (y-SSIM_R)*i_pix2;
        uint8_t *p1 = pix1 + (y+SSIM_R)*i_pix1;
        uint8_t *p2 = pix2 + (y+SSIM_R)*i_pix2;
        uint32_t d1 = 0, d2 = 0, d11 = 0, d12 = 0, d22 = 0;

        /* The last column any window needs is i_width+SSIM_R-1, so stop there
         * instead of touching one extra column to the right. */
        for( x = -SSIM_R; x < i_width+SSIM_R; x++ )
        {
            int u1 = o1[x];
            int u2 = o2[x];
            int v1 = p1[x];
            int v2 = p2[x];
            /* d* are prefix sums of the per-column change (unsigned wrap-around intended) */
            d1  += v1 - u1;
            d2  += v2 - u2;
            d11 += v1*v1 - u1*u1;
            d12 += v1*v2 - u1*u2;
            d22 += v2*v2 - u2*u2;
            buf_m1[x+SSIM_R]  += d1;
            buf_m2[x+SSIM_R]  += d2;
            buf_m11[x+SSIM_R] += d11;
            buf_m12[x+SSIM_R] += d12;
            buf_m22[x+SSIM_R] += d22;
        }

        for( x = 0; x < i_width; x++ )
        {
            /* 8x8 window sums = difference of two prefix sums */
            uint16_t m1  = buf_m1[x+2*SSIM_R]  - buf_m1[x];
            uint16_t m2  = buf_m2[x+2*SSIM_R]  - buf_m2[x];
            uint32_t m11 = buf_m11[x+2*SSIM_R] - buf_m11[x];
            uint32_t m12 = buf_m12[x+2*SSIM_R] - buf_m12[x];
            uint32_t m22 = buf_m22[x+2*SSIM_R] - buf_m22[x];

            int M1M1 = m1*m1;
            int M1M2 = m1*m2;
            int M2M2 = m2*m2;
            int M11 = 64*m11;
            int M12 = 64*m12;
            int M22 = 64*m22;
            /* (co)variances, scaled by 64*64 */
            int S11 = M11 - M1M1;
            int S12 = M12 - M1M2;
            int S22 = M22 - M2M2;

            int ln = 2*M1M2+c1;         /* luminance term, numerator */
            int ld = M1M1+M2M2+c1;      /* luminance term, denominator */
            int cn = 2*S12+c2;          /* contrast/structure term, numerator */
            int cd = S11+S22+c2;        /* contrast/structure term, denominator */

            ssim += ((double)ln*cn)/((double)ld*cd);
        }
    }

    /* Pixel count is formed in double so the product cannot overflow int. */
    return (int64_t)( ((double)i_width*i_height - ssim)*256 );
}

// The remaining hunks of the patch that shared these (collapsed) source lines
// are unchanged and kept verbatim below:
//
// +/****************************************************************************
//   * x264_pixel_init:
//   ****************************************************************************/
//  void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
// Index: common/pixel.h
// ===================================================================
// --- common/pixel.h (revision 395)
// +++ common/pixel.h (working copy)
// @@ -70,5 +70,6 @@
//  void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
//  int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
// +int64_t x264_pixel_ssim( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
//
//  #endif
// Index: x264.h
// ===================================================================
// --- x264.h (revision 395)
// +++ x264.h (working copy)
// @@ -199,6 +199,7 @@
//  int b_bidir_me; /* jointly optimize both MVs in B-frames */
//  int b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
//  int b_bframe_rdo; /* RD based mode decision for B-frames */
// + int b_ssim_rd;
//  int b_mixed_references; /* allow each mb partition in P-frames to have it's own reference number */
//  int i_trellis; /* trellis RD quantization */
//  int b_fast_pskip; /* early SKIP detection on P-frames */