Add SSD and SADCT block-comparison functions to common/pixel.c.

  * pixel_ssd_WxH:   sum over a WxH block of (pix1 - pix2)^2.
  * pixel_sadct_WxH: per 4x4 sub-block, applies the 4x4 integer transform to the
    pixel differences and sums |coefficient| * quant_mf[][]. The result is
    saturated at INT_MAX: the accumulator is unsigned, and a worst-case 16x16
    block exceeds INT_MAX, which would otherwise come back as a negative cost.
  * The new functions are stored in pixf->ssd[] and pixf->sadct[].
  * common/pixel.h: x264_pixel_sad_t and x264_pixel_satd_t are merged into one
    typedef, x264_pixel_cmp_t, used for sad/ssd/satd/sadct.

NOTE(review): line breaks and indentation of this patch were reconstructed from a
whitespace-collapsed copy; if context whitespace does not match the target tree,
apply with "patch -l".

Index: common/pixel.c
===================================================================
--- common/pixel.c	(revision 133)
+++ common/pixel.c	(working copy)
@@ -71,6 +71,39 @@
 PIXEL_SAD_C( pixel_sad_4x8, 4, 8 )
 PIXEL_SAD_C( pixel_sad_4x4, 4, 4 )
 
+
+/****************************************************************************
+ * pixel_ssd_WxH: sum over a WxH block of (pix1 - pix2)^2
+ * FIXME optimize
+ ****************************************************************************/
+#define PIXEL_SSD_C( name, lx, ly ) \
+static int name( uint8_t *pix1, int i_stride_pix1, \
+                 uint8_t *pix2, int i_stride_pix2 ) \
+{ \
+    int i_sum = 0; \
+    int x, y; \
+    for( y = 0; y < ly; y++ ) \
+    { \
+        for( x = 0; x < lx; x++ ) \
+        { \
+            int d = pix1[x] - pix2[x]; \
+            i_sum += d*d; \
+        } \
+        pix1 += i_stride_pix1; \
+        pix2 += i_stride_pix2; \
+    } \
+    return i_sum; \
+}
+
+PIXEL_SSD_C( pixel_ssd_16x16, 16, 16 )
+PIXEL_SSD_C( pixel_ssd_16x8, 16, 8 )
+PIXEL_SSD_C( pixel_ssd_8x16, 8, 16 )
+PIXEL_SSD_C( pixel_ssd_8x8, 8, 8 )
+PIXEL_SSD_C( pixel_ssd_8x4, 8, 4 )
+PIXEL_SSD_C( pixel_ssd_4x8, 4, 8 )
+PIXEL_SSD_C( pixel_ssd_4x4, 4, 4 )
+
+
 static void pixel_sub_4x4( int16_t diff[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
 {
     int y, x;
@@ -146,6 +179,75 @@
 PIXEL_SATD_C( pixel_satd_4x4, 4, 4 )
 
 
+/* Sum of absolute DCT transformed differences, each coefficient weighted by quant_mf.
+ * FIXME: reuse MMX DCT functions */
+static const int quant_mf[4][4] =
+{ { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243},
+  { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243} };
+
+static int pixel_sadct_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
+{
+    int16_t tmp[4][4];
+    int16_t diff[4][4];
+    int x, y;
+    unsigned int i_sadct = 0;
+
+    for( y = 0; y < i_height; y += 4 )
+    {
+        for( x = 0; x < i_width; x += 4 )
+        {
+            int d;
+
+            pixel_sub_4x4( diff, &pix1[x], i_pix1, &pix2[x], i_pix2 );
+
+            for( d = 0; d < 4; d++ )
+            {
+                int s03 = diff[d][0] + diff[d][3];
+                int s12 = diff[d][1] + diff[d][2];
+                int d03 = diff[d][0] - diff[d][3];
+                int d12 = diff[d][1] - diff[d][2];
+
+                tmp[0][d] = s03 + s12;
+                tmp[1][d] = 2*d03 + d12;
+                tmp[2][d] = s03 - s12;
+                tmp[3][d] = d03 - 2*d12;
+            }
+            for( d = 0; d < 4; d++ )
+            {
+                int s03 = tmp[d][0] + tmp[d][3];
+                int s12 = tmp[d][1] + tmp[d][2];
+                int d03 = tmp[d][0] - tmp[d][3];
+                int d12 = tmp[d][1] - tmp[d][2];
+
+                /* FIXME merge multiplies */
+                i_sadct += abs( s03 + s12) * quant_mf[0][d]
+                         + abs(2*d03 + d12) * quant_mf[1][d]
+                         + abs( s03 - s12) * quant_mf[2][d]
+                         + abs( d03 - 2*d12) * quant_mf[3][d];
+            }
+        }
+        pix1 += 4 * i_pix1;
+        pix2 += 4 * i_pix2;
+    }
+
+    /* Saturate (~0u >> 1 == INT_MAX): a worst-case 16x16 sum exceeds INT_MAX and would wrap negative. */
+    return i_sadct > (~0u >> 1) ? (int)(~0u >> 1) : (int)i_sadct;
+}
+#define PIXEL_SADCT_C( name, width, height ) \
+static int name( uint8_t *pix1, int i_stride_pix1, \
+                 uint8_t *pix2, int i_stride_pix2 ) \
+{ \
+    return pixel_sadct_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ); \
+}
+PIXEL_SADCT_C( pixel_sadct_16x16, 16, 16 )
+PIXEL_SADCT_C( pixel_sadct_16x8, 16, 8 )
+PIXEL_SADCT_C( pixel_sadct_8x16, 8, 16 )
+PIXEL_SADCT_C( pixel_sadct_8x8, 8, 8 )
+PIXEL_SADCT_C( pixel_sadct_8x4, 8, 4 )
+PIXEL_SADCT_C( pixel_sadct_4x8, 4, 8 )
+PIXEL_SADCT_C( pixel_sadct_4x4, 4, 4 )
+
+
 static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height )
 {
     int x, y;
@@ -188,6 +290,14 @@
     pixf->sad[PIXEL_4x8] = pixel_sad_4x8;
     pixf->sad[PIXEL_4x4] = pixel_sad_4x4;
 
+    pixf->ssd[PIXEL_16x16] = pixel_ssd_16x16;
+    pixf->ssd[PIXEL_16x8] = pixel_ssd_16x8;
+    pixf->ssd[PIXEL_8x16] = pixel_ssd_8x16;
+    pixf->ssd[PIXEL_8x8] = pixel_ssd_8x8;
+    pixf->ssd[PIXEL_8x4] = pixel_ssd_8x4;
+    pixf->ssd[PIXEL_4x8] = pixel_ssd_4x8;
+    pixf->ssd[PIXEL_4x4] = pixel_ssd_4x4;
+
     pixf->satd[PIXEL_16x16]= pixel_satd_16x16;
     pixf->satd[PIXEL_16x8] = pixel_satd_16x8;
     pixf->satd[PIXEL_8x16] = pixel_satd_8x16;
@@ -196,6 +306,14 @@
     pixf->satd[PIXEL_4x8] = pixel_satd_4x8;
     pixf->satd[PIXEL_4x4] = pixel_satd_4x4;
 
+    pixf->sadct[PIXEL_16x16]= pixel_sadct_16x16;
+    pixf->sadct[PIXEL_16x8] = pixel_sadct_16x8;
+    pixf->sadct[PIXEL_8x16] = pixel_sadct_8x16;
+    pixf->sadct[PIXEL_8x8] = pixel_sadct_8x8;
+    pixf->sadct[PIXEL_8x4] = pixel_sadct_8x4;
+    pixf->sadct[PIXEL_4x8] = pixel_sadct_4x8;
+    pixf->sadct[PIXEL_4x4] = pixel_sadct_4x4;
+
     pixf->avg[PIXEL_16x16]= pixel_avg_16x16;
     pixf->avg[PIXEL_16x8] = pixel_avg_16x8;
     pixf->avg[PIXEL_8x16] = pixel_avg_8x16;
Index: common/pixel.h
===================================================================
--- common/pixel.h	(revision 133)
+++ common/pixel.h	(working copy)
@@ -24,8 +24,7 @@
 #ifndef _PIXEL_H
 #define _PIXEL_H 1
 
-typedef int (*x264_pixel_sad_t) ( uint8_t *, int, uint8_t *, int );
-typedef int (*x264_pixel_satd_t)( uint8_t *, int, uint8_t *, int );
+typedef int (*x264_pixel_cmp_t) ( uint8_t *, int, uint8_t *, int );
 typedef void (*x264_pixel_avg_t) ( uint8_t *, int, uint8_t *, int );
 
 enum
@@ -60,8 +59,10 @@
 
 typedef struct
 {
-    x264_pixel_sad_t sad[7];
-    x264_pixel_satd_t satd[7];
+    x264_pixel_cmp_t sad[7];
+    x264_pixel_cmp_t ssd[7];
+    x264_pixel_cmp_t satd[7];
+    x264_pixel_cmp_t sadct[7];
     x264_pixel_avg_t avg[7];
 } x264_pixel_function_t;
 