#include "common.h"
-#ifdef HAVE_MMX
+#if HAVE_MMX
# include "x86/pixel.h"
#endif
-#ifdef ARCH_PPC
+#if ARCH_PPC
# include "ppc/pixel.h"
#endif
-#ifdef ARCH_ARM
+#if ARCH_ARM
# include "arm/pixel.h"
#endif
-#ifdef ARCH_UltraSparc
+#if ARCH_UltraSparc
# include "sparc/pixel.h"
#endif
SAD_X( 4x8 )
SAD_X( 4x4 )
-#ifdef ARCH_UltraSparc
+#if ARCH_UltraSparc
SAD_X( 16x16_vis )
SAD_X( 16x8_vis )
SAD_X( 8x16_vis )
SATD_X( 4x4, cpu )
SATD_X_DECL7()
-#ifdef HAVE_MMX
+#if HAVE_MMX
SATD_X_DECL7( _mmxext )
SATD_X_DECL6( _sse2 )
SATD_X_DECL7( _ssse3 )
SATD_X_DECL7( _sse4 )
#endif
-#ifdef HAVE_ARMV6
+#if HAVE_ARMV6
SATD_X_DECL7( _neon )
#endif
+#define INTRA_MBCMP_8x8( mbcmp ) /* Generates x264_intra_<mbcmp>_x3_8x8(): fills res[0..2] with the <mbcmp> cost of three 8x8 predictions (built from edge) measured against fenc. */\
+void x264_intra_##mbcmp##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
+{\
+ pixel pix[8*FDEC_STRIDE]; /* local scratch buffer; handed to the metric below with stride FDEC_STRIDE */\
+ x264_predict_8x8_v_c( pix, edge ); /* _v predictor (presumably vertical) -> res[0] */\
+ res[0] = x264_pixel_##mbcmp##_8x8( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_8x8_h_c( pix, edge ); /* _h predictor (presumably horizontal) -> res[1]; reuses the same scratch buffer */\
+ res[1] = x264_pixel_##mbcmp##_8x8( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_8x8_dc_c( pix, edge ); /* _dc predictor -> res[2] */\
+ res[2] = x264_pixel_##mbcmp##_8x8( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
+}
+
+INTRA_MBCMP_8x8(sad) /* defines x264_intra_sad_x3_8x8, built on x264_pixel_sad_8x8 */
+INTRA_MBCMP_8x8(sa8d) /* defines x264_intra_sa8d_x3_8x8, built on x264_pixel_sa8d_8x8 */
+
+#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma ) /* Generates x264_intra_<mbcmp>_x3_<size>x<size><chroma>(): res[i] is the <mbcmp> cost of predictor pred(i+1) against fenc. chroma is a name suffix and may be empty. */\
+void x264_intra_##mbcmp##_x3_##size##x##size##chroma( pixel *fenc, pixel *fdec, int res[3] )\
+{\
+ x264_predict_##size##x##size##chroma##_##pred1##_c( fdec ); /* predictors are handed the caller's fdec directly (no scratch copy); presumably they write the prediction into it */\
+ res[0] = x264_pixel_##mbcmp##_##size##x##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE ); /* metric name carries no chroma suffix: the plain <size>x<size> comparator is used */\
+ x264_predict_##size##x##size##chroma##_##pred2##_c( fdec );\
+ res[1] = x264_pixel_##mbcmp##_##size##x##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_##size##x##size##chroma##_##pred3##_c( fdec ); /* last predictor call on fdec; NOTE(review): fdec presumably holds the pred3 prediction on return - confirm callers do not rely on its old contents */\
+ res[2] = x264_pixel_##mbcmp##_##size##x##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+}
+
+INTRA_MBCMP(sad, 4, v, h, dc, ) /* x264_intra_sad_x3_4x4: res = { v, h, dc } */
+INTRA_MBCMP(satd, 4, v, h, dc, ) /* x264_intra_satd_x3_4x4: res = { v, h, dc } */
+INTRA_MBCMP(sad, 8, dc, h, v, c ) /* x264_intra_sad_x3_8x8c: predictor order is dc, h, v here, unlike the other sizes (presumably matching the chroma mode numbering - TODO confirm) */
+INTRA_MBCMP(satd, 8, dc, h, v, c ) /* x264_intra_satd_x3_8x8c: res = { dc, h, v } */
+INTRA_MBCMP(sad, 16, v, h, dc, ) /* x264_intra_sad_x3_16x16: res = { v, h, dc } */
+INTRA_MBCMP(satd, 16, v, h, dc, ) /* x264_intra_satd_x3_16x16: res = { v, h, dc } */
+
/****************************************************************************
* structural similarity metric
****************************************************************************/
int z = 0;
float ssim = 0.0;
int (*sum0)[4] = buf;
- int (*sum1)[4] = sum0 + width/4+3;
+ int (*sum1)[4] = sum0 + (width >> 2) + 3;
width >>= 2;
height >>= 2;
for( int y = 1; y < height; y++ )
pixf->ssim_end4 = ssim_end4;
pixf->var2_8x8 = pixel_var2_8x8;
-#ifdef HAVE_MMX
+ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4;
+ pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4;
+ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8;
+ pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8;
+ pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c;
+ pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c;
+ pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16;
+ pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
+
+#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
INIT7( ssd, _mmx );
INIT_ADS( _mmxext );
pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmxext;
pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_mmxext;
-#ifdef ARCH_X86
+#if ARCH_X86
pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_mmxext;
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
pixf->ssim_end4 = x264_pixel_ssim_end4_sse2;
pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse2;
-#ifdef ARCH_X86_64
+#if ARCH_X86_64
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
pixf->var2_8x8 = x264_pixel_var2_8x8_sse2;
if( cpu&X264_CPU_CACHELINE_64 )
{
INIT2( ssd, _sse2); /* faster for width 16 on p4 */
-#ifdef ARCH_X86
+#if ARCH_X86
INIT2( sad, _cache64_sse2 );
INIT2( sad_x3, _cache64_sse2 );
INIT2( sad_x4, _cache64_sse2 );
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_ssse3;
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_ssse3;
pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_ssse3;
-#ifdef ARCH_X86_64
+#if ARCH_X86_64
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
#endif
pixf->var2_8x8 = x264_pixel_var2_8x8_ssse3;
}
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4;
+ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
+ /* Slower on Conroe, so only enable under SSE4 */
+ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3;
}
#endif //HAVE_MMX
-#ifdef HAVE_ARMV6
+#if HAVE_ARMV6
if( cpu&X264_CPU_ARMV6 )
{
pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
}
}
#endif
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
{
x264_pixel_altivec_init( pixf );
}
#endif
-#ifdef ARCH_UltraSparc
+#if ARCH_UltraSparc
INIT4( sad, _vis );
INIT4( sad_x3, _vis );
INIT4( sad_x4, _vis );