return ssim;
}
+/* Vertical SAD: sum of absolute differences between each row of a
+ * 16-pixel-wide block and the row directly below it (15 row pairs,
+ * reading 16 rows total). C reference for the asm versions below.
+ * src points at the top-left pixel; stride is the distance between
+ * rows. Returns the accumulated score. */
+int pixel_vsad( pixel *src, int stride )
+{
+ int score = 0;
+ /* i counts the 15 row pairs; src advances one row per iteration */
+ for( int i = 1; i < 16; i++, src += stride )
+ for( int j = 0; j < 16; j++ )
+ score += abs(src[j] - src[j+stride]);
+ return score;
+}
/****************************************************************************
* successive elimination
pixf->ssim_4x4x2_core = ssim_4x4x2_core;
pixf->ssim_end4 = ssim_end4;
pixf->var2_8x8 = pixel_var2_8x8;
+ pixf->vsad = pixel_vsad;
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4;
pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4;
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_mmxext;
pixf->var2_8x8 = x264_pixel_var2_8x8_mmxext;
+ pixf->vsad = x264_pixel_vsad_mmxext;
if( cpu&X264_CPU_CACHELINE_32 )
{
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
pixf->var2_8x8 = x264_pixel_var2_8x8_sse2;
+ pixf->vsad = x264_pixel_vsad_sse2;
}
if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
x264_pixel_cmp_x3_t fpelcmp_x3[7];
x264_pixel_cmp_x4_t fpelcmp_x4[7];
x264_pixel_cmp_t sad_aligned[7]; /* Aligned SAD for mbcmp */
+ int (*vsad)( pixel *, int );
int (*var2_8x8)( pixel *, int, pixel *, int, int * );
uint64_t (*var[4])( pixel *pix, int stride );
int x264_pixel_var2_8x8_mmxext( pixel *, int, pixel *, int, int * );
int x264_pixel_var2_8x8_sse2( pixel *, int, pixel *, int, int * );
int x264_pixel_var2_8x8_ssse3( uint8_t *, int, uint8_t *, int, int * );
+int x264_pixel_vsad_mmxext( pixel *src, int stride );
+int x264_pixel_vsad_sse2( pixel *src, int stride );
#define DECL_ADS( size, suffix ) \
int x264_pixel_ads##size##_##suffix( int enc_dc[size], uint16_t *sums, int delta,\
SAD_END_SSE2
RET
+;-----------------------------------------------------------------------------
+; int pixel_vsad( pixel *src, int stride );
+;-----------------------------------------------------------------------------
+
+; MMX version, 32-bit targets only (x86-64 always has the SSE2 version).
+; Accumulates PSADBW of each row against the next over 15 row pairs:
+; 1 pair in the prologue + 7 loop iterations x 2 pairs = 15.
+%ifndef ARCH_X86_64
+INIT_MMX
+cglobal pixel_vsad_mmxext, 2,3
+ mova m0, [r0+0]      ; row 0, pixels 0-7
+ mova m1, [r0+8]      ; row 0, pixels 8-15
+ mova m2, [r0+r1+0]   ; row 1, pixels 0-7
+ mova m3, [r0+r1+8]   ; row 1, pixels 8-15
+ lea r0, [r0+r1*2]    ; advance to row 2
+ psadbw m0, m2        ; m0/m1 become the left/right accumulators
+ psadbw m1, m3
+ mov r2d, 7           ; 7 iterations, 2 row pairs each
+.loop:
+ mova m4, [r0+0]      ; even row, left half
+ mova m5, [r0+8]      ; even row, right half
+ psadbw m2, m4        ; SAD(previous odd row, this even row)
+ psadbw m3, m5
+ paddw m0, m2
+ paddw m1, m3
+ mova m2, [r0+r1+0]   ; odd row, left half
+ mova m3, [r0+r1+8]   ; odd row, right half
+ lea r0, [r0+r1*2]    ; advance two rows
+ psadbw m4, m2        ; SAD(even row, this odd row)
+ psadbw m5, m3
+ paddw m0, m4
+ paddw m1, m5
+ dec r2d
+ jg .loop
+ paddw m0, m1         ; merge left/right accumulators
+ movd eax, m0         ; return the score in eax
+ RET
+%endif
+
+; SSE2 version: one full 16-pixel row per XMM register, fully unrolled
+; over the 15 row pairs via %rep. Registers rotate each step so that the
+; just-loaded row becomes the "previous" row of the next comparison;
+; r0 advances two rows every other iteration (the [r0+r1*(i&1)] offset
+; alternates between 0 and stride).
+INIT_XMM
+cglobal pixel_vsad_sse2, 2,2
+ mova m1, [r0]        ; row 0
+%assign i 1
+%rep 15
+ mova m2, [r0+r1*(i&1)] ; load row i
+%if i&1
+ lea r0, [r0+r1*2]    ; step the base pointer on odd iterations
+%endif
+ psadbw m1, m2        ; SAD(row i-1, row i) -> two 16-bit partial sums
+%if i>1
+ paddw m0, m1         ; accumulate into m0
+%else
+ SWAP 0, 1            ; first pair: m1 simply becomes the accumulator
+%endif
+ SWAP 1, 2            ; freshly loaded row becomes "previous" row
+%assign i i+1
+%endrep
+ movhlps m1, m0       ; fold the high qword's partial sum into the low
+ paddw m0, m1
+ movd eax, m0         ; return the score in eax
+ RET
+
;-----------------------------------------------------------------------------
; void intra_sad_x3_4x4( uint8_t *fenc, uint8_t *fdec, int res[3] );
;-----------------------------------------------------------------------------
}
report( "pixel hadamard_ac :" );
+ /* Check the asm vsad against the C reference on the same test
+ * buffer; any mismatch is reported and fails the test. */
+ ok = 1; used_asm = 0;
+ if( pixel_asm.vsad != pixel_ref.vsad )
+ {
+ int res_c, res_asm;
+ set_func_name( "vsad" );
+ used_asm = 1;
+ res_c = call_c( pixel_c.vsad, pbuf1, 16 );
+ res_asm = call_a( pixel_asm.vsad, pbuf1, 16 );
+ if( res_c != res_asm )
+ {
+ ok = 0;
+ fprintf( stderr, "vsad: %d != %d\n", res_c, res_asm );
+ }
+ }
+ report( "pixel vsad :" );
+
#define TEST_INTRA_MBCMP( name, pred, satd, i8x8, ... ) \
if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
{ \