#ifdef ARCH_PPC
# include "ppc/pixel.h"
#endif
+#ifdef ARCH_ARM
+# include "arm/pixel.h"
+#endif
#ifdef ARCH_UltraSparc
# include "sparc/pixel.h"
#endif
/****************************************************************************
* pixel_var_wxh
****************************************************************************/
-#define PIXEL_VAR_C( name, w, shift ) \
-static int name( uint8_t *pix, int i_stride ) \
+#define PIXEL_VAR_C( name, w ) \
+static uint64_t name( uint8_t *pix, int i_stride ) \
{ \
- uint32_t var = 0, sum = 0, sqr = 0; \
+ uint32_t sum = 0, sqr = 0; \
int x, y; \
for( y = 0; y < w; y++ ) \
{ \
} \
pix += i_stride; \
} \
- var = sqr - (sum * sum >> shift); \
- return var; \
+ return sum + ((uint64_t)sqr << 32); \
}
-PIXEL_VAR_C( x264_pixel_var_16x16, 16, 8 )
-PIXEL_VAR_C( x264_pixel_var_8x8, 8, 6 )
+PIXEL_VAR_C( x264_pixel_var_16x16, 16 )
+PIXEL_VAR_C( x264_pixel_var_8x8, 8 )
+
+/****************************************************************************
+ * pixel_var2_wxh
+ ****************************************************************************/
+/* Variance of the 8x8 difference block (pix1 - pix2).
+ * Accumulates the sum and the sum of squares of the per-pixel differences,
+ * writes the SSD (sum of squared differences) through *ssd, and returns
+ * sqr - sum*sum/64  (the >>6 divides by the 64 pixels of the block).
+ * NOTE(review): sum is uint32_t, so a negative running sum wraps; abs()
+ * here relies on the implicit uint32_t -> int conversion to recover the
+ * magnitude -- fine on two's-complement targets, but worth confirming
+ * against the asm versions this C fallback must match. */
+static int pixel_var2_8x8( uint8_t *pix1, int i_stride1, uint8_t *pix2, int i_stride2, int *ssd )
+{
+    uint32_t var = 0, sum = 0, sqr = 0;
+    int x, y;
+    for( y = 0; y < 8; y++ )
+    {
+        for( x = 0; x < 8; x++ )
+        {
+            int diff = pix1[x] - pix2[x];
+            sum += diff;
+            sqr += diff * diff;
+        }
+        /* advance each plane by its own stride */
+        pix1 += i_stride1;
+        pix2 += i_stride2;
+    }
+    /* magnitude of the (possibly wrapped-negative) difference sum */
+    sum = abs(sum);
+    /* var = E[d^2]*64 - (E[d]*64)^2 / 64, i.e. 64x the sample variance */
+    var = sqr - (sum * sum >> 6);
+    *ssd = sqr;
+    return var;
+}
#define HADAMARD4(d0,d1,d2,d3,s0,s1,s2,s3) {\
SATD_X_DECL7( _sse4 )
#endif
+#ifdef HAVE_ARMV6
+SATD_X_DECL7( _neon )
+#endif
+
/****************************************************************************
* structural similarity metric
****************************************************************************/
pixf->ssim_4x4x2_core = ssim_4x4x2_core;
pixf->ssim_end4 = ssim_end4;
+ pixf->var2_8x8 = pixel_var2_8x8;
#ifdef HAVE_MMX
if( cpu&X264_CPU_MMX )
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_mmxext;
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_mmxext;
+ pixf->var2_8x8 = x264_pixel_var2_8x8_mmxext;
if( cpu&X264_CPU_CACHELINE_32 )
{
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_mmxext;
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmxext;
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_mmxext;
+ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_mmxext;
pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_mmxext;
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_mmxext;
}
#ifdef ARCH_X86_64
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
+ pixf->var2_8x8 = x264_pixel_var2_8x8_sse2;
}
if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
#ifdef ARCH_X86_64
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
#endif
+ pixf->var2_8x8 = x264_pixel_var2_8x8_ssse3;
if( cpu&X264_CPU_CACHELINE_64 )
{
INIT2( sad, _cache64_ssse3 );
}
#endif //HAVE_MMX
+#ifdef HAVE_ARMV6
+ if( cpu&X264_CPU_ARMV6 )
+ {
+ pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
+ pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
+ pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
+ pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
+ }
+ if( cpu&X264_CPU_NEON )
+ {
+ INIT5( sad, _neon );
+ INIT5( sad_aligned, _neon );
+ INIT7( sad_x3, _neon );
+ INIT7( sad_x4, _neon );
+ INIT7( ssd, _neon );
+ INIT7( satd, _neon );
+ INIT7( satd_x3, _neon );
+ INIT7( satd_x4, _neon );
+ INIT4( hadamard_ac, _neon );
+ pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_neon;
+ pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
+ pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_neon;
+ pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_neon;
+ pixf->var2_8x8 = x264_pixel_var2_8x8_neon;
+
+ pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon;
+ pixf->ssim_end4 = x264_pixel_ssim_end4_neon;
+
+ if( cpu&X264_CPU_FAST_NEON_MRC )
+ {
+ pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_neon;
+ pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_neon;
+ pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_aligned_4x8_neon;
+ pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_aligned_4x4_neon;
+ }
+ else // really just scheduled for dual issue / A8
+ {
+ INIT5( sad_aligned, _neon_dual );
+ }
+ }
+#endif
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC )
{