X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fpixel.c;h=7c6023711e94e18b37ff794df07addfee0de54af;hb=979c14da90d69d05661430ace29d111efe615281;hp=852748ec8300ab474667fbce3bb270b3e060b7c8;hpb=205a032c22467c90c26d33ed9ab23d60461e57c1;p=x264 diff --git a/common/pixel.c b/common/pixel.c index 852748ec..7c602371 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -29,6 +29,9 @@ #ifdef ARCH_PPC # include "ppc/pixel.h" #endif +#ifdef ARCH_ARM +# include "arm/pixel.h" +#endif #ifdef ARCH_UltraSparc # include "sparc/pixel.h" #endif @@ -139,10 +142,10 @@ int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1 /**************************************************************************** * pixel_var_wxh ****************************************************************************/ -#define PIXEL_VAR_C( name, w, shift ) \ -static int name( uint8_t *pix, int i_stride ) \ +#define PIXEL_VAR_C( name, w ) \ +static uint64_t name( uint8_t *pix, int i_stride ) \ { \ - uint32_t var = 0, sum = 0, sqr = 0; \ + uint32_t sum = 0, sqr = 0; \ int x, y; \ for( y = 0; y < w; y++ ) \ { \ @@ -153,12 +156,11 @@ static int name( uint8_t *pix, int i_stride ) \ } \ pix += i_stride; \ } \ - var = sqr - (sum * sum >> shift); \ - return var; \ + return sum + ((uint64_t)sqr << 32); \ } -PIXEL_VAR_C( x264_pixel_var_16x16, 16, 8 ) -PIXEL_VAR_C( x264_pixel_var_8x8, 8, 6 ) +PIXEL_VAR_C( x264_pixel_var_16x16, 16 ) +PIXEL_VAR_C( x264_pixel_var_8x8, 8 ) /**************************************************************************** * pixel_var2_wxh @@ -453,6 +455,10 @@ SATD_X_DECL7( _ssse3 ) SATD_X_DECL7( _sse4 ) #endif +#ifdef HAVE_ARMV6 +SATD_X_DECL7( _neon ) +#endif + /**************************************************************************** * structural similarity metric ****************************************************************************/ @@ -815,6 +821,47 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) } #endif //HAVE_MMX +#ifdef HAVE_ARMV6 + if( cpu&X264_CPU_ARMV6 ) + { + pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_armv6; + pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_armv6; + pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_armv6; + pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_armv6; + } + if( cpu&X264_CPU_NEON ) + { + INIT5( sad, _neon ); + INIT5( sad_aligned, _neon ); + INIT7( sad_x3, _neon ); + INIT7( sad_x4, _neon ); + INIT7( ssd, _neon ); + INIT7( satd, _neon ); + INIT7( satd_x3, _neon ); + INIT7( satd_x4, _neon ); + INIT4( hadamard_ac, _neon ); + pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_neon; + pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon; + pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_neon; + pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_neon; + pixf->var2_8x8 = x264_pixel_var2_8x8_neon; + + pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon; + pixf->ssim_end4 = x264_pixel_ssim_end4_neon; + + if( cpu&X264_CPU_FAST_NEON_MRC ) + { + pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_neon; + pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_neon; + pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_aligned_4x8_neon; + pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_aligned_4x4_neon; + } + else // really just scheduled for dual issue / A8 + { + INIT5( sad_aligned, _neon_dual ); + } + } +#endif #ifdef ARCH_PPC if( cpu&X264_CPU_ALTIVEC ) {