git.sesse.net Git - x264/blobdiff - common/pixel.c
Much faster weightp
[x264] / common / pixel.c
index 852748ec8300ab474667fbce3bb270b3e060b7c8..7c6023711e94e18b37ff794df07addfee0de54af 100644 (file)
@@ -29,6 +29,9 @@
 #ifdef ARCH_PPC
 #   include "ppc/pixel.h"
 #endif
+#ifdef ARCH_ARM
+#   include "arm/pixel.h"
+#endif
 #ifdef ARCH_UltraSparc
 #   include "sparc/pixel.h"
 #endif
@@ -139,10 +142,10 @@ int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1
 /****************************************************************************
  * pixel_var_wxh
  ****************************************************************************/
-#define PIXEL_VAR_C( name, w, shift ) \
-static int name( uint8_t *pix, int i_stride ) \
+#define PIXEL_VAR_C( name, w ) \
+static uint64_t name( uint8_t *pix, int i_stride ) \
 {                                             \
-    uint32_t var = 0, sum = 0, sqr = 0;       \
+    uint32_t sum = 0, sqr = 0;                \
     int x, y;                                 \
     for( y = 0; y < w; y++ )                  \
     {                                         \
@@ -153,12 +156,11 @@ static int name( uint8_t *pix, int i_stride ) \
         }                                     \
         pix += i_stride;                      \
     }                                         \
-    var = sqr - (sum * sum >> shift);         \
-    return var;                               \
+    return sum + ((uint64_t)sqr << 32);       \
 }
 
-PIXEL_VAR_C( x264_pixel_var_16x16, 16, 8 )
-PIXEL_VAR_C( x264_pixel_var_8x8,    8, 6 )
+PIXEL_VAR_C( x264_pixel_var_16x16, 16 )
+PIXEL_VAR_C( x264_pixel_var_8x8,    8 )
 
 /****************************************************************************
  * pixel_var2_wxh
@@ -453,6 +455,10 @@ SATD_X_DECL7( _ssse3 )
 SATD_X_DECL7( _sse4 )
 #endif
 
+#ifdef HAVE_ARMV6
+SATD_X_DECL7( _neon )
+#endif
+
 /****************************************************************************
  * structural similarity metric
  ****************************************************************************/
@@ -815,6 +821,47 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
     }
 #endif //HAVE_MMX
 
+#ifdef HAVE_ARMV6
+    if( cpu&X264_CPU_ARMV6 )
+    {
+        pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
+        pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
+        pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
+        pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
+    }
+    if( cpu&X264_CPU_NEON )
+    {
+        INIT5( sad, _neon );
+        INIT5( sad_aligned, _neon );
+        INIT7( sad_x3, _neon );
+        INIT7( sad_x4, _neon );
+        INIT7( ssd, _neon );
+        INIT7( satd, _neon );
+        INIT7( satd_x3, _neon );
+        INIT7( satd_x4, _neon );
+        INIT4( hadamard_ac, _neon );
+        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
+        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
+        pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
+        pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
+        pixf->var2_8x8          = x264_pixel_var2_8x8_neon;
+
+        pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
+        pixf->ssim_end4         = x264_pixel_ssim_end4_neon;
+
+        if( cpu&X264_CPU_FAST_NEON_MRC )
+        {
+            pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_neon;
+            pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_neon;
+            pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_aligned_4x8_neon;
+            pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_aligned_4x4_neon;
+        }
+        else    // really just scheduled for dual issue / A8
+        {
+            INIT5( sad_aligned, _neon_dual );
+        }
+    }
+#endif
 #ifdef ARCH_PPC
     if( cpu&X264_CPU_ALTIVEC )
     {