High bit depth intra_sad_x3_4x4

[x264] / common / arm / mc-c.c
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c

index 0a7b734b8b6edc1b141bbe0f82eb6a9b618d465c..c1fc05c0cf592a39c5098a51458e613d0ccddfdb 100644 (file)
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -1,7 +1,7 @@
  /*****************************************************************************
- * mc-c.c: h264 encoder library (Motion Compensation)
+ * mc-c.c: arm motion compensation
   *****************************************************************************
- * Copyright (C) 2009 x264 project
+ * Copyright (C) 2009-2011 x264 project
   *
   * Authors: David Conrad <lessen42@gmail.com>
   *
@@ -18,6 +18,9 @@
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
   *****************************************************************************/
  
  #include "common/common.h"
@@ -64,6 +67,19 @@ MC_WEIGHT(_nodenom)
  MC_WEIGHT(_offsetadd)
  MC_WEIGHT(_offsetsub)
  
+void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
+void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
+void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
+void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
+
+void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
+void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
+
+void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
+void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
+void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
+
+#if !HIGH_BIT_DEPTH
  static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
  {
      if( w->i_scale == 1<<w->i_denom )
@@ -85,14 +101,6 @@ static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
          w->weightfn = x264_mc_wtab_neon;
  }
  
-void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
-
-void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
-void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
-
  static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
  {
      NULL,
@@ -112,8 +120,8 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, int, uint8_t *, int,
      x264_mc_copy_w16_neon,
  };
  
-static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
+static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
+static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  
  static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
                            uint8_t *src[4], int i_src_stride,
@@ -174,10 +182,6 @@ static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
      }
  }
  
-void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
-void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
-void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
-
  static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
                                int stride, int width, int height, int16_t *buf )
  {
@@ -198,18 +202,23 @@ static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8
          src  += stride;
      }
  }
+#endif // !HIGH_BIT_DEPTH
  
  void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
  {
      if( !(cpu&X264_CPU_ARMV6) )
          return;
  
-    pf->prefetch_fenc = x264_prefetch_fenc_arm;
+#if !HIGH_BIT_DEPTH
+    pf->prefetch_fenc_420 = x264_prefetch_fenc_arm;
+    pf->prefetch_fenc_422 = x264_prefetch_fenc_arm; /* FIXME: reuses the 4:2:0 prefetch routine; presumably needs a 4:2:2-specific version */
      pf->prefetch_ref  = x264_prefetch_ref_arm;
+#endif // !HIGH_BIT_DEPTH
  
      if( !(cpu&X264_CPU_NEON) )
          return;
  
+#if !HIGH_BIT_DEPTH
      pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
      pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
      pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
@@ -229,15 +238,16 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
      pf->offsetsub = x264_mc_offsetsub_wtab_neon;
      pf->weight_cache = x264_weight_cache_neon;
  
+//  pf->mc_chroma = x264_mc_chroma_neon;
+    pf->mc_luma = mc_luma_neon;
+    pf->get_ref = get_ref_neon;
+    pf->hpel_filter = hpel_filter_neon;
+    pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
+#endif // !HIGH_BIT_DEPTH
+
  // Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
  #ifndef SYS_MACOSX
      pf->memcpy_aligned  = x264_memcpy_aligned_neon;
  #endif
      pf->memzero_aligned = x264_memzero_aligned_neon;
-
-    pf->mc_chroma = x264_mc_chroma_neon;
-    pf->mc_luma = mc_luma_neon;
-    pf->get_ref = get_ref_neon;
-    pf->hpel_filter = hpel_filter_neon;
-    pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
  }