CAVLC optimizations

[x264] / common / predict.c
diff --git a/common/predict.c b/common/predict.c

index d253879878b400eabb342a63100572b9a93ace0b..3c6cb1088dabef95a572bad596c402c00c0b6cd7 100644 (file)
--- a/common/predict.c
+++ b/common/predict.c
@@ -1,11 +1,11 @@
  /*****************************************************************************
   * predict.c: h264 encoder
   *****************************************************************************
- * Copyright (C) 2003 Laurent Aimar
- * $Id: predict.c,v 1.1 2004/06/03 19:27:07 fenrir Exp $
+ * Copyright (C) 2003-2008 x264 project
   *
   * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   *          Loren Merritt <lorenm@u.washington.edu>
+ *          Fiona Glaser <fiona@x264.com>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
   *****************************************************************************/
  
  /* predict4x4 are inspired from ffmpeg h264 decoder */
@@ -27,9 +27,6 @@
  
  #include "common.h"
  
-#ifdef _MSC_VER
-#undef HAVE_MMX  /* not finished now */
-#endif
  #ifdef HAVE_MMX
  #   include "x86/predict.h"
  #endif
@@ -37,24 +34,6 @@
  #   include "ppc/predict.h"
  #endif
  
-static ALWAYS_INLINE uint32_t pack16to32( int a, int b )
-{
-#ifdef WORDS_BIGENDIAN
-   return b + (a<<16);
-#else
-   return a + (b<<16);
-#endif
-}
-
-static ALWAYS_INLINE uint32_t pack8to16( int a, int b )
-{
-#ifdef WORDS_BIGENDIAN
-   return b + (a<<8);
-#else
-   return a + (b<<8);
-#endif
-}
-
  /****************************************************************************
   * 16x16 prediction for intra luma block
   ****************************************************************************/
@@ -527,7 +506,7 @@ void x264_predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor, in
      int have_lt = i_neighbor & MB_TOPLEFT;
      if( i_filters & MB_LEFT )
      {
-        edge[15] = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2;
+        edge[15] = (SRC(0,-1) + 2*SRC(-1,-1) + SRC(-1,0) + 2) >> 2;
          edge[14] = ((have_lt ? SRC(-1,-1) : SRC(-1,0))
                      + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2;
          PL(1) PL(2) PL(3) PL(4) PL(5) PL(6)
@@ -540,8 +519,8 @@ void x264_predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor, in
          edge[16] = ((have_lt ? SRC(-1,-1) : SRC(0,-1))
                      + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2;
          PT(1) PT(2) PT(3) PT(4) PT(5) PT(6)
-        edge[23] = ((have_tr ? SRC(8,-1) : SRC(7,-1))
-                    + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2;
+        edge[23] = (SRC(6,-1) + 2*SRC(7,-1)
+                    + (have_tr ? SRC(8,-1) : SRC(7,-1)) + 2) >> 2;
  
          if( i_filters & MB_TOPRIGHT )
          {
@@ -584,7 +563,6 @@ void x264_predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor, in
          src += FDEC_STRIDE; \
      }
  
-/* SIMD is much faster than C for all of these except HU and HD. */
  static void predict_8x8_dc_128( uint8_t *src, uint8_t edge[33] )
  {
      PREDICT_8x8_DC(0x80808080);
@@ -664,7 +642,7 @@ static void predict_8x8_ddr( uint8_t *src, uint8_t edge[33] )
      SRC(5,0)=SRC(6,1)=SRC(7,2)= F2(t3,t4,t5);
      SRC(6,0)=SRC(7,1)= F2(t4,t5,t6);
      SRC(7,0)= F2(t5,t6,t7);
-  
+
  }
  static void predict_8x8_vr( uint8_t *src, uint8_t edge[33] )
  {
@@ -807,9 +785,16 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
  #ifdef HAVE_MMX
      x264_predict_8x8c_init_mmx( cpu, pf );
  #endif
+
+#ifdef ARCH_PPC
+    if( cpu&X264_CPU_ALTIVEC )
+    {
+        x264_predict_8x8c_init_altivec( pf );
+    }
+#endif
  }
  
-void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12] )
+void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
  {
      pf[I_PRED_8x8_V]      = predict_8x8_v;
      pf[I_PRED_8x8_H]      = predict_8x8_h;
@@ -823,9 +808,10 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12] )
      pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left;
      pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top;
      pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128;
+    *predict_8x8_filter   = x264_predict_8x8_filter;
  
  #ifdef HAVE_MMX
-    x264_predict_8x8_init_mmx( cpu, pf );
+    x264_predict_8x8_init_mmx( cpu, pf, predict_8x8_filter );
  #endif
  }