cabac asm.

[x264] / common / common.h
diff --git a/common/common.h b/common/common.h

index 2fbb069f9242bbb5170141b41baf6129e01495e1..b9f4869fa789ea1bde646d2ba63bbe9745aeecc2 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -36,10 +36,6 @@
  #define XCHG(type,a,b) { type t = a; a = b; b = t; }
  #define FIX8(f) ((int)(f*(1<<8)+.5))
  
-#ifndef offsetof
-#define offsetof(T,F) ((unsigned int)((char *)&((T *)0)->F))
-#endif
-
  #define CHECKED_MALLOC( var, size )\
  {\
      var = x264_malloc( size );\
@@ -64,6 +60,7 @@
   ****************************************************************************/
  #include "osdep.h"
  #include <stdarg.h>
+#include <stddef.h>
  #include <stdlib.h>
  #include <string.h>
  #include <assert.h>
@@ -102,6 +99,11 @@ void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
  
  void x264_reduce_fraction( int *n, int *d );
  
+static inline uint8_t x264_clip_uint8( int x ) /* clamp x to the 0..255 range of a uint8_t */
+{ /* x&(~255) is nonzero exactly when x is negative or greater than 255 */
+    return x&(~255) ? (-x)>>31 : x; /* out of range: (-x)>>31 is 0 for x<0 and, assuming 32-bit int with arithmetic right shift, -1 (255 once narrowed to uint8_t) for x>255 */
+} /* NOTE(review): -x overflows for x == INT_MIN; presumably callers never pass it -- confirm */
+
  static inline int x264_clip3( int v, int i_min, int i_max )
  {
      return ( (v < i_min) ? i_min : (v > i_max) ? i_max : v );
@@ -227,7 +229,6 @@ static const int x264_scan8[16+2*4] =
  */
  
  typedef struct x264_ratecontrol_t   x264_ratecontrol_t;
-typedef struct x264_vlc_table_t     x264_vlc_table_t;
  
  struct x264_t
  {
@@ -335,15 +336,11 @@ struct x264_t
      /* Current MB DCT coeffs */
      struct
      {
-        DECLARE_ALIGNED( int, luma16x16_dc[16], 16 );
-        DECLARE_ALIGNED( int, chroma_dc[2][4], 16 );
-        // FIXME merge with union
-        DECLARE_ALIGNED( int, luma8x8[4][64], 16 );
-        union
-        {
-            DECLARE_ALIGNED( int, residual_ac[15], 16 );
-            DECLARE_ALIGNED( int, luma4x4[16], 16 );
-        } block[16+8];
+        DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] );
+        DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] );
+        // FIXME share memory?
+        DECLARE_ALIGNED_16( int16_t luma8x8[4][64] );
+        DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] );
      } dct;
  
      /* MB table and cache for current frame/mb */
@@ -422,13 +419,25 @@ struct x264_t
          int     i_intra16x16_pred_mode;
          int     i_chroma_pred_mode;
  
+        /* skip flags for i4x4 and i8x8
+         * 0 = encode as normal.
+         * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
+         * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
+        int i_skip_intra;
+
          struct
          {
              /* space for p_fenc and p_fdec */
  #define FENC_STRIDE 16
  #define FDEC_STRIDE 32
-            DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 );
-            DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 );
+            DECLARE_ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
+            DECLARE_ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
+
+            /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */            
+            DECLARE_ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
+            DECLARE_ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
+            DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
+            DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
  
              /* pointer over mb of the frame to be compressed */
              uint8_t *p_fenc[3];
@@ -449,22 +458,22 @@ struct x264_t
          struct
          {
              /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
-            int     intra4x4_pred_mode[X264_SCAN8_SIZE];
+            int8_t  intra4x4_pred_mode[X264_SCAN8_SIZE];
  
              /* i_non_zero_count if available else 0x80 */
-            int     non_zero_count[X264_SCAN8_SIZE];
+            uint8_t non_zero_count[X264_SCAN8_SIZE];
  
              /* -1 if unused, -2 if unavailable */
-            int8_t  ref[2][X264_SCAN8_SIZE];
+            DECLARE_ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
  
              /* 0 if not available */
-            int16_t mv[2][X264_SCAN8_SIZE][2];
-            int16_t mvd[2][X264_SCAN8_SIZE][2];
+            DECLARE_ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
+            DECLARE_ALIGNED_4( int16_t mvd[2][X264_SCAN8_SIZE][2] );
  
              /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
-            int8_t  skip[X264_SCAN8_SIZE];
+            DECLARE_ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
  
-            int16_t direct_mv[2][X264_SCAN8_SIZE][2];
+            DECLARE_ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
              int8_t  direct_ref[2][X264_SCAN8_SIZE];
              int     pskip_mv[2];
  
@@ -484,11 +493,11 @@ struct x264_t
          int     b_direct_auto_write; /* analyse direct modes, to use and/or save */
  
          /* B_direct and weighted prediction */
-        int     dist_scale_factor[16][2];
-        int     bipred_weight[32][4];
+        int16_t dist_scale_factor[16][2];
+        int16_t bipred_weight[32][4];
          /* maps fref1[0]'s ref indices into the current list0 */
-        int     map_col_to_list0_buf[2]; // for negative indices
-        int     map_col_to_list0[16];
+        int8_t  map_col_to_list0_buf[2]; // for negative indices
+        int8_t  map_col_to_list0[16];
      } mb;
  
      /* rate control encoding only */
@@ -561,13 +570,6 @@ struct x264_t
      x264_quant_function_t quantf;
      x264_deblock_function_t loopf;
  
-    /* vlc table for decoding purpose only */
-    x264_vlc_table_t *x264_coeff_token_lookup[5];
-    x264_vlc_table_t *x264_level_prefix_lookup;
-    x264_vlc_table_t *x264_total_zeros_lookup[15];
-    x264_vlc_table_t *x264_total_zeros_dc_lookup[3];
-    x264_vlc_table_t *x264_run_before_lookup[7];
-
  #if VISUALIZE
      struct visualize_t *visualize;
  #endif