Convert to a unified "pixel" type for pixel data

author Oskar Arvidsson <oskar@irock.se>
Tue, 1 Jun 2010 23:35:38 +0000 (01:35 +0200)
committer Fiona Glaser <fiona@x264.com>
Wed, 2 Jun 2010 05:18:26 +0000 (22:18 -0700)
diff --git a/common/common.h b/common/common.h

index 93712fe59704b996ba17b0f2c6aba0eb5efcf434..d0d43d2682640ae28b6db04d4b01d435db8a61c7 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -100,6 +100,14 @@ typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; u
  #define CP64(dst,src) M64(dst) = M64(src)
  #define CP128(dst,src) M128(dst) = M128(src)
  
+typedef uint8_t pixel; /* storage type of one sample; 8 bits wide here */
+typedef uint32_t pixel4; /* integer the size of four packed pixels */
+
+#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) /* for x in 0..255: copy x into each of the four bytes of a 32-bit value */
+#define MPIXEL_X4(src) M32(src) /* four pixels at src, accessed through M32 (presumably as one 32-bit word; M32 is defined outside this hunk) */
+#define CPPIXEL_X4(dst,src) CP32(dst,src) /* copy four pixels from src to dst using CP32 */
+#define CPPIXEL_X8(dst,src) CP64(dst,src) /* copy eight pixels from src to dst using CP64, i.e. M64(dst) = M64(src) */
+
  #define X264_SCAN8_SIZE (6*8)
  #define X264_SCAN8_LUMA_SIZE (5*8)
  #define X264_SCAN8_0 (4+1*8)
@@ -172,7 +180,7 @@ void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
  void x264_reduce_fraction( uint32_t *n, uint32_t *d );
  void x264_init_vlc_tables();
  
-static ALWAYS_INLINE uint8_t x264_clip_uint8( int x )
+static ALWAYS_INLINE pixel x264_clip_pixel( int x ) /* clamp x to 0..255: negative -> 0, above 255 -> 255; assumes 32-bit int with arithmetic right shift */
  {
      return x&(~255) ? (-x)>>31 : x;
  }
@@ -580,7 +588,7 @@ struct x264_t
                                               * NOTE: this will fail on resolutions above 2^16 MBs... */
  
           /* buffer for weighted versions of the reference frames */
-        uint8_t *p_weight_buf[16];
+        pixel *p_weight_buf[16];
  
          /* current value */
          int     i_type;
@@ -611,12 +619,12 @@ struct x264_t
              /* space for p_fenc and p_fdec */
  #define FENC_STRIDE 16
  #define FDEC_STRIDE 32
-            ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
-            ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
+            ALIGNED_16( pixel fenc_buf[24*FENC_STRIDE] );
+            ALIGNED_16( pixel fdec_buf[27*FDEC_STRIDE] );
  
              /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
-            ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
-            ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
+            ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
+            ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
              ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
              ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
              uint32_t i4x4_nnz_buf[4];
@@ -633,17 +641,17 @@ struct x264_t
              ALIGNED_16( uint32_t fenc_satd_cache[32] );
  
              /* pointer over mb of the frame to be compressed */
-            uint8_t *p_fenc[3];
+            pixel *p_fenc[3];
              /* pointer to the actual source frame, not a block copy */
-            uint8_t *p_fenc_plane[3];
+            pixel *p_fenc_plane[3];
  
              /* pointer over mb of the frame to be reconstructed  */
-            uint8_t *p_fdec[3];
+            pixel *p_fdec[3];
  
              /* pointer over mb of the references */
              int i_fref[2];
-            uint8_t *p_fref[2][32][4+2]; /* last: lN, lH, lV, lHV, cU, cV */
-            uint8_t *p_fref_w[32];  /* weighted fullpel luma */
+            pixel *p_fref[2][32][4+2]; /* last: lN, lH, lV, lHV, cU, cV */
+            pixel *p_fref_w[32];  /* weighted fullpel luma */
              uint16_t *p_integral[2][16];
  
              /* fref stride */
@@ -778,7 +786,7 @@ struct x264_t
  
      /* Buffers that are allocated per-thread even in sliced threads. */
      void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
-    uint8_t *intra_border_backup[2][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
+    pixel *intra_border_backup[2][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
      uint8_t (*deblock_strength[2])[2][4][4];
  
      /* CPU functions dependents */
diff --git a/common/dct.c b/common/dct.c

index 10fe2f77e4eb7d253662212ccd71629d0c7240c9..402f20d757d3cb4d835e2323e115548bcb11e75f 100644 (file)
--- a/common/dct.c
+++ b/common/dct.c
@@ -98,7 +98,7 @@ static void idct4x4dc( int16_t d[16] )
  }
  
  static inline void pixel_sub_wxh( int16_t *diff, int i_size,
-                                  uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+                                  pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
  {
      for( int y = 0; y < i_size; y++ )
      {
@@ -109,7 +109,7 @@ static inline void pixel_sub_wxh( int16_t *diff, int i_size,
      }
  }
  
-static void sub4x4_dct( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 )
+static void sub4x4_dct( int16_t dct[16], pixel *pix1, pixel *pix2 )
  {
      int16_t d[16];
      int16_t tmp[16];
@@ -143,7 +143,7 @@ static void sub4x4_dct( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 )
      }
  }
  
-static void sub8x8_dct( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 )
+static void sub8x8_dct( int16_t dct[4][16], pixel *pix1, pixel *pix2 )
  {
      sub4x4_dct( dct[0], &pix1[0], &pix2[0] );
      sub4x4_dct( dct[1], &pix1[4], &pix2[4] );
@@ -151,7 +151,7 @@ static void sub8x8_dct( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 )
      sub4x4_dct( dct[3], &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] );
  }
  
-static void sub16x16_dct( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 )
+static void sub16x16_dct( int16_t dct[16][16], pixel *pix1, pixel *pix2 )
  {
      sub8x8_dct( &dct[ 0], &pix1[0], &pix2[0] );
      sub8x8_dct( &dct[ 4], &pix1[8], &pix2[8] );
@@ -159,7 +159,7 @@ static void sub16x16_dct( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 )
      sub8x8_dct( &dct[12], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] );
  }
  
-static int sub4x4_dct_dc( uint8_t *pix1, uint8_t *pix2 )
+static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 )
  {
      int16_t d[16];
      int sum = 0;
@@ -172,7 +172,7 @@ static int sub4x4_dct_dc( uint8_t *pix1, uint8_t *pix2 )
      return sum;
  }
  
-static void sub8x8_dct_dc( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 )
+static void sub8x8_dct_dc( int16_t dct[4], pixel *pix1, pixel *pix2 )
  {
      dct[0] = sub4x4_dct_dc( &pix1[0], &pix2[0] );
      dct[1] = sub4x4_dct_dc( &pix1[4], &pix2[4] );
@@ -190,7 +190,7 @@ static void sub8x8_dct_dc( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 )
      dct[3] = d2 - d3;
  }
  
-static void add4x4_idct( uint8_t *p_dst, int16_t dct[16] )
+static void add4x4_idct( pixel *p_dst, int16_t dct[16] )
  {
      int16_t d[16];
      int16_t tmp[16];
@@ -225,12 +225,12 @@ static void add4x4_idct( uint8_t *p_dst, int16_t dct[16] )
      for( int y = 0; y < 4; y++ )
      {
          for( int x = 0; x < 4; x++ )
-            p_dst[x] = x264_clip_uint8( p_dst[x] + d[y*4+x] );
+            p_dst[x] = x264_clip_pixel( p_dst[x] + d[y*4+x] );
          p_dst += FDEC_STRIDE;
      }
  }
  
-static void add8x8_idct( uint8_t *p_dst, int16_t dct[4][16] )
+static void add8x8_idct( pixel *p_dst, int16_t dct[4][16] )
  {
      add4x4_idct( &p_dst[0],               dct[0] );
      add4x4_idct( &p_dst[4],               dct[1] );
@@ -238,7 +238,7 @@ static void add8x8_idct( uint8_t *p_dst, int16_t dct[4][16] )
      add4x4_idct( &p_dst[4*FDEC_STRIDE+4], dct[3] );
  }
  
-static void add16x16_idct( uint8_t *p_dst, int16_t dct[16][16] )
+static void add16x16_idct( pixel *p_dst, int16_t dct[16][16] )
  {
      add8x8_idct( &p_dst[0],               &dct[0] );
      add8x8_idct( &p_dst[8],               &dct[4] );
@@ -277,7 +277,7 @@ static void add16x16_idct( uint8_t *p_dst, int16_t dct[16][16] )
      DST(7) = (a4>>2) - a7 ;\
  }
  
-static void sub8x8_dct8( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 )
+static void sub8x8_dct8( int16_t dct[64], pixel *pix1, pixel *pix2 )
  {
      int16_t tmp[64];
  
@@ -298,7 +298,7 @@ static void sub8x8_dct8( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 )
  #undef DST
  }
  
-static void sub16x16_dct8( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 )
+static void sub16x16_dct8( int16_t dct[4][64], pixel *pix1, pixel *pix2 )
  {
      sub8x8_dct8( dct[0], &pix1[0],               &pix2[0] );
      sub8x8_dct8( dct[1], &pix1[8],               &pix2[8] );
@@ -333,7 +333,7 @@ static void sub16x16_dct8( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 )
      DST(7, b0 - b7);\
  }
  
-static void add8x8_idct8( uint8_t *dst, int16_t dct[64] )
+static void add8x8_idct8( pixel *dst, int16_t dct[64] )
  {
      dct[0] += 32; // rounding for the >>6 at the end
  
@@ -345,14 +345,14 @@ static void add8x8_idct8( uint8_t *dst, int16_t dct[64] )
  #undef DST
  
  #define SRC(x)     dct[i*8+x]
-#define DST(x,rhs) dst[i + x*FDEC_STRIDE] = x264_clip_uint8( dst[i + x*FDEC_STRIDE] + ((rhs) >> 6) );
+#define DST(x,rhs) dst[i + x*FDEC_STRIDE] = x264_clip_pixel( dst[i + x*FDEC_STRIDE] + ((rhs) >> 6) );
      for( int i = 0; i < 8; i++ )
          IDCT8_1D
  #undef SRC
  #undef DST
  }
  
-static void add16x16_idct8( uint8_t *dst, int16_t dct[4][64] )
+static void add16x16_idct8( pixel *dst, int16_t dct[4][64] )
  {
      add8x8_idct8( &dst[0],               dct[0] );
      add8x8_idct8( &dst[8],               dct[1] );
@@ -360,19 +360,19 @@ static void add16x16_idct8( uint8_t *dst, int16_t dct[4][64] )
      add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] );
  }
  
-static void inline add4x4_idct_dc( uint8_t *p_dst, int16_t dc )
+static void inline add4x4_idct_dc( pixel *p_dst, int16_t dc ) /* add the rounded DC term (dc+32)>>6 to every pixel of a 4x4 block whose rows are FDEC_STRIDE apart */
  {
      dc = (dc + 32) >> 6;
      for( int i = 0; i < 4; i++, p_dst += FDEC_STRIDE )
      {
-        p_dst[0] = x264_clip_uint8( p_dst[0] + dc );
-        p_dst[1] = x264_clip_uint8( p_dst[1] + dc );
-        p_dst[2] = x264_clip_uint8( p_dst[2] + dc );
-        p_dst[3] = x264_clip_uint8( p_dst[3] + dc );
+        p_dst[0] = x264_clip_pixel( p_dst[0] + dc ); /* each sum is clipped back into pixel range */
+        p_dst[1] = x264_clip_pixel( p_dst[1] + dc );
+        p_dst[2] = x264_clip_pixel( p_dst[2] + dc );
+        p_dst[3] = x264_clip_pixel( p_dst[3] + dc );
      }
  }
  
-static void add8x8_idct_dc( uint8_t *p_dst, int16_t dct[4] )
+static void add8x8_idct_dc( pixel *p_dst, int16_t dct[4] )
  {
      add4x4_idct_dc( &p_dst[0],               dct[0] );
      add4x4_idct_dc( &p_dst[4],               dct[1] );
@@ -380,7 +380,7 @@ static void add8x8_idct_dc( uint8_t *p_dst, int16_t dct[4] )
      add4x4_idct_dc( &p_dst[4*FDEC_STRIDE+4], dct[3] );
  }
  
-static void add16x16_idct_dc( uint8_t *p_dst, int16_t dct[16] )
+static void add16x16_idct_dc( pixel *p_dst, int16_t dct[16] )
  {
      for( int i = 0; i < 4; i++, dct += 4, p_dst += 4*FDEC_STRIDE )
      {
@@ -614,21 +614,21 @@ static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[16] )
      nz |= level[i];\
  }
  #define COPY4x4\
-    CP32( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
-    CP32( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
-    CP32( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
-    CP32( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
+    CPPIXEL_X4( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE ); /* one 4-pixel row per statement: source rows are FENC_STRIDE apart, destination rows FDEC_STRIDE apart */\
+    CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
+    CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
+    CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE ); /* last row: 4x4 block copied from p_src to p_dst */
  #define COPY8x8\
-    CP64( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
-    CP64( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
-    CP64( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
-    CP64( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );\
-    CP64( p_dst+4*FDEC_STRIDE, p_src+4*FENC_STRIDE );\
-    CP64( p_dst+5*FDEC_STRIDE, p_src+5*FENC_STRIDE );\
-    CP64( p_dst+6*FDEC_STRIDE, p_src+6*FENC_STRIDE );\
-    CP64( p_dst+7*FDEC_STRIDE, p_src+7*FENC_STRIDE );
+    CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE ); /* one 8-pixel row per statement: source rows are FENC_STRIDE apart, destination rows FDEC_STRIDE apart */\
+    CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
+    CPPIXEL_X8( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
+    CPPIXEL_X8( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );\
+    CPPIXEL_X8( p_dst+4*FDEC_STRIDE, p_src+4*FENC_STRIDE );\
+    CPPIXEL_X8( p_dst+5*FDEC_STRIDE, p_src+5*FENC_STRIDE );\
+    CPPIXEL_X8( p_dst+6*FDEC_STRIDE, p_src+6*FENC_STRIDE );\
+    CPPIXEL_X8( p_dst+7*FDEC_STRIDE, p_src+7*FENC_STRIDE ); /* last row: 8x8 block copied from p_src to p_dst */
  
-static int zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
+static int zigzag_sub_4x4_frame( int16_t level[16], const pixel *p_src, pixel *p_dst )
  {
      int nz = 0;
      ZIGZAG4_FRAME
@@ -636,7 +636,7 @@ static int zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *p_src, uint8_
      return !!nz;
  }
  
-static int zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
+static int zigzag_sub_4x4_field( int16_t level[16], const pixel *p_src, pixel *p_dst )
  {
      int nz = 0;
      ZIGZAG4_FIELD
@@ -652,7 +652,7 @@ static int zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8_
      level[0] = 0;\
  }
  
-static int zigzag_sub_4x4ac_frame( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst, int16_t *dc )
+static int zigzag_sub_4x4ac_frame( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc )
  {
      int nz = 0;
      ZIGZAG4_FRAME
@@ -660,7 +660,7 @@ static int zigzag_sub_4x4ac_frame( int16_t level[16], const uint8_t *p_src, uint
      return !!nz;
  }
  
-static int zigzag_sub_4x4ac_field( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst, int16_t *dc )
+static int zigzag_sub_4x4ac_field( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc )
  {
      int nz = 0;
      ZIGZAG4_FIELD
@@ -668,14 +668,14 @@ static int zigzag_sub_4x4ac_field( int16_t level[16], const uint8_t *p_src, uint
      return !!nz;
  }
  
-static int zigzag_sub_8x8_frame( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst )
+static int zigzag_sub_8x8_frame( int16_t level[64], const pixel *p_src, pixel *p_dst ) /* expands ZIGZAG8_FRAME, then copies the 8x8 block from p_src into p_dst; returns 1 if nz is nonzero afterwards, else 0 */
  {
      int nz = 0;
      ZIGZAG8_FRAME
      COPY8x8
      return !!nz;
  }
-static int zigzag_sub_8x8_field( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst )
+static int zigzag_sub_8x8_field( int16_t level[64], const pixel *p_src, pixel *p_dst )
  {
      int nz = 0;
      ZIGZAG8_FIELD
diff --git a/common/dct.h b/common/dct.h

index 6f282b954076a2b083148c3e0913dbbd8e722c86..1305d784a3cc92fd82db8e1d514579e078327e7f 100644 (file)
--- a/common/dct.h
+++ b/common/dct.h
@@ -91,23 +91,23 @@ typedef struct
      // pix1  stride = FENC_STRIDE
      // pix2  stride = FDEC_STRIDE
      // p_dst stride = FDEC_STRIDE
-    void (*sub4x4_dct)   ( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
-    void (*add4x4_idct)  ( uint8_t *p_dst, int16_t dct[16] );
+    void (*sub4x4_dct)   ( int16_t dct[16], pixel *pix1, pixel *pix2 );
+    void (*add4x4_idct)  ( pixel *p_dst, int16_t dct[16] );
  
-    void (*sub8x8_dct)   ( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
-    void (*sub8x8_dct_dc)( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 );
-    void (*add8x8_idct)  ( uint8_t *p_dst, int16_t dct[4][16] );
-    void (*add8x8_idct_dc) ( uint8_t *p_dst, int16_t dct[4] );
+    void (*sub8x8_dct)   ( int16_t dct[4][16], pixel *pix1, pixel *pix2 );
+    void (*sub8x8_dct_dc)( int16_t dct[4], pixel *pix1, pixel *pix2 );
+    void (*add8x8_idct)  ( pixel *p_dst, int16_t dct[4][16] );
+    void (*add8x8_idct_dc) ( pixel *p_dst, int16_t dct[4] );
  
-    void (*sub16x16_dct) ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
-    void (*add16x16_idct)( uint8_t *p_dst, int16_t dct[16][16] );
-    void (*add16x16_idct_dc) ( uint8_t *p_dst, int16_t dct[16] );
+    void (*sub16x16_dct) ( int16_t dct[16][16], pixel *pix1, pixel *pix2 );
+    void (*add16x16_idct)( pixel *p_dst, int16_t dct[16][16] );
+    void (*add16x16_idct_dc) ( pixel *p_dst, int16_t dct[16] );
  
-    void (*sub8x8_dct8)  ( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 );
-    void (*add8x8_idct8) ( uint8_t *p_dst, int16_t dct[64] );
+    void (*sub8x8_dct8)  ( int16_t dct[64], pixel *pix1, pixel *pix2 );
+    void (*add8x8_idct8) ( pixel *p_dst, int16_t dct[64] );
  
-    void (*sub16x16_dct8) ( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
-    void (*add16x16_idct8)( uint8_t *p_dst, int16_t dct[4][64] );
+    void (*sub16x16_dct8) ( int16_t dct[4][64], pixel *pix1, pixel *pix2 );
+    void (*add16x16_idct8)( pixel *p_dst, int16_t dct[4][64] );
  
      void (*dct4x4dc) ( int16_t d[16] );
      void (*idct4x4dc)( int16_t d[16] );
@@ -118,9 +118,9 @@ typedef struct
  {
      void (*scan_8x8)( int16_t level[64], int16_t dct[64] );
      void (*scan_4x4)( int16_t level[16], int16_t dct[16] );
-    int  (*sub_8x8)  ( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst );
-    int  (*sub_4x4)  ( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst );
-    int  (*sub_4x4ac)( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst, int16_t *dc );
+    int  (*sub_8x8)  ( int16_t level[64], const pixel *p_src, pixel *p_dst );
+    int  (*sub_4x4)  ( int16_t level[16], const pixel *p_src, pixel *p_dst );
+    int  (*sub_4x4ac)( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc );
      void (*interleave_8x8_cavlc)( int16_t *dst, int16_t *src, uint8_t *nnz );
  
  } x264_zigzag_function_t;
diff --git a/common/deblock.c b/common/deblock.c

index 9e3a73e232f3f61f5166ae2b13c0c5b056c73b0a..784710e05b290f55cfd18285efbe42dfc2e23737 100644 (file)
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -68,7 +68,7 @@ static const int8_t i_tc0_table[52+12*2][4] =
  #define tc0_table(x)   i_tc0_table[(x)+12]
  
  /* From ffmpeg */
-static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
+static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
  {
      for( int i = 0; i < 4; i++ )
      {
@@ -104,23 +104,23 @@ static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int a
                  }
  
                  delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
-                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
+                pix[-1*xstride] = x264_clip_pixel( p0 + delta );    /* p0' */
+                pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
              }
              pix += ystride;
          }
      }
  }
-static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_v_luma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 ) /* xstride = stride, ystride = 1: p0/q0 lie stride apart and pix advances one pixel per step */
  {
      deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
  }
-static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_h_luma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 ) /* xstride = 1, ystride = stride: p0/q0 are adjacent pixels and pix advances by stride per step */
  {
      deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
  }
  
-static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
+static inline void deblock_chroma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
  {
      for( int i = 0; i < 4; i++ )
      {
@@ -140,23 +140,23 @@ static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int
              if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
              {
                  int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
-                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
+                pix[-1*xstride] = x264_clip_pixel( p0 + delta );    /* p0' */
+                pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
              }
              pix += ystride;
          }
      }
  }
-static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_v_chroma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 ) /* xstride = stride, ystride = 1: p0/q0 lie stride apart and pix advances one pixel per step */
  {
      deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
  }
-static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_h_chroma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 ) /* xstride = 1, ystride = stride: p0/q0 are adjacent pixels and pix advances by stride per step */
  {
      deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
  }
  
-static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
+static inline void deblock_luma_intra_c( pixel *pix, int xstride, int ystride, int alpha, int beta )
  {
      for( int d = 0; d < 16; d++ )
      {
@@ -199,16 +199,16 @@ static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride,
          pix += ystride;
      }
  }
-static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
+static void deblock_v_luma_intra_c( pixel *pix, int stride, int alpha, int beta ) /* intra luma filter with xstride = stride, ystride = 1 (pix advances one pixel per iteration) */
  {
      deblock_luma_intra_c( pix, stride, 1, alpha, beta );
  }
-static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
+static void deblock_h_luma_intra_c( pixel *pix, int stride, int alpha, int beta ) /* intra luma filter with xstride = 1, ystride = stride (pix advances by stride per iteration) */
  {
      deblock_luma_intra_c( pix, 1, stride, alpha, beta );
  }
  
-static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
+static inline void deblock_chroma_intra_c( pixel *pix, int xstride, int ystride, int alpha, int beta )
  {
      for( int d = 0; d < 8; d++ )
      {
@@ -225,11 +225,11 @@ static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystrid
          pix += ystride;
      }
  }
-static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
+static void deblock_v_chroma_intra_c( pixel *pix, int stride, int alpha, int beta ) /* intra chroma filter with xstride = stride, ystride = 1 (pix advances one pixel per iteration) */
  {
      deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
  }
-static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
+static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int beta ) /* intra chroma filter with xstride = 1, ystride = stride (pix advances by stride per iteration) */
  {
      deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
  }
@@ -263,7 +263,7 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
      }
  }
  
-static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
+static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
  {
      int index_a = i_qp + h->sh.i_alpha_c0_offset;
      int alpha = alpha_table(index_a);
@@ -283,7 +283,7 @@ static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_
          pf_inter( pix2, i_stride, alpha, beta, tc );
  }
  
-static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
+static inline void deblock_edge_intra( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
  {
      int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
      int beta  = beta_table(i_qp + h->sh.i_beta_offset);
@@ -315,9 +315,9 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
          int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
          uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&b_interlaced][mb_x];
  
-        uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
-        uint8_t *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
-        uint8_t *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
+        pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
+        pixel *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
+        pixel *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
          if( mb_y & b_interlaced )
          {
              pixy -= 15*stridey;
diff --git a/common/frame.c b/common/frame.c

index 00702ffc9271ede9bb60b6e1008891cd94e392fe..f77439526d9b957c0f9600501ad7398645c693d8 100644 (file)
--- a/common/frame.c
+++ b/common/frame.c
@@ -56,7 +56,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
      chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*i_padv));
      for( int i = 1; i < 3; i++ )
      {
-        CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
+        CHECKED_MALLOC( frame->buffer[i], chroma_plane_size * sizeof(pixel) );
          frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
      }
  
@@ -87,14 +87,14 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
       * requires them to be in-phase wrt cacheline alignment. */
      if( h->param.analyse.i_subpel_refine && b_fdec )
      {
-        CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size );
+        CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size * sizeof(pixel) );
          for( int i = 0; i < 4; i++ )
              frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
          frame->plane[0] = frame->filtered[0];
      }
      else
      {
-        CHECKED_MALLOC( frame->buffer[0], luma_plane_size );
+        CHECKED_MALLOC( frame->buffer[0], luma_plane_size * sizeof(pixel) );
          frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
      }
  
@@ -136,7 +136,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
  
              luma_plane_size = frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV);
  
-            CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
+            CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
              for( int i = 0; i < 4; i++ )
                  frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
  
@@ -253,26 +253,30 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
      return 0;
  }
  
+static void ALWAYS_INLINE pixel_memset( pixel *dst, int value, int size ) /* set size consecutive pixels starting at dst to value; size is a pixel count, not a byte count */
+{
+    for( int i = 0; i < size; i++ ) /* does nothing when size <= 0 */
+        dst[i] = value; /* value is converted to pixel on assignment */
+}
  
-
-static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
+static void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom ) /* replicate edge pixels of an i_width x i_height plane into i_padh columns on each side and, when requested, i_padv rows above/below */
  {
  #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
      for( int y = 0; y < i_height; y++ )
      {
          /* left band */
-        memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
+        pixel_memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh ); /* fill with the row's first pixel; count is in pixels */
          /* right band */
-        memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
+        pixel_memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh ); /* fill with the row's last pixel */
      }
      /* upper band */
      if( b_pad_top )
          for( int y = 0; y < i_padv; y++ )
-            memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
+            memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), (i_width+2*i_padh) * sizeof(pixel) ); /* copy padded row 0 upwards; memcpy takes bytes, so scale by the pixel size */
      /* lower band */
      if( b_pad_bottom )
          for( int y = 0; y < i_padv; y++ )
-            memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
+            memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), (i_width+2*i_padh) * sizeof(pixel) ); /* copy the padded last row downwards, size in bytes */
  #undef PPIXEL
  }
  
@@ -289,7 +293,7 @@ void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_e
          int padh = PADH >> !!i;
          int padv = PADV >> !!i;
          // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
-        uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
+        pixel *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
          if( b_end && !b_start )
              height += 4 >> (!!i + h->sh.b_mbaff);
          if( h->sh.b_mbaff )
@@ -318,7 +322,7 @@ void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y
      for( int i = 1; i < 4; i++ )
      {
          // buffer: 8 luma, to match the hpel filter
-        uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
+        pixel *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
          if( h->sh.b_mbaff )
          {
              plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
@@ -348,16 +352,17 @@ void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
          if( i_padx )
          {
              for( int y = 0; y < i_height; y++ )
-                memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
-                         frame->plane[i][y*frame->i_stride[i] + i_width - 1],
-                         i_padx );
+            {
+                pixel value = frame->plane[i][y*frame->i_stride[i] + i_width - 1];
+                pixel_memset( &frame->plane[i][y*frame->i_stride[i] + i_width], value, i_padx );
+            }
          }
          if( i_pady )
          {
              for( int y = i_height; y < i_height + i_pady; y++ )
                  memcpy( &frame->plane[i][y*frame->i_stride[i]],
                          &frame->plane[i][(i_height-(~y&h->param.b_interlaced)-1)*frame->i_stride[i]],
-                        i_width + i_padx );
+                        (i_width + i_padx) * sizeof(pixel) );
          }
      }
  }
@@ -489,7 +494,7 @@ void x264_frame_sort( x264_frame_t **list, int b_dts )
      } while( !b_ok );
  }
  
-void x264_weight_scale_plane( x264_t *h, uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride,
+void x264_weight_scale_plane( x264_t *h, pixel *dst, int i_dst_stride, pixel *src, int i_src_stride,
                           int i_width, int i_height, x264_weight_t *w )
  {
      /* Weight horizontal strips of height 16. This was found to be the optimal height
diff --git a/common/frame.h b/common/frame.h

index ca5cb7a1ef4b4ff2c112db76a2a04ae0a6e9b987..7d252c37648adedbbe64eb544f9fa139f94a2035 100644 (file)
--- a/common/frame.h
+++ b/common/frame.h
@@ -64,18 +64,18 @@ typedef struct x264_frame
      int     i_stride_lowres;
      int     i_width_lowres;
      int     i_lines_lowres;
-    uint8_t *plane[3];
-    uint8_t *filtered[4]; /* plane[0], H, V, HV */
-    uint8_t *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
+    pixel *plane[3];
+    pixel *filtered[4]; /* plane[0], H, V, HV */
+    pixel *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
      uint16_t *integral;
  
      /* for unrestricted mv we allocate more data than needed
       * allocated data are stored in buffer */
-    uint8_t *buffer[4];
-    uint8_t *buffer_lowres[4];
+    pixel *buffer[4];
+    pixel *buffer_lowres[4];
  
      x264_weight_t weight[16][3]; /* [ref_index][plane] */
-    uint8_t *weighted[16]; /* plane[0] weighted of the reference frames */
+    pixel *weighted[16]; /* plane[0] weighted of the reference frames */
      int b_duplicate;
      struct x264_frame *orig;
  
@@ -156,8 +156,8 @@ typedef struct
     x264_pthread_cond_t      cv_empty; /* event signaling that the list became emptier */
  } x264_synch_frame_list_t;
  
-typedef void (*x264_deblock_inter_t)( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
-typedef void (*x264_deblock_intra_t)( uint8_t *pix, int stride, int alpha, int beta );
+typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 ); /* edge filter taking a tc0 array in addition to the alpha/beta thresholds */
+typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta ); /* edge filter taking only the alpha/beta thresholds (no tc0) */
  typedef struct
  {
      x264_deblock_inter_t deblock_luma[2];
@@ -196,7 +196,7 @@ x264_frame_t *x264_frame_shift( x264_frame_t **list );
  void          x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
  void          x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame );
  x264_frame_t *x264_frame_pop_blank_unused( x264_t *h );
-void x264_weight_scale_plane( x264_t *h, uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride,
+void x264_weight_scale_plane( x264_t *h, pixel *dst, int i_dst_stride, pixel *src, int i_src_stride,
                                int i_width, int i_height, x264_weight_t *w );
  x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
  void          x264_frame_sort( x264_frame_t **list, int b_dts );
diff --git a/common/macroblock.c b/common/macroblock.c

index 6e73ec4d6c1e8f75358fff3cd40a4d74fa61fc05..1f4ae43319ec9a3c1d67c39ad4df40c5450a3e19 100644 (file)
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -94,9 +94,9 @@ static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int
      int mvy1   = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
      int i_mode = x264_size2pixel[height][width];
      int i_stride0 = 16, i_stride1 = 16;
-    ALIGNED_ARRAY_16( uint8_t, tmp0,[16*16] );
-    ALIGNED_ARRAY_16( uint8_t, tmp1,[16*16] );
-    uint8_t *src0, *src1;
+    ALIGNED_ARRAY_16( pixel, tmp0,[16*16] );
+    ALIGNED_ARRAY_16( pixel, tmp1,[16*16] );
+    pixel *src0, *src1;
  
      src0 = h->mc.get_ref( tmp0, &i_stride0, h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
                            mvx0, mvy0, 4*width, 4*height, weight_none );
@@ -290,7 +290,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
          }
  
          for( int i = 0; i < numweightbuf; i++ )
-            CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size );
+            CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
  #undef ALIGN
      }
  
@@ -329,7 +329,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
              for( int j = 0; j < 3; j++ )
              {
                  /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
-                CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32)>>!!j );
+                CHECKED_MALLOCZERO( h->intra_border_backup[i][j], ((h->sps->i_mb_width*16+32)>>!!j) * sizeof(pixel) );
                  h->intra_border_backup[i][j] += 8;
              }
              CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->sps->i_mb_width );
@@ -488,7 +488,7 @@ void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y )
                           fenc->plane[1+(i_mb_x&1)]+off_uv, stride_uv, i_mb_x );
  }
  
-static NOINLINE void copy_column8( uint8_t *dst, uint8_t *src )
+static NOINLINE void copy_column8( pixel *dst, pixel *src )
  {
      // input pointers are offset by 4 rows because that's faster (smaller instruction size on x86)
      for( int i = -4; i < 4; i++ )
@@ -503,8 +503,8 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
      const int i_pix_offset = h->mb.b_interlaced
                             ? w * (mb_x + (mb_y&~1) * i_stride) + (mb_y&1) * i_stride
                             : w * (mb_x + mb_y * i_stride);
-    const uint8_t *plane_fdec = &h->fdec->plane[i][i_pix_offset];
-    const uint8_t *intra_fdec = &h->intra_border_backup[mb_y & h->sh.b_mbaff][i][mb_x*16>>!!i];
+    const pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
+    const pixel *intra_fdec = &h->intra_border_backup[mb_y & h->sh.b_mbaff][i][mb_x*16>>!!i];
      int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
      x264_frame_t **fref[2] = { h->fref0, h->fref1 };
      if( h->mb.b_interlaced )
@@ -513,7 +513,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
      h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
      h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
          h->mb.pic.p_fenc_plane[i], i_stride2, w );
-    memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 );
+    memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, (w*3/2+1) * sizeof(pixel) );
      if( h->mb.b_interlaced )
          for( int j = 0; j < w; j++ )
              h->mb.pic.p_fdec[i][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
diff --git a/common/macroblock.h b/common/macroblock.h

index 875b0c610e63f529739df9c780125d1bdf65cb09..76f14e22d9cb9856ff127c8574361b3c1823afc1 100644 (file)
--- a/common/macroblock.h
+++ b/common/macroblock.h
@@ -341,6 +341,9 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
  #endif
  }
  
+#define pack_pixel_1to2 pack8to16
+#define pack_pixel_2to4 pack16to32
+
  #define array_non_zero(a) array_non_zero_int(a, sizeof(a))
  #define array_non_zero_int array_non_zero_int
  static ALWAYS_INLINE int array_non_zero_int( int16_t *v, int i_count )
diff --git a/common/mc.c b/common/mc.c

index e0dc6599231a3241fe2be7aaa252d923cff657f0..daff9e04ccbfccd814633bf7acddc326d6ff3506 100644 (file)
--- a/common/mc.c
+++ b/common/mc.c
@@ -34,9 +34,9 @@
  #endif
  
  
-static inline void pixel_avg( uint8_t *dst,  int i_dst_stride,
-                              uint8_t *src1, int i_src1_stride,
-                              uint8_t *src2, int i_src2_stride,
+static inline void pixel_avg( pixel *dst,  int i_dst_stride,
+                              pixel *src1, int i_src1_stride,
+                              pixel *src2, int i_src2_stride,
                                int i_width, int i_height )
  {
      for( int y = 0; y < i_height; y++ )
@@ -49,7 +49,7 @@ static inline void pixel_avg( uint8_t *dst,  int i_dst_stride,
      }
  }
  
-static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_src1, uint8_t *src2, int i_src2, int width, int height )
+static inline void pixel_avg_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1, pixel *src2, int i_src2, int width, int height )
  {
      for( int y = 0; y < height; y++ )
      {
@@ -63,8 +63,8 @@ static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_
  
  /* Implicit weighted bipred only:
   * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
-#define op_scale2(x) dst[x] = x264_clip_uint8( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 )
-static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_src1, uint8_t *src2, int i_src2, int width, int height, int i_weight1 )
+#define op_scale2(x) dst[x] = x264_clip_pixel( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 )
+static inline void pixel_avg_weight_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1, pixel *src2, int i_src2, int width, int height, int i_weight1 )
  {
      const int i_weight2 = 64 - i_weight1;
      for( int y = 0; y<height; y++, dst += i_dst, src1 += i_src1, src2 += i_src2 )
@@ -93,9 +93,9 @@ static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src1,
  #undef op_scale2
  
  #define PIXEL_AVG_C( name, width, height ) \
-static void name( uint8_t *pix1, int i_stride_pix1, \
-                  uint8_t *pix2, int i_stride_pix2, \
-                  uint8_t *pix3, int i_stride_pix3, int weight ) \
+static void name( pixel *pix1, int i_stride_pix1, \
+                  pixel *pix2, int i_stride_pix2, \
+                  pixel *pix3, int i_stride_pix3, int weight ) \
  { \
      if( weight == 32 ) \
          pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
@@ -117,9 +117,9 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w )
  {
      w->weightfn = h->mc.weight;
  }
-#define opscale(x) dst[x] = x264_clip_uint8( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
-#define opscale_noden(x) dst[x] = x264_clip_uint8( src[x] * weight->i_scale + weight->i_offset )
-static inline void mc_weight( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
+#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
+#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * weight->i_scale + weight->i_offset )
+static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
  {
      if( weight->i_denom >= 1 )
      {
@@ -136,7 +136,7 @@ static inline void mc_weight( uint8_t *dst, int i_dst_stride, uint8_t *src, int
  }
  
  #define MC_WEIGHT_C( name, lx ) \
-    static void name( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int height ) \
+    static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \
  { \
      if( weight->i_denom >= 1 ) \
      { \
@@ -169,11 +169,11 @@ static weight_fn_t x264_mc_weight_wtab[6] =
      mc_weight_w20,
  };
  const x264_weight_t weight_none[3] = { {{0}} };
-static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
+static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride, int i_width, int i_height )
  {
      for( int y = 0; y < i_height; y++ )
      {
-        memcpy( dst, src, i_width );
+        memcpy( dst, src, i_width * sizeof(pixel) );
  
          src += i_src_stride;
          dst += i_dst_stride;
@@ -181,7 +181,7 @@ static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_str
  }
  
  #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
-static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
+static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
                           int stride, int width, int height, int16_t *buf )
  {
      for( int y = 0; y < height; y++ )
@@ -189,13 +189,13 @@ static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *s
          for( int x = -2; x < width+3; x++ )
          {
              int v = TAPFILTER(src,stride);
-            dstv[x] = x264_clip_uint8( (v + 16) >> 5 );
+            dstv[x] = x264_clip_pixel( (v + 16) >> 5 );
              buf[x+2] = v;
          }
          for( int x = 0; x < width; x++ )
-            dstc[x] = x264_clip_uint8( (TAPFILTER(buf+2,1) + 512) >> 10 );
+            dstc[x] = x264_clip_pixel( (TAPFILTER(buf+2,1) + 512) >> 10 );
          for( int x = 0; x < width; x++ )
-            dsth[x] = x264_clip_uint8( (TAPFILTER(src,1) + 16) >> 5 );
+            dsth[x] = x264_clip_pixel( (TAPFILTER(src,1) + 16) >> 5 );
          dsth += stride;
          dstv += stride;
          dstc += stride;
@@ -206,18 +206,18 @@ static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *s
  static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  
-static void mc_luma( uint8_t *dst,    int i_dst_stride,
-                     uint8_t *src[4], int i_src_stride,
+static void mc_luma( pixel *dst,    int i_dst_stride,
+                     pixel *src[4], int i_src_stride,
                       int mvx, int mvy,
                       int i_width, int i_height, const x264_weight_t *weight )
  {
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);
      int offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
  
      if( qpel_idx & 5 ) /* qpel interpolation needed */
      {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
          pixel_avg( dst, i_dst_stride, src1, i_src_stride,
                     src2, i_src_stride, i_width, i_height );
          if( weight->weightfn )
@@ -229,18 +229,18 @@ static void mc_luma( uint8_t *dst,    int i_dst_stride,
          mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
  }
  
-static uint8_t *get_ref( uint8_t *dst,   int *i_dst_stride,
-                         uint8_t *src[4], int i_src_stride,
-                         int mvx, int mvy,
-                         int i_width, int i_height, const x264_weight_t *weight )
+static pixel *get_ref( pixel *dst,   int *i_dst_stride,
+                       pixel *src[4], int i_src_stride,
+                       int mvx, int mvy,
+                       int i_width, int i_height, const x264_weight_t *weight )
  {
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);
      int offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
  
      if( qpel_idx & 5 ) /* qpel interpolation needed */
      {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
          pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
                     src2, i_src_stride, i_width, i_height );
          if( weight->weightfn )
@@ -260,12 +260,12 @@ static uint8_t *get_ref( uint8_t *dst,   int *i_dst_stride,
  }
  
  /* full chroma mc (ie until 1/8 pixel)*/
-static void mc_chroma( uint8_t *dst, int i_dst_stride,
-                       uint8_t *src, int i_src_stride,
+static void mc_chroma( pixel *dst, int i_dst_stride,
+                       pixel *src, int i_src_stride,
                         int mvx, int mvy,
                         int i_width, int i_height )
  {
-    uint8_t *srcp;
+    pixel *srcp;
  
      int d8x = mvx&0x07;
      int d8y = mvy&0x07;
@@ -288,7 +288,7 @@ static void mc_chroma( uint8_t *dst, int i_dst_stride,
  }
  
  #define MC_COPY(W) \
-static void mc_copy_w##W( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_height ) \
+static void mc_copy_w##W( pixel *dst, int i_dst, pixel *src, int i_src, int i_height ) \
  { \
      mc_copy( src, i_src, dst, i_dst, W, i_height ); \
  }
@@ -296,7 +296,7 @@ MC_COPY( 16 )
  MC_COPY( 8 )
  MC_COPY( 4 )
  
-void x264_plane_copy_c( uint8_t *dst, int i_dst,
+void x264_plane_copy_c( pixel *dst, int i_dst,
                          uint8_t *src, int i_src, int w, int h)
  {
      while( h-- )
@@ -307,11 +307,11 @@ void x264_plane_copy_c( uint8_t *dst, int i_dst,
      }
  }
  
-static void prefetch_fenc_null( uint8_t *pix_y, int stride_y,
-                                uint8_t *pix_uv, int stride_uv, int mb_x )
+static void prefetch_fenc_null( pixel *pix_y, int stride_y,
+                                pixel *pix_uv, int stride_uv, int mb_x )
  {}
  
-static void prefetch_ref_null( uint8_t *pix, int stride, int parity )
+static void prefetch_ref_null( pixel *pix, int stride, int parity )
  {}
  
  static void memzero_aligned( void * dst, int n )
@@ -319,7 +319,7 @@ static void memzero_aligned( void * dst, int n )
      memset( dst, 0, n );
  }
  
-static void integral_init4h( uint16_t *sum, uint8_t *pix, int stride )
+static void integral_init4h( uint16_t *sum, pixel *pix, int stride )
  {
      int v = pix[0]+pix[1]+pix[2]+pix[3];
      for( int x = 0; x < stride-4; x++ )
@@ -329,7 +329,7 @@ static void integral_init4h( uint16_t *sum, uint8_t *pix, int stride )
      }
  }
  
-static void integral_init8h( uint16_t *sum, uint8_t *pix, int stride )
+static void integral_init8h( uint16_t *sum, pixel *pix, int stride )
  {
      int v = pix[0]+pix[1]+pix[2]+pix[3]+pix[4]+pix[5]+pix[6]+pix[7];
      for( int x = 0; x < stride-8; x++ )
@@ -355,7 +355,7 @@ static void integral_init8v( uint16_t *sum8, int stride )
  
  void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
  {
-    uint8_t *src = frame->plane[0];
+    pixel *src = frame->plane[0];
      int i_stride = frame->i_stride[0];
      int i_height = frame->i_lines[0];
      int i_width  = frame->i_width[0];
@@ -363,7 +363,7 @@ void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
      // duplicate last row and column so that their interpolation doesn't have to be special-cased
      for( int y = 0; y < i_height; y++ )
          src[i_width+y*i_stride] = src[i_width-1+y*i_stride];
-    memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), i_width+1 );
+    memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), (i_width+1) * sizeof(pixel) );
      h->mc.frame_init_lowres_core( src, frame->lowres[0], frame->lowres[1], frame->lowres[2], frame->lowres[3],
                                    i_stride, frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres );
      x264_frame_expand_border_lowres( frame );
@@ -379,13 +379,13 @@ void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
              frame->lowres_mvs[y][x][0][0] = 0x7FFF;
  }
  
-static void frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
+static void frame_init_lowres_core( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
                                      int src_stride, int dst_stride, int width, int height )
  {
      for( int y = 0; y < height; y++ )
      {
-        uint8_t *src1 = src0+src_stride;
-        uint8_t *src2 = src1+src_stride;
+        pixel *src1 = src0+src_stride;
+        pixel *src2 = src1+src_stride;
          for( int x = 0; x<width; x++ )
          {
              // slower than naive bilinear, but matches asm
@@ -525,7 +525,7 @@ void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
              height += PADV-9;
          for( int y = start; y < height; y++ )
          {
-            uint8_t  *pix  = frame->plane[0] + y * stride - PADH;
+            pixel    *pix  = frame->plane[0] + y * stride - PADH;
              uint16_t *sum8 = frame->integral + (y+1) * stride - PADH;
              uint16_t *sum4;
              if( h->frames.b_have_sub8x8_esa )
diff --git a/common/mc.h b/common/mc.h

index 68bba4885621c358884a4d1d8f86765a5465915f..bb16d13eaebd053e83f23bc18bdf17e7c0e31d2c 100644 (file)
--- a/common/mc.h
+++ b/common/mc.h
@@ -22,7 +22,7 @@
  #define X264_MC_H
  
  struct x264_weight_t;
-typedef void (* weight_fn_t)( uint8_t *, int, uint8_t *,int, const struct x264_weight_t *, int );
+typedef void (* weight_fn_t)( pixel *, int, pixel *,int, const struct x264_weight_t *, int );
  typedef struct x264_weight_t
  {
      /* aligning the first member is a gcc hack to force the struct to be
@@ -57,49 +57,49 @@ extern const x264_weight_t weight_none[3];
  
  typedef struct
  {
-    void (*mc_luma)(uint8_t *dst, int i_dst, uint8_t **src, int i_src,
+    void (*mc_luma)(pixel *dst, int i_dst, pixel **src, int i_src,
                      int mvx, int mvy,
                      int i_width, int i_height, const x264_weight_t *weight );
  
      /* may round up the dimensions if they're not a power of 2 */
-    uint8_t* (*get_ref)(uint8_t *dst, int *i_dst, uint8_t **src, int i_src,
-                        int mvx, int mvy,
-                        int i_width, int i_height, const x264_weight_t *weight );
+    pixel* (*get_ref)(pixel *dst, int *i_dst, pixel **src, int i_src,
+                      int mvx, int mvy,
+                      int i_width, int i_height, const x264_weight_t *weight );
  
      /* mc_chroma may write up to 2 bytes of garbage to the right of dst,
       * so it must be run from left to right. */
-    void (*mc_chroma)(uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+    void (*mc_chroma)(pixel *dst, int i_dst, pixel *src, int i_src,
                        int mvx, int mvy,
                        int i_width, int i_height );
  
-    void (*avg[10])( uint8_t *dst, int, uint8_t *src1, int, uint8_t *src2, int, int i_weight );
+    void (*avg[10])( pixel *dst, int, pixel *src1, int, pixel *src2, int, int i_weight );
  
      /* only 16x16, 8x8, and 4x4 defined */
-    void (*copy[7])( uint8_t *dst, int, uint8_t *src, int, int i_height );
-    void (*copy_16x16_unaligned)( uint8_t *dst, int, uint8_t *src, int, int i_height );
+    void (*copy[7])( pixel *dst, int, pixel *src, int, int i_height );
+    void (*copy_16x16_unaligned)( pixel *dst, int, pixel *src, int, int i_height );
  
-    void (*plane_copy)( uint8_t *dst, int i_dst,
+    void (*plane_copy)( pixel *dst, int i_dst,
                          uint8_t *src, int i_src, int w, int h);
  
-    void (*hpel_filter)( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
+    void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
                           int i_stride, int i_width, int i_height, int16_t *buf );
  
      /* prefetch the next few macroblocks of fenc or fdec */
-    void (*prefetch_fenc)( uint8_t *pix_y, int stride_y,
-                           uint8_t *pix_uv, int stride_uv, int mb_x );
+    void (*prefetch_fenc)( pixel *pix_y, int stride_y,
+                           pixel *pix_uv, int stride_uv, int mb_x );
      /* prefetch the next few macroblocks of a hpel reference frame */
-    void (*prefetch_ref)( uint8_t *pix, int stride, int parity );
+    void (*prefetch_ref)( pixel *pix, int stride, int parity );
  
      void *(*memcpy_aligned)( void *dst, const void *src, size_t n );
      void (*memzero_aligned)( void *dst, int n );
  
      /* successive elimination prefilter */
-    void (*integral_init4h)( uint16_t *sum, uint8_t *pix, int stride );
-    void (*integral_init8h)( uint16_t *sum, uint8_t *pix, int stride );
+    void (*integral_init4h)( uint16_t *sum, pixel *pix, int stride );
+    void (*integral_init8h)( uint16_t *sum, pixel *pix, int stride );
      void (*integral_init4v)( uint16_t *sum8, uint16_t *sum4, int stride );
      void (*integral_init8v)( uint16_t *sum8, int stride );
  
-    void (*frame_init_lowres_core)( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
+    void (*frame_init_lowres_core)( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
                                      int src_stride, int dst_stride, int width, int height );
      weight_fn_t *weight;
      weight_fn_t *offsetadd;
diff --git a/common/pixel.c b/common/pixel.c

index 5759abf2bc9d61ac2ad964d2f17f163e57ac3ad8..e144292dde02f52e1af9779395a51403ace7fd8b 100644 (file)
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -41,8 +41,8 @@
   * pixel_sad_WxH
   ****************************************************************************/
  #define PIXEL_SAD_C( name, lx, ly ) \
-static int name( uint8_t *pix1, int i_stride_pix1,  \
-                 uint8_t *pix2, int i_stride_pix2 ) \
+static int name( pixel *pix1, int i_stride_pix1,  \
+                 pixel *pix2, int i_stride_pix2 ) \
  {                                                   \
      int i_sum = 0;                                  \
      for( int y = 0; y < ly; y++ )                   \
@@ -71,8 +71,8 @@ PIXEL_SAD_C( x264_pixel_sad_4x4,    4,  4 )
   * pixel_ssd_WxH
   ****************************************************************************/
  #define PIXEL_SSD_C( name, lx, ly ) \
-static int name( uint8_t *pix1, int i_stride_pix1,  \
-                 uint8_t *pix2, int i_stride_pix2 ) \
+static int name( pixel *pix1, int i_stride_pix1,  \
+                 pixel *pix2, int i_stride_pix2 ) \
  {                                                   \
      int i_sum = 0;                                  \
      for( int y = 0; y < ly; y++ )                   \
@@ -96,7 +96,7 @@ PIXEL_SSD_C( x264_pixel_ssd_8x4,    8,  4 )
  PIXEL_SSD_C( x264_pixel_ssd_4x8,    4,  8 )
  PIXEL_SSD_C( x264_pixel_ssd_4x4,    4,  4 )
  
-int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
+int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height )
  {
      int64_t i_ssd = 0;
      int y;
@@ -141,7 +141,7 @@ int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1
   * pixel_var_wxh
   ****************************************************************************/
  #define PIXEL_VAR_C( name, w ) \
-static uint64_t name( uint8_t *pix, int i_stride ) \
+static uint64_t name( pixel *pix, int i_stride ) \
  {                                             \
      uint32_t sum = 0, sqr = 0;                \
      for( int y = 0; y < w; y++ )              \
@@ -162,7 +162,7 @@ PIXEL_VAR_C( x264_pixel_var_8x8,    8 )
  /****************************************************************************
   * pixel_var2_wxh
   ****************************************************************************/
-static int pixel_var2_8x8( uint8_t *pix1, int i_stride1, uint8_t *pix2, int i_stride2, int *ssd )
+static int pixel_var2_8x8( pixel *pix1, int i_stride1, pixel *pix2, int i_stride2, int *ssd )
  {
      uint32_t var = 0, sum = 0, sqr = 0;
      for( int y = 0; y < 8; y++ )
@@ -206,7 +206,7 @@ static ALWAYS_INLINE uint32_t abs2( uint32_t a )
   * pixel_satd_WxH: sum of 4x4 Hadamard transformed differences
   ****************************************************************************/
  
-static NOINLINE int x264_pixel_satd_4x4( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+static NOINLINE int x264_pixel_satd_4x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
  {
      uint32_t tmp[4][2];
      uint32_t a0, a1, a2, a3, b0, b1;
@@ -231,7 +231,7 @@ static NOINLINE int x264_pixel_satd_4x4( uint8_t *pix1, int i_pix1, uint8_t *pix
      return sum >> 1;
  }
  
-static NOINLINE int x264_pixel_satd_8x4( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+static NOINLINE int x264_pixel_satd_8x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
  {
      uint32_t tmp[4][4];
      uint32_t a0, a1, a2, a3;
@@ -253,7 +253,7 @@ static NOINLINE int x264_pixel_satd_8x4( uint8_t *pix1, int i_pix1, uint8_t *pix
  }
  
  #define PIXEL_SATD_C( w, h, sub )\
-static int x264_pixel_satd_##w##x##h( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )\
+static int x264_pixel_satd_##w##x##h( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )\
  {\
      int sum = sub( pix1, i_pix1, pix2, i_pix2 )\
              + sub( pix1+4*i_pix1, i_pix1, pix2+4*i_pix2, i_pix2 );\
@@ -275,7 +275,7 @@ PIXEL_SATD_C( 8,  8,  x264_pixel_satd_8x4 )
  PIXEL_SATD_C( 4,  8,  x264_pixel_satd_4x4 )
  
  
-static NOINLINE int sa8d_8x8( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+static NOINLINE int sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
  {
      uint32_t tmp[8][4];
      uint32_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
@@ -309,13 +309,13 @@ static NOINLINE int sa8d_8x8( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pi
      return sum;
  }
  
-static int x264_pixel_sa8d_8x8( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+static int x264_pixel_sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
  {
      int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 );
      return (sum+2)>>2;
  }
  
-static int x264_pixel_sa8d_16x16( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+static int x264_pixel_sa8d_16x16( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
  {
      int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 )
              + sa8d_8x8( pix1+8, i_pix1, pix2+8, i_pix2 )
@@ -325,7 +325,7 @@ static int x264_pixel_sa8d_16x16( uint8_t *pix1, int i_pix1, uint8_t *pix2, int
  }
  
  
-static NOINLINE uint64_t pixel_hadamard_ac( uint8_t *pix, int stride )
+static NOINLINE uint64_t pixel_hadamard_ac( pixel *pix, int stride )
  {
      uint32_t tmp[32];
      uint32_t a0, a1, a2, a3, dc;
@@ -363,7 +363,7 @@ static NOINLINE uint64_t pixel_hadamard_ac( uint8_t *pix, int stride )
  }
  
  #define HADAMARD_AC(w,h) \
-static uint64_t x264_pixel_hadamard_ac_##w##x##h( uint8_t *pix, int stride )\
+static uint64_t x264_pixel_hadamard_ac_##w##x##h( pixel *pix, int stride )\
  {\
      uint64_t sum = pixel_hadamard_ac( pix, stride );\
      if( w==16 )\
@@ -384,13 +384,13 @@ HADAMARD_AC( 8, 8 )
   * pixel_sad_x4
   ****************************************************************************/
  #define SAD_X( size ) \
-static void x264_pixel_sad_x3_##size( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )\
+static void x264_pixel_sad_x3_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
  {\
      scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
      scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
  }\
-static void x264_pixel_sad_x4_##size( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )\
+static void x264_pixel_sad_x4_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
  {\
      scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
@@ -419,13 +419,13 @@ SAD_X( 8x8_vis )
   ****************************************************************************/
  
  #define SATD_X( size, cpu ) \
-static void x264_pixel_satd_x3_##size##cpu( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )\
+static void x264_pixel_satd_x3_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
  {\
      scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
      scores[2] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix2, i_stride );\
  }\
-static void x264_pixel_satd_x4_##size##cpu( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )\
+static void x264_pixel_satd_x4_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
  {\
      scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
@@ -458,8 +458,8 @@ SATD_X_DECL7( _neon )
  /****************************************************************************
   * structural similarity metric
   ****************************************************************************/
-static void ssim_4x4x2_core( const uint8_t *pix1, int stride1,
-                             const uint8_t *pix2, int stride2,
+static void ssim_4x4x2_core( const pixel *pix1, int stride1,
+                             const pixel *pix2, int stride2,
                               int sums[2][4])
  {
      for( int z = 0; z < 2; z++ )
@@ -507,8 +507,8 @@ static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
  }
  
  float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
-                           uint8_t *pix1, int stride1,
-                           uint8_t *pix2, int stride2,
+                           pixel *pix1, int stride1,
+                           pixel *pix2, int stride2,
                             int width, int height, void *buf )
  {
      int z = 0;
diff --git a/common/pixel.h b/common/pixel.h

index 11026422576b9b077811edb321e2a8bc804b5d61..2c5330ea11a7ad1df2eca84381a03b7bae7631a4 100644 (file)
--- a/common/pixel.h
+++ b/common/pixel.h
@@ -23,9 +23,9 @@
  
  // SSD assumes all args aligned
  // other cmp functions assume first arg aligned
-typedef int  (*x264_pixel_cmp_t) ( uint8_t *, int, uint8_t *, int );
-typedef void (*x264_pixel_cmp_x3_t) ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int[3] );
-typedef void (*x264_pixel_cmp_x4_t) ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int[4] );
+typedef int  (*x264_pixel_cmp_t) ( pixel *, int, pixel *, int );
+typedef void (*x264_pixel_cmp_x3_t) ( pixel *, pixel *, pixel *, pixel *, int, int[3] );
+typedef void (*x264_pixel_cmp_x4_t) ( pixel *, pixel *, pixel *, pixel *, pixel *, int, int[4] );
  
  enum
  {
@@ -73,13 +73,13 @@ typedef struct
      x264_pixel_cmp_x3_t fpelcmp_x3[7];
      x264_pixel_cmp_x4_t fpelcmp_x4[7];
      x264_pixel_cmp_t sad_aligned[7]; /* Aligned SAD for mbcmp */
-    int (*var2_8x8)( uint8_t *, int, uint8_t *, int, int * );
+    int (*var2_8x8)( pixel *, int, pixel *, int, int * );
  
-    uint64_t (*var[4])( uint8_t *pix, int stride );
-    uint64_t (*hadamard_ac[4])( uint8_t *pix, int stride );
+    uint64_t (*var[4])( pixel *pix, int stride );
+    uint64_t (*hadamard_ac[4])( pixel *pix, int stride );
  
-    void (*ssim_4x4x2_core)( const uint8_t *pix1, int stride1,
-                             const uint8_t *pix2, int stride2, int sums[2][4] );
+    void (*ssim_4x4x2_core)( const pixel *pix1, int stride1,
+                             const pixel *pix2, int stride2, int sums[2][4] );
      float (*ssim_end4)( int sum0[5][4], int sum1[5][4], int width );
  
      /* multiple parallel calls to cmp. */
@@ -95,22 +95,22 @@ typedef struct
  
      /* calculate satd or sad of V, H, and DC modes.
       * may be NULL, in which case just use pred+satd instead. */
-    void (*intra_mbcmp_x3_16x16)( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_satd_x3_16x16) ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_sad_x3_16x16)  ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_mbcmp_x3_8x8c) ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_satd_x3_8x8c)  ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_sad_x3_8x8c)   ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_mbcmp_x3_4x4)  ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_satd_x3_4x4)   ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_sad_x3_4x4)    ( uint8_t *fenc, uint8_t *fdec  , int res[3] );
-    void (*intra_mbcmp_x3_8x8)  ( uint8_t *fenc, uint8_t edge[33], int res[3] );
-    void (*intra_sa8d_x3_8x8)   ( uint8_t *fenc, uint8_t edge[33], int res[3] );
-    void (*intra_sad_x3_8x8)    ( uint8_t *fenc, uint8_t edge[33], int res[3] );
+    void (*intra_mbcmp_x3_16x16)( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_satd_x3_16x16) ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_sad_x3_16x16)  ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_mbcmp_x3_8x8c) ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_satd_x3_8x8c)  ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_sad_x3_8x8c)   ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_mbcmp_x3_4x4)  ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_satd_x3_4x4)   ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_sad_x3_4x4)    ( pixel *fenc, pixel *fdec  , int res[3] );
+    void (*intra_mbcmp_x3_8x8)  ( pixel *fenc, pixel edge[33], int res[3] );
+    void (*intra_sa8d_x3_8x8)   ( pixel *fenc, pixel edge[33], int res[3] );
+    void (*intra_sad_x3_8x8)    ( pixel *fenc, pixel edge[33], int res[3] );
  } x264_pixel_function_t;
  
  void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
-int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
-float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height, void *buf );
+int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height );
+float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf );
  
  #endif
diff --git a/common/predict.c b/common/predict.c

index 3443a8ed2eda122d8a7e0d9028c6456acb264f55..86bbde525026240328e984c1239d2946233d80a2 100644 (file)
--- a/common/predict.c
+++ b/common/predict.c
@@ -44,79 +44,79 @@
  #define PREDICT_16x16_DC(v)\
      for( int i = 0; i < 16; i++ )\
      {\
-        M32( src+ 0 ) = v;\
-        M32( src+ 4 ) = v;\
-        M32( src+ 8 ) = v;\
-        M32( src+12 ) = v;\
+        MPIXEL_X4( src+ 0 ) = v;\
+        MPIXEL_X4( src+ 4 ) = v;\
+        MPIXEL_X4( src+ 8 ) = v;\
+        MPIXEL_X4( src+12 ) = v;\
          src += FDEC_STRIDE;\
      }
  
-static void predict_16x16_dc( uint8_t *src )
+static void predict_16x16_dc( pixel *src )
  {
-    uint32_t dc = 0;
+    pixel4 dc = 0;
  
      for( int i = 0; i < 16; i++ )
      {
          dc += src[-1 + i * FDEC_STRIDE];
          dc += src[i - FDEC_STRIDE];
      }
-    dc = (( dc + 16 ) >> 5) * 0x01010101;
+    dc = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
  
-    PREDICT_16x16_DC(dc);
+    PREDICT_16x16_DC( dc );
  }
-static void predict_16x16_dc_left( uint8_t *src )
+static void predict_16x16_dc_left( pixel *src )
  {
-    uint32_t dc = 0;
+    pixel4 dc = 0;
  
      for( int i = 0; i < 16; i++ )
          dc += src[-1 + i * FDEC_STRIDE];
-    dc = (( dc + 8 ) >> 4) * 0x01010101;
+    dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
  
-    PREDICT_16x16_DC(dc);
+    PREDICT_16x16_DC( dc );
  }
-static void predict_16x16_dc_top( uint8_t *src )
+static void predict_16x16_dc_top( pixel *src )
  {
-    uint32_t dc = 0;
+    pixel4 dc = 0;
  
      for( int i = 0; i < 16; i++ )
          dc += src[i - FDEC_STRIDE];
-    dc = (( dc + 8 ) >> 4) * 0x01010101;
+    dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
  
-    PREDICT_16x16_DC(dc);
+    PREDICT_16x16_DC( dc );
  }
-static void predict_16x16_dc_128( uint8_t *src )
+static void predict_16x16_dc_128( pixel *src )
  {
-    PREDICT_16x16_DC(0x80808080);
+    PREDICT_16x16_DC( PIXEL_SPLAT_X4( 0x80 ) );
  }
-static void predict_16x16_h( uint8_t *src )
+static void predict_16x16_h( pixel *src )
  {
      for( int i = 0; i < 16; i++ )
      {
-        const uint32_t v = 0x01010101 * src[-1];
-        M32( src+ 0 ) = v;
-        M32( src+ 4 ) = v;
-        M32( src+ 8 ) = v;
-        M32( src+12 ) = v;
+        const pixel4 v = PIXEL_SPLAT_X4( src[-1] );
+        MPIXEL_X4( src+ 0 ) = v;
+        MPIXEL_X4( src+ 4 ) = v;
+        MPIXEL_X4( src+ 8 ) = v;
+        MPIXEL_X4( src+12 ) = v;
          src += FDEC_STRIDE;
      }
  }
-static void predict_16x16_v( uint8_t *src )
+static void predict_16x16_v( pixel *src )
  {
-    uint32_t v0 = M32( &src[ 0-FDEC_STRIDE] );
-    uint32_t v1 = M32( &src[ 4-FDEC_STRIDE] );
-    uint32_t v2 = M32( &src[ 8-FDEC_STRIDE] );
-    uint32_t v3 = M32( &src[12-FDEC_STRIDE] );
+    pixel4 v0 = MPIXEL_X4( &src[ 0-FDEC_STRIDE] );
+    pixel4 v1 = MPIXEL_X4( &src[ 4-FDEC_STRIDE] );
+    pixel4 v2 = MPIXEL_X4( &src[ 8-FDEC_STRIDE] );
+    pixel4 v3 = MPIXEL_X4( &src[12-FDEC_STRIDE] );
  
      for( int i = 0; i < 16; i++ )
      {
-        M32( src+ 0 ) = v0;
-        M32( src+ 4 ) = v1;
-        M32( src+ 8 ) = v2;
-        M32( src+12 ) = v3;
+        MPIXEL_X4( src+ 0 ) = v0;
+        MPIXEL_X4( src+ 4 ) = v1;
+        MPIXEL_X4( src+ 8 ) = v2;
+        MPIXEL_X4( src+12 ) = v3;
          src += FDEC_STRIDE;
      }
  }
-static void predict_16x16_p( uint8_t *src )
+static void predict_16x16_p( pixel *src )
  {
      int H = 0, V = 0;
  
@@ -138,7 +138,7 @@ static void predict_16x16_p( uint8_t *src )
          int pix = i00;
          for( int x = 0; x < 16; x++ )
          {
-            src[x] = x264_clip_uint8( pix>>5 );
+            src[x] = x264_clip_pixel( pix>>5 );
              pix += b;
          }
          src += FDEC_STRIDE;
@@ -151,61 +151,61 @@ static void predict_16x16_p( uint8_t *src )
   * 8x8 prediction for intra chroma block
   ****************************************************************************/
  
-static void predict_8x8c_dc_128( uint8_t *src )
+static void predict_8x8c_dc_128( pixel *src )
  {
      for( int y = 0; y < 8; y++ )
      {
-        M32( src+0 ) = 0x80808080;
-        M32( src+4 ) = 0x80808080;
+        MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 0x80 );
+        MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 0x80 );
          src += FDEC_STRIDE;
      }
  }
-static void predict_8x8c_dc_left( uint8_t *src )
+static void predict_8x8c_dc_left( pixel *src )
  {
-    uint32_t dc0 = 0, dc1 = 0;
+    pixel4 dc0 = 0, dc1 = 0;
  
      for( int y = 0; y < 4; y++ )
      {
          dc0 += src[y * FDEC_STRIDE     - 1];
          dc1 += src[(y+4) * FDEC_STRIDE - 1];
      }
-    dc0 = (( dc0 + 2 ) >> 2)*0x01010101;
-    dc1 = (( dc1 + 2 ) >> 2)*0x01010101;
+    dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
+    dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
  
      for( int y = 0; y < 4; y++ )
      {
-        M32( src+0 ) = dc0;
-        M32( src+4 ) = dc0;
+        MPIXEL_X4( src+0 ) = dc0;
+        MPIXEL_X4( src+4 ) = dc0;
          src += FDEC_STRIDE;
      }
      for( int y = 0; y < 4; y++ )
      {
-        M32( src+0 ) = dc1;
-        M32( src+4 ) = dc1;
+        MPIXEL_X4( src+0 ) = dc1;
+        MPIXEL_X4( src+4 ) = dc1;
          src += FDEC_STRIDE;
      }
  
  }
-static void predict_8x8c_dc_top( uint8_t *src )
+static void predict_8x8c_dc_top( pixel *src )
  {
-    uint32_t dc0 = 0, dc1 = 0;
+    pixel4 dc0 = 0, dc1 = 0;
  
      for( int x = 0; x < 4; x++ )
      {
          dc0 += src[x     - FDEC_STRIDE];
          dc1 += src[x + 4 - FDEC_STRIDE];
      }
-    dc0 = (( dc0 + 2 ) >> 2)*0x01010101;
-    dc1 = (( dc1 + 2 ) >> 2)*0x01010101;
+    dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
+    dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
  
      for( int y = 0; y < 8; y++ )
      {
-        M32( src+0 ) = dc0;
-        M32( src+4 ) = dc1;
+        MPIXEL_X4( src+0 ) = dc0;
+        MPIXEL_X4( src+4 ) = dc1;
          src += FDEC_STRIDE;
      }
  }
-static void predict_8x8c_dc( uint8_t *src )
+static void predict_8x8c_dc( pixel *src )
  {
      int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
  
@@ -225,48 +225,48 @@ static void predict_8x8c_dc( uint8_t *src )
         dc0 dc1
         dc2 dc3
       */
-    uint32_t dc0 = (( s0 + s2 + 4 ) >> 3)*0x01010101;
-    uint32_t dc1 = (( s1 + 2 ) >> 2)*0x01010101;
-    uint32_t dc2 = (( s3 + 2 ) >> 2)*0x01010101;
-    uint32_t dc3 = (( s1 + s3 + 4 ) >> 3)*0x01010101;
+    pixel4 dc0 = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 );
+    pixel4 dc1 = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 );
+    pixel4 dc2 = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 );
+    pixel4 dc3 = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 );
  
      for( int y = 0; y < 4; y++ )
      {
-        M32( src+0 ) = dc0;
-        M32( src+4 ) = dc1;
+        MPIXEL_X4( src+0 ) = dc0;
+        MPIXEL_X4( src+4 ) = dc1;
          src += FDEC_STRIDE;
      }
  
      for( int y = 0; y < 4; y++ )
      {
-        M32( src+0 ) = dc2;
-        M32( src+4 ) = dc3;
+        MPIXEL_X4( src+0 ) = dc2;
+        MPIXEL_X4( src+4 ) = dc3;
          src += FDEC_STRIDE;
      }
  }
-static void predict_8x8c_h( uint8_t *src )
+static void predict_8x8c_h( pixel *src )
  {
      for( int i = 0; i < 8; i++ )
      {
-        uint32_t v = 0x01010101 * src[-1];
-        M32( src+0 ) = v;
-        M32( src+4 ) = v;
+        pixel4 v = PIXEL_SPLAT_X4( src[-1] );
+        MPIXEL_X4( src+0 ) = v;
+        MPIXEL_X4( src+4 ) = v;
          src += FDEC_STRIDE;
      }
  }
-static void predict_8x8c_v( uint8_t *src )
+static void predict_8x8c_v( pixel *src )
  {
-    uint32_t v0 = M32( src+0-FDEC_STRIDE );
-    uint32_t v1 = M32( src+4-FDEC_STRIDE );
+    pixel4 v0 = MPIXEL_X4( src+0-FDEC_STRIDE );
+    pixel4 v1 = MPIXEL_X4( src+4-FDEC_STRIDE );
  
      for( int i = 0; i < 8; i++ )
      {
-        M32( src+0 ) = v0;
-        M32( src+4 ) = v1;
+        MPIXEL_X4( src+0 ) = v0;
+        MPIXEL_X4( src+4 ) = v1;
          src += FDEC_STRIDE;
      }
  }
-static void predict_8x8c_p( uint8_t *src )
+static void predict_8x8c_p( pixel *src )
  {
      int H = 0, V = 0;
  
@@ -286,7 +286,7 @@ static void predict_8x8c_p( uint8_t *src )
          int pix = i00;
          for( int x = 0; x < 8; x++ )
          {
-            src[x] = x264_clip_uint8( pix>>5 );
+            src[x] = x264_clip_pixel( pix>>5 );
              pix += b;
          }
          src += FDEC_STRIDE;
@@ -299,41 +299,41 @@ static void predict_8x8c_p( uint8_t *src )
   ****************************************************************************/
  
  #define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
-#define SRC32(x,y) M32( &SRC(x,y) )
+#define SRC_X4(x,y) MPIXEL_X4( &SRC(x,y) )
  
  #define PREDICT_4x4_DC(v)\
-    SRC32(0,0) = SRC32(0,1) = SRC32(0,2) = SRC32(0,3) = v;
+    SRC_X4(0,0) = SRC_X4(0,1) = SRC_X4(0,2) = SRC_X4(0,3) = v;
  
-static void predict_4x4_dc_128( uint8_t *src )
+static void predict_4x4_dc_128( pixel *src )
  {
-    PREDICT_4x4_DC(0x80808080);
+    PREDICT_4x4_DC( PIXEL_SPLAT_X4( 0x80 ) );
  }
-static void predict_4x4_dc_left( uint8_t *src )
+static void predict_4x4_dc_left( pixel *src )
  {
-    uint32_t dc = ((SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + 2) >> 2) * 0x01010101;
-    PREDICT_4x4_DC(dc);
+    pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + 2) >> 2 );
+    PREDICT_4x4_DC( dc );
  }
-static void predict_4x4_dc_top( uint8_t *src )
+static void predict_4x4_dc_top( pixel *src )
  {
-    uint32_t dc = ((SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 2) >> 2) * 0x01010101;
-    PREDICT_4x4_DC(dc);
+    pixel4 dc = PIXEL_SPLAT_X4( (SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 2) >> 2 );
+    PREDICT_4x4_DC( dc );
  }
-static void predict_4x4_dc( uint8_t *src )
+static void predict_4x4_dc( pixel *src )
  {
-    uint32_t dc = ((SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) +
-                    SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 4) >> 3) * 0x01010101;
-    PREDICT_4x4_DC(dc);
+    pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) +
+                                 SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 4) >> 3 );
+    PREDICT_4x4_DC( dc );
  }
-static void predict_4x4_h( uint8_t *src )
+static void predict_4x4_h( pixel *src )
  {
-    SRC32(0,0) = SRC(-1,0) * 0x01010101;
-    SRC32(0,1) = SRC(-1,1) * 0x01010101;
-    SRC32(0,2) = SRC(-1,2) * 0x01010101;
-    SRC32(0,3) = SRC(-1,3) * 0x01010101;
+    SRC_X4(0,0) = PIXEL_SPLAT_X4( SRC(-1,0) );
+    SRC_X4(0,1) = PIXEL_SPLAT_X4( SRC(-1,1) );
+    SRC_X4(0,2) = PIXEL_SPLAT_X4( SRC(-1,2) );
+    SRC_X4(0,3) = PIXEL_SPLAT_X4( SRC(-1,3) );
  }
-static void predict_4x4_v( uint8_t *src )
+static void predict_4x4_v( pixel *src )
  {
-    PREDICT_4x4_DC(SRC32(0,-1));
+    PREDICT_4x4_DC(SRC_X4(0,-1));
  }
  
  #define PREDICT_4x4_LOAD_LEFT\
@@ -357,7 +357,7 @@ static void predict_4x4_v( uint8_t *src )
  #define F1(a,b)   (((a)+(b)+1)>>1)
  #define F2(a,b,c) (((a)+2*(b)+(c)+2)>>2)
  
-static void predict_4x4_ddl( uint8_t *src )
+static void predict_4x4_ddl( pixel *src )
  {
      PREDICT_4x4_LOAD_TOP
      PREDICT_4x4_LOAD_TOP_RIGHT
@@ -369,7 +369,7 @@ static void predict_4x4_ddl( uint8_t *src )
      SRC(3,2)=SRC(2,3)= F2(t5,t6,t7);
      SRC(3,3)= F2(t6,t7,t7);
  }
-static void predict_4x4_ddr( uint8_t *src )
+static void predict_4x4_ddr( pixel *src )
  {
      int lt = SRC(-1,-1);
      PREDICT_4x4_LOAD_LEFT
@@ -383,7 +383,7 @@ static void predict_4x4_ddr( uint8_t *src )
      SRC(0,3)= F2(l1,l2,l3);
  }
  
-static void predict_4x4_vr( uint8_t *src )
+static void predict_4x4_vr( pixel *src )
  {
      int lt = SRC(-1,-1);
      PREDICT_4x4_LOAD_LEFT
@@ -400,7 +400,7 @@ static void predict_4x4_vr( uint8_t *src )
      SRC(3,0)= F1(t2,t3);
  }
  
-static void predict_4x4_hd( uint8_t *src )
+static void predict_4x4_hd( pixel *src )
  {
      int lt= SRC(-1,-1);
      PREDICT_4x4_LOAD_LEFT
@@ -417,7 +417,7 @@ static void predict_4x4_hd( uint8_t *src )
      SRC(3,0)= F2(t2,t1,t0);
  }
  
-static void predict_4x4_vl( uint8_t *src )
+static void predict_4x4_vl( pixel *src )
  {
      PREDICT_4x4_LOAD_TOP
      PREDICT_4x4_LOAD_TOP_RIGHT
@@ -433,7 +433,7 @@ static void predict_4x4_vl( uint8_t *src )
      SRC(3,3)= F2(t4,t5,t6);
  }
  
-static void predict_4x4_hu( uint8_t *src )
+static void predict_4x4_hu( pixel *src )
  {
      PREDICT_4x4_LOAD_LEFT
      SRC(0,0)= F1(l0,l1);
@@ -455,7 +455,7 @@ static void predict_4x4_hu( uint8_t *src )
  #define PT(x) \
      edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1));
  
-static void predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters )
+static void predict_8x8_filter( pixel *src, pixel edge[33], int i_neighbor, int i_filters )
  {
      /* edge[7..14] = l7..l0
       * edge[15] = lt
@@ -516,50 +516,49 @@ static void predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor,
  
  #define PREDICT_8x8_DC(v) \
      for( int y = 0; y < 8; y++ ) { \
-        M32( src+0 ) = v; \
-        M32( src+4 ) = v; \
+        MPIXEL_X4( src+0 ) = v; \
+        MPIXEL_X4( src+4 ) = v; \
          src += FDEC_STRIDE; \
      }
  
-static void predict_8x8_dc_128( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_dc_128( pixel *src, pixel edge[33] )
  {
-    PREDICT_8x8_DC(0x80808080);
+    PREDICT_8x8_DC( PIXEL_SPLAT_X4( 0x80 ) );
  }
-static void predict_8x8_dc_left( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_dc_left( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_LEFT
-    uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
-    PREDICT_8x8_DC(dc);
+    pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3 );
+    PREDICT_8x8_DC( dc );
  }
-static void predict_8x8_dc_top( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_dc_top( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_TOP
-    uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
-    PREDICT_8x8_DC(dc);
+    pixel4 dc = PIXEL_SPLAT_X4( (t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3 );
+    PREDICT_8x8_DC( dc );
  }
-static void predict_8x8_dc( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_dc( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_LEFT
      PREDICT_8x8_LOAD_TOP
-    uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
-                   +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
-    PREDICT_8x8_DC(dc);
+    pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4 );
+    PREDICT_8x8_DC( dc );
  }
-static void predict_8x8_h( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_h( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_LEFT
-#define ROW(y) M32( src+y*FDEC_STRIDE+0 ) =\
-               M32( src+y*FDEC_STRIDE+4 ) = 0x01010101U * l##y;
+#define ROW(y) MPIXEL_X4( src+y*FDEC_STRIDE+0 ) =\
+               MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = PIXEL_SPLAT_X4( l##y );
      ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
  #undef ROW
  }
-static void predict_8x8_v( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_v( pixel *src, pixel edge[33] )
  {
      uint64_t top = M64( edge+16 );
      for( int y = 0; y < 8; y++ )
          M64( src+y*FDEC_STRIDE ) = top;
  }
-static void predict_8x8_ddl( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_ddl( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_TOP
      PREDICT_8x8_LOAD_TOPRIGHT
@@ -579,7 +578,7 @@ static void predict_8x8_ddl( uint8_t *src, uint8_t edge[33] )
      SRC(6,7)=SRC(7,6)= F2(t13,t14,t15);
      SRC(7,7)= F2(t14,t15,t15);
  }
-static void predict_8x8_ddr( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_ddr( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_TOP
      PREDICT_8x8_LOAD_LEFT
@@ -601,7 +600,7 @@ static void predict_8x8_ddr( uint8_t *src, uint8_t edge[33] )
      SRC(7,0)= F2(t5,t6,t7);
  
  }
-static void predict_8x8_vr( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_vr( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_TOP
      PREDICT_8x8_LOAD_LEFT
@@ -629,34 +628,34 @@ static void predict_8x8_vr( uint8_t *src, uint8_t edge[33] )
      SRC(7,1)= F2(t5,t6,t7);
      SRC(7,0)= F1(t6,t7);
  }
-static void predict_8x8_hd( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_hd( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_TOP
      PREDICT_8x8_LOAD_LEFT
      PREDICT_8x8_LOAD_TOPLEFT
-    int p1 = pack8to16(F1(l6,l7), F2(l5,l6,l7));
-    int p2 = pack8to16(F1(l5,l6), F2(l4,l5,l6));
-    int p3 = pack8to16(F1(l4,l5), F2(l3,l4,l5));
-    int p4 = pack8to16(F1(l3,l4), F2(l2,l3,l4));
-    int p5 = pack8to16(F1(l2,l3), F2(l1,l2,l3));
-    int p6 = pack8to16(F1(l1,l2), F2(l0,l1,l2));
-    int p7 = pack8to16(F1(l0,l1), F2(lt,l0,l1));
-    int p8 = pack8to16(F1(lt,l0), F2(l0,lt,t0));
-    int p9 = pack8to16(F2(t1,t0,lt), F2(t2,t1,t0));
-    int p10 = pack8to16(F2(t3,t2,t1), F2(t4,t3,t2));
-    int p11 = pack8to16(F2(t5,t4,t3), F2(t6,t5,t4));
-    SRC32(0,7)= pack16to32(p1,p2);
-    SRC32(0,6)= pack16to32(p2,p3);
-    SRC32(4,7)=SRC32(0,5)= pack16to32(p3,p4);
-    SRC32(4,6)=SRC32(0,4)= pack16to32(p4,p5);
-    SRC32(4,5)=SRC32(0,3)= pack16to32(p5,p6);
-    SRC32(4,4)=SRC32(0,2)= pack16to32(p6,p7);
-    SRC32(4,3)=SRC32(0,1)= pack16to32(p7,p8);
-    SRC32(4,2)=SRC32(0,0)= pack16to32(p8,p9);
-    SRC32(4,1)= pack16to32(p9,p10);
-    SRC32(4,0)= pack16to32(p10,p11);
-}
-static void predict_8x8_vl( uint8_t *src, uint8_t edge[33] )
+    int p1 = pack_pixel_1to2(F1(l6,l7), F2(l5,l6,l7));
+    int p2 = pack_pixel_1to2(F1(l5,l6), F2(l4,l5,l6));
+    int p3 = pack_pixel_1to2(F1(l4,l5), F2(l3,l4,l5));
+    int p4 = pack_pixel_1to2(F1(l3,l4), F2(l2,l3,l4));
+    int p5 = pack_pixel_1to2(F1(l2,l3), F2(l1,l2,l3));
+    int p6 = pack_pixel_1to2(F1(l1,l2), F2(l0,l1,l2));
+    int p7 = pack_pixel_1to2(F1(l0,l1), F2(lt,l0,l1));
+    int p8 = pack_pixel_1to2(F1(lt,l0), F2(l0,lt,t0));
+    int p9 = pack_pixel_1to2(F2(t1,t0,lt), F2(t2,t1,t0));
+    int p10 = pack_pixel_1to2(F2(t3,t2,t1), F2(t4,t3,t2));
+    int p11 = pack_pixel_1to2(F2(t5,t4,t3), F2(t6,t5,t4));
+    SRC_X4(0,7)= pack_pixel_2to4(p1,p2);
+    SRC_X4(0,6)= pack_pixel_2to4(p2,p3);
+    SRC_X4(4,7)=SRC_X4(0,5)= pack_pixel_2to4(p3,p4);
+    SRC_X4(4,6)=SRC_X4(0,4)= pack_pixel_2to4(p4,p5);
+    SRC_X4(4,5)=SRC_X4(0,3)= pack_pixel_2to4(p5,p6);
+    SRC_X4(4,4)=SRC_X4(0,2)= pack_pixel_2to4(p6,p7);
+    SRC_X4(4,3)=SRC_X4(0,1)= pack_pixel_2to4(p7,p8);
+    SRC_X4(4,2)=SRC_X4(0,0)= pack_pixel_2to4(p8,p9);
+    SRC_X4(4,1)= pack_pixel_2to4(p9,p10);
+    SRC_X4(4,0)= pack_pixel_2to4(p10,p11);
+}
+static void predict_8x8_vl( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_TOP
      PREDICT_8x8_LOAD_TOPRIGHT
@@ -683,25 +682,25 @@ static void predict_8x8_vl( uint8_t *src, uint8_t edge[33] )
      SRC(7,6)= F1(t10,t11);
      SRC(7,7)= F2(t10,t11,t12);
  }
-static void predict_8x8_hu( uint8_t *src, uint8_t edge[33] )
+static void predict_8x8_hu( pixel *src, pixel edge[33] )
  {
      PREDICT_8x8_LOAD_LEFT
-    int p1 = pack8to16(F1(l0,l1), F2(l0,l1,l2));
-    int p2 = pack8to16(F1(l1,l2), F2(l1,l2,l3));
-    int p3 = pack8to16(F1(l2,l3), F2(l2,l3,l4));
-    int p4 = pack8to16(F1(l3,l4), F2(l3,l4,l5));
-    int p5 = pack8to16(F1(l4,l5), F2(l4,l5,l6));
-    int p6 = pack8to16(F1(l5,l6), F2(l5,l6,l7));
-    int p7 = pack8to16(F1(l6,l7), F2(l6,l7,l7));
-    int p8 = pack8to16(l7,l7);
-    SRC32(0,0)= pack16to32(p1,p2);
-    SRC32(0,1)= pack16to32(p2,p3);
-    SRC32(4,0)=SRC32(0,2)= pack16to32(p3,p4);
-    SRC32(4,1)=SRC32(0,3)= pack16to32(p4,p5);
-    SRC32(4,2)=SRC32(0,4)= pack16to32(p5,p6);
-    SRC32(4,3)=SRC32(0,5)= pack16to32(p6,p7);
-    SRC32(4,4)=SRC32(0,6)= pack16to32(p7,p8);
-    SRC32(4,5)=SRC32(4,6)= SRC32(0,7) = SRC32(4,7) = pack16to32(p8,p8);
+    int p1 = pack_pixel_1to2(F1(l0,l1), F2(l0,l1,l2));
+    int p2 = pack_pixel_1to2(F1(l1,l2), F2(l1,l2,l3));
+    int p3 = pack_pixel_1to2(F1(l2,l3), F2(l2,l3,l4));
+    int p4 = pack_pixel_1to2(F1(l3,l4), F2(l3,l4,l5));
+    int p5 = pack_pixel_1to2(F1(l4,l5), F2(l4,l5,l6));
+    int p6 = pack_pixel_1to2(F1(l5,l6), F2(l5,l6,l7));
+    int p7 = pack_pixel_1to2(F1(l6,l7), F2(l6,l7,l7));
+    int p8 = pack_pixel_1to2(l7,l7);
+    SRC_X4(0,0)= pack_pixel_2to4(p1,p2);
+    SRC_X4(0,1)= pack_pixel_2to4(p2,p3);
+    SRC_X4(4,0)=SRC_X4(0,2)= pack_pixel_2to4(p3,p4);
+    SRC_X4(4,1)=SRC_X4(0,3)= pack_pixel_2to4(p4,p5);
+    SRC_X4(4,2)=SRC_X4(0,4)= pack_pixel_2to4(p5,p6);
+    SRC_X4(4,3)=SRC_X4(0,5)= pack_pixel_2to4(p6,p7);
+    SRC_X4(4,4)=SRC_X4(0,6)= pack_pixel_2to4(p7,p8);
+    SRC_X4(4,5)=SRC_X4(4,6)= SRC_X4(0,7) = SRC_X4(4,7) = pack_pixel_2to4(p8,p8);
  }
  
  /****************************************************************************
diff --git a/common/predict.h b/common/predict.h

index d0210abcbff990ba70027d9b1a20b973ec237916..be8fab866422a9723c2f1a59577b7e423ff922d4 100644 (file)
--- a/common/predict.h
+++ b/common/predict.h
@@ -24,9 +24,9 @@
  #ifndef X264_PREDICT_H
  #define X264_PREDICT_H
  
-typedef void (*x264_predict_t)( uint8_t *src );
-typedef void (*x264_predict8x8_t)( uint8_t *src, uint8_t edge[33] );
-typedef void (*x264_predict_8x8_filter_t) ( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
+typedef void (*x264_predict_t)( pixel *src );
+typedef void (*x264_predict8x8_t)( pixel *src, pixel edge[33] );
+typedef void (*x264_predict_8x8_filter_t) ( pixel *src, pixel edge[33], int i_neighbor, int i_filters );
  
  enum intra_chroma_pred_e
  {
diff --git a/common/visualize.c b/common/visualize.c

index 1b598d34485bd14f32b91dc8f36a1c477c5ba51f..36ab3a6bbc6465ab64e6b26e549ecf8fd5c8741e 100644 (file)
--- a/common/visualize.c
+++ b/common/visualize.c
@@ -160,7 +160,7 @@ void x264_visualize_show( x264_t *h )
      static const int zoom = 2;        /* Zoom factor */
  
      static const int pad = 32;
-    uint8_t *const frame = h->fdec->plane[0];
+    pixel *const frame = h->fdec->plane[0];
      const int width = h->param.i_width;
      const int height = h->param.i_height;
      const int stride = h->fdec->i_stride[0];
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 441b75f1c03785f50eca2215a62772ad34846f6a..4357cd7a7d19c2609e5ad3c6feb309fdd0c09b99 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -295,7 +295,7 @@ void x264_analyse_weight_frame( x264_t *h, int end )
              int width = frame->i_width[0] + 2*PADH;
              int i_padv = PADV << h->param.b_interlaced;
              int offset, height;
-            uint8_t *src = frame->filtered[0] - frame->i_stride[0]*i_padv - PADH;
+            pixel *src = frame->filtered[0] - frame->i_stride[0]*i_padv - PADH;
              height = X264_MIN( 16 + end + i_padv, h->fref0[j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
              offset = h->fenc->i_lines_weighted*frame->i_stride[0];
              h->fenc->i_lines_weighted += height;
@@ -303,7 +303,7 @@ void x264_analyse_weight_frame( x264_t *h, int end )
                  for( int k = j; k < h->i_ref0; k++ )
                      if( h->sh.weight[k][0].weightfn )
                      {
-                        uint8_t *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
+                        pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
                          x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
                                                   src + offset, frame->i_stride[0],
                                                   width, height, &h->sh.weight[k][0] );
@@ -550,7 +550,7 @@ static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int i_neighbour )
  /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
  static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
  {
-    ALIGNED_16( static uint8_t zero[16*FDEC_STRIDE] ) = {0};
+    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};
  
      if( do_both_dct || h->mb.b_transform_8x8 )
          h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
@@ -632,8 +632,8 @@ static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
  static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
  {
      const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
-    uint8_t  *p_src = h->mb.pic.p_fenc[0];
-    uint8_t  *p_dst = h->mb.pic.p_fdec[0];
+    pixel *p_src = h->mb.pic.p_fenc[0];
+    pixel *p_dst = h->mb.pic.p_fdec[0];
  
      int idx;
      int b_merged_satd = !!h->pixf.intra_mbcmp_x3_16x16 && !h->mb.b_lossless;
@@ -686,7 +686,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
      /* 8x8 prediction selection */
      if( flags & X264_ANALYSE_I8x8 )
      {
-        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
+        ALIGNED_ARRAY_16( pixel, edge,[33] );
          x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
          int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
  
@@ -702,8 +702,8 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
          {
              int x = idx&1;
              int y = idx>>1;
-            uint8_t *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
-            uint8_t *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
+            pixel *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
+            pixel *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
              int i_best = COST_MAX;
              int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
  
@@ -794,8 +794,8 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
  
          for( idx = 0;; idx++ )
          {
-            uint8_t *p_src_by = p_src + block_idx_xy_fenc[idx];
-            uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx];
+            pixel *p_src_by = p_src + block_idx_xy_fenc[idx];
+            pixel *p_dst_by = p_dst + block_idx_xy_fdec[idx];
              int i_best = COST_MAX;
              int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
  
@@ -803,7 +803,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
  
              if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                  /* emulate missing topright samples */
-                M32( &p_dst_by[4 - FDEC_STRIDE] ) = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
+                MPIXEL_X4( &p_dst_by[4 - FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst_by[3 - FDEC_STRIDE] );
  
              if( b_merged_satd && predict_mode[5] >= 0 )
              {
@@ -906,7 +906,7 @@ static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
  
  static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t  *p_dst = h->mb.pic.p_fdec[0];
+    pixel *p_dst = h->mb.pic.p_fdec[0];
      uint64_t i_satd, i_best;
      h->mb.i_skip_intra = 0;
  
@@ -973,18 +973,18 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
  
      if( h->mb.i_type == I_4x4 )
      {
-        uint32_t pels[4] = {0}; // doesn't need initting, just shuts up a gcc warning
+        pixel4 pels[4] = {0}; // doesn't need initting, just shuts up a gcc warning
          int i_nnz = 0;
          for( int idx = 0; idx < 16; idx++ )
          {
-            uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx];
+            pixel *p_dst_by = p_dst + block_idx_xy_fdec[idx];
              i_best = COST_MAX64;
  
              predict_mode = predict_4x4_mode_available( h->mb.i_neighbour4[idx] );
  
              if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                  /* emulate missing topright samples */
-                M32( &p_dst_by[4 - FDEC_STRIDE] ) = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
+                MPIXEL_X4( &p_dst_by[4 - FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst_by[3 - FDEC_STRIDE] );
  
              for( ; *predict_mode >= 0; predict_mode++ )
              {
@@ -999,18 +999,18 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
                  {
                      a->i_predict4x4[idx] = i_mode;
                      i_best = i_satd;
-                    pels[0] = M32( p_dst_by+0*FDEC_STRIDE );
-                    pels[1] = M32( p_dst_by+1*FDEC_STRIDE );
-                    pels[2] = M32( p_dst_by+2*FDEC_STRIDE );
-                    pels[3] = M32( p_dst_by+3*FDEC_STRIDE );
+                    pels[0] = MPIXEL_X4( p_dst_by+0*FDEC_STRIDE );
+                    pels[1] = MPIXEL_X4( p_dst_by+1*FDEC_STRIDE );
+                    pels[2] = MPIXEL_X4( p_dst_by+2*FDEC_STRIDE );
+                    pels[3] = MPIXEL_X4( p_dst_by+3*FDEC_STRIDE );
                      i_nnz = h->mb.cache.non_zero_count[x264_scan8[idx]];
                  }
              }
  
-            M32( p_dst_by+0*FDEC_STRIDE ) = pels[0];
-            M32( p_dst_by+1*FDEC_STRIDE ) = pels[1];
-            M32( p_dst_by+2*FDEC_STRIDE ) = pels[2];
-            M32( p_dst_by+3*FDEC_STRIDE ) = pels[3];
+            MPIXEL_X4( p_dst_by+0*FDEC_STRIDE ) = pels[0];
+            MPIXEL_X4( p_dst_by+1*FDEC_STRIDE ) = pels[1];
+            MPIXEL_X4( p_dst_by+2*FDEC_STRIDE ) = pels[2];
+            MPIXEL_X4( p_dst_by+3*FDEC_STRIDE ) = pels[3];
              h->mb.cache.non_zero_count[x264_scan8[idx]] = i_nnz;
  
              h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
@@ -1018,13 +1018,13 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
      }
      else if( h->mb.i_type == I_8x8 )
      {
-        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
+        ALIGNED_ARRAY_16( pixel, edge,[33] );
          for( int idx = 0; idx < 4; idx++ )
          {
-            uint64_t pels_h = 0;
-            uint8_t pels_v[7];
+            pixel4 pels_h[2] = {0};
+            pixel pels_v[7];
              uint16_t i_nnz[2] = {0}; //shut up gcc
-            uint8_t *p_dst_by;
+            pixel *p_dst_by;
              int cbp_luma_new = 0;
              int i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8;
  
@@ -1056,7 +1056,8 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
                      cbp_luma_new = h->mb.i_cbp_luma;
                      i_best = i_satd;
  
-                    pels_h = M64( p_dst_by+7*FDEC_STRIDE );
+                    pels_h[0] = MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+0 );
+                    pels_h[1] = MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+4 );
                      if( !(idx&1) )
                          for( int j = 0; j < 7; j++ )
                              pels_v[j] = p_dst_by[7+j*FDEC_STRIDE];
@@ -1065,7 +1066,8 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
                  }
              }
              a->i_cbp_i8x8_luma = cbp_luma_new;
-            M64( p_dst_by+7*FDEC_STRIDE ) = pels_h;
+            MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+0 ) = pels_h[0];
+            MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+4 ) = pels_h[1];
              if( !(idx&1) )
                  for( int j = 0; j < 7; j++ )
                      p_dst_by[7+j*FDEC_STRIDE] = pels_v[j];
@@ -1185,7 +1187,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
  {
      x264_me_t m;
-    uint8_t  **p_fenc = h->mb.pic.p_fenc;
+    pixel **p_fenc = h->mb.pic.p_fenc;
      int i_maxref = h->mb.pic.i_fref[0]-1;
  
      h->mb.i_partition = D_8x8;
@@ -1281,7 +1283,7 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
       * don't bother analysing the dupes. */
      const int i_ref = h->mb.ref_blind_dupe == a->l0.me16x16.i_ref ? 0 : a->l0.me16x16.i_ref;
      const int i_ref_cost = h->param.b_cabac || i_ref ? REF_COST( 0, i_ref ) : 0;
-    uint8_t  **p_fenc = h->mb.pic.p_fenc;
+    pixel **p_fenc = h->mb.pic.p_fenc;
      int i_mvc;
      int16_t (*mvc)[2] = a->l0.mvc[i_ref];
  
@@ -1333,7 +1335,7 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
  static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
  {
      x264_me_t m;
-    uint8_t  **p_fenc = h->mb.pic.p_fenc;
+    pixel **p_fenc = h->mb.pic.p_fenc;
      ALIGNED_4( int16_t mvc[3][2] );
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -1399,7 +1401,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i
  static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
  {
      x264_me_t m;
-    uint8_t  **p_fenc = h->mb.pic.p_fenc;
+    pixel **p_fenc = h->mb.pic.p_fenc;
      ALIGNED_4( int16_t mvc[3][2] );
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -1461,10 +1463,10 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i
      a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
  }
  
-static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
+static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, pixel **p_fref, int i8x8, int size )
  {
-    ALIGNED_ARRAY_8( uint8_t, pix1,[16*8] );
-    uint8_t *pix2 = pix1+8;
+    ALIGNED_ARRAY_8( pixel, pix1,[16*8] );
+    pixel *pix2 = pix1+8;
      const int i_stride = h->mb.pic.i_stride[1];
      const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
      const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
@@ -1481,7 +1483,7 @@ static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a,
          weight[2].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height );
  
  
-    if( pixel == PIXEL_4x4 )
+    if( size == PIXEL_4x4 )
      {
          x264_me_t *m = a->l0.me4x4[i8x8];
          CHROMA4x4MC( 2,2, m[0], 0,0 );
@@ -1489,7 +1491,7 @@ static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a,
          CHROMA4x4MC( 2,2, m[2], 0,2 );
          CHROMA4x4MC( 2,2, m[3], 2,2 );
      }
-    else if( pixel == PIXEL_8x4 )
+    else if( size == PIXEL_8x4 )
      {
          x264_me_t *m = a->l0.me8x4[i8x8];
          CHROMA4x4MC( 4,2, m[0], 0,0 );
@@ -1508,8 +1510,8 @@ static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a,
  
  static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
  {
-    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
-    uint8_t  **p_fenc = h->mb.pic.p_fenc;
+    pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
+    pixel **p_fenc = h->mb.pic.p_fenc;
      const int i_ref = a->l0.me8x8[i8x8].i_ref;
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -1547,8 +1549,8 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8
  
  static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
  {
-    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
-    uint8_t  **p_fenc = h->mb.pic.p_fenc;
+    pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
+    pixel **p_fenc = h->mb.pic.p_fenc;
      const int i_ref = a->l0.me8x8[i8x8].i_ref;
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -1583,8 +1585,8 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8
  
  static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
  {
-    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
-    uint8_t  **p_fenc = h->mb.pic.p_fenc;
+    pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
+    pixel **p_fenc = h->mb.pic.p_fenc;
      const int i_ref = a->l0.me8x8[i8x8].i_ref;
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -1622,8 +1624,8 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
      /* Assumes that fdec still contains the results of
       * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
  
-    uint8_t *p_fenc = h->mb.pic.p_fenc[0];
-    uint8_t *p_fdec = h->mb.pic.p_fdec[0];
+    pixel *p_fenc = h->mb.pic.p_fenc[0];
+    pixel *p_fdec = h->mb.pic.p_fdec[0];
  
      a->i_cost16x16direct = a->i_lambda * i_mb_b_cost_table[B_DIRECT];
      if( h->param.analyse.inter & X264_ANALYSE_BSUB16x16 )
@@ -1644,9 +1646,9 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
  
  static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
  {
-    ALIGNED_ARRAY_16( uint8_t, pix0,[16*16] );
-    ALIGNED_ARRAY_16( uint8_t, pix1,[16*16] );
-    uint8_t *src0, *src1;
+    ALIGNED_ARRAY_16( pixel, pix0,[16*16] );
+    ALIGNED_ARRAY_16( pixel, pix1,[16*16] );
+    pixel *src0, *src1;
      int stride0 = 16, stride1 = 16;
      int i_ref, i_mvc;
      ALIGNED_4( int16_t mvc[9][2] );
@@ -1870,7 +1872,7 @@ static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int
  
  static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
  {
-    ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*8] );
+    ALIGNED_ARRAY_8( pixel, pix,[2],[8*8] );
      int i_maxref[2] = {h->mb.pic.i_fref[0]-1, h->mb.pic.i_fref[1]-1};
  
      /* early termination: if 16x16 chose ref 0, then evalute no refs older
@@ -1910,7 +1912,7 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
          int i_part_cost;
          int i_part_cost_bi;
          int stride[2] = {8,8};
-        uint8_t *src[2];
+        pixel *src[2];
          x264_me_t m;
          m.i_pixel = PIXEL_8x8;
          LOAD_FENC( &m, h->mb.pic.p_fenc, 8*x8, 8*y8 );
@@ -1975,10 +1977,10 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
  
  static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t **p_fref[2] =
+    pixel **p_fref[2] =
          { h->mb.pic.p_fref[0][a->l0.me16x16.i_ref],
            h->mb.pic.p_fref[1][a->l1.me16x16.i_ref] };
-    ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*8] );
+    ALIGNED_ARRAY_8( pixel, pix,[2],[8*8] );
  
      /* XXX Needed for x264_mb_predict_mv */
      h->mb.i_partition = D_8x8;
@@ -1992,7 +1994,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
          int i_part_cost;
          int i_part_cost_bi = 0;
          int stride[2] = {8,8};
-        uint8_t *src[2];
+        pixel *src[2];
  
          for( int l = 0; l < 2; l++ )
          {
@@ -2045,7 +2047,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
  
  static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
  {
-    ALIGNED_ARRAY_16( uint8_t, pix,[2],[16*8] );
+    ALIGNED_ARRAY_16( pixel, pix,[2],[16*8] );
      ALIGNED_4( int16_t mvc[3][2] );
  
      h->mb.i_partition = D_16x8;
@@ -2056,7 +2058,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
          int i_part_cost;
          int i_part_cost_bi = 0;
          int stride[2] = {16,16};
-        uint8_t *src[2];
+        pixel *src[2];
          x264_me_t m;
          m.i_pixel = PIXEL_16x8;
          LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 8*i );
@@ -2136,7 +2138,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
  
  static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
  {
-    ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*16] );
+    ALIGNED_ARRAY_8( pixel, pix,[2],[8*16] );
      ALIGNED_4( int16_t mvc[3][2] );
  
      h->mb.i_partition = D_8x16;
@@ -2147,7 +2149,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
          int i_part_cost;
          int i_part_cost_bi = 0;
          int stride[2] = {8,8};
-        uint8_t *src[2];
+        pixel *src[2];
          x264_me_t m;
          m.i_pixel = PIXEL_8x16;
          LOAD_FENC( &m, h->mb.pic.p_fenc, 8*i, 0 );
diff --git a/encoder/encoder.c b/encoder/encoder.c

index 57dd7eec8255767444d491d03e0595c09addedb3..f6b6d7fe908c717f3e4abd559fe389eb32be92f6 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1418,8 +1418,8 @@ static void x264_weighted_pred_init( x264_t *h )
          //scale full resolution frame
          if( h->sh.weight[j][0].weightfn && h->param.i_threads == 1 )
          {
-            uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
-            uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
+            pixel *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
+            pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
              int stride = h->fenc->i_stride[0];
              int width = h->fenc->i_width[0] + PADH*2;
              int height = h->fenc->i_lines[0] + i_padv*2;
@@ -1570,7 +1570,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
              {
                  memcpy( h->intra_border_backup[j][i],
                          h->fdec->plane[i] + ((mb_y*16 >> !!i) + j - 1 - h->sh.b_mbaff) * h->fdec->i_stride[i],
-                        h->sps->i_mb_width*16 >> !!i );
+                        (h->sps->i_mb_width*16 >> !!i) * sizeof(pixel) );
              }
  
      if( b_deblock )
diff --git a/encoder/macroblock.c b/encoder/macroblock.c

index 5fbc0c1487f5f37d4ddf0594d6c28dcbf674809e..19d2f85ed74a659da465fa1b5f4dbc4ffc3b39ab 100644 (file)
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -110,8 +110,8 @@ static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, int16_t dct[64], int i_qp, i
  void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
  {
      int nz;
-    uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
-    uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
+    pixel *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
+    pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
      ALIGNED_ARRAY_16( int16_t, dct4x4,[16] );
  
      if( h->mb.b_lossless )
@@ -156,8 +156,8 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
      int y = idx>>1;
      int s8 = X264_SCAN8_0 + 2*x + 16*y;
      int nz;
-    uint8_t *p_src = &h->mb.pic.p_fenc[0][8*x + 8*y*FENC_STRIDE];
-    uint8_t *p_dst = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
+    pixel *p_src = &h->mb.pic.p_fenc[0][8*x + 8*y*FENC_STRIDE];
+    pixel *p_dst = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
      ALIGNED_ARRAY_16( int16_t, dct8x8,[64] );
  
      if( h->mb.b_lossless )
@@ -185,8 +185,8 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
  
  static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
  {
-    uint8_t  *p_src = h->mb.pic.p_fenc[0];
-    uint8_t  *p_dst = h->mb.pic.p_fdec[0];
+    pixel *p_src = h->mb.pic.p_fenc[0];
+    pixel *p_dst = h->mb.pic.p_fdec[0];
  
      ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[16] );
      ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[16] );
@@ -376,8 +376,8 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  
      for( int ch = 0; ch < 2; ch++ )
      {
-        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
-        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
+        pixel *p_src = h->mb.pic.p_fenc[1+ch];
+        pixel *p_dst = h->mb.pic.p_fdec[1+ch];
          int i_decimate_score = 0;
          int nz_ac = 0;
  
@@ -551,10 +551,10 @@ void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode )
      }
  }
  
-void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
+void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int idx, int i_mode )
  {
      int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
-    uint8_t *p_src = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;
+    pixel *p_src = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;
  
      if( i_mode == I_PRED_4x4_V )
          h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-stride, stride, 4 );
@@ -564,10 +564,10 @@ void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
          h->predict_4x4[i_mode]( p_dst );
  }
  
-void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] )
+void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int idx, int i_mode, pixel edge[33] )
  {
      int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
-    uint8_t *p_src = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;
+    pixel *p_src = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;
  
      if( i_mode == I_PRED_8x8_V )
          h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-stride, stride, 8 );
@@ -656,7 +656,7 @@ void x264_macroblock_encode( x264_t *h )
      }
      else if( h->mb.i_type == I_8x8 )
      {
-        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
+        ALIGNED_ARRAY_16( pixel, edge,[33] );
          h->mb.b_transform_8x8 = 1;
          /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
          if( h->mb.i_skip_intra )
@@ -673,8 +673,8 @@ void x264_macroblock_encode( x264_t *h )
          }
          for( int i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
          {
-            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
-            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
+            pixel *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
+            int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
              h->predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );
  
              if( h->mb.b_lossless )
@@ -703,12 +703,12 @@ void x264_macroblock_encode( x264_t *h )
          }
          for( int i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
          {
-            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
-            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
+            pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
+            int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
  
              if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                  /* emulate missing topright samples */
-                M32( &p_dst[4-FDEC_STRIDE] ) = p_dst[3-FDEC_STRIDE] * 0x01010101U;
+                MPIXEL_X4( &p_dst[4-FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst[3-FDEC_STRIDE] );
  
              if( h->mb.b_lossless )
                  x264_predict_lossless_4x4( h, p_dst, i, i_mode );
@@ -967,8 +967,8 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
  
      for( int ch = 0; ch < 2; ch++ )
      {
-        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
-        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
+        pixel *p_src = h->mb.pic.p_fenc[1+ch];
+        pixel *p_dst = h->mb.pic.p_fdec[1+ch];
  
          if( !b_bidir )
          {
@@ -1061,8 +1061,8 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
      int x = i8&1;
      int y = i8>>1;
      int s8 = X264_SCAN8_0 + 2*x + 16*y;
-    uint8_t *p_fenc = h->mb.pic.p_fenc[0] + 8*x + 8*y*FENC_STRIDE;
-    uint8_t *p_fdec = h->mb.pic.p_fdec[0] + 8*x + 8*y*FDEC_STRIDE;
+    pixel *p_fenc = h->mb.pic.p_fenc[0] + 8*x + 8*y*FENC_STRIDE;
+    pixel *p_fdec = h->mb.pic.p_fdec[0] + 8*x + 8*y*FDEC_STRIDE;
      int b_decimate = h->mb.b_dct_decimate;
      int nnz8x8 = 0;
      int nz;
@@ -1187,8 +1187,8 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
  void x264_macroblock_encode_p4x4( x264_t *h, int i4 )
  {
      int i_qp = h->mb.i_qp;
-    uint8_t *p_fenc = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[i4]];
-    uint8_t *p_fdec = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
+    pixel *p_fenc = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[i4]];
+    pixel *p_fdec = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
      int nz;
  
      /* Don't need motion compensation as this function is only used in qpel-RD, which caches pixel data. */
diff --git a/encoder/macroblock.h b/encoder/macroblock.h

index d05c00b3227c8a8139e5356cda72593fecf48c18..f42e6a4c40b2e5cb9ef774c54f9179d263b93084 100644 (file)
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -39,8 +39,8 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir );
      x264_macroblock_probe_skip( h, 1 )
  
  void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode );
-void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode );
-void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] );
+void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int idx, int i_mode );
+void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int idx, int i_mode, pixel edge[33] );
  void x264_predict_lossless_16x16( x264_t *h, int i_mode );
  
  void x264_macroblock_encode      ( x264_t *h );
diff --git a/encoder/me.c b/encoder/me.c

index 82a380445a849b7189f9a7b84a53a6d361d3b04d..7efd592fbae55414bbd83e02af66be1f865565f4 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -67,7 +67,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  #define COST_MV_HPEL( mx, my ) \
  { \
      int stride2 = 16; \
-    uint8_t *src = h->mc.get_ref( pix, &stride2, m->p_fref, stride, mx, my, bw, bh, &m->weight[0] ); \
+    pixel *src = h->mc.get_ref( pix, &stride2, m->p_fref, stride, mx, my, bw, bh, &m->weight[0] ); \
      int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, src, stride2 ) \
               + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
      COPY3_IF_LT( bpred_cost, cost, bpred_mx, mx, bpred_my, my ); \
@@ -75,7 +75,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  
  #define COST_MV_X3_DIR( m0x, m0y, m1x, m1y, m2x, m2y, costs )\
  {\
-    uint8_t *pix_base = p_fref_w + bmx + bmy*stride;\
+    pixel *pix_base = p_fref_w + bmx + bmy*stride;\
      h->pixf.fpelcmp_x3[i_pixel]( p_fenc,\
          pix_base + (m0x) + (m0y)*stride,\
          pix_base + (m1x) + (m1y)*stride,\
@@ -88,7 +88,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  
  #define COST_MV_X4_DIR( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs )\
  {\
-    uint8_t *pix_base = p_fref_w + bmx + bmy*stride;\
+    pixel *pix_base = p_fref_w + bmx + bmy*stride;\
      h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
          pix_base + (m0x) + (m0y)*stride,\
          pix_base + (m1x) + (m1y)*stride,\
@@ -103,7 +103,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  
  #define COST_MV_X4( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y )\
  {\
-    uint8_t *pix_base = p_fref_w + omx + omy*stride;\
+    pixel *pix_base = p_fref_w + omx + omy*stride;\
      h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
          pix_base + (m0x) + (m0y)*stride,\
          pix_base + (m1x) + (m1y)*stride,\
@@ -180,9 +180,9 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
      int bmx, bmy, bcost;
      int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
      int omx, omy, pmx, pmy;
-    uint8_t *p_fenc = m->p_fenc[0];
-    uint8_t *p_fref_w = m->p_fref_w;
-    ALIGNED_ARRAY_16( uint8_t, pix,[16*16] );
+    pixel *p_fenc = m->p_fenc[0];
+    pixel *p_fref_w = m->p_fref_w;
+    ALIGNED_ARRAY_16( pixel, pix,[16*16] );
  
      int costs[16];
  
@@ -505,7 +505,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                  else
                  {
                      int dir = 0;
-                    uint8_t *pix_base = p_fref_w + omx + (omy-4*i)*stride;
+                    pixel *pix_base = p_fref_w + omx + (omy-4*i)*stride;
                      int dy = i*stride;
  #define SADS(k,x0,y0,x1,y1,x2,y2,x3,y3)\
                      h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
@@ -588,7 +588,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
              uint16_t *sums_base = m->integral;
              /* due to a GCC bug on some platforms (win32?), zero[] may not actually be aligned.
               * this is not a problem because it is not used for any SSE instructions. */
-            ALIGNED_16( static uint8_t zero[8*FENC_STRIDE] );
+            ALIGNED_16( static pixel zero[8*FENC_STRIDE] );
              ALIGNED_ARRAY_16( int, enc_dc,[4] );
              int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
              int delta = x264_pixel_size[sad_size].w;
@@ -625,7 +625,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                                                 cost_fpel_mvx+min_x, xs, width, bsad*17/16 );
                      for( i = 0; i < xn-2; i += 3 )
                      {
-                        uint8_t *ref = p_fref_w+min_x+my*stride;
+                        pixel *ref = p_fref_w+min_x+my*stride;
                          int sads[3];
                          h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
                          for( int j = 0; j < 3; j++ )
@@ -774,7 +774,7 @@ void x264_me_refine_qpel_refdupe( x264_t *h, x264_me_t *m, int *p_halfpel_thresh
  #define COST_MV_SAD( mx, my ) \
  { \
      int stride = 16; \
-    uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
+    pixel *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
      int cost = h->pixf.fpelcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
               + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
      COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my ); \
@@ -784,7 +784,7 @@ void x264_me_refine_qpel_refdupe( x264_t *h, x264_me_t *m, int *p_halfpel_thresh
  if( b_refine_qpel || (dir^1) != odir ) \
  { \
      int stride = 16; \
-    uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
+    pixel *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
      int cost = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
               + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
      if( b_chroma_me && cost < bcost ) \
@@ -816,7 +816,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
      const int mvy_offset = h->mb.b_interlaced & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
  
-    ALIGNED_ARRAY_16( uint8_t, pix,[64*18] ); // really 17x17x2, but round up for alignment
+    ALIGNED_ARRAY_16( pixel, pix,[64*18] ); // really 17x17x2, but round up for alignment
  
      int bmx = m->mv[0];
      int bmy = m->mv[1];
@@ -838,7 +838,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
          int omx = bmx, omy = bmy;
          int costs[4];
          int stride = 64; // candidates are either all hpel or all qpel, so one stride is enough
-        uint8_t *src0, *src1, *src2, *src3;
+        pixel *src0, *src1, *src2, *src3;
          src0 = h->mc.get_ref( pix,    &stride, m->p_fref, m->i_stride[0], omx, omy-2, bw, bh+1, &m->weight[0] );
          src2 = h->mc.get_ref( pix+32, &stride, m->p_fref, m->i_stride[0], omx-2, omy, bw+4, bh, &m->weight[0] );
          src1 = src0 + stride;
@@ -945,13 +945,13 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
      const int i_pixel = m0->i_pixel;
      const int bw = x264_pixel_size[i_pixel].w;
      const int bh = x264_pixel_size[i_pixel].h;
-    ALIGNED_ARRAY_16( uint8_t, pixy_buf,[2],[9][16*16] );
-    ALIGNED_ARRAY_8( uint8_t, pixu_buf,[2],[9][8*8] );
-    ALIGNED_ARRAY_8( uint8_t, pixv_buf,[2],[9][8*8] );
-    uint8_t *src[2][9];
-    uint8_t *pix  = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
-    uint8_t *pixu = &h->mb.pic.p_fdec[1][4*x + 4*y*FDEC_STRIDE];
-    uint8_t *pixv = &h->mb.pic.p_fdec[2][4*x + 4*y*FDEC_STRIDE];
+    ALIGNED_ARRAY_16( pixel, pixy_buf,[2],[9][16*16] );
+    ALIGNED_ARRAY_8( pixel, pixu_buf,[2],[9][8*8] );
+    ALIGNED_ARRAY_8( pixel, pixv_buf,[2],[9][8*8] );
+    pixel *src[2][9];
+    pixel *pix  = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
+    pixel *pixu = &h->mb.pic.p_fdec[1][4*x + 4*y*FDEC_STRIDE];
+    pixel *pixv = &h->mb.pic.p_fdec[2][4*x + 4*y*FDEC_STRIDE];
      int ref0 = h->mb.cache.ref[0][s8];
      int ref1 = h->mb.cache.ref[1][s8];
      const int mv0y_offset = h->mb.b_interlaced & ref0 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
@@ -1139,9 +1139,9 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
      int i8 = i4>>2;
      uint16_t amvd;
  
-    uint8_t *pix  = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
-    uint8_t *pixu = &h->mb.pic.p_fdec[1][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
-    uint8_t *pixv = &h->mb.pic.p_fdec[2][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
+    pixel *pix  = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
+    pixel *pixu = &h->mb.pic.p_fdec[1][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
+    pixel *pixv = &h->mb.pic.p_fdec[2][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
  
      h->mb.b_skip_mc = 1;
  
diff --git a/encoder/me.h b/encoder/me.h

index 031819bcf7b6296493abcc256be72726d49cf08e..912b05d1aa6493231eea9061032009d5932257d5 100644 (file)
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -38,9 +38,9 @@ typedef struct
      int      i_ref;
      const x264_weight_t *weight;
  
-    uint8_t *p_fref[6];
-    uint8_t *p_fref_w;
-    uint8_t *p_fenc[3];
+    pixel *p_fref[6];
+    pixel *p_fref_w;
+    pixel *p_fenc[3];
      uint16_t *integral;
      int      i_stride[2];
  
diff --git a/encoder/rdo.c b/encoder/rdo.c

index e66df6d6189169c74c7a7c658b4c2b99dc76eaf3..1fecea62358f32cfe9fe714042f404d7482006e8 100644 (file)
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -65,41 +65,41 @@ static uint16_t cabac_size_5ones[128];
  #define COPY_CABAC_PART( pos, size )\
          memcpy( &cb->state[pos], &h->cabac.state[pos], size )
  
-static ALWAYS_INLINE uint64_t cached_hadamard( x264_t *h, int pixel, int x, int y )
+static ALWAYS_INLINE uint64_t cached_hadamard( x264_t *h, int size, int x, int y )
  {
      static const uint8_t hadamard_shift_x[4] = {4,   4,   3,   3};
      static const uint8_t hadamard_shift_y[4] = {4-0, 3-0, 4-1, 3-1};
      static const uint8_t  hadamard_offset[4] = {0,   1,   3,   5};
-    int cache_index = (x >> hadamard_shift_x[pixel]) + (y >> hadamard_shift_y[pixel])
-                    + hadamard_offset[pixel];
+    int cache_index = (x >> hadamard_shift_x[size]) + (y >> hadamard_shift_y[size])
+                    + hadamard_offset[size];
      uint64_t res = h->mb.pic.fenc_hadamard_cache[cache_index];
      if( res )
          return res - 1;
      else
      {
-        uint8_t *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
-        res = h->pixf.hadamard_ac[pixel]( fenc, FENC_STRIDE );
+        pixel *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
+        res = h->pixf.hadamard_ac[size]( fenc, FENC_STRIDE );
          h->mb.pic.fenc_hadamard_cache[cache_index] = res + 1;
          return res;
      }
  }
  
-static ALWAYS_INLINE int cached_satd( x264_t *h, int pixel, int x, int y )
+static ALWAYS_INLINE int cached_satd( x264_t *h, int size, int x, int y )
  {
      static const uint8_t satd_shift_x[3] = {3,   2,   2};
      static const uint8_t satd_shift_y[3] = {2-1, 3-2, 2-2};
      static const uint8_t  satd_offset[3] = {0,   8,   16};
-    ALIGNED_16( static uint8_t zero[16] );
-    int cache_index = (x >> satd_shift_x[pixel - PIXEL_8x4]) + (y >> satd_shift_y[pixel - PIXEL_8x4])
-                    + satd_offset[pixel - PIXEL_8x4];
+    ALIGNED_16( static pixel zero[16] );
+    int cache_index = (x >> satd_shift_x[size - PIXEL_8x4]) + (y >> satd_shift_y[size - PIXEL_8x4])
+                    + satd_offset[size - PIXEL_8x4];
      int res = h->mb.pic.fenc_satd_cache[cache_index];
      if( res )
          return res - 1;
      else
      {
-        uint8_t *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
-        int dc = h->pixf.sad[pixel]( fenc, FENC_STRIDE, zero, 0 ) >> 1;
-        res = h->pixf.satd[pixel]( fenc, FENC_STRIDE, zero, 0 ) - dc;
+        pixel *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
+        int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, zero, 0 ) >> 1;
+        res = h->pixf.satd[size]( fenc, FENC_STRIDE, zero, 0 ) - dc;
          h->mb.pic.fenc_satd_cache[cache_index] = res + 1;
          return res;
      }
@@ -116,10 +116,10 @@ static ALWAYS_INLINE int cached_satd( x264_t *h, int pixel, int x, int y )
  
  static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
  {
-    ALIGNED_16(static uint8_t zero[16]);
+    ALIGNED_16(static pixel zero[16]);
      int satd = 0;
-    uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
-    uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
+    pixel *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
+    pixel *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
      if( p == 0 && h->mb.i_psy_rd )
      {
          /* If the plane is smaller than 8x8, we can't do an SA8D; this probably isn't a big problem. */
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index ace997bf28b0b7ad3668c79748fd6d428d149a1b..ff5961e4686b36997f0213a75fee040f93d597af 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -67,7 +67,7 @@ static void x264_weight_get_h264( unsigned int weight_nonh264, int offset, x264_
      w->i_scale = X264_MIN( w->i_scale, 127 );
  }
  
-static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest )
+static NOINLINE pixel *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dest )
  {
      int ref0_distance = fenc->i_frame - ref->i_frame - 1;
      /* Note: this will never run during lookahead as weights_analyse is only called if no
@@ -78,7 +78,7 @@ static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fe
          int i_lines = fenc->i_lines_lowres;
          int i_width = fenc->i_width_lowres;
          int i_mb_xy = 0;
-        uint8_t *p = dest;
+        pixel *p = dest;
  
          for( int y = 0; y < i_lines; y += 8, p += i_stride*8 )
              for( int x = 0; x < i_width; x += 8, i_mb_xy++ )
@@ -95,14 +95,14 @@ static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fe
      return ref->lowres[0];
  }
  
-static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w )
+static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
  {
      unsigned int cost = 0;
      int i_stride = fenc->i_stride_lowres;
      int i_lines = fenc->i_lines_lowres;
      int i_width = fenc->i_width_lowres;
-    uint8_t *fenc_plane = fenc->lowres[0];
-    ALIGNED_ARRAY_8( uint8_t, buf,[8*8] );
+    pixel *fenc_plane = fenc->lowres[0];
+    ALIGNED_ARRAY_8( pixel, buf,[8*8] );
      int pixoff = 0;
      int i_mb = 0;
  
@@ -175,7 +175,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
          x264_lowres_context_init( h, &a );
          x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
      }
-    uint8_t *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
+    pixel *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
      origscore = minscore = x264_weight_cost( h, fenc, mcbuf, 0 );
  
      if( !minscore )
@@ -211,8 +211,8 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
      if( weights[0].weightfn && b_lookahead )
      {
          //scale lowres in lookahead for slicetype_frame_cost
-        uint8_t *src = ref->buffer_lowres[0];
-        uint8_t *dst = h->mb.p_weight_buf[0];
+        pixel *src = ref->buffer_lowres[0];
+        pixel *dst = h->mb.p_weight_buf[0];
          int width = ref->i_width_lowres + PADH*2;
          int height = ref->i_lines_lowres + PADV*2;
          x264_weight_scale_plane( h, dst, ref->i_stride_lowres, src, ref->i_stride_lowres,
@@ -242,8 +242,8 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
                              i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1) ||
                              h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2;
  
-    ALIGNED_ARRAY_8( uint8_t, pix1,[9*FDEC_STRIDE] );
-    uint8_t *pix2 = pix1+8;
+    ALIGNED_ARRAY_8( pixel, pix1,[9*FDEC_STRIDE] );
+    pixel *pix2 = pix1+8;
      x264_me_t m[2];
      int i_bcost = COST_MAX;
      int list_used = 0;
@@ -289,14 +289,14 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
          { \
              int hpel_idx1 = (((mv0)[0]&2)>>1) + ((mv0)[1]&2); \
              int hpel_idx2 = (((mv1)[0]&2)>>1) + ((mv1)[1]&2); \
-            uint8_t *src1 = m[0].p_fref[hpel_idx1] + ((mv0)[0]>>2) + ((mv0)[1]>>2) * m[0].i_stride[0]; \
-            uint8_t *src2 = m[1].p_fref[hpel_idx2] + ((mv1)[0]>>2) + ((mv1)[1]>>2) * m[1].i_stride[0]; \
+            pixel *src1 = m[0].p_fref[hpel_idx1] + ((mv0)[0]>>2) + ((mv0)[1]>>2) * m[0].i_stride[0]; \
+            pixel *src2 = m[1].p_fref[hpel_idx2] + ((mv1)[0]>>2) + ((mv1)[1]>>2) * m[1].i_stride[0]; \
              h->mc.avg[PIXEL_8x8]( pix1, 16, src1, m[0].i_stride[0], src2, m[1].i_stride[0], i_bipred_weight ); \
          } \
          else \
          { \
              int stride1 = 16, stride2 = 16; \
-            uint8_t *src1, *src2; \
+            pixel *src1, *src2; \
              src1 = h->mc.get_ref( pix1, &stride1, m[0].p_fref, m[0].i_stride[0], \
                                    (mv0)[0], (mv0)[1], 8, 8, w ); \
              src2 = h->mc.get_ref( pix2, &stride2, m[1].p_fref, m[1].i_stride[0], \
@@ -415,13 +415,13 @@ skip_motionest:
  lowres_intra_mb:
      if( !fenc->b_intra_calculated )
      {
-        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
-        uint8_t *pix = &pix1[8+FDEC_STRIDE - 1];
-        uint8_t *src = &fenc->lowres[0][i_pel_offset - 1];
+        ALIGNED_ARRAY_16( pixel, edge,[33] );
+        pixel *pix = &pix1[8+FDEC_STRIDE - 1];
+        pixel *src = &fenc->lowres[0][i_pel_offset - 1];
          const int intra_penalty = 5;
          int satds[3];
  
-        memcpy( pix-FDEC_STRIDE, src-i_stride, 17 );
+        memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
          for( int i = 0; i < 8; i++ )
              pix[i*FDEC_STRIDE] = src[i*i_stride];
          pix++;
diff --git a/tools/checkasm.c b/tools/checkasm.c

index c663bef3ebb3de4c1190297b77256ab79e8ecd9d..e60dcde4214c72d16651b89256191a98ade4f45d 100644 (file)
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -37,9 +37,11 @@
  #endif
  
  /* buf1, buf2: initialised to random data and shouldn't write into them */
-uint8_t * buf1, * buf2;
+uint8_t *buf1, *buf2;
  /* buf3, buf4: used to store output */
-uint8_t * buf3, * buf4;
+uint8_t *buf3, *buf4;
+/* pbuf*: point to the same memory as above, just for type convenience */
+pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
  
  int quiet = 0;
  
@@ -241,7 +243,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
      x264_predict_t predict_4x4[9+3];
      x264_predict8x8_t predict_8x8[9+3];
      x264_predict_8x8_filter_t predict_8x8_filter;
-    ALIGNED_16( uint8_t edge[33] );
+    ALIGNED_16( pixel edge[33] );
      uint16_t cost_mv[32];
      int ret = 0, ok, used_asm;
  
@@ -252,7 +254,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
      x264_predict_8x8c_init( 0, predict_8x8c );
      x264_predict_8x8_init( 0, predict_8x8, &predict_8x8_filter );
      x264_predict_4x4_init( 0, predict_4x4 );
-    predict_8x8_filter( buf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
+    predict_8x8_filter( pbuf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
  
      // maximize sum
      for( int i = 0; i < 256; i++ )
@@ -277,8 +279,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              used_asm = 1; \
              for( int j = 0; j < 64; j++ ) \
              { \
-                res_c   = call_c( pixel_c.name[i], buf1, 16, buf2+j*!align, 64 ); \
-                res_asm = call_a( pixel_asm.name[i], buf1, 16, buf2+j*!align, 64 ); \
+                res_c   = call_c( pixel_c.name[i], pbuf1, 16, pbuf2+j*!align, 64 ); \
+                res_asm = call_a( pixel_asm.name[i], pbuf1, 16, pbuf2+j*!align, 64 ); \
                  if( res_c != res_asm ) \
                  { \
                      ok = 0; \
@@ -288,8 +290,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              } \
              for( int j = 0; j < 0x1000 && ok; j += 256 ) \
              { \
-                res_c   = pixel_c  .name[i]( buf3+j, 16, buf4+j, 16 ); \
-                res_asm = pixel_asm.name[i]( buf3+j, 16, buf4+j, 16 ); \
+                res_c   = pixel_c  .name[i]( pbuf3+j, 16, pbuf4+j, 16 ); \
+                res_asm = pixel_asm.name[i]( pbuf3+j, 16, pbuf4+j, 16 ); \
                  if( res_c != res_asm ) \
                  { \
                      ok = 0; \
@@ -317,17 +319,17 @@ static int check_pixel( int cpu_ref, int cpu_new )
              used_asm = 1; \
              for( int j = 0; j < 64; j++ ) \
              { \
-                uint8_t *pix2 = buf2+j; \
-                res_c[0] = pixel_c.sad[i]( buf1, 16, pix2, 64 ); \
-                res_c[1] = pixel_c.sad[i]( buf1, 16, pix2+6, 64 ); \
-                res_c[2] = pixel_c.sad[i]( buf1, 16, pix2+1, 64 ); \
+                pixel *pix2 = pbuf2+j; \
+                res_c[0] = pixel_c.sad[i]( pbuf1, 16, pix2, 64 ); \
+                res_c[1] = pixel_c.sad[i]( pbuf1, 16, pix2+6, 64 ); \
+                res_c[2] = pixel_c.sad[i]( pbuf1, 16, pix2+1, 64 ); \
                  if( N == 4 ) \
                  { \
-                    res_c[3] = pixel_c.sad[i]( buf1, 16, pix2+10, 64 ); \
-                    call_a( pixel_asm.sad_x4[i], buf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
+                    res_c[3] = pixel_c.sad[i]( pbuf1, 16, pix2+10, 64 ); \
+                    call_a( pixel_asm.sad_x4[i], pbuf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
                  } \
                  else \
-                    call_a( pixel_asm.sad_x3[i], buf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
+                    call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
                  if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
                  { \
                      ok = 0; \
@@ -336,9 +338,9 @@ static int check_pixel( int cpu_ref, int cpu_new )
                               res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \
                  } \
                  if( N == 4 ) \
-                    call_c2( pixel_c.sad_x4[i], buf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
+                    call_c2( pixel_c.sad_x4[i], pbuf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
                  else \
-                    call_c2( pixel_c.sad_x3[i], buf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
+                    call_c2( pixel_c.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
              } \
          } \
      } \
@@ -353,17 +355,17 @@ static int check_pixel( int cpu_ref, int cpu_new )
          set_func_name( "%s_%s", "var", pixel_names[i] ); \
          used_asm = 1; \
          /* abi-check wrapper can't return uint64_t, so separate it from return value check */ \
-        call_c1( pixel_c.var[i], buf1, 16 ); \
-        call_a1( pixel_asm.var[i], buf1, 16 ); \
-        uint64_t res_c   = pixel_c.var[i]( buf1, 16 ); \
-        uint64_t res_asm = pixel_asm.var[i]( buf1, 16 ); \
+        call_c1( pixel_c.var[i], pbuf1, 16 ); \
+        call_a1( pixel_asm.var[i], pbuf1, 16 ); \
+        uint64_t res_c   = pixel_c.var[i]( pbuf1, 16 ); \
+        uint64_t res_asm = pixel_asm.var[i]( pbuf1, 16 ); \
          if( res_c != res_asm ) \
          { \
              ok = 0; \
              fprintf( stderr, "var[%d]: %d %d != %d %d [FAILED]\n", i, (int)res_c, (int)(res_c>>32), (int)res_asm, (int)(res_asm>>32) ); \
          } \
-        call_c2( pixel_c.var[i], buf1, 16 ); \
-        call_a2( pixel_asm.var[i], buf1, 16 ); \
+        call_c2( pixel_c.var[i], pbuf1, 16 ); \
+        call_a2( pixel_asm.var[i], pbuf1, 16 ); \
      }
  
      ok = 1; used_asm = 0;
@@ -377,8 +379,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
          int res_c, res_asm, ssd_c, ssd_asm;
          set_func_name( "var2_8x8" );
          used_asm = 1;
-        res_c   = call_c( pixel_c.var2_8x8, buf1, 16, buf2, 16, &ssd_c );
-        res_asm = call_a( pixel_asm.var2_8x8, buf1, 16, buf2, 16, &ssd_asm );
+        res_c   = call_c( pixel_c.var2_8x8, pbuf1, 16, pbuf2, 16, &ssd_c );
+        res_asm = call_a( pixel_asm.var2_8x8, pbuf1, 16, pbuf2, 16, &ssd_asm );
          if( res_c != res_asm || ssd_c != ssd_asm )
          {
              ok = 0;
@@ -396,9 +398,9 @@ static int check_pixel( int cpu_ref, int cpu_new )
              used_asm = 1;
              for( int j = 0; j < 32; j++ )
              {
-                uint8_t *pix = (j&16 ? buf1 : buf3) + (j&15)*256;
-                call_c1( pixel_c.hadamard_ac[i], buf1, 16 );
-                call_a1( pixel_asm.hadamard_ac[i], buf1, 16 );
+                pixel *pix = (j&16 ? pbuf1 : pbuf3) + (j&15)*256;
+                call_c1( pixel_c.hadamard_ac[i], pbuf1, 16 );
+                call_a1( pixel_asm.hadamard_ac[i], pbuf1, 16 );
                  uint64_t rc = pixel_c.hadamard_ac[i]( pix, 16 );
                  uint64_t ra = pixel_asm.hadamard_ac[i]( pix, 16 );
                  if( rc != ra )
@@ -408,8 +410,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
                      break;
                  }
              }
-            call_c2( pixel_c.hadamard_ac[i], buf1, 16 );
-            call_a2( pixel_asm.hadamard_ac[i], buf1, 16 );
+            call_c2( pixel_c.hadamard_ac[i], pbuf1, 16 );
+            call_a2( pixel_asm.hadamard_ac[i], pbuf1, 16 );
          }
      report( "pixel hadamard_ac :" );
  
@@ -419,13 +421,13 @@ static int check_pixel( int cpu_ref, int cpu_new )
          int res_c[3], res_asm[3]; \
          set_func_name( #name ); \
          used_asm = 1; \
-        memcpy( buf3, buf2, 1024 ); \
+        memcpy( buf3, buf2, 1024 * sizeof(pixel) ); \
          for( int i = 0; i < 3; i++ ) \
          { \
-            pred[i]( buf3+48, ##__VA_ARGS__ ); \
-            res_c[i] = pixel_c.satd( buf1+48, 16, buf3+48, 32 ); \
+            pred[i]( pbuf3+48, ##__VA_ARGS__ ); \
+            res_c[i] = pixel_c.satd( pbuf1+48, 16, pbuf3+48, 32 ); \
          } \
-        call_a( pixel_asm.name, buf1+48, i8x8 ? edge : buf3+48, res_asm ); \
+        call_a( pixel_asm.name, pbuf1+48, i8x8 ? edge : pbuf3+48, res_asm ); \
          if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
          { \
              ok = 0; \
@@ -454,16 +456,16 @@ static int check_pixel( int cpu_ref, int cpu_new )
          ALIGNED_16( int sums[5][4] ) = {{0}};
          used_asm = ok = 1;
          x264_emms();
-        res_c = x264_pixel_ssim_wxh( &pixel_c,   buf1+2, 32, buf2+2, 32, 32, 28, buf3 );
-        res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28, buf3 );
+        res_c = x264_pixel_ssim_wxh( &pixel_c,   pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3 );
+        res_a = x264_pixel_ssim_wxh( &pixel_asm, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3 );
          if( fabs( res_c - res_a ) > 1e-6 )
          {
              ok = 0;
              fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
          }
          set_func_name( "ssim_core" );
-        call_c2( pixel_c.ssim_4x4x2_core,   buf1+2, 32, buf2+2, 32, sums );
-        call_a2( pixel_asm.ssim_4x4x2_core, buf1+2, 32, buf2+2, 32, sums );
+        call_c2( pixel_c.ssim_4x4x2_core,   pbuf1+2, 32, pbuf2+2, 32, sums );
+        call_a2( pixel_asm.ssim_4x4x2_core, pbuf1+2, 32, pbuf2+2, 32, sums );
          set_func_name( "ssim_end" );
          call_c2( pixel_c.ssim_end4,   sums, sums, 4 );
          call_a2( pixel_asm.ssim_end4, sums, sums, 4 );
@@ -542,8 +544,8 @@ static int check_dct( int cpu_ref, int cpu_new )
      { \
          set_func_name( #name ); \
          used_asm = 1; \
-        call_c( dct_c.name, t1, buf1, buf2 ); \
-        call_a( dct_asm.name, t2, buf1, buf2 ); \
+        call_c( dct_c.name, t1, pbuf1, pbuf2 ); \
+        call_a( dct_asm.name, t2, pbuf1, pbuf2 ); \
          if( memcmp( t1, t2, size ) ) \
          { \
              ok = 0; \
@@ -565,8 +567,8 @@ static int check_dct( int cpu_ref, int cpu_new )
  
      // fdct and idct are denormalized by different factors, so quant/dequant
      // is needed to force the coefs into the right range.
-    dct_c.sub16x16_dct( dct4, buf1, buf2 );
-    dct_c.sub16x16_dct8( dct8, buf1, buf2 );
+    dct_c.sub16x16_dct( dct4, pbuf1, pbuf2 );
+    dct_c.sub16x16_dct8( dct8, pbuf1, pbuf2 );
      for( int i = 0; i < 16; i++ )
      {
          qf.quant_4x4( dct4[i], h->quant4_mf[CQM_4IY][20], h->quant4_bias[CQM_4IY][20] );
@@ -583,19 +585,19 @@ static int check_dct( int cpu_ref, int cpu_new )
      { \
          set_func_name( #name ); \
          used_asm = 1; \
-        memcpy( buf3, buf1, 32*32 ); \
-        memcpy( buf4, buf1, 32*32 ); \
+        memcpy( buf3, buf1, 32*32 * sizeof(pixel) ); \
+        memcpy( buf4, buf1, 32*32 * sizeof(pixel) ); \
          memcpy( dct1, src, 512 ); \
          memcpy( dct2, src, 512 ); \
-        call_c1( dct_c.name, buf3, (void*)dct1 ); \
-        call_a1( dct_asm.name, buf4, (void*)dct2 ); \
-        if( memcmp( buf3, buf4, 32*32 ) ) \
+        call_c1( dct_c.name, pbuf3, (void*)dct1 ); \
+        call_a1( dct_asm.name, pbuf4, (void*)dct2 ); \
+        if( memcmp( buf3, buf4, 32*32 * sizeof(pixel) ) ) \
          { \
              ok = 0; \
              fprintf( stderr, #name " [FAILED]\n" ); \
          } \
-        call_c2( dct_c.name, buf3, (void*)dct1 ); \
-        call_a2( dct_asm.name, buf4, (void*)dct2 ); \
+        call_c2( dct_c.name, pbuf3, (void*)dct1 ); \
+        call_a2( dct_asm.name, pbuf4, (void*)dct2 ); \
      }
      ok = 1; used_asm = 0;
      TEST_IDCT( add4x4_idct, dct4 );
@@ -667,17 +669,17 @@ static int check_dct( int cpu_ref, int cpu_new )
          int nz_a, nz_c; \
          set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
          used_asm = 1; \
-        memcpy( buf3, buf1, 16*FDEC_STRIDE ); \
-        memcpy( buf4, buf1, 16*FDEC_STRIDE ); \
-        nz_c = call_c1( zigzag_c.name, t1, buf2, buf3 ); \
-        nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4 ); \
+        memcpy( buf3, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
+        memcpy( buf4, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
+        nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \
+        nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
          if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
          { \
              ok = 0; \
              fprintf( stderr, #name " [FAILED]\n" ); \
          } \
-        call_c2( zigzag_c.name, t1, buf2, buf3 ); \
-        call_a2( zigzag_asm.name, t2, buf2, buf4 ); \
+        call_c2( zigzag_c.name, t1, pbuf2, pbuf3 ); \
+        call_a2( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
      }
  
  #define TEST_ZIGZAG_SUBAC( name, t1, t2 ) \
@@ -689,24 +691,24 @@ static int check_dct( int cpu_ref, int cpu_new )
          used_asm = 1; \
          for( int i = 0; i < 2; i++ ) \
          { \
-            memcpy( buf3, buf2, 16*FDEC_STRIDE ); \
-            memcpy( buf4, buf2, 16*FDEC_STRIDE ); \
+            memcpy( buf3, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
+            memcpy( buf4, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
              for( int j = 0; j < 4; j++ ) \
              { \
-                memcpy( buf3 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
-                memcpy( buf4 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
+                memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
+                memcpy( pbuf4 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
              } \
-            nz_c = call_c1( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
-            nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
-            if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a || dc_c != dc_a ) \
+            nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
+            nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
+            if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
              { \
                  ok = 0; \
                  fprintf( stderr, #name " [FAILED]\n" ); \
                  break; \
              } \
          } \
-        call_c2( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
-        call_a2( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
+        call_c2( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
+        call_a2( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
      }
  
  #define TEST_INTERLEAVE( name, t1, t2, dct, size ) \
@@ -719,10 +721,10 @@ static int check_dct( int cpu_ref, int cpu_new )
              memcpy(dct, buf1, size*sizeof(int16_t)); \
              for( int i = 0; i < size; i++ ) \
                  dct[i] = rand()&0x1F ? 0 : dct[i]; \
-            memcpy(buf3, buf4, 10*sizeof(uint8_t)); \
+            memcpy(buf3, buf4, 10); \
              call_c( zigzag_c.name, t1, dct, buf3 ); \
              call_a( zigzag_asm.name, t2, dct, buf4 ); \
-            if( memcmp( t1, t2, size*sizeof(int16_t) ) || memcmp( buf3, buf4, 10*sizeof(uint8_t) ) ) \
+            if( memcmp( t1, t2, size*sizeof(int16_t) ) || memcmp( buf3, buf4, 10 ) ) \
              { \
                  ok = 0; \
              } \
@@ -767,20 +769,20 @@ static int check_mc( int cpu_ref, int cpu_new )
      x264_mc_functions_t mc_c;
      x264_mc_functions_t mc_ref;
      x264_mc_functions_t mc_a;
-    x264_pixel_function_t pixel;
+    x264_pixel_function_t pixf;
  
-    uint8_t *src     = &buf1[2*64+2];
-    uint8_t *src2[4] = { &buf1[3*64+2], &buf1[5*64+2],
-                         &buf1[7*64+2], &buf1[9*64+2] };
-    uint8_t *dst1    = buf3;
-    uint8_t *dst2    = buf4;
+    pixel *src     = &(pbuf1)[2*64+2];
+    pixel *src2[4] = { &(pbuf1)[3*64+2], &(pbuf1)[5*64+2],
+                       &(pbuf1)[7*64+2], &(pbuf1)[9*64+2] };
+    pixel *dst1    = pbuf3;
+    pixel *dst2    = pbuf4;
  
      int ret = 0, ok, used_asm;
  
      x264_mc_init( 0, &mc_c );
      x264_mc_init( cpu_ref, &mc_ref );
      x264_mc_init( cpu_new, &mc_a );
-    x264_pixel_init( 0, &pixel );
+    x264_pixel_init( 0, &pixf );
  
  #define MC_TEST_LUMA( w, h ) \
          if( mc_a.mc_luma != mc_ref.mc_luma && !(w&(w-1)) && h<=16 ) \
@@ -800,7 +802,7 @@ static int check_mc( int cpu_ref, int cpu_new )
          } \
          if( mc_a.get_ref != mc_ref.get_ref ) \
          { \
-            uint8_t *ref = dst2; \
+            pixel *ref = dst2; \
              int ref_stride = 32; \
              const x264_weight_t *weight = weight_none; \
              set_func_name( "get_ref_%dx%d", w, h ); \
@@ -808,9 +810,9 @@ static int check_mc( int cpu_ref, int cpu_new )
              memset( buf3, 0xCD, 1024 ); \
              memset( buf4, 0xCD, 1024 ); \
              call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
-            ref = (uint8_t*) call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
+            ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
              for( int i = 0; i < h; i++ ) \
-                if( memcmp( dst1+i*32, ref+i*ref_stride, w ) ) \
+                if( memcmp( dst1+i*32, ref+i*ref_stride, w * sizeof(pixel) ) ) \
                  { \
                      fprintf( stderr, "get_ref[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h ); \
                      ok = 0; \
@@ -876,21 +878,21 @@ static int check_mc( int cpu_ref, int cpu_new )
      ok = 1, used_asm = 0; \
      for( int i = 0; i < 10; i++ ) \
      { \
-        memcpy( buf3, buf1+320, 320 ); \
-        memcpy( buf4, buf1+320, 320 ); \
+        memcpy( buf3, pbuf1+320, 320 * sizeof(pixel) ); \
+        memcpy( buf4, pbuf1+320, 320 * sizeof(pixel) ); \
          if( mc_a.name[i] != mc_ref.name[i] ) \
          { \
              set_func_name( "%s_%s", #name, pixel_names[i] ); \
              used_asm = 1; \
-            call_c1( mc_c.name[i], buf3, 16, buf2+1, 16, buf1+18, 16, weight ); \
-            call_a1( mc_a.name[i], buf4, 16, buf2+1, 16, buf1+18, 16, weight ); \
-            if( memcmp( buf3, buf4, 320 ) ) \
+            call_c1( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
+            call_a1( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
+            if( memcmp( buf3, buf4, 320 * sizeof(pixel) ) ) \
              { \
                  ok = 0; \
                  fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
              } \
-            call_c2( mc_c.name[i], buf3, 16, buf2+1, 16, buf1+18, 16, weight ); \
-            call_a2( mc_a.name[i], buf4, 16, buf2+1, 16, buf1+18, 16, weight ); \
+            call_c2( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
+            call_a2( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
          } \
      } \
  }
@@ -904,11 +906,11 @@ static int check_mc( int cpu_ref, int cpu_new )
      ok = 1, used_asm = 0; \
      for( int i = 1; i <= 5; i++ ) \
      { \
-        ALIGNED_16( uint8_t buffC[640] ); \
-        ALIGNED_16( uint8_t buffA[640] ); \
+        ALIGNED_16( pixel buffC[640] ); \
+        ALIGNED_16( pixel buffA[640] ); \
          int j = X264_MAX( i*4, 2 ); \
-        memset( buffC, 0, 640 ); \
-        memset( buffA, 0, 640 ); \
+        memset( buffC, 0, 640 * sizeof(pixel) ); \
+        memset( buffA, 0, 640 * sizeof(pixel) ); \
          x264_t ha; \
          ha.mc = mc_a; \
          /* w12 is the same as w16 in some cases */ \
@@ -918,18 +920,18 @@ static int check_mc( int cpu_ref, int cpu_new )
          { \
              set_func_name( "%s_w%d", #name, j ); \
              used_asm = 1; \
-            call_c1( mc_c.weight[i], buffC, 32, buf2+align_off, 32, &weight, 16 ); \
+            call_c1( mc_c.weight[i], buffC, 32, pbuf2+align_off, 32, &weight, 16 ); \
              mc_a.weight_cache(&ha, &weight); \
-            call_a1( weight.weightfn[i], buffA, 32, buf2+align_off, 32, &weight, 16 ); \
+            call_a1( weight.weightfn[i], buffA, 32, pbuf2+align_off, 32, &weight, 16 ); \
              for( int k = 0; k < 16; k++ ) \
-                if( memcmp( &buffC[k*32], &buffA[k*32], j ) ) \
+                if( memcmp( &buffC[k*32], &buffA[k*32], j * sizeof(pixel) ) ) \
                  { \
                      ok = 0; \
                      fprintf( stderr, #name "[%d]: [FAILED] s:%d o:%d d%d\n", i, s, o, d ); \
                      break; \
                  } \
-            call_c2( mc_c.weight[i], buffC, 32, buf2+align_off, 32, &weight, 16 ); \
-            call_a2( weight.weightfn[i], buffA, 32, buf2+align_off, 32, &weight, 16 ); \
+            call_c2( mc_c.weight[i], buffC, 32, pbuf2+align_off, 32, &weight, 16 ); \
+            call_a2( weight.weightfn[i], buffA, 32, pbuf2+align_off, 32, &weight, 16 ); \
          } \
      }
  
@@ -974,20 +976,20 @@ static int check_mc( int cpu_ref, int cpu_new )
  
      if( mc_a.hpel_filter != mc_ref.hpel_filter )
      {
-        uint8_t *srchpel = buf1+8+2*64;
-        uint8_t *dstc[3] = { buf3+8, buf3+8+16*64, buf3+8+32*64 };
-        uint8_t *dsta[3] = { buf4+8, buf4+8+16*64, buf4+8+32*64 };
-        void *tmp = buf3+49*64;
+        pixel *srchpel = pbuf1+8+2*64;
+        pixel *dstc[3] = { pbuf3+8, pbuf3+8+16*64, pbuf3+8+32*64 };
+        pixel *dsta[3] = { pbuf4+8, pbuf4+8+16*64, pbuf4+8+32*64 };
+        void *tmp = pbuf3+49*64;
          set_func_name( "hpel_filter" );
          ok = 1; used_asm = 1;
-        memset( buf3, 0, 4096 );
-        memset( buf4, 0, 4096 );
+        memset( buf3, 0, 4096 * sizeof(pixel) );
+        memset( buf4, 0, 4096 * sizeof(pixel) );
          call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, 64, 48, 10, tmp );
          call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, 64, 48, 10, tmp );
          for( int i = 0; i < 3; i++ )
              for( int j = 0; j < 10; j++ )
                  //FIXME ideally the first pixels would match too, but they aren't actually used
-                if( memcmp( dstc[i]+j*64+2, dsta[i]+j*64+2, 43 ) )
+                if( memcmp( dstc[i]+j*64+2, dsta[i]+j*64+2, 43 * sizeof(pixel) ) )
                  {
                      ok = 0;
                      fprintf( stderr, "hpel filter differs at plane %c line %d\n", "hvc"[i], j );
@@ -1004,19 +1006,19 @@ static int check_mc( int cpu_ref, int cpu_new )
  
      if( mc_a.frame_init_lowres_core != mc_ref.frame_init_lowres_core )
      {
-        uint8_t *dstc[4] = { buf3, buf3+1024, buf3+2048, buf3+3072 };
-        uint8_t *dsta[4] = { buf4, buf4+1024, buf4+2048, buf4+3072 };
+        pixel *dstc[4] = { pbuf3, pbuf3+1024, pbuf3+2048, pbuf3+3072 };
+        pixel *dsta[4] = { pbuf4, pbuf4+1024, pbuf4+2048, pbuf4+3072 };
          set_func_name( "lowres_init" );
          ok = 1; used_asm = 1;
          for( int w = 40; w <= 48; w += 8 )
          {
              int stride = (w+8)&~15;
-            call_c( mc_c.frame_init_lowres_core, buf1, dstc[0], dstc[1], dstc[2], dstc[3], w*2, stride, w, 16 );
-            call_a( mc_a.frame_init_lowres_core, buf1, dsta[0], dsta[1], dsta[2], dsta[3], w*2, stride, w, 16 );
+            call_c( mc_c.frame_init_lowres_core, pbuf1, dstc[0], dstc[1], dstc[2], dstc[3], w*2, stride, w, 16 );
+            call_a( mc_a.frame_init_lowres_core, pbuf1, dsta[0], dsta[1], dsta[2], dsta[3], w*2, stride, w, 16 );
              for( int i = 0; i < 16; i++ )
              {
                  for( int j = 0; j < 4; j++ )
-                    if( memcmp( dstc[j]+i*stride, dsta[j]+i*stride, w ) )
+                    if( memcmp( dstc[j]+i*stride, dsta[j]+i*stride, w * sizeof(pixel) ) )
                      {
                          ok = 0;
                          fprintf( stderr, "frame_init_lowres differs at plane %d line %d\n", j, i );
@@ -1039,21 +1041,21 @@ static int check_mc( int cpu_ref, int cpu_new )
          int stride = 80;\
          set_func_name( #name );\
          used_asm = 1;\
-        memcpy( buf3, buf1, size*2*stride );\
-        memcpy( buf4, buf1, size*2*stride );\
+        memcpy( buf3, buf1, size*2*stride * sizeof(pixel) );\
+        memcpy( buf4, buf1, size*2*stride * sizeof(pixel) );\
          uint16_t *sum = (uint16_t*)buf3;\
          call_c1( mc_c.name, __VA_ARGS__ );\
          sum = (uint16_t*)buf4;\
          call_a1( mc_a.name, __VA_ARGS__ );\
-        if( memcmp( buf3, buf4, (stride-8)*2 )\
-            || (size>9 && memcmp( buf3+18*stride, buf4+18*stride, (stride-8)*2 )))\
+        if( memcmp( buf3, buf4, (stride-8)*2 * sizeof(pixel) )\
+            || (size>9 && memcmp( pbuf3+18*stride, pbuf4+18*stride, (stride-8)*2 * sizeof(pixel) )))\
              ok = 0;\
          call_c2( mc_c.name, __VA_ARGS__ );\
          call_a2( mc_a.name, __VA_ARGS__ );\
      }
      ok = 1; used_asm = 0;
-    INTEGRAL_INIT( integral_init4h, 2, sum+stride, buf2, stride );
-    INTEGRAL_INIT( integral_init8h, 2, sum+stride, buf2, stride );
+    INTEGRAL_INIT( integral_init4h, 2, sum+stride, pbuf2, stride );
+    INTEGRAL_INIT( integral_init8h, 2, sum+stride, pbuf2, stride );
      INTEGRAL_INIT( integral_init4v, 14, sum, sum+9*stride, stride );
      INTEGRAL_INIT( integral_init8v, 9, sum, stride );
      report( "integral init :" );
@@ -1121,21 +1123,21 @@ static int check_deblock( int cpu_ref, int cpu_new )
          for( int j = 0; j < 1024; j++ ) \
              /* two distributions of random to exercise different failure modes */ \
              buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
-        memcpy( buf4, buf3, 1024 ); \
+        memcpy( buf4, buf3, 1024 * sizeof(pixel) ); \
          if( db_a.name != db_ref.name ) \
          { \
              set_func_name( #name ); \
              used_asm = 1; \
-            call_c1( db_c.name, buf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
-            call_a1( db_a.name, buf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
-            if( memcmp( buf3, buf4, 1024 ) ) \
+            call_c1( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            call_a1( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            if( memcmp( buf3, buf4, 1024 * sizeof(pixel) ) ) \
              { \
                  ok = 0; \
                  fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
                  break; \
              } \
-            call_c2( db_c.name, buf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
-            call_a2( db_a.name, buf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            call_c2( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            call_a2( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
          } \
      }
  
@@ -1505,8 +1507,8 @@ static int check_quant( int cpu_ref, int cpu_new )
  static int check_intra( int cpu_ref, int cpu_new )
  {
      int ret = 0, ok = 1, used_asm = 0;
-    ALIGNED_16( uint8_t edge[33] );
-    ALIGNED_16( uint8_t edge2[33] );
+    ALIGNED_16( pixel edge[33] );
+    ALIGNED_16( pixel edge2[33] );
      struct
      {
          x264_predict_t      predict_16x16[4+3];
@@ -1531,18 +1533,18 @@ static int check_intra( int cpu_ref, int cpu_new )
      x264_predict_8x8_init( cpu_new, ip_a.predict_8x8, &ip_a.predict_8x8_filter );
      x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 );
  
-    ip_c.predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
+    ip_c.predict_8x8_filter( pbuf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
  
  #define INTRA_TEST( name, dir, w, ... )\
      if( ip_a.name[dir] != ip_ref.name[dir] )\
      {\
          set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
          used_asm = 1;\
-        memcpy( buf3, buf1, 32*20 );\
-        memcpy( buf4, buf1, 32*20 );\
-        call_c( ip_c.name[dir], buf3+48, ##__VA_ARGS__ );\
-        call_a( ip_a.name[dir], buf4+48, ##__VA_ARGS__ );\
-        if( memcmp( buf3, buf4, 32*20 ) )\
+        memcpy( buf3, buf1, 32*20 * sizeof(pixel) );\
+        memcpy( buf4, buf1, 32*20 * sizeof(pixel) );\
+        call_c( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\
+        call_a( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\
+        if( memcmp( buf3, buf4, 32*20 * sizeof(pixel) ) )\
          {\
              fprintf( stderr, #name "[%d] :  [FAILED]\n", dir );\
              ok = 0;\
@@ -1582,10 +1584,10 @@ static int check_intra( int cpu_ref, int cpu_new )
          used_asm = 1;
          for( int i = 0; i < 32; i++ )
          {
-            memcpy( edge2, edge, 33 );
-            call_c(ip_c.predict_8x8_filter, buf1+48, edge, (i&24)>>1, i&7);
-            call_a(ip_a.predict_8x8_filter, buf1+48, edge2, (i&24)>>1, i&7);
-            if( memcmp( edge, edge2, 33 ) )
+            memcpy( edge2, edge, 33 * sizeof(pixel) );
+            call_c(ip_c.predict_8x8_filter, pbuf1+48, edge, (i&24)>>1, i&7);
+            call_a(ip_a.predict_8x8_filter, pbuf1+48, edge2, (i&24)>>1, i&7);
+            if( memcmp( edge, edge2, 33 * sizeof(pixel) ) )
              {
                  fprintf( stderr, "predict_8x8_filter :  [FAILED] %d %d\n", (i&24)>>1, i&7);
                  ok = 0;
@@ -1846,9 +1848,15 @@ int main(int argc, char *argv[])
          fprintf( stderr, "malloc failed, unable to initiate tests!\n" );
          return -1;
      }
-    buf2 = buf1 + 0xf00;
-    buf3 = buf2 + 0xf00;
-    buf4 = buf3 + 0x1000;
+#define INIT_POINTER_OFFSETS\
+    buf2 = buf1 + 0xf00;\
+    buf3 = buf2 + 0xf00;\
+    buf4 = buf3 + 0x1000;\
+    pbuf1 = (pixel*)buf1;\
+    pbuf2 = (pixel*)buf2;\
+    pbuf3 = (pixel*)buf3;\
+    pbuf4 = (pixel*)buf4;
+    INIT_POINTER_OFFSETS;
      for( int i = 0; i < 0x1e00; i++ )
          buf1[i] = rand() & 0xFF;
      memset( buf1+0x1e00, 0, 0x2000 );
@@ -1857,9 +1865,7 @@ int main(int argc, char *argv[])
      if( do_bench )
          for( int i = 0; i < BENCH_ALIGNS && !ret; i++ )
          {
-            buf2 = buf1 + 0xf00;
-            buf3 = buf2 + 0xf00;
-            buf4 = buf3 + 0x1000;
+            INIT_POINTER_OFFSETS;
              ret |= x264_stack_pagealign( check_all_flags, i*16 );
              buf1 += 16;
              quiet = 1;
author	Oskar Arvidsson <oskar@irock.se>
	Tue, 1 Jun 2010 23:35:38 +0000 (01:35 +0200)
committer	Fiona Glaser <fiona@x264.com>
	Wed, 2 Jun 2010 05:18:26 +0000 (22:18 -0700)
common/common.h		patch \| blob \| history
common/dct.c		patch \| blob \| history
common/dct.h		patch \| blob \| history
common/deblock.c		patch \| blob \| history
common/frame.c		patch \| blob \| history
common/frame.h		patch \| blob \| history
common/macroblock.c		patch \| blob \| history
common/macroblock.h		patch \| blob \| history
common/mc.c		patch \| blob \| history
common/mc.h		patch \| blob \| history
common/pixel.c		patch \| blob \| history
common/pixel.h		patch \| blob \| history
common/predict.c		patch \| blob \| history
common/predict.h		patch \| blob \| history
common/visualize.c		patch \| blob \| history
encoder/analyse.c		patch \| blob \| history
encoder/encoder.c		patch \| blob \| history
encoder/macroblock.c		patch \| blob \| history
encoder/macroblock.h		patch \| blob \| history
encoder/me.c		patch \| blob \| history
encoder/me.h		patch \| blob \| history
encoder/rdo.c		patch \| blob \| history
encoder/slicetype.c		patch \| blob \| history
tools/checkasm.c		patch \| blob \| history