Neither GCC nor ARMCC supports 16-byte stack alignment, even though NEON loads require it.
These macros work only for arrays, but fortunately that covers almost all instances of stack alignment in x264.
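As a rough illustration (not part of the patch; the variable names and standalone wrapper are invented for the example), the ARM fallback of ALIGNED_ARRAY_16 works by over-allocating an 8-byte-aligned buffer by 8 bytes and bumping the pointer past the misaligned half when needed:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* the compiler guarantees only 8-byte alignment for this buffer */
        uint8_t buf_8[16*16 + 8] __attribute__((aligned(8)));
        /* bit 3 is set exactly when buf_8 is 8- but not 16-byte aligned,
         * so adding it yields a 16-byte-aligned pointer into the buffer */
        uint8_t *buf = buf_8 + ((intptr_t)buf_8 & 8);
        printf("16-byte aligned: %d\n", (int)(((intptr_t)buf & 15) == 0));
        return 0;
    }

On non-ARM targets the same invocation expands to a plain 16-byte-aligned array, so indexing such as dct8x8[i][j][k] is unchanged on both paths.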
uint8_t *p_end;
/* aligned for memcpy_aligned starting here */
- DECLARE_ALIGNED_16( int f8_bits_encoded ); // only if using x264_cabac_size_decision()
+ ALIGNED_16( int f8_bits_encoded ); // only if using x264_cabac_size_decision()
/* context */
uint8_t state[460];
const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
- DECLARE_ALIGNED_16( uint32_t nr_residual_sum[2][64] );
- DECLARE_ALIGNED_16( uint16_t nr_offset[2][64] );
+ ALIGNED_16( uint32_t nr_residual_sum[2][64] );
+ ALIGNED_16( uint16_t nr_offset[2][64] );
uint32_t nr_count[2];
/* Slice header */
/* Current MB DCT coeffs */
struct
{
- DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] );
- DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] );
+ ALIGNED_16( int16_t luma16x16_dc[16] );
+ ALIGNED_16( int16_t chroma_dc[2][4] );
// FIXME share memory?
- DECLARE_ALIGNED_16( int16_t luma8x8[4][64] );
- DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] );
+ ALIGNED_16( int16_t luma8x8[4][64] );
+ ALIGNED_16( int16_t luma4x4[16+8][16] );
} dct;
/* MB table and cache for current frame/mb */
/* current value */
int i_type;
int i_partition;
- DECLARE_ALIGNED_4( uint8_t i_sub_partition[4] );
+ ALIGNED_4( uint8_t i_sub_partition[4] );
int b_transform_8x8;
int i_cbp_luma;
/* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
- DECLARE_ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
- DECLARE_ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
+ ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
+ ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
- DECLARE_ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
- DECLARE_ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
- DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
- DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
+ ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
+ ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
+ ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
+ ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
uint32_t i4x4_nnz_buf[4];
uint32_t i8x8_nnz_buf[4];
int i4x4_cbp;
int i8x8_cbp;
/* Psy trellis DCT data */
- DECLARE_ALIGNED_16( int16_t fenc_dct8[4][64] );
- DECLARE_ALIGNED_16( int16_t fenc_dct4[16][16] );
+ ALIGNED_16( int16_t fenc_dct8[4][64] );
+ ALIGNED_16( int16_t fenc_dct4[16][16] );
/* Psy RD SATD scores */
int fenc_satd[4][4];
uint8_t non_zero_count[X264_SCAN8_SIZE];
/* -1 if unused, -2 if unavailable */
- DECLARE_ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
+ ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
/* 0 if not available */
- DECLARE_ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
- DECLARE_ALIGNED_8( int16_t mvd[2][X264_SCAN8_SIZE][2] );
+ ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
+ ALIGNED_8( int16_t mvd[2][X264_SCAN8_SIZE][2] );
/* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
- DECLARE_ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
+ ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
- DECLARE_ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
- DECLARE_ALIGNED_4( int8_t direct_ref[2][X264_SCAN8_SIZE] );
- DECLARE_ALIGNED_4( int16_t pskip_mv[2] );
+ ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
+ ALIGNED_4( int8_t direct_ref[2][X264_SCAN8_SIZE] );
+ ALIGNED_4( int16_t pskip_mv[2] );
/* number of neighbors (top and left) that used 8x8 dct */
int i_neighbour_transform_size;
{\
int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
- DECLARE_ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\
+ ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\
if( i_edge )\
i_edge+= b_8x8_transform;\
else\
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
int ref[2];
- DECLARE_ALIGNED_8( int16_t mv[2][2] );
+ ALIGNED_8( int16_t mv[2][2] );
int i_list;
int i8;
const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
int i_mode = x264_size2pixel[height][width];
int i_stride0 = 16, i_stride1 = 16;
- DECLARE_ALIGNED_16( uint8_t tmp0[16*16] );
- DECLARE_ALIGNED_16( uint8_t tmp1[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, tmp0,[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, tmp1,[16*16] );
uint8_t *src0, *src1;
src0 = h->mc.get_ref( tmp0, &i_stride0, h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
#else
#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
#endif
-#define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
-#define DECLARE_ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
-#define DECLARE_ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
+#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
+#define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
+#define ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
+
+// current arm compilers only maintain 8-byte stack alignment
+// and cannot align stack variables to more than 8 bytes
+#ifdef ARCH_ARM
+#define ALIGNED_ARRAY_16( type, name, sub1, ... )\
+ ALIGNED_8( uint8_t name##_8 [sizeof(type sub1 __VA_ARGS__) + 8] );\
+ type (*name) __VA_ARGS__ = (void*)(name##_8 + ((intptr_t)name##_8 & 8))
+#else
+#define ALIGNED_ARRAY_16( type, name, sub1, ... )\
+ ALIGNED_16( type name sub1 __VA_ARGS__ )
+#endif
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#define UNUSED __attribute__((unused))
static inline void write16x4(uint8_t *dst, int dst_stride,
register vec_u8_t r0, register vec_u8_t r1,
register vec_u8_t r2, register vec_u8_t r3) {
- DECLARE_ALIGNED_16(unsigned char result[64]);
+ ALIGNED_16(unsigned char result[64]);
uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
int int_dst_stride = dst_stride/4;
}
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
- DECLARE_ALIGNED_16(unsigned char temp[16]); \
+ ALIGNED_16(unsigned char temp[16]); \
register vec_u8_t alphavec; \
register vec_u8_t betavec; \
register vec_u8_t mask; \
int d8x = mvx & 0x07;
int d8y = mvy & 0x07;
- DECLARE_ALIGNED_16( uint16_t coeff[4] );
+ ALIGNED_16( uint16_t coeff[4] );
coeff[0] = (8-d8x)*(8-d8y);
coeff[1] = d8x *(8-d8y);
coeff[2] = (8-d8x)*d8y;
int d8x = mvx & 0x07;
int d8y = mvy & 0x07;
- DECLARE_ALIGNED_16( uint16_t coeff[4] );
+ ALIGNED_16( uint16_t coeff[4] );
coeff[0] = (8-d8x)*(8-d8y);
coeff[1] = d8x *(8-d8y);
coeff[2] = (8-d8x)*d8y;
uint8_t *pix2, int i_pix2 ) \
{ \
int y; \
- DECLARE_ALIGNED_16( int sum ); \
+ ALIGNED_16( int sum ); \
\
LOAD_ZERO; \
PREP_LOAD; \
static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED_16( int i_satd );
+ ALIGNED_16( int i_satd );
PREP_DIFF;
PREP_LOAD_SRC( pix1 );
static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED_16( int i_satd );
+ ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v;
static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED_16( int i_satd );
+ ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED_16( int i_satd );
+ ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED_16( int i_satd );
+ ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED_16( int i_satd );
+ ALIGNED_16( int i_satd );
LOAD_ZERO;
PREP_LOAD;
static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED_16( int i_satd );
+ ALIGNED_16( int i_satd );
LOAD_ZERO;
PREP_LOAD;
uint8_t *pix2, uint8_t *pix3,
int i_stride, int scores[4] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
- DECLARE_ALIGNED_16( int sum3 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
uint8_t *pix1, uint8_t *pix2,
int i_stride, int scores[3] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
- DECLARE_ALIGNED_16( int sum3 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
uint8_t *pix1, uint8_t *pix2,
int i_stride, int scores[3] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
uint8_t *pix2, uint8_t *pix3,
int i_stride, int scores[4] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
- DECLARE_ALIGNED_16( int sum3 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
uint8_t *pix1, uint8_t *pix2,
int i_stride, int scores[3] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
uint8_t *pix2, uint8_t *pix3,
int i_stride, int scores[4] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
- DECLARE_ALIGNED_16( int sum3 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
uint8_t *pix1, uint8_t *pix2,
int i_stride, int scores[3] )
{
- DECLARE_ALIGNED_16( int sum0 );
- DECLARE_ALIGNED_16( int sum1 );
- DECLARE_ALIGNED_16( int sum2 );
+ ALIGNED_16( int sum0 );
+ ALIGNED_16( int sum1 );
+ ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
uint8_t *pix2, int i_stride_pix2)
{
- DECLARE_ALIGNED_16( int sum );
+ ALIGNED_16( int sum );
int y;
LOAD_ZERO;
static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
uint8_t *pix2, int i_stride_pix2)
{
- DECLARE_ALIGNED_16( int sum );
+ ALIGNED_16( int sum );
int y;
LOAD_ZERO;
****************************************************************************/
static int x264_pixel_var_16x16_altivec( uint8_t *pix, int i_stride )
{
- DECLARE_ALIGNED_16(uint32_t sum_tab[4]);
- DECLARE_ALIGNED_16(uint32_t sqr_tab[4]);
+ ALIGNED_16(uint32_t sum_tab[4]);
+ ALIGNED_16(uint32_t sqr_tab[4]);
LOAD_ZERO;
vec_u32_t sqr_v = zero_u32v;
static int x264_pixel_var_8x8_altivec( uint8_t *pix, int i_stride )
{
- DECLARE_ALIGNED_16(uint32_t sum_tab[4]);
- DECLARE_ALIGNED_16(uint32_t sqr_tab[4]);
+ ALIGNED_16(uint32_t sum_tab[4]);
+ ALIGNED_16(uint32_t sqr_tab[4]);
LOAD_ZERO;
vec_u32_t sqr_v = zero_u32v;
static uint64_t pixel_hadamard_ac_altivec( uint8_t *pix, int stride, const vec_u8_t perm )
{
- DECLARE_ALIGNED_16( int32_t sum4_tab[4] );
- DECLARE_ALIGNED_16( int32_t sum8_tab[4] );
+ ALIGNED_16( int32_t sum4_tab[4] );
+ ALIGNED_16( int32_t sum8_tab[4] );
LOAD_ZERO;
VEC_LOAD_HIGH( pix, 0 );
int sum8 = sum8_tab[3];
- DECLARE_ALIGNED_16( int16_t tmp0_4_tab[8] );
+ ALIGNED_16( int16_t tmp0_4_tab[8] );
vec_ste(vec_add(pix16_d0, pix16_d4), 0, tmp0_4_tab);
sum4 -= tmp0_4_tab[0];
const uint8_t *pix2, int stride2,
int sums[2][4] )
{
- DECLARE_ALIGNED_16( int temp[4] );
+ ALIGNED_16( int temp[4] );
int y;
vec_u8_t pix1v, pix2v;
extern void predict_16x16_v_sse2( uint8_t *src );
extern void predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
-DECLARE_ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
-DECLARE_ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
-DECLARE_ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
+ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
+ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
+ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#define PREDICT_P_SUM(j,i)\
H += i * ( src[j+i - FDEC_STRIDE ] - src[j-i - FDEC_STRIDE ] );\
PREDICT_8x8_LOAD_TOP\
PREDICT_8x8_LOAD_LEFT\
int t;\
- DECLARE_ALIGNED_16( int16_t sa8d_1d[2][8] );\
+ ALIGNED_16( int16_t sa8d_1d[2][8] );\
SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
/* 8x8 */
int i_cost8x8;
/* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
- DECLARE_ALIGNED_4( int16_t mvc[32][5][2] );
+ ALIGNED_4( int16_t mvc[32][5][2] );
x264_me_t me8x8[4];
/* Sub 4x4 */
/* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
- DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
- DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
- DECLARE_ALIGNED_16( static uint8_t zero[16*FDEC_STRIDE] ) = {0};
+ ALIGNED_ARRAY_16( int16_t, dct8x8,[4],[8][8] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[4][4] );
+ ALIGNED_16( static uint8_t zero[16*FDEC_STRIDE] ) = {0};
int i;
if( do_both_dct || h->mb.b_transform_8x8 )
/* Pre-calculate fenc satd scores for psy RD, minus DC coefficients */
static inline void x264_mb_cache_fenc_satd( x264_t *h )
{
- DECLARE_ALIGNED_16( static uint8_t zero[16] ) = {0};
+ ALIGNED_16( static uint8_t zero[16] ) = {0};
uint8_t *fenc;
int x, y, satd_sum = 0, sa8d_sum = 0;
if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
/* 8x8 prediction selection */
if( flags & X264_ANALYSE_I8x8 )
{
- DECLARE_ALIGNED_16( uint8_t edge[33] );
+ ALIGNED_ARRAY_16( uint8_t, edge,[33] );
x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
int i_cost = 0;
}
else if( h->mb.i_type == I_8x8 )
{
- DECLARE_ALIGNED_16( uint8_t edge[33] );
+ ALIGNED_ARRAY_16( uint8_t, edge,[33] );
for( idx = 0; idx < 4; idx++ )
{
uint64_t pels_h = 0;
{
x264_me_t m;
int i_ref, i_mvc;
- DECLARE_ALIGNED_4( int16_t mvc[8][2] );
+ ALIGNED_4( int16_t mvc[8][2] );
int i_halfpel_thresh = INT_MAX;
int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
- DECLARE_ALIGNED_4( int16_t mvc[3][2] );
+ ALIGNED_4( int16_t mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
- DECLARE_ALIGNED_4( int16_t mvc[3][2] );
+ ALIGNED_4( int16_t mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
{
- DECLARE_ALIGNED_8( uint8_t pix1[16*8] );
+ ALIGNED_8( uint8_t pix1[16*8] );
uint8_t *pix2 = pix1+8;
const int i_stride = h->mb.pic.i_stride[1];
const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
- DECLARE_ALIGNED_16( uint8_t pix0[16*16] );
- DECLARE_ALIGNED_16( uint8_t pix1[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, pix0,[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, pix1,[16*16] );
uint8_t *src0, *src1;
int stride0 = 16, stride1 = 16;
x264_me_t m;
int i_ref, i_mvc;
- DECLARE_ALIGNED_4( int16_t mvc[9][2] );
+ ALIGNED_4( int16_t mvc[9][2] );
int i_halfpel_thresh = INT_MAX;
int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
- DECLARE_ALIGNED_8( uint8_t pix[2][8*8] );
+ ALIGNED_8( uint8_t pix[2][8*8] );
int i, l;
/* XXX Needed for x264_mb_predict_mv */
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
- DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );
- DECLARE_ALIGNED_4( int16_t mvc[2][2] );
+ ALIGNED_ARRAY_16( uint8_t, pix,[2],[16*8] );
+ ALIGNED_4( int16_t mvc[2][2] );
int i, l;
h->mb.i_partition = D_16x8;
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
- DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );
- DECLARE_ALIGNED_4( int16_t mvc[2][2] );
+ ALIGNED_8( uint8_t pix[2][8*16] );
+ ALIGNED_4( int16_t mvc[2][2] );
int i, l;
h->mb.i_partition = D_8x16;
static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
{
- DECLARE_ALIGNED_4( int16_t mvp[2] );
+ ALIGNED_4( int16_t mvp[2] );
uint32_t amvd;
int mdx, mdy;
static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
{
- DECLARE_ALIGNED_4( int16_t mvp[2] );
+ ALIGNED_4( int16_t mvp[2] );
x264_mb_predict_mv( h, i_list, idx, width, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
int nz;
uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
- DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4] );
if( h->mb.b_lossless )
{
int nz;
uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
- DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
+ ALIGNED_ARRAY_16( int16_t, dct8x8,[8],[8] );
if( h->mb.b_lossless )
{
uint8_t *p_src = h->mb.pic.p_fenc[0];
uint8_t *p_dst = h->mb.pic.p_fdec[0];
- DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
- DECLARE_ALIGNED_16( int16_t dct_dc4x4[4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[4],[4] );
int i, nz;
int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
{
int i, ch, nz, nz_dc;
int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
- DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
+ ALIGNED_ARRAY_16( int16_t, dct2x2,[2],[2] );
h->mb.i_cbp_chroma = 0;
/* Early termination: check variance of chroma residual before encoding.
int i_decimate_score = 0;
int nz_ac = 0;
- DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4][4] );
if( h->mb.b_lossless )
{
}
else if( h->mb.i_type == I_8x8 )
{
- DECLARE_ALIGNED_16( uint8_t edge[33] );
+ ALIGNED_ARRAY_16( uint8_t, edge,[33] );
h->mb.b_transform_8x8 = 1;
/* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
if( h->mb.i_skip_intra )
}
else if( h->mb.b_transform_8x8 )
{
- DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
+ ALIGNED_ARRAY_16( int16_t, dct8x8,[4],[8][8] );
b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
h->nr_count[1] += h->mb.b_noise_reduction * 4;
}
else
{
- DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[4][4] );
h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
h->nr_count[0] += h->mb.b_noise_reduction * 16;
*****************************************************************************/
int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
{
- DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
- DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
- DECLARE_ALIGNED_16( int16_t dctscan[16] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct2x2,[2],[2] );
+ ALIGNED_ARRAY_16( int16_t, dctscan,[16] );
int i_qp = h->mb.i_qp;
int mvp[2];
{
if( h->mb.b_transform_8x8 )
{
- DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
+ ALIGNED_ARRAY_16( int16_t, dct8x8,[8],[8] );
h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
nnz8x8 = x264_quant_8x8( h, dct8x8, i_qp, 0, i8 );
if( nnz8x8 )
{
int i4;
int i_decimate_8x8 = 0;
- DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4][4] );
h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
for( i4 = 0; i4 < 4; i4++ )
{
for( ch = 0; ch < 2; ch++ )
{
- DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4] );
p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
}
else
{
- DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
+ ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4] );
h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
nz = x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 0, i4 );
h->mb.cache.non_zero_count[x264_scan8[i4]] = nz;
int omx, omy, pmx, pmy;
uint8_t *p_fenc = m->p_fenc[0];
uint8_t *p_fref = m->p_fref[0];
- DECLARE_ALIGNED_16( uint8_t pix[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, pix,[16*16] );
int i, j;
int dir;
uint16_t *sums_base = m->integral;
/* due to a GCC bug on some platforms (win32?), zero[] may not actually be aligned.
* this is not a problem because it is not used for any SSE instructions. */
- DECLARE_ALIGNED_16( static uint8_t zero[8*FENC_STRIDE] );
- DECLARE_ALIGNED_16( int enc_dc[4] );
+ ALIGNED_16( static uint8_t zero[8*FENC_STRIDE] );
+ ALIGNED_ARRAY_16( int, enc_dc,[4] );
int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
int delta = x264_pixel_size[sad_size].w;
int16_t *xs = h->scratch_buffer;
const int i_pixel = m->i_pixel;
const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
- DECLARE_ALIGNED_16( uint8_t pix[2][32*18] ); // really 17x17, but round up for alignment
+ ALIGNED_ARRAY_16( uint8_t, pix,[2],[32*18] ); // really 17x17, but round up for alignment
int omx, omy;
int i;
const int16_t *p_cost_m0y = m0->p_cost_mv - m0->mvp[1];
const int16_t *p_cost_m1x = m1->p_cost_mv - m1->mvp[0];
const int16_t *p_cost_m1y = m1->p_cost_mv - m1->mvp[1];
- DECLARE_ALIGNED_16( uint8_t pixy_buf[2][9][16*16] );
- DECLARE_ALIGNED_8( uint8_t pixu_buf[2][9][8*8] );
- DECLARE_ALIGNED_8( uint8_t pixv_buf[2][9][8*8] );
+ ALIGNED_ARRAY_16( uint8_t, pixy_buf,[2],[9][16*16] );
+ ALIGNED_8( uint8_t pixu_buf[2][9][8*8] );
+ ALIGNED_8( uint8_t pixv_buf[2][9][8*8] );
uint8_t *src0[9];
uint8_t *src1[9];
uint8_t *pix = &h->mb.pic.p_fdec[0][(i8>>1)*8*FDEC_STRIDE+(i8&1)*8];
int mc_list0 = 1, mc_list1 = 1;
uint64_t bcostrd = COST_MAX64;
/* each byte of visited represents 8 possible m1y positions, so a 4D array isn't needed */
- DECLARE_ALIGNED_16( uint8_t visited[8][8][8] );
+ ALIGNED_ARRAY_16( uint8_t, visited,[8],[8][8] );
/* all permutations of an offset in up to 2 of the dimensions */
static const int8_t dia4d[32][4] = {
{0,0,0,1}, {0,0,0,-1}, {0,0,1,0}, {0,0,-1,0},
bm0y > h->mb.mv_max_spel[1] - 8 || bm1y > h->mb.mv_max_spel[1] - 8 )
return;
- h->mc.memzero_aligned( visited, sizeof(visited) );
+ h->mc.memzero_aligned( visited, sizeof(uint8_t[8][8][8]) );
BIME_CACHE( 0, 0, 0 );
BIME_CACHE( 0, 0, 1 );
const int bh = x264_pixel_size[m->i_pixel].h>>2;
const int i_pixel = m->i_pixel;
- DECLARE_ALIGNED_16( uint8_t pix[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, pix,[16*16] );
uint64_t bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX64;
int bmx = m->mv[0];
int bmy = m->mv[1];
uint16_t *integral;
int i_stride[2];
- DECLARE_ALIGNED_4( int16_t mvp[2] );
+ ALIGNED_4( int16_t mvp[2] );
/* output */
int cost_mv; /* lambda * nbits for the chosen mv */
int cost; /* satd + lambda * nbits */
- DECLARE_ALIGNED_4( int16_t mv[2] );
-} DECLARE_ALIGNED_16( x264_me_t );
+ ALIGNED_4( int16_t mv[2] );
+} ALIGNED_16( x264_me_t );
typedef struct {
int sad;
static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
{
- DECLARE_ALIGNED_16(static uint8_t zero[16]);
+ ALIGNED_16(static uint8_t zero[16]);
int satd = 0;
uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] };
int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
- DECLARE_ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
+ ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
uint8_t *pix2 = pix1+8;
x264_me_t m[2];
int i_bcost = COST_MAX;
{
int i_mvc = 0;
int16_t (*fenc_mv)[2] = fenc_mvs[l];
- DECLARE_ALIGNED_4( int16_t mvc[4][2] );
+ ALIGNED_4( int16_t mvc[4][2] );
/* Reverse-order MV prediction. */
*(uint32_t*)mvc[0] = 0;
int i_icost, b_intra;
if( !fenc->b_intra_calculated )
{
- DECLARE_ALIGNED_16( uint8_t edge[33] );
+ ALIGNED_ARRAY_16( uint8_t, edge,[33] );
uint8_t *pix = &pix1[8+FDEC_STRIDE - 1];
uint8_t *src = &fenc->lowres[0][i_pel_offset - 1];
const int intra_penalty = 5;
// GCC doesn't align stack variables on ARM, so use .bss
#ifdef ARCH_ARM
-#undef DECLARE_ALIGNED_16
-#define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( static var, 16 )
+#undef ALIGNED_16
+#define ALIGNED_16( var ) DECLARE_ALIGNED( static var, 16 )
#endif
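A minimal sketch (assumption: GCC-style attributes; the wrapper is invented for the example) of why the .bss workaround is sufficient: the aligned(16) attribute is honored for objects with static storage duration even when the stack itself is only 8-byte aligned:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* static storage: placed in .bss, where aligned(16) is respected */
        static uint8_t edge[33] __attribute__((aligned(16)));
        printf("16-byte aligned: %d\n", (int)(((intptr_t)edge & 15) == 0)); /* 1 */
        return 0;
    }

The cost is that such buffers are no longer reentrant, which is presumably acceptable for the single-threaded checkasm tests, while the encoder proper uses the pointer-based ALIGNED_ARRAY_16 instead.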
/* buf1, buf2: initialised to random data and shouldn't write into them */
x264_predict_t predict_4x4[9+3];
x264_predict8x8_t predict_8x8[9+3];
x264_predict_8x8_filter_t predict_8x8_filter;
- DECLARE_ALIGNED_16( uint8_t edge[33] );
+ ALIGNED_16( uint8_t edge[33] );
uint16_t cost_mv[32];
int ret = 0, ok, used_asm;
int i, j;
pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )
{
float res_c, res_a;
- DECLARE_ALIGNED_16( int sums[5][4] ) = {{0}};
+ ALIGNED_16( int sums[5][4] ) = {{0}};
used_asm = ok = 1;
x264_emms();
res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28, buf3 );
for( i=0; i<100 && ok; i++ )
if( pixel_asm.ads[i&3] != pixel_ref.ads[i&3] )
{
- DECLARE_ALIGNED_16( uint16_t sums[72] );
- DECLARE_ALIGNED_16( int dc[4] );
+ ALIGNED_16( uint16_t sums[72] );
+ ALIGNED_16( int dc[4] );
int16_t mvs_a[32], mvs_c[32];
int mvn_a, mvn_c;
int thresh = rand() & 0x3fff;
x264_dct_function_t dct_asm;
x264_quant_function_t qf;
int ret = 0, ok, used_asm, i, j, interlace;
- DECLARE_ALIGNED_16( int16_t dct1[16][4][4] );
- DECLARE_ALIGNED_16( int16_t dct2[16][4][4] );
- DECLARE_ALIGNED_16( int16_t dct4[16][4][4] );
- DECLARE_ALIGNED_16( int16_t dct8[4][8][8] );
- DECLARE_ALIGNED_8( int16_t dctdc[2][2][2] );
+ ALIGNED_16( int16_t dct1[16][4][4] );
+ ALIGNED_16( int16_t dct2[16][4][4] );
+ ALIGNED_16( int16_t dct4[16][4][4] );
+ ALIGNED_16( int16_t dct8[4][8][8] );
+ ALIGNED_8( int16_t dctdc[2][2][2] );
x264_t h_buf;
x264_t *h = &h_buf;
x264_zigzag_function_t zigzag_ref;
x264_zigzag_function_t zigzag_asm;
- DECLARE_ALIGNED_16( int16_t level1[64] );
- DECLARE_ALIGNED_16( int16_t level2[64] );
+ ALIGNED_16( int16_t level1[64] );
+ ALIGNED_16( int16_t level2[64] );
#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
if( zigzag_asm.name != zigzag_ref.name ) \
x264_quant_function_t qf_c;
x264_quant_function_t qf_ref;
x264_quant_function_t qf_a;
- DECLARE_ALIGNED_16( int16_t dct1[64] );
- DECLARE_ALIGNED_16( int16_t dct2[64] );
- DECLARE_ALIGNED_16( uint8_t cqm_buf[64] );
+ ALIGNED_16( int16_t dct1[64] );
+ ALIGNED_16( int16_t dct2[64] );
+ ALIGNED_16( uint8_t cqm_buf[64] );
int ret = 0, ok, used_asm;
int oks[2] = {1,1}, used_asms[2] = {0,0};
int i, j, i_cqm, qp;
{
int ret = 0, ok = 1, used_asm = 0;
int i;
- DECLARE_ALIGNED_16( uint8_t edge[33] );
- DECLARE_ALIGNED_16( uint8_t edge2[33] );
+ ALIGNED_16( uint8_t edge[33] );
+ ALIGNED_16( uint8_t edge2[33] );
struct
{
x264_predict_t predict_16x16[4+3];