git.sesse.net Git - x264/commitdiff
GSOC merge part 2: ARM stack alignment
author David Conrad <lessen42@gmail.com>
Sun, 23 Aug 2009 06:40:33 +0000 (23:40 -0700)
committer Fiona Glaser <fiona@x264.com>
Sun, 23 Aug 2009 06:40:33 +0000 (23:40 -0700)
Neither GCC nor ARMCC supports 16-byte stack alignment, despite the fact that NEON loads require it.
These macros only work for arrays, but fortunately that covers almost all instances of stack alignment in x264.
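
For context, the ARM fallback added to common/osdep.h (see the hunk below) obtains 16-byte alignment by over-allocating an 8-byte-aligned byte buffer and rounding the pointer up to the next 16-byte boundary, which is why the trick only applies to arrays. A minimal standalone sketch of the same idea, using hypothetical SKETCH_* names rather than x264's real macros and assuming a GCC-style __attribute__((aligned)):

    /* Over-allocate by 8 bytes; if the 8-byte-aligned buffer lands on an odd
     * 8-byte boundary, bump the pointer by 8 so it ends up 16-byte aligned. */
    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_ALIGNED_8( var ) var __attribute__((aligned(8)))

    #define SKETCH_ALIGNED_ARRAY_16( type, name, sub1, ... )\
        SKETCH_ALIGNED_8( uint8_t name##_8 [sizeof(type sub1 __VA_ARGS__) + 8] );\
        type (*name) __VA_ARGS__ = (void*)(name##_8 + ((intptr_t)name##_8 & 8))

    int main( void )
    {
        /* behaves like "int16_t dct4x4[16][4][4]", but accessed through a
         * pointer that is guaranteed to be 16-byte aligned */
        SKETCH_ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[4][4] );
        dct4x4[0][0][0] = 1;
        printf( "%p -> 16-byte aligned: %d\n", (void*)dct4x4,
                (int)(((intptr_t)dct4x4 & 15) == 0) );
        return 0;
    }

Because the macro declares a pointer rather than the array itself, sizeof(name) no longer yields the array size on ARM; this is also why the commit replaces sizeof(visited) with sizeof(uint8_t[8][8][8]) in the memzero_aligned call in encoder/me.c.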

18 files changed:
common/cabac.h
common/common.h
common/frame.c
common/macroblock.c
common/osdep.h
common/ppc/deblock.c
common/ppc/mc.c
common/ppc/pixel.c
common/x86/predict-c.c
encoder/analyse.c
encoder/cabac.c
encoder/cavlc.c
encoder/macroblock.c
encoder/me.c
encoder/me.h
encoder/rdo.c
encoder/slicetype.c
tools/checkasm.c

common/cabac.h
index 9d0fddd2899038687f2882bc86e0f4332de111a5..35871b49ae6343aaee581b786fb3eb2998a49866 100644 (file)
@@ -39,7 +39,7 @@ typedef struct
     uint8_t *p_end;
 
     /* aligned for memcpy_aligned starting here */
-    DECLARE_ALIGNED_16( int f8_bits_encoded ); // only if using x264_cabac_size_decision()
+    ALIGNED_16( int f8_bits_encoded ); // only if using x264_cabac_size_decision()
 
     /* context */
     uint8_t state[460];
common/common.h
index 166ced26653312263f0e0950c3ae78f00a9933d1..f78a35c1d5d73f4d577d87d49d777bed20772fce 100644 (file)
@@ -359,8 +359,8 @@ struct x264_t
 
     const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
 
-    DECLARE_ALIGNED_16( uint32_t nr_residual_sum[2][64] );
-    DECLARE_ALIGNED_16( uint16_t nr_offset[2][64] );
+    ALIGNED_16( uint32_t nr_residual_sum[2][64] );
+    ALIGNED_16( uint16_t nr_offset[2][64] );
     uint32_t        nr_count[2];
 
     /* Slice header */
@@ -413,11 +413,11 @@ struct x264_t
     /* Current MB DCT coeffs */
     struct
     {
-        DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] );
-        DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] );
+        ALIGNED_16( int16_t luma16x16_dc[16] );
+        ALIGNED_16( int16_t chroma_dc[2][4] );
         // FIXME share memory?
-        DECLARE_ALIGNED_16( int16_t luma8x8[4][64] );
-        DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] );
+        ALIGNED_16( int16_t luma8x8[4][64] );
+        ALIGNED_16( int16_t luma4x4[16+8][16] );
     } dct;
 
     /* MB table and cache for current frame/mb */
@@ -494,7 +494,7 @@ struct x264_t
         /* current value */
         int     i_type;
         int     i_partition;
-        DECLARE_ALIGNED_4( uint8_t i_sub_partition[4] );
+        ALIGNED_4( uint8_t i_sub_partition[4] );
         int     b_transform_8x8;
 
         int     i_cbp_luma;
@@ -517,22 +517,22 @@ struct x264_t
             /* space for p_fenc and p_fdec */
 #define FENC_STRIDE 16
 #define FDEC_STRIDE 32
-            DECLARE_ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
-            DECLARE_ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
+            ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
+            ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
 
             /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
-            DECLARE_ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
-            DECLARE_ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
-            DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
-            DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
+            ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
+            ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
+            ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
+            ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
             uint32_t i4x4_nnz_buf[4];
             uint32_t i8x8_nnz_buf[4];
             int i4x4_cbp;
             int i8x8_cbp;
 
             /* Psy trellis DCT data */
-            DECLARE_ALIGNED_16( int16_t fenc_dct8[4][64] );
-            DECLARE_ALIGNED_16( int16_t fenc_dct4[16][16] );
+            ALIGNED_16( int16_t fenc_dct8[4][64] );
+            ALIGNED_16( int16_t fenc_dct4[16][16] );
 
             /* Psy RD SATD scores */
             int fenc_satd[4][4];
@@ -567,18 +567,18 @@ struct x264_t
             uint8_t non_zero_count[X264_SCAN8_SIZE];
 
             /* -1 if unused, -2 if unavailable */
-            DECLARE_ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
+            ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
 
             /* 0 if not available */
-            DECLARE_ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
-            DECLARE_ALIGNED_8( int16_t mvd[2][X264_SCAN8_SIZE][2] );
+            ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
+            ALIGNED_8( int16_t mvd[2][X264_SCAN8_SIZE][2] );
 
             /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
-            DECLARE_ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
+            ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
 
-            DECLARE_ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
-            DECLARE_ALIGNED_4( int8_t  direct_ref[2][X264_SCAN8_SIZE] );
-            DECLARE_ALIGNED_4( int16_t pskip_mv[2] );
+            ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
+            ALIGNED_4( int8_t  direct_ref[2][X264_SCAN8_SIZE] );
+            ALIGNED_4( int16_t pskip_mv[2] );
 
             /* number of neighbors (top and left) that used 8x8 dct */
             int     i_neighbour_transform_size;
common/frame.c
index d13cc7429ea9df5438c3e969f90c91b9ede8a004..18ddbc1815e7df44bef7c2519678fc27d4dae599 100644 (file)
@@ -764,7 +764,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
         {\
             int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
             int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
-            DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
+            ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
             if( i_edge )\
                 i_edge+= b_8x8_transform;\
             else\
common/macroblock.c
index 4b282285843931105d197a3f5b0d55ac1e793168..f8b20c793e670e03c6ba1e03162c938c9bc7bee0 100644 (file)
@@ -221,7 +221,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
 static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
 {
     int ref[2];
-    DECLARE_ALIGNED_8( int16_t mv[2][2] );
+    ALIGNED_8( int16_t mv[2][2] );
     int i_list;
     int i8;
     const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
@@ -520,8 +520,8 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
     int       mvy1   = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
     int       i_mode = x264_size2pixel[height][width];
     int       i_stride0 = 16, i_stride1 = 16;
-    DECLARE_ALIGNED_16( uint8_t tmp0[16*16] );
-    DECLARE_ALIGNED_16( uint8_t tmp1[16*16] );
+    ALIGNED_ARRAY_16( uint8_t, tmp0,[16*16] );
+    ALIGNED_ARRAY_16( uint8_t, tmp1,[16*16] );
     uint8_t *src0, *src1;
 
     src0 = h->mc.get_ref( tmp0, &i_stride0, h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
common/osdep.h
index a691d06dcd293dcbcca77d73ca38ab0ffb3fff9f..696bbc9d015a602ee81de8abceffdf79f5b3b29e 100644 (file)
 #else
 #define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
 #endif
-#define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
-#define DECLARE_ALIGNED_8( var )  DECLARE_ALIGNED( var, 8 )
-#define DECLARE_ALIGNED_4( var )  DECLARE_ALIGNED( var, 4 )
+#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
+#define ALIGNED_8( var )  DECLARE_ALIGNED( var, 8 )
+#define ALIGNED_4( var )  DECLARE_ALIGNED( var, 4 )
+
+// current arm compilers only maintain 8-byte stack alignment
+// and cannot align stack variables to more than 8-bytes
+#ifdef ARCH_ARM
+#define ALIGNED_ARRAY_16( type, name, sub1, ... )\
+    ALIGNED_8( uint8_t name##_8 [sizeof(type sub1 __VA_ARGS__) + 8] );\
+    type (*name) __VA_ARGS__ = (void*)(name##_8 + ((intptr_t)name##_8 & 8))
+#else
+#define ALIGNED_ARRAY_16( type, name, sub1, ... )\
+    ALIGNED_16( type name sub1 __VA_ARGS__ )
+#endif
 
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
 #define UNUSED __attribute__((unused))
common/ppc/deblock.c
index 35318120a961b4f1a297e22f1dcd0d4174a614b2..14171e615c15c0db8c8c3be21be7bde5981db76f 100644 (file)
@@ -41,7 +41,7 @@
 static inline void write16x4(uint8_t *dst, int dst_stride,
                              register vec_u8_t r0, register vec_u8_t r1,
                              register vec_u8_t r2, register vec_u8_t r3) {
-    DECLARE_ALIGNED_16(unsigned char result[64]);
+    ALIGNED_16(unsigned char result[64]);
     uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
     int int_dst_stride = dst_stride/4;
 
@@ -220,7 +220,7 @@ static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
 }
 
 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) {            \
-    DECLARE_ALIGNED_16(unsigned char temp[16]);                                              \
+    ALIGNED_16(unsigned char temp[16]);                                              \
     register vec_u8_t alphavec;                                                              \
     register vec_u8_t betavec;                                                               \
     register vec_u8_t mask;                                                                  \
common/ppc/mc.c
index 56ec9c1af3ff4234f6d6496f0f574ca6b1e795a5..c703d085bc66be163531cf4c7624174a38bc6a27 100644 (file)
@@ -303,7 +303,7 @@ static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
     int d8x = mvx & 0x07;
     int d8y = mvy & 0x07;
 
-    DECLARE_ALIGNED_16( uint16_t coeff[4] );
+    ALIGNED_16( uint16_t coeff[4] );
     coeff[0] = (8-d8x)*(8-d8y);
     coeff[1] = d8x    *(8-d8y);
     coeff[2] = (8-d8x)*d8y;
@@ -384,7 +384,7 @@ static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride,
     int d8x = mvx & 0x07;
     int d8y = mvy & 0x07;
 
-    DECLARE_ALIGNED_16( uint16_t coeff[4] );
+    ALIGNED_16( uint16_t coeff[4] );
     coeff[0] = (8-d8x)*(8-d8y);
     coeff[1] = d8x    *(8-d8y);
     coeff[2] = (8-d8x)*d8y;
common/ppc/pixel.c
index 360e71d853c816db31058bbf77faf8131b50b09a..844d7f4f69d1620cb9f72142368d0f8f50ea7f4b 100644 (file)
@@ -33,7 +33,7 @@ static int name( uint8_t *pix1, int i_pix1,            \
                  uint8_t *pix2, int i_pix2 )           \
 {                                                      \
     int y;                                             \
-    DECLARE_ALIGNED_16( int sum );                     \
+    ALIGNED_16( int sum );                     \
                                                        \
     LOAD_ZERO;                                         \
     PREP_LOAD;                                         \
@@ -118,7 +118,7 @@ PIXEL_SAD_ALTIVEC( pixel_sad_8x8_altivec,   8,  8,  2s, 1 )
 static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED_16( int i_satd );
+    ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     PREP_LOAD_SRC( pix1 );
@@ -163,7 +163,7 @@ static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED_16( int i_satd );
+    ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v;
@@ -217,7 +217,7 @@ static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED_16( int i_satd );
+    ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v,
@@ -271,7 +271,7 @@ static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED_16( int i_satd );
+    ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v,
@@ -331,7 +331,7 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
                                     uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED_16( int i_satd );
+    ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v,
@@ -415,7 +415,7 @@ static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
                                     uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED_16( int i_satd );
+    ALIGNED_16( int i_satd );
 
     LOAD_ZERO;
     PREP_LOAD;
@@ -499,7 +499,7 @@ static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
                                      uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED_16( int i_satd );
+    ALIGNED_16( int i_satd );
 
     LOAD_ZERO;
     PREP_LOAD;
@@ -630,10 +630,10 @@ static void pixel_sad_x4_16x16_altivec( uint8_t *fenc,
                                         uint8_t *pix2, uint8_t *pix3,
                                         int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
-    DECLARE_ALIGNED_16( int sum3 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -751,9 +751,9 @@ static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0,
                                         uint8_t *pix1, uint8_t *pix2,
                                         int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -846,10 +846,10 @@ static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0,
 
 static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
-    DECLARE_ALIGNED_16( int sum3 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -964,9 +964,9 @@ static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0,
                                        uint8_t *pix1, uint8_t *pix2,
                                        int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -1062,10 +1062,10 @@ static void pixel_sad_x4_8x16_altivec( uint8_t *fenc,
                                        uint8_t *pix2, uint8_t *pix3,
                                        int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
-    DECLARE_ALIGNED_16( int sum3 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -1183,9 +1183,9 @@ static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0,
                                        uint8_t *pix1, uint8_t *pix2,
                                        int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -1283,10 +1283,10 @@ static void pixel_sad_x4_8x8_altivec( uint8_t *fenc,
                                       uint8_t *pix2, uint8_t *pix3,
                                       int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
-    DECLARE_ALIGNED_16( int sum3 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -1404,9 +1404,9 @@ static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0,
                                       uint8_t *pix1, uint8_t *pix2,
                                       int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED_16( int sum0 );
-    DECLARE_ALIGNED_16( int sum1 );
-    DECLARE_ALIGNED_16( int sum2 );
+    ALIGNED_16( int sum0 );
+    ALIGNED_16( int sum1 );
+    ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -1506,7 +1506,7 @@ static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0,
 static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
                                      uint8_t *pix2, int i_stride_pix2)
 {
-    DECLARE_ALIGNED_16( int sum );
+    ALIGNED_16( int sum );
 
     int y;
     LOAD_ZERO;
@@ -1586,7 +1586,7 @@ static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
 static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
                                    uint8_t *pix2, int i_stride_pix2)
 {
-    DECLARE_ALIGNED_16( int sum );
+    ALIGNED_16( int sum );
 
     int y;
     LOAD_ZERO;
@@ -1638,8 +1638,8 @@ static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
  ****************************************************************************/
 static int x264_pixel_var_16x16_altivec( uint8_t *pix, int i_stride )
 {
-    DECLARE_ALIGNED_16(uint32_t sum_tab[4]);
-    DECLARE_ALIGNED_16(uint32_t sqr_tab[4]);
+    ALIGNED_16(uint32_t sum_tab[4]);
+    ALIGNED_16(uint32_t sqr_tab[4]);
 
     LOAD_ZERO;
     vec_u32_t sqr_v = zero_u32v;
@@ -1667,8 +1667,8 @@ static int x264_pixel_var_16x16_altivec( uint8_t *pix, int i_stride )
 
 static int x264_pixel_var_8x8_altivec( uint8_t *pix, int i_stride )
 {
-    DECLARE_ALIGNED_16(uint32_t sum_tab[4]);
-    DECLARE_ALIGNED_16(uint32_t sqr_tab[4]);
+    ALIGNED_16(uint32_t sum_tab[4]);
+    ALIGNED_16(uint32_t sqr_tab[4]);
 
     LOAD_ZERO;
     vec_u32_t sqr_v = zero_u32v;
@@ -1870,8 +1870,8 @@ static int pixel_sa8d_16x16_altivec( uint8_t *pix1, int i_pix1,
 
 static uint64_t pixel_hadamard_ac_altivec( uint8_t *pix, int stride, const vec_u8_t perm )
 {
-    DECLARE_ALIGNED_16( int32_t sum4_tab[4] );
-    DECLARE_ALIGNED_16( int32_t sum8_tab[4] );
+    ALIGNED_16( int32_t sum4_tab[4] );
+    ALIGNED_16( int32_t sum8_tab[4] );
     LOAD_ZERO;
 
     VEC_LOAD_HIGH( pix, 0 );
@@ -1937,7 +1937,7 @@ static uint64_t pixel_hadamard_ac_altivec( uint8_t *pix, int stride, const vec_u
 
     int sum8 = sum8_tab[3];
 
-    DECLARE_ALIGNED_16( int16_t tmp0_4_tab[8] );
+    ALIGNED_16( int16_t tmp0_4_tab[8] );
     vec_ste(vec_add(pix16_d0, pix16_d4), 0, tmp0_4_tab);
 
     sum4 -= tmp0_4_tab[0];
@@ -1997,7 +1997,7 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
                                      const uint8_t *pix2, int stride2,
                                      int sums[2][4] )
 {
-    DECLARE_ALIGNED_16( int temp[4] );
+    ALIGNED_16( int temp[4] );
 
     int y;
     vec_u8_t pix1v, pix2v;
common/x86/predict-c.c
index 0248038640c588be5e2f0e7e7eecfa9be52df4d0..5cfa6fd4a33486dcfe2e5f399bba92cf094c186e 100644 (file)
@@ -75,9 +75,9 @@ extern void predict_16x16_dc_left_core_sse2( uint8_t *src, int i_dc_left );
 extern void predict_16x16_v_sse2( uint8_t *src );
 extern void predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
 
-DECLARE_ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
-DECLARE_ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
-DECLARE_ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
+ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
+ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
+ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
 
 #define PREDICT_P_SUM(j,i)\
     H += i * ( src[j+i - FDEC_STRIDE ]  - src[j-i - FDEC_STRIDE ] );\
@@ -332,7 +332,7 @@ void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )
     PREDICT_8x8_LOAD_TOP\
     PREDICT_8x8_LOAD_LEFT\
     int t;\
-    DECLARE_ALIGNED_16( int16_t sa8d_1d[2][8] );\
+    ALIGNED_16( int16_t sa8d_1d[2][8] );\
     SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
     SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
     SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
encoder/analyse.c
index a566edcf0e9a173b6eddca9625906da4159a4cfb..c61bc25e7c5f47083d5946469a0015e8ae98f990 100644 (file)
@@ -47,7 +47,7 @@ typedef struct
     /* 8x8 */
     int       i_cost8x8;
     /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
-    DECLARE_ALIGNED_4( int16_t mvc[32][5][2] );
+    ALIGNED_4( int16_t mvc[32][5][2] );
     x264_me_t me8x8[4];
 
     /* Sub 4x4 */
@@ -540,9 +540,9 @@ static void predict_4x4_mode_available( unsigned int i_neighbour,
 /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
 static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
 {
-    DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
-    DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
-    DECLARE_ALIGNED_16( static uint8_t zero[16*FDEC_STRIDE] ) = {0};
+    ALIGNED_ARRAY_16( int16_t, dct8x8,[4],[8][8] );
+    ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[4][4] );
+    ALIGNED_16( static uint8_t zero[16*FDEC_STRIDE] ) = {0};
     int i;
 
     if( do_both_dct || h->mb.b_transform_8x8 )
@@ -562,7 +562,7 @@ static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
 /* Pre-calculate fenc satd scores for psy RD, minus DC coefficients */
 static inline void x264_mb_cache_fenc_satd( x264_t *h )
 {
-    DECLARE_ALIGNED_16( static uint8_t zero[16] ) = {0};
+    ALIGNED_16( static uint8_t zero[16] ) = {0};
     uint8_t *fenc;
     int x, y, satd_sum = 0, sa8d_sum = 0;
     if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
@@ -719,7 +719,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
     /* 8x8 prediction selection */
     if( flags & X264_ANALYSE_I8x8 )
     {
-        DECLARE_ALIGNED_16( uint8_t edge[33] );
+        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
         x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
         int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
         int i_cost = 0;
@@ -1044,7 +1044,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
     }
     else if( h->mb.i_type == I_8x8 )
     {
-        DECLARE_ALIGNED_16( uint8_t edge[33] );
+        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
         for( idx = 0; idx < 4; idx++ )
         {
             uint64_t pels_h = 0;
@@ -1125,7 +1125,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     int i_ref, i_mvc;
-    DECLARE_ALIGNED_4( int16_t mvc[8][2] );
+    ALIGNED_4( int16_t mvc[8][2] );
     int i_halfpel_thresh = INT_MAX;
     int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
 
@@ -1322,7 +1322,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     uint8_t  **p_fenc = h->mb.pic.p_fenc;
-    DECLARE_ALIGNED_4( int16_t mvc[3][2] );
+    ALIGNED_4( int16_t mvc[3][2] );
     int i, j;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1372,7 +1372,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     uint8_t  **p_fenc = h->mb.pic.p_fenc;
-    DECLARE_ALIGNED_4( int16_t mvc[3][2] );
+    ALIGNED_4( int16_t mvc[3][2] );
     int i, j;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1419,7 +1419,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 
 static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
 {
-    DECLARE_ALIGNED_8( uint8_t pix1[16*8] );
+    ALIGNED_8( uint8_t pix1[16*8] );
     uint8_t *pix2 = pix1+8;
     const int i_stride = h->mb.pic.i_stride[1];
     const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
@@ -1595,14 +1595,14 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
 
 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
 {
-    DECLARE_ALIGNED_16( uint8_t pix0[16*16] );
-    DECLARE_ALIGNED_16( uint8_t pix1[16*16] );
+    ALIGNED_ARRAY_16( uint8_t, pix0,[16*16] );
+    ALIGNED_ARRAY_16( uint8_t, pix1,[16*16] );
     uint8_t *src0, *src1;
     int stride0 = 16, stride1 = 16;
 
     x264_me_t m;
     int i_ref, i_mvc;
-    DECLARE_ALIGNED_4( int16_t mvc[9][2] );
+    ALIGNED_4( int16_t mvc[9][2] );
     int i_halfpel_thresh = INT_MAX;
     int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
 
@@ -1779,7 +1779,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
     uint8_t **p_fref[2] =
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
-    DECLARE_ALIGNED_8( uint8_t pix[2][8*8] );
+    ALIGNED_8( uint8_t pix[2][8*8] );
     int i, l;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1844,8 +1844,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
     uint8_t **p_fref[2] =
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
-    DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );
-    DECLARE_ALIGNED_4( int16_t mvc[2][2] );
+    ALIGNED_ARRAY_16( uint8_t, pix,[2],[16*8] );
+    ALIGNED_4( int16_t mvc[2][2] );
     int i, l;
 
     h->mb.i_partition = D_16x8;
@@ -1914,8 +1914,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
     uint8_t **p_fref[2] =
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
-    DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );
-    DECLARE_ALIGNED_4( int16_t mvc[2][2] );
+    ALIGNED_8( uint8_t pix[2][8*16] );
+    ALIGNED_4( int16_t mvc[2][2] );
     int i, l;
 
     h->mb.i_partition = D_8x16;
encoder/cabac.c
index 61d39d1a185d5c618b8c37a55f9c4924c208d3b6..751c52f6b5e0154b424cd8be8e45927deb935d45 100644 (file)
@@ -412,7 +412,7 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
 
 static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
 {
-    DECLARE_ALIGNED_4( int16_t mvp[2] );
+    ALIGNED_4( int16_t mvp[2] );
     uint32_t amvd;
     int mdx, mdy;
 
encoder/cavlc.c
index 0d88bfc6cc330db915a1d7fc0d22c78bc9b8af89..2ad05429683a815b5063190263ec164e045ee3ea 100644 (file)
@@ -227,7 +227,7 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
 
 static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
 {
-    DECLARE_ALIGNED_4( int16_t mvp[2] );
+    ALIGNED_4( int16_t mvp[2] );
     x264_mb_predict_mv( h, i_list, idx, width, mvp );
     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
encoder/macroblock.c
index 47f0f94353bfbb0548d8aff7c91f53cc7f136849..60e14b8fd3c2732acf7d8cdffb85919142455fc9 100644 (file)
@@ -130,7 +130,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
     int nz;
     uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
     uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
-    DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
+    ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4] );
 
     if( h->mb.b_lossless )
     {
@@ -166,7 +166,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
     int nz;
     uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
     uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
-    DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
+    ALIGNED_ARRAY_16( int16_t, dct8x8,[8],[8] );
 
     if( h->mb.b_lossless )
     {
@@ -196,8 +196,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
     uint8_t  *p_src = h->mb.pic.p_fenc[0];
     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 
-    DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
-    DECLARE_ALIGNED_16( int16_t dct_dc4x4[4][4] );
+    ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[4][4] );
+    ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[4],[4] );
 
     int i, nz;
     int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
@@ -280,7 +280,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
 {
     int i, ch, nz, nz_dc;
     int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
-    DECLARE_ALIGNED_16( int16_t dct2x2[2][2]  );
+    ALIGNED_ARRAY_16( int16_t, dct2x2,[2],[2] );
     h->mb.i_cbp_chroma = 0;
 
     /* Early termination: check variance of chroma residual before encoding.
@@ -336,7 +336,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
         int i_decimate_score = 0;
         int nz_ac = 0;
 
-        DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
+        ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4][4] );
 
         if( h->mb.b_lossless )
         {
@@ -579,7 +579,7 @@ void x264_macroblock_encode( x264_t *h )
     }
     else if( h->mb.i_type == I_8x8 )
     {
-        DECLARE_ALIGNED_16( uint8_t edge[33] );
+        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
         h->mb.b_transform_8x8 = 1;
         /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
         if( h->mb.i_skip_intra )
@@ -674,7 +674,7 @@ void x264_macroblock_encode( x264_t *h )
         }
         else if( h->mb.b_transform_8x8 )
         {
-            DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
+            ALIGNED_ARRAY_16( int16_t, dct8x8,[4],[8][8] );
             b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
             h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
             h->nr_count[1] += h->mb.b_noise_reduction * 4;
@@ -725,7 +725,7 @@ void x264_macroblock_encode( x264_t *h )
         }
         else
         {
-            DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
+            ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[4][4] );
             h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
             h->nr_count[0] += h->mb.b_noise_reduction * 16;
 
@@ -844,9 +844,9 @@ void x264_macroblock_encode( x264_t *h )
  *****************************************************************************/
 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
 {
-    DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
-    DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
-    DECLARE_ALIGNED_16( int16_t dctscan[16] );
+    ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4][4] );
+    ALIGNED_ARRAY_16( int16_t, dct2x2,[2],[2] );
+    ALIGNED_ARRAY_16( int16_t, dctscan,[16] );
 
     int i_qp = h->mb.i_qp;
     int mvp[2];
@@ -1012,7 +1012,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
     {
         if( h->mb.b_transform_8x8 )
         {
-            DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
+            ALIGNED_ARRAY_16( int16_t, dct8x8,[8],[8] );
             h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
             nnz8x8 = x264_quant_8x8( h, dct8x8, i_qp, 0, i8 );
             if( nnz8x8 )
@@ -1038,7 +1038,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
         {
             int i4;
             int i_decimate_8x8 = 0;
-            DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
+            ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4][4] );
             h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
             for( i4 = 0; i4 < 4; i4++ )
             {
@@ -1067,7 +1067,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
 
         for( ch = 0; ch < 2; ch++ )
         {
-            DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
+            ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4] );
             p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
             p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
 
@@ -1115,7 +1115,7 @@ void x264_macroblock_encode_p4x4( x264_t *h, int i4 )
     }
     else
     {
-        DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
+        ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[4] );
         h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
         nz = x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 0, i4 );
         h->mb.cache.non_zero_count[x264_scan8[i4]] = nz;
encoder/me.c
index 969e1a9ecffc7cebbb42d83abd290b5260540830..db18271122318e91378db2d0b89a778a645bc9d9 100644 (file)
@@ -182,7 +182,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
     int omx, omy, pmx, pmy;
     uint8_t *p_fenc = m->p_fenc[0];
     uint8_t *p_fref = m->p_fref[0];
-    DECLARE_ALIGNED_16( uint8_t pix[16*16] );
+    ALIGNED_ARRAY_16( uint8_t, pix,[16*16] );
 
     int i, j;
     int dir;
@@ -563,8 +563,8 @@ me_hex2:
             uint16_t *sums_base = m->integral;
             /* due to a GCC bug on some platforms (win32?), zero[] may not actually be aligned.
              * this is not a problem because it is not used for any SSE instructions. */
-            DECLARE_ALIGNED_16( static uint8_t zero[8*FENC_STRIDE] );
-            DECLARE_ALIGNED_16( int enc_dc[4] );
+            ALIGNED_16( static uint8_t zero[8*FENC_STRIDE] );
+            ALIGNED_ARRAY_16( int, enc_dc,[4] );
             int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
             int delta = x264_pixel_size[sad_size].w;
             int16_t *xs = h->scratch_buffer;
@@ -777,7 +777,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
     const int i_pixel = m->i_pixel;
     const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
 
-    DECLARE_ALIGNED_16( uint8_t pix[2][32*18] ); // really 17x17, but round up for alignment
+    ALIGNED_ARRAY_16( uint8_t, pix,[2],[32*18] );   // really 17x17, but round up for alignment
     int omx, omy;
     int i;
 
@@ -950,9 +950,9 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
     const int16_t *p_cost_m0y = m0->p_cost_mv - m0->mvp[1];
     const int16_t *p_cost_m1x = m1->p_cost_mv - m1->mvp[0];
     const int16_t *p_cost_m1y = m1->p_cost_mv - m1->mvp[1];
-    DECLARE_ALIGNED_16( uint8_t pixy_buf[2][9][16*16] );
-    DECLARE_ALIGNED_8( uint8_t pixu_buf[2][9][8*8] );
-    DECLARE_ALIGNED_8( uint8_t pixv_buf[2][9][8*8] );
+    ALIGNED_ARRAY_16( uint8_t, pixy_buf,[2],[9][16*16] );
+    ALIGNED_8( uint8_t pixu_buf[2][9][8*8] );
+    ALIGNED_8( uint8_t pixv_buf[2][9][8*8] );
     uint8_t *src0[9];
     uint8_t *src1[9];
     uint8_t *pix  = &h->mb.pic.p_fdec[0][(i8>>1)*8*FDEC_STRIDE+(i8&1)*8];
@@ -972,7 +972,7 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
     int mc_list0 = 1, mc_list1 = 1;
     uint64_t bcostrd = COST_MAX64;
     /* each byte of visited represents 8 possible m1y positions, so a 4D array isn't needed */
-    DECLARE_ALIGNED_16( uint8_t visited[8][8][8] );
+    ALIGNED_ARRAY_16( uint8_t, visited,[8],[8][8] );
     /* all permutations of an offset in up to 2 of the dimensions */
     static const int8_t dia4d[32][4] = {
         {0,0,0,1}, {0,0,0,-1}, {0,0,1,0}, {0,0,-1,0},
@@ -989,7 +989,7 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
         bm0y > h->mb.mv_max_spel[1] - 8 || bm1y > h->mb.mv_max_spel[1] - 8 )
         return;
 
-    h->mc.memzero_aligned( visited, sizeof(visited) );
+    h->mc.memzero_aligned( visited, sizeof(uint8_t[8][8][8]) );
 
     BIME_CACHE( 0, 0, 0 );
     BIME_CACHE( 0, 0, 1 );
@@ -1082,7 +1082,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
     const int bh = x264_pixel_size[m->i_pixel].h>>2;
     const int i_pixel = m->i_pixel;
 
-    DECLARE_ALIGNED_16( uint8_t pix[16*16] );
+    ALIGNED_ARRAY_16( uint8_t, pix,[16*16] );
     uint64_t bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX64;
     int bmx = m->mv[0];
     int bmy = m->mv[1];
encoder/me.h
index 3910f7472661917679e86995d297cc325d4dc948..24f296fe17fd4c6750660b49c050422bf2944bb3 100644 (file)
@@ -40,13 +40,13 @@ typedef struct
     uint16_t *integral;
     int      i_stride[2];
 
-    DECLARE_ALIGNED_4( int16_t mvp[2] );
+    ALIGNED_4( int16_t mvp[2] );
 
     /* output */
     int cost_mv;        /* lambda * nbits for the chosen mv */
     int cost;           /* satd + lambda * nbits */
-    DECLARE_ALIGNED_4( int16_t mv[2] );
-} DECLARE_ALIGNED_16( x264_me_t );
+    ALIGNED_4( int16_t mv[2] );
+} ALIGNED_16( x264_me_t );
 
 typedef struct {
     int sad;
encoder/rdo.c
index dd5d5fe2dfab2ecda9eb5cb8f62d18143a1a5c1a..453ccb5b38a1ce6c4014fa3967b038001a47a5c7 100644 (file)
@@ -104,7 +104,7 @@ static inline int sum_sa8d( x264_t *h, int pixel, int x, int y )
 
 static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
 {
-    DECLARE_ALIGNED_16(static uint8_t zero[16]);
+    ALIGNED_16(static uint8_t zero[16]);
     int satd = 0;
     uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
     uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
encoder/slicetype.c
index ba04b1e4b6c1a7fb54f6bd2b0bb02cf1009535b9..9c8c127bda5470f1c79c50e5b722c4ca4fdcaebb 100644 (file)
@@ -66,7 +66,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
     int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] };
     int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
 
-    DECLARE_ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
+    ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
     uint8_t *pix2 = pix1+8;
     x264_me_t m[2];
     int i_bcost = COST_MAX;
@@ -156,7 +156,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
         {
             int i_mvc = 0;
             int16_t (*fenc_mv)[2] = fenc_mvs[l];
-            DECLARE_ALIGNED_4( int16_t mvc[4][2] );
+            ALIGNED_4( int16_t mvc[4][2] );
 
             /* Reverse-order MV prediction. */
             *(uint32_t*)mvc[0] = 0;
@@ -204,7 +204,7 @@ lowres_intra_mb:
         int i_icost, b_intra;
         if( !fenc->b_intra_calculated )
         {
-            DECLARE_ALIGNED_16( uint8_t edge[33] );
+            ALIGNED_ARRAY_16( uint8_t, edge,[33] );
             uint8_t *pix = &pix1[8+FDEC_STRIDE - 1];
             uint8_t *src = &fenc->lowres[0][i_pel_offset - 1];
             const int intra_penalty = 5;
tools/checkasm.c
index 36fd7340f823dbd0e08e93ab955fb5d564de8e8b..291c162162c271a7aa7217f73ed4abe0389df7be 100644 (file)
@@ -32,8 +32,8 @@
 
 // GCC doesn't align stack variables on ARM, so use .bss
 #ifdef ARCH_ARM
-#undef DECLARE_ALIGNED_16
-#define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( static var, 16 )
+#undef ALIGNED_16
+#define ALIGNED_16( var ) DECLARE_ALIGNED( static var, 16 )
 #endif
 
 /* buf1, buf2: initialised to random data and shouldn't write into them */
@@ -236,7 +236,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
     x264_predict_t predict_4x4[9+3];
     x264_predict8x8_t predict_8x8[9+3];
     x264_predict_8x8_filter_t predict_8x8_filter;
-    DECLARE_ALIGNED_16( uint8_t edge[33] );
+    ALIGNED_16( uint8_t edge[33] );
     uint16_t cost_mv[32];
     int ret = 0, ok, used_asm;
     int i, j;
@@ -438,7 +438,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
         pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )
     {
         float res_c, res_a;
-        DECLARE_ALIGNED_16( int sums[5][4] ) = {{0}};
+        ALIGNED_16( int sums[5][4] ) = {{0}};
         used_asm = ok = 1;
         x264_emms();
         res_c = x264_pixel_ssim_wxh( &pixel_c,   buf1+2, 32, buf2+2, 32, 32, 28, buf3 );
@@ -463,8 +463,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
     for( i=0; i<100 && ok; i++ )
         if( pixel_asm.ads[i&3] != pixel_ref.ads[i&3] )
         {
-            DECLARE_ALIGNED_16( uint16_t sums[72] );
-            DECLARE_ALIGNED_16( int dc[4] );
+            ALIGNED_16( uint16_t sums[72] );
+            ALIGNED_16( int dc[4] );
             int16_t mvs_a[32], mvs_c[32];
             int mvn_a, mvn_c;
             int thresh = rand() & 0x3fff;
@@ -500,11 +500,11 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_dct_function_t dct_asm;
     x264_quant_function_t qf;
     int ret = 0, ok, used_asm, i, j, interlace;
-    DECLARE_ALIGNED_16( int16_t dct1[16][4][4] );
-    DECLARE_ALIGNED_16( int16_t dct2[16][4][4] );
-    DECLARE_ALIGNED_16( int16_t dct4[16][4][4] );
-    DECLARE_ALIGNED_16( int16_t dct8[4][8][8] );
-    DECLARE_ALIGNED_8( int16_t dctdc[2][2][2] );
+    ALIGNED_16( int16_t dct1[16][4][4] );
+    ALIGNED_16( int16_t dct2[16][4][4] );
+    ALIGNED_16( int16_t dct4[16][4][4] );
+    ALIGNED_16( int16_t dct8[4][8][8] );
+    ALIGNED_8( int16_t dctdc[2][2][2] );
     x264_t h_buf;
     x264_t *h = &h_buf;
 
@@ -629,8 +629,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_zigzag_function_t zigzag_ref;
     x264_zigzag_function_t zigzag_asm;
 
-    DECLARE_ALIGNED_16( int16_t level1[64] );
-    DECLARE_ALIGNED_16( int16_t level2[64] );
+    ALIGNED_16( int16_t level1[64] );
+    ALIGNED_16( int16_t level2[64] );
 
 #define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size )   \
     if( zigzag_asm.name != zigzag_ref.name ) \
@@ -1066,9 +1066,9 @@ static int check_quant( int cpu_ref, int cpu_new )
     x264_quant_function_t qf_c;
     x264_quant_function_t qf_ref;
     x264_quant_function_t qf_a;
-    DECLARE_ALIGNED_16( int16_t dct1[64] );
-    DECLARE_ALIGNED_16( int16_t dct2[64] );
-    DECLARE_ALIGNED_16( uint8_t cqm_buf[64] );
+    ALIGNED_16( int16_t dct1[64] );
+    ALIGNED_16( int16_t dct2[64] );
+    ALIGNED_16( uint8_t cqm_buf[64] );
     int ret = 0, ok, used_asm;
     int oks[2] = {1,1}, used_asms[2] = {0,0};
     int i, j, i_cqm, qp;
@@ -1382,8 +1382,8 @@ static int check_intra( int cpu_ref, int cpu_new )
 {
     int ret = 0, ok = 1, used_asm = 0;
     int i;
-    DECLARE_ALIGNED_16( uint8_t edge[33] );
-    DECLARE_ALIGNED_16( uint8_t edge2[33] );
+    ALIGNED_16( uint8_t edge[33] );
+    ALIGNED_16( uint8_t edge2[33] );
     struct
     {
         x264_predict_t      predict_16x16[4+3];