]> git.sesse.net Git - x264/blobdiff - common/ppc/quant.c
Massive cosmetic and syntax cleanup
[x264] / common / ppc / quant.c
index d1d9d72ab68a4670f69dad86ca7237f2caf51e33..4b2825cfa32c90422ea58072e6236f06d83a1ab6 100644 (file)
 #include "quant.h"            
 
 // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
/* Quantize two 8-coefficient vectors loaded at byte offsets idx0/idx1 of dct:
 * take |coef|, saturating-add the per-coefficient bias, multiply by the
 * per-coefficient mf, shift right by i_qbitsv, then restore the original sign
 * (xor with the sign mask and add 1 where negative).  Results are stored back
 * to dct and OR-ed into nz so the caller can test for any nonzero output.
 * Wrapped in do { } while(0) so the macro expands to a single statement and
 * is safe in an unbraced if/else (a bare { } block followed by the caller's
 * ';' would leave a dangling empty statement there). */
#define QUANT_16_U( idx0, idx1 )                                    \
do {                                                                \
    temp1v = vec_ld((idx0), *dct);                                  \
    temp2v = vec_ld((idx1), *dct);                                  \
    mfvA = vec_ld((idx0), mf);                                      \
    mfvB = vec_ld((idx1), mf);                                      \
    biasvA = vec_ld((idx0), bias);                                  \
    biasvB = vec_ld((idx1), bias);                                  \
    mskA = vec_cmplt(temp1v, zero_s16v);                            \
    mskB = vec_cmplt(temp2v, zero_s16v);                            \
    coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);\
    coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);\
    coefvA = vec_adds(coefvA, biasvA);                              \
    coefvB = vec_adds(coefvB, biasvB);                              \
    multEvenvA = vec_mule(coefvA, mfvA);                            \
    multOddvA = vec_mulo(coefvA, mfvA);                             \
    multEvenvB = vec_mule(coefvB, mfvB);                            \
    multOddvB = vec_mulo(coefvB, mfvB);                             \
    multEvenvA = vec_sr(multEvenvA, i_qbitsv);                      \
    multOddvA = vec_sr(multOddvA, i_qbitsv);                        \
    multEvenvB = vec_sr(multEvenvB, i_qbitsv);                      \
    multOddvB = vec_sr(multOddvB, i_qbitsv);                        \
    temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), vec_mergel(multEvenvA, multOddvA)); \
    temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB), vec_mergel(multEvenvB, multOddvB)); \
    temp1v = vec_xor(temp1v, mskA);                                 \
    temp2v = vec_xor(temp2v, mskB);                                 \
    temp1v = vec_adds(temp1v, vec_and(mskA, one));                  \
    vec_st(temp1v, (idx0), (int16_t*)dct);                          \
    temp2v = vec_adds(temp2v, vec_and(mskB, one));                  \
    nz = vec_or(nz, vec_or(temp1v, temp2v));                        \
    vec_st(temp2v, (idx1), (int16_t*)dct);                          \
} while( 0 )
                 
 int x264_quant_4x4_altivec( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
 {
@@ -83,32 +85,34 @@ int x264_quant_4x4_altivec( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16
 }
 
 // DC quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
/* DC variant of QUANT_16_U: quantize two vectors at byte offsets idx0/idx1
 * of dct using a single splatted mf/bias pair (mfv, biasv) instead of
 * per-coefficient tables, and non-saturating adds (DC values cannot
 * overflow here).  Sign is stripped before, and restored after, the
 * bias/multiply/shift; results are stored back and OR-ed into nz.
 * do { } while(0) makes the macro a single statement, safe in an
 * unbraced if/else. */
#define QUANT_16_U_DC( idx0, idx1 )                                 \
do {                                                                \
    temp1v = vec_ld((idx0), *dct);                                  \
    temp2v = vec_ld((idx1), *dct);                                  \
    mskA = vec_cmplt(temp1v, zero_s16v);                            \
    mskB = vec_cmplt(temp2v, zero_s16v);                            \
    coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);\
    coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);\
    coefvA = vec_add(coefvA, biasv);                                \
    coefvB = vec_add(coefvB, biasv);                                \
    multEvenvA = vec_mule(coefvA, mfv);                             \
    multOddvA = vec_mulo(coefvA, mfv);                              \
    multEvenvB = vec_mule(coefvB, mfv);                             \
    multOddvB = vec_mulo(coefvB, mfv);                              \
    multEvenvA = vec_sr(multEvenvA, i_qbitsv);                      \
    multOddvA = vec_sr(multOddvA, i_qbitsv);                        \
    multEvenvB = vec_sr(multEvenvB, i_qbitsv);                      \
    multOddvB = vec_sr(multOddvB, i_qbitsv);                        \
    temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), vec_mergel(multEvenvA, multOddvA)); \
    temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB), vec_mergel(multEvenvB, multOddvB)); \
    temp1v = vec_xor(temp1v, mskA);                                 \
    temp2v = vec_xor(temp2v, mskB);                                 \
    temp1v = vec_add(temp1v, vec_and(mskA, one));                   \
    vec_st(temp1v, (idx0), (int16_t*)dct);                          \
    temp2v = vec_add(temp2v, vec_and(mskB, one));                   \
    nz = vec_or(nz, vec_or(temp1v, temp2v));                        \
    vec_st(temp2v, (idx1), (int16_t*)dct);                          \
} while( 0 )
 
 int x264_quant_4x4_dc_altivec( int16_t dct[4][4], int mf, int bias )
 {
@@ -146,22 +150,24 @@ int x264_quant_4x4_dc_altivec( int16_t dct[4][4], int mf, int bias )
 }
 
 // DC quant of a whole 2x2 block
/* DC quant of a single vector at byte offset idx0 of dct, keeping only the
 * first four lanes (the 2x2 DC block): strip sign, add splatted biasv,
 * multiply by splatted mfv, shift by i_qbitsv, restore sign, then vec_sel
 * with the half-ones mask merges the four quantized lanes back over the
 * untouched upper four lanes before the store.  The local `sel` constant is
 * properly scoped by the do { } while(0) wrapper, which also makes the
 * macro a single statement safe in an unbraced if/else. */
#define QUANT_4_U_DC( idx0 )                                        \
do {                                                                \
    const vec_u16_t sel = (vec_u16_t) CV(-1,-1,-1,-1,0,0,0,0);      \
    temp1v = vec_ld((idx0), *dct);                                  \
    mskA = vec_cmplt(temp1v, zero_s16v);                            \
    coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);\
    coefvA = vec_add(coefvA, biasv);                                \
    multEvenvA = vec_mule(coefvA, mfv);                             \
    multOddvA = vec_mulo(coefvA, mfv);                              \
    multEvenvA = vec_sr(multEvenvA, i_qbitsv);                      \
    multOddvA = vec_sr(multOddvA, i_qbitsv);                        \
    temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), vec_mergel(multEvenvA, multOddvA)); \
    temp2v = vec_xor(temp2v, mskA);                                 \
    temp2v = vec_add(temp2v, vec_and(mskA, one));                   \
    temp1v = vec_sel(temp1v, temp2v, sel);                          \
    nz = vec_or(nz, temp1v);                                        \
    vec_st(temp1v, (idx0), (int16_t*)dct);                          \
} while( 0 )
 
 int x264_quant_2x2_dc_altivec( int16_t dct[2][2], int mf, int bias )
 {
@@ -218,12 +224,9 @@ int x264_quant_8x8_altivec( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64
     vec_u32_u qbits_u;
     qbits_u.s[0]=16;
     i_qbitsv = vec_splat(qbits_u.v, 0);
-    
-    int i;
 
-    for ( i=0; i<4; i++ ) {
-      QUANT_16_U( i*2*16, i*2*16+16 );
-    }
+    for( int i = 0; i < 4; i++ )
+        QUANT_16_U( i*2*16, i*2*16+16 );
     return vec_any_ne(nz, zero_s16v);
 }
 
@@ -268,9 +271,8 @@ int x264_quant_8x8_altivec( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64
 
 void x264_dequant_4x4_altivec( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
 {
-    const int i_mf = i_qp%6;
-    const int i_qbits = i_qp/6 - 4;
-    int y;
+    int i_mf = i_qp%6;
+    int i_qbits = i_qp/6 - 4;
 
     vec_s16_t dctv;
     vec_s16_t dct1v, dct2v;
@@ -286,7 +288,7 @@ void x264_dequant_4x4_altivec( int16_t dct[4][4], int dequant_mf[6][4][4], int i
         qbits_u.s[0]=i_qbits;
         i_qbitsv = vec_splat(qbits_u.v, 0);
 
-        for( y = 0; y < 4; y+=2 )
+        for( int y = 0; y < 4; y+=2 )
             DEQUANT_SHL();
     }
     else
@@ -308,16 +310,15 @@ void x264_dequant_4x4_altivec( int16_t dct[4][4], int dequant_mf[6][4][4], int i
         sixteen_u.s[0]=16;
         sixteenv = vec_splat(sixteen_u.v, 0);
 
-        for( y = 0; y < 4; y+=2 )
+        for( int y = 0; y < 4; y+=2 )
             DEQUANT_SHR();
     }
 }
 
 void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp )
 {
-    const int i_mf = i_qp%6;
-    const int i_qbits = i_qp/6 - 6;
-    int y;
+    int i_mf = i_qp%6;
+    int i_qbits = i_qp/6 - 6;
 
     vec_s16_t dctv;
     vec_s16_t dct1v, dct2v;
@@ -333,7 +334,7 @@ void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i
         qbits_u.s[0]=i_qbits;
         i_qbitsv = vec_splat(qbits_u.v, 0);
 
-        for( y = 0; y < 8; y++ )
+        for( int y = 0; y < 8; y++ )
             DEQUANT_SHL();
     }
     else
@@ -355,7 +356,7 @@ void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i
         sixteen_u.s[0]=16;
         sixteenv = vec_splat(sixteen_u.v, 0);
 
-        for( y = 0; y < 8; y++ )
+        for( int y = 0; y < 8; y++ )
             DEQUANT_SHR();
     }
 }