git.sesse.net Git - x264/commitdiff
x86: Share the mbtree_propagate_list macro with aarch64
author: Martin Storsjö <martin@martin.st>
Thu, 3 Sep 2015 06:30:43 +0000 (09:30 +0300)
committer: Henrik Gramner <henrik@gramner.com>
Sun, 11 Oct 2015 16:44:54 +0000 (18:44 +0200)
This avoids having to duplicate the same code for all architectures
that implement only the internal part of this function in assembler.

common/aarch64/mc-c.c
common/mc.c
common/mc.h
common/x86/mc-c.c

index b94e3d3d133a355d97aa91e4434cb0893b11f232..8d480d7ffe9796cb3a6c8c6bed2f5dd925180f09 100644 (file)
@@ -205,88 +205,7 @@ void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
                             int height, int16_t *buf );
 #endif // !HIGH_BIT_DEPTH
 
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
-#define CLIP_ADD2(s,x)\
-do\
-{\
-    CLIP_ADD((s)[0], (x)[0]);\
-    CLIP_ADD((s)[1], (x)[1]);\
-} while(0)
-
-void x264_mbtree_propagate_list_internal_neon( int16_t (*mvs)[2],
-                                               int16_t *propagate_amount,
-                                               uint16_t *lowres_costs,
-                                               int16_t *output,
-                                               int bipred_weight, int mb_y,
-                                               int len );
-
-static void x264_mbtree_propagate_list_neon( x264_t *h, uint16_t *ref_costs,
-                                             int16_t (*mvs)[2],
-                                             int16_t *propagate_amount,
-                                             uint16_t *lowres_costs,
-                                             int bipred_weight, int mb_y,
-                                             int len, int list )
-{
-    int16_t *current = h->scratch_buffer2;
-
-    x264_mbtree_propagate_list_internal_neon( mvs, propagate_amount,
-                                              lowres_costs, current,
-                                              bipred_weight, mb_y, len );
-
-    unsigned stride = h->mb.i_mb_stride;
-    unsigned width = h->mb.i_mb_width;
-    unsigned height = h->mb.i_mb_height;
-
-    for( unsigned i = 0; i < len; current += 32 )
-    {
-        int end = X264_MIN( i+8, len );
-        for( ; i < end; i++, current += 2 )
-        {
-            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )
-                continue;
-
-            unsigned mbx = current[0];
-            unsigned mby = current[1];
-            unsigned idx0 = mbx + mby * stride;
-            unsigned idx2 = idx0 + stride;
-
-            /* Shortcut for the simple/common case of zero MV */
-            if( !M32( mvs[i] ) )
-            {
-                CLIP_ADD( ref_costs[idx0], current[16] );
-                continue;
-            }
-
-            if( mbx < width-1 && mby < height-1 )
-            {
-                CLIP_ADD2( ref_costs+idx0, current+16 );
-                CLIP_ADD2( ref_costs+idx2, current+32 );
-            }
-            else
-            {
-                /* Note: this takes advantage of unsigned representation to
-                 * catch negative mbx/mby. */
-                if( mby < height )
-                {
-                    if( mbx < width )
-                        CLIP_ADD( ref_costs[idx0+0], current[16] );
-                    if( mbx+1 < width )
-                        CLIP_ADD( ref_costs[idx0+1], current[17] );
-                }
-                if( mby+1 < height )
-                {
-                    if( mbx < width )
-                        CLIP_ADD( ref_costs[idx2+0], current[32] );
-                    if( mbx+1 < width )
-                        CLIP_ADD( ref_costs[idx2+1], current[33] );
-                }
-            }
-        }
-    }
-}
-
-#undef CLIP_ADD
-#undef CLIP_ADD2
+PROPAGATE_LIST(neon)
 
 void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf )
 {
index 8c63e1b58568e48c6bf40931585f8992a4c570e8..57c1f23a0982f5937c6987831076ede1c0a0253e 100644 (file)
@@ -526,7 +526,6 @@ static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs
 
     for( unsigned i = 0; i < len; i++ )
     {
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
         int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
 
         if( !(lists_used & (1 << list)) )
@@ -540,7 +539,7 @@ static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs
         /* Early termination for simple case of mv0. */
         if( !M32( mvs[i] ) )
         {
-            CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
+            MC_CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
             continue;
         }
 
@@ -563,10 +562,10 @@ static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs
 
         if( mbx < width-1 && mby < height-1 )
         {
-            CLIP_ADD( ref_costs[idx0+0], idx0weight );
-            CLIP_ADD( ref_costs[idx0+1], idx1weight );
-            CLIP_ADD( ref_costs[idx2+0], idx2weight );
-            CLIP_ADD( ref_costs[idx2+1], idx3weight );
+            MC_CLIP_ADD( ref_costs[idx0+0], idx0weight );
+            MC_CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            MC_CLIP_ADD( ref_costs[idx2+0], idx2weight );
+            MC_CLIP_ADD( ref_costs[idx2+1], idx3weight );
         }
         else
         {
@@ -575,20 +574,19 @@ static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs
             if( mby < height )
             {
                 if( mbx < width )
-                    CLIP_ADD( ref_costs[idx0+0], idx0weight );
+                    MC_CLIP_ADD( ref_costs[idx0+0], idx0weight );
                 if( mbx+1 < width )
-                    CLIP_ADD( ref_costs[idx0+1], idx1weight );
+                    MC_CLIP_ADD( ref_costs[idx0+1], idx1weight );
             }
             if( mby+1 < height )
             {
                 if( mbx < width )
-                    CLIP_ADD( ref_costs[idx2+0], idx2weight );
+                    MC_CLIP_ADD( ref_costs[idx2+0], idx2weight );
                 if( mbx+1 < width )
-                    CLIP_ADD( ref_costs[idx2+1], idx3weight );
+                    MC_CLIP_ADD( ref_costs[idx2+1], idx3weight );
             }
         }
     }
-#undef CLIP_ADD
 }
 
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
index 53aab379ee0ae6ceb44b2f6f3b7fecd62d21acd0..47184ea445330931205465131c8fa554d56a5d11 100644 (file)
 #ifndef X264_MC_H
 #define X264_MC_H
 
+#define MC_CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+#define MC_CLIP_ADD2(s,x)\
+do\
+{\
+    MC_CLIP_ADD((s)[0], (x)[0]);\
+    MC_CLIP_ADD((s)[1], (x)[1]);\
+} while(0)
+
+#define PROPAGATE_LIST(cpu)\
+void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
+                                                uint16_t *lowres_costs, int16_t *output,\
+                                                int bipred_weight, int mb_y, int len );\
+\
+static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
+                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
+                                              int bipred_weight, int mb_y, int len, int list )\
+{\
+    int16_t *current = h->scratch_buffer2;\
+\
+    x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
+                                               current, bipred_weight, mb_y, len );\
+\
+    unsigned stride = h->mb.i_mb_stride;\
+    unsigned width = h->mb.i_mb_width;\
+    unsigned height = h->mb.i_mb_height;\
+\
+    for( unsigned i = 0; i < len; current += 32 )\
+    {\
+        int end = X264_MIN( i+8, len );\
+        for( ; i < end; i++, current += 2 )\
+        {\
+            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
+                continue;\
+\
+            unsigned mbx = current[0];\
+            unsigned mby = current[1];\
+            unsigned idx0 = mbx + mby * stride;\
+            unsigned idx2 = idx0 + stride;\
+\
+            /* Shortcut for the simple/common case of zero MV */\
+            if( !M32( mvs[i] ) )\
+            {\
+                MC_CLIP_ADD( ref_costs[idx0], current[16] );\
+                continue;\
+            }\
+\
+            if( mbx < width-1 && mby < height-1 )\
+            {\
+                MC_CLIP_ADD2( ref_costs+idx0, current+16 );\
+                MC_CLIP_ADD2( ref_costs+idx2, current+32 );\
+            }\
+            else\
+            {\
+                /* Note: this takes advantage of unsigned representation to\
+                 * catch negative mbx/mby. */\
+                if( mby < height )\
+                {\
+                    if( mbx < width )\
+                        MC_CLIP_ADD( ref_costs[idx0+0], current[16] );\
+                    if( mbx+1 < width )\
+                        MC_CLIP_ADD( ref_costs[idx0+1], current[17] );\
+                }\
+                if( mby+1 < height )\
+                {\
+                    if( mbx < width )\
+                        MC_CLIP_ADD( ref_costs[idx2+0], current[32] );\
+                    if( mbx+1 < width )\
+                        MC_CLIP_ADD( ref_costs[idx2+1], current[33] );\
+                }\
+            }\
+        }\
+    }\
+}
+
 struct x264_weight_t;
 typedef void (* weight_fn_t)( pixel *, intptr_t, pixel *,intptr_t, const struct x264_weight_t *, int );
 typedef struct x264_weight_t
index d868706cec234757bb39ecb3c0f3a049baa2c68d..b437ca4ef5bd1ba5f4c61ef390debd4f905c835b 100644 (file)
@@ -590,7 +590,8 @@ PLANE_INTERLEAVE(avx)
 #endif
 
 #if HAVE_X86_INLINE_ASM
-#define CLIP_ADD(s,x)\
+#undef MC_CLIP_ADD
+#define MC_CLIP_ADD(s,x)\
 do\
 {\
     int temp;\
@@ -604,7 +605,8 @@ do\
     s = temp;\
 } while(0)
 
-#define CLIP_ADD2(s,x)\
+#undef MC_CLIP_ADD2
+#define MC_CLIP_ADD2(s,x)\
 do\
 {\
     asm("movd       %0, %%xmm0     \n"\
@@ -615,86 +617,10 @@ do\
         :"m"(M32(x))\
     );\
 } while(0)
-#else
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
-#define CLIP_ADD2(s,x)\
-do\
-{\
-    CLIP_ADD((s)[0], (x)[0]);\
-    CLIP_ADD((s)[1], (x)[1]);\
-} while(0)
 #endif
 
-#define PROPAGATE_LIST(cpu)\
-void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
-                                                uint16_t *lowres_costs, int16_t *output,\
-                                                int bipred_weight, int mb_y, int len );\
-\
-static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
-                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
-                                              int bipred_weight, int mb_y, int len, int list )\
-{\
-    int16_t *current = h->scratch_buffer2;\
-\
-    x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
-                                               current, bipred_weight, mb_y, len );\
-\
-    unsigned stride = h->mb.i_mb_stride;\
-    unsigned width = h->mb.i_mb_width;\
-    unsigned height = h->mb.i_mb_height;\
-\
-    for( unsigned i = 0; i < len; current += 32 )\
-    {\
-        int end = X264_MIN( i+8, len );\
-        for( ; i < end; i++, current += 2 )\
-        {\
-            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
-                continue;\
-\
-            unsigned mbx = current[0];\
-            unsigned mby = current[1];\
-            unsigned idx0 = mbx + mby * stride;\
-            unsigned idx2 = idx0 + stride;\
-\
-            /* Shortcut for the simple/common case of zero MV */\
-            if( !M32( mvs[i] ) )\
-            {\
-                CLIP_ADD( ref_costs[idx0], current[16] );\
-                continue;\
-            }\
-\
-            if( mbx < width-1 && mby < height-1 )\
-            {\
-                CLIP_ADD2( ref_costs+idx0, current+16 );\
-                CLIP_ADD2( ref_costs+idx2, current+32 );\
-            }\
-            else\
-            {\
-                /* Note: this takes advantage of unsigned representation to\
-                 * catch negative mbx/mby. */\
-                if( mby < height )\
-                {\
-                    if( mbx < width )\
-                        CLIP_ADD( ref_costs[idx0+0], current[16] );\
-                    if( mbx+1 < width )\
-                        CLIP_ADD( ref_costs[idx0+1], current[17] );\
-                }\
-                if( mby+1 < height )\
-                {\
-                    if( mbx < width )\
-                        CLIP_ADD( ref_costs[idx2+0], current[32] );\
-                    if( mbx+1 < width )\
-                        CLIP_ADD( ref_costs[idx2+1], current[33] );\
-                }\
-            }\
-        }\
-    }\
-}
-
 PROPAGATE_LIST(ssse3)
 PROPAGATE_LIST(avx)
-#undef CLIP_ADD
-#undef CLIP_ADD2
 
 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
 {