X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fx86%2Fmc-c.c;h=b437ca4ef5bd1ba5f4c61ef390debd4f905c835b;hb=3e25eab0b7172e3c0b067b8b6d641ce148d03db9;hp=d868706cec234757bb39ecb3c0f3a049baa2c68d;hpb=a83edfa053f60ad0c8a164f31e7492a680eef361;p=x264 diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c index d868706c..b437ca4e 100644 --- a/common/x86/mc-c.c +++ b/common/x86/mc-c.c @@ -590,7 +590,8 @@ PLANE_INTERLEAVE(avx) #endif #if HAVE_X86_INLINE_ASM -#define CLIP_ADD(s,x)\ +#undef MC_CLIP_ADD +#define MC_CLIP_ADD(s,x)\ do\ {\ int temp;\ @@ -604,7 +605,8 @@ do\ s = temp;\ } while(0) -#define CLIP_ADD2(s,x)\ +#undef MC_CLIP_ADD2 +#define MC_CLIP_ADD2(s,x)\ do\ {\ asm("movd %0, %%xmm0 \n"\ @@ -615,86 +617,10 @@ do\ :"m"(M32(x))\ );\ } while(0) -#else -#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1) -#define CLIP_ADD2(s,x)\ -do\ -{\ - CLIP_ADD((s)[0], (x)[0]);\ - CLIP_ADD((s)[1], (x)[1]);\ -} while(0) #endif -#define PROPAGATE_LIST(cpu)\ -void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\ - uint16_t *lowres_costs, int16_t *output,\ - int bipred_weight, int mb_y, int len );\ -\ -static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\ - int16_t *propagate_amount, uint16_t *lowres_costs,\ - int bipred_weight, int mb_y, int len, int list )\ -{\ - int16_t *current = h->scratch_buffer2;\ -\ - x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\ - current, bipred_weight, mb_y, len );\ -\ - unsigned stride = h->mb.i_mb_stride;\ - unsigned width = h->mb.i_mb_width;\ - unsigned height = h->mb.i_mb_height;\ -\ - for( unsigned i = 0; i < len; current += 32 )\ - {\ - int end = X264_MIN( i+8, len );\ - for( ; i < end; i++, current += 2 )\ - {\ - if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\ - continue;\ -\ - unsigned mbx = current[0];\ - unsigned mby = current[1];\ - unsigned idx0 = mbx + mby * stride;\ - unsigned idx2 = idx0 + stride;\ -\ - /* Shortcut for the simple/common case of zero MV */\ - if( !M32( mvs[i] ) )\ - {\ - CLIP_ADD( ref_costs[idx0], current[16] );\ - continue;\ - }\ -\ - if( mbx < width-1 && mby < height-1 )\ - {\ - CLIP_ADD2( ref_costs+idx0, current+16 );\ - CLIP_ADD2( ref_costs+idx2, current+32 );\ - }\ - else\ - {\ - /* Note: this takes advantage of unsigned representation to\ - * catch negative mbx/mby. */\ - if( mby < height )\ - {\ - if( mbx < width )\ - CLIP_ADD( ref_costs[idx0+0], current[16] );\ - if( mbx+1 < width )\ - CLIP_ADD( ref_costs[idx0+1], current[17] );\ - }\ - if( mby+1 < height )\ - {\ - if( mbx < width )\ - CLIP_ADD( ref_costs[idx2+0], current[32] );\ - if( mbx+1 < width )\ - CLIP_ADD( ref_costs[idx2+1], current[33] );\ - }\ - }\ - }\ - }\ -} - PROPAGATE_LIST(ssse3) PROPAGATE_LIST(avx) -#undef CLIP_ADD -#undef CLIP_ADD2 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ) {