X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fmc.h;h=47184ea445330931205465131c8fa554d56a5d11;hb=9f422c0cd9c0abcd6a7abb10b51f8be883c39b2b;hp=09dda5579163a6161cf1e795edfc01993e8988a9;hpb=5b0cb86f27ba0c5433c404bed51c06a5124dfb49;p=x264 diff --git a/common/mc.h b/common/mc.h index 09dda557..47184ea4 100644 --- a/common/mc.h +++ b/common/mc.h @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: motion compensation ***************************************************************************** - * Copyright (C) 2004-2011 x264 project + * Copyright (C) 2004-2015 x264 project * * Authors: Loren Merritt * @@ -26,8 +26,82 @@ #ifndef X264_MC_H #define X264_MC_H +#define MC_CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1) +#define MC_CLIP_ADD2(s,x)\ +do\ +{\ + MC_CLIP_ADD((s)[0], (x)[0]);\ + MC_CLIP_ADD((s)[1], (x)[1]);\ +} while(0) + +#define PROPAGATE_LIST(cpu)\ +void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\ + uint16_t *lowres_costs, int16_t *output,\ + int bipred_weight, int mb_y, int len );\ +\ +static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\ + int16_t *propagate_amount, uint16_t *lowres_costs,\ + int bipred_weight, int mb_y, int len, int list )\ +{\ + int16_t *current = h->scratch_buffer2;\ +\ + x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\ + current, bipred_weight, mb_y, len );\ +\ + unsigned stride = h->mb.i_mb_stride;\ + unsigned width = h->mb.i_mb_width;\ + unsigned height = h->mb.i_mb_height;\ +\ + for( unsigned i = 0; i < len; current += 32 )\ + {\ + int end = X264_MIN( i+8, len );\ + for( ; i < end; i++, current += 2 )\ + {\ + if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\ + continue;\ +\ + unsigned mbx = current[0];\ + unsigned mby = current[1];\ + unsigned idx0 = mbx + mby * stride;\ + unsigned idx2 = idx0 + stride;\ +\ + /* Shortcut for the simple/common case of zero MV */\ + if( !M32( mvs[i] ) )\ + {\ + MC_CLIP_ADD( ref_costs[idx0], current[16] );\ + continue;\ + }\ +\ + if( mbx < width-1 && mby < height-1 )\ + {\ + MC_CLIP_ADD2( ref_costs+idx0, current+16 );\ + MC_CLIP_ADD2( ref_costs+idx2, current+32 );\ + }\ + else\ + {\ + /* Note: this takes advantage of unsigned representation to\ + * catch negative mbx/mby. */\ + if( mby < height )\ + {\ + if( mbx < width )\ + MC_CLIP_ADD( ref_costs[idx0+0], current[16] );\ + if( mbx+1 < width )\ + MC_CLIP_ADD( ref_costs[idx0+1], current[17] );\ + }\ + if( mby+1 < height )\ + {\ + if( mbx < width )\ + MC_CLIP_ADD( ref_costs[idx2+0], current[32] );\ + if( mbx+1 < width )\ + MC_CLIP_ADD( ref_costs[idx2+1], current[33] );\ + }\ + }\ + }\ + }\ +} + struct x264_weight_t; -typedef void (* weight_fn_t)( pixel *, int, pixel *,int, const struct x264_weight_t *, int ); +typedef void (* weight_fn_t)( pixel *, intptr_t, pixel *,intptr_t, const struct x264_weight_t *, int ); typedef struct x264_weight_t { /* aligning the first member is a gcc hack to force the struct to be @@ -41,6 +115,8 @@ typedef struct x264_weight_t } ALIGNED_16( x264_weight_t ); extern const x264_weight_t x264_weight_none[3]; +extern const uint8_t x264_hpel_ref0[16]; +extern const uint8_t x264_hpel_ref1[16]; #define SET_WEIGHT( w, b, s, d, o )\ {\ @@ -62,70 +138,75 @@ extern const x264_weight_t x264_weight_none[3]; typedef struct { - void (*mc_luma)( pixel *dst, int i_dst, pixel **src, int i_src, + void (*mc_luma)( pixel *dst, intptr_t i_dst, pixel **src, intptr_t i_src, int mvx, int mvy, int i_width, int i_height, const x264_weight_t *weight ); /* may round up the dimensions if they're not a power of 2 */ - pixel* (*get_ref)( pixel *dst, int *i_dst, pixel **src, int i_src, + pixel* (*get_ref)( pixel *dst, intptr_t *i_dst, pixel **src, intptr_t i_src, int mvx, int mvy, int i_width, int i_height, const x264_weight_t *weight ); /* mc_chroma may write up to 2 bytes of garbage to the right of dst, * so it must be run from left to right. */ - void (*mc_chroma)( pixel *dstu, pixel *dstv, int i_dst, pixel *src, int i_src, + void (*mc_chroma)( pixel *dstu, pixel *dstv, intptr_t i_dst, pixel *src, intptr_t i_src, int mvx, int mvy, int i_width, int i_height ); - void (*avg[12])( pixel *dst, int, pixel *src1, int, pixel *src2, int, int i_weight ); + void (*avg[12])( pixel *dst, intptr_t dst_stride, pixel *src1, intptr_t src1_stride, + pixel *src2, intptr_t src2_stride, int i_weight ); /* only 16x16, 8x8, and 4x4 defined */ - void (*copy[7])( pixel *dst, int, pixel *src, int, int i_height ); - void (*copy_16x16_unaligned)( pixel *dst, int, pixel *src, int, int i_height ); - - void (*store_interleave_chroma)( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height ); - void (*load_deinterleave_chroma_fenc)( pixel *dst, pixel *src, int i_src, int height ); - void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, int i_src, int height ); - - void (*plane_copy)( pixel *dst, int i_dst, - pixel *src, int i_src, int w, int h ); - void (*plane_copy_interleave)( pixel *dst, int i_dst, - pixel *srcu, int i_srcu, - pixel *srcv, int i_srcv, int w, int h ); + void (*copy[7])( pixel *dst, intptr_t dst_stride, pixel *src, intptr_t src_stride, int i_height ); + void (*copy_16x16_unaligned)( pixel *dst, intptr_t dst_stride, pixel *src, intptr_t src_stride, int i_height ); + + void (*store_interleave_chroma)( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); + void (*load_deinterleave_chroma_fenc)( pixel *dst, pixel *src, intptr_t i_src, int height ); + void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height ); + + void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); + void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); + void (*plane_copy_interleave)( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, + pixel *srcv, intptr_t i_srcv, int w, int h ); /* may write up to 15 pixels off the end of each plane */ - void (*plane_copy_deinterleave)( pixel *dstu, int i_dstu, - pixel *dstv, int i_dstv, - pixel *src, int i_src, int w, int h ); - void (*plane_copy_deinterleave_rgb)( pixel *dsta, int i_dsta, - pixel *dstb, int i_dstb, - pixel *dstc, int i_dstc, - pixel *src, int i_src, int pw, int w, int h ); + void (*plane_copy_deinterleave)( pixel *dstu, intptr_t i_dstu, pixel *dstv, intptr_t i_dstv, + pixel *src, intptr_t i_src, int w, int h ); + void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, + pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h ); + void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty, + pixel *dstc, intptr_t i_dstc, + uint32_t *src, intptr_t i_src, int w, int h ); void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src, - int i_stride, int i_width, int i_height, int16_t *buf ); + intptr_t i_stride, int i_width, int i_height, int16_t *buf ); /* prefetch the next few macroblocks of fenc or fdec */ - void (*prefetch_fenc)( pixel *pix_y, int stride_y, - pixel *pix_uv, int stride_uv, int mb_x ); + void (*prefetch_fenc) ( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x ); + void (*prefetch_fenc_420)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x ); + void (*prefetch_fenc_422)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x ); /* prefetch the next few macroblocks of a hpel reference frame */ - void (*prefetch_ref)( pixel *pix, int stride, int parity ); + void (*prefetch_ref)( pixel *pix, intptr_t stride, int parity ); void *(*memcpy_aligned)( void *dst, const void *src, size_t n ); - void (*memzero_aligned)( void *dst, int n ); + void (*memzero_aligned)( void *dst, size_t n ); /* successive elimination prefilter */ - void (*integral_init4h)( uint16_t *sum, pixel *pix, int stride ); - void (*integral_init8h)( uint16_t *sum, pixel *pix, int stride ); - void (*integral_init4v)( uint16_t *sum8, uint16_t *sum4, int stride ); - void (*integral_init8v)( uint16_t *sum8, int stride ); + void (*integral_init4h)( uint16_t *sum, pixel *pix, intptr_t stride ); + void (*integral_init8h)( uint16_t *sum, pixel *pix, intptr_t stride ); + void (*integral_init4v)( uint16_t *sum8, uint16_t *sum4, intptr_t stride ); + void (*integral_init8v)( uint16_t *sum8, intptr_t stride ); void (*frame_init_lowres_core)( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc, - int src_stride, int dst_stride, int width, int height ); + intptr_t src_stride, intptr_t dst_stride, int width, int height ); weight_fn_t *weight; weight_fn_t *offsetadd; weight_fn_t *offsetsub; void (*weight_cache)( x264_t *, x264_weight_t * ); - void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, + void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); + + void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2], + int16_t *propagate_amount, uint16_t *lowres_costs, + int bipred_weight, int mb_y, int len, int list ); } x264_mc_functions_t; -void x264_mc_init( int cpu, x264_mc_functions_t *pf ); +void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent ); #endif