X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fmc.c;h=8c63e1b58568e48c6bf40931585f8992a4c570e8;hb=5265b927b0f2e043dd39cbbbf3909da0862d60e6;hp=6f772afa2ec67187ea4c677bdee691a378b03ff6;hpb=9bbfc30284469a70374a75fecfa322c4740dc2b7;p=x264 diff --git a/common/mc.c b/common/mc.c index 6f772afa..8c63e1b5 100644 --- a/common/mc.c +++ b/common/mc.c @@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: motion compensation ***************************************************************************** - * Copyright (C) 2003-2011 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -35,12 +35,17 @@ #if ARCH_ARM #include "arm/mc.h" #endif +#if ARCH_AARCH64 +#include "aarch64/mc.h" +#endif +#if ARCH_MIPS +#include "mips/mc.h" +#endif -static inline void pixel_avg( pixel *dst, int i_dst_stride, - pixel *src1, int i_src1_stride, - pixel *src2, int i_src2_stride, - int i_width, int i_height ) +static inline void pixel_avg( pixel *dst, intptr_t i_dst_stride, + pixel *src1, intptr_t i_src1_stride, + pixel *src2, intptr_t i_src2_stride, int i_width, int i_height ) { for( int y = 0; y < i_height; y++ ) { @@ -52,7 +57,9 @@ static inline void pixel_avg( pixel *dst, int i_dst_stride, } } -static inline void pixel_avg_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1, pixel *src2, int i_src2, int width, int height ) +static inline void pixel_avg_wxh( pixel *dst, intptr_t i_dst, + pixel *src1, intptr_t i_src1, + pixel *src2, intptr_t i_src2, int width, int height ) { for( int y = 0; y < height; y++ ) { @@ -66,9 +73,11 @@ static inline void pixel_avg_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1 /* Implicit weighted bipred only: * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */ -static inline void pixel_avg_weight_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1, pixel *src2, int i_src2, int width, int height, int i_weight1 ) +static inline void pixel_avg_weight_wxh( pixel *dst, intptr_t i_dst, + pixel *src1, intptr_t i_src1, + pixel *src2, intptr_t i_src2, int width, int height, int i_weight1 ) { - const int i_weight2 = 64 - i_weight1; + int i_weight2 = 64 - i_weight1; for( int y = 0; y> 6 ); @@ -76,9 +85,9 @@ static inline void pixel_avg_weight_wxh( pixel *dst, int i_dst, pixel *src1, int #undef op_scale2 #define PIXEL_AVG_C( name, width, height ) \ -static void name( pixel *pix1, int i_stride_pix1, \ - pixel *pix2, int i_stride_pix2, \ - pixel *pix3, int i_stride_pix3, int weight ) \ +static void name( pixel *pix1, intptr_t i_stride_pix1, \ + pixel *pix2, intptr_t i_stride_pix2, \ + pixel *pix3, intptr_t i_stride_pix3, int weight ) \ { \ if( weight == 32 ) \ pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \ @@ -104,7 +113,8 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w ) } #define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset ) #define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset ) -static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height ) +static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, + const x264_weight_t *weight, int i_width, int i_height ) { int offset = weight->i_offset << (BIT_DEPTH-8); int scale = weight->i_scale; @@ -124,7 +134,7 @@ static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_strid } #define MC_WEIGHT_C( name, width ) \ - static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \ + static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \ { \ mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\ } @@ -146,7 +156,7 @@ static weight_fn_t x264_mc_weight_wtab[6] = mc_weight_w20, }; const x264_weight_t x264_weight_none[3] = { {{0}} }; -static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride, int i_width, int i_height ) +static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height ) { for( int y = 0; y < i_height; y++ ) { @@ -159,7 +169,7 @@ static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride, #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d])) static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src, - int stride, int width, int height, int16_t *buf ) + intptr_t stride, int width, int height, int16_t *buf ) { const int pad = (BIT_DEPTH > 9) ? (-10 * PIXEL_MAX) : 0; for( int y = 0; y < height; y++ ) @@ -182,21 +192,21 @@ static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src, } } -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; +const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; +const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2}; -static void mc_luma( pixel *dst, int i_dst_stride, - pixel *src[4], int i_src_stride, +static void mc_luma( pixel *dst, intptr_t i_dst_stride, + pixel *src[4], intptr_t i_src_stride, int mvx, int mvy, int i_width, int i_height, const x264_weight_t *weight ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); pixel_avg( dst, i_dst_stride, src1, i_src_stride, src2, i_src_stride, i_width, i_height ); if( weight->weightfn ) @@ -208,18 +218,18 @@ static void mc_luma( pixel *dst, int i_dst_stride, mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height ); } -static pixel *get_ref( pixel *dst, int *i_dst_stride, - pixel *src[4], int i_src_stride, +static pixel *get_ref( pixel *dst, intptr_t *i_dst_stride, + pixel *src[4], intptr_t i_src_stride, int mvx, int mvy, int i_width, int i_height, const x264_weight_t *weight ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); pixel_avg( dst, *i_dst_stride, src1, i_src_stride, src2, i_src_stride, i_width, i_height ); if( weight->weightfn ) @@ -239,8 +249,8 @@ static pixel *get_ref( pixel *dst, int *i_dst_stride, } /* full chroma mc (ie until 1/8 pixel)*/ -static void mc_chroma( pixel *dstu, pixel *dstv, int i_dst_stride, - pixel *src, int i_src_stride, +static void mc_chroma( pixel *dstu, pixel *dstv, intptr_t i_dst_stride, + pixel *src, intptr_t i_src_stride, int mvx, int mvy, int i_width, int i_height ) { @@ -273,7 +283,7 @@ static void mc_chroma( pixel *dstu, pixel *dstv, int i_dst_stride, } #define MC_COPY(W) \ -static void mc_copy_w##W( pixel *dst, int i_dst, pixel *src, int i_src, int i_height ) \ +static void mc_copy_w##W( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int i_height ) \ { \ mc_copy( src, i_src, dst, i_dst, W, i_height ); \ } @@ -281,8 +291,8 @@ MC_COPY( 16 ) MC_COPY( 8 ) MC_COPY( 4 ) -void x264_plane_copy_c( pixel *dst, int i_dst, - pixel *src, int i_src, int w, int h ) +void x264_plane_copy_c( pixel *dst, intptr_t i_dst, + pixel *src, intptr_t i_src, int w, int h ) { while( h-- ) { @@ -292,9 +302,20 @@ void x264_plane_copy_c( pixel *dst, int i_dst, } } -void x264_plane_copy_interleave_c( pixel *dst, int i_dst, - pixel *srcu, int i_srcu, - pixel *srcv, int i_srcv, int w, int h ) +void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst, + pixel *src, intptr_t i_src, int w, int h ) +{ + for( int y=0; y> 10 ) & 0x03FF; + *(dstc0++) = ( *src0 >> 20 ) & 0x03FF; + src0++; + *(dsty0++) = *src0 & 0x03FF; + *(dstc0++) = ( *src0 >> 10 ) & 0x03FF; + *(dsty0++) = ( *src0 >> 20 ) & 0x03FF; + src0++; + } + + dsty += i_dsty; + dstc += i_dstc; + src += i_src; + } +} + +static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ) { for( int y=0; ymb.i_mb_stride; + unsigned width = h->mb.i_mb_width; + unsigned height = h->mb.i_mb_height; + + for( unsigned i = 0; i < len; i++ ) + { +#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1) + int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT; + + if( !(lists_used & (1 << list)) ) + continue; + + int listamount = propagate_amount[i]; + /* Apply bipred weighting. */ + if( lists_used == 3 ) + listamount = (listamount * bipred_weight + 32) >> 6; + + /* Early termination for simple case of mv0. */ + if( !M32( mvs[i] ) ) + { + CLIP_ADD( ref_costs[mb_y*stride + i], listamount ); + continue; + } + + int x = mvs[i][0]; + int y = mvs[i][1]; + unsigned mbx = (x>>5)+i; + unsigned mby = (y>>5)+mb_y; + unsigned idx0 = mbx + mby * stride; + unsigned idx2 = idx0 + stride; + x &= 31; + y &= 31; + int idx0weight = (32-y)*(32-x); + int idx1weight = (32-y)*x; + int idx2weight = y*(32-x); + int idx3weight = y*x; + idx0weight = (idx0weight * listamount + 512) >> 10; + idx1weight = (idx1weight * listamount + 512) >> 10; + idx2weight = (idx2weight * listamount + 512) >> 10; + idx3weight = (idx3weight * listamount + 512) >> 10; + + if( mbx < width-1 && mby < height-1 ) + { + CLIP_ADD( ref_costs[idx0+0], idx0weight ); + CLIP_ADD( ref_costs[idx0+1], idx1weight ); + CLIP_ADD( ref_costs[idx2+0], idx2weight ); + CLIP_ADD( ref_costs[idx2+1], idx3weight ); + } + else + { + /* Note: this takes advantage of unsigned representation to + * catch negative mbx/mby. */ + if( mby < height ) + { + if( mbx < width ) + CLIP_ADD( ref_costs[idx0+0], idx0weight ); + if( mbx+1 < width ) + CLIP_ADD( ref_costs[idx0+1], idx1weight ); + } + if( mby+1 < height ) + { + if( mbx < width ) + CLIP_ADD( ref_costs[idx2+0], idx2weight ); + if( mbx+1 < width ) + CLIP_ADD( ref_costs[idx2+1], idx3weight ); + } + } + } +#undef CLIP_ADD +} + +void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent ) { pf->mc_luma = mc_luma; pf->get_ref = get_ref; @@ -500,9 +626,11 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf ) pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec; pf->plane_copy = x264_plane_copy_c; + pf->plane_copy_swap = x264_plane_copy_swap_c; pf->plane_copy_interleave = x264_plane_copy_interleave_c; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c; + pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c; pf->hpel_filter = hpel_filter; @@ -519,6 +647,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf ) pf->integral_init8v = integral_init8v; pf->mbtree_propagate_cost = mbtree_propagate_cost; + pf->mbtree_propagate_list = mbtree_propagate_list; #if HAVE_MMX x264_mc_init_mmx( cpu, pf ); @@ -530,6 +659,19 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf ) #if HAVE_ARMV6 x264_mc_init_arm( cpu, pf ); #endif +#if ARCH_AARCH64 + x264_mc_init_aarch64( cpu, pf ); +#endif +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + x264_mc_init_mips( cpu, pf ); +#endif + + if( cpu_independent ) + { + pf->mbtree_propagate_cost = mbtree_propagate_cost; + pf->mbtree_propagate_list = mbtree_propagate_list; + } } void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )