X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fmc.c;h=c7a544f6df881998c230a166098fb45bcaebfd04;hb=0d668be8d7525992c1c163c97551ee897e43c177;hp=ada8bdc04488dfb57912065ef1b0d9967a5c52ba;hpb=54e784fdf410bf6dd7dd2312251fbe576a0d03fd;p=x264

diff --git a/common/mc.c b/common/mc.c
index ada8bdc0..c7a544f6 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -1,7 +1,7 @@
 /*****************************************************************************
- * mc.c: h264 encoder library (Motion Compensation)
+ * mc.c: motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2008 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar
  *          Loren Merritt
@@ -19,25 +19,27 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
 #include "common.h"
 
-#ifdef HAVE_MMX
+#if HAVE_MMX
 #include "x86/mc.h"
 #endif
-#ifdef ARCH_PPC
+#if ARCH_PPC
 #include "ppc/mc.h"
 #endif
-#ifdef ARCH_ARM
+#if ARCH_ARM
 #include "arm/mc.h"
 #endif
 
-static inline void pixel_avg( uint8_t *dst,  int i_dst_stride,
-                              uint8_t *src1, int i_src1_stride,
-                              uint8_t *src2, int i_src2_stride,
-                              int i_width, int i_height )
+static inline void pixel_avg( pixel *dst,  intptr_t i_dst_stride,
+                              pixel *src1, intptr_t i_src1_stride,
+                              pixel *src2, intptr_t i_src2_stride, int i_width, int i_height )
 {
     for( int y = 0; y < i_height; y++ )
     {
@@ -49,7 +51,9 @@ static inline void pixel_avg( uint8_t *dst, int i_dst_stride,
     }
 }
 
-static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_src1, uint8_t *src2, int i_src2, int width, int height )
+static inline void pixel_avg_wxh( pixel *dst,  intptr_t i_dst,
+                                  pixel *src1, intptr_t i_src1,
+                                  pixel *src2, intptr_t i_src2, int width, int height )
 {
     for( int y = 0; y < height; y++ )
     {
@@ -63,39 +67,21 @@ static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_
 
 /* Implicit weighted bipred only:
  * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
-#define op_scale2(x) dst[x] = x264_clip_uint8( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 )
-static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_src1, uint8_t *src2, int i_src2, int width, int height, int i_weight1 )
+static inline void pixel_avg_weight_wxh( pixel *dst,  intptr_t i_dst,
+                                         pixel *src1, intptr_t i_src1,
+                                         pixel *src2, intptr_t i_src2, int width, int height, int i_weight1 )
 {
-    const int i_weight2 = 64 - i_weight1;
+    int i_weight2 = 64 - i_weight1;
     for( int y = 0; y < height; y++, dst += i_dst, src1 += i_src1, src2 += i_src2 )
+        for( int x = 0; x < width; x++ )
+            dst[x] = x264_clip_pixel( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 );
 }
 #undef op_scale2
 
 #define PIXEL_AVG_C( name, width, height ) \
-static void name( uint8_t *pix1, int i_stride_pix1, \
-                  uint8_t *pix2, int i_stride_pix2, \
-                  uint8_t *pix3, int i_stride_pix3, int weight ) \
+static void name( pixel *pix1, intptr_t i_stride_pix1, \
+                  pixel *pix2, intptr_t i_stride_pix2, \
+                  pixel *pix3, intptr_t i_stride_pix3, int weight ) \
 { \
     if( weight == 32 ) \
         pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
@@ -107,9 +93,11 @@ PIXEL_AVG_C( pixel_avg_16x8,  16, 8 )
 PIXEL_AVG_C( pixel_avg_8x16,  8, 16 )
 PIXEL_AVG_C( pixel_avg_8x8,   8, 8 )
 PIXEL_AVG_C( pixel_avg_8x4,   8, 4 )
+PIXEL_AVG_C( pixel_avg_4x16,  4, 16 )
 PIXEL_AVG_C( pixel_avg_4x8,   4, 8 )
 PIXEL_AVG_C( pixel_avg_4x4,   4, 4 )
 PIXEL_AVG_C( pixel_avg_4x2,   4, 2 )
+PIXEL_AVG_C( pixel_avg_2x8,   2, 8 )
 PIXEL_AVG_C( pixel_avg_2x4,   2, 4 )
 PIXEL_AVG_C( pixel_avg_2x2,   2, 2 )
 
@@ -117,11 +105,15 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w )
 {
     w->weightfn = h->mc.weight;
 }
-#define opscale(x) dst[x] = x264_clip_uint8( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
-#define opscale_noden(x) dst[x] = x264_clip_uint8( src[x] * weight->i_scale + weight->i_offset )
-static inline void mc_weight( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
+#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
+#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
+static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
+                       const x264_weight_t *weight, int i_width, int i_height )
 {
-    if( weight->i_denom >= 1 )
+    int offset = weight->i_offset << (BIT_DEPTH-8);
+    int scale = weight->i_scale;
+    int denom = weight->i_denom;
+    if( denom >= 1 )
     {
         for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
             for( int x = 0; x < i_width; x++ )
@@ -135,21 +127,10 @@ static inline void mc_weight( uint8_t *dst, int i_dst_stride, uint8_t *src, int
     }
 }
 
-#define MC_WEIGHT_C( name, lx ) \
-    static void name( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int height ) \
+#define MC_WEIGHT_C( name, width ) \
+    static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \
 { \
-    if( weight->i_denom >= 1 ) \
-    { \
-        for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
-            for( int x = 0; x < lx; x++ ) \
-                opscale( x ); \
-    } \
-    else \
-    { \
-        for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
-            for( int x = 0; x < lx; x++ ) \
-                opscale_noden( x ); \
-    } \
+    mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
 }
 
 MC_WEIGHT_C( mc_weight_w20, 20 )
@@ -168,12 +149,12 @@ static weight_fn_t x264_mc_weight_wtab[6] =
     mc_weight_w16,
     mc_weight_w20,
 };
-const x264_weight_t weight_none[3] = { {{0}} };
-static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
+const x264_weight_t x264_weight_none[3] = { {{0}} };
+static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height )
 {
     for( int y = 0; y < i_height; y++ )
     {
-        memcpy( dst, src, i_width );
+        memcpy( dst, src, i_width * sizeof(pixel) );
 
         src += i_src_stride;
         dst += i_dst_stride;
@@ -181,21 +162,23 @@ static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_str
 }
 
 #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
-static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
-                         int stride, int width, int height, int16_t *buf )
+static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
+                         intptr_t stride, int width, int height, int16_t *buf )
 {
+    const int pad = (BIT_DEPTH > 9) ? (-10 * PIXEL_MAX) : 0;
     for( int y = 0; y < height; y++ )
     {
         for( int x = -2; x < width+3; x++ )
         {
             int v = TAPFILTER(src,stride);
-            dstv[x] = x264_clip_uint8( (v + 16) >> 5 );
-            buf[x+2] = v;
+            dstv[x] = x264_clip_pixel( (v + 16) >> 5 );
+            /* transform v for storage in a 16-bit integer */
+            buf[x+2] = v + pad;
         }
         for( int x = 0; x < width; x++ )
-            dstc[x] = x264_clip_uint8( (TAPFILTER(buf+2,1) + 512) >> 10 );
+            dstc[x] = x264_clip_pixel( (TAPFILTER(buf+2,1) - 32*pad + 512) >> 10 );
         for( int x = 0; x < width; x++ )
-            dsth[x] = x264_clip_uint8( (TAPFILTER(src,1) + 16) >> 5 );
+            dsth[x] = x264_clip_pixel( (TAPFILTER(src,1) + 16) >> 5 );
         dsth += stride;
         dstv += stride;
         dstc += stride;
@@ -203,21 +186,21 @@ static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *s
     }
 }
 
-static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
+static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
+static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
 
-static void mc_luma( uint8_t *dst,    int i_dst_stride,
-                     uint8_t *src[4], int i_src_stride,
+static void mc_luma( pixel *dst,    intptr_t i_dst_stride,
+                     pixel *src[4], intptr_t i_src_stride,
                      int mvx, int mvy,
                      int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
 
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         pixel_avg( dst, i_dst_stride, src1, i_src_stride,
                    src2, i_src_stride, i_width, i_height );
         if( weight->weightfn )
@@ -229,18 +212,18 @@ static void mc_luma( uint8_t *dst, int i_dst_stride,
         mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
 }
 
-static uint8_t *get_ref( uint8_t *dst,    int *i_dst_stride,
-                         uint8_t *src[4], int i_src_stride,
-                         int mvx, int mvy,
-                         int i_width, int i_height, const x264_weight_t *weight )
+static pixel *get_ref( pixel *dst,    intptr_t *i_dst_stride,
+                       pixel *src[4], intptr_t i_src_stride,
+                       int mvx, int mvy,
+                       int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
 
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
                    src2, i_src_stride, i_width, i_height );
         if( weight->weightfn )
@@ -260,12 +243,12 @@ static uint8_t *get_ref( uint8_t *dst, int *i_dst_stride,
 }
 
 /* full chroma mc (ie until 1/8 pixel)*/
-static void mc_chroma( uint8_t *dst, int i_dst_stride,
-                       uint8_t *src, int i_src_stride,
+static void mc_chroma( pixel *dstu, pixel *dstv, intptr_t i_dst_stride,
+                       pixel *src, intptr_t i_src_stride,
                        int mvx, int mvy,
                        int i_width, int i_height )
 {
-    uint8_t *srcp;
+    pixel *srcp;
     int d8x = mvx&0x07;
     int d8y = mvy&0x07;
 
@@ -274,21 +257,27 @@ static void mc_chroma( uint8_t *dst, int i_dst_stride,
     int cC = (8-d8x)*d8y;
     int cD = d8x    *d8y;
 
-    src += (mvy >> 3) * i_src_stride + (mvx >> 3);
+    src += (mvy >> 3) * i_src_stride + (mvx >> 3)*2;
     srcp = &src[i_src_stride];
 
     for( int y = 0; y < i_height; y++ )
     {
         for( int x = 0; x < i_width; x++ )
-            dst[x] = ( cA*src[x] + cB*src[x+1] + cC*srcp[x] + cD*srcp[x+1] + 32 ) >> 6;
-        dst += i_dst_stride;
+        {
+            dstu[x] = ( cA*src[2*x]  + cB*src[2*x+2] +
+                        cC*srcp[2*x] + cD*srcp[2*x+2] + 32 ) >> 6;
+            dstv[x] = ( cA*src[2*x+1]  + cB*src[2*x+3] +
+                        cC*srcp[2*x+1] + cD*srcp[2*x+3] + 32 ) >> 6;
+        }
+        dstu += i_dst_stride;
+        dstv += i_dst_stride;
         src   = srcp;
         srcp += i_src_stride;
     }
 }
 
 #define MC_COPY(W) \
-static void mc_copy_w##W( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_height ) \
+static void mc_copy_w##W( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int i_height ) \
 { \
     mc_copy( src, i_src, dst, i_dst, W, i_height ); \
 }
@@ -296,30 +285,118 @@ MC_COPY( 16 )
 MC_COPY( 8 )
 MC_COPY( 4 )
 
-void x264_plane_copy_c( uint8_t *dst, int i_dst,
-                        uint8_t *src, int i_src, int w, int h)
+void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
+                        pixel *src, intptr_t i_src, int w, int h )
 {
     while( h-- )
     {
-        memcpy( dst, src, w );
+        memcpy( dst, src, w * sizeof(pixel) );
         dst += i_dst;
         src += i_src;
     }
 }
 
-static void prefetch_fenc_null( uint8_t *pix_y, int stride_y,
-                                uint8_t *pix_uv, int stride_uv, int mb_x )
+void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
+                                   pixel *srcu, intptr_t i_srcu,
+                                   pixel *srcv, intptr_t i_srcv, int w, int h )
+{
+    for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
+        for( int x=0; x<w; x++ )
+        {
+            dst[2*x]   = srcu[x];
+            dst[2*x+1] = srcv[x];
+        }
+}
+
+static void x264_plane_copy_deinterleave_c( pixel *dstu, intptr_t i_dstu,
+                                            pixel *dstv, intptr_t i_dstv,
+                                            pixel *src,  intptr_t i_src, int w, int h )
+{
+    for( int y=0; y<h; y++, dstu+=i_dstu, dstv+=i_dstv, src+=i_src )
+        for( int x=0; x<w; x++ )
+        {
+            dstu[x] = src[2*x];
+            dstv[x] = src[2*x+1];
+        }
+}
+
+static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta,
+                                                pixel *dstb, intptr_t i_dstb,
+                                                pixel *dstc, intptr_t i_dstc,
+                                                pixel *src,  intptr_t i_src, int pw, int w, int h )
+{
+    for( int y=0; y<h; y++, dsta+=i_dsta, dstb+=i_dstb, dstc+=i_dstc, src+=i_src )
+        for( int x=0; x<w; x++ )
+        {
+            dsta[x] = src[x*pw];
+            dstb[x] = src[x*pw+1];
+            dstc[x] = src[x*pw+2];
+        }
+}
+
+static void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
+                                                 pixel *dstc, intptr_t i_dstc,
+                                                 uint32_t *src, intptr_t i_src, int w, int h )
+{
+    for( int l = 0; l < h; l++ )
+    {
+        pixel *dsty0 = dsty;
+        pixel *dstc0 = dstc;
+        uint32_t *src0 = src;
+
+        for( int n = 0; n < w; n += 3 )
+        {
+            *(dstc0++) = *src0 & 0x03FF;
+            *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+            *(dsty0++) = *src0 & 0x03FF;
+            *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+        }
+
+        dsty += i_dsty;
+        dstc += i_dstc;
+        src  += i_src;
+    }
+}
+
+static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
+{
+    for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
+        for( int x=0; x<8; x++ )
+        {
+            dst[2*x]   = srcu[x];
+            dst[2*x+1] = srcv[x];
+        }
+}
+
+static void load_deinterleave_chroma_fenc( pixel *dst, pixel *src, intptr_t i_src, int height )
+{
+    x264_plane_copy_deinterleave_c( dst, FENC_STRIDE/2, dst+FENC_STRIDE/2, FENC_STRIDE/2, src, i_src, 8, height );
+}
+
+static void load_deinterleave_chroma_fdec( pixel *dst, pixel *src, intptr_t i_src, int height )
+{
+    x264_plane_copy_deinterleave_c( dst, FDEC_STRIDE/2, dst+FDEC_STRIDE/2, FDEC_STRIDE/2, src, i_src, 8, height );
+}
 
-    uint8_t *src = frame->plane[0];
+    pixel *src = frame->plane[0];
     int i_stride = frame->i_stride[0];
     int i_height = frame->i_lines[0];
     int i_width  = frame->i_width[0];
@@ -363,7 +440,7 @@ void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
     // duplicate last row and column so that their interpolation doesn't have to be special-cased
     for( int y = 0; y < i_height; y++ )
         src[i_width+y*i_stride] = src[i_width-1+y*i_stride];
-    memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), i_width+1 );
+    memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), (i_width+1) * sizeof(pixel) );
     h->mc.frame_init_lowres_core( src, frame->lowres[0], frame->lowres[1], frame->lowres[2], frame->lowres[3],
                                   i_stride, frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres );
     x264_frame_expand_border_lowres( frame );
@@ -379,13 +456,13 @@ void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
             frame->lowres_mvs[y][x][0][0] = 0x7FFF;
 }
 
-static void frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
-                                    int src_stride, int dst_stride, int width, int height )
+static void frame_init_lowres_core( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
+                                    intptr_t src_stride, intptr_t dst_stride, int width, int height )
 {
     for( int y = 0; y < height; y++ )
     {
-        uint8_t *src1 = src0+src_stride;
-        uint8_t *src2 = src1+src_stride;
+        pixel *src1 = src0+src_stride;
+        pixel *src2 = src1+src_stride;
         for( int x = 0; x < width; x++ )
-        :"a"((uint32_t)x), "d"((int32_t)(x>>32)), "r"(y)
-    );
-    return quotient;
-}
-#else
-#define div_64_32(x,y) ((x)/(y))
-#endif
-
 /* Estimate the total amount of influence on future quality that could be had if we
  * were to improve the reference samples used to inter predict any given macroblock. */
 static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
-                                   uint16_t *inter_costs, uint16_t *inv_qscales, int len )
+                                   uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
 {
+    float fps = *fps_factor / 256.f;
     for( int i = 0; i < len; i++ )
     {
-        int propagate_amount = propagate_in[i] + ((intra_costs[i] * inv_qscales[i] + 128)>>8);
-        dst[i] = div_64_32((int64_t)propagate_amount * (intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK)), intra_costs[i]);
+        float intra_cost       = intra_costs[i] * inv_qscales[i];
+        float propagate_amount = propagate_in[i] + intra_cost*fps;
+        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
+        float propagate_denom  = intra_costs[i];
+        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
     }
 }
 
-void x264_mc_init( int cpu, x264_mc_functions_t *pf )
+void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
 {
     pf->mc_luma   = mc_luma;
     pf->get_ref   = get_ref;
+    pf->mc_chroma = mc_chroma;
 
     pf->avg[PIXEL_16x16]= pixel_avg_16x16;
@@ -442,9 +509,11 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
     pf->avg[PIXEL_8x16] = pixel_avg_8x16;
     pf->avg[PIXEL_8x8]  = pixel_avg_8x8;
     pf->avg[PIXEL_8x4]  = pixel_avg_8x4;
+    pf->avg[PIXEL_4x16] = pixel_avg_4x16;
     pf->avg[PIXEL_4x8]  = pixel_avg_4x8;
     pf->avg[PIXEL_4x4]  = pixel_avg_4x4;
     pf->avg[PIXEL_4x2]  = pixel_avg_4x2;
+    pf->avg[PIXEL_2x8]  = pixel_avg_2x8;
     pf->avg[PIXEL_2x4]  = pixel_avg_2x4;
     pf->avg[PIXEL_2x2]  = pixel_avg_2x2;
 
@@ -458,10 +527,20 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
     pf->copy[PIXEL_8x8]   = mc_copy_w8;
     pf->copy[PIXEL_4x4]   = mc_copy_w4;
 
+    pf->store_interleave_chroma       = store_interleave_chroma;
+    pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc;
+    pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
+
     pf->plane_copy = x264_plane_copy_c;
+    pf->plane_copy_interleave = x264_plane_copy_interleave_c;
+    pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
+
     pf->hpel_filter = hpel_filter;
 
-    pf->prefetch_fenc = prefetch_fenc_null;
+    pf->prefetch_fenc_420 = prefetch_fenc_null;
+    pf->prefetch_fenc_422 = prefetch_fenc_null;
     pf->prefetch_ref  = prefetch_ref_null;
     pf->memcpy_aligned = memcpy;
     pf->memzero_aligned = memzero_aligned;
@@ -474,39 +553,63 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
 
     pf->mbtree_propagate_cost = mbtree_propagate_cost;
 
-#ifdef HAVE_MMX
+#if HAVE_MMX
     x264_mc_init_mmx( cpu, pf );
#endif
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
     if( cpu&X264_CPU_ALTIVEC )
         x264_mc_altivec_init( pf );
 #endif
-#ifdef HAVE_ARMV6
+#if HAVE_ARMV6
     x264_mc_init_arm( cpu, pf );
 #endif
+
+    if( cpu_independent )
+        pf->mbtree_propagate_cost = mbtree_propagate_cost;
 }
 
 void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 {
-    const int b_interlaced = h->sh.b_mbaff;
-    const int stride = frame->i_stride[0] << b_interlaced;
-    const int width = frame->i_width[0];
-    int start = (mb_y*16 >> b_interlaced) - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
-    int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
-    int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
+    const int b_interlaced = PARAM_INTERLACED;
+    int start = mb_y*16 - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
+    int height = (b_end ? frame->i_lines[0] + 16*PARAM_INTERLACED : (mb_y+b_interlaced)*16) + 8;
 
     if( mb_y & b_interlaced )
         return;
 
-    for( int y = 0; y <= b_interlaced; y++, offs += frame->i_stride[0] )
+    for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
     {
-        h->mc.hpel_filter(
-            frame->filtered[1] + offs,
-            frame->filtered[2] + offs,
-            frame->filtered[3] + offs,
-            frame->plane[0] + offs,
-            stride, width + 16, height - start,
-            h->scratch_buffer );
+        int stride = frame->i_stride[p];
+        const int width = frame->i_width[p];
+        int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
+
+        if( !b_interlaced || h->mb.b_adaptive_mbaff )
+            h->mc.hpel_filter(
+                frame->filtered[p][1] + offs,
+                frame->filtered[p][2] + offs,
+                frame->filtered[p][3] + offs,
+                frame->plane[p] + offs,
+                stride, width + 16, height - start,
+                h->scratch_buffer );
+
+        if( b_interlaced )
+        {
+            /* MC must happen between pixels in the same field. */
+            stride = frame->i_stride[p] << 1;
+            start = (mb_y*16 >> 1) - 8;
+            int height_fld = ((b_end ? frame->i_lines[p] : mb_y*16) >> 1) + 8;
+            offs = start*stride - 8;
+            for( int i = 0; i < 2; i++, offs += frame->i_stride[p] )
+            {
+                h->mc.hpel_filter(
+                    frame->filtered_fld[p][1] + offs,
+                    frame->filtered_fld[p][2] + offs,
+                    frame->filtered_fld[p][3] + offs,
+                    frame->plane_fld[p] + offs,
+                    stride, width + 16, height_fld - start,
+                    h->scratch_buffer );
+            }
+        }
     }
 
     /* generate integral image:
@@ -516,6 +619,7 @@ void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 
     if( frame->integral )
     {
+        int stride = frame->i_stride[0];
         if( start < 0 )
         {
             memset( frame->integral - PADV * stride - PADH, 0, stride * sizeof(uint16_t) );
@@ -525,7 +629,7 @@ void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
             height += PADV-9;
         for( int y = start; y < height; y++ )
         {
-            uint8_t *pix = frame->plane[0] + y * stride - PADH;
+            pixel *pix = frame->plane[0] + y * stride - PADH;
             uint16_t *sum8 = frame->integral + (y+1) * stride - PADH;
             uint16_t *sum4;
             if( h->frames.b_have_sub8x8_esa )