X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fframe.c;h=60762201c3b262a9666cebb52919e19e1c1b3f10;hb=71f11146131d1804311d86535a6aa7d0ff777501;hp=ca6aba9093b14197ce232dec300b97670bd63402;hpb=ac411e297aaaec200b33b6dab082e12c55c3b7ef;p=x264 diff --git a/common/frame.c b/common/frame.c index ca6aba90..60762201 100644 --- a/common/frame.c +++ b/common/frame.c @@ -21,22 +21,22 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. *****************************************************************************/ -#include #include #include #include "common.h" -#include "macroblock.h" x264_frame_t *x264_frame_new( x264_t *h ) { x264_frame_t *frame = x264_malloc( sizeof( x264_frame_t ) ); - int i; + int i, j; int i_mb_count = h->mb.i_mb_count; int i_stride; int i_lines; + memset( frame, 0, sizeof(x264_frame_t) ); + /* allocate frame data (+64 for extra data for me) */ i_stride = ( ( h->param.i_width + 15 )&0xfffff0 )+ 64; i_lines = ( ( h->param.i_height + 15 )&0xfffff0 ); @@ -73,16 +73,36 @@ x264_frame_t *x264_frame_new( x264_t *h ) ( frame->i_lines[0] + 64 ) ); frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) + - frame->i_stride[0] * 32 + 32; + frame->i_stride[0] * 32 + 32; + } + + if( h->frames.b_have_lowres ) + { + frame->i_stride_lowres = frame->i_stride[0]/2 + 32; + frame->i_lines_lowres = frame->i_lines[0]/2; + for( i = 0; i < 4; i++ ) + { + frame->buffer[7+i] = x264_malloc( frame->i_stride_lowres * + ( frame->i_lines[0]/2 + 64 ) ); + frame->lowres[i] = ((uint8_t*)frame->buffer[7+i]) + + frame->i_stride_lowres * 32 + 32; + } } + if( h->param.analyse.i_me_method == X264_ME_ESA ) + { + frame->buffer[11] = x264_malloc( frame->i_stride[0] * (frame->i_lines[0] + 64) * sizeof(uint16_t) ); + frame->integral = (uint16_t*)frame->buffer[11] + frame->i_stride[0] * 32 + 32; + } frame->i_poc = -1; frame->i_type = X264_TYPE_AUTO; frame->i_qpplus1 = 0; frame->i_pts = -1; frame->i_frame = -1; + frame->i_frame_num = -1; + frame->mb_type= 
x264_malloc( i_mb_count * sizeof( int8_t) ); frame->mv[0] = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) ); frame->ref[0] = x264_malloc( 4 * i_mb_count * sizeof( int8_t ) ); if( h->param.i_bframe ) @@ -96,20 +116,28 @@ x264_frame_t *x264_frame_new( x264_t *h ) frame->ref[1] = NULL; } + frame->i_row_bits = x264_malloc( i_lines/16 * sizeof( int ) ); + frame->i_row_qp = x264_malloc( i_lines/16 * sizeof( int ) ); + for( i = 0; i < h->param.i_bframe + 2; i++ ) + for( j = 0; j < h->param.i_bframe + 2; j++ ) + frame->i_row_satds[i][j] = x264_malloc( i_lines/16 * sizeof( int ) ); + return frame; } void x264_frame_delete( x264_frame_t *frame ) { - int i; + int i, j; for( i = 0; i < frame->i_plane; i++ ) - { x264_free( frame->buffer[i] ); - } - for( i = 4; i < 7; i++ ) /* filtered planes */ - { + for( i = 4; i < 12; i++ ) /* filtered planes */ x264_free( frame->buffer[i] ); - } + for( i = 0; i < X264_BFRAME_MAX+2; i++ ) + for( j = 0; j < X264_BFRAME_MAX+2; j++ ) + x264_free( frame->i_row_satds[i][j] ); + x264_free( frame->i_row_bits ); + x264_free( frame->i_row_qp ); + x264_free( frame->mb_type ); x264_free( frame->mv[0] ); x264_free( frame->mv[1] ); x264_free( frame->ref[0] ); @@ -158,103 +186,85 @@ void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src +static void plane_expand_border( uint8_t *pix, int i_stride, int i_height, int i_pad ) +{ +#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride ) + const int i_width = i_stride - 2*i_pad; + int y; + + for( y = 0; y < i_height; y++ ) + { + /* left band */ + memset( PPIXEL(-i_pad, y), PPIXEL(0, y)[0], i_pad ); + /* right band */ + memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_pad ); + } + /* upper band */ + for( y = 0; y < i_pad; y++ ) + memcpy( PPIXEL(-i_pad, -y-1), PPIXEL(-i_pad, 0), i_stride ); + /* lower band */ + for( y = 0; y < i_pad; y++ ) + memcpy( PPIXEL(-i_pad, i_height+y), PPIXEL(-i_pad, i_height-1), i_stride ); +#undef PPIXEL +} + void x264_frame_expand_border( x264_frame_t 
*frame ) { - int w; - int i, y; + int i; for( i = 0; i < frame->i_plane; i++ ) { -#define PPIXEL(x, y) ( frame->plane[i] + (x) +(y)*frame->i_stride[i] ) - w = ( i == 0 ) ? 32 : 16; - - for( y = 0; y < w; y++ ) - { - /* upper band */ - memcpy( PPIXEL(0,-y-1), PPIXEL(0,0), frame->i_stride[i] - 2 * w); - /* up left corner */ - memset( PPIXEL(-w,-y-1 ), PPIXEL(0,0)[0], w ); - /* up right corner */ - memset( PPIXEL(frame->i_stride[i] - 2*w,-y-1), PPIXEL( frame->i_stride[i]-1-2*w,0)[0], w ); - - /* lower band */ - memcpy( PPIXEL(0, frame->i_lines[i]+y), PPIXEL(0,frame->i_lines[i]-1), frame->i_stride[i] - 2 * w ); - /* low left corner */ - memset( PPIXEL(-w, frame->i_lines[i]+y), PPIXEL(0,frame->i_lines[i]-1)[0], w); - /* low right corner */ - memset( PPIXEL(frame->i_stride[i]-2*w, frame->i_lines[i]+y), PPIXEL(frame->i_stride[i]-1-2*w,frame->i_lines[i]-1)[0], w); - - } - for( y = 0; y < frame->i_lines[i]; y++ ) - { - /* left band */ - memset( PPIXEL( -w, y ), PPIXEL( 0, y )[0], w ); - /* right band */ - memset( PPIXEL( frame->i_stride[i]-2*w, y ), PPIXEL( frame->i_stride[i] - 1-2*w, y )[0], w ); - } -#undef PPIXEL + int i_pad = i ? 16 : 32; + plane_expand_border( frame->plane[i], frame->i_stride[i], frame->i_lines[i], i_pad ); } } void x264_frame_expand_border_filtered( x264_frame_t *frame ) { - /* during filtering, 8 extra pixels were filtered on each edge. 
we want to expand border from the last filtered pixel */ - int w; - int i, y; + int i; for( i = 1; i < 4; i++ ) + plane_expand_border( frame->filtered[i] - 8*frame->i_stride[0] - 8, frame->i_stride[0], frame->i_lines[0]+2*8, 24 ); +} + +void x264_frame_expand_border_lowres( x264_frame_t *frame ) +{ + int i; + for( i = 0; i < 4; i++ ) + plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_lines_lowres, 32 ); +} + +void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame ) +{ + int i, y; + for( i = 0; i < frame->i_plane; i++ ) { -#define PPIXEL(x, y) ( frame->filtered[i] + (x) +(y)*frame->i_stride[0] ) - w = 32; + int i_subsample = i ? 1 : 0; + int i_width = h->param.i_width >> i_subsample; + int i_height = h->param.i_height >> i_subsample; + int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample; + int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample; - for( y = 8; y < w; y++ ) + if( i_padx ) { - /* upper band */ - memcpy( PPIXEL(-8,-y-1), PPIXEL(-8,-8), frame->i_stride[0] - 2 * w + 16 ); - /* up left corner */ - memset( PPIXEL(-w,-y-1), PPIXEL(-8,-8)[0], w - 8 ); - /* up right corner */ - memset( PPIXEL(frame->i_stride[0] - 2*w + 8,-y-1), PPIXEL( frame->i_stride[0]-1-2*w+8,-8)[0], w - 8 ); - - /* lower band */ - memcpy( PPIXEL(-8, frame->i_lines[0]+y), PPIXEL(-8,frame->i_lines[0]+7), frame->i_stride[0] - 2 * w + 16 ); - /* low left corner */ - memset( PPIXEL(-w, frame->i_lines[0]+y), PPIXEL(-8,frame->i_lines[0]+7)[0], w - 8); - /* low right corner */ - memset( PPIXEL(frame->i_stride[0]-2*w+8, frame->i_lines[0]+y), PPIXEL(frame->i_stride[0]+7-2*w,frame->i_lines[0]+7)[0], w-8); - + for( y = 0; y < i_height; y++ ) + memset( &frame->plane[i][y*frame->i_stride[i] + i_width], + frame->plane[i][y*frame->i_stride[i] + i_width - 1], + i_padx ); } - for( y = -8; y < frame->i_lines[0]+8; y++ ) + if( i_pady ) { - /* left band */ - memset( PPIXEL( -w, y ), PPIXEL( -8, y )[0], w - 8 ); - /* right band */ - 
memset( PPIXEL( frame->i_stride[0]-2*w + 8, y ), PPIXEL( frame->i_stride[0] + 7 - 2*w, y )[0], w - 8 ); + for( y = i_height; y < i_height + i_pady; y++ ) + memcpy( &frame->plane[i][y*frame->i_stride[i]], + &frame->plane[i][(i_height-1)*frame->i_stride[i]], + i_width + i_padx ); } -#undef PPIXEL } } -/* FIXME theses tables are duplicated with the ones in macroblock.c */ -static const uint8_t block_idx_xy[4][4] = -{ - { 0, 2, 8, 10}, - { 1, 3, 9, 11}, - { 4, 6, 12, 14}, - { 5, 7, 13, 15} -}; -static const int i_chroma_qp_table[52] = -{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 29, 30, 31, 32, 32, 33, 34, 34, 35, 35, - 36, 36, 37, 37, 37, 38, 38, 38, 39, 39, - 39, 39 -}; +/* Deblocking filter */ -/* Deblocking filter (p153) */ static const int i_alpha_table[52] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -295,355 +305,191 @@ static inline int clip_uint8( int a ) return a; } -static inline void deblocking_filter_edgev( x264_t *h, uint8_t *pix, int i_pix_stride, int bS[4], int i_QP ) +static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 ) { int i, d; - const int i_index_a = x264_clip3( i_QP + h->sh.i_alpha_c0_offset, 0, 51 ); - const int alpha = i_alpha_table[i_index_a]; - const int beta = i_beta_table[x264_clip3( i_QP + h->sh.i_beta_offset, 0, 51 )]; - - for( i = 0; i < 4; i++ ) - { - if( bS[i] == 0 ) - { - pix += 4 * i_pix_stride; + for( i = 0; i < 4; i++ ) { + if( tc0[i] < 0 ) { + pix += 4*ystride; continue; } - - if( bS[i] < 4 ) - { - const int tc0 = i_tc0_table[i_index_a][bS[i] - 1]; - - /* 4px edge length */ - for( d = 0; d < 4; d++ ) - { - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int p2 = pix[-3]; - const int q0 = pix[0]; - const int q1 = pix[1]; - const int q2 = pix[2]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) - { - int tc = tc0; - int i_delta; - - if( abs( p2 - p0 ) < beta ) - 
{ - pix[-2] = p1 + x264_clip3( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); - tc++; - } - if( abs( q2 - q0 ) < beta ) - { - pix[1] = q1 + x264_clip3( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); - tc++; - } - - i_delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ - pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + for( d = 0; d < 4; d++ ) { + const int p2 = pix[-3*xstride]; + const int p1 = pix[-2*xstride]; + const int p0 = pix[-1*xstride]; + const int q0 = pix[ 0*xstride]; + const int q1 = pix[ 1*xstride]; + const int q2 = pix[ 2*xstride]; + + if( abs( p0 - q0 ) < alpha && + abs( p1 - p0 ) < beta && + abs( q1 - q0 ) < beta ) { + + int tc = tc0[i]; + int delta; + + if( abs( p2 - p0 ) < beta ) { + pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] ); + tc++; } - pix += i_pix_stride; - } - } - else - { - /* 4px edge length */ - for( d = 0; d < 4; d++ ) - { - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int p2 = pix[-3]; - - const int q0 = pix[0]; - const int q1 = pix[1]; - const int q2 = pix[2]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) - { - if( abs( p0 - q0 ) < (( alpha >> 2 ) + 2 ) ) - { - if( abs( p2 - p0 ) < beta ) - { - const int p3 = pix[-4]; - /* p0', p1', p2' */ - pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; - pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; - pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; - } - else - { - /* p0' */ - pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; - } - if( abs( q2 - q0 ) < beta ) - { - const int q3 = pix[3]; - /* q0', q1', q2' */ - pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; - pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; - pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; - } - else - { - /* q0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } - } - else - { - /* p0', q0' */ - pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 
2; - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } + if( abs( q2 - q0 ) < beta ) { + pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] ); + tc++; } - pix += i_pix_stride; + + delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */ + pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */ } + pix += ystride; } } } +static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) +{ + deblock_luma_c( pix, stride, 1, alpha, beta, tc0 ); +} +static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) +{ + deblock_luma_c( pix, 1, stride, alpha, beta, tc0 ); +} -static inline void deblocking_filter_edgecv( x264_t *h, uint8_t *pix, int i_pix_stride, int bS[4], int i_QP ) +static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 ) { int i, d; - const int i_index_a = x264_clip3( i_QP + h->sh.i_alpha_c0_offset, 0, 51 ); - const int alpha = i_alpha_table[i_index_a]; - const int beta = i_beta_table[x264_clip3( i_QP + h->sh.i_beta_offset, 0, 51 )]; - - for( i = 0; i < 4; i++ ) - { - if( bS[i] == 0 ) - { - pix += 2 * i_pix_stride; + for( i = 0; i < 4; i++ ) { + const int tc = tc0[i]; + if( tc <= 0 ) { + pix += 2*ystride; continue; } - - if( bS[i] < 4 ) - { - const int tc = i_tc0_table[i_index_a][bS[i] - 1] + 1; - /* 2px edge length (because we use same bS than the one for luma) */ - for( d = 0; d < 2; d++ ) - { - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int q0 = pix[0]; - const int q1 = pix[1]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) - { - const int i_delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - - pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ - pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ - } - pix += i_pix_stride; - } - } - else - { - /* 2px edge length (because we 
use same bS than the one for luma) */ - for( d = 0; d < 2; d++ ) - { - const int p0 = pix[-1]; - const int p1 = pix[-2]; - const int q0 = pix[0]; - const int q1 = pix[1]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) - { - pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ - } - pix += i_pix_stride; + for( d = 0; d < 2; d++ ) { + const int p1 = pix[-2*xstride]; + const int p0 = pix[-1*xstride]; + const int q0 = pix[ 0*xstride]; + const int q1 = pix[ 1*xstride]; + + if( abs( p0 - q0 ) < alpha && + abs( p1 - p0 ) < beta && + abs( q1 - q0 ) < beta ) { + + int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */ + pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */ } + pix += ystride; } } } +static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) +{ + deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 ); +} +static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) +{ + deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 ); +} -static inline void deblocking_filter_edgeh( x264_t *h, uint8_t *pix, int i_pix_stride, int bS[4], int i_QP ) +static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta ) { - int i, d; - const int i_index_a = x264_clip3( i_QP + h->sh.i_alpha_c0_offset, 0, 51 ); - const int alpha = i_alpha_table[i_index_a]; - const int beta = i_beta_table[x264_clip3( i_QP + h->sh.i_beta_offset, 0, 51 )]; - - int i_pix_next = i_pix_stride; - - for( i = 0; i < 4; i++ ) - { - if( bS[i] == 0 ) - { - pix += 4; - continue; - } - - if( bS[i] < 4 ) - { - const int tc0 = i_tc0_table[i_index_a][bS[i] - 1]; - /* 4px edge length */ - for( d = 0; d < 4; d++ ) - { - const int p0 = pix[-i_pix_next]; - const int p1 = pix[-2*i_pix_next]; - const int p2 = pix[-3*i_pix_next]; - const int q0 = pix[0]; - 
const int q1 = pix[1*i_pix_next]; - const int q2 = pix[2*i_pix_next]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) + int d; + for( d = 0; d < 16; d++ ) { + const int p2 = pix[-3*xstride]; + const int p1 = pix[-2*xstride]; + const int p0 = pix[-1*xstride]; + const int q0 = pix[ 0*xstride]; + const int q1 = pix[ 1*xstride]; + const int q2 = pix[ 2*xstride]; + + if( abs( p0 - q0 ) < alpha && + abs( p1 - p0 ) < beta && + abs( q1 - q0 ) < beta ) { + + if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){ + if( abs( p2 - p0 ) < beta) { - int tc = tc0; - int i_delta; - - if( abs( p2 - p0 ) < beta ) - { - pix[-2*i_pix_next] = p1 + x264_clip3( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); - tc++; - } - if( abs( q2 - q0 ) < beta ) - { - pix[i_pix_next] = q1 + x264_clip3( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); - tc++; - } - - i_delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - pix[-i_pix_next] = clip_uint8( p0 + i_delta ); /* p0' */ - pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + const int p3 = pix[-4*xstride]; + /* p0', p1', p2' */ + pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; + pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; + pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; + } else { + /* p0' */ + pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; } - pix++; - } - } - else - { - /* 4px edge length */ - for( d = 0; d < 4; d++ ) - { - const int p0 = pix[-i_pix_next]; - const int p1 = pix[-2*i_pix_next]; - const int p2 = pix[-3*i_pix_next]; - const int q0 = pix[0]; - const int q1 = pix[1*i_pix_next]; - const int q2 = pix[2*i_pix_next]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) + if( abs( q2 - q0 ) < beta) { - const int p3 = pix[-4*i_pix_next]; - const int q3 = pix[ 3*i_pix_next]; - - if( abs( p0 - q0 ) < (( alpha >> 2 ) + 2 ) ) - { - if( abs( p2 - p0 ) < beta ) - { - /* p0', p1', p2' */ - 
pix[-1*i_pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; - pix[-2*i_pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; - pix[-3*i_pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; - } - else - { - /* p0' */ - pix[-1*i_pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; - } - if( abs( q2 - q0 ) < beta ) - { - /* q0', q1', q2' */ - pix[0*i_pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; - pix[1*i_pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; - pix[2*i_pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; - } - else - { - /* q0' */ - pix[0*i_pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } - } - else - { - /* p0' */ - pix[-1*i_pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; - /* q0' */ - pix[0*i_pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } + const int q3 = pix[3*xstride]; + /* q0', q1', q2' */ + pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; + pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; + pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; + } else { + /* q0' */ + pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; } - pix++; + }else{ + /* p0', q0' */ + pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; } - } + pix += ystride; } } +static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta ) +{ + deblock_luma_intra_c( pix, stride, 1, alpha, beta ); +} +static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta ) +{ + deblock_luma_intra_c( pix, 1, stride, alpha, beta ); +} -static inline void deblocking_filter_edgech( x264_t *h, uint8_t *pix, int i_pix_stride, int bS[4], int i_QP ) -{ - int i, d; - const int i_index_a = x264_clip3( i_QP + h->sh.i_alpha_c0_offset, 0, 51 ); - const int alpha = i_alpha_table[i_index_a]; - const int beta = i_beta_table[x264_clip3( i_QP + h->sh.i_beta_offset, 0, 51 )]; - - int i_pix_next = i_pix_stride; - - for( i = 0; i < 4; i++ ) - { - if( bS[i] == 0 ) - { - pix += 2; - continue; +static inline void deblock_chroma_intra_c( uint8_t *pix, int 
xstride, int ystride, int alpha, int beta ) +{ + int d; + for( d = 0; d < 8; d++ ) { + const int p1 = pix[-2*xstride]; + const int p0 = pix[-1*xstride]; + const int q0 = pix[ 0*xstride]; + const int q1 = pix[ 1*xstride]; + + if( abs( p0 - q0 ) < alpha && + abs( p1 - p0 ) < beta && + abs( q1 - q0 ) < beta ) { + + pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */ + pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */ } - if( bS[i] < 4 ) - { - int tc = i_tc0_table[i_index_a][bS[i] - 1] + 1; - /* 2px edge length (see deblocking_filter_edgecv) */ - for( d = 0; d < 2; d++ ) - { - const int p0 = pix[-1*i_pix_next]; - const int p1 = pix[-2*i_pix_next]; - const int q0 = pix[0]; - const int q1 = pix[1*i_pix_next]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) - { - int i_delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - pix[-i_pix_next] = clip_uint8( p0 + i_delta ); /* p0' */ - pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ - } - pix++; - } - } - else - { - /* 2px edge length (see deblocking_filter_edgecv) */ - for( d = 0; d < 2; d++ ) - { - const int p0 = pix[-1*i_pix_next]; - const int p1 = pix[-2*i_pix_next]; - const int q0 = pix[0]; - const int q1 = pix[1*i_pix_next]; - - if( abs( p0 - q0 ) < alpha && - abs( p1 - p0 ) < beta && - abs( q1 - q0 ) < beta ) - { - pix[-i_pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ - } - pix++; - } - } + pix += ystride; + } +} +static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta ) +{ + deblock_chroma_intra_c( pix, stride, 1, alpha, beta ); +} +static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta ) +{ + deblock_chroma_intra_c( pix, 1, stride, alpha, beta ); +} + +static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma, + x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra ) +{ + int i; + const int 
index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 ); + const int alpha = i_alpha_table[index_a]; + const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )]; + + if( bS[0] < 4 ) { + int8_t tc[4]; + for(i=0; i<4; i++) + tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma; + pf_inter( pix, i_stride, alpha, beta, tc ); + } else { + pf_intra( pix, i_stride, alpha, beta ); } } @@ -659,26 +505,40 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type ) const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x; const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x; const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x; - int i_edge; - int i_dir; + const int b_8x8_transform = h->mb.mb_transform_size[mb_xy]; + const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4; + int i_edge, i_dir; + + /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of + * entropy coding, but per 64 coeffs for the purpose of deblocking */ + if( !h->param.b_cabac && b_8x8_transform ) + { + uint32_t *nnz = (uint32_t*)h->mb.non_zero_count[mb_xy]; + if( nnz[0] ) nnz[0] = 0x01010101; + if( nnz[1] ) nnz[1] = 0x01010101; + if( nnz[2] ) nnz[2] = 0x01010101; + if( nnz[3] ) nnz[3] = 0x01010101; + } /* i_dir == 0 -> vertical edge * i_dir == 1 -> horizontal edge */ for( i_dir = 0; i_dir < 2; i_dir++ ) { - int i_start; + int i_start = (i_dir ? mb_y : mb_x) ? 0 : 1; int i_qp, i_qpn; - i_start = (( i_dir == 0 && mb_x != 0 ) || ( i_dir == 1 && mb_y != 0 ) ) ? 0 : 1; - - for( i_edge = i_start; i_edge < 4; i_edge++ ) + for( i_edge = i_start; i_edge < i_edge_end; i_edge++ ) { - int mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride ); - int mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 ); - int mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? 
mb_4x4 - 4 : mb_4x4 - 4 * s4x4 ); - + int mbn_xy, mbn_8x8, mbn_4x4; int bS[4]; /* filtering strength */ + if( b_8x8_transform && (i_edge&1) ) + continue; + + mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride ); + mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 ); + mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 ); + /* *** Get bS for each 4px for the current edge *** */ if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) ) { @@ -736,33 +596,39 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type ) if( i_dir == 0 ) { /* vertical edge */ - deblocking_filter_edgev( h, &h->fdec->plane[0][16 * mb_y * h->fdec->i_stride[0]+ 16 * mb_x + 4 * i_edge], - h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1); - if( (i_edge % 2) == 0 ) + deblock_edge( h, &h->fdec->plane[0][16*mb_y * h->fdec->i_stride[0] + 16*mb_x + 4*i_edge], + h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1, 0, + h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra ); + if( !(i_edge & 1) ) { /* U/V planes */ int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] + i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1; - deblocking_filter_edgecv( h, &h->fdec->plane[1][8*(mb_y*h->fdec->i_stride[1]+mb_x)+i_edge*2], - h->fdec->i_stride[1], bS, i_qpc ); - deblocking_filter_edgecv( h, &h->fdec->plane[2][8*(mb_y*h->fdec->i_stride[2]+mb_x)+i_edge*2], - h->fdec->i_stride[2], bS, i_qpc ); + deblock_edge( h, &h->fdec->plane[1][8*(mb_y*h->fdec->i_stride[1]+mb_x)+2*i_edge], + h->fdec->i_stride[1], bS, i_qpc, 1, + h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra ); + deblock_edge( h, &h->fdec->plane[2][8*(mb_y*h->fdec->i_stride[2]+mb_x)+2*i_edge], + h->fdec->i_stride[2], bS, i_qpc, 1, + h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra ); } } else { /* horizontal edge */ - deblocking_filter_edgeh( h, 
&h->fdec->plane[0][(16*mb_y + 4 * i_edge) * h->fdec->i_stride[0]+ 16 * mb_x], - h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1 ); + deblock_edge( h, &h->fdec->plane[0][(16*mb_y + 4*i_edge) * h->fdec->i_stride[0] + 16*mb_x], + h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1, 0, + h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra ); /* U/V planes */ - if( ( i_edge % 2 ) == 0 ) + if( !(i_edge & 1) ) { int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] + i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1; - deblocking_filter_edgech( h, &h->fdec->plane[1][8*(mb_y*h->fdec->i_stride[1]+mb_x)+i_edge*2*h->fdec->i_stride[1]], - h->fdec->i_stride[1], bS, i_qpc ); - deblocking_filter_edgech( h, &h->fdec->plane[2][8*(mb_y*h->fdec->i_stride[2]+mb_x)+i_edge*2*h->fdec->i_stride[2]], - h->fdec->i_stride[2], bS, i_qpc ); + deblock_edge( h, &h->fdec->plane[1][8*(mb_y*h->fdec->i_stride[1]+mb_x)+2*i_edge*h->fdec->i_stride[1]], + h->fdec->i_stride[1], bS, i_qpc, 1, + h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra ); + deblock_edge( h, &h->fdec->plane[2][8*(mb_y*h->fdec->i_stride[2]+mb_x)+2*i_edge*h->fdec->i_stride[2]], + h->fdec->i_stride[2], bS, i_qpc, 1, + h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra ); } } } @@ -778,6 +644,57 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type ) } } +#ifdef HAVE_MMXEXT +void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta ); +void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta ); +#endif + +#ifdef ARCH_X86_64 +void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int 
alpha, int beta, int8_t *tc0 ); +#elif defined( HAVE_MMXEXT ) +void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); + +void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) +{ + x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 ); + x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 ); +} +#endif +void x264_deblock_init( int cpu, x264_deblock_function_t *pf ) +{ + pf->deblock_v_luma = deblock_v_luma_c; + pf->deblock_h_luma = deblock_h_luma_c; + pf->deblock_v_chroma = deblock_v_chroma_c; + pf->deblock_h_chroma = deblock_h_chroma_c; + pf->deblock_v_luma_intra = deblock_v_luma_intra_c; + pf->deblock_h_luma_intra = deblock_h_luma_intra_c; + pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c; + pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c; + +#ifdef HAVE_MMXEXT + if( cpu&X264_CPU_MMXEXT ) + { + pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext; + pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext; + pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext; + pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext; +#ifdef ARCH_X86_64 + if( cpu&X264_CPU_SSE2 ) + { + pf->deblock_v_luma = x264_deblock_v_luma_sse2; + pf->deblock_h_luma = x264_deblock_h_luma_sse2; + } +#else + pf->deblock_v_luma = x264_deblock_v_luma_mmxext; + pf->deblock_h_luma = x264_deblock_h_luma_mmxext; +#endif + } +#endif +}