From 1f0e78d8ea5b0d260f8497d4817b1962f6b0894d Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Mon, 19 Jan 2009 15:17:53 -0800 Subject: [PATCH] Eliminate support for direct_8x8_inference=0 The benefit in the most extreme contrived situation was at most 0.001 dB PSNR, at the cost of slower decoding. As this option was basically useless, it was a waste of code and prevented some other useful optimizations. Remove some unused mc code related to sub-8x8 partitions. Small deblocking speedup when p4x4 is used. Also remove unused x264_nal_decode prototype from x264.h. --- common/common.c | 3 - common/common.h | 2 +- common/frame.c | 2 +- common/macroblock.c | 140 ++++++-------------------------------------- common/macroblock.h | 30 ++-------- encoder/encoder.c | 3 - encoder/set.c | 6 +- x264.c | 6 -- x264.h | 7 +-- 9 files changed, 26 insertions(+), 173 deletions(-) diff --git a/common/common.c b/common/common.c index 6669cae8..c163e092 100644 --- a/common/common.c +++ b/common/common.c @@ -123,7 +123,6 @@ void x264_param_default( x264_param_t *param ) param->analyse.b_chroma_me = 1; param->analyse.i_mv_range_thread = -1; param->analyse.i_mv_range = -1; // set from level_idc - param->analyse.i_direct_8x8_inference = 1; param->analyse.i_chroma_qp_offset = 0; param->analyse.b_fast_pskip = 1; param->analyse.b_dct_decimate = 1; @@ -458,8 +457,6 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) p->analyse.b_weighted_bipred = atobool(value); OPT2("direct", "direct-pred") b_error |= parse_enum( value, x264_direct_pred_names, &p->analyse.i_direct_mv_pred ); - OPT("direct-8x8") - p->analyse.i_direct_8x8_inference = atoi(value); OPT("chroma-qp-offset") p->analyse.i_chroma_qp_offset = atoi(value); OPT("me") diff --git a/common/common.h b/common/common.h index 4e1782a2..78b1efb6 100644 --- a/common/common.h +++ b/common/common.h @@ -440,7 +440,7 @@ struct x264_t /* current value */ int i_type; int i_partition; - int i_sub_partition[4]; +
DECLARE_ALIGNED_4( uint8_t i_sub_partition[4] ); int b_transform_8x8; int i_cbp_luma; diff --git a/common/frame.c b/common/frame.c index 021242f1..5b04d682 100644 --- a/common/frame.c +++ b/common/frame.c @@ -623,7 +623,6 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) const int b_interlaced = h->sh.b_mbaff; const int mvy_limit = 4 >> b_interlaced; const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset); - const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8); int mb_x; int stridey = h->fdec->i_stride[0]; int stride2y = stridey << b_interlaced; @@ -641,6 +640,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) const int b_8x8_transform = h->mb.mb_transform_size[mb_xy]; const int i_qp = h->mb.qp[mb_xy]; int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4; + int no_sub8x8 = h->mb.type[mb_xy] != P_8x8 || !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8); uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x; uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x; uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x; diff --git a/common/macroblock.c b/common/macroblock.c index fed6e2b4..5f5823a7 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -157,7 +157,6 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h ) int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x; int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x; int i8, i4; - int b8x8; const int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ]; x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 ); @@ -169,8 +168,6 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h ) x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 ); return 1; } - b8x8 = h->sps->b_direct8x8_inference || - (type_col != P_8x8 && type_col != B_SKIP && type_col != B_DIRECT && type_col != B_8x8); for( i8 = 0; i8 < 4; i8++ ) { @@ -182,30 +179,12 @@ static int 
x264_mb_predict_mv_direct16x16_temporal( x264_t *h ) if( i_ref >= 0 ) { const int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0]; - + const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride]; + const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8; + const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8; x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref ); - - if( b8x8 ) - { - const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride]; - const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8; - const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8; - x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) ); - x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) ); - } - else - { - for( i4 = 0; i4 < 4; i4++ ) - { - const int x4 = i4%2 + 2*x8; - const int y4 = i4/2 + 2*y8; - const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + x4 + y4 * h->mb.i_b4_stride ]; - const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8; - const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8; - x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, pack16to32_mask(l0x, l0y) ); - x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) ); - } - } + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) ); + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) ); } else { @@ -220,8 +199,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h ) if( h->param.i_threads > 1 ) { - int di = b8x8 ? 
4 : 1; - for( i4=0; i4<16; i4+=di ) + for( i4=0; i4<16; i4+=4 ) { if( h->mb.cache.mv[0][x264_scan8[i4]][1] > h->mb.mv_max_spel[1] || h->mb.cache.mv[1][x264_scan8[i4]][1] > h->mb.mv_max_spel[1] ) @@ -247,8 +225,7 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h ) int ref[2]; DECLARE_ALIGNED_8( int16_t mv[2][2] ); int i_list; - int i8, i4; - int b8x8; + int i8; const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ]; const int8_t *l1ref1 = &h->fref1[0]->ref[1][ h->mb.i_b8_xy ]; const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[0][ h->mb.i_b4_xy ]; @@ -310,9 +287,6 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h ) if( IS_INTRA( type_col ) || (ref[0]&&ref[1]) ) return 1; - b8x8 = h->sps->b_direct8x8_inference || - (type_col != P_8x8 && type_col != B_SKIP && type_col != B_DIRECT && type_col != B_8x8); - /* col_zero_flag */ for( i8=0; i8<4; i8++ ) { @@ -322,32 +296,13 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h ) if( l1ref0[o8] == 0 || ( l1ref0[o8] < 0 && l1ref1[o8] == 0 ) ) { const int16_t (*l1mv)[2] = (l1ref0[o8] == 0) ? 
l1mv0 : l1mv1; - if( b8x8 ) + const int16_t *mvcol = l1mv[3*x8 + 3*y8 * h->mb.i_b4_stride]; + if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 ) { - const int16_t *mvcol = l1mv[3*x8 + 3*y8 * h->mb.i_b4_stride]; - if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 ) - { - if( ref[0] == 0 ) - x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 ); - if( ref[1] == 0 ) - x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 ); - } - } - else - { - for( i4=0; i4<4; i4++ ) - { - const int x4 = i4%2 + 2*x8; - const int y4 = i4/2 + 2*y8; - const int16_t *mvcol = l1mv[x4 + y4 * h->mb.i_b4_stride]; - if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 ) - { - if( ref[0] == 0 ) - x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0 ); - if( ref[1] == 0 ) - x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0 ); - } - } + if( ref[0] == 0 ) + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 ); + if( ref[1] == 0 ) + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 ); } } } @@ -594,44 +549,13 @@ static void x264_mb_mc_direct8x8( x264_t *h, int x, int y ) { const int i8 = x264_scan8[0] + x + 8*y; - /* FIXME: optimize based on current block size, not global settings? 
*/ - if( h->sps->b_direct8x8_inference ) - { - if( h->mb.cache.ref[0][i8] >= 0 ) - if( h->mb.cache.ref[1][i8] >= 0 ) - x264_mb_mc_01xywh( h, x, y, 2, 2 ); - else - x264_mb_mc_0xywh( h, x, y, 2, 2 ); + if( h->mb.cache.ref[0][i8] >= 0 ) + if( h->mb.cache.ref[1][i8] >= 0 ) + x264_mb_mc_01xywh( h, x, y, 2, 2 ); else - x264_mb_mc_1xywh( h, x, y, 2, 2 ); - } + x264_mb_mc_0xywh( h, x, y, 2, 2 ); else - { - if( h->mb.cache.ref[0][i8] >= 0 ) - { - if( h->mb.cache.ref[1][i8] >= 0 ) - { - x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 ); - } - else - { - x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 ); - } - } - else - { - x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 ); - } - } + x264_mb_mc_1xywh( h, x, y, 2, 2 ); } void x264_mb_mc_8x8( x264_t *h, int i8 ) @@ -660,37 +584,9 @@ void x264_mb_mc_8x8( x264_t *h, int i8 ) case D_L1_8x8: x264_mb_mc_1xywh( h, x, y, 2, 2 ); break; - case D_L1_8x4: - x264_mb_mc_1xywh( h, x, y+0, 2, 1 ); - x264_mb_mc_1xywh( h, x, y+1, 2, 1 ); - break; - case D_L1_4x8: - x264_mb_mc_1xywh( h, x+0, y, 1, 2 ); - x264_mb_mc_1xywh( h, x+1, y, 1, 2 ); - break; - case D_L1_4x4: - x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 ); - break; case D_BI_8x8: x264_mb_mc_01xywh( h, x, y, 2, 2 ); break; - case D_BI_8x4: - x264_mb_mc_01xywh( h, x, y+0, 2, 1 ); - x264_mb_mc_01xywh( h, x, y+1, 2, 1 ); - break; - case D_BI_4x8: - x264_mb_mc_01xywh( h, x+0, y, 1, 2 ); - x264_mb_mc_01xywh( h, x+1, y, 1, 2 ); - break; - case D_BI_4x4: - x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 ); - 
x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 ); - break; case D_DIRECT_8x8: x264_mb_mc_direct8x8( h, x, y ); break; diff --git a/common/macroblock.h b/common/macroblock.h index a2d7db4e..1b0f0ca2 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -455,36 +455,14 @@ static inline int x264_mb_transform_8x8_allowed( x264_t *h ) // large partitions are allowed // direct and 8x8 are conditional static const uint8_t partition_tab[X264_MBTYPE_MAX] = { - 0,0,0,0,1,2,0,2,1,1,1,1,1,1,1,1,1,2,0, + 0,0,0,0,1,2,0,1,1,1,1,1,1,1,1,1,1,1,0, }; - int p, i; if( !h->pps->b_transform_8x8_mode ) return 0; - p = partition_tab[h->mb.i_type]; - if( p < 2 ) - return p; - else if( h->mb.i_type == B_DIRECT ) - return h->sps->b_direct8x8_inference; - else if( h->mb.i_type == P_8x8 ) - { - if( !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8) ) - return 1; - for( i=0; i<4; i++ ) - if( h->mb.i_sub_partition[i] != D_L0_8x8 ) - return 0; - return 1; - } - else // B_8x8 - { - // x264 currently doesn't use sub-8x8 B partitions, so don't check for them - if( h->sps->b_direct8x8_inference ) - return 1; - for( i=0; i<4; i++ ) - if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) - return 0; - return 1; - } + if( h->mb.i_type != P_8x8 ) + return partition_tab[h->mb.i_type]; + return *(uint32_t*)h->mb.i_sub_partition == D_L0_8x8*0x01010101; } #endif diff --git a/encoder/encoder.c b/encoder/encoder.c index 132b26de..3ef62f74 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -549,8 +549,6 @@ static int x264_validate_parameters( x264_t *h ) h->param.analyse.i_mv_range = l->mv_range >> h->param.b_interlaced; else h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 512 >> h->param.b_interlaced); - if( h->param.analyse.i_direct_8x8_inference < 0 ) - h->param.analyse.i_direct_8x8_inference = l->direct8x8; } if( h->param.i_threads > 1 ) @@ -595,7 +593,6 @@ static int x264_validate_parameters( x264_t *h ) BOOLIFY( b_deblocking_filter ); 
BOOLIFY( b_interlaced ); BOOLIFY( analyse.b_transform_8x8 ); - BOOLIFY( analyse.i_direct_8x8_inference ); BOOLIFY( analyse.b_chroma_me ); BOOLIFY( analyse.b_fast_pskip ); BOOLIFY( rc.b_stat_write ); diff --git a/encoder/set.c b/encoder/set.c index e2ec1cc9..552df80b 100644 --- a/encoder/set.c +++ b/encoder/set.c @@ -133,9 +133,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param ) sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1; sps->b_frame_mbs_only = ! param->b_interlaced; sps->b_mb_adaptive_frame_field = param->b_interlaced; - sps->b_direct8x8_inference = param->analyse.i_direct_8x8_inference - || ! sps->b_frame_mbs_only - || !(param->analyse.inter & X264_ANALYSE_PSUB8x8); + sps->b_direct8x8_inference = 1; sps->crop.i_left = 0; sps->crop.i_top = 0; @@ -565,8 +563,6 @@ int x264_validate_levels( x264_t *h, int verbose ) if( h->param.i_fps_den > 0 ) CHECK( "MB rate", l->mbps, (int64_t)mbs * h->param.i_fps_num / h->param.i_fps_den ); - if( h->sps->b_direct8x8_inference < l->direct8x8 ) - ERROR( "direct 8x8 inference (0) < level requirement (1)\n" ); /* TODO check the rest of the limits */ return ret; diff --git a/x264.c b/x264.c index e1499452..04bb44ce 100644 --- a/x264.c +++ b/x264.c @@ -233,11 +233,6 @@ static void Help( x264_param_t *defaults, int b_longhelp ) H0( " --direct <string> Direct MV prediction mode [\"%s\"]\n" " - none, spatial, temporal, auto\n", strtable_lookup( x264_direct_pred_names, defaults->analyse.i_direct_mv_pred ) ); - H1( " --direct-8x8 <-1|0|1> Direct prediction size [%d]\n" - " - 0: 4x4\n" - " - 1: 8x8\n" - " - -1: smallest possible according to level\n", - defaults->analyse.i_direct_8x8_inference ); H0( " -w, --weightb Weighted prediction for B-frames\n" ); H0( " --me <string> Integer pixel motion estimation method [\"%s\"]\n", strtable_lookup( x264_motion_est_names, defaults->analyse.i_me_method ) ); @@ -425,7 +420,6 @@ static int Parse( int argc, char **argv, { "analyse", required_argument, NULL, 0 }, { "partitions",
required_argument, NULL, 'A' }, { "direct", required_argument, NULL, 0 }, - { "direct-8x8", required_argument, NULL, 0 }, { "weightb", no_argument, NULL, 'w' }, { "me", required_argument, NULL, 0 }, { "merange", required_argument, NULL, 0 }, diff --git a/x264.h b/x264.h index 6e123ac9..820185f9 100644 --- a/x264.h +++ b/x264.h @@ -35,7 +35,7 @@ #include <stdarg.h> -#define X264_BUILD 65 +#define X264_BUILD 66 /* x264_t: * opaque handler for encoder */ @@ -228,7 +228,6 @@ typedef struct x264_param_t int b_transform_8x8; int b_weighted_bipred; /* implicit weighting for B-frames */ int i_direct_mv_pred; /* spatial vs temporal mv prediction */ - int i_direct_8x8_inference; /* forbid 4x4 direct partitions. -1 = auto, based on level */ int i_chroma_qp_offset; int i_me_method; /* motion estimation algorithm to use (X264_ME_*) */ @@ -409,10 +408,6 @@ typedef struct * XXX: it currently doesn't check for overflow */ int x264_nal_encode( void *, int *, int b_annexeb, x264_nal_t *nal ); -/* x264_nal_decode: - * decode a buffer nal into a x264_nal_t */ -int x264_nal_decode( x264_nal_t *nal, void *, int ); - /**************************************************************************** * Encoder functions: ****************************************************************************/ -- 2.39.2