From: Fiona Glaser Date: Tue, 19 Feb 2013 21:48:44 +0000 (-0800) Subject: Add slices-max feature X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=732e4f7e8b9ab6d214cbcf059445b4712709faa4;p=x264 Add slices-max feature The H.264 spec technically has limits on the number of slices per frame. x264 normally ignores this, since most use-cases that require large numbers of slices prefer it to. However, certain decoders may break with extremely large numbers of slices, as can occur with some slice-max-size/mbs settings. When set, x264 will refuse to create any slices beyond the maximum number, even if slice-max-size/mbs requires otherwise. --- diff --git a/common/common.c b/common/common.c index 43fd537a..23951de7 100644 --- a/common/common.c +++ b/common/common.c @@ -782,6 +782,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) p->i_slice_min_mbs = atoi(value); OPT("slices") p->i_slice_count = atoi(value); + OPT("slices-max") + p->i_slice_count_max = atoi(value); OPT("cabac") p->b_cabac = atobool(value); OPT("cabac-idc") @@ -1307,6 +1309,8 @@ char *x264_param2string( x264_param_t *p, int b_res ) s += sprintf( s, " sliced_threads=%d", p->b_sliced_threads ); if( p->i_slice_count ) s += sprintf( s, " slices=%d", p->i_slice_count ); + if( p->i_slice_count_max ) + s += sprintf( s, " slices_max=%d", p->i_slice_count_max ); if( p->i_slice_max_size ) s += sprintf( s, " slice_max_size=%d", p->i_slice_max_size ); if( p->i_slice_max_mbs ) diff --git a/common/frame.c b/common/frame.c index f91d45f6..c51fb9b8 100644 --- a/common/frame.c +++ b/common/frame.c @@ -659,6 +659,21 @@ void x264_threadslice_cond_wait( x264_t *h, int pass ) x264_pthread_mutex_unlock( &h->mutex ); } +int x264_frame_new_slice( x264_t *h, x264_frame_t *frame ) +{ + if( h->param.i_slice_count_max ) + { + int slice_count; + if( h->param.b_sliced_threads ) + slice_count = x264_pthread_fetch_and_add( &frame->i_slice_count, 1, &frame->mutex ); + else + slice_count = frame->i_slice_count++; + if( slice_count >= h->param.i_slice_count_max ) + return -1; + } + return 0; +} + /* list operators */ void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ) @@ -721,6 +736,7 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec ) frame->b_scenecut = 1; frame->b_keyframe = 0; frame->b_corrupt = 0; + frame->i_slice_count = h->param.b_sliced_threads ? h->param.i_threads : 1; memset( frame->weight, 0, sizeof(frame->weight) ); memset( frame->f_weighted_cost_delta, 0, sizeof(frame->f_weighted_cost_delta) ); diff --git a/common/frame.h b/common/frame.h index 7027e211..468503ae 100644 --- a/common/frame.h +++ b/common/frame.h @@ -152,6 +152,7 @@ typedef struct x264_frame int i_reference_count; /* number of threads using this frame (not necessarily the number of pointers) */ x264_pthread_mutex_t mutex; x264_pthread_cond_t cv; + int i_slice_count; /* Atomically written to/read from with slice threads */ /* periodic intra refresh */ float f_pir_position; @@ -230,6 +231,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mba void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ); void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); +int x264_frame_new_slice( x264_t *h, x264_frame_t *frame ); void x264_threadslice_cond_broadcast( x264_t *h, int pass ); void x264_threadslice_cond_wait( x264_t *h, int pass ); diff --git a/common/osdep.h b/common/osdep.h index 1e17c6a1..bcf8f6f5 100644 --- a/common/osdep.h +++ b/common/osdep.h @@ -204,6 +204,25 @@ int x264_threading_init( void ); #define x264_threading_init() 0 #endif +static ALWAYS_INLINE int x264_pthread_fetch_and_add( int *val, int add, x264_pthread_mutex_t *mutex ) +{ +#if HAVE_THREAD +#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ > 0) && ARCH_X86 + return __sync_fetch_and_add( val, add ); +#else + x264_pthread_mutex_lock( mutex ); + int res = *val; + *val += add; + x264_pthread_mutex_unlock( mutex ); + return res; +#endif +#else + int res = *val; + *val += add; + return res; +#endif +} + #define WORD_SIZE sizeof(void*) #define asm __asm__ diff --git a/encoder/encoder.c b/encoder/encoder.c index 2ac9a6a8..5a94209c 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -696,6 +696,7 @@ static int x264_validate_parameters( x264_t *h, int b_open ) if( h->param.i_slice_max_mbs || h->param.i_slice_max_size ) h->param.i_slice_count = 0; } + h->param.i_slice_count_max = X264_MAX( h->param.i_slice_count, h->param.i_slice_count_max ); if( h->param.b_bluray_compat ) { @@ -1515,6 +1516,7 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param ) COPY( i_slice_max_mbs ); COPY( i_slice_min_mbs ); COPY( i_slice_count ); + COPY( i_slice_count_max ); COPY( b_tff ); /* VBV can't be turned on if it wasn't on to begin with */ @@ -2437,42 +2439,47 @@ reencode: /* We'll just re-encode this last macroblock if we go over the max slice size. */ if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb ) { - /* Handle the most obnoxious slice-min-mbs edge case: we need to end the slice - * because it's gone over the maximum size, but doing so would violate slice-min-mbs. - * If possible, roll back to the last checkpoint and try again. - * We could try raising QP, but that would break in the case where a slice spans multiple - * rows, which the re-encoding infrastructure can't currently handle. */ - if( mb_xy < thread_last_mb && (thread_last_mb+1-mb_xy) < h->param.i_slice_min_mbs ) + if( !x264_frame_new_slice( h, h->fdec ) ) { - if( thread_last_mb-h->param.i_slice_min_mbs < h->sh.i_first_mb+h->param.i_slice_min_mbs ) + /* Handle the most obnoxious slice-min-mbs edge case: we need to end the slice + * because it's gone over the maximum size, but doing so would violate slice-min-mbs. + * If possible, roll back to the last checkpoint and try again. + * We could try raising QP, but that would break in the case where a slice spans multiple + * rows, which the re-encoding infrastructure can't currently handle. */ + if( mb_xy < thread_last_mb && (thread_last_mb+1-mb_xy) < h->param.i_slice_min_mbs ) { - x264_log( h, X264_LOG_WARNING, "slice-max-size violated (frame %d, cause: slice-min-mbs)\n", h->i_frame ); - slice_max_size = 0; - goto cont; + if( thread_last_mb-h->param.i_slice_min_mbs < h->sh.i_first_mb+h->param.i_slice_min_mbs ) + { + x264_log( h, X264_LOG_WARNING, "slice-max-size violated (frame %d, cause: slice-min-mbs)\n", h->i_frame ); + slice_max_size = 0; + goto cont; + } + x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 ); + h->mb.b_reencode_mb = 1; + h->sh.i_last_mb = thread_last_mb-h->param.i_slice_min_mbs; + break; } - x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 ); - h->mb.b_reencode_mb = 1; - h->sh.i_last_mb = thread_last_mb-h->param.i_slice_min_mbs; - break; - } - if( mb_xy-SLICE_MBAFF*h->mb.i_mb_stride != h->sh.i_first_mb ) - { - x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 ); - h->mb.b_reencode_mb = 1; - if( SLICE_MBAFF ) + if( mb_xy-SLICE_MBAFF*h->mb.i_mb_stride != h->sh.i_first_mb ) { - // set to bottom of previous mbpair - if( i_mb_x ) - h->sh.i_last_mb = mb_xy-1+h->mb.i_mb_stride*(!(i_mb_y&1)); + x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 ); + h->mb.b_reencode_mb = 1; + if( SLICE_MBAFF ) + { + // set to bottom of previous mbpair + if( i_mb_x ) + h->sh.i_last_mb = mb_xy-1+h->mb.i_mb_stride*(!(i_mb_y&1)); + else + h->sh.i_last_mb = (i_mb_y-2+!(i_mb_y&1))*h->mb.i_mb_stride + h->mb.i_mb_width - 1; + } else - h->sh.i_last_mb = (i_mb_y-2+!(i_mb_y&1))*h->mb.i_mb_stride + h->mb.i_mb_width - 1; + h->sh.i_last_mb = mb_xy-1; + break; } else - h->sh.i_last_mb = mb_xy-1; - break; + h->sh.i_last_mb = mb_xy; } else - h->sh.i_last_mb = mb_xy; + slice_max_size = 0; } } cont: @@ -2688,32 +2695,36 @@ static void *x264_slices_write( x264_t *h ) while( h->sh.i_first_mb + SLICE_MBAFF*h->mb.i_mb_stride <= last_thread_mb ) { h->sh.i_last_mb = last_thread_mb; - if( h->param.i_slice_max_mbs ) + if( !i_slice_num || !x264_frame_new_slice( h, h->fdec ) ) { - if( SLICE_MBAFF ) + if( h->param.i_slice_max_mbs ) { - // convert first to mbaff form, add slice-max-mbs, then convert back to normal form - int last_mbaff = 2*(h->sh.i_first_mb % h->mb.i_mb_width) - + h->mb.i_mb_width*(h->sh.i_first_mb / h->mb.i_mb_width) - + h->param.i_slice_max_mbs - 1; - int last_x = (last_mbaff % (2*h->mb.i_mb_width))/2; - int last_y = (last_mbaff / (2*h->mb.i_mb_width))*2 + 1; - h->sh.i_last_mb = last_x + h->mb.i_mb_stride*last_y; + if( SLICE_MBAFF ) + { + // convert first to mbaff form, add slice-max-mbs, then convert back to normal form + int last_mbaff = 2*(h->sh.i_first_mb % h->mb.i_mb_width) + + h->mb.i_mb_width*(h->sh.i_first_mb / h->mb.i_mb_width) + + h->param.i_slice_max_mbs - 1; + int last_x = (last_mbaff % (2*h->mb.i_mb_width))/2; + int last_y = (last_mbaff / (2*h->mb.i_mb_width))*2 + 1; + h->sh.i_last_mb = last_x + h->mb.i_mb_stride*last_y; + } + else + { + h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1; + if( h->sh.i_last_mb < last_thread_mb && last_thread_mb - h->sh.i_last_mb < h->param.i_slice_min_mbs ) + h->sh.i_last_mb = last_thread_mb - h->param.i_slice_min_mbs; + } + i_slice_num++; } - else + else if( h->param.i_slice_count && !h->param.b_sliced_threads ) { - h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1; - if( h->sh.i_last_mb < last_thread_mb && last_thread_mb - h->sh.i_last_mb < h->param.i_slice_min_mbs ) - h->sh.i_last_mb = last_thread_mb - h->param.i_slice_min_mbs; + int height = h->mb.i_mb_height >> PARAM_INTERLACED; + int width = h->mb.i_mb_width << PARAM_INTERLACED; + i_slice_num++; + h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1; } } - else if( h->param.i_slice_count && !h->param.b_sliced_threads ) - { - int height = h->mb.i_mb_height >> PARAM_INTERLACED; - int width = h->mb.i_mb_width << PARAM_INTERLACED; - i_slice_num++; - h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1; - } h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb ); if( x264_stack_align( x264_slice_write, h ) ) goto fail; diff --git a/x264.c b/x264.c index 2e1d4b30..3791d8b2 100644 --- a/x264.c +++ b/x264.c @@ -596,6 +596,8 @@ static void help( x264_param_t *defaults, int longhelp ) H2( " --slices Number of slices per frame; forces rectangular\n" " slices and is overridden by other slicing options\n" ); else H1( " --slices Number of slices per frame\n" ); + H2( " --slices-max Absolute maximum slices per frame; overrides\n" + " slice-max-size/slice-max-mbs when necessary\n" ); H2( " --slice-max-size Limit the size of each slice in bytes\n"); H2( " --slice-max-mbs Limit the size of each slice in macroblocks (max)\n"); H2( " --slice-min-mbs Limit the size of each slice in macroblocks (min)\n"); @@ -974,6 +976,7 @@ static struct option long_options[] = { "slice-max-mbs", required_argument, NULL, 0 }, { "slice-min-mbs", required_argument, NULL, 0 }, { "slices", required_argument, NULL, 0 }, + { "slices-max", required_argument, NULL, 0 }, { "thread-input", no_argument, NULL, OPT_THREAD_INPUT }, { "sync-lookahead", required_argument, NULL, 0 }, { "non-deterministic", no_argument, NULL, 0 }, diff --git a/x264.h b/x264.h index 2a7927a9..98d38a0e 100644 --- a/x264.h +++ b/x264.h @@ -41,7 +41,7 @@ #include "x264_config.h" -#define X264_BUILD 131 +#define X264_BUILD 132 /* Application developers planning to link against a shared library version of * libx264 from a Microsoft Visual Studio or similar development environment @@ -479,6 +479,8 @@ typedef struct x264_param_t int i_slice_max_mbs; /* Max number of MBs per slice; overrides i_slice_count. */ int i_slice_min_mbs; /* Min number of MBs per slice */ int i_slice_count; /* Number of slices per frame: forces rectangular slices. */ + int i_slice_count_max; /* Absolute cap on slices per frame; stops applying slice-max-size + * and slice-max-mbs if this is reached. */ /* Optional callback for freeing this x264_param_t when it is done being used. * Only used when the x264_param_t sits in memory for an indefinite period of time,