X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=encoder%2Fratecontrol.c;h=e4a085c416e1c5dbbdeee974549a68a63f8bdd2c;hb=d23d18655249944c1ca894b451e2c82c7a584c62;hp=7aba57005bb73c608a6bfbe70367e4eebd67fd7b;hpb=8bed3a1418edf4b146d84445e692b17cf854bbe5;p=x264 diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 7aba5700..e4a085c4 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.c: ratecontrol ***************************************************************************** - * Copyright (C) 2005-2010 x264 project + * Copyright (C) 2005-2016 x264 project * * Authors: Loren Merritt * Michael Niedermayer @@ -29,7 +29,6 @@ #define _ISOC99_SOURCE #undef NDEBUG // always check asserts, the speed effect is far too small to disable them -#include #include "common/common.h" #include "ratecontrol.h" @@ -63,10 +62,11 @@ typedef struct typedef struct { - double coeff; - double count; - double decay; - double offset; + float coeff_min; + float coeff; + float count; + float decay; + float offset; } predictor_t; struct x264_ratecontrol_t @@ -88,18 +88,21 @@ struct x264_ratecontrol_t int qp; /* qp for current frame */ float qpm; /* qp for current macroblock: precise float for AQ */ float qpa_rc; /* average of macroblocks' qp before aq */ - float qpa_aq; /* average of macroblocks' qp after aq */ + float qpa_rc_prev; + int qpa_aq; /* average of macroblocks' qp after aq */ + int qpa_aq_prev; float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */ /* VBV stuff */ double buffer_size; int64_t buffer_fill_final; + int64_t buffer_fill_final_min; double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */ double buffer_rate; /* # of bits added to buffer_fill after each frame */ double vbv_max_rate; /* # of bits added to buffer_fill per second */ predictor_t *pred; /* predict frame size from satd */ int single_frame_vbv; - double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */ + float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */ /* ABR stuff */ int last_satd; @@ -134,9 +137,21 @@ struct x264_ratecontrol_t double lmin[3]; /* min qscale by frame type */ double lmax[3]; double lstep; /* max change (multiply) in qscale per frame */ - uint16_t *qp_buffer[2]; /* Global buffers for converting MB-tree quantizer data. */ - int qpbuf_pos; /* In order to handle pyramid reordering, QP buffer acts as a stack. + struct + { + uint16_t *qp_buffer[2]; /* Global buffers for converting MB-tree quantizer data. */ + int qpbuf_pos; /* In order to handle pyramid reordering, QP buffer acts as a stack. * This value is the current position (0 or 1). */ + int src_mb_count; + + /* For rescaling */ + int rescale_enabled; + float *scale_buffer[2]; /* Intermediate buffers */ + int filtersize[2]; /* filter size (H/V) */ + float *coeffs[2]; + int *pos[2]; + int srcdim[2]; /* Source dimensions (W/H) */ + } mbtree; /* MBRC stuff */ float frame_size_estimated; /* Access to this variable must be atomic: double is @@ -144,7 +159,7 @@ struct x264_ratecontrol_t double frame_size_maximum; /* Maximum frame size due to MinCR */ double frame_size_planned; double slice_size_planned; - predictor_t (*row_pred)[2]; + predictor_t *row_pred; predictor_t row_preds[3][2]; predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */ int bframes; /* # consecutive B-frames before this P-frame */ @@ -168,8 +183,8 @@ static int init_pass2(x264_t *); static float rate_estimate_qscale( x264_t *h ); static int update_vbv( x264_t *h, int bits ); static void update_vbv_plan( x264_t *h, int overhead ); -static double predict_size( predictor_t *p, double q, double var ); -static void update_predictor( predictor_t *p, double q, double var, double bits ); +static float predict_size( predictor_t *p, float q, float var ); +static void update_predictor( predictor_t *p, float q, float var, float bits ); #define CMP_OPT_FIRST_PASS( opt, param_val )\ {\ @@ -184,13 +199,13 @@ static void update_predictor( predictor_t *p, double q, double var, double bits * qp = h.264's quantizer * qscale = linearized quantizer = Lagrange multiplier */ -static inline double qp2qscale( double qp ) +static inline float qp2qscale( float qp ) { - return 0.85 * pow( 2.0, ( qp - 12.0 ) / 6.0 ); + return 0.85f * powf( 2.0f, ( qp - (12.0f + QP_BD_OFFSET) ) / 6.0f ); } -static inline double qscale2qp( double qscale ) +static inline float qscale2qp( float qscale ) { - return 12.0 + 6.0 * log2( qscale/0.85 ); + return (12.0f + QP_BD_OFFSET) + 6.0f * log2f( qscale/0.85f ); } /* Texture bitrate is not quite inversely proportional to qscale, @@ -206,32 +221,38 @@ static inline double qscale2bits( ratecontrol_entry_t *rce, double qscale ) + rce->misc_bits; } -static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i ) +static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i, int b_store ) { uint32_t sum = sum_ssd; uint32_t ssd = sum_ssd >> 32; - frame->i_pixel_sum[i] += sum; - frame->i_pixel_ssd[i] += ssd; + if( b_store ) + { + frame->i_pixel_sum[i] += sum; + frame->i_pixel_ssd[i] += ssd; + } return ssd - ((uint64_t)sum * sum >> shift); } -static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i ) +static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i, int b_chroma, int b_field, int b_store ) { - int w = i ? 8 : 16; + int height = b_chroma ? 16>>CHROMA_V_SHIFT : 16; int stride = frame->i_stride[i]; - int offset = h->mb.b_interlaced - ? 16 * mb_x + w * (mb_y&~1) * stride + (mb_y&1) * stride - : 16 * mb_x + w * mb_y * stride; - stride <<= h->mb.b_interlaced; - if( i ) + int offset = b_field + ? 16 * mb_x + height * (mb_y&~1) * stride + (mb_y&1) * stride + : 16 * mb_x + height * mb_y * stride; + stride <<= b_field; + if( b_chroma ) { - ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] ); - h->mc.load_deinterleave_8x8x2_fenc( pix, frame->plane[1] + offset, stride ); - return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1 ) - + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2 ); + ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*16] ); + int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; + int shift = 7 - CHROMA_V_SHIFT; + + h->mc.load_deinterleave_chroma_fenc( pix, frame->plane[1] + offset, stride, height ); + return ac_energy_var( h->pixf.var[chromapix]( pix, FENC_STRIDE ), shift, frame, 1, b_store ) + + ac_energy_var( h->pixf.var[chromapix]( pix+FENC_STRIDE/2, FENC_STRIDE ), shift, frame, 2, b_store ); } else - return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, 0 ); + return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[i] + offset, stride ), 8, frame, i, b_store ); } // Find the total AC energy of the block in all planes. @@ -241,18 +262,46 @@ static NOINLINE uint32_t x264_ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_ * and putting it after floating point ops. As a result, we put the emms at the end of the * function and make sure that its always called before the float math. Noinline makes * sure no reordering goes on. */ - uint32_t var = ac_energy_plane( h, mb_x, mb_y, frame, 0 ); - var += ac_energy_plane( h, mb_x, mb_y, frame, 1 ); + uint32_t var; + x264_prefetch_fenc( h, frame, mb_x, mb_y ); + if( h->mb.b_adaptive_mbaff ) + { + /* We don't know the super-MB mode we're going to pick yet, so + * simply try both and pick the lower of the two. */ + uint32_t var_interlaced, var_progressive; + var_interlaced = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 1, 1 ); + var_progressive = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0, 0 ); + if( CHROMA444 ) + { + var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 1, 1 ); + var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0, 0 ); + var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 ); + var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 ); + } + else + { + var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 ); + var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 ); + } + var = X264_MIN( var_interlaced, var_progressive ); + } + else + { + var = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, PARAM_INTERLACED, 1 ); + if( CHROMA444 ) + { + var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 ); + var += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 ); + } + else + var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 ); + } x264_emms(); return var; } void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets ) { - /* constants chosen to result in approximately the same overall bitrate as without AQ. - * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */ - float strength; - float avg_adj = 0.f; /* Initialize frame stats */ for( int i = 0; i < 3; i++ ) { @@ -296,23 +345,30 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off /* Actual adaptive quantization */ else { - if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) + /* constants chosen to result in approximately the same overall bitrate as without AQ. + * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */ + float strength; + float avg_adj = 0.f; + float bias_strength = 0.f; + + if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE || h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE_BIASED ) { - float bit_depth_correction = powf(1 << (BIT_DEPTH-8), 0.5f); + float bit_depth_correction = 1.f / (1 << (2*(BIT_DEPTH-8))); float avg_adj_pow2 = 0.f; for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ ) for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ ) { uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame ); - float qp_adj = powf( energy + 1, 0.125f ); + float qp_adj = powf( energy * bit_depth_correction + 1, 0.125f ); frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; avg_adj += qp_adj; avg_adj_pow2 += qp_adj * qp_adj; } avg_adj /= h->mb.i_mb_count; avg_adj_pow2 /= h->mb.i_mb_count; - strength = h->param.rc.f_aq_strength * avg_adj / bit_depth_correction; - avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f * bit_depth_correction)) / avg_adj; + strength = h->param.rc.f_aq_strength * avg_adj; + avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj; + bias_strength = h->param.rc.f_aq_strength; } else strength = h->param.rc.f_aq_strength * 1.0397f; @@ -322,7 +378,12 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off { float qp_adj; int mb_xy = mb_x + mb_y*h->mb.i_mb_stride; - if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) + if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE_BIASED ) + { + qp_adj = frame->f_qp_offset[mb_xy]; + qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 14.f / (qp_adj * qp_adj)); + } + else if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) { qp_adj = frame->f_qp_offset[mb_xy]; qp_adj = strength * (qp_adj - avg_adj); @@ -346,12 +407,136 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off { uint64_t ssd = frame->i_pixel_ssd[i]; uint64_t sum = frame->i_pixel_sum[i]; - int width = h->mb.i_mb_width*16>>!!i; - int height = h->mb.i_mb_height*16>>!!i; + int width = 16*h->mb.i_mb_width >> (i && CHROMA_H_SHIFT); + int height = 16*h->mb.i_mb_height >> (i && CHROMA_V_SHIFT); frame->i_pixel_ssd[i] = ssd - (sum * sum + width * height / 2) / (width * height); } } +static int x264_macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc ) +{ + /* Use fractional QP array dimensions to compensate for edge padding */ + float srcdim[2] = {rc->mbtree.srcdim[0] / 16.f, rc->mbtree.srcdim[1] / 16.f}; + float dstdim[2] = { h->param.i_width / 16.f, h->param.i_height / 16.f}; + int srcdimi[2] = {ceil(srcdim[0]), ceil(srcdim[1])}; + int dstdimi[2] = {ceil(dstdim[0]), ceil(dstdim[1])}; + if( PARAM_INTERLACED ) + { + srcdimi[1] = (srcdimi[1]+1)&~1; + dstdimi[1] = (dstdimi[1]+1)&~1; + } + + rc->mbtree.src_mb_count = srcdimi[0] * srcdimi[1]; + + CHECKED_MALLOC( rc->mbtree.qp_buffer[0], rc->mbtree.src_mb_count * sizeof(uint16_t) ); + if( h->param.i_bframe_pyramid && h->param.rc.b_stat_read ) + CHECKED_MALLOC( rc->mbtree.qp_buffer[1], rc->mbtree.src_mb_count * sizeof(uint16_t) ); + rc->mbtree.qpbuf_pos = -1; + + /* No rescaling to do */ + if( srcdimi[0] == dstdimi[0] && srcdimi[1] == dstdimi[1] ) + return 0; + + rc->mbtree.rescale_enabled = 1; + + /* Allocate intermediate scaling buffers */ + CHECKED_MALLOC( rc->mbtree.scale_buffer[0], srcdimi[0] * srcdimi[1] * sizeof(float) ); + CHECKED_MALLOC( rc->mbtree.scale_buffer[1], dstdimi[0] * srcdimi[1] * sizeof(float) ); + + /* Allocate and calculate resize filter parameters and coefficients */ + for( int i = 0; i < 2; i++ ) + { + if( srcdim[i] > dstdim[i] ) // downscale + rc->mbtree.filtersize[i] = 1 + (2 * srcdimi[i] + dstdimi[i] - 1) / dstdimi[i]; + else // upscale + rc->mbtree.filtersize[i] = 3; + + CHECKED_MALLOC( rc->mbtree.coeffs[i], rc->mbtree.filtersize[i] * dstdimi[i] * sizeof(float) ); + CHECKED_MALLOC( rc->mbtree.pos[i], dstdimi[i] * sizeof(int) ); + + /* Initialize filter coefficients */ + float inc = srcdim[i] / dstdim[i]; + float dmul = inc > 1.f ? dstdim[i] / srcdim[i] : 1.f; + float dstinsrc = 0.5f * inc - 0.5f; + int filtersize = rc->mbtree.filtersize[i]; + for( int j = 0; j < dstdimi[i]; j++ ) + { + int pos = dstinsrc - (filtersize - 2.f) * 0.5f; + float sum = 0.0; + rc->mbtree.pos[i][j] = pos; + for( int k = 0; k < filtersize; k++ ) + { + float d = fabs( pos + k - dstinsrc ) * dmul; + float coeff = X264_MAX( 1.f - d, 0 ); + rc->mbtree.coeffs[i][j * filtersize + k] = coeff; + sum += coeff; + } + sum = 1.0f / sum; + for( int k = 0; k < filtersize; k++ ) + rc->mbtree.coeffs[i][j * filtersize + k] *= sum; + dstinsrc += inc; + } + } + + /* Write back actual qp array dimensions */ + rc->mbtree.srcdim[0] = srcdimi[0]; + rc->mbtree.srcdim[1] = srcdimi[1]; + return 0; +fail: + return -1; +} + +static void x264_macroblock_tree_rescale_destroy( x264_ratecontrol_t *rc ) +{ + for( int i = 0; i < 2; i++ ) + { + x264_free( rc->mbtree.qp_buffer[i] ); + x264_free( rc->mbtree.scale_buffer[i] ); + x264_free( rc->mbtree.coeffs[i] ); + x264_free( rc->mbtree.pos[i] ); + } +} + +static ALWAYS_INLINE float tapfilter( float *src, int pos, int max, int stride, float *coeff, int filtersize ) +{ + float sum = 0.f; + for( int i = 0; i < filtersize; i++, pos++ ) + sum += src[x264_clip3( pos, 0, max-1 )*stride] * coeff[i]; + return sum; +} + +static void x264_macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst ) +{ + float *input, *output; + int filtersize, stride, height; + + /* H scale first */ + input = rc->mbtree.scale_buffer[0]; + output = rc->mbtree.scale_buffer[1]; + filtersize = rc->mbtree.filtersize[0]; + stride = rc->mbtree.srcdim[0]; + height = rc->mbtree.srcdim[1]; + for( int y = 0; y < height; y++, input += stride, output += h->mb.i_mb_width ) + { + float *coeff = rc->mbtree.coeffs[0]; + for( int x = 0; x < h->mb.i_mb_width; x++, coeff+=filtersize ) + output[x] = tapfilter( input, rc->mbtree.pos[0][x], stride, 1, coeff, filtersize ); + } + + /* V scale next */ + input = rc->mbtree.scale_buffer[1]; + output = dst; + filtersize = rc->mbtree.filtersize[1]; + stride = h->mb.i_mb_width; + height = rc->mbtree.srcdim[1]; + for( int x = 0; x < h->mb.i_mb_width; x++, input++, output++ ) + { + float *coeff = rc->mbtree.coeffs[1]; + for( int y = 0; y < h->mb.i_mb_height; y++, coeff+=filtersize ) + output[y*stride] = tapfilter( input, rc->mbtree.pos[1][y], height, stride, coeff, filtersize ); + } +} + int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets ) { x264_ratecontrol_t *rc = h->rc; @@ -360,38 +545,43 @@ int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offs if( rc->entry[frame->i_frame].kept_as_ref ) { uint8_t i_type; - if( rc->qpbuf_pos < 0 ) + if( rc->mbtree.qpbuf_pos < 0 ) { do { - rc->qpbuf_pos++; + rc->mbtree.qpbuf_pos++; if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) ) goto fail; - if( fread( rc->qp_buffer[rc->qpbuf_pos], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count ) + if( fread( rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], sizeof(uint16_t), rc->mbtree.src_mb_count, rc->p_mbtree_stat_file_in ) != rc->mbtree.src_mb_count ) goto fail; - if( i_type != i_type_actual && rc->qpbuf_pos == 1 ) + if( i_type != i_type_actual && rc->mbtree.qpbuf_pos == 1 ) { - x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type, i_type_actual); + x264_log( h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type, i_type_actual ); return -1; } } while( i_type != i_type_actual ); } - for( int i = 0; i < h->mb.i_mb_count; i++ ) + float *dst = rc->mbtree.rescale_enabled ? rc->mbtree.scale_buffer[0] : frame->f_qp_offset; + for( int i = 0; i < rc->mbtree.src_mb_count; i++ ) { - frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[rc->qpbuf_pos][i] )) * (1/256.0); - if( h->frames.b_have_lowres ) - frame->i_inv_qscale_factor[i] = x264_exp2fix8(frame->f_qp_offset[i]); + int16_t qp_fix8 = endian_fix16( rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos][i] ); + dst[i] = qp_fix8 * (1.f/256.f); } - rc->qpbuf_pos--; + if( rc->mbtree.rescale_enabled ) + x264_macroblock_tree_rescale( h, rc, frame->f_qp_offset ); + if( h->frames.b_have_lowres ) + for( int i = 0; i < h->mb.i_mb_count; i++ ) + frame->i_inv_qscale_factor[i] = x264_exp2fix8( frame->f_qp_offset[i] ); + rc->mbtree.qpbuf_pos--; } else x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets ); return 0; fail: - x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n"); + x264_log( h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n" ); return -1; } @@ -402,22 +592,22 @@ int x264_reference_build_list_optimal( x264_t *h ) x264_weight_t weights[16][3]; int refcount[16]; - if( rce->refs != h->i_ref0 ) + if( rce->refs != h->i_ref[0] ) return -1; - memcpy( frames, h->fref0, sizeof(frames) ); + memcpy( frames, h->fref[0], sizeof(frames) ); memcpy( refcount, rce->refcount, sizeof(refcount) ); memcpy( weights, h->fenc->weight, sizeof(weights) ); memset( &h->fenc->weight[1][0], 0, sizeof(x264_weight_t[15][3]) ); /* For now don't reorder ref 0; it seems to lower quality in most cases due to skips. */ - for( int ref = 1; ref < h->i_ref0; ref++ ) + for( int ref = 1; ref < h->i_ref[0]; ref++ ) { int max = -1; int bestref = 1; - for( int i = 1; i < h->i_ref0; i++ ) + for( int i = 1; i < h->i_ref[0]; i++ ) /* Favor lower POC as a tiebreaker. */ COPY2_IF_GT( max, refcount[i], bestref, i ); @@ -425,7 +615,7 @@ int x264_reference_build_list_optimal( x264_t *h ) * that the optimal ordering doesnt place every duplicate. */ refcount[bestref] = -1; - h->fref0[ref] = frames[bestref]; + h->fref[0][ref] = frames[bestref]; memcpy( h->fenc->weight[ref], weights[bestref], sizeof(weights[bestref]) ); } @@ -460,6 +650,11 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 ) { + /* We don't support changing the ABR bitrate right now, + so if the stream starts as CBR, keep it CBR. */ + if( rc->b_vbv_min_rate ) + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate; + if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) ) { h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps; @@ -467,17 +662,11 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) h->param.rc.i_vbv_buffer_size ); } - /* We don't support changing the ABR bitrate right now, - so if the stream starts as CBR, keep it CBR. */ - if( rc->b_vbv_min_rate ) - h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate; - - int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000; - int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000; + int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000; + int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size; + int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size; /* Init HRD */ - h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate; - h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size; if( h->param.i_nal_hrd && b_init ) { h->sps->vui.hrd.i_cpb_cnt = 1; @@ -487,15 +676,12 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) #define BR_SHIFT 6 #define CPB_SHIFT 4 - int bitrate = 1000*h->param.rc.i_vbv_max_bitrate; - int bufsize = 1000*h->param.rc.i_vbv_buffer_size; - // normalize HRD size and rate to the value / scale notation - h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 ); - h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); + h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 ); + h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); - h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 ); - h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); + h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 ); + h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); #undef CPB_SHIFT @@ -522,7 +708,11 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) x264_log( h, X264_LOG_WARNING, "VBV parameters cannot be changed when NAL HRD is in use\n" ); return; } + h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate; + h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size; + if( rc->b_vbv_min_rate ) + rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size; rc->buffer_rate = vbv_max_bitrate / rc->fps; rc->vbv_max_rate = vbv_max_bitrate; rc->buffer_size = vbv_buffer_size; @@ -543,7 +733,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) if( h->param.rc.f_vbv_buffer_init > 1. ) h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 ); h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1); - rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale; + rc->buffer_fill_final = + rc->buffer_fill_final_min = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale; rc->b_vbv = 1; rc->b_vbv_min_rate = !rc->b_2pass && h->param.rc.i_rc_method == X264_RC_ABR @@ -578,7 +769,7 @@ int x264_ratecontrol_new( x264_t *h ) else rc->qcompress = h->param.rc.f_qcompress; - rc->bitrate = h->param.rc.i_bitrate * 1000.; + rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.); rc->rate_tolerance = h->param.rc.f_rate_tolerance; rc->nmb = h->mb.i_mb_count; rc->last_non_b_pict_type = -1; @@ -586,7 +777,7 @@ int x264_ratecontrol_new( x264_t *h ) if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read ) { - x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n"); + x264_log( h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n" ); return -1; } @@ -595,11 +786,11 @@ int x264_ratecontrol_new( x264_t *h ) if( h->param.i_nal_hrd ) { uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale; - uint64_t num = 180000; + uint64_t num = 90000; x264_reduce_fraction64( &num, &denom ); - rc->hrd_multiply_denom = 180000 / num; + rc->hrd_multiply_denom = 90000 / num; - double bits_required = log2( 180000 / rc->hrd_multiply_denom ) + double bits_required = log2( 90000 / rc->hrd_multiply_denom ) + log2( h->sps->vui.i_time_scale ) + log2( h->sps->vui.hrd.i_cpb_size_unscaled ); if( bits_required >= 63 ) @@ -611,7 +802,7 @@ int x264_ratecontrol_new( x264_t *h ) if( rc->rate_tolerance < 0.01 ) { - x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n"); + x264_log( h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n" ); rc->rate_tolerance = 0.01; } @@ -637,10 +828,11 @@ int x264_ratecontrol_new( x264_t *h ) h->mb.ip_offset = rc->ip_offset + 0.5; rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 ); - rc->last_qscale = qp2qscale( 26 ); + rc->last_qscale = qp2qscale( 26 + QP_BD_OFFSET ); int num_preds = h->param.b_sliced_threads * h->param.i_threads + 1; CHECKED_MALLOC( rc->pred, 5 * sizeof(predictor_t) * num_preds ); CHECKED_MALLOC( rc->pred_b_from_p, sizeof(predictor_t) ); + static const float pred_coeff_table[3] = { 1.0, 1.0, 1.5 }; for( int i = 0; i < 3; i++ ) { rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP ); @@ -648,20 +840,26 @@ int x264_ratecontrol_new( x264_t *h ) rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max ); for( int j = 0; j < num_preds; j++ ) { - rc->pred[i+j*5].coeff= 2.0; - rc->pred[i+j*5].count= 1.0; - rc->pred[i+j*5].decay= 0.5; - rc->pred[i+j*5].offset= 0.0; + rc->pred[i+j*5].coeff_min = pred_coeff_table[i] / 2; + rc->pred[i+j*5].coeff = pred_coeff_table[i]; + rc->pred[i+j*5].count = 1.0; + rc->pred[i+j*5].decay = 0.5; + rc->pred[i+j*5].offset = 0.0; } for( int j = 0; j < 2; j++ ) { - rc->row_preds[i][j].coeff= .25; - rc->row_preds[i][j].count= 1.0; - rc->row_preds[i][j].decay= 0.5; - rc->row_preds[i][j].offset= 0.0; + rc->row_preds[i][j].coeff_min = .25 / 4; + rc->row_preds[i][j].coeff = .25; + rc->row_preds[i][j].count = 1.0; + rc->row_preds[i][j].decay = 0.5; + rc->row_preds[i][j].offset = 0.0; } } - *rc->pred_b_from_p = rc->pred[0]; + rc->pred_b_from_p->coeff_min = 0.5 / 2; + rc->pred_b_from_p->coeff = 0.5; + rc->pred_b_from_p->count = 1.0; + rc->pred_b_from_p->decay = 0.5; + rc->pred_b_from_p->offset = 0.0; if( parse_zones( h ) < 0 ) { @@ -679,7 +877,7 @@ int x264_ratecontrol_new( x264_t *h ) stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in ); if( !stats_buf ) { - x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n"); + x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" ); return -1; } if( h->param.rc.b_mb_tree ) @@ -687,17 +885,23 @@ int x264_ratecontrol_new( x264_t *h ) char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" ); if( !mbtree_stats_in ) return -1; - rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" ); + rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" ); x264_free( mbtree_stats_in ); if( !rc->p_mbtree_stat_file_in ) { - x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n"); + x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" ); return -1; } } /* check whether 1st pass options were compatible with current options */ - if( !strncmp( stats_buf, "#options:", 9 ) ) + if( strncmp( stats_buf, "#options:", 9 ) ) + { + x264_log( h, X264_LOG_ERROR, "options list in stats file not valid\n" ); + return -1; + } + + float res_factor, res_factor_bits; { int i, j; uint32_t k, l; @@ -712,14 +916,17 @@ int x264_ratecontrol_new( x264_t *h ) x264_log( h, X264_LOG_ERROR, "resolution specified in stats file not valid\n" ); return -1; } - else if( h->param.rc.b_mb_tree && (i != h->param.i_width || j != h->param.i_height) ) + else if( h->param.rc.b_mb_tree ) { - x264_log( h, X264_LOG_ERROR, "MB-tree doesn't support different resolution than 1st pass (%dx%d vs %dx%d)\n", - h->param.i_width, h->param.i_height, i, j ); - return -1; + rc->mbtree.srcdim[0] = i; + rc->mbtree.srcdim[1] = j; } + res_factor = (float)h->param.i_width * h->param.i_height / (i*j); + /* Change in bits relative to resolution isn't quite linear on typical sources, + * so we'll at least try to roughly approximate this effect. */ + res_factor_bits = powf( res_factor, 0.7 ); - if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 ) + if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 ) { x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" ); return -1; @@ -736,7 +943,20 @@ int x264_ratecontrol_new( x264_t *h ) CMP_OPT_FIRST_PASS( "bframes", h->param.i_bframe ); CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid ); CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh ); - CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop ); + CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop ); + CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat ); + + if( (p = strstr( opts, "interlaced=" )) ) + { + char *current = h->param.b_interlaced ? h->param.b_tff ? "tff" : "bff" : h->param.b_fake_interlaced ? "fake" : "0"; + char buf[5]; + sscanf( p, "interlaced=%4s", buf ); + if( strcmp( current, buf ) ) + { + x264_log( h, X264_LOG_ERROR, "different interlaced setting than first pass (%s vs %s)\n", current, buf ); + return -1; + } + } if( (p = strstr( opts, "keyint=" )) ) { @@ -780,7 +1000,7 @@ int x264_ratecontrol_new( x264_t *h ) p = strchr( p + 1, ';' ); if( !num_entries ) { - x264_log(h, X264_LOG_ERROR, "empty stats file\n"); + x264_log( h, X264_LOG_ERROR, "empty stats file\n" ); return -1; } rc->num_entries = num_entries; @@ -804,13 +1024,14 @@ int x264_ratecontrol_new( x264_t *h ) { ratecontrol_entry_t *rce = &rc->entry[i]; rce->pict_type = SLICE_TYPE_P; - rce->qscale = rce->new_qscale = qp2qscale( 20 ); + rce->qscale = rce->new_qscale = qp2qscale( 20 + QP_BD_OFFSET ); rce->misc_bits = rc->nmb + 10; rce->new_qp = 0; } /* read stats */ p = stats_in; + double total_qp_aq = 0; for( int i = 0; i < rc->num_entries; i++ ) { ratecontrol_entry_t *rce; @@ -818,7 +1039,7 @@ int x264_ratecontrol_new( x264_t *h ) char pict_type; int e; char *next; - float qp; + float qp_rc, qp_aq; int ref; next= strchr(p, ';'); @@ -834,10 +1055,16 @@ int x264_ratecontrol_new( x264_t *h ) rce = &rc->entry[frame_number]; rce->direct_mode = 0; - e += sscanf( p, " in:%*d out:%*d type:%c dur:%"SCNd64" cpbdur:%"SCNd64" q:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c", - &pict_type, &rce->i_duration, &rce->i_cpb_duration, &qp, &rce->tex_bits, + e += sscanf( p, " in:%*d out:%*d type:%c dur:%"SCNd64" cpbdur:%"SCNd64" q:%f aq:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c", + &pict_type, &rce->i_duration, &rce->i_cpb_duration, &qp_rc, &qp_aq, &rce->tex_bits, &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count, &rce->direct_mode ); + rce->tex_bits *= res_factor_bits; + rce->mv_bits *= res_factor_bits; + rce->misc_bits *= res_factor_bits; + rce->i_count *= res_factor; + rce->p_count *= res_factor; + rce->s_count *= res_factor; p = strstr( p, "ref:" ); if( !p ) @@ -894,15 +1121,18 @@ int x264_ratecontrol_new( x264_t *h ) break; default: e = -1; break; } - if( e < 12 ) + if( e < 13 ) { parse_error: x264_log( h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e ); return -1; } - rce->qscale = qp2qscale( qp ); + rce->qscale = qp2qscale( qp_rc ); + total_qp_aq += qp_aq; p = next; } + if( !h->param.b_stitchable ) + h->pps->i_pic_init_qp = SPEC_QP( (int)(total_qp_aq / rc->num_entries + 0.5) ); x264_free( stats_buf ); @@ -923,10 +1153,10 @@ parse_error: if( !rc->psz_stat_file_tmpname ) return -1; - rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" ); + rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" ); if( rc->p_stat_file_out == NULL ) { - x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n"); + x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" ); return -1; } @@ -941,10 +1171,10 @@ parse_error: if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name ) return -1; - rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" ); + rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" ); if( rc->p_mbtree_stat_file_out == NULL ) { - x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n"); + x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" ); return -1; } } @@ -952,10 +1182,13 @@ parse_error: if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) ) { - CHECKED_MALLOC( rc->qp_buffer[0], h->mb.i_mb_count * sizeof(uint16_t) ); - if( h->param.i_bframe_pyramid && h->param.rc.b_stat_read ) - CHECKED_MALLOC( rc->qp_buffer[1], h->mb.i_mb_count * sizeof(uint16_t) ); - rc->qpbuf_pos = -1; + if( !h->param.rc.b_stat_read ) + { + rc->mbtree.srcdim[0] = h->param.i_width; + rc->mbtree.srcdim[1] = h->param.i_height; + } + if( x264_macroblock_tree_rescale_init( h, rc ) < 0 ) + return -1; } for( int i = 0; iparam.i_threads; i++ ) @@ -981,11 +1214,11 @@ static int parse_zone( x264_t *h, x264_zone_t *z, char *p ) char *tok, UNUSED *saveptr=NULL; z->param = NULL; z->f_bitrate_factor = 1; - if( 3 <= sscanf(p, "%u,%u,q=%u%n", &z->i_start, &z->i_end, &z->i_qp, &len) ) + if( 3 <= sscanf(p, "%d,%d,q=%d%n", &z->i_start, &z->i_end, &z->i_qp, &len) ) z->b_force_qp = 1; - else if( 3 <= sscanf(p, "%u,%u,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) ) + else if( 3 <= sscanf(p, "%d,%d,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) ) z->b_force_qp = 0; - else if( 2 <= sscanf(p, "%u,%u%n", &z->i_start, &z->i_end, &len) ) + else if( 2 <= sscanf(p, "%d,%d%n", &z->i_start, &z->i_end, &len) ) z->b_force_qp = 0; else { @@ -1118,7 +1351,7 @@ void x264_ratecontrol_delete( x264_t *h ) b_regular_file = x264_is_regular_file( rc->p_stat_file_out ); fclose( rc->p_stat_file_out ); if( h->i_frame >= rc->num_entries && b_regular_file ) - if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 ) + if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 ) { x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n", rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ); @@ -1130,7 +1363,7 @@ void x264_ratecontrol_delete( x264_t *h ) b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out ); fclose( rc->p_mbtree_stat_file_out ); if( h->i_frame >= rc->num_entries && b_regular_file ) - if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 ) + if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 ) { x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n", rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ); @@ -1143,8 +1376,7 @@ void x264_ratecontrol_delete( x264_t *h ) x264_free( rc->pred ); x264_free( rc->pred_b_from_p ); x264_free( rc->entry ); - x264_free( rc->qp_buffer[0] ); - x264_free( rc->qp_buffer[1] ); + x264_macroblock_tree_rescale_destroy( rc ); if( rc->zones ) { x264_free( rc->zones[0].param ); @@ -1179,7 +1411,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) x264_emms(); if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) ) - x264_encoder_reconfig( h, zone->param ); + x264_encoder_reconfig_apply( h, zone->param ); rc->prev_zone = zone; if( h->param.rc.b_stat_read ) @@ -1199,7 +1431,9 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) if( rc->b_vbv ) { memset( h->fdec->i_row_bits, 0, h->mb.i_mb_height * sizeof(int) ); - rc->row_pred = &rc->row_preds[h->sh.i_type]; + memset( h->fdec->f_row_qp, 0, h->mb.i_mb_height * sizeof(float) ); + memset( h->fdec->f_row_qscale, 0, h->mb.i_mb_height * sizeof(float) ); + rc->row_pred = rc->row_preds[h->sh.i_type]; rc->buffer_rate = h->fenc->i_cpb_duration * rc->vbv_max_rate * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; update_vbv_plan( h, overhead ); @@ -1209,12 +1443,11 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) int mincr = l->mincr; - /* Blu-ray requires this */ - if( l->level_idc == 41 && h->param.i_nal_hrd ) + if( h->param.b_bluray_compat ) mincr = 4; - /* High 10 doesn't require minCR, so just set the maximum to a large value. */ - if( h->sps->i_profile_idc == PROFILE_HIGH10 ) + /* Profiles above High don't require minCR, so just set the maximum to a large value. */ + if( h->sps->i_profile_idc > PROFILE_HIGH ) rc->frame_size_maximum = 1e9; else { @@ -1237,11 +1470,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) if( h->sh.i_type != SLICE_TYPE_B ) rc->bframes = h->fenc->i_bframes; - if( i_force_qp != X264_QP_AUTO ) - { - q = i_force_qp - 1; - } - else if( rc->b_abr ) + if( rc->b_abr ) { q = qscale2qp( rate_estimate_qscale( h ) ); } @@ -1265,12 +1494,14 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) q -= 6*log2f( zone->f_bitrate_factor ); } } + if( i_force_qp != X264_QP_AUTO ) + q = i_force_qp - 1; q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); - rc->qpa_rc = - rc->qpa_aq = 0; - rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX ); + rc->qpa_rc = rc->qpa_rc_prev = + rc->qpa_aq = rc->qpa_aq_prev = 0; + rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX ); h->fdec->f_qp_avg_rc = h->fdec->f_qp_avg_aq = rc->qpm = q; @@ -1283,158 +1514,210 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) rc->last_non_b_pict_type = h->sh.i_type; } -static double predict_row_size( x264_t *h, int y, double qp ) +static float predict_row_size( x264_t *h, int y, float qscale ) { /* average between two predictors: * absolute SATD, and scaled bit cost of the colocated row in the previous frame */ x264_ratecontrol_t *rc = h->rc; - double pred_s = predict_size( rc->row_pred[0], qp2qscale( qp ), h->fdec->i_row_satd[y] ); - double pred_t = 0; - if( h->sh.i_type == SLICE_TYPE_I || qp >= h->fref0[0]->f_row_qp[y] ) + float pred_s = predict_size( &rc->row_pred[0], qscale, h->fdec->i_row_satd[y] ); + if( h->sh.i_type == SLICE_TYPE_I || qscale >= h->fref[0][0]->f_row_qscale[y] ) { if( h->sh.i_type == SLICE_TYPE_P - && h->fref0[0]->i_type == h->fdec->i_type - && h->fref0[0]->i_row_satd[y] > 0 - && (abs(h->fref0[0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2)) + && h->fref[0][0]->i_type == h->fdec->i_type + && h->fref[0][0]->f_row_qscale[y] > 0 + && h->fref[0][0]->i_row_satd[y] > 0 + && (abs(h->fref[0][0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2)) { - pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y] - * qp2qscale( h->fref0[0]->f_row_qp[y] ) / qp2qscale( qp ); + float pred_t = h->fref[0][0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref[0][0]->i_row_satd[y] + * h->fref[0][0]->f_row_qscale[y] / qscale; + return (pred_s + pred_t) * 0.5f; } - if( pred_t == 0 ) - pred_t = pred_s; - return (pred_s + pred_t) / 2; + return pred_s; } /* Our QP is lower than the reference! */ else { - double pred_intra = predict_size( rc->row_pred[1], qp2qscale( qp ), h->fdec->i_row_satds[0][0][y] ); + float pred_intra = predict_size( &rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y] ); /* Sum: better to overestimate than underestimate by using only one of the two predictors. */ return pred_intra + pred_s; } } -static double row_bits_so_far( x264_t *h, int y ) +static int row_bits_so_far( x264_t *h, int y ) { - double bits = 0; + int bits = 0; for( int i = h->i_threadslice_start; i <= y; i++ ) bits += h->fdec->i_row_bits[i]; return bits; } -static double predict_row_size_sum( x264_t *h, int y, double qp ) +static float predict_row_size_sum( x264_t *h, int y, float qp ) { - double bits = row_bits_so_far(h, y); + float qscale = qp2qscale( qp ); + float bits = row_bits_so_far( h, y ); for( int i = y+1; i < h->i_threadslice_end; i++ ) - bits += predict_row_size( h, i, qp ); + bits += predict_row_size( h, i, qscale ); return bits; } - -void x264_ratecontrol_mb( x264_t *h, int bits ) +/* TODO: + * eliminate all use of qp in row ratecontrol: make it entirely qscale-based. + * make this function stop being needlessly O(N^2) + * update more often than once per row? */ +int x264_ratecontrol_mb( x264_t *h, int bits ) { x264_ratecontrol_t *rc = h->rc; const int y = h->mb.i_mb_y; - x264_emms(); - h->fdec->i_row_bits[y] += bits; - rc->qpa_rc += rc->qpm; rc->qpa_aq += h->mb.i_qp; - if( h->mb.i_mb_x != h->mb.i_mb_width - 1 || !rc->b_vbv ) - return; + if( h->mb.i_mb_x != h->mb.i_mb_width - 1 ) + return 0; + + x264_emms(); + rc->qpa_rc += rc->qpm * h->mb.i_mb_width; + if( !rc->b_vbv ) + return 0; + + float qscale = qp2qscale( rc->qpm ); h->fdec->f_row_qp[y] = rc->qpm; + h->fdec->f_row_qscale[y] = qscale; - update_predictor( rc->row_pred[0], qp2qscale( rc->qpm ), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] ); - if( h->sh.i_type == SLICE_TYPE_P && rc->qpm < h->fref0[0]->f_row_qp[y] ) - update_predictor( rc->row_pred[1], qp2qscale( rc->qpm ), h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] ); + update_predictor( &rc->row_pred[0], qscale, h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] ); + if( h->sh.i_type != SLICE_TYPE_I && rc->qpm < h->fref[0][0]->f_row_qp[y] ) + update_predictor( &rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] ); + + /* update ratecontrol per-mbpair in MBAFF */ + if( SLICE_MBAFF && !(y&1) ) + return 0; + + /* FIXME: We don't currently support the case where there's a slice + * boundary in between. */ + int can_reencode_row = h->sh.i_first_mb <= ((h->mb.i_mb_y - SLICE_MBAFF) * h->mb.i_mb_stride); /* tweak quality based on difference from predicted size */ + float prev_row_qp = h->fdec->f_row_qp[y]; + float qp_absolute_max = h->param.rc.i_qp_max; + if( rc->rate_factor_max_increment ) + qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment ); + float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max ); + float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min ); + float step_size = 0.5f; + float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned; + float max_frame_error = x264_clip3f( 1.0 / h->mb.i_mb_height, 0.05, 0.25 ); + float max_frame_size = rc->frame_size_maximum - rc->frame_size_maximum * max_frame_error; + max_frame_size = X264_MIN( max_frame_size, rc->buffer_fill - rc->buffer_rate * max_frame_error ); + float size_of_other_slices = 0; + if( h->param.b_sliced_threads ) + { + float size_of_other_slices_planned = 0; + for( int i = 0; i < h->param.i_threads; i++ ) + if( h != h->thread[i] ) + { + size_of_other_slices += h->thread[i]->rc->frame_size_estimated; + size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned; + } + float weight = rc->slice_size_planned / rc->frame_size_planned; + size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned; + } if( y < h->i_threadslice_end-1 ) { - float prev_row_qp = h->fdec->f_row_qp[y]; - float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min ); - float qp_absolute_max = h->param.rc.i_qp_max; - if( rc->rate_factor_max_increment ) - qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment ); - float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max ); - float step_size = 0.5; - /* B-frames shouldn't use lower QP than their reference frames. */ if( h->sh.i_type == SLICE_TYPE_B ) { - qp_min = X264_MAX( qp_min, X264_MAX( h->fref0[0]->f_row_qp[y+1], h->fref1[0]->f_row_qp[y+1] ) ); + qp_min = X264_MAX( qp_min, X264_MAX( h->fref[0][0]->f_row_qp[y+1], h->fref[1][0]->f_row_qp[y+1] ) ); rc->qpm = X264_MAX( rc->qpm, qp_min ); } float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned; - float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned; - float max_frame_error = X264_MAX( 0.05, 1.0 / (h->mb.i_mb_height) ); - float size_of_other_slices = 0; - if( h->param.b_sliced_threads ) - { - float size_of_other_slices_planned = 0; - for( int i = 0; i < h->param.i_threads; i++ ) - if( h != h->thread[i] ) - { - size_of_other_slices += h->thread[i]->rc->frame_size_estimated; - size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned; - } - float weight = rc->slice_size_planned / rc->frame_size_planned; - size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned; - } - + buffer_left_planned = X264_MAX( buffer_left_planned, 0.f ); /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */ float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance; - int b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices; + float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices; - /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */ + /* Don't increase the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */ /* area at the top of the frame was measured inaccurately. */ - if( row_bits_so_far( h, y ) < 0.05 * slice_size_planned ) - return; + if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned ) + qp_max = qp_absolute_max = prev_row_qp; if( h->sh.i_type != SLICE_TYPE_I ) - rc_tol /= 2; + rc_tol *= 0.5f; if( !rc->b_vbv_min_rate ) qp_min = X264_MAX( qp_min, rc->qp_novbv ); while( rc->qpm < qp_max && ((b1 > rc->frame_size_planned + rc_tol) || - (rc->buffer_fill - b1 < buffer_left_planned * 0.5) || - (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv)) ) + (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv) || + (b1 > rc->buffer_fill - buffer_left_planned * 0.5f)) ) { rc->qpm += step_size; b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices; } - while( rc->qpm > qp_min + rc->qpm -= step_size; + while( rc->qpm > qp_min && rc->qpm < prev_row_qp && (rc->qpm > h->fdec->f_row_qp[0] || rc->single_frame_vbv) - && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp) - || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) ) + && (b1 < max_frame_size) + && ((b1 < rc->frame_size_planned * 0.8f) || + (b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 0.95f)) ) { - rc->qpm -= step_size; b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices; + rc->qpm -= step_size; } + rc->qpm += step_size; /* avoid VBV underflow or MinCR violation */ - while( (rc->qpm < qp_absolute_max) - && ((rc->buffer_fill - b1 < rc->buffer_rate * max_frame_error) || - (rc->frame_size_maximum - b1 < rc->frame_size_maximum * max_frame_error))) + while( rc->qpm < qp_absolute_max && (b1 > max_frame_size) ) { rc->qpm += step_size; b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices; } + h->rc->frame_size_estimated = b1 - size_of_other_slices; + + /* If the current row was large enough to cause a large QP jump, try re-encoding it. */ + if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row ) + { + /* Bump QP to halfway in between... close enough. */ + rc->qpm = x264_clip3f( (prev_row_qp + rc->qpm)*0.5f, prev_row_qp + 1.0f, qp_max ); + rc->qpa_rc = rc->qpa_rc_prev; + rc->qpa_aq = rc->qpa_aq_prev; + h->fdec->i_row_bits[y] = 0; + h->fdec->i_row_bits[y-SLICE_MBAFF] = 0; + return -1; + } + } + else + { h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm ); + + /* Last-ditch attempt: if the last row of the frame underflowed the VBV, + * try again. */ + if( rc->qpm < qp_max && can_reencode_row + && (h->rc->frame_size_estimated + size_of_other_slices > max_frame_size) ) + { + rc->qpm = qp_max; + rc->qpa_rc = rc->qpa_rc_prev; + rc->qpa_aq = rc->qpa_aq_prev; + h->fdec->i_row_bits[y] = 0; + h->fdec->i_row_bits[y-SLICE_MBAFF] = 0; + return -1; + } } + + rc->qpa_rc_prev = rc->qpa_rc; + rc->qpa_aq_prev = rc->qpa_aq; + + return 0; } int x264_ratecontrol_qp( x264_t *h ) { x264_emms(); - return x264_clip3( h->rc->qpm + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); + return x264_clip3( h->rc->qpm + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); } int x264_ratecontrol_mb_qp( x264_t *h ) @@ -1442,9 +1725,15 @@ int x264_ratecontrol_mb_qp( x264_t *h ) x264_emms(); float qp = h->rc->qpm; if( h->param.rc.i_aq_mode ) - /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */ - qp += h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy]; - return x264_clip3( qp + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); + { + /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */ + float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy]; + /* Scale AQ's effect towards zero in emergency mode. */ + if( qp > QP_MAX_SPEC ) + qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC); + qp += qp_offset; + } + return x264_clip3( qp + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); } /* In 2pass, force the same frame types as in the 1st pass */ @@ -1464,10 +1753,10 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num ) rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX ); rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX ); - x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries); - x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant); + x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries ); + x264_log( h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant ); if( h->param.i_bframe_adaptive ) - x264_log(h, X264_LOG_ERROR, "disabling adaptive B-frames\n"); + x264_log( h, X264_LOG_ERROR, "disabling adaptive B-frames\n" ); for( int i = 0; i < h->param.i_threads; i++ ) { @@ -1520,7 +1809,8 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) h->stat.frame.i_mb_count_p += mbs[i]; h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count; - h->fdec->f_qp_avg_aq = rc->qpa_aq /= h->mb.i_mb_count; + h->fdec->f_qp_avg_aq = (float)rc->qpa_aq / h->mb.i_mb_count; + h->fdec->f_crf_avg = h->param.rc.f_rf_constant + h->fdec->f_qp_avg_rc - rc->qp_novbv; if( h->param.rc.b_stat_write ) { @@ -1534,10 +1824,11 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' ) : '-'; if( fprintf( rc->p_stat_file_out, - "in:%d out:%d type:%c dur:%"PRId64" cpbdur:%"PRId64" q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c ref:", + "in:%d out:%d type:%c dur:%"PRId64" cpbdur:%"PRId64" q:%.2f aq:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c ref:", h->fenc->i_frame, h->i_frame, c_type, h->fenc->i_duration, - h->fenc->i_cpb_duration, rc->qpa_rc, + h->fenc->i_cpb_duration, + rc->qpa_rc, h->fdec->f_qp_avg_aq, h->stat.frame.i_tex_bits, h->stat.frame.i_mv_bits, h->stat.frame.i_misc_bits, @@ -1549,10 +1840,10 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) /* Only write information for reference reordering once. */ int use_old_stats = h->param.rc.b_stat_read && rc->rce->refs > 1; - for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref0); i++ ) + for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref[0]); i++ ) { int refcount = use_old_stats ? rc->rce->refcount[i] - : h->param.b_interlaced ? h->stat.frame.i_mb_count_ref[0][i*2] + : PARAM_INTERLACED ? h->stat.frame.i_mb_count_ref[0][i*2] + h->stat.frame.i_mb_count_ref[0][i*2+1] : h->stat.frame.i_mb_count_ref[0][i]; if( fprintf( rc->p_stat_file_out, "%d ", refcount ) < 0 ) @@ -1584,10 +1875,10 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) uint8_t i_type = h->sh.i_type; /* Values are stored as big-endian FIX8.8 */ for( int i = 0; i < h->mb.i_mb_count; i++ ) - rc->qp_buffer[0][i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 ); + rc->mbtree.qp_buffer[0][i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 ); if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 ) goto fail; - if( fwrite( rc->qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count ) + if( fwrite( rc->mbtree.qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count ) goto fail; } } @@ -1603,9 +1894,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / (rc->last_rceq * fabs( h->param.rc.f_pb_factor )); } rc->cplxr_sum *= rc->cbr_decay; - double frame_duration = (double)h->fenc->i_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; - - rc->wanted_bits_window += frame_duration * rc->bitrate; + rc->wanted_bits_window += h->fenc->f_duration * rc->bitrate; rc->wanted_bits_window *= rc->cbr_decay; } @@ -1620,7 +1909,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) if( h->fenc->b_last_minigop_bframe ) { update_predictor( rc->pred_b_from_p, qp2qscale( rc->qpa_rc ), - h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes ); + h->fref[1][h->i_ref[1]-1]->i_satd, rc->bframe_bits / rc->bframes ); rc->bframe_bits = 0; } } @@ -1644,15 +1933,16 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit + (double)(h->fenc->i_cpb_delay - h->i_cpb_delay_pir_offset) * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; - double cpb_earliest_arrival_time = h->fenc->hrd_timing.cpb_removal_time - (double)rc->initial_cpb_removal_delay / 90000; if( h->fenc->b_keyframe ) { - rc->nrt_first_access_unit = h->fenc->hrd_timing.cpb_removal_time; - rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay; - rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset; + rc->nrt_first_access_unit = h->fenc->hrd_timing.cpb_removal_time; + rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay; + rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset; } - else - cpb_earliest_arrival_time -= (double)rc->initial_cpb_removal_delay_offset / 90000; + + double cpb_earliest_arrival_time = h->fenc->hrd_timing.cpb_removal_time - (double)rc->initial_cpb_removal_delay / 90000; + if( !h->fenc->b_keyframe ) + cpb_earliest_arrival_time -= (double)rc->initial_cpb_removal_delay_offset / 90000; if( h->sps->vui.hrd.b_cbr_hrd ) h->fenc->hrd_timing.cpb_initial_arrival_time = rc->previous_cpb_final_arrival_time; @@ -1670,7 +1960,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) return 0; fail: - x264_log(h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n"); + x264_log( h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n" ); return -1; } @@ -1685,7 +1975,14 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor { x264_ratecontrol_t *rcc= h->rc; x264_zone_t *zone = get_zone( h, frame_num ); - double q = pow( rce->blurred_complexity, 1 - rcc->qcompress ); + double q; + if( h->param.rc.b_mb_tree ) + { + double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; + q = pow( BASE_FRAME_DURATION / CLIP_DURATION(rce->i_duration * timescale), 1 - h->param.rc.f_qcompress ); + } + else + q = pow( rce->blurred_complexity, 1 - rcc->qcompress ); // avoid NaN's in the rc_eq if( !isfinite(q) || rce->tex_bits + rce->mv_bits == 0 ) @@ -1708,10 +2005,11 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor return q; } -static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q) +static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q, int frame_num) { x264_ratecontrol_t *rcc = h->rc; const int pict_type = rce->pict_type; + x264_zone_t *zone = get_zone( h, frame_num ); // force I/B quants as a function of P quants const double last_p_q = rcc->last_qscale_for[SLICE_TYPE_P]; @@ -1772,23 +2070,33 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q) rcc->accum_p_qp = mask * (qscale2qp( q ) + rcc->accum_p_qp); rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm); } + + if( zone ) + { + if( zone->b_force_qp ) + q = qp2qscale( zone->i_qp ); + else + q /= zone->f_bitrate_factor; + } + return q; } -static double predict_size( predictor_t *p, double q, double var ) +static float predict_size( predictor_t *p, float q, float var ) { - return (p->coeff*var + p->offset) / (q*p->count); + return (p->coeff*var + p->offset) / (q*p->count); } -static void update_predictor( predictor_t *p, double q, double var, double bits ) +static void update_predictor( predictor_t *p, float q, float var, float bits ) { - const double range = 1.5; + float range = 1.5; if( var < 10 ) return; - double old_coeff = p->coeff / p->count; - double new_coeff = bits*q / var; - double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range ); - double new_offset = bits*q - new_coeff_clipped * var; + float old_coeff = p->coeff / p->count; + float old_offset = p->offset / p->count; + float new_coeff = X264_MAX( (bits*q - old_offset) / var, p->coeff_min ); + float new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range ); + float new_offset = bits*q - new_coeff_clipped * var; if( new_offset >= 0 ) new_coeff = new_coeff_clipped; else @@ -1808,7 +2116,7 @@ static int update_vbv( x264_t *h, int bits ) int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled; x264_ratecontrol_t *rcc = h->rc; x264_ratecontrol_t *rct = h->thread[0]->rc; - uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; + int64_t buffer_size = (int64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; if( rcc->last_satd >= h->mb.i_mb_count ) update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits ); @@ -1816,21 +2124,45 @@ static int update_vbv( x264_t *h, int bits ) if( !rcc->b_vbv ) return filler; - rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale; - - if( rct->buffer_fill_final < 0 ) - x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale ); - rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 ); - rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration; + uint64_t buffer_diff = (uint64_t)bits * h->sps->vui.i_time_scale; + rct->buffer_fill_final -= buffer_diff; + rct->buffer_fill_final_min -= buffer_diff; - if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size ) + if( rct->buffer_fill_final_min < 0 ) { - filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) ); - bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8; - rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale; + double underflow = (double)rct->buffer_fill_final_min / h->sps->vui.i_time_scale; + if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment ) + x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow ); + else + x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow ); + rct->buffer_fill_final = + rct->buffer_fill_final_min = 0; } + + if( h->param.i_avcintra_class ) + buffer_diff = buffer_size; else - rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size ); + buffer_diff = (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration; + rct->buffer_fill_final += buffer_diff; + rct->buffer_fill_final_min += buffer_diff; + + if( rct->buffer_fill_final > buffer_size ) + { + if( h->param.rc.b_filler ) + { + int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8; + filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale; + bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8; + buffer_diff = (uint64_t)bits * h->sps->vui.i_time_scale; + rct->buffer_fill_final -= buffer_diff; + rct->buffer_fill_final_min -= buffer_diff; + } + else + { + rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size ); + rct->buffer_fill_final_min = X264_MIN( rct->buffer_fill_final_min, buffer_size ); + } + } return filler; } @@ -1841,23 +2173,27 @@ void x264_hrd_fullness( x264_t *h ) uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom; uint64_t cpb_state = rct->buffer_fill_final; uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; - uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom; + uint64_t multiply_factor = 90000 / rct->hrd_multiply_denom; - if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > cpb_size ) + if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > (int64_t)cpb_size ) { - x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n", - rct->buffer_fill_final < 0 ? "underflow" : "overflow", (float)rct->buffer_fill_final/denom, (float)cpb_size/denom ); + x264_log( h, X264_LOG_WARNING, "CPB %s: %.0f bits in a %.0f-bit buffer\n", + rct->buffer_fill_final < 0 ? "underflow" : "overflow", + (double)rct->buffer_fill_final / h->sps->vui.i_time_scale, (double)cpb_size / h->sps->vui.i_time_scale ); } - h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom); - h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay; + h->initial_cpb_removal_delay = (multiply_factor * cpb_state) / denom; + h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size) / denom - h->initial_cpb_removal_delay; + + int64_t decoder_buffer_fill = h->initial_cpb_removal_delay * denom / multiply_factor; + rct->buffer_fill_final_min = X264_MIN( rct->buffer_fill_final_min, decoder_buffer_fill ); } // provisionally update VBV according to the planned size of all frames currently in progress static void update_vbv_plan( x264_t *h, int overhead ) { x264_ratecontrol_t *rcc = h->rc; - rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final / h->sps->vui.i_time_scale; + rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final_min / h->sps->vui.i_time_scale; if( h->i_thread_frames > 1 ) { int j = h->rc - h->thread[0]->rc; @@ -1867,7 +2203,7 @@ static void update_vbv_plan( x264_t *h, int overhead ) double bits = t->rc->frame_size_planned; if( !t->b_thread_active ) continue; - bits = X264_MAX(bits, t->rc->frame_size_estimated); + bits = X264_MAX(bits, t->rc->frame_size_estimated); rcc->buffer_fill -= bits; rcc->buffer_fill = X264_MAX( rcc->buffer_fill, 0 ); rcc->buffer_fill += t->rc->buffer_rate; @@ -1893,6 +2229,8 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) if( rcc->b_vbv && rcc->last_satd > 0 ) { + double fenc_cpb_duration = (double)h->fenc->i_cpb_duration * + h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; /* Lookahead VBV: raise the quantizer as necessary such that no frames in * the lookahead overflow and such that the buffer is in a reasonable state * by the end of the lookahead. */ @@ -1908,6 +2246,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) double buffer_fill_cur = rcc->buffer_fill - cur_bits; double target_fill; double total_duration = 0; + double last_duration = fenc_cpb_duration; frame_q[0] = h->sh.i_type == SLICE_TYPE_I ? q * h->param.rc.f_ip_factor : q; frame_q[1] = frame_q[0] * h->param.rc.f_pb_factor; frame_q[2] = frame_q[0] / h->param.rc.f_ip_factor; @@ -1915,8 +2254,8 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) /* Loop over the planned future frames. */ for( int j = 0; buffer_fill_cur >= 0 && buffer_fill_cur <= rcc->buffer_size; j++ ) { - total_duration += h->fenc->f_planned_cpb_duration[j]; - buffer_fill_cur += rcc->vbv_max_rate * h->fenc->f_planned_cpb_duration[j]; + total_duration += last_duration; + buffer_fill_cur += rcc->vbv_max_rate * last_duration; int i_type = h->fenc->i_planned_type[j]; int i_satd = h->fenc->i_planned_satd[j]; if( i_type == X264_TYPE_AUTO ) @@ -1924,6 +2263,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) i_type = IS_X264_TYPE_I( i_type ) ? SLICE_TYPE_I : IS_X264_TYPE_B( i_type ) ? SLICE_TYPE_B : SLICE_TYPE_P; cur_bits = predict_size( &rcc->pred[i_type], frame_q[i_type], i_satd ); buffer_fill_cur -= cur_bits; + last_duration = h->fenc->f_planned_cpb_duration[j]; } /* Try to get to get the buffer at least 50% filled, but don't set an impossible goal. */ target_fill = X264_MIN( rcc->buffer_fill + total_duration * rcc->vbv_max_rate * 0.5, rcc->buffer_size * 0.5 ); @@ -1957,45 +2297,44 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) /* Now a hard threshold to make sure the frame fits in VBV. * This one is mostly for I-frames. */ double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); - double qf = 1.0; /* For small VBVs, allow the frame to use up the entire VBV. */ double max_fill_factor = h->param.rc.i_vbv_buffer_size >= 5*h->param.rc.i_vbv_max_bitrate / rcc->fps ? 2 : 1; /* For single-frame VBVs, request that the frame use up the entire VBV. */ double min_fill_factor = rcc->single_frame_vbv ? 1 : 2; if( bits > rcc->buffer_fill/max_fill_factor ) - qf = x264_clip3f( rcc->buffer_fill/(max_fill_factor*bits), 0.2, 1.0 ); - q /= qf; - bits *= qf; + { + double qf = x264_clip3f( rcc->buffer_fill/(max_fill_factor*bits), 0.2, 1.0 ); + q /= qf; + bits *= qf; + } if( bits < rcc->buffer_rate/min_fill_factor ) - q *= bits*min_fill_factor/rcc->buffer_rate; + { + double qf = x264_clip3f( bits*min_fill_factor/rcc->buffer_rate, 0.001, 1.0 ); + q *= qf; + } q = X264_MAX( q0, q ); } - /* Apply MinCR restrictions */ - double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); - if( bits > rcc->frame_size_maximum ) - q *= bits / rcc->frame_size_maximum; - bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); - /* Check B-frame complexity, and use up any bits that would * overflow before the next P-frame. */ if( h->sh.i_type == SLICE_TYPE_P && !rcc->single_frame_vbv ) { int nb = rcc->bframes; + double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); double pbbits = bits; double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd ); double space; double bframe_cpb_duration = 0; double minigop_cpb_duration; for( int i = 0; i < nb; i++ ) - bframe_cpb_duration += h->fenc->f_planned_cpb_duration[1+i]; + bframe_cpb_duration += h->fenc->f_planned_cpb_duration[i]; if( bbits * nb > bframe_cpb_duration * rcc->vbv_max_rate ) nb = 0; pbbits += nb * bbits; - minigop_cpb_duration = bframe_cpb_duration + h->fenc->f_planned_cpb_duration[0]; + minigop_cpb_duration = bframe_cpb_duration + fenc_cpb_duration; space = rcc->buffer_fill + minigop_cpb_duration*rcc->vbv_max_rate - rcc->buffer_size; if( pbbits < space ) { @@ -2004,6 +2343,12 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) q = X264_MAX( q0/2, q ); } + /* Apply MinCR and buffer fill restrictions */ + double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); + double frame_size_maximum = X264_MIN( rcc->frame_size_maximum, X264_MAX( rcc->buffer_fill, 0.001 ) ); + if( bits > frame_size_maximum ) + q *= bits / frame_size_maximum; + if( !rcc->b_vbv_min_rate ) q = X264_MAX( q0, q ); } @@ -2028,7 +2373,7 @@ static float rate_estimate_qscale( x264_t *h ) { float q; x264_ratecontrol_t *rcc = h->rc; - ratecontrol_entry_t UNINIT(rce); + ratecontrol_entry_t rce = {0}; int pict_type = h->sh.i_type; int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I] + h->stat.i_frame_size[SLICE_TYPE_P] @@ -2050,16 +2395,16 @@ static float rate_estimate_qscale( x264_t *h ) /* B-frames don't have independent ratecontrol, but rather get the * average QP of the two adjacent P-frames + an offset */ - int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type); - int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type); - int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc); - int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc); - float q0 = h->fref0[0]->f_qp_avg_rc; - float q1 = h->fref1[0]->f_qp_avg_rc; + int i0 = IS_X264_TYPE_I(h->fref_nearest[0]->i_type); + int i1 = IS_X264_TYPE_I(h->fref_nearest[1]->i_type); + int dt0 = abs(h->fenc->i_poc - h->fref_nearest[0]->i_poc); + int dt1 = abs(h->fenc->i_poc - h->fref_nearest[1]->i_poc); + float q0 = h->fref_nearest[0]->f_qp_avg_rc; + float q1 = h->fref_nearest[1]->f_qp_avg_rc; - if( h->fref0[0]->i_type == X264_TYPE_BREF ) + if( h->fref_nearest[0]->i_type == X264_TYPE_BREF ) q0 -= rcc->pb_offset/2; - if( h->fref1[0]->i_type == X264_TYPE_BREF ) + if( h->fref_nearest[1]->i_type == X264_TYPE_BREF ) q1 -= rcc->pb_offset/2; if( i0 && i1 ) @@ -2079,7 +2424,10 @@ static float rate_estimate_qscale( x264_t *h ) if( rcc->b_2pass && rcc->b_vbv ) rcc->frame_size_planned = qscale2bits( &rce, qp2qscale( q ) ); else - rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, qp2qscale( q ), h->fref1[h->i_ref1-1]->i_satd ); + rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, qp2qscale( q ), h->fref[1][h->i_ref[1]-1]->i_satd ); + /* Limit planned size by MinCR */ + if( rcc->b_vbv ) + rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum ); h->rc->frame_size_estimated = rcc->frame_size_planned; /* For row SATDs */ @@ -2110,7 +2458,7 @@ static float rate_estimate_qscale( x264_t *h ) double bits = t->rc->frame_size_planned; if( !t->b_thread_active ) continue; - bits = X264_MAX(bits, t->rc->frame_size_estimated); + bits = X264_MAX(bits, t->rc->frame_size_estimated); predicted_bits += (int64_t)bits; } } @@ -2144,6 +2492,7 @@ static float rate_estimate_qscale( x264_t *h ) double w = x264_clip3f( cur_time*100, 0.0, 1.0 ); q *= pow( (double)total_bits / rcc->expected_bits_sum, w ); } + rcc->qp_novbv = qscale2qp( q ); if( rcc->b_vbv ) { /* Do not overflow vbv */ @@ -2184,7 +2533,7 @@ static float rate_estimate_qscale( x264_t *h ) rcc->last_satd = x264_rc_analyse_slice( h ); rcc->short_term_cplxsum *= 0.5; rcc->short_term_cplxcount *= 0.5; - rcc->short_term_cplxsum += rcc->last_satd; + rcc->short_term_cplxsum += rcc->last_satd / (CLIP_DURATION(h->fenc->f_duration) / BASE_FRAME_DURATION); rcc->short_term_cplxcount ++; rce.tex_bits = rcc->last_satd; @@ -2195,6 +2544,7 @@ static float rate_estimate_qscale( x264_t *h ) rce.s_count = 0; rce.qscale = 1; rce.pict_type = pict_type; + rce.i_duration = h->fenc->i_duration; if( h->param.rc.i_rc_method == X264_RC_CRF ) { @@ -2270,12 +2620,15 @@ static float rate_estimate_qscale( x264_t *h ) /* Always use up the whole VBV in this case. */ if( rcc->single_frame_vbv ) rcc->frame_size_planned = rcc->buffer_rate; + /* Limit planned size by MinCR */ + if( rcc->b_vbv ) + rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum ); h->rc->frame_size_estimated = rcc->frame_size_planned; return q; } } -void x264_threads_normalize_predictors( x264_t *h ) +static void x264_threads_normalize_predictors( x264_t *h ) { double totalsize = 0; for( int i = 0; i < h->param.i_threads; i++ ) @@ -2289,27 +2642,31 @@ void x264_threads_distribute_ratecontrol( x264_t *h ) { int row; x264_ratecontrol_t *rc = h->rc; + x264_emms(); + float qscale = qp2qscale( rc->qpm ); /* Initialize row predictors */ if( h->i_frame == 0 ) for( int i = 0; i < h->param.i_threads; i++ ) { - x264_ratecontrol_t *t = h->thread[i]->rc; - memcpy( t->row_preds, rc->row_preds, sizeof(rc->row_preds) ); + x264_t *t = h->thread[i]; + if( t != h ) + memcpy( t->rc->row_preds, rc->row_preds, sizeof(rc->row_preds) ); } for( int i = 0; i < h->param.i_threads; i++ ) { x264_t *t = h->thread[i]; - memcpy( t->rc, rc, offsetof(x264_ratecontrol_t, row_pred) ); - t->rc->row_pred = &t->rc->row_preds[h->sh.i_type]; + if( t != h ) + memcpy( t->rc, rc, offsetof(x264_ratecontrol_t, row_pred) ); + t->rc->row_pred = t->rc->row_preds[h->sh.i_type]; /* Calculate the planned slice size. */ if( rc->b_vbv && rc->frame_size_planned ) { int size = 0; for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ ) size += h->fdec->i_row_satd[row]; - t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], rc->qpm, size ); + t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], qscale, size ); } else t->rc->slice_size_planned = 0; @@ -2324,7 +2681,7 @@ void x264_threads_distribute_ratecontrol( x264_t *h ) for( int i = 0; i < h->param.i_threads; i++ ) { x264_t *t = h->thread[i]; - float max_frame_error = X264_MAX( 0.05, 1.0 / (t->i_threadslice_end - t->i_threadslice_start) ); + float max_frame_error = x264_clip3f( 1.0 / (t->i_threadslice_end - t->i_threadslice_start), 0.05, 0.25 ); t->rc->slice_size_planned += 2 * max_frame_error * rc->frame_size_planned; } x264_threads_normalize_predictors( h ); @@ -2378,15 +2735,16 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ) COPY(short_term_cplxcount); COPY(bframes); COPY(prev_zone); - COPY(qpbuf_pos); + COPY(mbtree.qpbuf_pos); /* these vars can be updated by x264_ratecontrol_init_reconfigurable */ - COPY(buffer_rate); + COPY(bitrate); COPY(buffer_size); + COPY(buffer_rate); + COPY(vbv_max_rate); COPY(single_frame_vbv); COPY(cbr_decay); - COPY(b_vbv_min_rate); COPY(rate_factor_constant); - COPY(bitrate); + COPY(rate_factor_max_increment); #undef COPY } if( cur != next ) @@ -2416,7 +2774,7 @@ static int find_underflow( x264_t *h, double *fills, int *t0, int *t1, int over * we're adding or removing bits), and starting on the earliest frame that * can influence the buffer fill of that end frame. */ x264_ratecontrol_t *rcc = h->rc; - const double buffer_min = (over ? .1 : .1) * rcc->buffer_size; + const double buffer_min = .1 * rcc->buffer_size; const double buffer_max = .9 * rcc->buffer_size; double fill = fills[*t0-1]; double parity = over ? 1. : -1.; @@ -2541,10 +2899,11 @@ static int init_pass2( x264_t *h ) { x264_ratecontrol_t *rcc = h->rc; uint64_t all_const_bits = 0; + double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; double duration = 0; for( int i = 0; i < rcc->num_entries; i++ ) duration += rcc->entry[i].i_duration; - duration *= (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; + duration *= timescale; uint64_t all_available_bits = h->param.rc.i_bitrate * 1000. * duration; double rate_factor, step_mult; double qblur = h->param.rc.f_qblur; @@ -2583,21 +2942,23 @@ static int init_pass2( x264_t *h ) for( int j = 1; j < cplxblur*2 && j < rcc->num_entries-i; j++ ) { ratecontrol_entry_t *rcj = &rcc->entry[i+j]; + double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION; weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 ); if( weight < .0001 ) break; gaussian_weight = weight * exp( -j*j/200.0 ); weight_sum += gaussian_weight; - cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits); + cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration; } /* weighted average of cplx of past frames */ weight = 1.0; for( int j = 0; j <= cplxblur*2 && j <= i; j++ ) { ratecontrol_entry_t *rcj = &rcc->entry[i-j]; + double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION; gaussian_weight = weight * exp( -j*j/200.0 ); weight_sum += gaussian_weight; - cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits); + cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration; weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 ); if( weight < .0001 ) break; @@ -2644,14 +3005,14 @@ static int init_pass2( x264_t *h ) /* find qscale */ for( int i = 0; i < rcc->num_entries; i++ ) { - qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, i ); + qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, -1 ); rcc->last_qscale_for[rcc->entry[i].pict_type] = qscale[i]; } /* fixed I/B qscale relative to P */ for( int i = rcc->num_entries-1; i >= 0; i-- ) { - qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i] ); + qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i], i ); assert(qscale[i] >= 0); }