/*****************************************************************************
* ratecontrol.c: ratecontrol
*****************************************************************************
- * Copyright (C) 2005-2011 x264 project
+ * Copyright (C) 2005-2012 x264 project
*
* Authors: Loren Merritt <lorenm@u.washington.edu>
* Michael Niedermayer <michaelni@gmx.at>
typedef struct
{
+ float coeff_min;
float coeff;
float count;
float decay;
int qp; /* qp for current frame */
float qpm; /* qp for current macroblock: precise float for AQ */
float qpa_rc; /* average of macroblocks' qp before aq */
+ float qpa_rc_prev;
int qpa_aq; /* average of macroblocks' qp after aq */
+ int qpa_aq_prev;
float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */
/* VBV stuff */
double lmin[3]; /* min qscale by frame type */
double lmax[3];
double lstep; /* max change (multiply) in qscale per frame */
- uint16_t *qp_buffer[2]; /* Global buffers for converting MB-tree quantizer data. */
- int qpbuf_pos; /* In order to handle pyramid reordering, QP buffer acts as a stack.
+ struct
+ {
+ uint16_t *qp_buffer[2]; /* Global buffers for converting MB-tree quantizer data. */
+ int qpbuf_pos; /* In order to handle pyramid reordering, QP buffer acts as a stack.
* This value is the current position (0 or 1). */
+ int src_mb_count;
+
+ /* For rescaling */
+ int rescale_enabled;
+ float *scale_buffer[2]; /* Intermediate buffers */
+ int filtersize[2]; /* filter size (H/V) */
+ float *coeffs[2];
+ int *pos[2];
+ int srcdim[2]; /* Source dimensions (W/H) */
+ } mbtree;
/* MBRC stuff */
float frame_size_estimated; /* Access to this variable must be atomic: double is
}
}
+static int x264_macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc )
+{
+ /* Allocate the MB-tree QP input buffers and, if the source frame size held
+  * in rc->mbtree.srcdim yields a different macroblock grid than
+  * h->param.i_width/i_height, also the intermediate buffers and filter
+  * tables used by x264_macroblock_tree_rescale.
+  * Returns 0 on success and -1 through the fail label (presumably reached
+  * from CHECKED_MALLOC when an allocation fails -- confirm against the macro).
+  *
+  * Use fractional QP array dimensions to compensate for edge padding:
+  * srcdim/dstdim are the frame sizes in units of 16, srcdimi/dstdimi the
+  * same values rounded up to whole counts. */
+ float srcdim[2] = {rc->mbtree.srcdim[0] / 16.f, rc->mbtree.srcdim[1] / 16.f};
+ float dstdim[2] = { h->param.i_width / 16.f, h->param.i_height / 16.f};
+ int srcdimi[2] = {ceil(srcdim[0]), ceil(srcdim[1])};
+ int dstdimi[2] = {ceil(dstdim[0]), ceil(dstdim[1])};
+ if( PARAM_INTERLACED )
+ {
+ srcdimi[1] = (srcdimi[1]+1)&~1; /* round the row count up to an even number */
+ dstdimi[1] = (dstdimi[1]+1)&~1;
+ }
+
+ rc->mbtree.src_mb_count = srcdimi[0] * srcdimi[1]; /* entries per frame in the source-resolution QP array */
+
+ CHECKED_MALLOC( rc->mbtree.qp_buffer[0], rc->mbtree.src_mb_count * sizeof(uint16_t) );
+ if( h->param.i_bframe_pyramid && h->param.rc.b_stat_read ) /* second buffer only when reading stats with B-pyramid */
+ CHECKED_MALLOC( rc->mbtree.qp_buffer[1], rc->mbtree.src_mb_count * sizeof(uint16_t) );
+ rc->mbtree.qpbuf_pos = -1; /* negative: x264_macroblock_tree_read refills from the stats file before use */
+
+ /* No rescaling to do: both grids have the same dimensions, so
+  * rescale_enabled and rc->mbtree.srcdim are left untouched. */
+ if( srcdimi[0] == dstdimi[0] && srcdimi[1] == dstdimi[1] )
+ return 0;
+
+ rc->mbtree.rescale_enabled = 1;
+
+ /* Allocate intermediate scaling buffers:
+  * [0] holds the source grid (srcdimi[0] x srcdimi[1] floats),
+  * [1] holds the horizontally scaled grid (dstdimi[0] x srcdimi[1] floats). */
+ CHECKED_MALLOC( rc->mbtree.scale_buffer[0], srcdimi[0] * srcdimi[1] * sizeof(float) );
+ CHECKED_MALLOC( rc->mbtree.scale_buffer[1], dstdimi[0] * srcdimi[1] * sizeof(float) );
+
+ /* Allocate and calculate resize filter parameters and coefficients,
+  * once per axis (i == 0 horizontal, i == 1 vertical). */
+ for( int i = 0; i < 2; i++ )
+ {
+ if( srcdim[i] > dstdim[i] ) // downscale
+ rc->mbtree.filtersize[i] = 1 + (2 * srcdimi[i] + dstdimi[i] - 1) / dstdimi[i]; /* 1 + ceil( 2 * src / dst ) taps */
+ else // upscale
+ rc->mbtree.filtersize[i] = 3;
+
+ CHECKED_MALLOC( rc->mbtree.coeffs[i], rc->mbtree.filtersize[i] * dstdimi[i] * sizeof(float) );
+ CHECKED_MALLOC( rc->mbtree.pos[i], dstdimi[i] * sizeof(int) );
+
+ /* Initialize filter coefficients: for every destination sample j store
+  * the index of its first source tap in pos[i][j] and filtersize
+  * normalized weights in coeffs[i][j*filtersize ...]. */
+ float inc = srcdim[i] / dstdim[i]; /* source samples advanced per destination sample */
+ float dmul = inc > 1.f ? dstdim[i] / srcdim[i] : 1.f; /* when downscaling, shrink distances by 1/inc so the kernel widens */
+ float dstinsrc = 0.5f * inc - 0.5f; /* position of destination sample 0 on the source sample grid */
+ int filtersize = rc->mbtree.filtersize[i];
+ for( int j = 0; j < dstdimi[i]; j++ )
+ {
+ int pos = dstinsrc - (filtersize - 2.f) * 0.5f; /* first tap; the float-to-int conversion truncates toward zero */
+ float sum = 0.0;
+ rc->mbtree.pos[i][j] = pos;
+ for( int k = 0; k < filtersize; k++ )
+ {
+ float d = fabs( pos + k - dstinsrc ) * dmul; /* scaled distance from tap k to the sample position */
+ float coeff = X264_MAX( 1.f - d, 0 ); /* weight falls off linearly with d and is 0 from d >= 1 */
+ rc->mbtree.coeffs[i][j * filtersize + k] = coeff;
+ sum += coeff;
+ }
+ sum = 1.0f / sum; /* turn the total into a scale factor so the weights of sample j add up to 1 */
+ for( int k = 0; k < filtersize; k++ )
+ rc->mbtree.coeffs[i][j * filtersize + k] *= sum;
+ dstinsrc += inc;
+ }
+ }
+
+ /* Write back actual qp array dimensions: from here on rc->mbtree.srcdim
+  * holds the rounded-up counts (srcdimi) instead of the frame size. */
+ rc->mbtree.srcdim[0] = srcdimi[0];
+ rc->mbtree.srcdim[1] = srcdimi[1];
+ return 0;
+fail:
+ return -1;
+}
+
+static void x264_macroblock_tree_rescale_destroy( x264_ratecontrol_t *rc )
+{ /* Release the buffers that x264_macroblock_tree_rescale_init allocates in rc->mbtree. */
+ for( int i = 0; i < 2; i++ ) /* every array below has exactly two slots */
+ {
+ /* NOTE(review): slots that init never allocated (e.g. qp_buffer[1], or all
+  * scaling tables when no rescale is needed) are passed to x264_free as well;
+  * this assumes they are NULL and that x264_free accepts NULL -- confirm. */
+ x264_free( rc->mbtree.qp_buffer[i] );
+ x264_free( rc->mbtree.scale_buffer[i] );
+ x264_free( rc->mbtree.coeffs[i] );
+ x264_free( rc->mbtree.pos[i] );
+ }
+}
+
+static ALWAYS_INLINE float tapfilter( float *src, int pos, int max, int stride, float *coeff, int filtersize )
+{ /* Return the weighted sum of filtersize consecutive samples whose indices
+   * start at pos; the sample with index n is read from src[n*stride] and
+   * tap i is weighted by coeff[i]. */
+ float sum = 0.f;
+ for( int i = 0; i < filtersize; i++, pos++ )
+ sum += src[x264_clip3( pos, 0, max-1 )*stride] * coeff[i]; /* x264_clip3 presumably clamps the index to [0, max-1] so out-of-range taps reuse the edge sample -- confirm */
+ return sum;
+}
+
+static void x264_macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst )
+{ /* Resample the values in rc->mbtree.scale_buffer[0] (srcdim[0] x srcdim[1])
+   * into dst (h->mb.i_mb_width x h->mb.i_mb_height floats) using the filter
+   * tables in rc->mbtree, as two separable passes.
+   * NOTE(review): the tables and scale_buffer[1] were sized in
+   * x264_macroblock_tree_rescale_init from h->param.i_width/i_height; this
+   * code assumes h->mb.i_mb_width/i_mb_height match those sizes -- confirm. */
+ float *input, *output;
+ int filtersize, stride, height;
+
+ /* H scale first: every source row of scale_buffer[0] is resampled from
+  * srcdim[0] to h->mb.i_mb_width samples and written to scale_buffer[1]. */
+ input = rc->mbtree.scale_buffer[0];
+ output = rc->mbtree.scale_buffer[1];
+ filtersize = rc->mbtree.filtersize[0];
+ stride = rc->mbtree.srcdim[0];
+ height = rc->mbtree.srcdim[1];
+ for( int y = 0; y < height; y++, input += stride, output += h->mb.i_mb_width )
+ {
+ float *coeff = rc->mbtree.coeffs[0]; /* restart at the weights of destination column 0 for every row */
+ for( int x = 0; x < h->mb.i_mb_width; x++, coeff+=filtersize )
+ output[x] = tapfilter( input, rc->mbtree.pos[0][x], stride, 1, coeff, filtersize ); /* index limit = source row width, samples are adjacent */
+ }
+
+ /* V scale next: every column of scale_buffer[1] is resampled from
+  * srcdim[1] to h->mb.i_mb_height samples and written to dst, whose rows
+  * are h->mb.i_mb_width floats apart. */
+ input = rc->mbtree.scale_buffer[1];
+ output = dst;
+ filtersize = rc->mbtree.filtersize[1];
+ stride = h->mb.i_mb_width;
+ height = rc->mbtree.srcdim[1];
+ for( int x = 0; x < h->mb.i_mb_width; x++, input++, output++ )
+ {
+ float *coeff = rc->mbtree.coeffs[1]; /* restart at the weights of destination row 0 for every column */
+ for( int y = 0; y < h->mb.i_mb_height; y++, coeff+=filtersize )
+ output[y*stride] = tapfilter( input, rc->mbtree.pos[1][y], height, stride, coeff, filtersize ); /* index limit = source row count, samples are one row apart */
+ }
+}
+
int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets )
{
x264_ratecontrol_t *rc = h->rc;
if( rc->entry[frame->i_frame].kept_as_ref )
{
uint8_t i_type;
- if( rc->qpbuf_pos < 0 )
+ if( rc->mbtree.qpbuf_pos < 0 )
{
do
{
- rc->qpbuf_pos++;
+ rc->mbtree.qpbuf_pos++;
if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) )
goto fail;
- if( fread( rc->qp_buffer[rc->qpbuf_pos], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
+ if( fread( rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], sizeof(uint16_t), rc->mbtree.src_mb_count, rc->p_mbtree_stat_file_in ) != rc->mbtree.src_mb_count )
goto fail;
- if( i_type != i_type_actual && rc->qpbuf_pos == 1 )
+ if( i_type != i_type_actual && rc->mbtree.qpbuf_pos == 1 )
{
x264_log( h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type, i_type_actual );
return -1;
} while( i_type != i_type_actual );
}
- for( int i = 0; i < h->mb.i_mb_count; i++ )
+ float *dst = rc->mbtree.rescale_enabled ? rc->mbtree.scale_buffer[0] : frame->f_qp_offset;
+ for( int i = 0; i < rc->mbtree.src_mb_count; i++ )
{
- frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[rc->qpbuf_pos][i] )) * (1/256.0);
- if( h->frames.b_have_lowres )
- frame->i_inv_qscale_factor[i] = x264_exp2fix8(frame->f_qp_offset[i]);
+ int16_t qp_fix8 = endian_fix16( rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos][i] );
+ dst[i] = qp_fix8 * (1.f/256.f);
}
- rc->qpbuf_pos--;
+ if( rc->mbtree.rescale_enabled )
+ x264_macroblock_tree_rescale( h, rc, frame->f_qp_offset );
+ if( h->frames.b_have_lowres )
+ for( int i = 0; i < h->mb.i_mb_count; i++ )
+ frame->i_inv_qscale_factor[i] = x264_exp2fix8( frame->f_qp_offset[i] );
+ rc->mbtree.qpbuf_pos--;
}
else
x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets );
rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
for( int j = 0; j < num_preds; j++ )
{
- rc->pred[i+j*5].coeff= 2.0;
- rc->pred[i+j*5].count= 1.0;
- rc->pred[i+j*5].decay= 0.5;
- rc->pred[i+j*5].offset= 0.0;
+ rc->pred[i+j*5].coeff_min = 2.0 / 4;
+ rc->pred[i+j*5].coeff = 2.0;
+ rc->pred[i+j*5].count = 1.0;
+ rc->pred[i+j*5].decay = 0.5;
+ rc->pred[i+j*5].offset = 0.0;
}
for( int j = 0; j < 2; j++ )
{
- rc->row_preds[i][j].coeff= .25;
- rc->row_preds[i][j].count= 1.0;
- rc->row_preds[i][j].decay= 0.5;
- rc->row_preds[i][j].offset= 0.0;
+ rc->row_preds[i][j].coeff_min = .25 / 4;
+ rc->row_preds[i][j].coeff = .25;
+ rc->row_preds[i][j].count = 1.0;
+ rc->row_preds[i][j].decay = 0.5;
+ rc->row_preds[i][j].offset = 0.0;
}
}
*rc->pred_b_from_p = rc->pred[0];
x264_log( h, X264_LOG_ERROR, "resolution specified in stats file not valid\n" );
return -1;
}
- else if( h->param.rc.b_mb_tree && (i != h->param.i_width || j != h->param.i_height) )
+ else if( h->param.rc.b_mb_tree )
{
- x264_log( h, X264_LOG_ERROR, "MB-tree doesn't support different resolution than 1st pass (%dx%d vs %dx%d)\n",
- h->param.i_width, h->param.i_height, i, j );
- return -1;
+ rc->mbtree.srcdim[0] = i;
+ rc->mbtree.srcdim[1] = j;
}
res_factor = (float)h->param.i_width * h->param.i_height / (i*j);
/* Change in bits relative to resolution isn't quite linear on typical sources,
if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) )
{
- CHECKED_MALLOC( rc->qp_buffer[0], h->mb.i_mb_count * sizeof(uint16_t) );
- if( h->param.i_bframe_pyramid && h->param.rc.b_stat_read )
- CHECKED_MALLOC( rc->qp_buffer[1], h->mb.i_mb_count * sizeof(uint16_t) );
- rc->qpbuf_pos = -1;
+ if( !h->param.rc.b_stat_read )
+ {
+ rc->mbtree.srcdim[0] = h->param.i_width;
+ rc->mbtree.srcdim[1] = h->param.i_height;
+ }
+ if( x264_macroblock_tree_rescale_init( h, rc ) < 0 )
+ return -1;
}
for( int i = 0; i<h->param.i_threads; i++ )
x264_free( rc->pred );
x264_free( rc->pred_b_from_p );
x264_free( rc->entry );
- x264_free( rc->qp_buffer[0] );
- x264_free( rc->qp_buffer[1] );
+ x264_macroblock_tree_rescale_destroy( rc );
if( rc->zones )
{
x264_free( rc->zones[0].param );
q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
- rc->qpa_rc =
- rc->qpa_aq = 0;
+ rc->qpa_rc = rc->qpa_rc_prev =
+ rc->qpa_aq = rc->qpa_aq_prev = 0;
rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX );
h->fdec->f_qp_avg_rc =
h->fdec->f_qp_avg_aq =
* eliminate all use of qp in row ratecontrol: make it entirely qscale-based.
* make this function stop being needlessly O(N^2)
* update more often than once per row? */
-void x264_ratecontrol_mb( x264_t *h, int bits )
+int x264_ratecontrol_mb( x264_t *h, int bits )
{
x264_ratecontrol_t *rc = h->rc;
const int y = h->mb.i_mb_y;
rc->qpa_aq += h->mb.i_qp;
if( h->mb.i_mb_x != h->mb.i_mb_width - 1 )
- return;
+ return 0;
x264_emms();
rc->qpa_rc += rc->qpm * h->mb.i_mb_width;
if( !rc->b_vbv )
- return;
+ return 0;
float qscale = qp2qscale( rc->qpm );
h->fdec->f_row_qp[y] = rc->qpm;
/* update ratecontrol per-mbpair in MBAFF */
if( SLICE_MBAFF && !(y&1) )
- return;
+ return 0;
+
+ /* FIXME: We don't currently support the case where there's a slice
+ * boundary in between. */
+ int can_reencode_row = h->sh.i_first_mb <= ((h->mb.i_mb_y - SLICE_MBAFF) * h->mb.i_mb_stride);
/* tweak quality based on difference from predicted size */
+ float prev_row_qp = h->fdec->f_row_qp[y];
+ float qp_absolute_max = h->param.rc.i_qp_max;
+ if( rc->rate_factor_max_increment )
+ qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
+ float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
+ float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
+ float step_size = 0.5f;
+ float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
+ float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
+ float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
+ float size_of_other_slices = 0;
+ if( h->param.b_sliced_threads )
+ {
+ float size_of_other_slices_planned = 0;
+ for( int i = 0; i < h->param.i_threads; i++ )
+ if( h != h->thread[i] )
+ {
+ size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
+ size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
+ }
+ float weight = rc->slice_size_planned / rc->frame_size_planned;
+ size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
+ }
if( y < h->i_threadslice_end-1 )
{
- float prev_row_qp = h->fdec->f_row_qp[y];
- float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
- float qp_absolute_max = h->param.rc.i_qp_max;
- if( rc->rate_factor_max_increment )
- qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
- float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
- float step_size = 0.5f;
-
/* B-frames shouldn't use lower QP than their reference frames. */
if( h->sh.i_type == SLICE_TYPE_B )
{
rc->qpm = X264_MAX( rc->qpm, qp_min );
}
- float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
- float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
- float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
- float size_of_other_slices = 0;
- if( h->param.b_sliced_threads )
- {
- float size_of_other_slices_planned = 0;
- for( int i = 0; i < h->param.i_threads; i++ )
- if( h != h->thread[i] )
- {
- size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
- size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
- }
- float weight = rc->slice_size_planned / rc->frame_size_planned;
- size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
- }
-
/* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
+ /* Don't increase the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
/* area at the top of the frame was measured inaccurately. */
if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned )
- return;
+ qp_max = qp_absolute_max = prev_row_qp;
if( h->sh.i_type != SLICE_TYPE_I )
rc_tol *= 0.5f;
}
h->rc->frame_size_estimated = b1 - size_of_other_slices;
+
+ /* If the current row was large enough to cause a large QP jump, try re-encoding it. */
+ if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row )
+ {
+ /* Bump QP to halfway in between... close enough. */
+ rc->qpm = x264_clip3f( (prev_row_qp + rc->qpm)*0.5f, prev_row_qp + 1.0f, qp_max );
+ rc->qpa_rc = rc->qpa_rc_prev;
+ rc->qpa_aq = rc->qpa_aq_prev;
+ h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
+ return -1;
+ }
}
else
+ {
h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
+
+ /* Last-ditch attempt: if the last row of the frame underflowed the VBV,
+ * try again. */
+ if( (h->rc->frame_size_estimated + size_of_other_slices) > (rc->buffer_fill - rc->buffer_rate * max_frame_error) &&
+ rc->qpm < qp_max && can_reencode_row )
+ {
+ rc->qpm = qp_max;
+ rc->qpa_rc = rc->qpa_rc_prev;
+ rc->qpa_aq = rc->qpa_aq_prev;
+ h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
+ return -1;
+ }
+ }
+
+ rc->qpa_rc_prev = rc->qpa_rc;
+ rc->qpa_aq_prev = rc->qpa_aq;
+
+ return 0;
}
int x264_ratecontrol_qp( x264_t *h )
h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
h->fdec->f_qp_avg_aq = (float)rc->qpa_aq / h->mb.i_mb_count;
+ h->fdec->f_crf_avg = h->param.rc.f_rf_constant + h->fdec->f_qp_avg_rc - rc->qp_novbv;
if( h->param.rc.b_stat_write )
{
uint8_t i_type = h->sh.i_type;
/* Values are stored as big-endian FIX8.8 */
for( int i = 0; i < h->mb.i_mb_count; i++ )
- rc->qp_buffer[0][i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
+ rc->mbtree.qp_buffer[0][i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 )
goto fail;
- if( fwrite( rc->qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
+ if( fwrite( rc->mbtree.qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
goto fail;
}
}
if( var < 10 )
return;
float old_coeff = p->coeff / p->count;
- float new_coeff = bits*q / var;
+ float new_coeff = X264_MAX( bits*q / var, p->coeff_min );
float new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
float new_offset = bits*q - new_coeff_clipped * var;
if( new_offset >= 0 )
double w = x264_clip3f( cur_time*100, 0.0, 1.0 );
q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
}
+ rcc->qp_novbv = qscale2qp( q );
if( rcc->b_vbv )
{
/* Do not overflow vbv */
}
}
-void x264_threads_normalize_predictors( x264_t *h )
+static void x264_threads_normalize_predictors( x264_t *h )
{
double totalsize = 0;
for( int i = 0; i < h->param.i_threads; i++ )
{
int row;
x264_ratecontrol_t *rc = h->rc;
+ x264_emms();
+ float qscale = qp2qscale( rc->qpm );
/* Initialize row predictors */
if( h->i_frame == 0 )
int size = 0;
for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
size += h->fdec->i_row_satd[row];
- t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], rc->qpm, size );
+ t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], qscale, size );
}
else
t->rc->slice_size_planned = 0;
COPY(short_term_cplxcount);
COPY(bframes);
COPY(prev_zone);
- COPY(qpbuf_pos);
+ COPY(mbtree.qpbuf_pos);
/* these vars can be updated by x264_ratecontrol_init_reconfigurable */
COPY(bitrate);
COPY(buffer_size);