1 /***************************************************-*- coding: iso-8859-1 -*-
2 * ratecontrol.c: h264 encoder library (Rate Control)
3 *****************************************************************************
4 * Copyright (C) 2005-2008 x264 project
6 * Authors: Loren Merritt <lorenm@u.washington.edu>
7 * Michael Niedermayer <michaelni@gmx.at>
8 * Gabriel Bouvigne <gabriel.bouvigne@joost.com>
9 * Fiona Glaser <fiona@x264.com>
10 * Måns Rullgård <mru@mru.ath.cx>
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
25 *****************************************************************************/
27 #define _ISOC99_SOURCE
28 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
33 #include "common/common.h"
34 #include "common/cpu.h"
35 #include "ratecontrol.h"
/* Tail of the per-frame ratecontrol record (the opening of this typedef is not
 * part of this listing). Other members referenced later in this file include
 * pict_type, kept_as_ref, qscale, new_qscale, new_qp, tex_bits, mv_bits,
 * misc_bits, i_count, p_count, s_count and direct_mode; most of them are filled
 * in from the first-pass stats file by x264_ratecontrol_new(). */
45 uint64_t expected_bits; /*total expected bits up to the current frame (current one excluded)*/
/* NOTE(review): not read or written in the visible part of this file;
 * presumably a smoothed per-frame complexity -- confirm. */
52 float blurred_complexity;
54 } ratecontrol_entry_t;
/* Ratecontrol state. x264_ratecontrol_new() allocates one zero-filled instance
 * per thread and points h->thread[i]->rc at element i.
 * Several members used elsewhere in this file (e.g. b_abr, b_2pass, b_vbv,
 * fps, bitrate, buffer_size, cbr_decay, ip_offset, pb_offset, qp_constant,
 * lmax, zones, i_zones) are declared on lines that are not part of this listing. */
63 struct x264_ratecontrol_t
72 double rate_tolerance; /* copied from param.rc.f_rate_tolerance; raised to at least 0.01 at init */
73 int nmb; /* number of macroblocks in a frame */
77 ratecontrol_entry_t *rce; /* entry of the frame being encoded; set in x264_ratecontrol_start() when first-pass stats are read */
78 int qp; /* qp for current frame */
79 int qpm; /* qp for current macroblock */
80 float f_qpm; /* qp for current macroblock: precise float for AQ */
81 float qpa_rc; /* average of macroblocks' qp before aq */
82 float qpa_aq; /* average of macroblocks' qp after aq */
87 double buffer_fill_final; /* real buffer as of the last finished frame */
88 double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */
89 double buffer_rate; /* # of bits added to buffer_fill after each frame */
90 predictor_t *pred; /* predict frame size from satd */
95 double cplxr_sum; /* sum of bits*qscale/rceq */
96 double expected_bits_sum; /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
97 double wanted_bits_window; /* target bitrate * window */
/* NOTE(review): the two short_term_* members are not referenced in the visible code. */
99 double short_term_cplxsum;
100 double short_term_cplxcount;
101 double rate_factor_constant; /* computed at init, only when the rc method is CRF */
106 FILE *p_stat_file_out; /* stats output stream; opened on psz_stat_file_tmpname when b_stat_write is set */
107 char *psz_stat_file_tmpname; /* psz_stat_out with ".temp" appended; renamed to psz_stat_out in x264_ratecontrol_delete() */
109 int num_entries; /* number of ratecontrol_entry_ts */
110 ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
112 double last_qscale_for[5]; /* last qscale for a specific pict type, used for max_diff & ipb factor stuff */
113 int last_non_b_pict_type; /* slice type of the most recent non-B frame seen by x264_ratecontrol_start() */
114 double accum_p_qp; /* for determining I-frame quant */
116 double last_accum_p_norm; /* NOTE(review): not referenced in the visible code */
117 double lmin[5]; /* min qscale by frame type */
119 double lstep; /* max change (multiply) in qscale per frame */
122 double frame_size_estimated; /* accessed through the set/get_estimated_size helpers under h->fenc->mutex */
123 double frame_size_planned; /* bit budget that the row-level VBV code compares its predictions against */
124 predictor_t *row_pred; /* points at row_preds[slice type] for the current frame */
125 predictor_t row_preds[5]; /* row-size predictors; initialised with coeff .25, count 1, decay .5 */
126 predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
127 int bframes; /* # consecutive B-frames before this P-frame */
128 int bframe_bits; /* total cost of those frames */
132 x264_zone_t *prev_zone; /* zone chosen by the previous x264_ratecontrol_start() call; used to detect a change of zone params */
/* Forward declarations of file-local helpers that are used before their definitions. */
136 static int parse_zones( x264_t *h );
137 static int init_pass2(x264_t *);
138 static float rate_estimate_qscale( x264_t *h );
139 static void update_vbv( x264_t *h, int bits );
140 static void update_vbv_plan( x264_t *h );
141 static double predict_size( predictor_t *p, double q, double var );
142 static void update_predictor( predictor_t *p, double q, double var, double bits );
145 * qp = h.264's quantizer
146 * qscale = linearized quantizer = Lagrange multiplier
148 static inline double qp2qscale(double qp)
150 return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
152 static inline double qscale2qp(double qscale)
154 return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
157 /* Texture bitrate is not quite inversely proportional to qscale,
158 * probably due to the changing number of SKIP blocks.
159 * MV bits level off at about qp<=12, because the lambda used
160 * for motion estimation is constant there. */
/* Estimate how many bits the frame described by rce would cost at `qscale`,
 * by rescaling the bit counts that were recorded for it at rce->qscale:
 * texture bits scale with (rce->qscale / qscale)^1.1, and MV bits with the
 * square root of the ratio of the two qscales after each is clamped to at least 1.
 * NOTE(review): the lines before the return statement and the end of the
 * expression are not part of this listing. */
161 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
165 return (rce->tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
166 + rce->mv_bits * pow( X264_MAX(rce->qscale, 1) / X264_MAX(qscale, 1), 0.5 )
170 // Find the total AC energy of the block in all planes.
/* Sums h->pixf.var over the three planes of macroblock (mb_x, mb_y) of `frame`:
 * the PIXEL_16x16 variant for plane 0 and the PIXEL_8x8 variant for planes 1 and 2.
 * The sum is clamped to a minimum of 1.
 * NOTE(review): the declaration of `w` and the function's final statements
 * are not part of this listing. */
171 static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
173 /* This function contains annoying hacks because GCC has a habit of reordering emms
174 * and putting it after floating point ops. As a result, we put the emms at the end of the
175 * function and make sure that it's always called before the float math. Noinline makes
176 * sure no reordering goes on. */
177 unsigned int var = 0, i;
178 for( i = 0; i < 3; i++ )
181 int stride = frame->i_stride[i];
/* Interlaced: the pair of MB rows (mb_y&~1) selects the base position and
 * (mb_y&1) selects which of the two interleaved lines to start on. */
182 int offset = h->mb.b_interlaced
183 ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
184 : w * (mb_x + mb_y * stride);
185 int pix = i ? PIXEL_8x8 : PIXEL_16x16;
/* Interlaced: double the stride so only every other line is read. */
186 stride <<= h->mb.b_interlaced;
187 var += h->pixf.var[pix]( frame->plane[i]+offset, stride );
/* Never report zero energy (the caller passes the result to x264_clz). */
189 var = X264_MAX(var,1);
/* Fractional base-2 logarithm table: entry i holds log2(1 + i/128) to five
 * decimals (e.g. entry 64 is log2(1.5) = 0.58496). x264_adaptive_quant_frame()
 * indexes it with the 7 bits that follow the leading one bit of the energy value. */
194 static const float log2_lut[128] = {
195 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
196 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
197 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
198 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
199 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
200 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
201 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
202 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
203 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
204 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
205 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
206 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
207 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
208 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
209 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
210 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
/* Fractional base-2 exponential table in 8-bit fixed point, used by
 * x264_exp2fix8(), which adds 256 (i.e. 1.0) back to each entry.
 * The values are consistent with round(256 * (2^((i+0.5)/64) - 1)), e.g.
 * entry 32 = 108 -- NOTE(review): generator formula inferred from the data, confirm. */
213 static const uint8_t exp2_lut[64] = {
214 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47,
215 50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104,
216 108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
217 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
/* Table-driven fixed-point power of two: the result is built from
 * (exp2_lut[f] + 256), i.e. a mantissa in [1,2) scaled by 256, shifted left by
 * the integer part i and then right by 8. Saturates to 0 when x <= 0 and to
 * 0xffff when x >= 16.
 * NOTE(review): the lines that bias x and derive i and f from it are not part
 * of this listing; f must index the 64-entry exp2_lut. */
220 static int x264_exp2fix8( float x )
224 if( x <= 0 ) return 0;
225 if( x >= 16 ) return 0xffff;
228 return (exp2_lut[f]+256) << i >> 8;
/* Compute the adaptive-quantization QP offset of every macroblock in `frame`.
 * For each MB the AC energy is measured, its base-2 logarithm is approximated
 * (leading-zero count for the integer part, log2_lut for the fraction), and
 * the scaled result is stored in frame->f_qp_offset. When lowres frames are in
 * use, frame->i_inv_qscale_factor additionally receives the fixed-point value
 * x264_exp2fix8( -qp_adj/6 ). */
231 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
233 /* constants chosen to result in approximately the same overall bitrate as without AQ.
234 * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */
235 float strength = h->param.rc.f_aq_strength * 1.0397;
237 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
238 for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
240 uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
241 int lz = x264_clz( energy );
/* energy<<lz moves the leading one to bit 31; >>24 & 0x7f then keeps the 7
 * bits right below it as the log2_lut index. */
242 float qp_adj = strength * (log2_lut[(energy<<lz>>24)&0x7f] - lz + 16.573f);
243 frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
244 if( h->frames.b_have_lowres )
245 frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));
250 /*****************************************************************************
251 * x264_adaptive_quant:
252 * adjust macroblock QP based on variance (AC energy) of the MB.
253 * high variance = higher QP
254 * low variance = lower QP
255 * This generally increases SSIM and lowers PSNR.
256 *****************************************************************************/
/* Sets h->mb.i_qp for the current macroblock from the ratecontrol QP (f_qpm)
 * plus the per-MB offset stored in h->fenc->f_qp_offset.
 * NOTE(review): some statements of this function are not part of this listing. */
257 void x264_adaptive_quant( x264_t *h )
/* Round to nearest (+.5) and clamp to the configured [i_qp_min, i_qp_max] range. */
260 h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
261 /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
262 * to lower the bit cost of the qp_delta. */
263 if( abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
264 h->mb.i_qp = h->mb.i_last_qp;
/* Allocate and initialise the ratecontrol state of encoder h.
 * Visible steps, in order:
 *  - allocate one zero-filled x264_ratecontrol_t per thread;
 *  - derive the mode flags, fps and bitrate from h->param;
 *  - validate and normalise the VBV parameters (may modify h->param.rc);
 *  - set up initial ABR/CRF state, constant-QP values and the size predictors;
 *  - parse zones;
 *  - if b_stat_read: load and validate the first-pass stats file, then run
 *    init_pass2() for ABR;
 *  - if b_stat_write: open "<psz_stat_out>.temp" and write the options line;
 *  - give every thread its own rc slot and a copy of h->param.
 * Errors are reported through x264_log(); the only return statement visible
 * in this listing is `return -1` when init_pass2() fails.
 * NOTE(review): many lines of this function (braces, declarations, several
 * conditions and returns) are not part of this listing. */
267 int x264_ratecontrol_new( x264_t *h )
269 x264_ratecontrol_t *rc;
/* NOTE(review): the result of x264_malloc is used by memset on the very next line without a NULL check. */
274 rc = h->rc = x264_malloc( h->param.i_threads * sizeof(x264_ratecontrol_t) );
275 memset( rc, 0, h->param.i_threads * sizeof(x264_ratecontrol_t) );
/* b_abr: any method other than constant QP, when no first-pass stats are read.
 * b_2pass: ABR driven by first-pass stats. */
277 rc->b_abr = h->param.rc.i_rc_method != X264_RC_CQP && !h->param.rc.b_stat_read;
278 rc->b_2pass = h->param.rc.i_rc_method == X264_RC_ABR && h->param.rc.b_stat_read;
280 /* FIXME: use integers */
281 if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
282 rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
/* i_bitrate is multiplied by 1000 here, as are the VBV rate and size below. */
286 rc->bitrate = h->param.rc.i_bitrate * 1000.;
287 rc->rate_tolerance = h->param.rc.f_rate_tolerance;
288 rc->nmb = h->mb.i_mb_count;
289 rc->last_non_b_pict_type = -1;
292 if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read )
294 x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
/* ---- VBV parameter validation ---- */
297 if( h->param.rc.i_vbv_buffer_size )
299 if( h->param.rc.i_rc_method == X264_RC_CQP )
301 x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
302 h->param.rc.i_vbv_max_bitrate = 0;
303 h->param.rc.i_vbv_buffer_size = 0;
305 else if( h->param.rc.i_vbv_max_bitrate == 0 )
307 x264_log( h, X264_LOG_DEBUG, "VBV maxrate unspecified, assuming CBR\n" );
308 h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
311 if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
312 h->param.rc.i_vbv_max_bitrate > 0)
313 x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
314 else if( h->param.rc.i_vbv_max_bitrate > 0 &&
315 h->param.rc.i_vbv_buffer_size > 0 )
/* Enforce a minimum buffer of three frames' worth of data at the maximum rate. */
317 if( h->param.rc.i_vbv_buffer_size < 3 * h->param.rc.i_vbv_max_bitrate / rc->fps )
319 h->param.rc.i_vbv_buffer_size = 3 * h->param.rc.i_vbv_max_bitrate / rc->fps;
320 x264_log( h, X264_LOG_WARNING, "VBV buffer size too small, using %d kbit\n",
321 h->param.rc.i_vbv_buffer_size );
/* An initial fill above 1 is taken as an absolute amount and converted into a
 * fraction of the buffer size. */
323 if( h->param.rc.f_vbv_buffer_init > 1. )
324 h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
325 rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
326 rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
327 rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
328 rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
329 * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
331 rc->b_vbv_min_rate = !rc->b_2pass
332 && h->param.rc.i_rc_method == X264_RC_ABR
333 && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
335 else if( h->param.rc.i_vbv_max_bitrate )
337 x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
338 h->param.rc.i_vbv_max_bitrate = 0;
340 if(rc->rate_tolerance < 0.01)
342 x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
343 rc->rate_tolerance = 0.01;
/* Per-macroblock QP changes are needed for VBV row control and for AQ. */
346 h->mb.b_variable_qp = rc->b_vbv || h->param.rc.i_aq_mode;
/* ---- initial ABR/CRF state ---- */
350 /* FIXME ABR_INIT_QP is actually used only in CRF */
351 #define ABR_INIT_QP ( h->param.rc.i_rc_method == X264_RC_CRF ? h->param.rc.f_rf_constant : 24 )
352 rc->accum_p_norm = .01;
353 rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
354 /* estimated ratio that produces a reasonable QP for the first I-frame */
355 rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
356 rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
357 rc->last_non_b_pict_type = SLICE_TYPE_I;
360 if( h->param.rc.i_rc_method == X264_RC_CRF )
362 /* arbitrary rescaling to make CRF somewhat similar to QP */
363 double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
364 rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
365 / qp2qscale( h->param.rc.f_rf_constant );
/* Convert the I/P and P/B quantizer ratios into QP offsets (6 QP per factor of two). */
368 rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
369 rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
370 rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
371 rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
372 rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
374 rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
375 rc->last_qscale = qp2qscale(26);
/* NOTE(review): neither allocation below is checked before use in the visible lines. */
376 rc->pred = x264_malloc( 5*sizeof(predictor_t) );
377 rc->pred_b_from_p = x264_malloc( sizeof(predictor_t) );
/* Initial values for the five qscale limits and size predictors. */
378 for( i = 0; i < 5; i++ )
380 rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP );
381 rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
382 rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
383 rc->pred[i].coeff= 2.0;
384 rc->pred[i].count= 1.0;
385 rc->pred[i].decay= 0.5;
386 rc->row_preds[i].coeff= .25;
387 rc->row_preds[i].count= 1.0;
388 rc->row_preds[i].decay= 0.5;
390 *rc->pred_b_from_p = rc->pred[0];
392 if( parse_zones( h ) < 0 )
394 x264_log( h, X264_LOG_ERROR, "failed to parse zones\n" );
398 /* Load stat file and init 2pass algo */
399 if( h->param.rc.b_stat_read )
401 char *p, *stats_in, *stats_buf;
403 /* read 1st pass stats */
404 assert( h->param.rc.psz_stat_in );
405 stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in );
408 x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
412 /* check whether 1st pass options were compatible with current options */
413 if( !strncmp( stats_buf, "#options:", 9 ) )
416 char *opts = stats_buf;
417 stats_in = strchr( stats_buf, '\n' );
423 if( ( p = strstr( opts, "bframes=" ) ) && sscanf( p, "bframes=%d", &i )
424 && h->param.i_bframe != i )
426 x264_log( h, X264_LOG_ERROR, "different number of B-frames than 1st pass (%d vs %d)\n",
427 h->param.i_bframe, i );
431 /* since B-adapt doesn't (yet) take into account B-pyramid,
432 * the converse is not a problem */
433 if( strstr( opts, "b_pyramid=1" ) && !h->param.b_bframe_pyramid )
434 x264_log( h, X264_LOG_WARNING, "1st pass used B-pyramid, 2nd doesn't\n" );
436 if( ( p = strstr( opts, "keyint=" ) ) && sscanf( p, "keyint=%d", &i )
437 && h->param.i_keyint_max != i )
438 x264_log( h, X264_LOG_WARNING, "different keyint than 1st pass (%d vs %d)\n",
439 h->param.i_keyint_max, i );
441 if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
442 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
444 if( !strstr( opts, "direct=3" ) && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO )
446 x264_log( h, X264_LOG_WARNING, "direct=auto not used on the first pass\n" );
447 h->mb.b_direct_auto_write = 1;
/* The B-adapt and scenecut settings of the first pass override the current params. */
450 if( ( p = strstr( opts, "b_adapt=" ) ) && sscanf( p, "b_adapt=%d", &i ) && i >= X264_B_ADAPT_NONE && i <= X264_B_ADAPT_TRELLIS )
451 h->param.i_bframe_adaptive = i;
452 else if( h->param.i_bframe )
454 x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" );
458 if( ( p = strstr( opts, "scenecut=" ) ) && sscanf( p, "scenecut=%d", &i ) && i >= -1 && i <= 100 )
460 h->param.i_scenecut_threshold = i;
461 h->param.b_pre_scenecut = !!strstr( p, "(pre)" );
465 x264_log( h, X264_LOG_ERROR, "scenecut method specified in stats file not valid\n" );
470 /* find number of pics */
/* Each stats record is terminated by ';' (see the parser below). */
473 p = strchr(p+1, ';');
476 x264_log(h, X264_LOG_ERROR, "empty stats file\n");
481 if( h->param.i_frame_total < rc->num_entries && h->param.i_frame_total > 0 )
483 x264_log( h, X264_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
484 h->param.i_frame_total, rc->num_entries );
486 if( h->param.i_frame_total > rc->num_entries )
488 x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
489 h->param.i_frame_total, rc->num_entries );
/* NOTE(review): the allocation result is passed to memset on the next line without a NULL check. */
493 rc->entry = (ratecontrol_entry_t*) x264_malloc(rc->num_entries * sizeof(ratecontrol_entry_t));
494 memset(rc->entry, 0, rc->num_entries * sizeof(ratecontrol_entry_t));
496 /* init all to skipped p frames */
497 for(i=0; i<rc->num_entries; i++)
499 ratecontrol_entry_t *rce = &rc->entry[i];
500 rce->pict_type = SLICE_TYPE_P;
501 rce->qscale = rce->new_qscale = qp2qscale(20);
502 rce->misc_bits = rc->nmb + 10;
/* Parse one ';'-terminated record per entry; records are stored by their
 * "in:" frame number, not by their position in the file. */
508 for(i=0; i < rc->num_entries; i++)
510 ratecontrol_entry_t *rce;
517 next= strchr(p, ';');
520 (*next)=0; //sscanf is unbelievably slow on long strings
523 e = sscanf(p, " in:%d ", &frame_number);
525 if(frame_number < 0 || frame_number >= rc->num_entries)
527 x264_log(h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i);
530 rce = &rc->entry[frame_number];
531 rce->direct_mode = 0;
533 e += sscanf(p, " in:%*d out:%*d type:%c q:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c",
534 &pict_type, &qp, &rce->tex_bits,
535 &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count,
536 &rce->s_count, &rce->direct_mode);
/* Upper-case 'I' and 'B' set kept_as_ref and then deliberately fall through
 * to the lower-case case, which sets the picture type. */
540 case 'I': rce->kept_as_ref = 1;
541 case 'i': rce->pict_type = SLICE_TYPE_I; break;
542 case 'P': rce->pict_type = SLICE_TYPE_P; break;
543 case 'B': rce->kept_as_ref = 1;
544 case 'b': rce->pict_type = SLICE_TYPE_B; break;
545 default: e = -1; break;
549 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
552 rce->qscale = qp2qscale(qp);
556 x264_free(stats_buf);
558 if(h->param.rc.i_rc_method == X264_RC_ABR)
560 if(init_pass2(h) < 0) return -1;
561 } /* else we're using constant quant, so no need to run the bitrate allocation */
564 /* Open output file */
565 /* If input and output files are the same, output to a temp file
566 * and move it to the real name only when it's complete */
567 if( h->param.rc.b_stat_write )
/* +6 leaves room for ".temp" and the terminating NUL.
 * NOTE(review): the allocation is passed to strcpy on the next line without a NULL check. */
571 rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
572 strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
573 strcat( rc->psz_stat_file_tmpname, ".temp" );
575 rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
576 if( rc->p_stat_file_out == NULL )
578 x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
/* First line of the stats file: the encoder options, checked again by the next pass. */
582 p = x264_param2string( &h->param, 1 );
583 fprintf( rc->p_stat_file_out, "#options: %s\n", p );
/* Give every thread its own rc slot and a copy of the (possibly adjusted) params. */
587 for( i=0; i<h->param.i_threads; i++ )
589 h->thread[i]->rc = rc+i;
593 memcpy( &h->thread[i]->param, &h->param, sizeof( x264_param_t ) );
594 h->thread[i]->mb.b_variable_qp = h->mb.b_variable_qp;
/* Parse one zone specification string p into *z.
 * Accepted prefixes, tried in this order: "start,end,q=<qp>",
 * "start,end,b=<bitrate factor>" and plain "start,end"; anything else is
 * logged as an invalid zone. The remaining comma-separated "name=value" tokens
 * are applied with x264_param_parse() to a private copy of h->param stored in
 * z->param.
 * NOTE(review): the statements executed in each branch and the return
 * statements are not part of this listing. */
601 static int parse_zone( x264_t *h, x264_zone_t *z, char *p )
604 char *tok, UNUSED *saveptr;
/* Default bitrate factor; the "b=" form overwrites it. */
606 z->f_bitrate_factor = 1;
607 if( 3 <= sscanf(p, "%u,%u,q=%u%n", &z->i_start, &z->i_end, &z->i_qp, &len) )
609 else if( 3 <= sscanf(p, "%u,%u,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) )
611 else if( 2 <= sscanf(p, "%u,%u%n", &z->i_start, &z->i_end, &len) )
615 x264_log( h, X264_LOG_ERROR, "invalid zone: \"%s\"\n", p );
/* NOTE(review): the allocation is passed to memcpy on the next line without a NULL check. */
621 z->param = x264_malloc( sizeof(x264_param_t) );
622 memcpy( z->param, &h->param, sizeof(x264_param_t) );
623 while( (tok = strtok_r( p, ",", &saveptr )) )
625 char *val = strchr( tok, '=' );
631 if( x264_param_parse( z->param, tok, val ) )
633 x264_log( h, X264_LOG_ERROR, "invalid zone param: %s = %s\n", tok, val );
/* Build rc->zones from the user's zone settings.
 * If zones were supplied as a string (psz_zones) and not yet as an array, the
 * string is split on '/' and every piece is handed to parse_zone(). All zones
 * are then validated and copied into rc->zones behind a catch-all default
 * zone at index 0.
 * NOTE(review): return statements and some braces are not part of this listing. */
641 static int parse_zones( x264_t *h )
643 x264_ratecontrol_t *rc = h->rc;
645 if( h->param.rc.psz_zones && !h->param.rc.i_zones )
647 char *p, *tok, UNUSED *saveptr;
/* Work on a private copy of the string: strtok_r modifies its input.
 * NOTE(review): the allocation is passed to strcpy on the next line without a NULL check. */
648 char *psz_zones = x264_malloc( strlen(h->param.rc.psz_zones)+1 );
649 strcpy( psz_zones, h->param.rc.psz_zones );
/* Number of zones = number of '/' separators + 1. */
650 h->param.rc.i_zones = 1;
651 for( p = psz_zones; *p; p++ )
652 h->param.rc.i_zones += (*p == '/');
653 h->param.rc.zones = x264_malloc( h->param.rc.i_zones * sizeof(x264_zone_t) );
655 for( i = 0; i < h->param.rc.i_zones; i++ )
657 tok = strtok_r( p, "/", &saveptr );
658 if( !tok || parse_zone( h, &h->param.rc.zones[i], tok ) )
662 x264_free( psz_zones );
665 if( h->param.rc.i_zones > 0 )
/* Reject zones with a negative or inverted frame range, and zones that do not
 * force a QP but have a non-positive bitrate factor. */
667 for( i = 0; i < h->param.rc.i_zones; i++ )
669 x264_zone_t z = h->param.rc.zones[i];
670 if( z.i_start < 0 || z.i_start > z.i_end )
672 x264_log( h, X264_LOG_ERROR, "invalid zone: start=%d end=%d\n",
673 z.i_start, z.i_end );
676 else if( !z.b_force_qp && z.f_bitrate_factor <= 0 )
678 x264_log( h, X264_LOG_ERROR, "invalid zone: bitrate_factor=%f\n",
679 z.f_bitrate_factor );
/* One extra slot at index 0 for the default zone; user zones follow from index 1. */
684 rc->i_zones = h->param.rc.i_zones + 1;
685 rc->zones = x264_malloc( rc->i_zones * sizeof(x264_zone_t) );
686 memcpy( rc->zones+1, h->param.rc.zones, (rc->i_zones-1) * sizeof(x264_zone_t) );
688 // default zone to fall back to if none of the others match
689 rc->zones[0].i_start = 0;
690 rc->zones[0].i_end = INT_MAX;
691 rc->zones[0].b_force_qp = 0;
692 rc->zones[0].f_bitrate_factor = 1;
693 rc->zones[0].param = x264_malloc( sizeof(x264_param_t) );
694 memcpy( rc->zones[0].param, &h->param, sizeof(x264_param_t) );
/* Zones without their own parameter set share the default zone's copy;
 * x264_ratecontrol_delete() checks for this sharing before freeing. */
695 for( i = 1; i < rc->i_zones; i++ )
697 if( !rc->zones[i].param )
698 rc->zones[i].param = rc->zones[0].param;
/* Look up the zone whose [i_start, i_end] range contains frame_num.
 * Zones are scanned from the highest index down to 0, so later zones are
 * tested before earlier ones and the default zone at index 0 is tested last.
 * NOTE(review): the return statements are not part of this listing. */
705 static x264_zone_t *get_zone( x264_t *h, int frame_num )
708 for( i = h->rc->i_zones-1; i >= 0; i-- )
710 x264_zone_t *z = &h->rc->zones[i];
711 if( frame_num >= z->i_start && frame_num <= z->i_end )
/* Log the final ratefactor at INFO level. This is only done for ABR encodes
 * without first-pass stats (b_abr) whose cbr_decay is above .9999.
 * The value uses the same base_cplx rescaling as the CRF setup in
 * x264_ratecontrol_new(). */
717 void x264_ratecontrol_summary( x264_t *h )
719 x264_ratecontrol_t *rc = h->rc;
720 if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR && rc->cbr_decay > .9999 )
722 double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
723 x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
724 qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
725 * rc->cplxr_sum / rc->wanted_bits_window ) );
/* Release ratecontrol resources: close the stats output file and move the
 * temporary file to its final name, then free the predictors, the first-pass
 * entries and the zone data.
 * NOTE(review): some lines of this function (e.g. any guard around the zone
 * cleanup and the freeing of rc itself) are not part of this listing. */
729 void x264_ratecontrol_delete( x264_t *h )
731 x264_ratecontrol_t *rc = h->rc;
734 if( rc->p_stat_file_out )
736 fclose( rc->p_stat_file_out );
/* Only replace the real stats file once at least num_entries frames were encoded. */
737 if( h->i_frame >= rc->num_entries )
738 if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
740 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
741 rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
743 x264_free( rc->psz_stat_file_tmpname );
745 x264_free( rc->pred );
746 x264_free( rc->pred_b_from_p );
747 x264_free( rc->entry );
/* Zones may share zones[0].param (see parse_zones), so the shared copy is
 * freed once and only distinct per-zone copies are freed in the loop. */
750 x264_free( rc->zones[0].param );
751 if( h->param.rc.psz_zones )
752 for( i=1; i<rc->i_zones; i++ )
753 if( rc->zones[i].param != rc->zones[0].param )
754 x264_free( rc->zones[i].param );
755 x264_free( rc->zones );
/* Store `bits` as the estimated size of the frame being encoded
 * (h->rc->frame_size_estimated), holding h->fenc->mutex during the write. */
760 void x264_ratecontrol_set_estimated_size( x264_t *h, int bits )
762 x264_pthread_mutex_lock( &h->fenc->mutex );
763 h->rc->frame_size_estimated = bits;
764 x264_pthread_mutex_unlock( &h->fenc->mutex );
/* Read h->rc->frame_size_estimated while holding h->fenc->mutex.
 * NOTE(review): the declaration of `size` and the return statement are not
 * part of this listing; presumably `size` is returned -- confirm. */
767 int x264_ratecontrol_get_estimated_size( x264_t const *h)
770 x264_pthread_mutex_lock( &h->fenc->mutex );
771 size = h->rc->frame_size_estimated;
772 x264_pthread_mutex_unlock( &h->fenc->mutex );
/* Update the exponentially decaying QP accumulator: the previous sum and its
 * weight are both scaled by .95 before the new sample (weight 1) is added.
 * For I slices the sample is qp + ip_offset.
 * NOTE(review): the line between the two additions below is not part of this
 * listing; presumably an `else`, making the second addition the non-I case -- confirm. */
776 static void accum_p_qp_update( x264_t *h, float qp )
778 x264_ratecontrol_t *rc = h->rc;
779 rc->accum_p_qp *= .95;
780 rc->accum_p_norm *= .95;
781 rc->accum_p_norm += 1;
782 if( h->sh.i_type == SLICE_TYPE_I )
783 rc->accum_p_qp += qp + rc->ip_offset;
785 rc->accum_p_qp += qp;
788 /* Before encoding a frame, choose a QP for it */
/* i_force_qp is stored in rc->qp_force. The frame QP comes from
 * rate_estimate_qscale() or, in constant-QP mode, from rc->qp_constant; it is
 * then adjusted by the active zone and clipped to 0..51.
 * NOTE(review): several conditions, braces and assignments of this function
 * are not part of this listing. */
789 void x264_ratecontrol_start( x264_t *h, int i_force_qp )
791 x264_ratecontrol_t *rc = h->rc;
792 ratecontrol_entry_t *rce = NULL;
793 x264_zone_t *zone = get_zone( h, h->fenc->i_frame );
/* Reconfigure the encoder only when the zone's parameter set differs from the
 * one used for the previous frame. */
798 if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
799 x264_encoder_reconfig( h, zone->param );
800 rc->prev_zone = zone;
802 rc->qp_force = i_force_qp;
804 if( h->param.rc.b_stat_read )
806 int frame = h->fenc->i_frame;
807 assert( frame >= 0 && frame < rc->num_entries );
808 rce = h->rc->rce = &h->rc->entry[frame];
/* With direct=auto, reuse the direct mode recorded in the first pass
 * ('s' selects spatial; 's' or 't' enables auto-read). */
810 if( h->sh.i_type == SLICE_TYPE_B
811 && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO )
813 h->sh.b_direct_spatial_mv_pred = ( rce->direct_mode == 's' );
814 h->mb.b_direct_auto_read = ( rce->direct_mode == 's' || rce->direct_mode == 't' );
/* Reset the per-row bit counters and select the row predictor for this slice type. */
820 memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) );
821 rc->row_pred = &rc->row_preds[h->sh.i_type];
822 update_vbv_plan( h );
825 if( h->sh.i_type != SLICE_TYPE_B )
/* Walk h->frames.current while the entries are B-type frames (loop body not in this listing). */
828 while( h->frames.current[rc->bframes] && IS_X264_TYPE_B(h->frames.current[rc->bframes]->i_type) )
838 q = qscale2qp( rate_estimate_qscale( h ) );
840 else if( rc->b_2pass )
842 rce->new_qscale = rate_estimate_qscale( h );
843 q = qscale2qp( rce->new_qscale );
/* Constant QP: referenced B-frames use the average of the B and P constants. */
847 if( h->sh.i_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
848 q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
850 q = rc->qp_constant[ h->sh.i_type ];
/* Zone adjustment: a forced zone QP is applied as an offset relative to the P
 * constant; the other visible adjustment lowers q by 6 per doubling of the
 * zone's bitrate factor. */
854 if( zone->b_force_qp )
855 q += zone->i_qp - rc->qp_constant[SLICE_TYPE_P];
857 q -= 6*log(zone->f_bitrate_factor)/log(2);
863 h->fdec->f_qp_avg_rc =
864 h->fdec->f_qp_avg_aq =
/* Round to the nearest integer QP and clamp to 0..51. */
866 rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
869 rce->new_qp = rc->qp;
871 /* accum_p_qp needs to be here so that future frames can benefit from the
872 * data before this frame is done. but this only works because threading
873 * guarantees to not re-encode any frames. so the non-threaded case does
874 * accum_p_qp later. */
875 if( h->param.i_threads > 1 )
876 accum_p_qp_update( h, rc->qp );
878 if( h->sh.i_type != SLICE_TYPE_B )
879 rc->last_non_b_pict_type = h->sh.i_type;
/* Predict the size in bits of macroblock row y when coded at `qp`.
 * NOTE(review): the declaration of pred_t and the other return path(s) are
 * not part of this listing. */
882 static double predict_row_size( x264_t *h, int y, int qp )
884 /* average between two predictors:
885 * absolute SATD, and scaled bit cost of the colocated row in the previous frame */
886 x264_ratecontrol_t *rc = h->rc;
887 double pred_s = predict_size( rc->row_pred, qp2qscale(qp), h->fdec->i_row_satd[y] );
/* The second predictor is only used for non-I slices whose first list-0
 * reference has the same frame type, a positive row SATD, and a row SATD
 * within 50% of the current row's. */
889 if( h->sh.i_type != SLICE_TYPE_I
890 && h->fref0[0]->i_type == h->fdec->i_type
891 && h->fref0[0]->i_row_satd[y] > 0
892 && (abs(h->fref0[0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2))
/* Reference row bits, rescaled by the SATD ratio and by the qscale ratio. */
894 pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y]
895 * qp2qscale(h->fref0[0]->i_row_qp[y]) / qp2qscale(qp);
900 return (pred_s + pred_t) / 2;
/* Sum the bits recorded in h->fdec->i_row_bits for rows 0..y inclusive.
 * NOTE(review): the declaration/initialisation of `bits` and the return
 * statement are not part of this listing. */
903 static double row_bits_so_far( x264_t *h, int y )
907 for( i = 0; i <= y; i++ )
908 bits += h->fdec->i_row_bits[i];
/* Estimate the size of the whole frame: the bits actually spent on rows 0..y
 * plus the predicted size of every remaining row at `qp`.
 * NOTE(review): the return statement is not part of this listing. */
912 static double predict_row_size_sum( x264_t *h, int y, int qp )
915 double bits = row_bits_so_far(h, y);
916 for( i = y+1; i < h->sps->i_mb_height; i++ )
917 bits += predict_row_size( h, i, qp );
/* Per-macroblock ratecontrol hook; `bits` is added to the bit count of the
 * current macroblock row. It also accumulates the QP sums (qpa_rc, qpa_aq)
 * and contains the row-level QP adjustment that keeps the predicted frame
 * size within the planned size and the VBV buffer.
 * NOTE(review): many lines of this function (braces, early returns, local
 * declarations and the loop bodies that step rc->qpm) are not part of this
 * listing, so the exact control flow cannot be confirmed from here. */
922 void x264_ratecontrol_mb( x264_t *h, int bits )
924 x264_ratecontrol_t *rc = h->rc;
925 const int y = h->mb.i_mb_y;
929 h->fdec->i_row_bits[y] += bits;
930 rc->qpa_rc += rc->f_qpm;
931 rc->qpa_aq += h->mb.i_qp;
/* True unless this is the last macroblock of a row with VBV enabled
 * (presumably an early return -- the statement is not in this listing). */
933 if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv)
/* Remember the QP this row was coded with. */
936 h->fdec->i_row_qp[y] = rc->qpm;
938 if( h->sh.i_type == SLICE_TYPE_B )
940 /* B-frames shouldn't use lower QP than their reference frames.
941 * This code is a bit overzealous in limiting B-frame quantizers, but it helps avoid
942 * underflows due to the fact that B-frames are not explicitly covered by VBV. */
943 if( y < h->sps->i_mb_height-1 )
/* Higher of the two references' QP for the next row, plus pb_offset (halved for B-refs). */
946 int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
947 + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1);
948 rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset
949 i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size
950 if (i_estimated > h->rc->frame_size_planned)
951 x264_ratecontrol_set_estimated_size(h, i_estimated);
/* Feed the finished row's actual size back into the row predictor. */
956 update_predictor( rc->row_pred, qp2qscale(rc->qpm), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
958 /* tweak quality based on difference from predicted size */
959 if( y < h->sps->i_mb_height-1 && h->stat.i_slice_count[h->sh.i_type] > 0 )
961 int prev_row_qp = h->fdec->i_row_qp[y];
962 int b0 = predict_row_size_sum( h, y, rc->qpm );
/* The row QP may move by at most i_qp_step from the previous row's QP and
 * must stay inside the configured QP range. */
964 int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max );
965 int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
966 float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
970 /* Don't modify the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
971 /* area at the top of the frame was measured inaccurately. */
972 if(row_bits_so_far(h,y) < 0.05 * rc->frame_size_planned)
975 headroom = buffer_left_planned/rc->buffer_size;
976 if(h->sh.i_type != SLICE_TYPE_I)
980 if( !rc->b_vbv_min_rate )
981 i_qp_min = X264_MAX( i_qp_min, h->sh.i_qp );
/* Loop while the predicted frame size b1 exceeds the planned size times rc_tol,
 * or would leave less than half of the planned buffer headroom. */
983 while( rc->qpm < i_qp_max
984 && (b1 > rc->frame_size_planned * rc_tol
985 || (rc->buffer_fill - b1 < buffer_left_planned * 0.5)))
988 b1 = predict_row_size_sum( h, y, rc->qpm );
991 /* avoid VBV underflow */
992 while( (rc->qpm < h->param.rc.i_qp_max)
993 && (rc->buffer_fill - b1 < rc->buffer_size * 0.005))
996 b1 = predict_row_size_sum( h, y, rc->qpm );
/* Loop while qpm is above both i_qp_min and the first row's QP and the
 * prediction is well under budget (below 80% of the planned size with qpm not
 * above the previous row's QP, or below the VBV term in the last condition). */
999 while( rc->qpm > i_qp_min
1000 && rc->qpm > h->fdec->i_row_qp[0]
1001 && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp)
1002 || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
1005 b1 = predict_row_size_sum( h, y, rc->qpm );
/* Publish the latest whole-frame size prediction. */
1007 x264_ratecontrol_set_estimated_size(h, b1);
1010 /* loses the fractional part of the frame-wise qp */
1011 rc->f_qpm = rc->qpm;
/* NOTE(review): only the signature is part of this listing; presumably returns
 * the QP currently selected by ratecontrol -- confirm against the full source. */
1014 int x264_ratecontrol_qp( x264_t *h )
1019 /* In 2pass, force the same frame types as in the 1st pass */
/* Returns the X264_TYPE_* to use for frame_num. When first-pass stats are
 * being read, the type comes from the recorded entry (kept_as_ref selects IDR
 * over I and BREF over B). If frame_num lies beyond the recorded entries, the
 * encoder is switched to constant QP for the remaining frames and
 * X264_TYPE_AUTO is returned.
 * NOTE(review): the case labels of the switch and some braces are not part of
 * this listing. */
1020 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
1022 x264_ratecontrol_t *rc = h->rc;
1023 if( h->param.rc.b_stat_read )
1025 if( frame_num >= rc->num_entries )
1027 /* We could try to initialize everything required for ABR and
1028 * adaptive B-frames, but that would be complicated.
1029 * So just calculate the average QP used so far. */
/* Fallback QP: 24 if no P slice has been coded yet, otherwise 1 + the average P-slice QP so far. */
1032 h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
1033 : 1 + h->stat.f_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
1034 rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
1035 rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
1036 rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
1038 x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
1039 x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
1040 if( h->param.i_bframe_adaptive )
1041 x264_log(h, X264_LOG_ERROR, "disabling adaptive B-frames\n");
/* Switch every thread to constant QP with no stats reading, no adaptive
 * B-frames or scenecut detection, and at most one B-frame. */
1043 for( i = 0; i < h->param.i_threads; i++ )
1045 h->thread[i]->rc->b_abr = 0;
1046 h->thread[i]->rc->b_2pass = 0;
1047 h->thread[i]->param.rc.i_rc_method = X264_RC_CQP;
1048 h->thread[i]->param.rc.b_stat_read = 0;
1049 h->thread[i]->param.i_bframe_adaptive = 0;
1050 h->thread[i]->param.b_pre_scenecut = 0;
1051 h->thread[i]->param.i_scenecut_threshold = -1;
1052 if( h->thread[i]->param.i_bframe > 1 )
1053 h->thread[i]->param.i_bframe = 1;
1055 return X264_TYPE_AUTO;
1057 switch( rc->entry[frame_num].pict_type )
1060 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
1063 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
1072 return X264_TYPE_AUTO;
1076 /* After encoding one frame, save stats and update ratecontrol state */
/* Post-encode bookkeeping for one frame: derive per-frame statistics,
 * optionally append them to the stats file, and update the rate-control
 * accumulators and the VBV state.
 *   h    - encoder context of the frame that was just coded
 *   bits - size of the coded frame; it is weighed against bitrate-derived
 *          budgets and handed to update_vbv(), so presumably in bits
 * NOTE(review): this listing appears to have lost lines (no braces, `i` is
 * never declared, fprintf() receives fewer arguments than its format has
 * conversions). Confirm the control flow against the complete file. */
1077 void x264_ratecontrol_end( x264_t *h, int bits )
1079 x264_ratecontrol_t *rc = h->rc;
1080 const int *mbs = h->stat.frame.i_mb_count;
/* Collapse the per-type macroblock histogram into skip / intra / inter totals. */
1085 h->stat.frame.i_mb_count_skip = mbs[P_SKIP] + mbs[B_SKIP];
1086 h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4];
1087 h->stat.frame.i_mb_count_p = mbs[P_L0] + mbs[P_8x8];
/* Types from B_DIRECT up to (but excluding) B_8x8 are also counted in the "p" total. */
1088 for( i = B_DIRECT; i < B_8x8; i++ )
1089 h->stat.frame.i_mb_count_p += mbs[i];
/* Divide the QP accumulators by the macroblock count (in place) and copy the
 * resulting per-frame averages to h->fdec. */
1091 h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
1092 h->fdec->f_qp_avg_aq = rc->qpa_aq /= h->mb.i_mb_count;
/* Stats output: one text line per frame is written to p_stat_file_out. */
1094 if( h->param.rc.b_stat_write )
/* Frame-type letter: 'I' = I-slice with POC 0, 'i' = any other I-slice,
 * 'P' = P-slice, 'B' / 'b' = B-slice that is / is not kept as a reference. */
1096 char c_type = h->sh.i_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
1097 : h->sh.i_type==SLICE_TYPE_P ? 'P'
1098 : h->fenc->b_kept_as_ref ? 'B' : 'b';
/* Direct-mode letter: sign of score[1]-score[0] for this frame, falling back
 * to the running totals on a tie ('s' positive, 't' negative, '-' neither).
 * Presumably 's'/'t' stand for spatial/temporal -- TODO confirm. */
1099 int dir_frame = h->stat.frame.i_direct_score[1] - h->stat.frame.i_direct_score[0];
1100 int dir_avg = h->stat.i_direct_score[1] - h->stat.i_direct_score[0];
1101 char c_direct = h->mb.b_direct_auto_write ?
1102 ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
1103 dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
/* NOTE(review): the format has 11 conversions but only 8 arguments are visible
 * below (type, q and d have no matching argument in this listing). */
1105 fprintf( rc->p_stat_file_out,
1106 "in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
1107 h->fenc->i_frame, h->i_frame,
1109 h->stat.frame.i_tex_bits,
1110 h->stat.frame.i_mv_bits,
1111 h->stat.frame.i_misc_bits,
1112 h->stat.frame.i_mb_count_i,
1113 h->stat.frame.i_mb_count_p,
1114 h->stat.frame.i_mb_count_skip,
/* Fold this frame into the running complexity sum as bits * qscale / last_rceq.
 * The B-frame variant additionally divides by |pb_factor|. */
1120 if( h->sh.i_type != SLICE_TYPE_B )
1121 rc->cplxr_sum += bits * qp2qscale(rc->qpa_rc) / rc->last_rceq;
1124 /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
1125 * Not perfectly accurate with B-refs, but good enough. */
1126 rc->cplxr_sum += bits * qp2qscale(rc->qpa_rc) / (rc->last_rceq * fabs(h->param.rc.f_pb_factor));
/* Age both windowed sums by cbr_decay; the bit budget first gains one frame's
 * share of the target bitrate (bitrate / fps). */
1128 rc->cplxr_sum *= rc->cbr_decay;
1129 rc->wanted_bits_window += rc->bitrate / rc->fps;
1130 rc->wanted_bits_window *= rc->cbr_decay;
/* accum_p_qp_update() is only called from here when running single-threaded. */
1132 if( h->param.i_threads == 1 )
1133 accum_p_qp_update( h, rc->qpa_rc );
/* Add the size predicted for this frame's stats entry at its planned QP
 * (rce->new_qp) to the running expected-bits total. */
1138 rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
/* With per-macroblock QP variation, the sizes of consecutive B-frames are
 * batched in bframe_bits. */
1141 if( h->mb.b_variable_qp )
1143 if( h->sh.i_type == SLICE_TYPE_B )
1145 rc->bframe_bits += bits;
/* When h->frames.current[0] is absent or not a B-frame, feed the batch's
 * average size (bframe_bits / bframes) to the B-from-P predictor and reset. */
1146 if( !h->frames.current[0] || !IS_X264_TYPE_B(h->frames.current[0]->i_type) )
1148 update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
1149 h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
1150 rc->bframe_bits = 0;
/* Account for the coded frame in the VBV buffer model. */
1155 update_vbv( h, bits );
1158 /****************************************************************************
1160 ***************************************************************************/
1163 /* modify the bitrate curve from pass1 for one frame */
/* Compute a qscale for one frame from its blurred complexity, then apply any
 * zone override that covers the frame.
 *   rce         - stats entry of the frame (blurred_complexity, tex/mv bits)
 *   rate_factor - NOTE(review): not referenced by any line visible here
 *   frame_num   - frame index used to look up the zone
 * NOTE(review): `q` is never declared and no return statement is visible, so
 * this listing has evidently lost lines -- confirm against the complete file. */
1165 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor, int frame_num)
1167 x264_ratecontrol_t *rcc= h->rc;
1169 x264_zone_t *zone = get_zone( h, frame_num );
/* Rate-control equation: complexity raised to (1 - qcompress). */
1171 q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress );
1173 // avoid NaN's in the rc_eq
/* Fall back to the previous qscale when the result is not finite or the frame
 * had no texture and no motion-vector bits in the stats. */
1174 if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0)
1175 q = rcc->last_qscale;
1180 rcc->last_qscale = q;
/* Zone override: either a forced QP, or a bitrate multiplier (more bitrate
 * means dividing the qscale). */
1185 if( zone->b_force_qp )
1186 q = qp2qscale(zone->i_qp);
1188 q /= zone->f_bitrate_factor;
/* Tie the qscale of I- and B-frames to that of neighbouring P-frames, limit
 * the change relative to the previous frame of the same type, and update the
 * running P-frame QP accumulators.
 *   rce - stats entry of the frame (pict_type, kept_as_ref, tex_bits, i_count)
 *   q   - candidate qscale for the frame
 * init_pass2() calls this while walking the entries from last to first.
 * NOTE(review): several branch bodies, the declaration of `iq`, and the return
 * statement are not visible in this listing -- confirm against the complete
 * file before relying on the branch descriptions below. */
1194 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
1196 x264_ratecontrol_t *rcc = h->rc;
1197 const int pict_type = rce->pict_type;
1199 // force I/B quants as a function of P quants
1200 const double last_p_q = rcc->last_qscale_for[SLICE_TYPE_P];
1201 const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
1202 if( pict_type == SLICE_TYPE_I )
/* pq: qscale equivalent of the weighted-average P-frame QP accumulated so far. */
1205 double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
1206 double ip_factor = fabs( h->param.rc.f_ip_factor );
1207 /* don't apply ip_factor if the following frame is also I */
1208 if( rcc->accum_p_norm <= 0 )
1210 else if( h->param.rc.f_ip_factor < 0 )
1212 else if( rcc->accum_p_norm >= 1 )
/* Partial weight: blend the P-derived value with `iq` in proportion to accum_p_norm. */
1215 q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
1217 else if( pict_type == SLICE_TYPE_B )
1219 if( h->param.rc.f_pb_factor > 0 )
/* Only B-frames that are not kept as references get the pb_factor scaling. */
1221 if( !rce->kept_as_ref )
1222 q *= fabs( h->param.rc.f_pb_factor );
/* P-frame that follows a P-frame and had zero texture bits in the stats. */
1224 else if( pict_type == SLICE_TYPE_P
1225 && rcc->last_non_b_pict_type == SLICE_TYPE_P
1226 && rce->tex_bits == 0 )
1231 /* last qscale / qdiff stuff */
/* Applies when the previous non-B frame had the same type; for I-frames only
 * if last_accum_p_norm < 1. The qscale may then differ from the last one of
 * that type by at most a factor of lstep in either direction. */
1232 if(rcc->last_non_b_pict_type==pict_type
1233 && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
1235 double last_q = rcc->last_qscale_for[pict_type];
1236 double max_qscale = last_q * rcc->lstep;
1237 double min_qscale = last_q / rcc->lstep;
1239 if (q > max_qscale) q = max_qscale;
1240 else if(q < min_qscale) q = min_qscale;
/* Remember the result per picture type for the next call. */
1243 rcc->last_qscale_for[pict_type] = q;
1244 if(pict_type!=SLICE_TYPE_B)
1245 rcc->last_non_b_pict_type = pict_type;
/* An I-frame saves the current norm and restarts the P-frame accumulators. */
1246 if(pict_type==SLICE_TYPE_I)
1248 rcc->last_accum_p_norm = rcc->accum_p_norm;
1249 rcc->accum_p_norm = 0;
1250 rcc->accum_p_qp = 0;
/* A P-frame joins the accumulators; mask = 1 - (i_count / nmb)^2, so a frame
 * whose i_count equals nmb wipes out the history. */
1252 if(pict_type==SLICE_TYPE_P)
1254 float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
1255 rcc->accum_p_qp = mask * (qscale2qp(q) + rcc->accum_p_qp);
1256 rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
1261 static double predict_size( predictor_t *p, double q, double var )
1263 return p->coeff*var / (q*p->count);
/* Fold one observation (a frame of complexity `var`, coded at qscale `q`,
 * that produced `bits`) into predictor `p`.
 * NOTE(review): the visible statements only decay count/coeff and add
 * bits*q/var to coeff; nothing here increments p->count or guards against
 * var == 0. This listing has no braces either, so lines appear to be
 * missing -- confirm against the complete file. */
1266 static void update_predictor( predictor_t *p, double q, double var, double bits )
/* Age the existing statistics so older frames weigh less. */
1270 p->count *= p->decay;
1271 p->coeff *= p->decay;
/* bits*q/var is the coefficient that predict_size() would have needed to
 * predict this frame exactly. */
1273 p->coeff += bits*q / var;
1276 // update VBV after encoding a frame
/* Update the size predictor and the final VBV fill level with the actual size
 * of the frame just coded.
 *   h    - encoder context of the coded frame
 *   bits - actual size of the frame
 * rcc is this context's own state; rct is thread 0's state, which holds the
 * predictors and buffer_fill_final that this function writes.
 * NOTE(review): no braces are visible and this listing may have lost
 * statements -- confirm against the complete file. */
1277 static void update_vbv( x264_t *h, int bits )
1279 x264_ratecontrol_t *rcc = h->rc;
1280 x264_ratecontrol_t *rct = h->thread[0]->rc;
/* Only train the predictor when the frame's SATD is at least one per macroblock. */
1282 if( rcc->last_satd >= h->mb.i_mb_count )
1283 update_predictor( &rct->pred[h->sh.i_type], qp2qscale(rcc->qpa_rc), rcc->last_satd, bits );
/* The buffer gains buffer_rate per frame and is drained by the frame's size. */
1288 rct->buffer_fill_final += rct->buffer_rate - bits;
/* A negative level is reported as an underflow, then clamped to [0, buffer_size]. */
1289 if( rct->buffer_fill_final < 0 )
1290 x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rct->buffer_fill_final );
1291 rct->buffer_fill_final = x264_clip3f( rct->buffer_fill_final, 0, rct->buffer_size );
1294 // provisionally update VBV according to the planned size of all frames currently in progress
/* Set this context's provisional buffer_fill: start from the final fill level
 * kept on thread 0 and, when threading is enabled, subtract the planned or
 * estimated sizes of the frames held by the other thread contexts.
 * NOTE(review): `i` is not declared and the statement controlled by
 * `if( !t->b_thread_active )` is not visible in this listing -- confirm
 * against the complete file. */
1295 static void update_vbv_plan( x264_t *h )
1297 x264_ratecontrol_t *rcc = h->rc;
1298 rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
1299 if( h->param.i_threads > 1 )
/* j = distance of this context's rc from thread 0's rc, used as this context's
 * thread index; presumably the rc structs are allocated as one array -- TODO confirm. */
1301 int j = h->rc - h->thread[0]->rc;
/* Visit the other i_threads-1 contexts in ring order, starting after this one. */
1303 for( i=1; i<h->param.i_threads; i++ )
1305 x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
1306 double bits = t->rc->frame_size_planned;
1307 if( !t->b_thread_active )
/* Use whichever is larger: the planned size or the current size estimate. */
1309 bits = X264_MAX(bits, x264_ratecontrol_get_estimated_size(t));
1310 rcc->buffer_fill += rcc->buffer_rate - bits;
1311 rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size );
1316 // apply VBV constraints and clip qscale to between lmin and lmax
/* Apply VBV-derived adjustments to a candidate qscale and clamp the result to
 * the [lmin, lmax] range of the picture type.
 *   pict_type - slice type, selects lmin/lmax
 *   q         - candidate qscale
 * Returns the clamped qscale.
 * NOTE(review): `q0`, `qf` and `space` are used but never declared here, the
 * opening of the first condition is absent, and no braces are visible. This
 * listing has lost lines -- confirm the nesting against the complete file. */
1317 static double clip_qscale( x264_t *h, int pict_type, double q )
1319 x264_ratecontrol_t *rcc = h->rc;
1320 double lmin = rcc->lmin[pict_type];
1321 double lmax = rcc->lmax[pict_type];
1324 /* B-frames are not directly subject to VBV,
1325 * since they are controlled by the P-frames' QPs.
1326 * FIXME: in 2pass we could modify previous frames' QP too,
1327 * instead of waiting for the buffer to fill */
1329 ( pict_type == SLICE_TYPE_P ||
1330 ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
/* Buffer less than half full: raise q by up to 2x (divisor clipped to [0.5, 1]). */
1332 if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
1333 q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
/* Size-prediction based limits need VBV enabled and a SATD for the frame. */
1336 if( rcc->b_vbv && rcc->last_satd > 0 )
1338 /* Now a hard threshold to make sure the frame fits in VBV.
1339 * This one is mostly for I-frames. */
1340 double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
/* Predicted size above half the current fill: compute a factor in [0.2, 1]. */
1342 if( bits > rcc->buffer_fill/2 )
1343 qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
/* Predicted size below half of one frame's refill: scale q down, but not below q0. */
1346 if( bits < rcc->buffer_rate/2 )
1347 q *= bits*2/rcc->buffer_rate;
1348 q = X264_MAX( q0, q );
1350 /* Check B-frame complexity, and use up any bits that would
1351 * overflow before the next P-frame. */
1352 if( h->sh.i_type == SLICE_TYPE_P )
1354 int nb = rcc->bframes;
1355 double pbbits = bits;
/* Predicted size of one following B-frame at q * pb_factor. */
1356 double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd );
1359 if( bbits > rcc->buffer_rate )
1361 pbbits += nb * bbits;
/* space: amount by which the buffer would exceed its size after this frame
 * and nb more frames of refill if nothing were drained. */
1363 space = rcc->buffer_fill + (1+nb)*rcc->buffer_rate - rcc->buffer_size;
1364 if( pbbits < space )
1366 q *= X264_MAX( pbbits / space,
1367 bits / (0.5 * rcc->buffer_size) );
1369 q = X264_MAX( q0-5, q );
/* Unless a minimum rate is enforced, never end up below q0. */
1372 if( !rcc->b_vbv_min_rate )
1373 q = X264_MAX( q0, q );
/* 2-pass branch: soft clipping. The position of log(q) between log(lmin) and
 * log(lmax) is centred, passed through a logistic curve and mapped back onto
 * [log(lmin), log(lmax)].
 * NOTE(review): after the last visible line q is still a logarithm; a
 * conversion back is not visible in this listing. */
1378 else if(rcc->b_2pass)
1380 double min2 = log(lmin);
1381 double max2 = log(lmax);
1382 q = (log(q) - min2)/(max2-min2) - 0.5;
1383 q = 1.0/(1.0 + exp(-4*q));
1384 q = q*(max2-min2) + min2;
1388 return x264_clip3f(q, lmin, lmax);
1391 // update qscale for 1 frame based on actual bits used so far
/* Pick the qscale for the frame about to be coded, using the bits actually
 * spent so far. Three strategies are visible:
 *   - B-frames: derived from the average QPs of the reference frames;
 *   - 2-pass:   driven by the first-pass stats entry (rce.expected_bits etc.);
 *   - 1-pass:   ABR or CRF, driven by running complexity and overflow.
 * Also records rcc->frame_size_planned and publishes it through
 * x264_ratecontrol_set_estimated_size().
 * NOTE(review): this listing has no braces and lacks several declarations
 * (`q`, `i`, `diff`), branch keywords and statement bodies. The section
 * comments below describe the visible statements only -- confirm the nesting
 * against the complete file. */
1392 static float rate_estimate_qscale( x264_t *h )
1395 x264_ratecontrol_t *rcc = h->rc;
1396 ratecontrol_entry_t rce;
1397 int pict_type = h->sh.i_type;
1398 double lmin = rcc->lmin[pict_type];
1399 double lmax = rcc->lmax[pict_type];
/* Bits produced so far: slice sizes of all three types, times 8. */
1400 int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
1401 + h->stat.i_slice_size[SLICE_TYPE_P]
1402 + h->stat.i_slice_size[SLICE_TYPE_B]);
/* The slice type in use must match the type recorded in the stats entry. */
1407 if(pict_type != rce.pict_type)
1409 x264_log(h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
1410 slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type]);
1414 if( pict_type == SLICE_TYPE_B )
1416 /* B-frames don't have independent ratecontrol, but rather get the
1417 * average QP of the two adjacent P-frames + an offset */
/* i0/i1: whether each reference is an I-frame; dt0/dt1: POC distance to each
 * reference; q0/q1: average pre-AQ QP of each reference. */
1419 int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type);
1420 int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type);
1421 int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc);
1422 int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc);
1423 float q0 = h->fref0[0]->f_qp_avg_rc;
1424 float q1 = h->fref1[0]->f_qp_avg_rc;
/* A reference that is itself a B-ref has half of pb_offset removed from its QP. */
1426 if( h->fref0[0]->i_type == X264_TYPE_BREF )
1427 q0 -= rcc->pb_offset/2;
1428 if( h->fref1[0]->i_type == X264_TYPE_BREF )
1429 q1 -= rcc->pb_offset/2;
/* Two of the combination rules are visible: plain mean plus ip_offset, and a
 * mean weighted by the distance to the opposite reference. */
1432 q = (q0 + q1) / 2 + rcc->ip_offset;
1438 q = (q0*dt1 + q1*dt0) / (dt0 + dt1);
/* A B-frame kept as reference gets half of pb_offset; the full pb_offset
 * line follows. */
1440 if(h->fenc->b_kept_as_ref)
1441 q += rcc->pb_offset/2;
1443 q += rcc->pb_offset;
/* Note that predict_size() is given the QP value q here, not a qscale. */
1445 rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref1[h->i_ref1-1]->i_satd );
1446 x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned);
/* q was a QP up to this point; convert on return. */
1448 return qp2qscale(q);
/* Tolerance window for bitrate deviation. */
1452 double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
1456 //FIXME adjust abr_buffer based on distance to the end of the video
/* Start from the bits already produced and add the frames still in flight. */
1458 int64_t predicted_bits = total_bits;
1462 if( h->param.i_threads > 1 )
/* Same ring walk over the other thread contexts as in update_vbv_plan(). */
1464 int j = h->rc - h->thread[0]->rc;
1466 for( i=1; i<h->param.i_threads; i++ )
1468 x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
1469 double bits = t->rc->frame_size_planned;
1470 if( !t->b_thread_active )
1472 bits = X264_MAX(bits, x264_ratecontrol_get_estimated_size(t));
1473 predicted_bits += (int64_t)bits;
/* Alternative estimate: one average frame (bitrate / fps) per frame in flight,
 * which is i_frame of them at the very start and i_threads-1 afterwards. */
1479 if( h->fenc->i_frame < h->param.i_threads )
1480 predicted_bits += (int64_t)h->fenc->i_frame * rcc->bitrate / rcc->fps;
1482 predicted_bits += (int64_t)(h->param.i_threads - 1) * rcc->bitrate / rcc->fps;
/* Deviation from the first-pass plan; correct q by a factor limited to [.5, 2]. */
1485 diff = predicted_bits - (int64_t)rce.expected_bits;
1487 q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
/* Once at least fps frames are done and an expectation exists... */
1488 if( ((h->fenc->i_frame + 1 - h->param.i_threads) >= rcc->fps) &&
1489 (rcc->expected_bits_sum > 0))
1491 /* Adjust quant based on the difference between
1492 * achieved and expected bitrate so far */
/* w rises linearly from 0 to 1 over the first 1% of the entries. */
1493 double time = (double)h->fenc->i_frame / rcc->num_entries;
1494 double w = x264_clip3f( time*100, 0.0, 1.0 );
1495 q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
1499 /* Do not overflow vbv */
/* expected_vbv: fill level after coding this frame at q;
 * expected_fullness: planned fill level from the stats, as a fraction. */
1500 double expected_size = qscale2bits(&rce, q);
1501 double expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
1502 double expected_fullness = rce.expected_vbv / rcc->buffer_size;
1503 double qmax = q*(2 - expected_fullness);
1504 double size_constraint = 1 + expected_fullness;
1505 qmax = X264_MAX(qmax, rce.new_qscale);
1506 if (expected_fullness < .05)
1508 qmax = X264_MIN(qmax, lmax);
/* Loop while the projected fill is too far below plan (and q < qmax) or is
 * negative (and q < lmax).
 * NOTE(review): the statement that changes q inside the loop is not visible. */
1509 while( ((expected_vbv < rce.expected_vbv/size_constraint) && (q < qmax)) ||
1510 ((expected_vbv < 0) && (q < lmax)))
1513 expected_size = qscale2bits(&rce, q);
1514 expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
1516 rcc->last_satd = x264_stack_align( x264_rc_analyse_slice, h );
1518 q = x264_clip3f( q, lmin, lmax );
1520 else /* 1pass ABR */
1522 /* Calculate the quantizer which would have produced the desired
1523 * average bitrate if it had been applied to all frames so far.
1524 * Then modulate that quant based on the current frame's complexity
1525 * relative to the average complexity so far (using the 2pass RCEQ).
1526 * Then bias the quant up or down if total size so far was far from
1528 * Result: Depending on the value of rate_tolerance, there is a
1529 * tradeoff between quality and bitrate precision. But at large
1530 * tolerances, the bit distribution approaches that of 2pass. */
/* lmin/lmax are declared again here, so this must be an inner scope that
 * shadows the function-level pair. */
1532 double wanted_bits, overflow=1, lmin, lmax;
/* Short-term complexity: running sum and count, both halved each frame, with
 * this frame's SATD added. */
1534 rcc->last_satd = x264_stack_align( x264_rc_analyse_slice, h );
1535 rcc->short_term_cplxsum *= 0.5;
1536 rcc->short_term_cplxcount *= 0.5;
1537 rcc->short_term_cplxsum += rcc->last_satd;
1538 rcc->short_term_cplxcount ++;
/* Fill a temporary stats entry so that get_qscale() can be reused. */
1540 rce.tex_bits = rcc->last_satd;
1541 rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
1543 rce.p_count = rcc->nmb;
1547 rce.pict_type = pict_type;
/* CRF uses a constant rate factor. */
1549 if( h->param.rc.i_rc_method == X264_RC_CRF )
1551 q = get_qscale( h, &rce, rcc->rate_factor_constant, h->fenc->i_frame );
/* ABR: rate factor from the windowed budget over the windowed complexity. */
1555 int i_frame_done = h->fenc->i_frame + 1 - h->param.i_threads;
1557 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );
1559 // FIXME is it simpler to keep track of wanted_bits in ratecontrol_end?
1560 wanted_bits = i_frame_done * rcc->bitrate / rcc->fps;
1561 if( wanted_bits > 0 )
/* NOTE(review): i_frame_done/25 divides two ints, so the quotient is truncated
 * before sqrt() is applied. */
1563 abr_buffer *= X264_MAX( 1, sqrt(i_frame_done/25) );
1564 overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
/* I-frame that follows a non-I frame: derive q from the accumulated P-frame
 * QP and ip_factor instead. */
1569 if( pict_type == SLICE_TYPE_I && h->param.i_keyint_max > 1
1570 /* should test _next_ pict type, but that isn't decided yet */
1571 && rcc->last_non_b_pict_type != SLICE_TYPE_I )
1573 q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
1574 q /= fabs( h->param.rc.f_ip_factor );
1576 else if( h->i_frame > 0 )
1578 /* Asymmetric clipping, because symmetric would prevent
1579 * overflow control in areas of rapidly oscillating complexity */
1580 lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
1581 lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
/* NOTE(review): the statements controlled by the two overflow tests are not
 * visible in this listing. */
1582 if( overflow > 1.1 && h->i_frame > 3 )
1584 else if( overflow < 0.9 )
1587 q = x264_clip3f(q, lmin, lmax);
/* First frame in CRF mode: start from ABR_INIT_QP scaled by ip_factor. */
1589 else if( h->param.rc.i_rc_method == X264_RC_CRF )
1591 q = qp2qscale( ABR_INIT_QP ) / fabs( h->param.rc.f_ip_factor );
1594 //FIXME use get_diff_limited_q() ?
1595 q = clip_qscale( h, pict_type, q );
/* Remember the result, both per picture type and globally. */
1598 rcc->last_qscale_for[pict_type] =
1599 rcc->last_qscale = q;
/* On frame 0 the P-frame history is seeded too, except in 2-pass without VBV. */
1601 if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 )
1602 rcc->last_qscale_for[SLICE_TYPE_P] = q;
/* Planned size: from the stats entry in 2-pass with VBV; the predictor-based
 * line follows. */
1604 if( rcc->b_2pass && rcc->b_vbv)
1605 rcc->frame_size_planned = qscale2bits(&rce, q);
1607 rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
1608 x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned);
// Propagate rate-control state between thread contexts.
//   prev -> cur : fields copied with memcpy by the first COPY() definition
//   cur -> next : fields copied by assignment by the second COPY() definition
// NOTE(review): both block comments below never reach a closing "*/" in this
// listing, there are no braces, and no #undef sits between the two COPY()
// definitions. Lines have evidently been lost (possibly further COPY()
// calls as well) -- confirm against the complete file.
1613 void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
1617 #define COPY(var) memcpy(&cur->rc->var, &prev->rc->var, sizeof(cur->rc->var))
1618 /* these vars are updated in x264_ratecontrol_start()
1619 * so copy them from the context that most recently started (prev)
1620 * to the context that's about to start (cur).
1626 COPY(last_qscale_for);
1627 COPY(last_non_b_pict_type);
1628 COPY(short_term_cplxsum);
1629 COPY(short_term_cplxcount);
1636 #define COPY(var) next->rc->var = cur->rc->var
1637 /* these vars are updated in x264_ratecontrol_end()
1638 * so copy them from the context that most recently ended (cur)
1639 * to the context that's about to end (next)
1642 COPY(expected_bits_sum);
1643 COPY(wanted_bits_window);
1647 //FIXME row_preds[] (not strictly necessary, but would improve prediction)
1648 /* the rest of the variables are either constant or thread-local */
/* Simulate the buffer from frame *t0 onwards and look for the next interval
 * that needs fixing.
 *   fills  - buffer fill per frame; fills[*t0-1] is the starting level
 *   t0, t1 - in/out interval bounds (only *t0 is read in the visible lines)
 *   over   - non-zero flips the sign of every fill change (parity = +1 instead of -1)
 * Returns non-zero when both a start and an end frame were found.
 * NOTE(review): the bodies of the two threshold tests (where start, end, *t0
 * and *t1 would be set) and any write to fills[] are not visible in this
 * listing -- confirm against the complete file. */
1651 static int find_underflow( x264_t *h, double *fills, int *t0, int *t1, int over )
1653 /* find an interval ending on an overflow or underflow (depending on whether
1654 * we're adding or removing bits), and starting on the earliest frame that
1655 * can influence the buffer fill of that end frame. */
1656 x264_ratecontrol_t *rcc = h->rc;
/* Both arms of the conditional are .1, so buffer_min does not depend on `over`. */
1657 const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
1658 const double buffer_max = .9 * rcc->buffer_size;
1659 double fill = fills[*t0-1];
1660 double parity = over ? 1. : -1.;
1661 int i, start=-1, end=-1;
1662 for(i = *t0; i < rcc->num_entries; i++)
/* Per-frame change: refill minus the frame's size at its planned qscale. */
1664 fill += (rcc->buffer_rate - qscale2bits(&rcc->entry[i], rcc->entry[i].new_qscale)) * parity;
1665 fill = x264_clip3f(fill, 0, rcc->buffer_size);
1667 if(fill <= buffer_min || i == 0)
1673 else if(fill >= buffer_max && start >= 0)
1678 return start>=0 && end>=0;
/* Multiply the planned qscale of every frame in [t0, t1] by `adjustment`,
 * keeping each value within [qscale_min, qscale_max].
 * `adjusted` becomes non-zero as soon as one frame's qscale actually changed.
 * NOTE(review): the declarations of `i` and `adjusted` and the return
 * statement are not visible in this listing -- confirm against the complete
 * file. */
1681 static int fix_underflow( x264_t *h, int t0, int t1, double adjustment, double qscale_min, double qscale_max)
1683 x264_ratecontrol_t *rcc = h->rc;
1684 double qscale_orig, qscale_new;
1689 for(i = t0; i <= t1; i++)
/* Clamp before scaling, so a value already at a limit that cannot move
 * further compares equal below and does not count as a change. */
1691 qscale_orig = rcc->entry[i].new_qscale;
1692 qscale_orig = x264_clip3f(qscale_orig, qscale_min, qscale_max);
1693 qscale_new = qscale_orig * adjustment;
1694 qscale_new = x264_clip3f(qscale_new, qscale_min, qscale_max);
1695 rcc->entry[i].new_qscale = qscale_new;
1696 adjusted = adjusted || (qscale_new != qscale_orig);
1701 static double count_expected_bits( x264_t *h )
1703 x264_ratecontrol_t *rcc = h->rc;
1704 double expected_bits = 0;
1706 for(i = 0; i < rcc->num_entries; i++)
1708 ratecontrol_entry_t *rce = &rcc->entry[i];
1709 rce->expected_bits = expected_bits;
1710 expected_bits += qscale2bits(rce, rce->new_qscale);
1712 return expected_bits;
/* Second-pass VBV fitting: repeatedly adjust the planned qscales so that the
 * simulated buffer neither overflows nor underflows, until the expected size
 * is within 0.5% of the budget or stops growing.
 * NOTE(review): this listing has no braces and lacks the `do`, several
 * declarations (`i`, `t0`, `t1`, `adjustment`), the condition guarding the
 * final warning and the release of `fills`. fills[-1] is written although
 * `fills` comes straight from x264_malloc() in the visible lines, so
 * presumably the pointer is advanced by one element somewhere -- confirm
 * against the complete file. */
1715 static void vbv_pass2( x264_t *h )
1717 /* for each interval of buffer_full .. underflow, uniformly increase the qp of all
1718 * frames in the interval until either buffer is full at some intermediate frame or the
1719 * last frame in the interval no longer underflows. Recompute intervals and repeat.
1720 * Then do the converse to put bits back into overflow areas until target size is met */
1722 x264_ratecontrol_t *rcc = h->rc;
/* One slot more than there are entries. */
1723 double *fills = x264_malloc((rcc->num_entries+1)*sizeof(double));
/* Total budget: bitrate (given in kbit/s, hence * 1000) times the duration. */
1724 double all_available_bits = h->param.rc.i_bitrate * 1000. * rcc->num_entries / rcc->fps;
1725 double expected_bits = 0;
1727 double prev_bits = 0;
1729 double qscale_min = qp2qscale(h->param.rc.i_qp_min);
1730 double qscale_max = qp2qscale(h->param.rc.i_qp_max);
1732 int adj_min, adj_max;
1736 /* adjust overall stream size */
1740 prev_bits = expected_bits;
1742 if(expected_bits != 0)
1743 { /* not first iteration */
/* Scale factor below 1 (between 0.9 and 0.999) lowers qscales, i.e. adds bits. */
1744 adjustment = X264_MAX(X264_MIN(expected_bits / all_available_bits, 0.999), 0.9);
1745 fills[-1] = rcc->buffer_size * h->param.rc.f_vbv_buffer_init;
1749 while(adj_min && find_underflow(h, fills, &t0, &t1, 1))
1751 adj_min = fix_underflow(h, t0, t1, adjustment, qscale_min, qscale_max);
/* The underflow pass starts from the complementary initial fill level. */
1756 fills[-1] = rcc->buffer_size * (1. - h->param.rc.f_vbv_buffer_init);
1758 /* fix underflows -- should be done after overflow, as we'd better undersize target than underflowing VBV */
/* Raise qscales by 0.1% per step until no interval is found or none can move. */
1760 while(adj_max && find_underflow(h, fills, &t0, &t1, 0))
1761 adj_max = fix_underflow(h, t0, t1, 1.001, qscale_min, qscale_max);
1763 expected_bits = count_expected_bits(h);
/* Continue while under 99.5% of the budget and the rounded total still grows. */
1764 } while((expected_bits < .995*all_available_bits) && ((int)(expected_bits+.5) > (int)(prev_bits+.5)) );
1767 x264_log( h, X264_LOG_WARNING, "vbv-maxrate issue, qpmax or vbv-maxrate too low\n");
1769 /* store expected vbv filling values for tracking when encoding */
1770 for(i = 0; i < rcc->num_entries; i++)
1771 rcc->entry[i].expected_vbv = rcc->buffer_size - fills[i];
/* Prepare the second pass: blur the first-pass complexities, then search for
 * the rate factor whose resulting qscale curve adds up to the requested total
 * size, and warn when the search did not get within 1% of the target.
 * NOTE(review): this listing has no braces and lacks several declarations
 * (`i`, `j`, `d`, `avgq`), initialisations (`rate_factor`, `expected_bits`
 * before the summations), conditions and every return statement -- confirm
 * against the complete file. The section comments describe visible lines only. */
1776 static int init_pass2( x264_t *h )
1778 x264_ratecontrol_t *rcc = h->rc;
1779 uint64_t all_const_bits = 0;
/* Total budget: bitrate (kbit/s, hence * 1000) times the clip duration. */
1780 uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000. * rcc->num_entries / rcc->fps);
1781 double rate_factor, step, step_mult;
1782 double qblur = h->param.rc.f_qblur;
1783 double cplxblur = h->param.rc.f_complexity_blur;
/* "| 1" forces the filter width to be odd. */
1784 const int filter_size = (int)(qblur*4) | 1;
1785 double expected_bits;
1786 double *qscale, *blurred_qscale;
1789 /* find total/average complexity & const_bits */
1790 for(i=0; i<rcc->num_entries; i++)
1792 ratecontrol_entry_t *rce = &rcc->entry[i];
1793 all_const_bits += rce->misc_bits;
/* misc_bits are treated as a cost the quantizer cannot change, so the budget
 * must at least cover them. */
1796 if( all_available_bits < all_const_bits)
1798 x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
1799 (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000.)));
1803 /* Blur complexities, to reduce local fluctuation of QP.
1804 * We don't blur the QPs directly, because then one very simple frame
1805 * could drag down the QP of a nearby complex frame and give it more
1806 * bits than intended. */
1807 for(i=0; i<rcc->num_entries; i++)
1809 ratecontrol_entry_t *rce = &rcc->entry[i];
1810 double weight_sum = 0;
1811 double cplx_sum = 0;
1812 double weight = 1.0;
1813 double gaussian_weight;
1815 /* weighted average of cplx of future frames */
/* Each neighbour's weight is a Gaussian in the distance j, scaled by `weight`,
 * which shrinks by 1 - (i_count / nmb)^2 of every frame passed on the way. */
1816 for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++)
1818 ratecontrol_entry_t *rcj = &rcc->entry[i+j];
1819 weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1822 gaussian_weight = weight * exp(-j*j/200.0);
1823 weight_sum += gaussian_weight;
/* Complexity of a frame: its size at qscale 1 without the misc bits. */
1824 cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
1826 /* weighted average of cplx of past frames */
/* j starts at 0 here, so the frame itself is included; the weight is reduced
 * after a frame has been added rather than before. */
1828 for(j=0; j<=cplxblur*2 && j<=i; j++)
1830 ratecontrol_entry_t *rcj = &rcc->entry[i-j];
1831 gaussian_weight = weight * exp(-j*j/200.0);
1832 weight_sum += gaussian_weight;
1833 cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
1834 weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1838 rce->blurred_complexity = cplx_sum / weight_sum;
/* Work arrays, one value per entry. blurred_qscale is either a separate array
 * or an alias of qscale; the condition choosing between them is not visible. */
1841 qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1843 blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1845 blurred_qscale = qscale;
1847 /* Search for a factor which, when multiplied by the RCEQ values from
1848 * each frame, adds up to the desired total size.
1849 * There is no exact closed-form solution because of VBV constraints and
1850 * because qscale2bits is not invertible, but we can start with the simple
1851 * approximation of scaling the 1st pass by the ratio of bitrates.
1852 * The search range is probably overkill, but speed doesn't matter here. */
/* Size obtained with rate factor 1.0, used to scale the search steps. */
1855 for(i=0; i<rcc->num_entries; i++)
1856 expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0, i));
1857 step_mult = all_available_bits / expected_bits;
/* Bisection-style search: try adding `step`, keep it only if the result stays
 * within budget, then halve the step. */
1860 for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5)
1863 rate_factor += step;
/* Reset the state that get_diff_limited_q() carries between calls. */
1865 rcc->last_non_b_pict_type = -1;
1866 rcc->last_accum_p_norm = 1;
1867 rcc->accum_p_norm = 0;
1870 for(i=0; i<rcc->num_entries; i++)
1872 qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor, i);
1875 /* fixed I/B qscale relative to P */
/* Walks from the last entry to the first. */
1876 for(i=rcc->num_entries-1; i>=0; i--)
1878 qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
1879 assert(qscale[i] >= 0);
/* Smooth the curve with a window of filter_size entries centred on i; the
 * coefficient is a Gaussian in `d` with width qblur (1.0 when qblur is 0). */
1885 assert(filter_size%2==1);
1886 for(i=0; i<rcc->num_entries; i++)
1888 ratecontrol_entry_t *rce = &rcc->entry[i];
1890 double q=0.0, sum=0.0;
1892 for(j=0; j<filter_size; j++)
1894 int index = i+j-filter_size/2;
1896 double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
/* Two tests on the neighbour: outside the table, or of a different picture
 * type. NOTE(review): what happens in either case is not visible here. */
1897 if(index < 0 || index >= rcc->num_entries)
1899 if(rce->pict_type != rcc->entry[index].pict_type)
1901 q += qscale[index] * coeff;
1904 blurred_qscale[i] = q/sum;
1908 /* find expected bits */
1909 for(i=0; i<rcc->num_entries; i++)
1911 ratecontrol_entry_t *rce = &rcc->entry[i];
1912 rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]);
1913 assert(rce->new_qscale >= 0);
1914 expected_bits += qscale2bits(rce, rce->new_qscale);
/* Over budget: take the step back. */
1917 if(expected_bits > all_available_bits) rate_factor -= step;
1922 x264_free(blurred_qscale);
/* Final expectation; this also stores the running totals in every entry. */
1926 expected_bits = count_expected_bits(h);
/* More than 1% away from the target: report and suggest a remedy. */
1928 if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
/* avgq: QP equivalent of the mean planned qscale. */
1931 for(i=0; i<rcc->num_entries; i++)
1932 avgq += rcc->entry[i].new_qscale;
1933 avgq = qscale2qp(avgq / rcc->num_entries);
1935 if ((expected_bits > all_available_bits) || (!rcc->b_vbv))
1936 x264_log(h, X264_LOG_WARNING, "Error: 2pass curve failed to converge\n");
1937 x264_log(h, X264_LOG_WARNING, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
1938 (float)h->param.rc.i_bitrate,
1939 expected_bits * rcc->fps / (rcc->num_entries * 1000.),
/* Under target with the average QP within 2 of qp_min: the lower QP limit is
 * what prevents spending more bits. */
1941 if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1943 if(h->param.rc.i_qp_min > 0)
1944 x264_log(h, X264_LOG_WARNING, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1946 x264_log(h, X264_LOG_WARNING, "try reducing target bitrate\n");
/* Over target with the average QP within 2 of qp_max: the upper QP limit is
 * what prevents saving more bits. */
1948 else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1950 if(h->param.rc.i_qp_max < 51)
1951 x264_log(h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1953 x264_log(h, X264_LOG_WARNING, "try increasing target bitrate\n");
/* Neither limit explains the miss, and this is not the 2-pass-with-VBV case. */
1955 else if(!(rcc->b_2pass && rcc->b_vbv))
1956 x264_log(h, X264_LOG_WARNING, "internal error\n");