1 /***************************************************-*- coding: iso-8859-1 -*-
2 * ratecontrol.c: h264 encoder library (Rate Control)
3 *****************************************************************************
4 * Copyright (C) 2005-2008 x264 project
6 * Authors: Loren Merritt <lorenm@u.washington.edu>
7 * Michael Niedermayer <michaelni@gmx.at>
8 * Gabriel Bouvigne <gabriel.bouvigne@joost.com>
9 * Fiona Glaser <fiona@x264.com>
10 * Måns Rullgård <mru@mru.ath.cx>
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
25 *****************************************************************************/
27 #define _ISOC99_SOURCE
28 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
33 #include "common/common.h"
34 #include "common/cpu.h"
35 #include "ratecontrol.h"
45 uint64_t expected_bits; /*total expected bits up to the current frame (current one excluded)*/
52 float blurred_complexity;
54 } ratecontrol_entry_t;
/* Rate-control state. x264_ratecontrol_new() allocates h->param.i_threads of
 * these in a single array and points each thread context at one element. */
63 struct x264_ratecontrol_t
/* copied from h->param.rc.f_rate_tolerance; x264_ratecontrol_new() raises it to at least .01 */
72 double rate_tolerance;
73 int nmb; /* number of macroblocks in a frame */
/* stats entry of the frame being encoded; set by x264_ratecontrol_start() when a stats file is read */
77 ratecontrol_entry_t *rce;
78 int qp; /* qp for current frame */
79 int qpm; /* qp for current macroblock */
80 float f_qpm; /* qp for current macroblock: precise float for AQ */
81 float qpa_rc; /* average of macroblocks' qp before aq */
82 float qpa_aq; /* average of macroblocks' qp after aq */
87 double buffer_fill_final; /* real buffer as of the last finished frame */
88 double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */
89 double buffer_rate; /* # of bits added to buffer_fill after each frame */
90 predictor_t *pred; /* predict frame size from satd */
95 double cplxr_sum; /* sum of bits*qscale/rceq */
96 double expected_bits_sum; /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
97 double wanted_bits_window; /* target bitrate * window */
99 double short_term_cplxsum;
100 double short_term_cplxcount;
/* computed in x264_ratecontrol_new() for X264_RC_CRF from f_rf_constant and the macroblock count */
101 double rate_factor_constant;
/* stats output stream; opened on the ".temp" name below and renamed in x264_ratecontrol_delete() */
106 FILE *p_stat_file_out;
107 char *psz_stat_file_tmpname;
109 int num_entries; /* number of ratecontrol_entry_ts */
110 ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
112 double last_qscale_for[5]; /* last qscale for a specific pict type, used for max_diff & ipb factor stuff */
/* slice type of the most recent non-B frame; updated at the end of x264_ratecontrol_start() */
113 int last_non_b_pict_type;
114 double accum_p_qp; /* for determining I-frame quant */
116 double last_accum_p_norm;
117 double lmin[5]; /* min qscale by frame type */
119 double lstep; /* max change (multiply) in qscale per frame */
/* running size estimate of the frame in progress; accessed under h->fenc->mutex by the set/get helpers */
122 double frame_size_estimated;
123 double frame_size_planned;
/* points at row_preds[slice type] for the current frame (see x264_ratecontrol_start()) */
124 predictor_t *row_pred;
125 predictor_t row_preds[5];
126 predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
127 int bframes; /* # consecutive B-frames before this P-frame */
128 int bframe_bits; /* total cost of those frames */
/* zone used for the previous frame, kept so the encoder is only reconfigured when the zone params change */
132 x264_zone_t *prev_zone;
/* Forward declarations of file-local helpers that are used before they are defined. */
136 static int parse_zones( x264_t *h );
137 static int init_pass2(x264_t *);
138 static float rate_estimate_qscale( x264_t *h );
139 static void update_vbv( x264_t *h, int bits );
140 static void update_vbv_plan( x264_t *h );
141 static double predict_size( predictor_t *p, double q, double var );
142 static void update_predictor( predictor_t *p, double q, double var, double bits );
145 * qp = h.264's quantizer
146 * qscale = linearized quantizer = Lagrange multiplier
148 static inline double qp2qscale(double qp)
150 return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
152 static inline double qscale2qp(double qscale)
154 return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
157 /* Texture bitrate is not quite inversely proportional to qscale,
158 * probably due to the changing number of SKIP blocks.
159 * MV bits level off at about qp<=12, because the lambda used
160 * for motion estimation is constant there. */
/* Re-estimate the size of the frame described by `rce` at a different qscale:
 * texture bits are scaled by (rce->qscale / qscale)^1.1 and MV bits by the
 * square root of the same ratio after both qscales are floored at 1. */
161 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
165 return (rce->tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
166 + rce->mv_bits * pow( X264_MAX(rce->qscale, 1) / X264_MAX(qscale, 1), 0.5 )
170 // Find the total AC energy of the block in all planes.
/* Accumulates h->pixf.var[] over the three planes of macroblock (mb_x, mb_y)
 * of `frame`: plane 0 is measured with PIXEL_16x16, the other two with PIXEL_8x8. */
171 static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
173 /* This function contains annoying hacks because GCC has a habit of reordering emms
174 * and putting it after floating point ops. As a result, we put the emms at the end of the
175 * function and make sure that it's always called before the float math. Noinline makes
176 * sure no reordering goes on. */
178 for( i = 0; i < 3; i++ )
181 int stride = frame->i_stride[i];
/* interlaced: address from the even row of the macroblock pair, plus one line when mb_y is odd */
182 int offset = h->mb.b_interlaced
183 ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
184 : w * (mb_x + mb_y * stride);
185 int pix = i ? PIXEL_8x8 : PIXEL_16x16;
/* interlaced: doubling the stride makes the variance routine read every other line */
186 stride <<= h->mb.b_interlaced;
187 var += h->pixf.var[pix]( frame->plane[i]+offset, stride );
/* Fractional-part table for x264_log2(): indexed by the 7 bits that follow the
 * leading 1 of the normalized input. The entries match log2(1 + i/128)
 * (e.g. entry 1 = 0.01123, entry 127 = 0.99435). */
193 static const float log2_lut[128] = {
194 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
195 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
196 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
197 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
198 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
199 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
200 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
201 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
202 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
203 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
204 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
205 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
206 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
207 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
208 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
209 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
/* Fractional-part table for x264_exp2fix8(), which uses it as
 * (exp2_lut[f]+256) << i >> 8. The entries appear to be the 8-bit fixed-point
 * fraction of 2^(f/64) sampled at bucket midpoints -- TODO confirm the rounding. */
212 static const uint8_t exp2_lut[64] = {
213 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47,
214 50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104,
215 108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
216 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
219 static ALWAYS_INLINE float x264_log2( uint32_t x )
221 int lz = x264_clz( x );
222 return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
/* Table-driven fixed-point power of two built on exp2_lut.
 * Returns 0 when x <= 0 and saturates to 0xffff when x >= 16 at the point of
 * the checks below; otherwise the result combines a table entry selected by f
 * with a shift by i.
 * NOTE(review): the lines that derive i and f from x are not visible here --
 * presumably the integer and fractional parts of x; confirm. */
225 static ALWAYS_INLINE int x264_exp2fix8( float x )
229 if( x <= 0 ) return 0;
230 if( x >= 16 ) return 0xffff;
233 return (exp2_lut[f]+256) << i >> 8;
/* Compute the per-macroblock adaptive-quantization QP offsets for `frame`
 * and store them in frame->f_qp_offset[]. When h->frames.b_have_lowres is set,
 * frame->i_inv_qscale_factor[] is filled from the same offsets as well. */
236 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
238 /* constants chosen to result in approximately the same overall bitrate as without AQ.
239 * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */
/* auto-variance mode: a first pass stores x264_log2(energy + 2) per macroblock so
 * that the strength can be derived from the frame average */
243 if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
245 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
246 for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
248 uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
249 float qp_adj = x264_log2( energy + 2 );
251 frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
254 avg_adj /= h->mb.i_mb_count;
255 strength = h->param.rc.f_aq_strength * avg_adj * (1.f / 6000.f);
/* other AQ modes use a fixed scaling of the user strength */
258 strength = h->param.rc.f_aq_strength * 1.0397f;
/* final pass: turn the energy measure into a QP offset for every macroblock */
259 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
260 for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
263 if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
/* reuse the value stored by the first pass, relative to the frame average */
265 qp_adj = frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride];
266 qp_adj = strength * (qp_adj - avg_adj);
270 uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
271 qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
273 frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
/* the factor is 2^(-qp_adj/6) in the fixed-point form produced by x264_exp2fix8() */
274 if( h->frames.b_have_lowres )
275 frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));
280 /*****************************************************************************
281 * x264_adaptive_quant:
282 * adjust macroblock QP based on variance (AC energy) of the MB.
283 * high variance = higher QP
284 * low variance = lower QP
285 * This generally increases SSIM and lowers PSNR.
286 *****************************************************************************/
/* Sets h->mb.i_qp for the current macroblock (h->mb.i_mb_xy) to the rate-control
 * QP plus the precomputed per-macroblock offset, rounded to nearest and clipped
 * to [i_qp_min, i_qp_max]. */
287 void x264_adaptive_quant( x264_t *h )
290 h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
/* Allocate and initialise the rate-control state of encoder `h`.
 * Steps visible below: allocate and zero one x264_ratecontrol_t per thread,
 * derive fps / bitrate / VBV settings from h->param.rc (inconsistent VBV
 * settings are adjusted with a warning), seed the ABR, CRF and constant-QP
 * state and the size predictors, parse the zones, optionally load and validate
 * a first-pass stats file, optionally open the stats output file, and finally
 * attach rc+i to each h->thread[i].
 * Returns -1 when init_pass2() fails; NOTE(review): the other return
 * statements are not visible in this view -- confirm the remaining error paths. */
293 int x264_ratecontrol_new( x264_t *h )
295 x264_ratecontrol_t *rc;
300 rc = h->rc = x264_malloc( h->param.i_threads * sizeof(x264_ratecontrol_t) );
301 memset( rc, 0, h->param.i_threads * sizeof(x264_ratecontrol_t) );
/* b_abr: any method other than constant QP when no stats file is read;
 * b_2pass: ABR driven by a first-pass stats file */
303 rc->b_abr = h->param.rc.i_rc_method != X264_RC_CQP && !h->param.rc.b_stat_read;
304 rc->b_2pass = h->param.rc.i_rc_method == X264_RC_ABR && h->param.rc.b_stat_read;
306 /* FIXME: use integers */
307 if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
308 rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
/* i_bitrate is multiplied by 1000 here, so rc->bitrate is in bits per second if the parameter is in kbit/s */
312 rc->bitrate = h->param.rc.i_bitrate * 1000.;
313 rc->rate_tolerance = h->param.rc.f_rate_tolerance;
314 rc->nmb = h->mb.i_mb_count;
315 rc->last_non_b_pict_type = -1;
318 if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read )
320 x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
/* sanity-check the VBV parameters; unusable combinations are reset and reported */
323 if( h->param.rc.i_vbv_buffer_size )
325 if( h->param.rc.i_rc_method == X264_RC_CQP )
327 x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
328 h->param.rc.i_vbv_max_bitrate = 0;
329 h->param.rc.i_vbv_buffer_size = 0;
331 else if( h->param.rc.i_vbv_max_bitrate == 0 )
333 x264_log( h, X264_LOG_DEBUG, "VBV maxrate unspecified, assuming CBR\n" );
334 h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
337 if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
338 h->param.rc.i_vbv_max_bitrate > 0)
339 x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
340 else if( h->param.rc.i_vbv_max_bitrate > 0 &&
341 h->param.rc.i_vbv_buffer_size > 0 )
/* the buffer is enlarged to hold at least three frames' worth of data at the maximum rate */
343 if( h->param.rc.i_vbv_buffer_size < 3 * h->param.rc.i_vbv_max_bitrate / rc->fps )
345 h->param.rc.i_vbv_buffer_size = 3 * h->param.rc.i_vbv_max_bitrate / rc->fps;
346 x264_log( h, X264_LOG_WARNING, "VBV buffer size too small, using %d kbit\n",
347 h->param.rc.i_vbv_buffer_size );
/* an initial fill above 1 is taken as an absolute amount and converted to a fraction of the buffer size */
349 if( h->param.rc.f_vbv_buffer_init > 1. )
350 h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
351 rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
352 rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
353 rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
354 rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
355 * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
/* one-pass ABR whose maximum rate does not exceed the average rate */
357 rc->b_vbv_min_rate = !rc->b_2pass
358 && h->param.rc.i_rc_method == X264_RC_ABR
359 && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
361 else if( h->param.rc.i_vbv_max_bitrate )
363 x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
364 h->param.rc.i_vbv_max_bitrate = 0;
366 if(rc->rate_tolerance < 0.01)
368 x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
369 rc->rate_tolerance = 0.01;
/* macroblock QP may vary within a frame when VBV or adaptive quantization is active */
372 h->mb.b_variable_qp = rc->b_vbv || h->param.rc.i_aq_mode;
376 /* FIXME ABR_INIT_QP is actually used only in CRF */
377 #define ABR_INIT_QP ( h->param.rc.i_rc_method == X264_RC_CRF ? h->param.rc.f_rf_constant : 24 )
378 rc->accum_p_norm = .01;
379 rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
380 /* estimated ratio that produces a reasonable QP for the first I-frame */
381 rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
382 rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
383 rc->last_non_b_pict_type = SLICE_TYPE_I;
386 if( h->param.rc.i_rc_method == X264_RC_CRF )
388 /* arbitrary rescaling to make CRF somewhat similar to QP */
389 double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
390 rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
391 / qp2qscale( h->param.rc.f_rf_constant );
/* express the I/P and P/B quantizer ratios as QP offsets (6 QP per doubling) */
394 rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
395 rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
/* constant-QP table: P uses i_qp_constant, I and B are offset from it, rounded and clipped to 0..51 */
396 rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
397 rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
398 rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
400 rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
401 rc->last_qscale = qp2qscale(26);
402 rc->pred = x264_malloc( 5*sizeof(predictor_t) );
403 rc->pred_b_from_p = x264_malloc( sizeof(predictor_t) );
/* seed the per-slice-type history, qscale limits and size predictors */
404 for( i = 0; i < 5; i++ )
406 rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP );
407 rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
408 rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
409 rc->pred[i].coeff= 2.0;
410 rc->pred[i].count= 1.0;
411 rc->pred[i].decay= 0.5;
412 rc->row_preds[i].coeff= .25;
413 rc->row_preds[i].count= 1.0;
414 rc->row_preds[i].decay= 0.5;
416 *rc->pred_b_from_p = rc->pred[0];
418 if( parse_zones( h ) < 0 )
420 x264_log( h, X264_LOG_ERROR, "failed to parse zones\n" );
424 /* Load stat file and init 2pass algo */
425 if( h->param.rc.b_stat_read )
427 char *p, *stats_in, *stats_buf;
429 /* read 1st pass stats */
430 assert( h->param.rc.psz_stat_in );
431 stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in );
434 x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
438 /* check whether 1st pass options were compatible with current options */
439 if( !strncmp( stats_buf, "#options:", 9 ) )
442 char *opts = stats_buf;
443 stats_in = strchr( stats_buf, '\n' );
449 if( ( p = strstr( opts, "bframes=" ) ) && sscanf( p, "bframes=%d", &i )
450 && h->param.i_bframe != i )
452 x264_log( h, X264_LOG_ERROR, "different number of B-frames than 1st pass (%d vs %d)\n",
453 h->param.i_bframe, i );
457 /* since B-adapt doesn't (yet) take into account B-pyramid,
458 * the converse is not a problem */
459 if( strstr( opts, "b_pyramid=1" ) && !h->param.b_bframe_pyramid )
460 x264_log( h, X264_LOG_WARNING, "1st pass used B-pyramid, 2nd doesn't\n" );
462 if( ( p = strstr( opts, "keyint=" ) ) && sscanf( p, "keyint=%d", &i )
463 && h->param.i_keyint_max != i )
464 x264_log( h, X264_LOG_WARNING, "different keyint than 1st pass (%d vs %d)\n",
465 h->param.i_keyint_max, i );
467 if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
468 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
/* direct=auto needs first-pass decisions; without them this pass has to collect its own */
470 if( !strstr( opts, "direct=3" ) && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO )
472 x264_log( h, X264_LOG_WARNING, "direct=auto not used on the first pass\n" );
473 h->mb.b_direct_auto_write = 1;
/* adopt the B-adapt method recorded in the stats file when it is within the valid range */
476 if( ( p = strstr( opts, "b_adapt=" ) ) && sscanf( p, "b_adapt=%d", &i ) && i >= X264_B_ADAPT_NONE && i <= X264_B_ADAPT_TRELLIS )
477 h->param.i_bframe_adaptive = i;
478 else if( h->param.i_bframe )
480 x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" );
485 /* find number of pics */
488 p = strchr(p+1, ';');
491 x264_log(h, X264_LOG_ERROR, "empty stats file\n");
496 if( h->param.i_frame_total < rc->num_entries && h->param.i_frame_total > 0 )
498 x264_log( h, X264_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
499 h->param.i_frame_total, rc->num_entries );
501 if( h->param.i_frame_total > rc->num_entries )
503 x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
504 h->param.i_frame_total, rc->num_entries );
508 rc->entry = (ratecontrol_entry_t*) x264_malloc(rc->num_entries * sizeof(ratecontrol_entry_t));
509 memset(rc->entry, 0, rc->num_entries * sizeof(ratecontrol_entry_t));
511 /* init all to skipped p frames */
512 for(i=0; i<rc->num_entries; i++)
514 ratecontrol_entry_t *rce = &rc->entry[i];
515 rce->pict_type = SLICE_TYPE_P;
516 rce->qscale = rce->new_qscale = qp2qscale(20);
517 rce->misc_bits = rc->nmb + 10;
/* parse one ';'-terminated record per frame; records are stored by their "in:" frame number */
523 for(i=0; i < rc->num_entries; i++)
525 ratecontrol_entry_t *rce;
532 next= strchr(p, ';');
535 (*next)=0; //sscanf is unbelievably slow on long strings
538 e = sscanf(p, " in:%d ", &frame_number);
540 if(frame_number < 0 || frame_number >= rc->num_entries)
542 x264_log(h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i);
545 rce = &rc->entry[frame_number];
546 rce->direct_mode = 0;
548 e += sscanf(p, " in:%*d out:%*d type:%c q:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c",
549 &pict_type, &qp, &rce->tex_bits,
550 &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count,
551 &rce->s_count, &rce->direct_mode);
/* upper-case 'I' and 'B' additionally set kept_as_ref and then fall through to the lower-case case */
555 case 'I': rce->kept_as_ref = 1;
556 case 'i': rce->pict_type = SLICE_TYPE_I; break;
557 case 'P': rce->pict_type = SLICE_TYPE_P; break;
558 case 'B': rce->kept_as_ref = 1;
559 case 'b': rce->pict_type = SLICE_TYPE_B; break;
560 default: e = -1; break;
564 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
567 rce->qscale = qp2qscale(qp);
571 x264_free(stats_buf);
573 if(h->param.rc.i_rc_method == X264_RC_ABR)
575 if(init_pass2(h) < 0) return -1;
576 } /* else we're using constant quant, so no need to run the bitrate allocation */
579 /* Open output file */
580 /* If input and output files are the same, output to a temp file
581 * and move it to the real name only when it's complete */
582 if( h->param.rc.b_stat_write )
/* +6 leaves room for the ".temp" suffix and the terminating NUL */
586 rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
587 strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
588 strcat( rc->psz_stat_file_tmpname, ".temp" );
590 rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
591 if( rc->p_stat_file_out == NULL )
593 x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
/* the first line of the stats file records the encoder options for the compatibility checks above */
597 p = x264_param2string( &h->param, 1 );
598 fprintf( rc->p_stat_file_out, "#options: %s\n", p );
/* hand every thread context its own element of the rc array and propagate params and b_variable_qp */
602 for( i=0; i<h->param.i_threads; i++ )
604 h->thread[i]->rc = rc+i;
608 memcpy( &h->thread[i]->param, &h->param, sizeof( x264_param_t ) );
609 h->thread[i]->mb.b_variable_qp = h->mb.b_variable_qp;
/* Parse one zone description `p` into `z`.
 * Three forms are tried in order: "start,end,q=<qp>", "start,end,b=<bitrate factor>"
 * and plain "start,end"; anything else is logged as an invalid zone.
 * The zone then receives its own copy of h->param, and the comma-separated
 * "name=value" tokens are applied to that copy with x264_param_parse().
 * NOTE(review): return statements are not visible in this view; parse_zones()
 * treats a non-zero result as failure. */
616 static int parse_zone( x264_t *h, x264_zone_t *z, char *p )
619 char *tok, UNUSED *saveptr;
/* default factor, kept unless the "b=" form overrides it */
621 z->f_bitrate_factor = 1;
622 if( 3 <= sscanf(p, "%u,%u,q=%u%n", &z->i_start, &z->i_end, &z->i_qp, &len) )
624 else if( 3 <= sscanf(p, "%u,%u,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) )
626 else if( 2 <= sscanf(p, "%u,%u%n", &z->i_start, &z->i_end, &len) )
630 x264_log( h, X264_LOG_ERROR, "invalid zone: \"%s\"\n", p );
/* per-zone parameter set, starting from the encoder's current parameters */
636 z->param = x264_malloc( sizeof(x264_param_t) );
637 memcpy( z->param, &h->param, sizeof(x264_param_t) );
638 while( (tok = strtok_r( p, ",", &saveptr )) )
640 char *val = strchr( tok, '=' );
646 if( x264_param_parse( z->param, tok, val ) )
648 x264_log( h, X264_LOG_ERROR, "invalid zone param: %s = %s\n", tok, val );
/* Build the zone table used by rate control.
 * If zones were supplied as a string (psz_zones) and no zone array exists yet,
 * the string is split on '/' and each piece is handed to parse_zone().
 * Every zone is then validated, and rc->zones is built with one extra default
 * zone at index 0 that covers all frames.
 * x264_ratecontrol_new() treats a negative return value as failure. */
656 static int parse_zones( x264_t *h )
658 x264_ratecontrol_t *rc = h->rc;
660 if( h->param.rc.psz_zones && !h->param.rc.i_zones )
662 char *p, *tok, UNUSED *saveptr;
/* work on a private copy because strtok_r() modifies the string it scans */
663 char *psz_zones = x264_malloc( strlen(h->param.rc.psz_zones)+1 );
664 strcpy( psz_zones, h->param.rc.psz_zones );
/* number of zones = number of '/' separators + 1 */
665 h->param.rc.i_zones = 1;
666 for( p = psz_zones; *p; p++ )
667 h->param.rc.i_zones += (*p == '/');
668 h->param.rc.zones = x264_malloc( h->param.rc.i_zones * sizeof(x264_zone_t) );
670 for( i = 0; i < h->param.rc.i_zones; i++ )
672 tok = strtok_r( p, "/", &saveptr );
673 if( !tok || parse_zone( h, &h->param.rc.zones[i], tok ) )
677 x264_free( psz_zones );
680 if( h->param.rc.i_zones > 0 )
/* reject zones with a negative or inverted frame range, or a non-positive bitrate factor */
682 for( i = 0; i < h->param.rc.i_zones; i++ )
684 x264_zone_t z = h->param.rc.zones[i];
685 if( z.i_start < 0 || z.i_start > z.i_end )
687 x264_log( h, X264_LOG_ERROR, "invalid zone: start=%d end=%d\n",
688 z.i_start, z.i_end );
691 else if( !z.b_force_qp && z.f_bitrate_factor <= 0 )
693 x264_log( h, X264_LOG_ERROR, "invalid zone: bitrate_factor=%f\n",
694 z.f_bitrate_factor );
/* user zones are copied to indices 1..n so that index 0 can hold the default zone */
699 rc->i_zones = h->param.rc.i_zones + 1;
700 rc->zones = x264_malloc( rc->i_zones * sizeof(x264_zone_t) );
701 memcpy( rc->zones+1, h->param.rc.zones, (rc->i_zones-1) * sizeof(x264_zone_t) );
703 // default zone to fall back to if none of the others match
704 rc->zones[0].i_start = 0;
705 rc->zones[0].i_end = INT_MAX;
706 rc->zones[0].b_force_qp = 0;
707 rc->zones[0].f_bitrate_factor = 1;
708 rc->zones[0].param = x264_malloc( sizeof(x264_param_t) );
709 memcpy( rc->zones[0].param, &h->param, sizeof(x264_param_t) );
/* zones without their own parameter set share the default zone's copy */
710 for( i = 1; i < rc->i_zones; i++ )
712 if( !rc->zones[i].param )
713 rc->zones[i].param = rc->zones[0].param;
/* Look up the zone whose [i_start, i_end] range contains frame_num.
 * The table is scanned from the last entry down to index 0.
 * NOTE(review): the return statements are not visible here -- presumably the
 * first match found is returned, which would give later zones priority over
 * earlier ones and over the default zone at index 0; confirm. */
720 static x264_zone_t *get_zone( x264_t *h, int frame_num )
723 for( i = h->rc->i_zones-1; i >= 0; i-- )
725 x264_zone_t *z = &h->rc->zones[i];
726 if( frame_num >= z->i_start && frame_num <= z->i_end )
/* Log the "final ratefactor" reached by a one-pass ABR encode.
 * Only reported when rc->b_abr is set, the method is X264_RC_ABR and
 * cbr_decay is above .9999. */
732 void x264_ratecontrol_summary( x264_t *h )
734 x264_ratecontrol_t *rc = h->rc;
735 if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR && rc->cbr_decay > .9999 )
/* same base_cplx rescaling as the CRF setup in x264_ratecontrol_new() */
737 double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
738 x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
739 qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
740 * rc->cplxr_sum / rc->wanted_bits_window ) );
/* Release the rate-control resources of `h`: close the stats output file
 * (moving the temp file to its final name), then free the predictors, the
 * first-pass entries and the zone table. */
744 void x264_ratecontrol_delete( x264_t *h )
746 x264_ratecontrol_t *rc = h->rc;
749 if( rc->p_stat_file_out )
751 fclose( rc->p_stat_file_out );
/* the temp file only replaces the real stats file once at least num_entries frames were encoded */
752 if( h->i_frame >= rc->num_entries )
753 if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
755 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
756 rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
758 x264_free( rc->psz_stat_file_tmpname );
760 x264_free( rc->pred );
761 x264_free( rc->pred_b_from_p );
762 x264_free( rc->entry );
765 x264_free( rc->zones[0].param );
/* zones may share zones[0].param (see parse_zones()), so only distinct parameter sets are freed */
766 if( h->param.rc.psz_zones )
767 for( i=1; i<rc->i_zones; i++ )
768 if( rc->zones[i].param != rc->zones[0].param )
769 x264_free( rc->zones[i].param );
770 x264_free( rc->zones );
775 void x264_ratecontrol_set_estimated_size( x264_t *h, int bits )
777 x264_pthread_mutex_lock( &h->fenc->mutex );
778 h->rc->frame_size_estimated = bits;
779 x264_pthread_mutex_unlock( &h->fenc->mutex );
/* Read the current size estimate of the frame being encoded.
 * rc->frame_size_estimated is copied into `size` while holding h->fenc->mutex,
 * the same mutex that x264_ratecontrol_set_estimated_size() takes for the write. */
782 int x264_ratecontrol_get_estimated_size( x264_t const *h)
785 x264_pthread_mutex_lock( &h->fenc->mutex );
786 size = h->rc->frame_size_estimated;
787 x264_pthread_mutex_unlock( &h->fenc->mutex );
/* Fold the QP of the current frame into the exponentially decaying average
 * (accum_p_qp / accum_p_norm) that is kept for choosing I-frame quantizers. */
791 static void accum_p_qp_update( x264_t *h, float qp )
793 x264_ratecontrol_t *rc = h->rc;
/* older frames lose 5% of their weight per update; the new frame enters with weight 1 */
794 rc->accum_p_qp *= .95;
795 rc->accum_p_norm *= .95;
796 rc->accum_p_norm += 1;
/* an I-slice is accumulated with ip_offset added to its QP */
797 if( h->sh.i_type == SLICE_TYPE_I )
798 rc->accum_p_qp += qp + rc->ip_offset;
800 rc->accum_p_qp += qp;
803 /* Before encoding a frame, choose a QP for it */
/* The QP comes from rate_estimate_qscale() in the ABR and 2-pass cases, or from
 * the constant-QP table otherwise, with the matching zone applied; the result
 * is rounded and clipped to 0..51 and stored in rc->qp.
 * i_force_qp is recorded in rc->qp_force. */
804 void x264_ratecontrol_start( x264_t *h, int i_force_qp )
806 x264_ratecontrol_t *rc = h->rc;
807 ratecontrol_entry_t *rce = NULL;
808 x264_zone_t *zone = get_zone( h, h->fenc->i_frame );
/* reconfigure the encoder only when this frame's zone uses a different parameter set than the previous one */
813 if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
814 x264_encoder_reconfig( h, zone->param );
815 rc->prev_zone = zone;
817 rc->qp_force = i_force_qp;
/* with a stats file, fetch this frame's first-pass entry */
819 if( h->param.rc.b_stat_read )
821 int frame = h->fenc->i_frame;
822 assert( frame >= 0 && frame < rc->num_entries );
823 rce = h->rc->rce = &h->rc->entry[frame];
/* direct=auto on B slices: reuse the direct mode recorded by the first pass ('s' or 't') */
825 if( h->sh.i_type == SLICE_TYPE_B
826 && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO )
828 h->sh.b_direct_spatial_mv_pred = ( rce->direct_mode == 's' );
829 h->mb.b_direct_auto_read = ( rce->direct_mode == 's' || rce->direct_mode == 't' );
/* reset the per-row bit counters and select the row predictor for this slice type */
835 memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) );
836 rc->row_pred = &rc->row_preds[h->sh.i_type];
837 update_vbv_plan( h );
/* for non-B frames, walk the consecutive B-frames queued in h->frames.current (indexed by rc->bframes) */
840 if( h->sh.i_type != SLICE_TYPE_B )
843 while( h->frames.current[rc->bframes] && IS_X264_TYPE_B(h->frames.current[rc->bframes]->i_type) )
853 q = qscale2qp( rate_estimate_qscale( h ) );
855 else if( rc->b_2pass )
857 rce->new_qscale = rate_estimate_qscale( h );
858 q = qscale2qp( rce->new_qscale );
/* constant QP: referenced B-frames use the average of the B and P constants */
862 if( h->sh.i_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
863 q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
865 q = rc->qp_constant[ h->sh.i_type ];
/* zone adjustment: a forced QP shifts q relative to the P constant, a bitrate factor by 6*log2(factor) */
869 if( zone->b_force_qp )
870 q += zone->i_qp - rc->qp_constant[SLICE_TYPE_P];
872 q -= 6*log(zone->f_bitrate_factor)/log(2);
878 h->fdec->f_qp_avg_rc =
879 h->fdec->f_qp_avg_aq =
881 rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
884 rce->new_qp = rc->qp;
886 accum_p_qp_update( h, rc->qp );
888 if( h->sh.i_type != SLICE_TYPE_B )
889 rc->last_non_b_pict_type = h->sh.i_type;
/* Predict the size in bits of macroblock row `y` of the current frame at
 * quantizer `qp`. */
892 static double predict_row_size( x264_t *h, int y, int qp )
894 /* average between two predictors:
895 * absolute SATD, and scaled bit cost of the colocated row in the previous frame */
896 x264_ratecontrol_t *rc = h->rc;
897 double pred_s = predict_size( rc->row_pred, qp2qscale(qp), h->fdec->i_row_satd[y] );
/* the temporal predictor is only used for non-I slices whose first list-0 reference has the
 * same frame type and a row SATD within 50% of the current row's */
899 if( h->sh.i_type != SLICE_TYPE_I
900 && h->fref0[0]->i_type == h->fdec->i_type
901 && h->fref0[0]->i_row_satd[y] > 0
902 && (abs(h->fref0[0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2))
/* reference row's bits, rescaled by the SATD ratio and by the ratio of the two qscales */
904 pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y]
905 * qp2qscale(h->fref0[0]->i_row_qp[y]) / qp2qscale(qp);
910 return (pred_s + pred_t) / 2;
/* Sum the bits recorded in h->fdec->i_row_bits[] for rows 0..y inclusive. */
913 static double row_bits_so_far( x264_t *h, int y )
917 for( i = 0; i <= y; i++ )
918 bits += h->fdec->i_row_bits[i];
/* Estimate the total frame size: the bits already recorded for rows 0..y plus
 * the predicted size, at quantizer `qp`, of every remaining row. */
922 static double predict_row_size_sum( x264_t *h, int y, int qp )
925 double bits = row_bits_so_far(h, y);
926 for( i = y+1; i < h->sps->i_mb_height; i++ )
927 bits += predict_row_size( h, i, qp );
/* Per-macroblock rate-control hook, called with the bits spent on the
 * macroblock just encoded. It accumulates row bits and QP sums and, under VBV,
 * re-evaluates the macroblock QP (rc->qpm) at the end of each row from the
 * predicted frame size. */
932 void x264_ratecontrol_mb( x264_t *h, int bits )
934 x264_ratecontrol_t *rc = h->rc;
935 const int y = h->mb.i_mb_y;
939 h->fdec->i_row_bits[y] += bits;
/* running sums; x264_ratecontrol_end() divides them by the macroblock count */
940 rc->qpa_rc += rc->f_qpm;
941 rc->qpa_aq += h->mb.i_qp;
/* the row-level logic below applies only to the last macroblock of a row when VBV is enabled */
943 if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv)
946 h->fdec->i_row_qp[y] = rc->qpm;
948 if( h->sh.i_type == SLICE_TYPE_B )
950 /* B-frames shouldn't use lower QP than their reference frames.
951 * This code is a bit overzealous in limiting B-frame quantizers, but it helps avoid
952 * underflows due to the fact that B-frames are not explicitly covered by VBV. */
953 if( y < h->sps->i_mb_height-1 )
/* highest QP of the next row in the two first references, plus pb_offset (halved for X264_TYPE_BREF) */
956 int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
957 + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1);
958 rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset
959 i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size
960 if (i_estimated > h->rc->frame_size_planned)
961 x264_ratecontrol_set_estimated_size(h, i_estimated);
/* feed the finished row back into the row-size predictor */
966 update_predictor( rc->row_pred, qp2qscale(rc->qpm), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
968 /* tweak quality based on difference from predicted size */
969 if( y < h->sps->i_mb_height-1 && h->stat.i_slice_count[h->sh.i_type] > 0 )
971 int prev_row_qp = h->fdec->i_row_qp[y];
972 int b0 = predict_row_size_sum( h, y, rc->qpm );
/* the row QP may move at most i_qp_step away from the previous row and stays inside the global limits */
974 int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max );
975 int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
976 float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
980 /* Don't modify the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
981 /* area at the top of the frame was measured inaccurately. */
982 if(row_bits_so_far(h,y) < 0.05 * rc->frame_size_planned)
985 headroom = buffer_left_planned/rc->buffer_size;
986 if(h->sh.i_type != SLICE_TYPE_I)
990 if( !rc->b_vbv_min_rate )
991 i_qp_min = X264_MAX( i_qp_min, h->sh.i_qp );
/* loop while the predicted frame size exceeds the tolerated plan or would use more than half of the planned buffer headroom */
993 while( rc->qpm < i_qp_max
994 && (b1 > rc->frame_size_planned * rc_tol
995 || (rc->buffer_fill - b1 < buffer_left_planned * 0.5)))
998 b1 = predict_row_size_sum( h, y, rc->qpm );
1001 /* avoid VBV underflow */
1002 while( (rc->qpm < h->param.rc.i_qp_max)
1003 && (rc->buffer_fill - b1 < rc->buffer_size * 0.005))
1006 b1 = predict_row_size_sum( h, y, rc->qpm );
/* loop while the frame is predicted clearly below plan, or too small relative to the buffer level */
1009 while( rc->qpm > i_qp_min
1010 && rc->qpm > h->fdec->i_row_qp[0]
1011 && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp)
1012 || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
1015 b1 = predict_row_size_sum( h, y, rc->qpm );
1017 x264_ratecontrol_set_estimated_size(h, b1);
1020 /* loses the fractional part of the frame-wise qp */
1021 rc->f_qpm = rc->qpm;
/* Query the QP chosen by rate control for `h`.
 * NOTE(review): only the signature is visible in this view -- presumably
 * returns the current macroblock QP (rc->qpm); confirm against the body. */
1024 int x264_ratecontrol_qp( x264_t *h )
1029 /* In 2pass, force the same frame types as in the 1st pass */
/* Returns the X264_TYPE_* to use for frame_num. When a stats file is being read,
 * the type recorded by the first pass is returned; if frame_num lies beyond
 * the recorded entries, every thread is switched to constant QP and
 * X264_TYPE_AUTO is returned. */
1030 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
1032 x264_ratecontrol_t *rc = h->rc;
1033 if( h->param.rc.b_stat_read )
1035 if( frame_num >= rc->num_entries )
1037 /* We could try to initialize everything required for ABR and
1038 * adaptive B-frames, but that would be complicated.
1039 * So just calculate the average QP used so far. */
/* 24 when no P slice has been coded yet, otherwise 1 + the average P-slice QP so far */
1042 h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
1043 : 1 + h->stat.f_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
1044 rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
1045 rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
1046 rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
1048 x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
1049 x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
1050 if( h->param.i_bframe_adaptive )
1051 x264_log(h, X264_LOG_ERROR, "disabling adaptive B-frames\n");
/* switch every thread context to constant QP, with adaptive B-frames and scenecut disabled
 * and at most one B-frame */
1053 for( i = 0; i < h->param.i_threads; i++ )
1055 h->thread[i]->rc->b_abr = 0;
1056 h->thread[i]->rc->b_2pass = 0;
1057 h->thread[i]->param.rc.i_rc_method = X264_RC_CQP;
1058 h->thread[i]->param.rc.b_stat_read = 0;
1059 h->thread[i]->param.i_bframe_adaptive = 0;
1060 h->thread[i]->param.i_scenecut_threshold = 0;
1061 if( h->thread[i]->param.i_bframe > 1 )
1062 h->thread[i]->param.i_bframe = 1;
1064 return X264_TYPE_AUTO;
/* map the recorded picture type; kept_as_ref selects IDR over I and BREF over B */
1066 switch( rc->entry[frame_num].pict_type )
1069 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
1072 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
1081 return X264_TYPE_AUTO;
1085 /* After encoding one frame, save stats and update ratecontrol state */
/* Parameters:
 *   h    - encoder context of the frame that was just coded.
 *   bits - size of that coded frame; it is folded into cplxr_sum, summed into
 *          bframe_bits for B slices and handed to update_vbv().
 * Returns nothing; results are written into h->stat, h->fdec and h->rc. */
1086 void x264_ratecontrol_end( x264_t *h, int bits )
1088 x264_ratecontrol_t *rc = h->rc;
1089 const int *mbs = h->stat.frame.i_mb_count;
/* Collapse the per-type macroblock counters into skip / intra / inter totals. */
1094 h->stat.frame.i_mb_count_skip = mbs[P_SKIP] + mbs[B_SKIP];
1095 h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4];
1096 h->stat.frame.i_mb_count_p = mbs[P_L0] + mbs[P_8x8];
/* Every type from B_DIRECT up to (excluding) B_8x8 is added to the inter total. */
1097 for( i = B_DIRECT; i < B_8x8; i++ )
1098 h->stat.frame.i_mb_count_p += mbs[i];
/* Divide the accumulated QP values by the macroblock count (in place) and
 * copy the resulting averages onto fdec. */
1100 h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
1101 h->fdec->f_qp_avg_aq = rc->qpa_aq /= h->mb.i_mb_count;
/* Optional stats output: one text record per frame on rc->p_stat_file_out. */
1103 if( h->param.rc.b_stat_write )
/* Type letter: I for an I slice with POC 0, i for any other I slice, P for
 * P slices, and B or b for B slices depending on b_kept_as_ref. */
1105 char c_type = h->sh.i_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
1106 : h->sh.i_type==SLICE_TYPE_P ? 'P'
1107 : h->fenc->b_kept_as_ref ? 'B' : 'b';
/* Direct-mode letter: s or t from the sign of the per-frame score difference,
 * falling back to the sign of the running-total difference when that is zero. */
1108 int dir_frame = h->stat.frame.i_direct_score[1] - h->stat.frame.i_direct_score[0];
1109 int dir_avg = h->stat.i_direct_score[1] - h->stat.i_direct_score[0];
1110 char c_direct = h->mb.b_direct_auto_write ?
1111 ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
1112 dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
1114 fprintf( rc->p_stat_file_out,
1115 "in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
1116 h->fenc->i_frame, h->i_frame,
1118 h->stat.frame.i_tex_bits,
1119 h->stat.frame.i_mv_bits,
1120 h->stat.frame.i_misc_bits,
1121 h->stat.frame.i_mb_count_i,
1122 h->stat.frame.i_mb_count_p,
1123 h->stat.frame.i_mb_count_skip,
/* Complexity accumulation: a non-B slice contributes bits * qscale / last_rceq. */
1129 if( h->sh.i_type != SLICE_TYPE_B )
1130 rc->cplxr_sum += bits * qp2qscale(rc->qpa_rc) / rc->last_rceq;
1133 /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
1134 * Not perfectly accurate with B-refs, but good enough. */
1135 rc->cplxr_sum += bits * qp2qscale(rc->qpa_rc) / (rc->last_rceq * fabs(h->param.rc.f_pb_factor));
/* Age both running sums by cbr_decay; wanted_bits_window first gains one
 * frame of target bits (bitrate / fps). */
1137 rc->cplxr_sum *= rc->cbr_decay;
1138 rc->wanted_bits_window += rc->bitrate / rc->fps;
1139 rc->wanted_bits_window *= rc->cbr_decay;
/* Add the size the current stats entry is expected to have at its new_qp. */
1144 rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
1147 if( h->mb.b_variable_qp )
1149 if( h->sh.i_type == SLICE_TYPE_B )
1151 rc->bframe_bits += bits;
/* Once no further B frame is queued next (no frame, or a non-B type), feed
 * the average B size (bframe_bits / bframes) into pred_b_from_p. */
1152 if( !h->frames.current[0] || !IS_X264_TYPE_B(h->frames.current[0]->i_type) )
1154 update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
1155 h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
1156 /* In some cases, such as completely blank scenes, pred_b_from_p can go nuts */
1157 /* Hackily cap the predictor coeff in case this happens. */
1158 /* FIXME FIXME FIXME */
1159 rc->pred_b_from_p->coeff = X264_MIN( rc->pred_b_from_p->coeff, 10. );
1160 rc->bframe_bits = 0;
/* Finally account for the frame in the VBV model. */
1165 update_vbv( h, bits );
1168 /****************************************************************************
1170 ***************************************************************************/
1173 * modify the bitrate curve from pass1 for one frame
/* Computes the qscale for one frame of the rate curve.
 *   h           - encoder context (reads param.rc.f_qcompress, updates h->rc).
 *   rce         - stats entry of the frame; blurred_complexity, tex_bits and
 *                 mv_bits are read.
 *   rate_factor - scaling factor chosen by the caller.
 *                 NOTE(review): its use is not among the lines shown here.
 *   frame_num   - frame index used to look up the zone via get_zone().
 * The result is presumably q as left by the steps below - TODO confirm, the
 * return statement is not shown here. */
1175 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor, int frame_num)
1177 x264_ratecontrol_t *rcc= h->rc;
1179 x264_zone_t *zone = get_zone( h, frame_num );
/* Rate control equation: blurred complexity raised to (1 - qcompress). */
1181 q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress );
1183 // avoid NaN's in the rc_eq
/* A non-finite result, or a frame with neither texture nor mv bits, reuses
 * the previously stored qscale. */
1184 if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0)
1185 q = rcc->last_qscale;
1190 rcc->last_qscale = q;
/* Zone handling: a forced zone QP replaces q, a zone bitrate factor divides it
 * (the branch structure between the two is not shown here - TODO confirm). */
1195 if( zone->b_force_qp )
1196 q = qp2qscale(zone->i_qp);
1198 q /= zone->f_bitrate_factor;
/* Constrains the qscale of one frame relative to neighbouring frames.
 *   h   - encoder context (ip/pb factors are read, h->rc state is updated).
 *   rce - stats entry of the frame (pict_type, kept_as_ref, tex_bits, i_count).
 *   q   - qscale proposed for the frame.
 * Side effects visible below: last_qscale_for[], last_non_b_pict_type,
 * last_accum_p_norm, accum_p_norm and accum_p_qp in h->rc are updated. */
1204 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
1206 x264_ratecontrol_t *rcc = h->rc;
1207 const int pict_type = rce->pict_type;
1209 // force I/B quants as a function of P quants
1210 const double last_p_q = rcc->last_qscale_for[SLICE_TYPE_P];
1211 const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
1212 if( pict_type == SLICE_TYPE_I )
/* pq is the qscale matching accum_p_qp / accum_p_norm, i.e. the weighted
 * average QP of the P frames accumulated at the end of this function. */
1215 double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
1216 double ip_factor = fabs( h->param.rc.f_ip_factor );
1217 /* don't apply ip_factor if the following frame is also I */
1218 if( rcc->accum_p_norm <= 0 )
1220 else if( h->param.rc.f_ip_factor < 0 )
1222 else if( rcc->accum_p_norm >= 1 )
/* Otherwise blend pq / ip_factor with iq, weighted by accum_p_norm. */
1225 q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
1227 else if( pict_type == SLICE_TYPE_B )
1229 if( h->param.rc.f_pb_factor > 0 )
/* Only B frames that are not kept as references are scaled by pb_factor. */
1231 if( !rce->kept_as_ref )
1232 q *= fabs( h->param.rc.f_pb_factor );
1234 else if( pict_type == SLICE_TYPE_P
1235 && rcc->last_non_b_pict_type == SLICE_TYPE_P
1236 && rce->tex_bits == 0 )
1241 /* last qscale / qdiff stuff */
/* When the previous non-B frame has the same type (and, for I frames, the
 * archived accum_p_norm is below 1), keep q within a factor of lstep of the
 * last qscale used for this type. */
1242 if(rcc->last_non_b_pict_type==pict_type
1243 && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
1245 double last_q = rcc->last_qscale_for[pict_type];
1246 double max_qscale = last_q * rcc->lstep;
1247 double min_qscale = last_q / rcc->lstep;
1249 if (q > max_qscale) q = max_qscale;
1250 else if(q < min_qscale) q = min_qscale;
/* Remember q for this picture type; B frames do not change last_non_b_pict_type. */
1253 rcc->last_qscale_for[pict_type] = q;
1254 if(pict_type!=SLICE_TYPE_B)
1255 rcc->last_non_b_pict_type = pict_type;
/* An I frame archives accum_p_norm and restarts both P accumulators. */
1256 if(pict_type==SLICE_TYPE_I)
1258 rcc->last_accum_p_norm = rcc->accum_p_norm;
1259 rcc->accum_p_norm = 0;
1260 rcc->accum_p_qp = 0;
/* A P frame is added to the accumulators; mask = 1 - (i_count / nmb)^2 scales
 * both the new sample and everything accumulated before it. */
1262 if(pict_type==SLICE_TYPE_P)
1264 float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
1265 rcc->accum_p_qp = mask * (qscale2qp(q) + rcc->accum_p_qp);
1266 rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
/* Predicts the size of a frame from a predictor.
 *   p   - predictor state; coeff and count are read.
 *   q   - qscale the frame would be coded at.
 *   var - complexity measure (the callers in this file pass SATD values).
 * Returns coeff * var / (q * count). There is no guard here against
 * q * count being zero. */
1271 static double predict_size( predictor_t *p, double q, double var )
1273 return p->coeff*var / (q*p->count);
/* Feeds one observation into a predictor.
 *   p    - predictor to update in place.
 *   q    - qscale the frame was coded at.
 *   var  - complexity measure of the frame; used as a divisor below.
 *   bits - size the frame actually took.
 * Visible effect: count and coeff are aged by p->decay, then coeff gains
 * bits * q / var. */
1276 static void update_predictor( predictor_t *p, double q, double var, double bits )
1280 p->count *= p->decay;
1281 p->coeff *= p->decay;
1283 p->coeff += bits*q / var;
1286 // update VBV after encoding a frame
/*   h    - encoder context of the frame that was just coded.
 *   bits - size of that frame, subtracted from the buffer fill.
 * rcc is the ratecontrol state of h itself, rct the one of thread 0; the
 * size predictors and buffer_fill_final are kept on rct. */
1287 static void update_vbv( x264_t *h, int bits )
1289 x264_ratecontrol_t *rcc = h->rc;
1290 x264_ratecontrol_t *rct = h->thread[0]->rc;
/* Train the per-slice-type predictor only when last_satd is at least the
 * macroblock count. */
1292 if( rcc->last_satd >= h->mb.i_mb_count )
1293 update_predictor( &rct->pred[h->sh.i_type], qp2qscale(rcc->qpa_rc), rcc->last_satd, bits );
/* Refill by buffer_rate, drain by the frame size, warn if the result went
 * negative, then clamp into [0, buffer_size]. */
1298 rct->buffer_fill_final += rct->buffer_rate - bits;
1299 if( rct->buffer_fill_final < 0 )
1300 x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rct->buffer_fill_final );
1301 rct->buffer_fill_final = x264_clip3f( rct->buffer_fill_final, 0, rct->buffer_size );
1304 // provisionally update VBV according to the planned size of all frames currently in progress
/*   h - encoder context that is about to start a frame.
 * Seeds rcc->buffer_fill from the buffer_fill_final kept by thread 0. When
 * more than one thread is configured, the other thread contexts are visited
 * (starting after this one and wrapping around) and the expected size of
 * each frame in progress is charged against the fill: the larger of
 * frame_size_planned and x264_ratecontrol_get_estimated_size(). */
1305 static void update_vbv_plan( x264_t *h )
1307 x264_ratecontrol_t *rcc = h->rc;
1308 rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
1309 if( h->param.i_threads > 1 )
/* Offset of this rc from the rc of thread 0, used as the starting index. */
1311 int j = h->rc - h->thread[0]->rc;
1313 for( i=1; i<h->param.i_threads; i++ )
1315 x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
1316 double bits = t->rc->frame_size_planned;
/* Inactive thread contexts are tested for here. */
1317 if( !t->b_thread_active )
1319 bits = X264_MAX(bits, x264_ratecontrol_get_estimated_size(t));
1320 rcc->buffer_fill += rcc->buffer_rate - bits;
/* The fill and its bounds are floating-point quantities (update_vbv clamps
 * buffer_fill_final with x264_clip3f), so use the floating-point clamp here
 * as well; x264_clip3 is the clamp applied to integer QPs in this file and
 * would truncate the fill, or overflow for very large buffers. */
1321 rcc->buffer_fill = x264_clip3f( rcc->buffer_fill, 0, rcc->buffer_size );
1326 // apply VBV constraints and clip qscale to between lmin and lmax
/*   h         - encoder context; VBV state is read from h->rc.
 *   pict_type - slice type used to select lmin / lmax.
 *   q         - candidate qscale.
 * Returns the adjusted q clamped into [lmin, lmax]. */
1327 static double clip_qscale( x264_t *h, int pict_type, double q )
1329 x264_ratecontrol_t *rcc = h->rc;
1330 double lmin = rcc->lmin[pict_type];
1331 double lmax = rcc->lmax[pict_type];
1334 /* B-frames are not directly subject to VBV,
1335 * since they are controlled by the P-frames' QPs.
1336 * FIXME: in 2pass we could modify previous frames' QP too,
1337 * instead of waiting for the buffer to fill */
1339 ( pict_type == SLICE_TYPE_P ||
1340 ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
/* Below half fill, q is raised: it is divided by twice the fill ratio,
 * with that divisor limited to [0.5, 1.0]. */
1342 if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
1343 q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
1346 if( rcc->b_vbv && rcc->last_satd > 0 )
1348 /* Now a hard threshold to make sure the frame fits in VBV.
1349 * This one is mostly for I-frames. */
1350 double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
/* A frame predicted to take more than half of the current fill yields a
 * factor qf = fill / (2 * bits), limited to [0.2, 1.0]. */
1352 if( bits > rcc->buffer_fill/2 )
1353 qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
/* A frame predicted to take less than half of buffer_rate lowers q in
 * proportion, but never below q0. */
1356 if( bits < rcc->buffer_rate/2 )
1357 q *= bits*2/rcc->buffer_rate;
1358 q = X264_MAX( q0, q );
1360 /* Check B-frame complexity, and use up any bits that would
1361 * overflow before the next P-frame. */
1362 if( h->sh.i_type == SLICE_TYPE_P )
1364 int nb = rcc->bframes;
1365 double pbbits = bits;
/* Size of one following B frame, predicted at q scaled by pb_factor. */
1366 double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd );
1369 if( bbits > rcc->buffer_rate )
1371 pbbits += nb * bbits;
/* space: how far the buffer would exceed buffer_size after 1 + nb refills. */
1373 space = rcc->buffer_fill + (1+nb)*rcc->buffer_rate - rcc->buffer_size;
1374 if( pbbits < space )
1376 q *= X264_MAX( pbbits / space,
1377 bits / (0.5 * rcc->buffer_size) );
1379 q = X264_MAX( q0-5, q );
1382 if( !rcc->b_vbv_min_rate )
1383 q = X264_MAX( q0, q );
/* 2pass branch: the position of log(q) inside [log(lmin), log(lmax)] is
 * passed through a logistic curve and mapped back onto that log range. */
1388 else if(rcc->b_2pass)
1390 double min2 = log(lmin);
1391 double max2 = log(lmax);
1392 q = (log(q) - min2)/(max2-min2) - 0.5;
1393 q = 1.0/(1.0 + exp(-4*q));
1394 q = q*(max2-min2) + min2;
1398 return x264_clip3f(q, lmin, lmax);
1401 // update qscale for 1 frame based on actual bits used so far
/*   h - encoder context of the frame about to be coded.
 * Three paths are visible: B slices derive their QP from the reference
 * frames and return early; the stats-driven (2pass) path follows the stats
 * entry and corrects for drift and VBV; the 1pass path estimates q from
 * running complexity. The non-B paths store q in last_qscale_for[] and
 * last_qscale, and every path records frame_size_planned and publishes it
 * through x264_ratecontrol_set_estimated_size(). */
1402 static float rate_estimate_qscale( x264_t *h )
1405 x264_ratecontrol_t *rcc = h->rc;
1406 ratecontrol_entry_t rce;
1407 int pict_type = h->sh.i_type;
1408 double lmin = rcc->lmin[pict_type];
1409 double lmax = rcc->lmax[pict_type];
/* Output produced so far: eight times the summed slice sizes of all types. */
1410 int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
1411 + h->stat.i_slice_size[SLICE_TYPE_P]
1412 + h->stat.i_slice_size[SLICE_TYPE_B]);
/* Sanity check: the slice type must match the type in the stats entry. */
1417 if(pict_type != rce.pict_type)
1419 x264_log(h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
1420 slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type]);
1424 if( pict_type == SLICE_TYPE_B )
1426 /* B-frames don't have independent ratecontrol, but rather get the
1427 * average QP of the two adjacent P-frames + an offset */
/* i0 / i1: whether the first list0 / list1 reference is an I type.
 * dt0 / dt1: POC distance to each of them. q0 / q1: their average rc QP. */
1429 int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type);
1430 int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type);
1431 int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc);
1432 int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc);
1433 float q0 = h->fref0[0]->f_qp_avg_rc;
1434 float q1 = h->fref1[0]->f_qp_avg_rc;
/* A reference that is itself a BREF has half of pb_offset removed again. */
1436 if( h->fref0[0]->i_type == X264_TYPE_BREF )
1437 q0 -= rcc->pb_offset/2;
1438 if( h->fref1[0]->i_type == X264_TYPE_BREF )
1439 q1 -= rcc->pb_offset/2;
/* Two of the combination rules are visible: plain average plus ip_offset,
 * and an average weighted by the opposite POC distance. */
1442 q = (q0 + q1) / 2 + rcc->ip_offset;
1448 q = (q0*dt1 + q1*dt0) / (dt0 + dt1);
/* Offset for the B frame itself: half of pb_offset when kept as reference. */
1450 if(h->fenc->b_kept_as_ref)
1451 q += rcc->pb_offset/2;
1453 q += rcc->pb_offset;
/* Plan the size from the B predictor and the SATD of the last list1
 * reference, publish it and return the QP converted to a qscale. */
1455 rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref1[h->i_ref1-1]->i_satd );
1456 x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned);
1458 return qp2qscale(q);
/* Tolerance window for bitrate drift: 2 * rate_tolerance * bitrate. */
1462 double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
1466 //FIXME adjust abr_buffer based on distance to the end of the video
1468 int64_t predicted_bits = total_bits;
/* With several threads, add the expected size of the frames still being
 * coded by the other thread contexts. */
1472 if( h->param.i_threads > 1 )
1474 int j = h->rc - h->thread[0]->rc;
1476 for( i=1; i<h->param.i_threads; i++ )
1478 x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
1479 double bits = t->rc->frame_size_planned;
1480 if( !t->b_thread_active )
1482 bits = X264_MAX(bits, x264_ratecontrol_get_estimated_size(t));
1483 predicted_bits += (int64_t)bits;
/* Add a nominal bitrate / fps per frame: i_frame frames while fewer than
 * i_threads frames have been seen, i_threads - 1 frames in the other
 * visible statement. */
1489 if( h->fenc->i_frame < h->param.i_threads )
1490 predicted_bits += (int64_t)h->fenc->i_frame * rcc->bitrate / rcc->fps;
1492 predicted_bits += (int64_t)(h->param.i_threads - 1) * rcc->bitrate / rcc->fps;
/* Drift against the stats entry; q is divided by a factor in [.5, 2] that
 * shrinks as more bits than expected have been spent. */
1495 diff = predicted_bits - (int64_t)rce.expected_bits;
1497 q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
1498 if( ((h->fenc->i_frame + 1 - h->param.i_threads) >= rcc->fps) &&
1499 (rcc->expected_bits_sum > 0))
1501 /* Adjust quant based on the difference between
1502 * achieved and expected bitrate so far */
/* w grows from 0 to 1 over the first hundredth of the entries. */
1503 double time = (double)h->fenc->i_frame / rcc->num_entries;
1504 double w = x264_clip3f( time*100, 0.0, 1.0 );
1505 q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
1509 /* Do not overflow vbv */
1510 double expected_size = qscale2bits(&rce, q);
1511 double expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
1512 double expected_fullness = rce.expected_vbv / rcc->buffer_size;
1513 double qmax = q*(2 - expected_fullness);
1514 double size_constraint = 1 + expected_fullness;
1515 qmax = X264_MAX(qmax, rce.new_qscale);
1516 if (expected_fullness < .05)
1518 qmax = X264_MIN(qmax, lmax);
/* Re-evaluate while the projected fill stays below the planned fill divided
 * by size_constraint (and q < qmax), or stays negative (and q < lmax). */
1519 while( ((expected_vbv < rce.expected_vbv/size_constraint) && (q < qmax)) ||
1520 ((expected_vbv < 0) && (q < lmax)))
1523 expected_size = qscale2bits(&rce, q);
1524 expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
1526 rcc->last_satd = x264_stack_align( x264_rc_analyse_slice, h );
1528 q = x264_clip3f( q, lmin, lmax );
1530 else /* 1pass ABR */
1532 /* Calculate the quantizer which would have produced the desired
1533 * average bitrate if it had been applied to all frames so far.
1534 * Then modulate that quant based on the current frame's complexity
1535 * relative to the average complexity so far (using the 2pass RCEQ).
1536 * Then bias the quant up or down if total size so far was far from
1538 * Result: Depending on the value of rate_tolerance, there is a
1539 * tradeoff between quality and bitrate precision. But at large
1540 * tolerances, the bit distribution approaches that of 2pass. */
1542 double wanted_bits, overflow=1, lmin, lmax;
/* Short-term complexity: running sum and count, both halved per frame. */
1544 rcc->last_satd = x264_stack_align( x264_rc_analyse_slice, h );
1545 rcc->short_term_cplxsum *= 0.5;
1546 rcc->short_term_cplxcount *= 0.5;
1547 rcc->short_term_cplxsum += rcc->last_satd;
1548 rcc->short_term_cplxcount ++;
/* Fill a local stats entry so that get_qscale() can be reused. */
1550 rce.tex_bits = rcc->last_satd;
1551 rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
1553 rce.p_count = rcc->nmb;
1557 rce.pict_type = pict_type;
1559 if( h->param.rc.i_rc_method == X264_RC_CRF )
1561 q = get_qscale( h, &rce, rcc->rate_factor_constant, h->fenc->i_frame );
1565 int i_frame_done = h->fenc->i_frame + 1 - h->param.i_threads;
1567 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );
1569 // FIXME is it simpler to keep track of wanted_bits in ratecontrol_end?
1570 wanted_bits = i_frame_done * rcc->bitrate / rcc->fps;
1571 if( wanted_bits > 0 )
/* NOTE(review): i_frame_done is an int, so i_frame_done/25 is an integer
 * division and the sqrt argument only changes every 25 frames - confirm
 * that this is intended. */
1573 abr_buffer *= X264_MAX( 1, sqrt(i_frame_done/25) );
1574 overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
1579 if( pict_type == SLICE_TYPE_I && h->param.i_keyint_max > 1
1580 /* should test _next_ pict type, but that isn't decided yet */
1581 && rcc->last_non_b_pict_type != SLICE_TYPE_I )
/* I frame following non-I frames: start from the accumulated P QP and
 * divide by the ip factor. */
1583 q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
1584 q /= fabs( h->param.rc.f_ip_factor );
1586 else if( h->i_frame > 0 )
1588 /* Asymmetric clipping, because symmetric would prevent
1589 * overflow control in areas of rapidly oscillating complexity */
1590 lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
1591 lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
1592 if( overflow > 1.1 && h->i_frame > 3 )
1594 else if( overflow < 0.9 )
1597 q = x264_clip3f(q, lmin, lmax);
1599 else if( h->param.rc.i_rc_method == X264_RC_CRF )
1601 q = qp2qscale( ABR_INIT_QP ) / fabs( h->param.rc.f_ip_factor );
1604 //FIXME use get_diff_limited_q() ?
1605 q = clip_qscale( h, pict_type, q );
/* Remember the chosen q, both per type and globally. */
1608 rcc->last_qscale_for[pict_type] =
1609 rcc->last_qscale = q;
/* On frame 0 the P entry is seeded as well, except for 2pass without VBV. */
1611 if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 )
1612 rcc->last_qscale_for[SLICE_TYPE_P] = q;
/* Planned size: from the stats entry for 2pass with VBV; the other visible
 * statement uses the per-type predictor and last_satd. */
1614 if( rcc->b_2pass && rcc->b_vbv)
1615 rcc->frame_size_planned = qscale2bits(&rce, q);
1617 rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
1618 x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned);
/* Hands ratecontrol state between thread contexts.
 *   cur  - context that is about to start a frame.
 *   prev - context that most recently started one.
 *   next - context that is about to end one.
 * With the first COPY definition, fields are memcpy-ed from prev->rc into
 * cur->rc (last_qscale_for, last_non_b_pict_type, short_term_cplxsum and
 * short_term_cplxcount are listed below). With the second definition, fields
 * are assigned from cur->rc to next->rc (expected_bits_sum and
 * wanted_bits_window are listed below). */
1623 void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
1627 #define COPY(var) memcpy(&cur->rc->var, &prev->rc->var, sizeof(cur->rc->var))
1628 /* these vars are updated in x264_ratecontrol_start()
1629 * so copy them from the context that most recently started (prev)
1630 * to the context that's about to start (cur).
1636 COPY(last_qscale_for);
1637 COPY(last_non_b_pict_type);
1638 COPY(short_term_cplxsum);
1639 COPY(short_term_cplxcount);
1646 #define COPY(var) next->rc->var = cur->rc->var
1647 /* these vars are updated in x264_ratecontrol_end()
1648 * so copy them from the context that most recently ended (cur)
1649 * to the context that's about to end (next)
1652 COPY(expected_bits_sum);
1653 COPY(wanted_bits_window);
1657 //FIXME row_preds[] (not strictly necessary, but would improve prediction)
1658 /* the rest of the variables are either constant or thread-local */
/* Searches the planned frame sizes for an interval that ends on a buffer
 * violation.
 *   h     - encoder context; entries and buffer parameters come from h->rc.
 *   fills - simulated buffer fill per entry; fills[*t0-1] seeds the scan.
 *   t0    - in: first entry to examine. Presumably also receives the start
 *           of the interval - TODO confirm, the stores are not shown here.
 *   t1    - presumably receives the end of the interval - TODO confirm.
 *   over  - nonzero accumulates the fill with sign +1, zero with sign -1.
 * Returns nonzero when both a start and an end were found. */
1661 static int find_underflow( x264_t *h, double *fills, int *t0, int *t1, int over )
1663 /* find an interval ending on an overflow or underflow (depending on whether
1664 * we're adding or removing bits), and starting on the earliest frame that
1665 * can influence the buffer fill of that end frame. */
1666 x264_ratecontrol_t *rcc = h->rc;
/* NOTE(review): both arms of the conditional are .1, so over has no
 * influence on buffer_min. */
1667 const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
1668 const double buffer_max = .9 * rcc->buffer_size;
1669 double fill = fills[*t0-1];
1670 double parity = over ? 1. : -1.;
1671 int i, start=-1, end=-1;
1672 for(i = *t0; i < rcc->num_entries; i++)
/* Advance the simulated fill by one frame and keep it inside the buffer. */
1674 fill += (rcc->buffer_rate - qscale2bits(&rcc->entry[i], rcc->entry[i].new_qscale)) * parity;
1675 fill = x264_clip3f(fill, 0, rcc->buffer_size);
/* Low-fill test (also taken for entry 0) versus high-fill test, the latter
 * only once a start has been recorded. */
1677 if(fill <= buffer_min || i == 0)
1683 else if(fill >= buffer_max && start >= 0)
1688 return start>=0 && end>=0;
/* Rescales the planned qscale of every entry in [t0, t1].
 *   h          - encoder context; entries are modified in h->rc.
 *   t0, t1     - first and last entry index, both inclusive.
 *   adjustment - multiplier applied to each new_qscale.
 *   qscale_min, qscale_max - bounds applied before and after scaling.
 * adjusted becomes nonzero as soon as one entry ends up different from its
 * clamped original; presumably that flag is the return value - TODO confirm,
 * the return statement is not shown here. */
1691 static int fix_underflow( x264_t *h, int t0, int t1, double adjustment, double qscale_min, double qscale_max)
1693 x264_ratecontrol_t *rcc = h->rc;
1694 double qscale_orig, qscale_new;
1699 for(i = t0; i <= t1; i++)
1701 qscale_orig = rcc->entry[i].new_qscale;
1702 qscale_orig = x264_clip3f(qscale_orig, qscale_min, qscale_max);
1703 qscale_new = qscale_orig * adjustment;
1704 qscale_new = x264_clip3f(qscale_new, qscale_min, qscale_max);
1705 rcc->entry[i].new_qscale = qscale_new;
1706 adjusted = adjusted || (qscale_new != qscale_orig);
/* Recomputes the cumulative size plan for all entries.
 *   h - encoder context; entries are read and updated in h->rc.
 * Each entry gets expected_bits set to the total of all entries before it
 * (its own size excluded), sized with qscale2bits() at its new_qscale.
 * Returns the total over all entries. */
1711 static double count_expected_bits( x264_t *h )
1713 x264_ratecontrol_t *rcc = h->rc;
1714 double expected_bits = 0;
1716 for(i = 0; i < rcc->num_entries; i++)
1718 ratecontrol_entry_t *rce = &rcc->entry[i];
1719 rce->expected_bits = expected_bits;
1720 expected_bits += qscale2bits(rce, rce->new_qscale);
1722 return expected_bits;
/* Reshapes the planned qscales so that the simulated buffer stays in range.
 *   h - encoder context; entries in h->rc get new_qscale and expected_vbv
 *       rewritten.
 * NOTE(review): no NULL check on the x264_malloc result and no release of
 * fills is visible in the lines shown here - confirm both exist. */
1725 static void vbv_pass2( x264_t *h )
1727 /* for each interval of buffer_full .. underflow, uniformly increase the qp of all
1728 * frames in the interval until either buffer is full at some intermediate frame or the
1729 * last frame in the interval no longer underflows. Recompute intervals and repeat.
1730 * Then do the converse to put bits back into overflow areas until target size is met */
1732 x264_ratecontrol_t *rcc = h->rc;
/* One slot more than there are entries: fills[-1] is written below, so the
 * pointer is presumably advanced by one element in a line not shown here -
 * TODO confirm. */
1733 double *fills = x264_malloc((rcc->num_entries+1)*sizeof(double));
1734 double all_available_bits = h->param.rc.i_bitrate * 1000. * rcc->num_entries / rcc->fps;
1735 double expected_bits = 0;
1737 double prev_bits = 0;
1739 double qscale_min = qp2qscale(h->param.rc.i_qp_min);
1740 double qscale_max = qp2qscale(h->param.rc.i_qp_max);
1742 int adj_min, adj_max;
1746 /* adjust overall stream size */
1750 prev_bits = expected_bits;
1752 if(expected_bits != 0)
1753 { /* not first iteration */
/* Scale factor for this round: the ratio of planned to available bits,
 * limited to [0.9, 0.999]. */
1754 adjustment = X264_MAX(X264_MIN(expected_bits / all_available_bits, 0.999), 0.9);
1755 fills[-1] = rcc->buffer_size * h->param.rc.f_vbv_buffer_init;
/* Overflow pass: keep adjusting intervals while something still changes. */
1759 while(adj_min && find_underflow(h, fills, &t0, &t1, 1))
1761 adj_min = fix_underflow(h, t0, t1, adjustment, qscale_min, qscale_max);
1766 fills[-1] = rcc->buffer_size * (1. - h->param.rc.f_vbv_buffer_init);
1768 /* fix underflows -- should be done after overflow, as we'd better undersize target than underflowing VBV */
/* Underflow pass: raise qscales by a factor of 1.001 per step. */
1770 while(adj_max && find_underflow(h, fills, &t0, &t1, 0))
1771 adj_max = fix_underflow(h, t0, t1, 1.001, qscale_min, qscale_max);
/* Repeat while the plan is below 99.5 percent of the available bits and the
 * rounded total still grows from one round to the next. */
1773 expected_bits = count_expected_bits(h);
1774 } while((expected_bits < .995*all_available_bits) && ((int)(expected_bits+.5) > (int)(prev_bits+.5)) );
1777 x264_log( h, X264_LOG_WARNING, "vbv-maxrate issue, qpmax or vbv-maxrate too low\n");
1779 /* store expected vbv filling values for tracking when encoding */
1780 for(i = 0; i < rcc->num_entries; i++)
1781 rcc->entry[i].expected_vbv = rcc->buffer_size - fills[i];
1786 static int init_pass2( x264_t *h )
1788 x264_ratecontrol_t *rcc = h->rc;
1789 uint64_t all_const_bits = 0;
1790 uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000. * rcc->num_entries / rcc->fps);
1791 double rate_factor, step, step_mult;
1792 double qblur = h->param.rc.f_qblur;
1793 double cplxblur = h->param.rc.f_complexity_blur;
1794 const int filter_size = (int)(qblur*4) | 1;
1795 double expected_bits;
1796 double *qscale, *blurred_qscale;
1799 /* find total/average complexity & const_bits */
1800 for(i=0; i<rcc->num_entries; i++)
1802 ratecontrol_entry_t *rce = &rcc->entry[i];
1803 all_const_bits += rce->misc_bits;
1806 if( all_available_bits < all_const_bits)
1808 x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
1809 (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000.)));
1813 /* Blur complexities, to reduce local fluctuation of QP.
1814 * We don't blur the QPs directly, because then one very simple frame
1815 * could drag down the QP of a nearby complex frame and give it more
1816 * bits than intended. */
1817 for(i=0; i<rcc->num_entries; i++)
1819 ratecontrol_entry_t *rce = &rcc->entry[i];
1820 double weight_sum = 0;
1821 double cplx_sum = 0;
1822 double weight = 1.0;
1823 double gaussian_weight;
1825 /* weighted average of cplx of future frames */
1826 for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++)
1828 ratecontrol_entry_t *rcj = &rcc->entry[i+j];
1829 weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1832 gaussian_weight = weight * exp(-j*j/200.0);
1833 weight_sum += gaussian_weight;
1834 cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
1836 /* weighted average of cplx of past frames */
1838 for(j=0; j<=cplxblur*2 && j<=i; j++)
1840 ratecontrol_entry_t *rcj = &rcc->entry[i-j];
1841 gaussian_weight = weight * exp(-j*j/200.0);
1842 weight_sum += gaussian_weight;
1843 cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
1844 weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1848 rce->blurred_complexity = cplx_sum / weight_sum;
1851 qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1853 blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1855 blurred_qscale = qscale;
1857 /* Search for a factor which, when multiplied by the RCEQ values from
1858 * each frame, adds up to the desired total size.
1859 * There is no exact closed-form solution because of VBV constraints and
1860 * because qscale2bits is not invertible, but we can start with the simple
1861 * approximation of scaling the 1st pass by the ratio of bitrates.
1862 * The search range is probably overkill, but speed doesn't matter here. */
1865 for(i=0; i<rcc->num_entries; i++)
1866 expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0, i));
1867 step_mult = all_available_bits / expected_bits;
1870 for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5)
1873 rate_factor += step;
1875 rcc->last_non_b_pict_type = -1;
1876 rcc->last_accum_p_norm = 1;
1877 rcc->accum_p_norm = 0;
1880 for(i=0; i<rcc->num_entries; i++)
1882 qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor, i);
1885 /* fixed I/B qscale relative to P */
1886 for(i=rcc->num_entries-1; i>=0; i--)
1888 qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
1889 assert(qscale[i] >= 0);
1895 assert(filter_size%2==1);
1896 for(i=0; i<rcc->num_entries; i++)
1898 ratecontrol_entry_t *rce = &rcc->entry[i];
1900 double q=0.0, sum=0.0;
1902 for(j=0; j<filter_size; j++)
1904 int index = i+j-filter_size/2;
1906 double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
1907 if(index < 0 || index >= rcc->num_entries)
1909 if(rce->pict_type != rcc->entry[index].pict_type)
1911 q += qscale[index] * coeff;
1914 blurred_qscale[i] = q/sum;
1918 /* find expected bits */
1919 for(i=0; i<rcc->num_entries; i++)
1921 ratecontrol_entry_t *rce = &rcc->entry[i];
1922 rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]);
1923 assert(rce->new_qscale >= 0);
1924 expected_bits += qscale2bits(rce, rce->new_qscale);
1927 if(expected_bits > all_available_bits) rate_factor -= step;
1932 x264_free(blurred_qscale);
1936 expected_bits = count_expected_bits(h);
1938 if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
1941 for(i=0; i<rcc->num_entries; i++)
1942 avgq += rcc->entry[i].new_qscale;
1943 avgq = qscale2qp(avgq / rcc->num_entries);
1945 if ((expected_bits > all_available_bits) || (!rcc->b_vbv))
1946 x264_log(h, X264_LOG_WARNING, "Error: 2pass curve failed to converge\n");
1947 x264_log(h, X264_LOG_WARNING, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
1948 (float)h->param.rc.i_bitrate,
1949 expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1951 if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1953 if(h->param.rc.i_qp_min > 0)
1954 x264_log(h, X264_LOG_WARNING, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1956 x264_log(h, X264_LOG_WARNING, "try reducing target bitrate\n");
1958 else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1960 if(h->param.rc.i_qp_max < 51)
1961 x264_log(h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1963 x264_log(h, X264_LOG_WARNING, "try increasing target bitrate\n");
1965 else if(!(rcc->b_2pass && rcc->b_vbv))
1966 x264_log(h, X264_LOG_WARNING, "internal error\n");