git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2005-2008 x264 project
   5  *
   6  * Authors: Loren Merritt <lorenm@u.washington.edu>
   7  *          Michael Niedermayer <michaelni@gmx.at>
   8  *          Gabriel Bouvigne <gabriel.bouvigne@joost.com>
   9  *          Fiona Glaser <fiona@x264.com>
  10  *          Måns Rullgård <mru@mru.ath.cx>
  11  *
  12  * This program is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or
  15  * (at your option) any later version.
  16  *
  17  * This program is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU General Public License
  23  * along with this program; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  25  *****************************************************************************/
  26
  27 #define _ISOC99_SOURCE
  28 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  29 #include <math.h>
  30 #include <limits.h>
  31 #include <assert.h>
  32
  33 #include "common/common.h"
  34 #include "common/cpu.h"
  35 #include "ratecontrol.h"
  36
  37 typedef struct
  38 {
  39     int pict_type;
  40     int kept_as_ref;
  41     float qscale;
  42     int mv_bits;
  43     int i_tex_bits;
  44     int p_tex_bits;
  45     int misc_bits;
  46     uint64_t expected_bits;
  47     double expected_vbv;
  48     float new_qscale;
  49     int new_qp;
  50     int i_count;
  51     int p_count;
  52     int s_count;
  53     float blurred_complexity;
  54     char direct_mode;
  55 } ratecontrol_entry_t;
  56
  57 typedef struct
  58 {
  59     double coeff;
  60     double count;
  61     double decay;
  62 } predictor_t;
  63
  64 struct x264_ratecontrol_t
  65 {
  66     /* constants */
  67     int b_abr;
  68     int b_2pass;
  69     int b_vbv;
  70     int b_vbv_min_rate;
  71     double fps;
  72     double bitrate;
  73     double rate_tolerance;
  74     int nmb;                    /* number of macroblocks in a frame */
  75     int qp_constant[5];
  76
  77     /* current frame */
  78     ratecontrol_entry_t *rce;
  79     int qp;                     /* qp for current frame */
  80     int qpm;                    /* qp for current macroblock */
  81     float f_qpm;                /* qp for current macroblock: precise float for AQ */
  82     float qpa_rc;               /* average of macroblocks' qp before aq */
  83     float qpa_aq;               /* average of macroblocks' qp after aq */
  84     int qp_force;
  85
  86     /* VBV stuff */
  87     double buffer_size;
  88     double buffer_fill_final;   /* real buffer as of the last finished frame */
  89     double buffer_fill;         /* planned buffer, if all in-progress frames hit their bit budget */
  90     double buffer_rate;         /* # of bits added to buffer_fill after each frame */
  91     predictor_t *pred;          /* predict frame size from satd */
  92
  93     /* ABR stuff */
  94     int    last_satd;
  95     double last_rceq;
  96     double cplxr_sum;           /* sum of bits*qscale/rceq */
  97     double expected_bits_sum;   /* sum of qscale2bits after rceq, ratefactor, and overflow */
  98     double wanted_bits_window;  /* target bitrate * window */
  99     double cbr_decay;
 100     double short_term_cplxsum;
 101     double short_term_cplxcount;
 102     double rate_factor_constant;
 103     double ip_offset;
 104     double pb_offset;
 105
 106     /* 2pass stuff */
 107     FILE *p_stat_file_out;
 108     char *psz_stat_file_tmpname;
 109
 110     int num_entries;            /* number of ratecontrol_entry_ts */
 111     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
 112     double last_qscale;
 113     double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
 114     int last_non_b_pict_type;
 115     double accum_p_qp;          /* for determining I-frame quant */
 116     double accum_p_norm;
 117     double last_accum_p_norm;
 118     double lmin[5];             /* min qscale by frame type */
 119     double lmax[5];
 120     double lstep;               /* max change (multiply) in qscale per frame */
 121     double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
 122     double p_cplx_sum[5];
 123     double mv_bits_sum[5];
 124     int frame_count[5];         /* number of frames of each type */
 125
 126     /* MBRC stuff */
 127     double frame_size_estimated;
 128     double frame_size_planned;
 129     predictor_t *row_pred;
 130     predictor_t row_preds[5];
 131     predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
 132     int bframes;                /* # consecutive B-frames before this P-frame */
 133     int bframe_bits;            /* total cost of those frames */
 134
 135     /* AQ stuff */
 136     float aq_threshold;
 137     int *ac_energy;
 138
 139     int i_zones;
 140     x264_zone_t *zones;
 141     x264_zone_t *prev_zone;
 142 };
 143
 144
 145 static int parse_zones( x264_t *h );
 146 static int init_pass2(x264_t *);
 147 static float rate_estimate_qscale( x264_t *h );
 148 static void update_vbv( x264_t *h, int bits );
 149 static void update_vbv_plan( x264_t *h );
 150 static double predict_size( predictor_t *p, double q, double var );
 151 static void update_predictor( predictor_t *p, double q, double var, double bits );
 152 int  x264_rc_analyse_slice( x264_t *h );
 153
 154 /* Terminology:
 155  * qp = h.264's quantizer
 156  * qscale = linearized quantizer = Lagrange multiplier
 157  */
 158 static inline double qp2qscale(double qp)
 159 {
 160     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 161 }
 162 static inline double qscale2qp(double qscale)
 163 {
 164     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 165 }
 166
 167 /* Texture bitrate is not quite inversely proportional to qscale,
 168  * probably due the the changing number of SKIP blocks.
 169  * MV bits level off at about qp<=12, because the lambda used
 170  * for motion estimation is constant there. */
 171 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 172 {
 173     if(qscale<0.1)
 174         qscale = 0.1;
 175     return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
 176            + rce->mv_bits * pow( X264_MAX(rce->qscale, 1) / X264_MAX(qscale, 1), 0.5 )
 177            + rce->misc_bits;
 178 }
 179
 180 // Find the total AC energy of the block in all planes.
 181 static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
 182 {
 183     /* This function contains annoying hacks because GCC has a habit of reordering emms
 184      * and putting it after floating point ops.  As a result, we put the emms at the end of the
 185      * function and make sure that its always called before the float math.  Noinline makes
 186      * sure no reordering goes on. */
 187     /* FIXME: This array is larger than necessary because a bug in GCC causes an all-zero
 188     * array to be placed in .bss despite .bss not being correctly aligned on some platforms (win32?) */
 189     DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
 190     unsigned int var=0, sad, ssd, i;
 191     if( satd || h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
 192     {
 193         for( i=0; i<3; i++ )
 194         {
 195             int w = i ? 8 : 16;
 196             int stride = h->fenc->i_stride[i];
 197             int offset = h->mb.b_interlaced
 198                 ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
 199                 : w * (mb_x + mb_y * stride);
 200             int pix = i ? PIXEL_8x8 : PIXEL_16x16;
 201             stride <<= h->mb.b_interlaced;
 202             sad = h->pixf.sad[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
 203             ssd = h->pixf.ssd[pix]( zero, 0, h->fenc->plane[i]+offset, stride );
 204             var += ssd - (sad * sad >> (i?6:8));
 205             // SATD to represent the block's overall complexity (bit cost) for intra encoding.
 206             // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
 207             if( var && satd )
 208                 *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - sad/2;
 209         }
 210         var = X264_MAX(var,1);
 211     }
 212     else var = h->rc->ac_energy[h->mb.i_mb_xy];
 213     x264_emms();
 214     return var;
 215 }
 216
 217 void x264_autosense_aq( x264_t *h )
 218 {
 219     double total = 0;
 220     double n = 0;
 221     int mb_x, mb_y;
 222     // FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?). Can we reuse them?
 223     // FIXME: Is chroma SATD necessary?
 224     for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
 225         for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
 226         {
 227             int satd=0;
 228             int energy = ac_energy_mb( h, mb_x, mb_y, &satd );
 229             h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
 230             /* Weight the energy value by the SATD value of the MB.
 231              * This represents the fact that the more complex blocks in a frame should
 232              * be weighted more when calculating the optimal threshold. This also helps
 233              * diminish the negative effect of large numbers of simple blocks in a frame,
 234              * such as in the case of a letterboxed film. */
 235             total += logf(energy) * satd;
 236             n += satd;
 237         }
 238     x264_emms();
 239     /* Calculate and store the threshold. */
 240     h->rc->aq_threshold = n ? total/n : 15;
 241 }
 242
 243 /*****************************************************************************
 244 * x264_adaptive_quant:
 245  * adjust macroblock QP based on variance (AC energy) of the MB.
 246  * high variance  = higher QP
 247  * low variance = lower QP
 248  * This generally increases SSIM and lowers PSNR.
 249 *****************************************************************************/
 250 void x264_adaptive_quant( x264_t *h )
 251 {
 252     int energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
 253     /* Adjust the QP based on the AC energy of the macroblock. */
 254     float qp = h->rc->f_qpm;
 255     float qp_adj = 1.5 * (logf(energy) - h->rc->aq_threshold);
 256     if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
 257         qp_adj = x264_clip3f( qp_adj, -5, 5 );
 258     h->mb.i_qp = x264_clip3( qp + qp_adj * h->param.rc.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
 259     /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
 260      * to lower the bit cost of the qp_delta. */
 261     if( abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
 262         h->mb.i_qp = h->mb.i_last_qp;
 263     h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 264 }
 265
 266 int x264_ratecontrol_new( x264_t *h )
 267 {
 268     x264_ratecontrol_t *rc;
 269     int i;
 270
 271     x264_emms();
 272
 273     rc = h->rc = x264_malloc( h->param.i_threads * sizeof(x264_ratecontrol_t) );
 274     memset( rc, 0, h->param.i_threads * sizeof(x264_ratecontrol_t) );
 275
 276     rc->b_abr = h->param.rc.i_rc_method != X264_RC_CQP && !h->param.rc.b_stat_read;
 277     rc->b_2pass = h->param.rc.i_rc_method == X264_RC_ABR && h->param.rc.b_stat_read;
 278
 279     /* FIXME: use integers */
 280     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 281         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 282     else
 283         rc->fps = 25.0;
 284
 285     rc->bitrate = h->param.rc.i_bitrate * 1000.;
 286     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
 287     rc->nmb = h->mb.i_mb_count;
 288     rc->last_non_b_pict_type = -1;
 289     rc->cbr_decay = 1.0;
 290
 291     if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read )
 292     {
 293         x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
 294         return -1;
 295     }
 296     if( h->param.rc.i_vbv_buffer_size )
 297     {
 298         if( h->param.rc.i_rc_method == X264_RC_CQP )
 299             x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
 300         else if( h->param.rc.i_vbv_max_bitrate == 0 )
 301         {
 302             x264_log( h, X264_LOG_DEBUG, "VBV maxrate unspecified, assuming CBR\n" );
 303             h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
 304         }
 305     }
 306     if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
 307         h->param.rc.i_vbv_max_bitrate > 0)
 308         x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
 309     else if( h->param.rc.i_vbv_max_bitrate > 0 &&
 310              h->param.rc.i_vbv_buffer_size > 0 )
 311     {
 312         if( h->param.rc.i_vbv_buffer_size < 3 * h->param.rc.i_vbv_max_bitrate / rc->fps )
 313         {
 314             h->param.rc.i_vbv_buffer_size = 3 * h->param.rc.i_vbv_max_bitrate / rc->fps;
 315             x264_log( h, X264_LOG_WARNING, "VBV buffer size too small, using %d kbit\n",
 316                       h->param.rc.i_vbv_buffer_size );
 317         }
 318         if( h->param.rc.f_vbv_buffer_init > 1. )
 319             h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
 320         rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
 321         rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
 322         rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
 323         rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
 324                       * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
 325         rc->b_vbv = 1;
 326         rc->b_vbv_min_rate = !rc->b_2pass
 327                           && h->param.rc.i_rc_method == X264_RC_ABR
 328                           && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
 329     }
 330     else if( h->param.rc.i_vbv_max_bitrate )
 331     {
 332         x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
 333         h->param.rc.i_vbv_max_bitrate = 0;
 334     }
 335     if(rc->rate_tolerance < 0.01)
 336     {
 337         x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
 338         rc->rate_tolerance = 0.01;
 339     }
 340
 341     h->mb.b_variable_qp = rc->b_vbv || h->param.rc.i_aq_mode;
 342
 343     if( rc->b_abr )
 344     {
 345         /* FIXME ABR_INIT_QP is actually used only in CRF */
 346 #define ABR_INIT_QP ( h->param.rc.i_rc_method == X264_RC_CRF ? h->param.rc.f_rf_constant : 24 )
 347         rc->accum_p_norm = .01;
 348         rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
 349         /* estimated ratio that produces a reasonable QP for the first I-frame */
 350         rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
 351         rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
 352         rc->last_non_b_pict_type = SLICE_TYPE_I;
 353     }
 354
 355     if( h->param.rc.i_rc_method == X264_RC_CRF )
 356     {
 357         /* arbitrary rescaling to make CRF somewhat similar to QP */
 358         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
 359         rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
 360                                  / qp2qscale( h->param.rc.f_rf_constant );
 361     }
 362
 363     rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
 364     rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
 365     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 366     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
 367     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
 368
 369     rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
 370     rc->last_qscale = qp2qscale(26);
 371     rc->pred = x264_malloc( 5*sizeof(predictor_t) );
 372     rc->pred_b_from_p = x264_malloc( sizeof(predictor_t) );
 373     for( i = 0; i < 5; i++ )
 374     {
 375         rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP );
 376         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 377         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 378         rc->pred[i].coeff= 2.0;
 379         rc->pred[i].count= 1.0;
 380         rc->pred[i].decay= 0.5;
 381         rc->row_preds[i].coeff= .25;
 382         rc->row_preds[i].count= 1.0;
 383         rc->row_preds[i].decay= 0.5;
 384     }
 385     *rc->pred_b_from_p = rc->pred[0];
 386
 387     if( parse_zones( h ) < 0 )
 388     {
 389         x264_log( h, X264_LOG_ERROR, "failed to parse zones\n" );
 390         return -1;
 391     }
 392
 393     /* Load stat file and init 2pass algo */
 394     if( h->param.rc.b_stat_read )
 395     {
 396         char *p, *stats_in, *stats_buf;
 397
 398         /* read 1st pass stats */
 399         assert( h->param.rc.psz_stat_in );
 400         stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in );
 401         if( !stats_buf )
 402         {
 403             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 404             return -1;
 405         }
 406
 407         /* check whether 1st pass options were compatible with current options */
 408         if( !strncmp( stats_buf, "#options:", 9 ) )
 409         {
 410             int i;
 411             char *opts = stats_buf;
 412             stats_in = strchr( stats_buf, '\n' );
 413             if( !stats_in )
 414                 return -1;
 415             *stats_in = '\0';
 416             stats_in++;
 417
 418             if( ( p = strstr( opts, "bframes=" ) ) && sscanf( p, "bframes=%d", &i )
 419                 && h->param.i_bframe != i )
 420             {
 421                 x264_log( h, X264_LOG_ERROR, "different number of B-frames than 1st pass (%d vs %d)\n",
 422                           h->param.i_bframe, i );
 423                 return -1;
 424             }
 425
 426             /* since B-adapt doesn't (yet) take into account B-pyramid,
 427              * the converse is not a problem */
 428             if( strstr( opts, "b_pyramid=1" ) && !h->param.b_bframe_pyramid )
 429                 x264_log( h, X264_LOG_WARNING, "1st pass used B-pyramid, 2nd doesn't\n" );
 430
 431             if( ( p = strstr( opts, "keyint=" ) ) && sscanf( p, "keyint=%d", &i )
 432                 && h->param.i_keyint_max != i )
 433                 x264_log( h, X264_LOG_WARNING, "different keyint than 1st pass (%d vs %d)\n",
 434                           h->param.i_keyint_max, i );
 435
 436             if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
 437                 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
 438         }
 439
 440         /* find number of pics */
 441         p = stats_in;
 442         for(i=-1; p; i++)
 443             p = strchr(p+1, ';');
 444         if(i==0)
 445         {
 446             x264_log(h, X264_LOG_ERROR, "empty stats file\n");
 447             return -1;
 448         }
 449         rc->num_entries = i;
 450
 451         if( h->param.i_frame_total < rc->num_entries && h->param.i_frame_total > 0 )
 452         {
 453             x264_log( h, X264_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
 454                       h->param.i_frame_total, rc->num_entries );
 455         }
 456         if( h->param.i_frame_total > rc->num_entries + h->param.i_bframe )
 457         {
 458             x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
 459                       h->param.i_frame_total, rc->num_entries );
 460             return -1;
 461         }
 462
 463         /* FIXME: ugly padding because VfW drops delayed B-frames */
 464         rc->num_entries += h->param.i_bframe;
 465
 466         rc->entry = (ratecontrol_entry_t*) x264_malloc(rc->num_entries * sizeof(ratecontrol_entry_t));
 467         memset(rc->entry, 0, rc->num_entries * sizeof(ratecontrol_entry_t));
 468
 469         /* init all to skipped p frames */
 470         for(i=0; i<rc->num_entries; i++)
 471         {
 472             ratecontrol_entry_t *rce = &rc->entry[i];
 473             rce->pict_type = SLICE_TYPE_P;
 474             rce->qscale = rce->new_qscale = qp2qscale(20);
 475             rce->misc_bits = rc->nmb + 10;
 476             rce->new_qp = 0;
 477         }
 478
 479         /* read stats */
 480         p = stats_in;
 481         for(i=0; i < rc->num_entries - h->param.i_bframe; i++)
 482         {
 483             ratecontrol_entry_t *rce;
 484             int frame_number;
 485             char pict_type;
 486             int e;
 487             char *next;
 488             float qp;
 489
 490             next= strchr(p, ';');
 491             if(next)
 492             {
 493                 (*next)=0; //sscanf is unbelievably slow on long strings
 494                 next++;
 495             }
 496             e = sscanf(p, " in:%d ", &frame_number);
 497
 498             if(frame_number < 0 || frame_number >= rc->num_entries)
 499             {
 500                 x264_log(h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i);
 501                 return -1;
 502             }
 503             rce = &rc->entry[frame_number];
 504             rce->direct_mode = 0;
 505
 506             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c",
 507                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 508                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count,
 509                    &rce->s_count, &rce->direct_mode);
 510
 511             switch(pict_type)
 512             {
 513                 case 'I': rce->kept_as_ref = 1;
 514                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 515                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 516                 case 'B': rce->kept_as_ref = 1;
 517                 case 'b': rce->pict_type = SLICE_TYPE_B; break;
 518                 default:  e = -1; break;
 519             }
 520             if(e < 10)
 521             {
 522                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 523                 return -1;
 524             }
 525             rce->qscale = qp2qscale(qp);
 526             p = next;
 527         }
 528
 529         x264_free(stats_buf);
 530
 531         if(h->param.rc.i_rc_method == X264_RC_ABR)
 532         {
 533             if(init_pass2(h) < 0) return -1;
 534         } /* else we're using constant quant, so no need to run the bitrate allocation */
 535     }
 536
 537     /* Open output file */
 538     /* If input and output files are the same, output to a temp file
 539      * and move it to the real name only when it's complete */
 540     if( h->param.rc.b_stat_write )
 541     {
 542         char *p;
 543
 544         rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
 545         strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 546         strcat( rc->psz_stat_file_tmpname, ".temp" );
 547
 548         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 549         if( rc->p_stat_file_out == NULL )
 550         {
 551             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 552             return -1;
 553         }
 554
 555         p = x264_param2string( &h->param, 1 );
 556         fprintf( rc->p_stat_file_out, "#options: %s\n", p );
 557         x264_free( p );
 558     }
 559
 560     for( i=0; i<h->param.i_threads; i++ )
 561     {
 562         h->thread[i]->rc = rc+i;
 563         if( i )
 564             rc[i] = rc[0];
 565         if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
 566             rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
 567     }
 568
 569     return 0;
 570 }
 571
 572 static int parse_zone( x264_t *h, x264_zone_t *z, char *p )
 573 {
 574     int len = 0;
 575     char *tok, *saveptr;
 576     z->param = NULL;
 577     z->f_bitrate_factor = 1;
 578     if( 3 <= sscanf(p, "%u,%u,q=%u%n", &z->i_start, &z->i_end, &z->i_qp, &len) )
 579         z->b_force_qp = 1;
 580     else if( 3 <= sscanf(p, "%u,%u,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) )
 581         z->b_force_qp = 0;
 582     else if( 2 <= sscanf(p, "%u,%u%n", &z->i_start, &z->i_end, &len) )
 583         z->b_force_qp = 0;
 584     else
 585     {
 586         x264_log( h, X264_LOG_ERROR, "invalid zone: \"%s\"\n", p );
 587         return -1;
 588     }
 589     p += len;
 590     if( !*p )
 591         return 0;
 592     z->param = malloc( sizeof(x264_param_t) );
 593     memcpy( z->param, &h->param, sizeof(x264_param_t) );
 594     while( (tok = strtok_r( p, ",", &saveptr )) )
 595     {
 596         char *val = strchr( tok, '=' );
 597         if( val )
 598         {
 599             *val = '\0';
 600             val++;
 601         }
 602         if( x264_param_parse( z->param, tok, val ) )
 603         {
 604             x264_log( h, X264_LOG_ERROR, "invalid zone param: %s = %s\n", tok, val );
 605             return -1;
 606         }
 607         p = NULL;
 608     }
 609     return 0;
 610 }
 611
 612 static int parse_zones( x264_t *h )
 613 {
 614     x264_ratecontrol_t *rc = h->rc;
 615     int i;
 616     if( h->param.rc.psz_zones && !h->param.rc.i_zones )
 617     {
 618         char *p, *tok, *saveptr;
 619         char *psz_zones = x264_malloc( strlen(h->param.rc.psz_zones)+1 );
 620         strcpy( psz_zones, h->param.rc.psz_zones );
 621         h->param.rc.i_zones = 1;
 622         for( p = psz_zones; *p; p++ )
 623             h->param.rc.i_zones += (*p == '/');
 624         h->param.rc.zones = x264_malloc( h->param.rc.i_zones * sizeof(x264_zone_t) );
 625         p = psz_zones;
 626         for( i = 0; i < h->param.rc.i_zones; i++ )
 627         {
 628             tok = strtok_r( p, "/", &saveptr );
 629             if( !tok || parse_zone( h, &h->param.rc.zones[i], tok ) )
 630                 return -1;
 631             p = NULL;
 632         }
 633         x264_free( psz_zones );
 634     }
 635
 636     if( h->param.rc.i_zones > 0 )
 637     {
 638         for( i = 0; i < h->param.rc.i_zones; i++ )
 639         {
 640             x264_zone_t z = h->param.rc.zones[i];
 641             if( z.i_start < 0 || z.i_start > z.i_end )
 642             {
 643                 x264_log( h, X264_LOG_ERROR, "invalid zone: start=%d end=%d\n",
 644                           z.i_start, z.i_end );
 645                 return -1;
 646             }
 647             else if( !z.b_force_qp && z.f_bitrate_factor <= 0 )
 648             {
 649                 x264_log( h, X264_LOG_ERROR, "invalid zone: bitrate_factor=%f\n",
 650                           z.f_bitrate_factor );
 651                 return -1;
 652             }
 653         }
 654
 655         rc->i_zones = h->param.rc.i_zones + 1;
 656         rc->zones = x264_malloc( rc->i_zones * sizeof(x264_zone_t) );
 657         memcpy( rc->zones+1, h->param.rc.zones, (rc->i_zones-1) * sizeof(x264_zone_t) );
 658
 659         // default zone to fall back to if none of the others match
 660         rc->zones[0].i_start = 0;
 661         rc->zones[0].i_end = INT_MAX;
 662         rc->zones[0].b_force_qp = 0;
 663         rc->zones[0].f_bitrate_factor = 1;
 664         rc->zones[0].param = x264_malloc( sizeof(x264_param_t) );
 665         memcpy( rc->zones[0].param, &h->param, sizeof(x264_param_t) );
 666         for( i = 1; i < rc->i_zones; i++ )
 667         {
 668             if( !rc->zones[i].param )
 669                 rc->zones[i].param = rc->zones[0].param;
 670         }
 671     }
 672
 673     return 0;
 674 }
 675
 676 x264_zone_t *get_zone( x264_t *h, int frame_num )
 677 {
 678     int i;
 679     for( i = h->rc->i_zones-1; i >= 0; i-- )
 680     {
 681         x264_zone_t *z = &h->rc->zones[i];
 682         if( frame_num >= z->i_start && frame_num <= z->i_end )
 683             return z;
 684     }
 685     return NULL;
 686 }
 687
 688 void x264_ratecontrol_summary( x264_t *h )
 689 {
 690     x264_ratecontrol_t *rc = h->rc;
 691     if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR && rc->cbr_decay > .9999 )
 692     {
 693         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
 694         x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
 695                   qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
 696                              * rc->cplxr_sum / rc->wanted_bits_window ) );
 697     }
 698 }
 699
 700 void x264_ratecontrol_delete( x264_t *h )
 701 {
 702     x264_ratecontrol_t *rc = h->rc;
 703     int i;
 704
 705     if( rc->p_stat_file_out )
 706     {
 707         fclose( rc->p_stat_file_out );
 708         if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 709             if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
 710             {
 711                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
 712                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 713             }
 714         x264_free( rc->psz_stat_file_tmpname );
 715     }
 716     x264_free( rc->pred );
 717     x264_free( rc->pred_b_from_p );
 718     x264_free( rc->entry );
 719     if( rc->zones )
 720     {
 721         x264_free( rc->zones[0].param );
 722         if( h->param.rc.psz_zones )
 723             for( i=1; i<rc->i_zones; i++ )
 724                 if( rc->zones[i].param != rc->zones[0].param )
 725                     x264_free( rc->zones[i].param );
 726         x264_free( rc->zones );
 727     }
 728     for( i=0; i<h->param.i_threads; i++ )
 729         x264_free( rc[i].ac_energy );
 730     x264_free( rc );
 731 }
 732
 733 void x264_ratecontrol_set_estimated_size( x264_t *h, int bits )
 734 {
 735     x264_pthread_mutex_lock( &h->fenc->mutex );
 736     h->rc->frame_size_estimated = bits;
 737     x264_pthread_mutex_unlock( &h->fenc->mutex );
 738 }
 739
 740 int x264_ratecontrol_get_estimated_size( x264_t const *h)
 741 {
 742     int size;
 743     x264_pthread_mutex_lock( &h->fenc->mutex );
 744     size = h->rc->frame_size_estimated;
 745     x264_pthread_mutex_unlock( &h->fenc->mutex );
 746     return size;
 747 }
 748
 749 static void accum_p_qp_update( x264_t *h, float qp )
 750 {
 751     x264_ratecontrol_t *rc = h->rc;
 752     rc->accum_p_qp   *= .95;
 753     rc->accum_p_norm *= .95;
 754     rc->accum_p_norm += 1;
 755     if( h->sh.i_type == SLICE_TYPE_I )
 756         rc->accum_p_qp += qp + rc->ip_offset;
 757     else
 758         rc->accum_p_qp += qp;
 759 }
 760
 761 /* Before encoding a frame, choose a QP for it */
 762 void x264_ratecontrol_start( x264_t *h, int i_force_qp )
 763 {
 764     x264_ratecontrol_t *rc = h->rc;
 765     ratecontrol_entry_t *rce = NULL;
 766     x264_zone_t *zone = get_zone( h, h->fenc->i_frame );
 767     float q;
 768
 769     x264_emms();
 770
 771     if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
 772         x264_encoder_reconfig( h, zone->param );
 773     rc->prev_zone = zone;
 774
 775     rc->qp_force = i_force_qp;
 776
 777     if( h->param.rc.b_stat_read )
 778     {
 779         int frame = h->fenc->i_frame;
 780         assert( frame >= 0 && frame < rc->num_entries );
 781         rce = h->rc->rce = &h->rc->entry[frame];
 782
 783         if( h->sh.i_type == SLICE_TYPE_B
 784             && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO )
 785         {
 786             h->sh.b_direct_spatial_mv_pred = ( rce->direct_mode == 's' );
 787             h->mb.b_direct_auto_read = ( rce->direct_mode == 's' || rce->direct_mode == 't' );
 788         }
 789     }
 790
 791     if( rc->b_vbv )
 792     {
 793         memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) );
 794         rc->row_pred = &rc->row_preds[h->sh.i_type];
 795         update_vbv_plan( h );
 796     }
 797
 798     if( h->sh.i_type != SLICE_TYPE_B )
 799     {
 800         rc->bframes = 0;
 801         while( h->frames.current[rc->bframes] && IS_X264_TYPE_B(h->frames.current[rc->bframes]->i_type) )
 802             rc->bframes++;
 803     }
 804
 805     if( i_force_qp )
 806     {
 807         q = i_force_qp - 1;
 808     }
 809     else if( rc->b_abr )
 810     {
 811         q = qscale2qp( rate_estimate_qscale( h ) );
 812     }
 813     else if( rc->b_2pass )
 814     {
 815         rce->new_qscale = rate_estimate_qscale( h );
 816         q = qscale2qp( rce->new_qscale );
 817     }
 818     else /* CQP */
 819     {
 820         if( h->sh.i_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
 821             q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
 822         else
 823             q = rc->qp_constant[ h->sh.i_type ];
 824
 825         if( zone )
 826         {
 827             if( zone->b_force_qp )
 828                 q += zone->i_qp - rc->qp_constant[SLICE_TYPE_P];
 829             else
 830                 q -= 6*log(zone->f_bitrate_factor)/log(2);
 831         }
 832     }
 833
 834     rc->qpa_rc =
 835     rc->qpa_aq = 0;
 836     h->fdec->f_qp_avg_rc =
 837     h->fdec->f_qp_avg_aq =
 838     rc->qpm =
 839     rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
 840     rc->f_qpm = q;
 841     if( rce )
 842         rce->new_qp = rc->qp;
 843
 844     /* accum_p_qp needs to be here so that future frames can benefit from the
 845      * data before this frame is done. but this only works because threading
 846      * guarantees to not re-encode any frames. so the non-threaded case does
 847      * accum_p_qp later. */
 848     if( h->param.i_threads > 1 )
 849         accum_p_qp_update( h, rc->qp );
 850
 851     if( h->sh.i_type != SLICE_TYPE_B )
 852         rc->last_non_b_pict_type = h->sh.i_type;
 853
 854     /* Adaptive AQ thresholding algorithm. */
 855     if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
 856         /* Arbitrary value for "center" of the AQ curve.
 857          * Chosen so that any given value of CRF has on average similar bitrate with and without AQ. */
 858         h->rc->aq_threshold = logf(5000);
 859     else if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
 860         x264_autosense_aq(h);
 861 }
 862
 863 double predict_row_size( x264_t *h, int y, int qp )
 864 {
 865     /* average between two predictors:
 866      * absolute SATD, and scaled bit cost of the colocated row in the previous frame */
 867     x264_ratecontrol_t *rc = h->rc;
 868     double pred_s = predict_size( rc->row_pred, qp2qscale(qp), h->fdec->i_row_satd[y] );
 869     double pred_t = 0;
 870     if( h->sh.i_type != SLICE_TYPE_I
 871         && h->fref0[0]->i_type == h->fdec->i_type
 872         && h->fref0[0]->i_row_satd[y] > 0 )
 873     {
 874         pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y]
 875                  * qp2qscale(h->fref0[0]->i_row_qp[y]) / qp2qscale(qp);
 876     }
 877     if( pred_t == 0 )
 878         pred_t = pred_s;
 879
 880     return (pred_s + pred_t) / 2;
 881 }
 882
 883 double row_bits_so_far( x264_t *h, int y )
 884 {
 885     int i;
 886     double bits = 0;
 887     for( i = 0; i <= y; i++ )
 888         bits += h->fdec->i_row_bits[i];
 889     return bits;
 890 }
 891
 892 double predict_row_size_sum( x264_t *h, int y, int qp )
 893 {
 894     int i;
 895     double bits = row_bits_so_far(h, y);
 896     for( i = y+1; i < h->sps->i_mb_height; i++ )
 897         bits += predict_row_size( h, i, qp );
 898     return bits;
 899 }
 900
 901
/* Per-macroblock ratecontrol update.
 *
 * bits is added to the bit count of the current macroblock row, and the
 * ratecontrol and actual macroblock QPs are added to the running sums
 * (qpa_rc / qpa_aq).  Nothing else happens unless this is the last macroblock
 * of a row and VBV is enabled; in that case the QP for the following rows
 * (rc->qpm) is re-evaluated and the frame size estimate is refreshed. */
void x264_ratecontrol_mb( x264_t *h, int bits )
{
    x264_ratecontrol_t *rc = h->rc;
    const int y = h->mb.i_mb_y;

    x264_emms();

    h->fdec->i_row_bits[y] += bits;
    rc->qpa_rc += rc->f_qpm;
    rc->qpa_aq += h->mb.i_qp;

    /* only act on the last macroblock of a row, and only with VBV */
    if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv)
        return;

    /* remember which QP this row was coded with */
    h->fdec->i_row_qp[y] = rc->qpm;

    if( h->sh.i_type == SLICE_TYPE_B )
    {
        /* B-frames shouldn't use lower QP than their reference frames.
         * This code is a bit overzealous in limiting B-frame quantizers, but it helps avoid
         * underflows due to the fact that B-frames are not explicitly covered by VBV. */
        if( y < h->sps->i_mb_height-1 )
        {
            int i_estimated;
            /* QP of the next row in the references (the larger of the two), plus the B offset
             * (half the offset when this frame is a BREF) */
            int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
                       + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1);
            rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset
            i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size
            /* only publish the estimate once it exceeds the planned size */
            if (i_estimated > h->rc->frame_size_planned)
                x264_ratecontrol_set_estimated_size(h, i_estimated);
        }
    }
    else
    {
        /* feed the finished row back into the row size predictor */
        update_predictor( rc->row_pred, qp2qscale(rc->qpm), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );

        /* tweak quality based on difference from predicted size */
        if( y < h->sps->i_mb_height-1 && h->stat.i_slice_count[h->sh.i_type] > 0 )
        {
            int prev_row_qp = h->fdec->i_row_qp[y];
            /* b1 tracks the predicted frame size at the QP currently under consideration */
            int b0 = predict_row_size_sum( h, y, rc->qpm );
            int b1 = b0;
            /* the row QP may move at most i_qp_step away from the previous row's QP */
            int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max );
            int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
            float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
            float rc_tol = 1;
            float headroom = 0;

            /* Don't modify the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
            /* area at the top of the frame was measured inaccurately. */
            if(row_bits_so_far(h,y) < 0.05 * rc->frame_size_planned)
                return;

            /* tolerance grows with the fraction of the buffer planned to be left over
             * (halved for non-I slices) */
            headroom = buffer_left_planned/rc->buffer_size;
            if(h->sh.i_type != SLICE_TYPE_I)
                headroom /= 2;
            rc_tol += headroom;

            /* unless b_vbv_min_rate is set, never go below the slice QP */
            if( !rc->b_vbv_min_rate )
                i_qp_min = X264_MAX( i_qp_min, h->sh.i_qp );

            /* raise QP while the predicted size exceeds the plan (with tolerance)
             * or would leave less than half of the planned buffer remainder */
            while( rc->qpm < i_qp_max
                   && (b1 > rc->frame_size_planned * rc_tol
                    || (rc->buffer_fill - b1 < buffer_left_planned * 0.5)))
            {
                rc->qpm ++;
                b1 = predict_row_size_sum( h, y, rc->qpm );
            }

            /* avoid VBV underflow */
            while( (rc->qpm < h->param.rc.i_qp_max)
                   && (rc->buffer_fill - b1 < rc->buffer_size * 0.005))
            {
                rc->qpm ++;
                b1 = predict_row_size_sum( h, y, rc->qpm );
            }

            /* lower QP while the frame is predicted to come in well under plan,
             * or under the size below which the buffer would end up over-full */
            while( rc->qpm > i_qp_min
                   && ((buffer_left_planned > rc->buffer_size * 0.4) || rc->qpm > h->fdec->i_row_qp[0])
                   && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp)
                     || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
            {
                rc->qpm --;
                b1 = predict_row_size_sum( h, y, rc->qpm );
            }
            x264_ratecontrol_set_estimated_size(h, b1);
        }
    }
    /* loses the fractional part of the frame-wise qp */
    rc->f_qpm = rc->qpm;
}
 993
 994 int x264_ratecontrol_qp( x264_t *h )
 995 {
 996     return h->rc->qpm;
 997 }
 998
 999 /* In 2pass, force the same frame types as in the 1st pass */
1000 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
1001 {
1002     x264_ratecontrol_t *rc = h->rc;
1003     if( h->param.rc.b_stat_read )
1004     {
1005         if( frame_num >= rc->num_entries )
1006         {
1007             /* We could try to initialize everything required for ABR and
1008              * adaptive B-frames, but that would be complicated.
1009              * So just calculate the average QP used so far. */
1010
1011             h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
1012                                       : 1 + h->stat.f_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
1013             rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
1014             rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
1015             rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
1016
1017             x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
1018             x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
1019             if( h->param.b_bframe_adaptive )
1020                 x264_log(h, X264_LOG_ERROR, "disabling adaptive B-frames\n");
1021
1022             rc->b_abr = 0;
1023             rc->b_2pass = 0;
1024             h->param.rc.i_rc_method = X264_RC_CQP;
1025             h->param.rc.b_stat_read = 0;
1026             h->param.b_bframe_adaptive = 0;
1027             if( h->param.i_bframe > 1 )
1028                 h->param.i_bframe = 1;
1029             return X264_TYPE_P;
1030         }
1031         switch( rc->entry[frame_num].pict_type )
1032         {
1033             case SLICE_TYPE_I:
1034                 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
1035
1036             case SLICE_TYPE_B:
1037                 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
1038
1039             case SLICE_TYPE_P:
1040             default:
1041                 return X264_TYPE_P;
1042         }
1043     }
1044     else
1045     {
1046         return X264_TYPE_AUTO;
1047     }
1048 }
1049
1050 /* After encoding one frame, save stats and update ratecontrol state */
1051 void x264_ratecontrol_end( x264_t *h, int bits )
1052 {
1053     x264_ratecontrol_t *rc = h->rc;
1054     const int *mbs = h->stat.frame.i_mb_count;
1055     int i;
1056
1057     x264_emms();
1058
1059     h->stat.frame.i_mb_count_skip = mbs[P_SKIP] + mbs[B_SKIP];
1060     h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4];
1061     h->stat.frame.i_mb_count_p = mbs[P_L0] + mbs[P_8x8];
1062     for( i = B_DIRECT; i < B_8x8; i++ )
1063         h->stat.frame.i_mb_count_p += mbs[i];
1064
1065     h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
1066     h->fdec->f_qp_avg_aq = rc->qpa_aq /= h->mb.i_mb_count;
1067
1068     if( h->param.rc.b_stat_write )
1069     {
1070         char c_type = h->sh.i_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
1071                     : h->sh.i_type==SLICE_TYPE_P ? 'P'
1072                     : h->fenc->b_kept_as_ref ? 'B' : 'b';
1073         int dir_frame = h->stat.frame.i_direct_score[1] - h->stat.frame.i_direct_score[0];
1074         int dir_avg = h->stat.i_direct_score[1] - h->stat.i_direct_score[0];
1075         char c_direct = h->mb.b_direct_auto_write ?
1076                         ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
1077                           dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
1078                         : '-';
1079         fprintf( rc->p_stat_file_out,
1080                  "in:%d out:%d type:%c q:%.2f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
1081                  h->fenc->i_frame, h->i_frame,
1082                  c_type, rc->qpa_rc,
1083                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
1084                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
1085                  h->stat.frame.i_mb_count_i,
1086                  h->stat.frame.i_mb_count_p,
1087                  h->stat.frame.i_mb_count_skip,
1088                  c_direct);
1089     }
1090
1091     if( rc->b_abr )
1092     {
1093         if( h->sh.i_type != SLICE_TYPE_B )
1094             rc->cplxr_sum += bits * qp2qscale(rc->qpa_rc) / rc->last_rceq;
1095         else
1096         {
1097             /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
1098              * Not perfectly accurate with B-refs, but good enough. */
1099             rc->cplxr_sum += bits * qp2qscale(rc->qpa_rc) / (rc->last_rceq * fabs(h->param.rc.f_pb_factor));
1100         }
1101         rc->cplxr_sum *= rc->cbr_decay;
1102         rc->wanted_bits_window += rc->bitrate / rc->fps;
1103         rc->wanted_bits_window *= rc->cbr_decay;
1104
1105         if( h->param.i_threads == 1 )
1106             accum_p_qp_update( h, rc->qpa_rc );
1107     }
1108
1109     if( rc->b_2pass )
1110     {
1111         rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
1112     }
1113
1114     if( h->mb.b_variable_qp )
1115     {
1116         if( h->sh.i_type == SLICE_TYPE_B )
1117         {
1118             rc->bframe_bits += bits;
1119             if( !h->frames.current[0] || !IS_X264_TYPE_B(h->frames.current[0]->i_type) )
1120             {
1121                 update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
1122                                   h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
1123                 rc->bframe_bits = 0;
1124             }
1125         }
1126     }
1127
1128     update_vbv( h, bits );
1129 }
1130
1131 /****************************************************************************
1132  * 2 pass functions
1133  ***************************************************************************/
1134
/* Expression evaluator, defined outside this section of the file.
 * get_qscale() calls it with the ratecontrol equation string, a table of
 * constant values with a matching NULL-terminated table of names, a table of
 * one-argument functions with their names, NULL for the two-argument function
 * tables, and the frame's stats entry as the opaque pointer.
 * NOTE(review): opaque is presumably passed through as the first argument of
 * the func1/func2 callbacks - confirm against the definition. */
double x264_eval( char *s, double *const_value, const char **const_name,
                  double (**func1)(void *, double), const char **func1_name,
                  double (**func2)(void *, double, double), char **func2_name,
                  void *opaque );
1139
/**
 * modify the bitrate curve from pass1 for one frame
 *
 * Evaluates the ratecontrol equation (h->param.rc.psz_rc_eq) for the frame
 * described by rce, divides the result by rate_factor and applies the zone
 * covering frame_num, if any.  Returns the resulting qscale.
 * Side effects: updates rcc->last_rceq and rcc->last_qscale whenever the
 * equation produced a usable value.
 */
static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor, int frame_num)
{
    x264_ratecontrol_t *rcc= h->rc;
    const int pict_type = rce->pict_type;
    double q;
    x264_zone_t *zone = get_zone( h, frame_num );

    /* values of the variables available to the equation;
     * must stay in the same order as const_names below */
    double const_values[]={
        rce->i_tex_bits * rce->qscale,
        rce->p_tex_bits * rce->qscale,
        (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
        rce->mv_bits * rce->qscale,
        (double)rce->i_count / rcc->nmb,
        (double)rce->p_count / rcc->nmb,
        (double)rce->s_count / rcc->nmb,
        rce->pict_type == SLICE_TYPE_I,
        rce->pict_type == SLICE_TYPE_P,
        rce->pict_type == SLICE_TYPE_B,
        h->param.rc.f_qcompress,
        rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
        rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
        rce->blurred_complexity,
        0
    };
    /* names under which the values above are visible in the equation */
    static const char *const_names[]={
        "iTex",
        "pTex",
        "tex",
        "mv",
        "iCount",
        "pCount",
        "sCount",
        "isI",
        "isP",
        "isB",
        "qComp",
        "avgIITex",
        "avgPITex",
        "avgPPTex",
        "avgBPTex",
        "avgTex",
        "blurCplx",
        NULL
    };
    /* one-argument functions callable from the equation, and their names */
    static double (*func1[])(void *, double)={
//      (void *)bits2qscale,
        (void *)qscale2bits,
        NULL
    };
    static const char *func1_names[]={
//      "bits2qp",
        "qp2bits",
        NULL
    };

    q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);

    // avoid NaN's in the rc_eq
    // (a frame with no texture or mv bits also reuses the previous qscale)
    if(!isfinite(q) || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
        q = rcc->last_qscale;
    else
    {
        rcc->last_rceq = q;
        q /= rate_factor;
        rcc->last_qscale = q;
    }

    /* a zone either replaces the qscale outright or scales it */
    if( zone )
    {
        if( zone->b_force_qp )
            q = qp2qscale(zone->i_qp);
        else
            q /= zone->f_bitrate_factor;
    }

    return q;
}
1223
/* Adjust the qscale q proposed for the frame described by rce:
 *  - I- and B-frame qscales are derived from P-frame qscales via the
 *    ip/pb factors;
 *  - the change relative to the last frame of the same type is limited to a
 *    factor of rcc->lstep in either direction.
 * Returns the adjusted qscale.
 * Side effects: updates last_qscale_for[], last_non_b_pict_type and the
 * accum_p_qp / accum_p_norm accumulators, so the result depends on the order
 * in which frames are passed in. */
static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
{
    x264_ratecontrol_t *rcc = h->rc;
    const int pict_type = rce->pict_type;

    // force I/B quants as a function of P quants
    const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
    const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
    if( pict_type == SLICE_TYPE_I )
    {
        double iq = q;
        /* qscale corresponding to the weighted average QP in the accumulator */
        double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
        double ip_factor = fabs( h->param.rc.f_ip_factor );
        /* don't apply ip_factor if the following frame is also I */
        if( rcc->accum_p_norm <= 0 )
            q = iq;
        else if( h->param.rc.f_ip_factor < 0 )
            q = iq / ip_factor;
        else if( rcc->accum_p_norm >= 1 )
            q = pq / ip_factor;
        else
            /* accumulated weight below 1: blend the P-derived value with iq */
            q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
    }
    else if( pict_type == SLICE_TYPE_B )
    {
        /* a positive pb_factor ties the B qscale to the last non-B frame;
         * the factor itself is only applied to B-frames not kept as reference */
        if( h->param.rc.f_pb_factor > 0 )
            q = last_non_b_q;
        if( !rce->kept_as_ref )
            q *= fabs( h->param.rc.f_pb_factor );
    }
    else if( pict_type == SLICE_TYPE_P
             && rcc->last_non_b_pict_type == SLICE_TYPE_P
             && rce->i_tex_bits + rce->p_tex_bits == 0 )
    {
        /* P-frame without texture bits following a P-frame: reuse its qscale */
        q = last_p_q;
    }

    /* last qscale / qdiff stuff */
    if(rcc->last_non_b_pict_type==pict_type
       && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
    {
        double last_q = rcc->last_qscale_for[pict_type];
        double max_qscale = last_q * rcc->lstep;
        double min_qscale = last_q / rcc->lstep;

        if     (q > max_qscale) q = max_qscale;
        else if(q < min_qscale) q = min_qscale;
    }

    rcc->last_qscale_for[pict_type] = q;
    if(pict_type!=SLICE_TYPE_B)
        rcc->last_non_b_pict_type = pict_type;
    if(pict_type==SLICE_TYPE_I)
    {
        /* an I-frame restarts the accumulator; the old weight is kept for
         * the qdiff test above */
        rcc->last_accum_p_norm = rcc->accum_p_norm;
        rcc->accum_p_norm = 0;
        rcc->accum_p_qp = 0;
    }
    if(pict_type==SLICE_TYPE_P)
    {
        /* weight is reduced by the square of the frame's i_count / nmb ratio */
        float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
        rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
        rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
    }
    return q;
}
1290
1291 static double predict_size( predictor_t *p, double q, double var )
1292 {
1293      return p->coeff*var / (q*p->count);
1294 }
1295
1296 static void update_predictor( predictor_t *p, double q, double var, double bits )
1297 {
1298     if( var < 10 )
1299         return;
1300     p->count *= p->decay;
1301     p->coeff *= p->decay;
1302     p->count ++;
1303     p->coeff += bits*q / var;
1304 }
1305
1306 // update VBV after encoding a frame
1307 static void update_vbv( x264_t *h, int bits )
1308 {
1309     x264_ratecontrol_t *rcc = h->rc;
1310     x264_ratecontrol_t *rct = h->thread[0]->rc;
1311
1312     if( rcc->last_satd >= h->mb.i_mb_count )
1313         update_predictor( &rct->pred[h->sh.i_type], qp2qscale(rcc->qpa_rc), rcc->last_satd, bits );
1314
1315     if( !rcc->b_vbv )
1316         return;
1317
1318     rct->buffer_fill_final += rct->buffer_rate - bits;
1319     if( rct->buffer_fill_final < 0 )
1320         x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rct->buffer_fill_final );
1321     rct->buffer_fill_final = x264_clip3f( rct->buffer_fill_final, 0, rct->buffer_size );
1322 }
1323
1324 // provisionally update VBV according to the planned size of all frames currently in progress
1325 static void update_vbv_plan( x264_t *h )
1326 {
1327     x264_ratecontrol_t *rcc = h->rc;
1328     rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
1329     if( h->param.i_threads > 1 )
1330     {
1331         int j = h->rc - h->thread[0]->rc;
1332         int i;
1333         for( i=1; i<h->param.i_threads; i++ )
1334         {
1335             x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
1336             double bits = t->rc->frame_size_planned;
1337             if( !t->b_thread_active )
1338                 continue;
1339             bits  = X264_MAX(bits, x264_ratecontrol_get_estimated_size(t));
1340             rcc->buffer_fill += rcc->buffer_rate - bits;
1341             rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size );
1342         }
1343     }
1344 }
1345
// apply VBV constraints and clip qscale to between lmin and lmax
//
// q is the proposed qscale for a frame of type pict_type; the return value is
// the qscale to use.  With VBV enabled the qscale is first adjusted from the
// buffer state and the predicted frame size.  The final limiting is a hard
// clip to [lmin,lmax], except in 2pass where a sigmoid in the log domain is
// used instead.
static double clip_qscale( x264_t *h, int pict_type, double q )
{
    x264_ratecontrol_t *rcc = h->rc;
    double lmin = rcc->lmin[pict_type];
    double lmax = rcc->lmax[pict_type];
    double q0 = q;

    /* B-frames are not directly subject to VBV,
     * since they are controlled by the P-frames' QPs.
     * FIXME: in 2pass we could modify previous frames' QP too,
     *        instead of waiting for the buffer to fill */
    if( rcc->b_vbv &&
        ( pict_type == SLICE_TYPE_P ||
          ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
    {
        /* buffer less than half full: raise qscale, by up to a factor of 2 */
        if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
            q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
    }

    if( rcc->b_vbv && rcc->last_satd > 0 )
    {
        /* Now a hard threshold to make sure the frame fits in VBV.
         * This one is mostly for I-frames. */
        double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
        double qf = 1.0;
        /* predicted size above half the buffer fill: scale it down (qf < 1),
         * by at most a factor of 5 */
        if( bits > rcc->buffer_fill/2 )
            qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
        q /= qf;
        bits *= qf;
        /* predicted size below half the per-frame rate: lower qscale in proportion */
        if( bits < rcc->buffer_rate/2 )
            q *= bits*2/rcc->buffer_rate;
        /* but never end up below the qscale that was passed in */
        q = X264_MAX( q0, q );

        /* Check B-frame complexity, and use up any bits that would
         * overflow before the next P-frame. */
        if( h->sh.i_type == SLICE_TYPE_P )
        {
            int nb = rcc->bframes;
            double pbbits = bits;
            double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd );
            double space;

            /* B-frames predicted larger than the per-frame rate are left out */
            if( bbits > rcc->buffer_rate )
                nb = 0;
            pbbits += nb * bbits;

            /* bits that would exceed buffer_size after this frame and its nb B-frames */
            space = rcc->buffer_fill + (1+nb)*rcc->buffer_rate - rcc->buffer_size;
            if( pbbits < space )
            {
                q *= X264_MAX( pbbits / space,
                               bits / (0.5 * rcc->buffer_size) );
            }
            q = X264_MAX( q0-5, q );
        }

        if( !rcc->b_vbv_min_rate )
            q = X264_MAX( q0, q );
    }

    if(lmin==lmax)
        return lmin;
    else if(rcc->b_2pass)
    {
        /* soft clip: map log(q) through a sigmoid spanning log(lmin)..log(lmax) */
        double min2 = log(lmin);
        double max2 = log(lmax);
        q = (log(q) - min2)/(max2-min2) - 0.5;
        q = 1.0/(1.0 + exp(-4*q));
        q = q*(max2-min2) + min2;
        return exp(q);
    }
    else
        return x264_clip3f(q, lmin, lmax);
}
1420
1421 // update qscale for 1 frame based on actual bits used so far
1422 static float rate_estimate_qscale( x264_t *h )
1423 {
1424     float q;
1425     x264_ratecontrol_t *rcc = h->rc;
1426     ratecontrol_entry_t rce;
1427     int pict_type = h->sh.i_type;
1428     double lmin = rcc->lmin[pict_type];
1429     double lmax = rcc->lmax[pict_type];
1430     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
1431                           + h->stat.i_slice_size[SLICE_TYPE_P]
1432                           + h->stat.i_slice_size[SLICE_TYPE_B]);
1433
1434     if( rcc->b_2pass )
1435     {
1436         rce = *rcc->rce;
1437         if(pict_type != rce.pict_type)
1438         {
1439             x264_log(h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
1440                      slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type]);
1441         }
1442     }
1443
1444     if( pict_type == SLICE_TYPE_B )
1445     {
1446         /* B-frames don't have independent ratecontrol, but rather get the
1447          * average QP of the two adjacent P-frames + an offset */
1448
1449         int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type);
1450         int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type);
1451         int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc);
1452         int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc);
1453         float q0 = h->fref0[0]->f_qp_avg_rc;
1454         float q1 = h->fref1[0]->f_qp_avg_rc;
1455
1456         if( h->fref0[0]->i_type == X264_TYPE_BREF )
1457             q0 -= rcc->pb_offset/2;
1458         if( h->fref1[0]->i_type == X264_TYPE_BREF )
1459             q1 -= rcc->pb_offset/2;
1460
1461         if(i0 && i1)
1462             q = (q0 + q1) / 2 + rcc->ip_offset;
1463         else if(i0)
1464             q = q1;
1465         else if(i1)
1466             q = q0;
1467         else
1468             q = (q0*dt1 + q1*dt0) / (dt0 + dt1);
1469
1470         if(h->fenc->b_kept_as_ref)
1471             q += rcc->pb_offset/2;
1472         else
1473             q += rcc->pb_offset;
1474
1475         rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref1[h->i_ref1-1]->i_satd );
1476         x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned);
1477         rcc->last_satd = 0;
1478         return qp2qscale(q);
1479     }
1480     else
1481     {
1482         double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
1483         if( rcc->b_2pass )
1484         {
1485             //FIXME adjust abr_buffer based on distance to the end of the video
1486             int64_t diff = total_bits - (int64_t)rce.expected_bits;
1487             q = rce.new_qscale;
1488             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
1489             if( h->fenc->i_frame > 30 )
1490             {
1491                 /* Adjust quant based on the difference between
1492                  * achieved and expected bitrate so far */
1493                 double time = (double)h->fenc->i_frame / rcc->num_entries;
1494                 double w = x264_clip3f( time*100, 0.0, 1.0 );
1495                 q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
1496             }
1497             if( rcc->b_vbv )
1498             {
1499                 double expected_size = qscale2bits(&rce, q);
1500                 double expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
1501                 double expected_fullness =  rce.expected_vbv / rcc->buffer_size;
1502                 double qmax = q*(2 - expected_fullness);
1503                 double size_constraint = 1 + expected_fullness;
1504                 if (expected_fullness < .05)
1505                     qmax = lmax;
1506                 qmax = X264_MIN(qmax, lmax);
1507                 while( (expected_vbv < rce.expected_vbv/size_constraint) && (q < qmax) )
1508                 {
1509                     q *= 1.05;
1510                     expected_size = qscale2bits(&rce, q);
1511                     expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
1512                 }
1513                 rcc->last_satd = x264_rc_analyse_slice( h );
1514             }
1515             q = x264_clip3f( q, lmin, lmax );
1516         }
1517         else /* 1pass ABR */
1518         {
1519             /* Calculate the quantizer which would have produced the desired
1520              * average bitrate if it had been applied to all frames so far.
1521              * Then modulate that quant based on the current frame's complexity
1522              * relative to the average complexity so far (using the 2pass RCEQ).
1523              * Then bias the quant up or down if total size so far was far from
1524              * the target.
1525              * Result: Depending on the value of rate_tolerance, there is a
1526              * tradeoff between quality and bitrate precision. But at large
1527              * tolerances, the bit distribution approaches that of 2pass. */
1528
1529             double wanted_bits, overflow=1, lmin, lmax;
1530
1531             rcc->last_satd = x264_rc_analyse_slice( h );
1532             rcc->short_term_cplxsum *= 0.5;
1533             rcc->short_term_cplxcount *= 0.5;
1534             rcc->short_term_cplxsum += rcc->last_satd;
1535             rcc->short_term_cplxcount ++;
1536
1537             rce.p_tex_bits = rcc->last_satd;
1538             rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
1539             rce.i_tex_bits = 0;
1540             rce.mv_bits = 0;
1541             rce.p_count = rcc->nmb;
1542             rce.i_count = 0;
1543             rce.s_count = 0;
1544             rce.qscale = 1;
1545             rce.pict_type = pict_type;
1546
1547             if( h->param.rc.i_rc_method == X264_RC_CRF )
1548             {
1549                 q = get_qscale( h, &rce, rcc->rate_factor_constant, h->fenc->i_frame );
1550             }
1551             else
1552             {
1553                 int i_frame_done = h->fenc->i_frame + 1 - h->param.i_threads;
1554
1555                 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );
1556
1557                 // FIXME is it simpler to keep track of wanted_bits in ratecontrol_end?
1558                 wanted_bits = i_frame_done * rcc->bitrate / rcc->fps;
1559                 if( wanted_bits > 0 )
1560                 {
1561                     abr_buffer *= X264_MAX( 1, sqrt(i_frame_done/25) );
1562                     overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
1563                     q *= overflow;
1564                 }
1565             }
1566
1567             if( pict_type == SLICE_TYPE_I && h->param.i_keyint_max > 1
1568                 /* should test _next_ pict type, but that isn't decided yet */
1569                 && rcc->last_non_b_pict_type != SLICE_TYPE_I )
1570             {
1571                 q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
1572                 q /= fabs( h->param.rc.f_ip_factor );
1573             }
1574             else if( h->i_frame > 0 )
1575             {
1576                 /* Asymmetric clipping, because symmetric would prevent
1577                  * overflow control in areas of rapidly oscillating complexity */
1578                 lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
1579                 lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
1580                 if( overflow > 1.1 && h->i_frame > 3 )
1581                     lmax *= rcc->lstep;
1582                 else if( overflow < 0.9 )
1583                     lmin /= rcc->lstep;
1584
1585                 q = x264_clip3f(q, lmin, lmax);
1586             }
1587             else if( h->param.rc.i_rc_method == X264_RC_CRF )
1588             {
1589                 q = qp2qscale( ABR_INIT_QP ) / fabs( h->param.rc.f_ip_factor );
1590             }
1591
1592             //FIXME use get_diff_limited_q() ?
1593             q = clip_qscale( h, pict_type, q );
1594         }
1595
1596         rcc->last_qscale_for[pict_type] =
1597         rcc->last_qscale = q;
1598
1599         if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 )
1600             rcc->last_qscale_for[SLICE_TYPE_P] = q;
1601
1602         if( rcc->b_2pass && rcc->b_vbv)
1603             rcc->frame_size_planned = qscale2bits(&rce, q);
1604         else
1605             rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
1606         x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned);
1607         return q;
1608     }
1609 }
1610
1611 void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
1612 {
1613     if( cur != prev )
1614     {
1615 #define COPY(var) memcpy(&cur->rc->var, &prev->rc->var, sizeof(cur->rc->var))
1616         /* these vars are updated in x264_ratecontrol_start()
1617          * so copy them from the context that most recently started (prev)
1618          * to the context that's about to start (cur).
1619          */
1620         COPY(accum_p_qp);
1621         COPY(accum_p_norm);
1622         COPY(last_satd);
1623         COPY(last_rceq);
1624         COPY(last_qscale_for);
1625         COPY(last_non_b_pict_type);
1626         COPY(short_term_cplxsum);
1627         COPY(short_term_cplxcount);
1628         COPY(bframes);
1629         COPY(prev_zone);
1630 #undef COPY
1631     }
1632     if( cur != next )
1633     {
1634 #define COPY(var) next->rc->var = cur->rc->var
1635         /* these vars are updated in x264_ratecontrol_end()
1636          * so copy them from the context that most recently ended (cur)
1637          * to the context that's about to end (next)
1638          */
1639         COPY(cplxr_sum);
1640         COPY(expected_bits_sum);
1641         COPY(wanted_bits_window);
1642         COPY(bframe_bits);
1643 #undef COPY
1644     }
1645     //FIXME row_preds[] (not strictly necessary, but would improve prediction)
1646     /* the rest of the variables are either constant or thread-local */
1647 }
1648
1649 static int find_underflow( x264_t *h, double *fills, int *t0, int *t1, int over )
1650 {
1651     /* find an interval ending on an overflow or underflow (depending on whether
1652      * we're adding or removing bits), and starting on the earliest frame that
1653      * can influence the buffer fill of that end frame. */
1654     x264_ratecontrol_t *rcc = h->rc;
1655     const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
1656     const double buffer_max = .9 * rcc->buffer_size;
1657     double fill = fills[*t0-1];
1658     double parity = over ? 1. : -1.;
1659     int i, start=-1, end=-1;
1660     for(i = *t0; i < rcc->num_entries; i++)
1661     {
1662         fill += (rcc->buffer_rate - qscale2bits(&rcc->entry[i], rcc->entry[i].new_qscale)) * parity;
1663         fill = x264_clip3f(fill, 0, rcc->buffer_size);
1664         fills[i] = fill;
1665         if(fill <= buffer_min || i == 0)
1666         {
1667             if(end >= 0)
1668                 break;
1669             start = i;
1670         }
1671         else if(fill >= buffer_max && start >= 0)
1672             end = i;
1673     }
1674     *t0 = start;
1675     *t1 = end;
1676     return start>=0 && end>=0;
1677 }
1678
1679 static int fix_underflow( x264_t *h, int t0, int t1, double adjustment, double qscale_min, double qscale_max)
1680 {
1681     x264_ratecontrol_t *rcc = h->rc;
1682     double qscale_orig, qscale_new;
1683     int i;
1684     int adjusted = 0;
1685     if(t0 > 0)
1686         t0++;
1687     for(i = t0; i <= t1; i++)
1688     {
1689         qscale_orig = rcc->entry[i].new_qscale;
1690         qscale_orig = x264_clip3f(qscale_orig, qscale_min, qscale_max);
1691         qscale_new  = qscale_orig * adjustment;
1692         qscale_new  = x264_clip3f(qscale_new, qscale_min, qscale_max);
1693         rcc->entry[i].new_qscale = qscale_new;
1694         adjusted = adjusted || (qscale_new != qscale_orig);
1695     }
1696     return adjusted;
1697 }
1698
1699 static double count_expected_bits( x264_t *h )
1700 {
1701     x264_ratecontrol_t *rcc = h->rc;
1702     double expected_bits = 0;
1703     int i;
1704     for(i = 0; i < rcc->num_entries; i++)
1705     {
1706         ratecontrol_entry_t *rce = &rcc->entry[i];
1707         rce->expected_bits = expected_bits;
1708         expected_bits += qscale2bits(rce, rce->new_qscale);
1709     }
1710     return expected_bits;
1711 }
1712
1713 static void vbv_pass2( x264_t *h )
1714 {
1715     /* for each interval of buffer_full .. underflow, uniformly increase the qp of all
1716      * frames in the interval until either buffer is full at some intermediate frame or the
1717      * last frame in the interval no longer underflows.  Recompute intervals and repeat.
1718      * Then do the converse to put bits back into overflow areas until target size is met */
1719
1720     x264_ratecontrol_t *rcc = h->rc;
1721     double *fills = x264_malloc((rcc->num_entries+1)*sizeof(double));
1722     double all_available_bits = h->param.rc.i_bitrate * 1000. * rcc->num_entries / rcc->fps;
1723     double expected_bits = 0;
1724     double adjustment;
1725     double prev_bits = 0;
1726     int i, t0, t1;
1727     double qscale_min = qp2qscale(h->param.rc.i_qp_min);
1728     double qscale_max = qp2qscale(h->param.rc.i_qp_max);
1729     int iterations = 0;
1730     int adj_min, adj_max;
1731
1732     fills++;
1733
1734     /* adjust overall stream size */
1735     do
1736     {
1737         iterations++;
1738         prev_bits = expected_bits;
1739
1740         if(expected_bits != 0)
1741         {   /* not first iteration */
1742             adjustment = X264_MAX(X264_MIN(expected_bits / all_available_bits, 0.999), 0.9);
1743             fills[-1] = rcc->buffer_size * h->param.rc.f_vbv_buffer_init;
1744             t0 = 0;
1745             /* fix overflows */
1746             adj_min = 1;
1747             while(adj_min && find_underflow(h, fills, &t0, &t1, 1))
1748             {
1749                 adj_min = fix_underflow(h, t0, t1, adjustment, qscale_min, qscale_max);
1750                 t0 = t1;
1751             }
1752         }
1753
1754         fills[-1] = rcc->buffer_size * (1. - h->param.rc.f_vbv_buffer_init);
1755         t0 = 0;
1756         /* fix underflows -- should be done after overflow, as we'd better undersize target than underflowing VBV */
1757         adj_max = 1;
1758         while(adj_max && find_underflow(h, fills, &t0, &t1, 0))
1759             adj_max = fix_underflow(h, t0, t1, 1.001, qscale_min, qscale_max);
1760
1761         expected_bits = count_expected_bits(h);
1762     } while(expected_bits < .995 * all_available_bits && expected_bits > prev_bits);
1763
1764     if (!adj_max)
1765         x264_log( h, X264_LOG_WARNING, "vbv-maxrate issue, qpmax or vbv-maxrate too low\n");
1766
1767     /* store expected vbv filling values for tracking when encoding */
1768     for(i = 0; i < rcc->num_entries; i++)
1769         rcc->entry[i].expected_vbv = rcc->buffer_size - fills[i];
1770
1771     x264_free(fills-1);
1772 }
1773
1774 static int init_pass2( x264_t *h )
1775 {
1776     x264_ratecontrol_t *rcc = h->rc;
1777     uint64_t all_const_bits = 0;
1778     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000. * rcc->num_entries / rcc->fps);
1779     double rate_factor, step, step_mult;
1780     double qblur = h->param.rc.f_qblur;
1781     double cplxblur = h->param.rc.f_complexity_blur;
1782     const int filter_size = (int)(qblur*4) | 1;
1783     double expected_bits;
1784     double *qscale, *blurred_qscale;
1785     int i;
1786
1787     /* find total/average complexity & const_bits */
1788     for(i=0; i<rcc->num_entries; i++)
1789     {
1790         ratecontrol_entry_t *rce = &rcc->entry[i];
1791         all_const_bits += rce->misc_bits;
1792         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
1793         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
1794         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
1795         rcc->frame_count[rce->pict_type] ++;
1796     }
1797
1798     if( all_available_bits < all_const_bits)
1799     {
1800         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
1801                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000.)));
1802         return -1;
1803     }
1804
1805     /* Blur complexities, to reduce local fluctuation of QP.
1806      * We don't blur the QPs directly, because then one very simple frame
1807      * could drag down the QP of a nearby complex frame and give it more
1808      * bits than intended. */
1809     for(i=0; i<rcc->num_entries; i++)
1810     {
1811         ratecontrol_entry_t *rce = &rcc->entry[i];
1812         double weight_sum = 0;
1813         double cplx_sum = 0;
1814         double weight = 1.0;
1815         double gaussian_weight;
1816         int j;
1817         /* weighted average of cplx of future frames */
1818         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++)
1819         {
1820             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
1821             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1822             if(weight < .0001)
1823                 break;
1824             gaussian_weight = weight * exp(-j*j/200.0);
1825             weight_sum += gaussian_weight;
1826             cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
1827         }
1828         /* weighted average of cplx of past frames */
1829         weight = 1.0;
1830         for(j=0; j<=cplxblur*2 && j<=i; j++)
1831         {
1832             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
1833             gaussian_weight = weight * exp(-j*j/200.0);
1834             weight_sum += gaussian_weight;
1835             cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
1836             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1837             if(weight < .0001)
1838                 break;
1839         }
1840         rce->blurred_complexity = cplx_sum / weight_sum;
1841     }
1842
1843     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1844     if(filter_size > 1)
1845         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1846     else
1847         blurred_qscale = qscale;
1848
1849     /* Search for a factor which, when multiplied by the RCEQ values from
1850      * each frame, adds up to the desired total size.
1851      * There is no exact closed-form solution because of VBV constraints and
1852      * because qscale2bits is not invertible, but we can start with the simple
1853      * approximation of scaling the 1st pass by the ratio of bitrates.
1854      * The search range is probably overkill, but speed doesn't matter here. */
1855
1856     expected_bits = 1;
1857     for(i=0; i<rcc->num_entries; i++)
1858         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0, i));
1859     step_mult = all_available_bits / expected_bits;
1860
1861     rate_factor = 0;
1862     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5)
1863     {
1864         expected_bits = 0;
1865         rate_factor += step;
1866
1867         rcc->last_non_b_pict_type = -1;
1868         rcc->last_accum_p_norm = 1;
1869         rcc->accum_p_norm = 0;
1870
1871         /* find qscale */
1872         for(i=0; i<rcc->num_entries; i++)
1873         {
1874             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor, i);
1875         }
1876
1877         /* fixed I/B qscale relative to P */
1878         for(i=rcc->num_entries-1; i>=0; i--)
1879         {
1880             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
1881             assert(qscale[i] >= 0);
1882         }
1883
1884         /* smooth curve */
1885         if(filter_size > 1)
1886         {
1887             assert(filter_size%2==1);
1888             for(i=0; i<rcc->num_entries; i++)
1889             {
1890                 ratecontrol_entry_t *rce = &rcc->entry[i];
1891                 int j;
1892                 double q=0.0, sum=0.0;
1893
1894                 for(j=0; j<filter_size; j++)
1895                 {
1896                     int index = i+j-filter_size/2;
1897                     double d = index-i;
1898                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
1899                     if(index < 0 || index >= rcc->num_entries)
1900                         continue;
1901                     if(rce->pict_type != rcc->entry[index].pict_type)
1902                         continue;
1903                     q += qscale[index] * coeff;
1904                     sum += coeff;
1905                 }
1906                 blurred_qscale[i] = q/sum;
1907             }
1908         }
1909
1910         /* find expected bits */
1911         for(i=0; i<rcc->num_entries; i++)
1912         {
1913             ratecontrol_entry_t *rce = &rcc->entry[i];
1914             rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]);
1915             assert(rce->new_qscale >= 0);
1916             expected_bits += qscale2bits(rce, rce->new_qscale);
1917         }
1918
1919         if(expected_bits > all_available_bits) rate_factor -= step;
1920     }
1921
1922     x264_free(qscale);
1923     if(filter_size > 1)
1924         x264_free(blurred_qscale);
1925
1926     if(rcc->b_vbv)
1927         vbv_pass2(h);
1928     expected_bits = count_expected_bits(h);
1929
1930     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
1931     {
1932         double avgq = 0;
1933         for(i=0; i<rcc->num_entries; i++)
1934             avgq += rcc->entry[i].new_qscale;
1935         avgq = qscale2qp(avgq / rcc->num_entries);
1936
1937         if ((expected_bits > all_available_bits) || (!rcc->b_vbv))
1938             x264_log(h, X264_LOG_WARNING, "Error: 2pass curve failed to converge\n");
1939         x264_log(h, X264_LOG_WARNING, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
1940                  (float)h->param.rc.i_bitrate,
1941                  expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1942                  avgq);
1943         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1944         {
1945             if(h->param.rc.i_qp_min > 0)
1946                 x264_log(h, X264_LOG_WARNING, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1947             else
1948                 x264_log(h, X264_LOG_WARNING, "try reducing target bitrate\n");
1949         }
1950         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1951         {
1952             if(h->param.rc.i_qp_max < 51)
1953                 x264_log(h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1954             else
1955                 x264_log(h, X264_LOG_WARNING, "try increasing target bitrate\n");
1956         }
1957         else if(!(rcc->b_2pass && rcc->b_vbv))
1958             x264_log(h, X264_LOG_WARNING, "internal error\n");
1959     }
1960
1961     return 0;
1962 }
1963
1964