git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2005 x264 project
   5  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Loren Merritt <lorenm@u.washington.edu>
   8  *          Michael Niedermayer <michaelni@gmx.at>
   9  *          Måns Rullgård <mru@mru.ath.cx>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <math.h>
  32 #include <limits.h>
  33 #include <assert.h>
  34
  35 #include "common/common.h"
  36 #include "common/cpu.h"
  37 #include "ratecontrol.h"
  38
  39 #if defined(SYS_FREEBSD) || defined(SYS_BEOS) || defined(SYS_NETBSD)
  40 #define exp2f(x) powf( 2, (x) )
  41 #endif
  42 #if defined(_MSC_VER) || defined(SYS_SunOS)
  43 #define exp2f(x) pow( 2, (x) )
  44 #define sqrtf sqrt
  45 #endif
  46 #ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
  47 #define rename(src,dst) (unlink(dst), rename(src,dst))
  48 #endif
  49
  50 typedef struct
  51 {
  52     int pict_type;
  53     int kept_as_ref;
  54     float qscale;
  55     int mv_bits;
  56     int i_tex_bits;
  57     int p_tex_bits;
  58     int misc_bits;
  59     uint64_t expected_bits;
  60     float new_qscale;
  61     int new_qp;
  62     int i_count;
  63     int p_count;
  64     int s_count;
  65     float blurred_complexity;
  66 } ratecontrol_entry_t;
  67
  68 typedef struct
  69 {
  70     double coeff;
  71     double count;
  72     double decay;
  73 } predictor_t;
  74
  75 struct x264_ratecontrol_t
  76 {
  77     /* constants */
  78     int b_abr;
  79     int b_2pass;
  80     double fps;
  81     double bitrate;
  82     double rate_tolerance;
  83     int nmb;                    /* number of macroblocks in a frame */
  84     int qp_constant[5];
  85
  86     /* current frame */
  87     ratecontrol_entry_t *rce;
  88     int qp;                     /* qp for current frame */
  89     float qpa;                  /* average of macroblocks' qp (same as qp if no adaptive quant) */
  90     int slice_type;
  91     int qp_force;
  92
  93     /* VBV stuff */
  94     double buffer_size;
  95     double buffer_fill;
  96     double buffer_rate;         /* # of bits added to buffer_fill after each frame */
  97     predictor_t pred[5];        /* predict frame size from satd */
  98
  99     /* ABR stuff */
 100     int    last_satd;
 101     double last_rceq;
 102     double cplxr_sum;           /* sum of bits*qscale/rceq */
 103     double expected_bits_sum;   /* sum of qscale2bits after rceq, ratefactor, and overflow */
 104     double wanted_bits_window;  /* target bitrate * window */
 105     double cbr_decay;
 106     double short_term_cplxsum;
 107     double short_term_cplxcount;
 108     double rate_factor_constant;
 109
 110     /* 2pass stuff */
 111     FILE *p_stat_file_out;
 112     char *psz_stat_file_tmpname;
 113
 114     int num_entries;            /* number of ratecontrol_entry_ts */
 115     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
 116     double last_qscale;
 117     double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
 118     int last_non_b_pict_type;
 119     double accum_p_qp;          /* for determining I-frame quant */
 120     double accum_p_norm;
 121     double last_accum_p_norm;
 122     double lmin[5];             /* min qscale by frame type */
 123     double lmax[5];
 124     double lstep;               /* max change (multiply) in qscale per frame */
 125     double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
 126     double p_cplx_sum[5];
 127     double mv_bits_sum[5];
 128     int frame_count[5];         /* number of frames of each type */
 129
 130     int i_zones;
 131     x264_zone_t *zones;
 132 };
 133
 134
 135 static int parse_zones( x264_t *h );
 136 static int init_pass2(x264_t *);
 137 static float rate_estimate_qscale( x264_t *h, int pict_type );
 138 static void update_vbv( x264_t *h, int bits );
 139 int  x264_rc_analyse_slice( x264_t *h );
 140
 141 /* Terminology:
 142  * qp = h.264's quantizer
 143  * qscale = linearized quantizer = Lagrange multiplier
 144  */
 145 static inline double qp2qscale(double qp)
 146 {
 147     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 148 }
 149 static inline double qscale2qp(double qscale)
 150 {
 151     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 152 }
 153
 154 /* Texture bitrate is not quite inversely proportional to qscale,
 155  * probably due the the changing number of SKIP blocks.
 156  * MV bits level off at about qp<=12, because the lambda used
 157  * for motion estimation is constant there. */
 158 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 159 {
 160     if(qscale<0.1)
 161         qscale = 0.1;
 162     return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
 163            + rce->mv_bits * pow( X264_MAX(rce->qscale, 12) / X264_MAX(qscale, 12), 0.5 );
 164 }
 165
 166
 167 int x264_ratecontrol_new( x264_t *h )
 168 {
 169     x264_ratecontrol_t *rc;
 170     int i;
 171
 172     x264_cpu_restore( h->param.cpu );
 173
 174     h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
 175     memset(rc, 0, sizeof(*rc));
 176
 177     rc->b_abr = ( h->param.rc.b_cbr || h->param.rc.i_rf_constant ) && !h->param.rc.b_stat_read;
 178     rc->b_2pass = h->param.rc.b_cbr && h->param.rc.b_stat_read;
 179     h->mb.b_variable_qp = 0;
 180
 181     /* FIXME: use integers */
 182     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 183         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 184     else
 185         rc->fps = 25.0;
 186
 187     rc->bitrate = h->param.rc.i_bitrate * 1000;
 188     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
 189     rc->nmb = h->mb.i_mb_count;
 190     rc->last_non_b_pict_type = -1;
 191     rc->cbr_decay = 1.0;
 192
 193     if( rc->b_2pass && h->param.rc.i_rf_constant )
 194         x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
 195     if( h->param.rc.i_vbv_max_bitrate && !h->param.rc.b_cbr && !h->param.rc.i_rf_constant )
 196         x264_log(h, X264_LOG_ERROR, "VBV is incompatible with constant QP.\n");
 197     if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
 198         h->param.rc.i_vbv_max_bitrate > 0)
 199         x264_log(h, X264_LOG_ERROR, "max bitrate less than average bitrate, ignored.\n");
 200     else if( h->param.rc.i_vbv_max_bitrate > 0 &&
 201              h->param.rc.i_vbv_buffer_size > 0 )
 202     {
 203         if( h->param.rc.i_vbv_buffer_size < 10 * h->param.rc.i_vbv_max_bitrate / rc->fps ) {
 204             h->param.rc.i_vbv_buffer_size = 10 * h->param.rc.i_vbv_max_bitrate / rc->fps;
 205             x264_log( h, X264_LOG_ERROR, "VBV buffer size too small, using %d kbit\n",
 206                       h->param.rc.i_vbv_buffer_size );
 207         }
 208         rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000 / rc->fps;
 209         rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
 210         rc->buffer_fill = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
 211         rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
 212                       * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
 213     }
 214     else if( h->param.rc.i_vbv_max_bitrate || h->param.rc.i_vbv_buffer_size )
 215         x264_log(h, X264_LOG_ERROR, "VBV maxrate or buffer size specified, but not both.\n");
 216     if(rc->rate_tolerance < 0.01) {
 217         x264_log(h, X264_LOG_ERROR, "bitrate tolerance too small, using .01\n");
 218         rc->rate_tolerance = 0.01;
 219     }
 220
 221     if( rc->b_abr )
 222     {
 223         /* FIXME shouldn't need to arbitrarily specify a QP,
 224          * but this is more robust than BPP measures */
 225 #define ABR_INIT_QP ( h->param.rc.i_rf_constant > 0 ? h->param.rc.i_rf_constant : 24 )
 226         rc->accum_p_norm = .01;
 227         rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
 228         rc->cplxr_sum = .01;
 229         rc->wanted_bits_window = .01;
 230     }
 231
 232     if( h->param.rc.i_rf_constant )
 233     {
 234         /* arbitrary rescaling to make CRF somewhat similar to QP */
 235         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
 236         rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
 237                                  / qp2qscale( h->param.rc.i_rf_constant );
 238     }
 239
 240     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 241     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 242     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 243
 244     rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
 245     rc->last_qscale = qp2qscale(26);
 246     for( i = 0; i < 5; i++ )
 247     {
 248         rc->last_qscale_for[i] = qp2qscale(26);
 249         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 250         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 251         rc->pred[i].coeff= 2.0;
 252         rc->pred[i].count= 1.0;
 253         rc->pred[i].decay= 0.5;
 254     }
 255
 256     if( parse_zones( h ) < 0 )
 257         return -1;
 258
 259     /* Load stat file and init 2pass algo */
 260     if( h->param.rc.b_stat_read )
 261     {
 262         char *p, *stats_in, *stats_buf;
 263
 264         /* read 1st pass stats */
 265         assert( h->param.rc.psz_stat_in );
 266         stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in );
 267         if( !stats_buf )
 268         {
 269             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 270             return -1;
 271         }
 272
 273         /* check whether 1st pass options were compatible with current options */
 274         if( !strncmp( stats_buf, "#options:", 9 ) )
 275         {
 276             int i;
 277             char *opts = stats_buf;
 278             stats_in = strchr( stats_buf, '\n' );
 279             if( !stats_in )
 280                 return -1;
 281             *stats_in = '\0';
 282             stats_in++;
 283
 284             if( ( p = strstr( opts, "bframes=" ) ) && sscanf( p, "bframes=%d", &i )
 285                 && h->param.i_bframe != i )
 286             {
 287                 x264_log( h, X264_LOG_ERROR, "different number of B-frames than 1st pass (%d vs %d)\n",
 288                           h->param.i_bframe, i );
 289                 return -1;
 290             }
 291
 292             /* since B-adapt doesn't (yet) take into account B-pyramid,
 293              * the converse is not a problem */
 294             if( strstr( opts, "b_pyramid=1" ) && !h->param.b_bframe_pyramid )
 295                 x264_log( h, X264_LOG_WARNING, "1st pass used B-pyramid, 2nd doesn't\n" );
 296
 297             if( ( p = strstr( opts, "keyint=" ) ) && sscanf( p, "keyint=%d", &i )
 298                 && h->param.i_keyint_max != i )
 299                 x264_log( h, X264_LOG_WARNING, "different keyint than 1st pass (%d vs %d)\n",
 300                           h->param.i_keyint_max, i );
 301
 302             if( strstr( opts, "qp=0" ) && h->param.rc.b_cbr )
 303                 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
 304         }
 305
 306         /* find number of pics */
 307         p = stats_in;
 308         for(i=-1; p; i++)
 309             p = strchr(p+1, ';');
 310         if(i==0)
 311         {
 312             x264_log(h, X264_LOG_ERROR, "empty stats file\n");
 313             return -1;
 314         }
 315         rc->num_entries = i;
 316
 317         if( h->param.i_frame_total < rc->num_entries && h->param.i_frame_total > 0 )
 318         {
 319             x264_log( h, X264_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
 320                       h->param.i_frame_total, rc->num_entries );
 321         }
 322         if( h->param.i_frame_total > rc->num_entries + h->param.i_bframe )
 323         {
 324             x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
 325                       h->param.i_frame_total, rc->num_entries );
 326             return -1;
 327         }
 328
 329         /* FIXME: ugly padding because VfW drops delayed B-frames */
 330         rc->num_entries += h->param.i_bframe;
 331
 332         rc->entry = (ratecontrol_entry_t*) x264_malloc(rc->num_entries * sizeof(ratecontrol_entry_t));
 333         memset(rc->entry, 0, rc->num_entries * sizeof(ratecontrol_entry_t));
 334
 335         /* init all to skipped p frames */
 336         for(i=0; i<rc->num_entries; i++){
 337             ratecontrol_entry_t *rce = &rc->entry[i];
 338             rce->pict_type = SLICE_TYPE_P;
 339             rce->qscale = rce->new_qscale = qp2qscale(20);
 340             rce->misc_bits = rc->nmb + 10;
 341             rce->new_qp = 0;
 342         }
 343
 344         /* read stats */
 345         p = stats_in;
 346         for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
 347             ratecontrol_entry_t *rce;
 348             int frame_number;
 349             char pict_type;
 350             int e;
 351             char *next;
 352             float qp;
 353
 354             next= strchr(p, ';');
 355             if(next){
 356                 (*next)=0; //sscanf is unbelievably slow on looong strings
 357                 next++;
 358             }
 359             e = sscanf(p, " in:%d ", &frame_number);
 360
 361             if(frame_number < 0 || frame_number >= rc->num_entries)
 362             {
 363                 x264_log(h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i);
 364                 return -1;
 365             }
 366             rce = &rc->entry[frame_number];
 367
 368             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
 369                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 370                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
 371
 372             switch(pict_type){
 373                 case 'I': rce->kept_as_ref = 1;
 374                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 375                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 376                 case 'B': rce->kept_as_ref = 1;
 377                 case 'b': rce->pict_type = SLICE_TYPE_B; break;
 378                 default:  e = -1; break;
 379             }
 380             if(e != 10){
 381                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 382                 return -1;
 383             }
 384             rce->qscale = qp2qscale(qp);
 385             p = next;
 386         }
 387
 388         x264_free(stats_buf);
 389
 390         if(h->param.rc.b_cbr)
 391         {
 392             if(init_pass2(h) < 0) return -1;
 393         } /* else we're using constant quant, so no need to run the bitrate allocation */
 394     }
 395
 396     /* Open output file */
 397     /* If input and output files are the same, output to a temp file
 398      * and move it to the real name only when it's complete */
 399     if( h->param.rc.b_stat_write )
 400     {
 401         char *p;
 402
 403         rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
 404         strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 405         strcat( rc->psz_stat_file_tmpname, ".temp" );
 406
 407         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 408         if( rc->p_stat_file_out == NULL )
 409         {
 410             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 411             return -1;
 412         }
 413
 414         p = x264_param2string( &h->param, 1 );
 415         fprintf( rc->p_stat_file_out, "#options: %s\n", p );
 416         x264_free( p );
 417     }
 418
 419     return 0;
 420 }
 421
 422 static int parse_zones( x264_t *h )
 423 {
 424     x264_ratecontrol_t *rc = h->rc;
 425     int i;
 426     if( h->param.rc.psz_zones && !h->param.rc.i_zones )
 427     {
 428         char *p;
 429         h->param.rc.i_zones = 1;
 430         for( p = h->param.rc.psz_zones; *p; p++ )
 431             h->param.rc.i_zones += (*p == '/');
 432         h->param.rc.zones = x264_malloc( h->param.rc.i_zones * sizeof(x264_zone_t) );
 433         p = h->param.rc.psz_zones;
 434         for( i = 0; i < h->param.rc.i_zones; i++)
 435         {
 436             x264_zone_t *z = &h->param.rc.zones[i];
 437             if( 3 == sscanf(p, "%u,%u,q=%u", &z->i_start, &z->i_end, &z->i_qp) )
 438                 z->b_force_qp = 1;
 439             else if( 3 == sscanf(p, "%u,%u,b=%f", &z->i_start, &z->i_end, &z->f_bitrate_factor) )
 440                 z->b_force_qp = 0;
 441             else
 442             {
 443                 char *slash = strchr(p, '/');
 444                 if(slash) *slash = '\0';
 445                 x264_log( h, X264_LOG_ERROR, "invalid zone: \"%s\"\n", p );
 446                 return -1;
 447             }
 448             p = strchr(p, '/') + 1;
 449         }
 450     }
 451
 452     if( h->param.rc.i_zones > 0 )
 453     {
 454         for( i = 0; i < h->param.rc.i_zones; i++ )
 455         {
 456             x264_zone_t z = h->param.rc.zones[i];
 457             if( z.i_start < 0 || z.i_start > z.i_end )
 458             {
 459                 x264_log( h, X264_LOG_ERROR, "invalid zone: start=%d end=%d\n",
 460                           z.i_start, z.i_end );
 461                 return -1;
 462             }
 463             else if( !z.b_force_qp && z.f_bitrate_factor <= 0 )
 464             {
 465                 x264_log( h, X264_LOG_ERROR, "invalid zone: bitrate_factor=%f\n",
 466                           z.f_bitrate_factor );
 467                 return -1;
 468             }
 469         }
 470
 471         rc->i_zones = h->param.rc.i_zones;
 472         rc->zones = x264_malloc( rc->i_zones * sizeof(x264_zone_t) );
 473         memcpy( rc->zones, h->param.rc.zones, rc->i_zones * sizeof(x264_zone_t) );
 474     }
 475
 476     return 0;
 477 }
 478
 479 void x264_ratecontrol_summary( x264_t *h )
 480 {
 481     x264_ratecontrol_t *rc = h->rc;
 482     if( rc->b_abr && !h->param.rc.i_rf_constant && !h->param.rc.i_vbv_max_bitrate )
 483     {
 484         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
 485         x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
 486                   qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
 487                              * rc->cplxr_sum / rc->wanted_bits_window ) );
 488     }
 489 }
 490
 491 void x264_ratecontrol_delete( x264_t *h )
 492 {
 493     x264_ratecontrol_t *rc = h->rc;
 494
 495     if( rc->p_stat_file_out )
 496     {
 497         fclose( rc->p_stat_file_out );
 498         if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 499             if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
 500             {
 501                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
 502                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 503             }
 504         x264_free( rc->psz_stat_file_tmpname );
 505     }
 506     x264_free( rc->entry );
 507     x264_free( rc->zones );
 508     x264_free( rc );
 509 }
 510
 511 /* Before encoding a frame, choose a QP for it */
 512 void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
 513 {
 514     x264_ratecontrol_t *rc = h->rc;
 515
 516     x264_cpu_restore( h->param.cpu );
 517
 518     rc->qp_force = i_force_qp;
 519     rc->slice_type = i_slice_type;
 520
 521     if( i_force_qp )
 522     {
 523         rc->qpa = rc->qp = i_force_qp - 1;
 524     }
 525     else if( rc->b_abr )
 526     {
 527         rc->qpa = rc->qp =
 528             x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 );
 529     }
 530     else if( rc->b_2pass )
 531     {
 532         int frame = h->fenc->i_frame;
 533         ratecontrol_entry_t *rce;
 534         assert( frame >= 0 && frame < rc->num_entries );
 535         rce = h->rc->rce = &h->rc->entry[frame];
 536
 537         rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
 538         rc->qpa = rc->qp = rce->new_qp =
 539             x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 );
 540     }
 541     else /* CQP */
 542     {
 543         int q;
 544         if( i_slice_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
 545             q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
 546         else
 547             q = rc->qp_constant[ i_slice_type ];
 548         rc->qpa = rc->qp = q;
 549     }
 550 }
 551
 552 void x264_ratecontrol_mb( x264_t *h, int bits )
 553 {
 554     /* currently no adaptive quant */
 555 }
 556
 557 int x264_ratecontrol_qp( x264_t *h )
 558 {
 559     return h->rc->qp;
 560 }
 561
 562 /* In 2pass, force the same frame types as in the 1st pass */
 563 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 564 {
 565     x264_ratecontrol_t *rc = h->rc;
 566     if( h->param.rc.b_stat_read )
 567     {
 568         if( frame_num >= rc->num_entries )
 569         {
 570             /* We could try to initialize everything required for ABR and
 571              * adaptive B-frames, but that would be complicated.
 572              * So just calculate the average QP used so far. */
 573
 574             h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
 575                                       : 1 + h->stat.i_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
 576             rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
 577             rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 578             rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 579
 580             x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
 581             x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
 582             if( h->param.b_bframe_adaptive )
 583                 x264_log(h, X264_LOG_ERROR, "disabling adaptive B-frames\n");
 584
 585             rc->b_abr = 0;
 586             rc->b_2pass = 0;
 587             h->param.rc.b_cbr = 0;
 588             h->param.rc.b_stat_read = 0;
 589             h->param.b_bframe_adaptive = 0;
 590             if( h->param.i_bframe > 1 )
 591                 h->param.i_bframe = 1;
 592             return X264_TYPE_P;
 593         }
 594         switch( rc->entry[frame_num].pict_type )
 595         {
 596             case SLICE_TYPE_I:
 597                 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
 598
 599             case SLICE_TYPE_B:
 600                 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
 601
 602             case SLICE_TYPE_P:
 603             default:
 604                 return X264_TYPE_P;
 605         }
 606     }
 607     else
 608     {
 609         return X264_TYPE_AUTO;
 610     }
 611 }
 612
 613 /* After encoding one frame, save stats and update ratecontrol state */
 614 void x264_ratecontrol_end( x264_t *h, int bits )
 615 {
 616     x264_ratecontrol_t *rc = h->rc;
 617     const int *mbs = h->stat.frame.i_mb_count;
 618     int i;
 619
 620     x264_cpu_restore( h->param.cpu );
 621
 622     h->stat.frame.i_mb_count_skip = mbs[P_SKIP] + mbs[B_SKIP];
 623     h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4];
 624     h->stat.frame.i_mb_count_p = mbs[P_L0] + mbs[P_8x8];
 625     for( i = B_DIRECT; i < B_8x8; i++ )
 626         h->stat.frame.i_mb_count_p += mbs[i];
 627
 628     if( h->param.rc.b_stat_write )
 629     {
 630         char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
 631                     : rc->slice_type==SLICE_TYPE_P ? 'P'
 632                     : h->fenc->b_kept_as_ref ? 'B' : 'b';
 633         fprintf( rc->p_stat_file_out,
 634                  "in:%d out:%d type:%c q:%.2f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
 635                  h->fenc->i_frame, h->i_frame-1,
 636                  c_type, rc->qpa,
 637                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
 638                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
 639                  h->stat.frame.i_mb_count_i,
 640                  h->stat.frame.i_mb_count_p,
 641                  h->stat.frame.i_mb_count_skip);
 642     }
 643
 644     if( rc->b_abr )
 645     {
 646         if( rc->slice_type != SLICE_TYPE_B )
 647             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / rc->last_rceq;
 648         else
 649         {
 650             /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
 651              * Not perfectly accurate with B-refs, but good enough. */
 652             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / (rc->last_rceq * fabs(h->param.rc.f_pb_factor));
 653         }
 654         rc->cplxr_sum *= rc->cbr_decay;
 655         rc->wanted_bits_window += rc->bitrate / rc->fps;
 656         rc->wanted_bits_window *= rc->cbr_decay;
 657
 658         rc->accum_p_qp   *= .95;
 659         rc->accum_p_norm *= .95;
 660         rc->accum_p_norm += 1;
 661         if( rc->slice_type == SLICE_TYPE_I )
 662             rc->accum_p_qp += rc->qpa * fabs(h->param.rc.f_ip_factor);
 663         else
 664             rc->accum_p_qp += rc->qpa;
 665     }
 666
 667     if( rc->b_2pass )
 668     {
 669         rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
 670     }
 671
 672     update_vbv( h, bits );
 673
 674     if( rc->slice_type != SLICE_TYPE_B )
 675         rc->last_non_b_pict_type = rc->slice_type;
 676 }
 677
 678 /****************************************************************************
 679  * 2 pass functions
 680  ***************************************************************************/
 681
 682 double x264_eval( char *s, double *const_value, const char **const_name,
 683                   double (**func1)(void *, double), const char **func1_name,
 684                   double (**func2)(void *, double, double), char **func2_name,
 685                   void *opaque );
 686
 687 /**
 688  * modify the bitrate curve from pass1 for one frame
 689  */
 690 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor, int frame_num)
 691 {
 692     x264_ratecontrol_t *rcc= h->rc;
 693     const int pict_type = rce->pict_type;
 694     double q;
 695     int i;
 696
 697     double const_values[]={
 698         rce->i_tex_bits * rce->qscale,
 699         rce->p_tex_bits * rce->qscale,
 700         (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
 701         rce->mv_bits * rce->qscale,
 702         (double)rce->i_count / rcc->nmb,
 703         (double)rce->p_count / rcc->nmb,
 704         (double)rce->s_count / rcc->nmb,
 705         rce->pict_type == SLICE_TYPE_I,
 706         rce->pict_type == SLICE_TYPE_P,
 707         rce->pict_type == SLICE_TYPE_B,
 708         h->param.rc.f_qcompress,
 709         rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
 710         rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 711         rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 712         rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
 713         (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
 714         rce->blurred_complexity,
 715         0
 716     };
 717     static const char *const_names[]={
 718         "iTex",
 719         "pTex",
 720         "tex",
 721         "mv",
 722         "iCount",
 723         "pCount",
 724         "sCount",
 725         "isI",
 726         "isP",
 727         "isB",
 728         "qComp",
 729         "avgIITex",
 730         "avgPITex",
 731         "avgPPTex",
 732         "avgBPTex",
 733         "avgTex",
 734         "blurCplx",
 735         NULL
 736     };
 737     static double (*func1[])(void *, double)={
 738 //      (void *)bits2qscale,
 739         (void *)qscale2bits,
 740         NULL
 741     };
 742     static const char *func1_names[]={
 743 //      "bits2qp",
 744         "qp2bits",
 745         NULL
 746     };
 747
 748     q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
 749
 750     // avoid NaN's in the rc_eq
 751     if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
 752         q = rcc->last_qscale;
 753     else {
 754         rcc->last_rceq = q;
 755         q /= rate_factor;
 756         rcc->last_qscale = q;
 757     }
 758
 759     for( i = rcc->i_zones-1; i >= 0; i-- )
 760     {
 761         x264_zone_t *z = &rcc->zones[i];
 762         if( frame_num >= z->i_start && frame_num <= z->i_end )
 763         {
 764             if( z->b_force_qp )
 765                 q = qp2qscale(z->i_qp);
 766             else
 767                 q /= z->f_bitrate_factor;
 768             break;
 769         }
 770     }
 771
 772     return q;
 773 }
 774
 775 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 776 {
 777     x264_ratecontrol_t *rcc = h->rc;
 778     const int pict_type = rce->pict_type;
 779
 780     // force I/B quants as a function of P quants
 781     const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
 782     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
 783     if( pict_type == SLICE_TYPE_I )
 784     {
 785         double iq = q;
 786         double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 787         double ip_factor = fabs( h->param.rc.f_ip_factor );
 788         /* don't apply ip_factor if the following frame is also I */
 789         if( rcc->accum_p_norm <= 0 )
 790             q = iq;
 791         else if( h->param.rc.f_ip_factor < 0 )
 792             q = iq / ip_factor;
 793         else if( rcc->accum_p_norm >= 1 )
 794             q = pq / ip_factor;
 795         else
 796             q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
 797     }
 798     else if( pict_type == SLICE_TYPE_B )
 799     {
 800         if( h->param.rc.f_pb_factor > 0 )
 801             q = last_non_b_q;
 802         if( !rce->kept_as_ref )
 803             q *= fabs( h->param.rc.f_pb_factor );
 804     }
 805     else if( pict_type == SLICE_TYPE_P
 806              && rcc->last_non_b_pict_type == SLICE_TYPE_P
 807              && rce->i_tex_bits + rce->p_tex_bits == 0 )
 808     {
 809         q = last_p_q;
 810     }
 811
 812     /* last qscale / qdiff stuff */
 813     if(rcc->last_non_b_pict_type==pict_type
 814        && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
 815     {
 816         double last_q = rcc->last_qscale_for[pict_type];
 817         double max_qscale = last_q * rcc->lstep;
 818         double min_qscale = last_q / rcc->lstep;
 819
 820         if     (q > max_qscale) q = max_qscale;
 821         else if(q < min_qscale) q = min_qscale;
 822     }
 823
 824     rcc->last_qscale_for[pict_type] = q;
 825     if(pict_type!=SLICE_TYPE_B)
 826         rcc->last_non_b_pict_type = pict_type;
 827     if(pict_type==SLICE_TYPE_I)
 828     {
 829         rcc->last_accum_p_norm = rcc->accum_p_norm;
 830         rcc->accum_p_norm = 0;
 831         rcc->accum_p_qp = 0;
 832     }
 833     if(pict_type==SLICE_TYPE_P)
 834     {
 835         float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
 836         rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
 837         rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
 838     }
 839     return q;
 840 }
 841
 842 static double predict_size( predictor_t *p, double q, double var )
 843 {
 844      return p->coeff*var / (q*p->count);
 845 }
 846
 847 static void update_predictor( predictor_t *p, double q, double var, double bits )
 848 {
 849     p->count *= p->decay;
 850     p->coeff *= p->decay;
 851     p->count ++;
 852     p->coeff += bits*q / var;
 853 }
 854
 855 static void update_vbv( x264_t *h, int bits )
 856 {
 857     x264_ratecontrol_t *rcc = h->rc;
 858     if( !rcc->buffer_size )
 859         return;
 860
 861     rcc->buffer_fill += rcc->buffer_rate - bits;
 862     if( rcc->buffer_fill < 0 && !rcc->b_2pass )
 863         x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rcc->buffer_fill );
 864     rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size );
 865
 866     if(rcc->last_satd > 100)
 867         update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits );
 868 }
 869
 870 // apply VBV constraints and clip qscale to between lmin and lmax
 871 static double clip_qscale( x264_t *h, int pict_type, double q )
 872 {
 873     x264_ratecontrol_t *rcc = h->rc;
 874     double lmin = rcc->lmin[pict_type];
 875     double lmax = rcc->lmax[pict_type];
 876     double q0 = q;
 877
 878     /* B-frames are not directly subject to VBV,
 879      * since they are controlled by the P-frames' QPs.
 880      * FIXME: in 2pass we could modify previous frames' QP too,
 881      *        instead of waiting for the buffer to fill */
 882     if( rcc->buffer_size &&
 883         ( pict_type == SLICE_TYPE_P ||
 884           ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
 885     {
 886         if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
 887             q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
 888     }
 889     /* Now a hard threshold to make sure the frame fits in VBV.
 890      * This one is mostly for I-frames. */
 891     if( rcc->buffer_size && rcc->last_satd > 0 )
 892     {
 893         double bits = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd );
 894         double qf = 1.0;
 895         if( bits > rcc->buffer_fill/2 )
 896             qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
 897         q /= qf;
 898         bits *= qf;
 899         if( bits < rcc->buffer_rate/2 )
 900             q *= bits*2/rcc->buffer_rate;
 901         q = X264_MAX( q0, q );
 902     }
 903
 904     if(lmin==lmax)
 905         return lmin;
 906     else if(rcc->b_2pass)
 907     {
 908         double min2 = log(lmin);
 909         double max2 = log(lmax);
 910         q = (log(q) - min2)/(max2-min2) - 0.5;
 911         q = 1.0/(1.0 + exp(-4*q));
 912         q = q*(max2-min2) + min2;
 913         return exp(q);
 914     }
 915     else
 916         return x264_clip3f(q, lmin, lmax);
 917 }
 918
 919 // update qscale for 1 frame based on actual bits used so far
 920 static float rate_estimate_qscale(x264_t *h, int pict_type)
 921 {
 922     float q;
 923     x264_ratecontrol_t *rcc = h->rc;
 924     ratecontrol_entry_t rce;
 925     double lmin = rcc->lmin[pict_type];
 926     double lmax = rcc->lmax[pict_type];
 927     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
 928                           + h->stat.i_slice_size[SLICE_TYPE_P]
 929                           + h->stat.i_slice_size[SLICE_TYPE_B]);
 930
 931     if( rcc->b_2pass )
 932     {
 933         rce = *rcc->rce;
 934         if(pict_type != rce.pict_type)
 935         {
 936             x264_log(h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
 937                      slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type]);
 938         }
 939     }
 940
 941     if( pict_type == SLICE_TYPE_B )
 942     {
 943         rcc->last_satd = 0;
 944         if(h->fenc->b_kept_as_ref)
 945             q = rcc->last_qscale * sqrtf(h->param.rc.f_pb_factor);
 946         else
 947             q = rcc->last_qscale * h->param.rc.f_pb_factor;
 948         return x264_clip3f(q, lmin, lmax);
 949     }
 950     else
 951     {
 952         double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
 953         if( rcc->b_2pass )
 954         {
 955             //FIXME adjust abr_buffer based on distance to the end of the video
 956             int64_t diff = total_bits - (int64_t)rce.expected_bits;
 957             q = rce.new_qscale;
 958             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
 959             if( h->fenc->i_frame > 30 )
 960             {
 961                 /* Adjust quant based on the difference between
 962                  * achieved and expected bitrate so far */
 963                 double time = (double)h->fenc->i_frame / rcc->num_entries;
 964                 double w = x264_clip3f( time*100, 0.0, 1.0 );
 965                 q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
 966             }
 967             q = x264_clip3f( q, lmin, lmax );
 968         }
 969         else /* 1pass ABR */
 970         {
 971             /* Calculate the quantizer which would have produced the desired
 972              * average bitrate if it had been applied to all frames so far.
 973              * Then modulate that quant based on the current frame's complexity
 974              * relative to the average complexity so far (using the 2pass RCEQ).
 975              * Then bias the quant up or down if total size so far was far from
 976              * the target.
 977              * Result: Depending on the value of rate_tolerance, there is a
 978              * tradeoff between quality and bitrate precision. But at large
 979              * tolerances, the bit distribution approaches that of 2pass. */
 980
 981             double wanted_bits, overflow, lmin, lmax;
 982
 983             rcc->last_satd = x264_rc_analyse_slice( h );
 984             rcc->short_term_cplxsum *= 0.5;
 985             rcc->short_term_cplxcount *= 0.5;
 986             rcc->short_term_cplxsum += rcc->last_satd;
 987             rcc->short_term_cplxcount ++;
 988
 989             rce.p_tex_bits = rcc->last_satd;
 990             rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
 991             rce.i_tex_bits = 0;
 992             rce.mv_bits = 0;
 993             rce.p_count = rcc->nmb;
 994             rce.i_count = 0;
 995             rce.s_count = 0;
 996             rce.qscale = 1;
 997             rce.pict_type = pict_type;
 998
 999             if( h->param.rc.i_rf_constant )
1000             {
1001                 q = get_qscale( h, &rce, rcc->rate_factor_constant, h->fenc->i_frame );
1002                 overflow = 1;
1003             }
1004             else
1005             {
1006                 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );
1007
1008                 wanted_bits = h->fenc->i_frame * rcc->bitrate / rcc->fps;
1009                 abr_buffer *= X264_MAX( 1, sqrt(h->fenc->i_frame/25) );
1010                 overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
1011                 q *= overflow;
1012             }
1013
1014             if( pict_type == SLICE_TYPE_I && h->param.i_keyint_max > 1
1015                 /* should test _next_ pict type, but that isn't decided yet */
1016                 && rcc->last_non_b_pict_type != SLICE_TYPE_I )
1017             {
1018                 q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
1019                 q /= fabs( h->param.rc.f_ip_factor );
1020                 q = clip_qscale( h, pict_type, q );
1021             }
1022             else
1023             {
1024                 if( h->stat.i_slice_count[h->param.i_keyint_max > 1 ? SLICE_TYPE_P : SLICE_TYPE_I] < 5 )
1025                 {
1026                     float w = h->stat.i_slice_count[SLICE_TYPE_P] / 5.;
1027                     float q2 = qp2qscale(ABR_INIT_QP);
1028                     q = q*w + q2*(1-w);
1029                 }
1030
1031                 /* Asymmetric clipping, because symmetric would prevent
1032                  * overflow control in areas of rapidly oscillating complexity */
1033                 lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
1034                 lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
1035                 if( overflow > 1.1 )
1036                     lmax *= rcc->lstep;
1037                 else if( overflow < 0.9 )
1038                     lmin /= rcc->lstep;
1039
1040                 q = x264_clip3f(q, lmin, lmax);
1041                 q = clip_qscale(h, pict_type, q);
1042                 //FIXME use get_diff_limited_q() ?
1043             }
1044         }
1045
1046         rcc->last_qscale_for[pict_type] =
1047         rcc->last_qscale = q;
1048
1049         return q;
1050     }
1051 }
1052
1053 static int init_pass2( x264_t *h )
1054 {
1055     x264_ratecontrol_t *rcc = h->rc;
1056     uint64_t all_const_bits = 0;
1057     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
1058     double rate_factor, step, step_mult;
1059     double qblur = h->param.rc.f_qblur;
1060     double cplxblur = h->param.rc.f_complexity_blur;
1061     const int filter_size = (int)(qblur*4) | 1;
1062     double expected_bits;
1063     double *qscale, *blurred_qscale;
1064     int i;
1065
1066     /* find total/average complexity & const_bits */
1067     for(i=0; i<rcc->num_entries; i++){
1068         ratecontrol_entry_t *rce = &rcc->entry[i];
1069         all_const_bits += rce->misc_bits;
1070         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
1071         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
1072         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
1073         rcc->frame_count[rce->pict_type] ++;
1074     }
1075
1076     if( all_available_bits < all_const_bits)
1077     {
1078         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
1079                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
1080         return -1;
1081     }
1082
1083     /* Blur complexities, to reduce local fluctuation of QP.
1084      * We don't blur the QPs directly, because then one very simple frame
1085      * could drag down the QP of a nearby complex frame and give it more
1086      * bits than intended. */
1087     for(i=0; i<rcc->num_entries; i++){
1088         ratecontrol_entry_t *rce = &rcc->entry[i];
1089         double weight_sum = 0;
1090         double cplx_sum = 0;
1091         double weight = 1.0;
1092         int j;
1093         /* weighted average of cplx of future frames */
1094         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
1095             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
1096             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1097             if(weight < .0001)
1098                 break;
1099             weight_sum += weight;
1100             cplx_sum += weight * qscale2bits(rcj, 1);
1101         }
1102         /* weighted average of cplx of past frames */
1103         weight = 1.0;
1104         for(j=0; j<=cplxblur*2 && j<=i; j++){
1105             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
1106             weight_sum += weight;
1107             cplx_sum += weight * qscale2bits(rcj, 1);
1108             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1109             if(weight < .0001)
1110                 break;
1111         }
1112         rce->blurred_complexity = cplx_sum / weight_sum;
1113     }
1114
1115     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1116     if(filter_size > 1)
1117         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1118     else
1119         blurred_qscale = qscale;
1120
1121     /* Search for a factor which, when multiplied by the RCEQ values from
1122      * each frame, adds up to the desired total size.
1123      * There is no exact closed-form solution because of VBV constraints and
1124      * because qscale2bits is not invertible, but we can start with the simple
1125      * approximation of scaling the 1st pass by the ratio of bitrates.
1126      * The search range is probably overkill, but speed doesn't matter here. */
1127
1128     expected_bits = 1;
1129     for(i=0; i<rcc->num_entries; i++)
1130         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0, i));
1131     step_mult = all_available_bits / expected_bits;
1132
1133     rate_factor = 0;
1134     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
1135         expected_bits = 0;
1136         rate_factor += step;
1137
1138         rcc->last_non_b_pict_type = -1;
1139         rcc->last_accum_p_norm = 1;
1140         rcc->accum_p_norm = 0;
1141         rcc->buffer_fill = rcc->buffer_size * h->param.rc.f_vbv_buffer_init;
1142
1143         /* find qscale */
1144         for(i=0; i<rcc->num_entries; i++){
1145             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor, i);
1146         }
1147
1148         /* fixed I/B qscale relative to P */
1149         for(i=rcc->num_entries-1; i>=0; i--){
1150             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
1151             assert(qscale[i] >= 0);
1152         }
1153
1154         /* smooth curve */
1155         if(filter_size > 1){
1156             assert(filter_size%2==1);
1157             for(i=0; i<rcc->num_entries; i++){
1158                 ratecontrol_entry_t *rce = &rcc->entry[i];
1159                 int j;
1160                 double q=0.0, sum=0.0;
1161
1162                 for(j=0; j<filter_size; j++){
1163                     int index = i+j-filter_size/2;
1164                     double d = index-i;
1165                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
1166                     if(index < 0 || index >= rcc->num_entries) continue;
1167                     if(rce->pict_type != rcc->entry[index].pict_type) continue;
1168                     q += qscale[index] * coeff;
1169                     sum += coeff;
1170                 }
1171                 blurred_qscale[i] = q/sum;
1172             }
1173         }
1174
1175         /* find expected bits */
1176         for(i=0; i<rcc->num_entries; i++){
1177             ratecontrol_entry_t *rce = &rcc->entry[i];
1178             double bits;
1179             rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]);
1180             assert(rce->new_qscale >= 0);
1181             bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
1182
1183             rce->expected_bits = expected_bits;
1184             expected_bits += bits;
1185             update_vbv(h, bits);
1186         }
1187
1188 //printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor);
1189         if(expected_bits > all_available_bits) rate_factor -= step;
1190     }
1191
1192     x264_free(qscale);
1193     if(filter_size > 1)
1194         x264_free(blurred_qscale);
1195
1196     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
1197     {
1198         double avgq = 0;
1199         for(i=0; i<rcc->num_entries; i++)
1200             avgq += rcc->entry[i].new_qscale;
1201         avgq = qscale2qp(avgq / rcc->num_entries);
1202
1203         x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
1204         x264_log(h, X264_LOG_ERROR, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
1205                  (float)h->param.rc.i_bitrate,
1206                  expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1207                  avgq);
1208         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1209         {
1210             if(h->param.rc.i_qp_min > 0)
1211                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1212             else
1213                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate\n");
1214         }
1215         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1216         {
1217             if(h->param.rc.i_qp_max < 51)
1218                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1219             else
1220                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate\n");
1221         }
1222         else
1223             x264_log(h, X264_LOG_ERROR, "internal error\n");
1224     }
1225
1226     return 0;
1227 }
1228
1229