git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2005 x264 project
   5  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Loren Merritt <lorenm@u.washington.edu>
   8  *          Michael Niedermayer <michaelni@gmx.at>
   9  *          Måns Rullgård <mru@mru.ath.cx>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <math.h>
  32 #include <limits.h>
  33 #include <assert.h>
  34
  35 #include "common/common.h"
  36 #include "common/cpu.h"
  37 #include "ratecontrol.h"
  38
  39 #if defined(SYS_FREEBSD) || defined(SYS_BEOS) || defined(SYS_NETBSD)
  40 #define exp2f(x) powf( 2, (x) )
  41 #endif
  42 #if defined(_MSC_VER) || defined(SYS_SunOS)
  43 #define exp2f(x) pow( 2, (x) )
  44 #define sqrtf sqrt
  45 #endif
  46 #ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
  47 #define rename(src,dst) (unlink(dst), rename(src,dst))
  48 #endif
  49
  50 typedef struct
  51 {
  52     int pict_type;
  53     int kept_as_ref;
  54     float qscale;
  55     int mv_bits;
  56     int i_tex_bits;
  57     int p_tex_bits;
  58     int misc_bits;
  59     uint64_t expected_bits;
  60     float new_qscale;
  61     int new_qp;
  62     int i_count;
  63     int p_count;
  64     int s_count;
  65     float blurred_complexity;
  66 } ratecontrol_entry_t;
  67
  68 typedef struct
  69 {
  70     double coeff;
  71     double count;
  72     double decay;
  73 } predictor_t;
  74
  75 struct x264_ratecontrol_t
  76 {
  77     /* constants */
  78     int b_abr;
  79     int b_2pass;
  80     double fps;
  81     double bitrate;
  82     double rate_tolerance;
  83     int nmb;                    /* number of macroblocks in a frame */
  84     int qp_constant[5];
  85
  86     /* current frame */
  87     ratecontrol_entry_t *rce;
  88     int qp;                     /* qp for current frame */
  89     float qpa;                  /* average of macroblocks' qp (same as qp if no adaptive quant) */
  90     int slice_type;
  91     int qp_force;
  92
  93     /* VBV stuff */
  94     double buffer_size;
  95     double buffer_fill;
  96     double buffer_rate;         /* # of bits added to buffer_fill after each frame */
  97     predictor_t pred[5];        /* predict frame size from satd */
  98
  99     /* ABR stuff */
 100     int    last_satd;
 101     double last_rceq;
 102     double cplxr_sum;           /* sum of bits*qscale/rceq */
 103     double expected_bits_sum;   /* sum of qscale2bits after rceq, ratefactor, and overflow */
 104     double wanted_bits_window;  /* target bitrate * window */
 105     double cbr_decay;
 106     double short_term_cplxsum;
 107     double short_term_cplxcount;
 108     double rate_factor_constant;
 109
 110     /* 2pass stuff */
 111     FILE *p_stat_file_out;
 112     char *psz_stat_file_tmpname;
 113
 114     int num_entries;            /* number of ratecontrol_entry_ts */
 115     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
 116     double last_qscale;
 117     double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
 118     int last_non_b_pict_type;
 119     double accum_p_qp;          /* for determining I-frame quant */
 120     double accum_p_norm;
 121     double last_accum_p_norm;
 122     double lmin[5];             /* min qscale by frame type */
 123     double lmax[5];
 124     double lstep;               /* max change (multiply) in qscale per frame */
 125     double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
 126     double p_cplx_sum[5];
 127     double mv_bits_sum[5];
 128     int frame_count[5];         /* number of frames of each type */
 129
 130     int i_zones;
 131     x264_zone_t *zones;
 132 };
 133
 134
 135 static int parse_zones( x264_t *h );
 136 static int init_pass2(x264_t *);
 137 static float rate_estimate_qscale( x264_t *h, int pict_type );
 138 static void update_vbv( x264_t *h, int bits );
 139 int  x264_rc_analyse_slice( x264_t *h );
 140
 141 /* Terminology:
 142  * qp = h.264's quantizer
 143  * qscale = linearized quantizer = Lagrange multiplier
 144  */
 145 static inline double qp2qscale(double qp)
 146 {
 147     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 148 }
 149 static inline double qscale2qp(double qscale)
 150 {
 151     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 152 }
 153
 154 /* Texture bitrate is not quite inversely proportional to qscale,
 155  * probably due the the changing number of SKIP blocks.
 156  * MV bits level off at about qp<=12, because the lambda used
 157  * for motion estimation is constant there. */
 158 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 159 {
 160     if(qscale<0.1)
 161         qscale = 0.1;
 162     return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
 163            + rce->mv_bits * pow( X264_MAX(rce->qscale, 12) / X264_MAX(qscale, 12), 0.5 );
 164 }
 165
 166
 167 int x264_ratecontrol_new( x264_t *h )
 168 {
 169     x264_ratecontrol_t *rc;
 170     int i;
 171
 172     x264_cpu_restore( h->param.cpu );
 173
 174     h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
 175     memset(rc, 0, sizeof(*rc));
 176
 177     rc->b_abr = ( h->param.rc.b_cbr || h->param.rc.i_rf_constant ) && !h->param.rc.b_stat_read;
 178     rc->b_2pass = h->param.rc.b_cbr && h->param.rc.b_stat_read;
 179     h->mb.b_variable_qp = 0;
 180
 181     /* FIXME: use integers */
 182     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 183         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 184     else
 185         rc->fps = 25.0;
 186
 187     rc->bitrate = h->param.rc.i_bitrate * 1000;
 188     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
 189     rc->nmb = h->mb.i_mb_count;
 190     rc->last_non_b_pict_type = -1;
 191     rc->cbr_decay = 1.0;
 192
 193     if( rc->b_2pass && h->param.rc.i_rf_constant )
 194         x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
 195     if( h->param.rc.i_vbv_buffer_size && !h->param.rc.b_cbr && !h->param.rc.i_rf_constant )
 196         x264_log(h, X264_LOG_ERROR, "VBV is incompatible with constant QP.\n");
 197     if( h->param.rc.i_vbv_buffer_size && h->param.rc.b_cbr
 198         && h->param.rc.i_vbv_max_bitrate == 0 )
 199     {
 200         x264_log( h, X264_LOG_DEBUG, "VBV maxrate unspecified, assuming CBR\n" );
 201         h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
 202     }
 203     if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
 204         h->param.rc.i_vbv_max_bitrate > 0)
 205         x264_log(h, X264_LOG_ERROR, "max bitrate less than average bitrate, ignored.\n");
 206     else if( h->param.rc.i_vbv_max_bitrate > 0 &&
 207              h->param.rc.i_vbv_buffer_size > 0 )
 208     {
 209         if( h->param.rc.i_vbv_buffer_size < 10 * h->param.rc.i_vbv_max_bitrate / rc->fps ) {
 210             h->param.rc.i_vbv_buffer_size = 10 * h->param.rc.i_vbv_max_bitrate / rc->fps;
 211             x264_log( h, X264_LOG_ERROR, "VBV buffer size too small, using %d kbit\n",
 212                       h->param.rc.i_vbv_buffer_size );
 213         }
 214         rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000 / rc->fps;
 215         rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
 216         rc->buffer_fill = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
 217         rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
 218                       * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
 219     }
 220     else if( h->param.rc.i_vbv_max_bitrate || h->param.rc.i_vbv_buffer_size )
 221         x264_log(h, X264_LOG_ERROR, "VBV maxrate or buffer size specified, but not both.\n");
 222     if(rc->rate_tolerance < 0.01) {
 223         x264_log(h, X264_LOG_ERROR, "bitrate tolerance too small, using .01\n");
 224         rc->rate_tolerance = 0.01;
 225     }
 226
 227     if( rc->b_abr )
 228     {
 229         /* FIXME shouldn't need to arbitrarily specify a QP,
 230          * but this is more robust than BPP measures */
 231 #define ABR_INIT_QP ( h->param.rc.i_rf_constant > 0 ? h->param.rc.i_rf_constant : 24 )
 232         rc->accum_p_norm = .01;
 233         rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
 234         rc->cplxr_sum = .01;
 235         rc->wanted_bits_window = .01;
 236     }
 237
 238     if( h->param.rc.i_rf_constant )
 239     {
 240         /* arbitrary rescaling to make CRF somewhat similar to QP */
 241         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
 242         rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
 243                                  / qp2qscale( h->param.rc.i_rf_constant );
 244     }
 245
 246     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 247     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 248     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 249
 250     rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
 251     rc->last_qscale = qp2qscale(26);
 252     for( i = 0; i < 5; i++ )
 253     {
 254         rc->last_qscale_for[i] = qp2qscale(26);
 255         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 256         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 257         rc->pred[i].coeff= 2.0;
 258         rc->pred[i].count= 1.0;
 259         rc->pred[i].decay= 0.5;
 260     }
 261
 262     if( parse_zones( h ) < 0 )
 263         return -1;
 264
 265     /* Load stat file and init 2pass algo */
 266     if( h->param.rc.b_stat_read )
 267     {
 268         char *p, *stats_in, *stats_buf;
 269
 270         /* read 1st pass stats */
 271         assert( h->param.rc.psz_stat_in );
 272         stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in );
 273         if( !stats_buf )
 274         {
 275             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 276             return -1;
 277         }
 278
 279         /* check whether 1st pass options were compatible with current options */
 280         if( !strncmp( stats_buf, "#options:", 9 ) )
 281         {
 282             int i;
 283             char *opts = stats_buf;
 284             stats_in = strchr( stats_buf, '\n' );
 285             if( !stats_in )
 286                 return -1;
 287             *stats_in = '\0';
 288             stats_in++;
 289
 290             if( ( p = strstr( opts, "bframes=" ) ) && sscanf( p, "bframes=%d", &i )
 291                 && h->param.i_bframe != i )
 292             {
 293                 x264_log( h, X264_LOG_ERROR, "different number of B-frames than 1st pass (%d vs %d)\n",
 294                           h->param.i_bframe, i );
 295                 return -1;
 296             }
 297
 298             /* since B-adapt doesn't (yet) take into account B-pyramid,
 299              * the converse is not a problem */
 300             if( strstr( opts, "b_pyramid=1" ) && !h->param.b_bframe_pyramid )
 301                 x264_log( h, X264_LOG_WARNING, "1st pass used B-pyramid, 2nd doesn't\n" );
 302
 303             if( ( p = strstr( opts, "keyint=" ) ) && sscanf( p, "keyint=%d", &i )
 304                 && h->param.i_keyint_max != i )
 305                 x264_log( h, X264_LOG_WARNING, "different keyint than 1st pass (%d vs %d)\n",
 306                           h->param.i_keyint_max, i );
 307
 308             if( strstr( opts, "qp=0" ) && h->param.rc.b_cbr )
 309                 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
 310         }
 311
 312         /* find number of pics */
 313         p = stats_in;
 314         for(i=-1; p; i++)
 315             p = strchr(p+1, ';');
 316         if(i==0)
 317         {
 318             x264_log(h, X264_LOG_ERROR, "empty stats file\n");
 319             return -1;
 320         }
 321         rc->num_entries = i;
 322
 323         if( h->param.i_frame_total < rc->num_entries && h->param.i_frame_total > 0 )
 324         {
 325             x264_log( h, X264_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
 326                       h->param.i_frame_total, rc->num_entries );
 327         }
 328         if( h->param.i_frame_total > rc->num_entries + h->param.i_bframe )
 329         {
 330             x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
 331                       h->param.i_frame_total, rc->num_entries );
 332             return -1;
 333         }
 334
 335         /* FIXME: ugly padding because VfW drops delayed B-frames */
 336         rc->num_entries += h->param.i_bframe;
 337
 338         rc->entry = (ratecontrol_entry_t*) x264_malloc(rc->num_entries * sizeof(ratecontrol_entry_t));
 339         memset(rc->entry, 0, rc->num_entries * sizeof(ratecontrol_entry_t));
 340
 341         /* init all to skipped p frames */
 342         for(i=0; i<rc->num_entries; i++){
 343             ratecontrol_entry_t *rce = &rc->entry[i];
 344             rce->pict_type = SLICE_TYPE_P;
 345             rce->qscale = rce->new_qscale = qp2qscale(20);
 346             rce->misc_bits = rc->nmb + 10;
 347             rce->new_qp = 0;
 348         }
 349
 350         /* read stats */
 351         p = stats_in;
 352         for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
 353             ratecontrol_entry_t *rce;
 354             int frame_number;
 355             char pict_type;
 356             int e;
 357             char *next;
 358             float qp;
 359
 360             next= strchr(p, ';');
 361             if(next){
 362                 (*next)=0; //sscanf is unbelievably slow on looong strings
 363                 next++;
 364             }
 365             e = sscanf(p, " in:%d ", &frame_number);
 366
 367             if(frame_number < 0 || frame_number >= rc->num_entries)
 368             {
 369                 x264_log(h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i);
 370                 return -1;
 371             }
 372             rce = &rc->entry[frame_number];
 373
 374             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
 375                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 376                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
 377
 378             switch(pict_type){
 379                 case 'I': rce->kept_as_ref = 1;
 380                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 381                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 382                 case 'B': rce->kept_as_ref = 1;
 383                 case 'b': rce->pict_type = SLICE_TYPE_B; break;
 384                 default:  e = -1; break;
 385             }
 386             if(e != 10){
 387                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 388                 return -1;
 389             }
 390             rce->qscale = qp2qscale(qp);
 391             p = next;
 392         }
 393
 394         x264_free(stats_buf);
 395
 396         if(h->param.rc.b_cbr)
 397         {
 398             if(init_pass2(h) < 0) return -1;
 399         } /* else we're using constant quant, so no need to run the bitrate allocation */
 400     }
 401
 402     /* Open output file */
 403     /* If input and output files are the same, output to a temp file
 404      * and move it to the real name only when it's complete */
 405     if( h->param.rc.b_stat_write )
 406     {
 407         char *p;
 408
 409         rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
 410         strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 411         strcat( rc->psz_stat_file_tmpname, ".temp" );
 412
 413         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 414         if( rc->p_stat_file_out == NULL )
 415         {
 416             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 417             return -1;
 418         }
 419
 420         p = x264_param2string( &h->param, 1 );
 421         fprintf( rc->p_stat_file_out, "#options: %s\n", p );
 422         x264_free( p );
 423     }
 424
 425     return 0;
 426 }
 427
 428 static int parse_zones( x264_t *h )
 429 {
 430     x264_ratecontrol_t *rc = h->rc;
 431     int i;
 432     if( h->param.rc.psz_zones && !h->param.rc.i_zones )
 433     {
 434         char *p;
 435         h->param.rc.i_zones = 1;
 436         for( p = h->param.rc.psz_zones; *p; p++ )
 437             h->param.rc.i_zones += (*p == '/');
 438         h->param.rc.zones = x264_malloc( h->param.rc.i_zones * sizeof(x264_zone_t) );
 439         p = h->param.rc.psz_zones;
 440         for( i = 0; i < h->param.rc.i_zones; i++)
 441         {
 442             x264_zone_t *z = &h->param.rc.zones[i];
 443             if( 3 == sscanf(p, "%u,%u,q=%u", &z->i_start, &z->i_end, &z->i_qp) )
 444                 z->b_force_qp = 1;
 445             else if( 3 == sscanf(p, "%u,%u,b=%f", &z->i_start, &z->i_end, &z->f_bitrate_factor) )
 446                 z->b_force_qp = 0;
 447             else
 448             {
 449                 char *slash = strchr(p, '/');
 450                 if(slash) *slash = '\0';
 451                 x264_log( h, X264_LOG_ERROR, "invalid zone: \"%s\"\n", p );
 452                 return -1;
 453             }
 454             p = strchr(p, '/') + 1;
 455         }
 456     }
 457
 458     if( h->param.rc.i_zones > 0 )
 459     {
 460         for( i = 0; i < h->param.rc.i_zones; i++ )
 461         {
 462             x264_zone_t z = h->param.rc.zones[i];
 463             if( z.i_start < 0 || z.i_start > z.i_end )
 464             {
 465                 x264_log( h, X264_LOG_ERROR, "invalid zone: start=%d end=%d\n",
 466                           z.i_start, z.i_end );
 467                 return -1;
 468             }
 469             else if( !z.b_force_qp && z.f_bitrate_factor <= 0 )
 470             {
 471                 x264_log( h, X264_LOG_ERROR, "invalid zone: bitrate_factor=%f\n",
 472                           z.f_bitrate_factor );
 473                 return -1;
 474             }
 475         }
 476
 477         rc->i_zones = h->param.rc.i_zones;
 478         rc->zones = x264_malloc( rc->i_zones * sizeof(x264_zone_t) );
 479         memcpy( rc->zones, h->param.rc.zones, rc->i_zones * sizeof(x264_zone_t) );
 480     }
 481
 482     return 0;
 483 }
 484
 485 void x264_ratecontrol_summary( x264_t *h )
 486 {
 487     x264_ratecontrol_t *rc = h->rc;
 488     if( rc->b_abr && !h->param.rc.i_rf_constant && !h->param.rc.i_vbv_max_bitrate )
 489     {
 490         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
 491         x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
 492                   qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
 493                              * rc->cplxr_sum / rc->wanted_bits_window ) );
 494     }
 495 }
 496
 497 void x264_ratecontrol_delete( x264_t *h )
 498 {
 499     x264_ratecontrol_t *rc = h->rc;
 500
 501     if( rc->p_stat_file_out )
 502     {
 503         fclose( rc->p_stat_file_out );
 504         if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 505             if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
 506             {
 507                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
 508                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 509             }
 510         x264_free( rc->psz_stat_file_tmpname );
 511     }
 512     x264_free( rc->entry );
 513     x264_free( rc->zones );
 514     x264_free( rc );
 515 }
 516
 517 /* Before encoding a frame, choose a QP for it */
 518 void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
 519 {
 520     x264_ratecontrol_t *rc = h->rc;
 521
 522     x264_cpu_restore( h->param.cpu );
 523
 524     rc->qp_force = i_force_qp;
 525     rc->slice_type = i_slice_type;
 526
 527     if( i_force_qp )
 528     {
 529         rc->qpa = rc->qp = i_force_qp - 1;
 530     }
 531     else if( rc->b_abr )
 532     {
 533         rc->qpa = rc->qp =
 534             x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 );
 535     }
 536     else if( rc->b_2pass )
 537     {
 538         int frame = h->fenc->i_frame;
 539         ratecontrol_entry_t *rce;
 540         assert( frame >= 0 && frame < rc->num_entries );
 541         rce = h->rc->rce = &h->rc->entry[frame];
 542
 543         rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
 544         rc->qpa = rc->qp = rce->new_qp =
 545             x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 );
 546     }
 547     else /* CQP */
 548     {
 549         int q;
 550         if( i_slice_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
 551             q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
 552         else
 553             q = rc->qp_constant[ i_slice_type ];
 554         rc->qpa = rc->qp = q;
 555     }
 556 }
 557
 558 void x264_ratecontrol_mb( x264_t *h, int bits )
 559 {
 560     /* currently no adaptive quant */
 561 }
 562
 563 int x264_ratecontrol_qp( x264_t *h )
 564 {
 565     return h->rc->qp;
 566 }
 567
 568 /* In 2pass, force the same frame types as in the 1st pass */
 569 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 570 {
 571     x264_ratecontrol_t *rc = h->rc;
 572     if( h->param.rc.b_stat_read )
 573     {
 574         if( frame_num >= rc->num_entries )
 575         {
 576             /* We could try to initialize everything required for ABR and
 577              * adaptive B-frames, but that would be complicated.
 578              * So just calculate the average QP used so far. */
 579
 580             h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
 581                                       : 1 + h->stat.i_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
 582             rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
 583             rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 584             rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 585
 586             x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
 587             x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
 588             if( h->param.b_bframe_adaptive )
 589                 x264_log(h, X264_LOG_ERROR, "disabling adaptive B-frames\n");
 590
 591             rc->b_abr = 0;
 592             rc->b_2pass = 0;
 593             h->param.rc.b_cbr = 0;
 594             h->param.rc.b_stat_read = 0;
 595             h->param.b_bframe_adaptive = 0;
 596             if( h->param.i_bframe > 1 )
 597                 h->param.i_bframe = 1;
 598             return X264_TYPE_P;
 599         }
 600         switch( rc->entry[frame_num].pict_type )
 601         {
 602             case SLICE_TYPE_I:
 603                 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
 604
 605             case SLICE_TYPE_B:
 606                 return rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
 607
 608             case SLICE_TYPE_P:
 609             default:
 610                 return X264_TYPE_P;
 611         }
 612     }
 613     else
 614     {
 615         return X264_TYPE_AUTO;
 616     }
 617 }
 618
 619 /* After encoding one frame, save stats and update ratecontrol state */
 620 void x264_ratecontrol_end( x264_t *h, int bits )
 621 {
 622     x264_ratecontrol_t *rc = h->rc;
 623     const int *mbs = h->stat.frame.i_mb_count;
 624     int i;
 625
 626     x264_cpu_restore( h->param.cpu );
 627
 628     h->stat.frame.i_mb_count_skip = mbs[P_SKIP] + mbs[B_SKIP];
 629     h->stat.frame.i_mb_count_i = mbs[I_16x16] + mbs[I_8x8] + mbs[I_4x4];
 630     h->stat.frame.i_mb_count_p = mbs[P_L0] + mbs[P_8x8];
 631     for( i = B_DIRECT; i < B_8x8; i++ )
 632         h->stat.frame.i_mb_count_p += mbs[i];
 633
 634     if( h->param.rc.b_stat_write )
 635     {
 636         char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
 637                     : rc->slice_type==SLICE_TYPE_P ? 'P'
 638                     : h->fenc->b_kept_as_ref ? 'B' : 'b';
 639         fprintf( rc->p_stat_file_out,
 640                  "in:%d out:%d type:%c q:%.2f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
 641                  h->fenc->i_frame, h->i_frame-1,
 642                  c_type, rc->qpa,
 643                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
 644                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
 645                  h->stat.frame.i_mb_count_i,
 646                  h->stat.frame.i_mb_count_p,
 647                  h->stat.frame.i_mb_count_skip);
 648     }
 649
 650     if( rc->b_abr )
 651     {
 652         if( rc->slice_type != SLICE_TYPE_B )
 653             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / rc->last_rceq;
 654         else
 655         {
 656             /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
 657              * Not perfectly accurate with B-refs, but good enough. */
 658             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / (rc->last_rceq * fabs(h->param.rc.f_pb_factor));
 659         }
 660         rc->cplxr_sum *= rc->cbr_decay;
 661         rc->wanted_bits_window += rc->bitrate / rc->fps;
 662         rc->wanted_bits_window *= rc->cbr_decay;
 663
 664         rc->accum_p_qp   *= .95;
 665         rc->accum_p_norm *= .95;
 666         rc->accum_p_norm += 1;
 667         if( rc->slice_type == SLICE_TYPE_I )
 668             rc->accum_p_qp += rc->qpa * fabs(h->param.rc.f_ip_factor);
 669         else
 670             rc->accum_p_qp += rc->qpa;
 671     }
 672
 673     if( rc->b_2pass )
 674     {
 675         rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
 676     }
 677
 678     update_vbv( h, bits );
 679
 680     if( rc->slice_type != SLICE_TYPE_B )
 681         rc->last_non_b_pict_type = rc->slice_type;
 682 }
 683
 684 /****************************************************************************
 685  * 2 pass functions
 686  ***************************************************************************/
 687
 688 double x264_eval( char *s, double *const_value, const char **const_name,
 689                   double (**func1)(void *, double), const char **func1_name,
 690                   double (**func2)(void *, double, double), char **func2_name,
 691                   void *opaque );
 692
 693 /**
 694  * modify the bitrate curve from pass1 for one frame
 695  */
 696 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor, int frame_num)
 697 {
 698     x264_ratecontrol_t *rcc= h->rc;
 699     const int pict_type = rce->pict_type;
 700     double q;
 701     int i;
 702
 703     double const_values[]={
 704         rce->i_tex_bits * rce->qscale,
 705         rce->p_tex_bits * rce->qscale,
 706         (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
 707         rce->mv_bits * rce->qscale,
 708         (double)rce->i_count / rcc->nmb,
 709         (double)rce->p_count / rcc->nmb,
 710         (double)rce->s_count / rcc->nmb,
 711         rce->pict_type == SLICE_TYPE_I,
 712         rce->pict_type == SLICE_TYPE_P,
 713         rce->pict_type == SLICE_TYPE_B,
 714         h->param.rc.f_qcompress,
 715         rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
 716         rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 717         rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 718         rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
 719         (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
 720         rce->blurred_complexity,
 721         0
 722     };
 723     static const char *const_names[]={
 724         "iTex",
 725         "pTex",
 726         "tex",
 727         "mv",
 728         "iCount",
 729         "pCount",
 730         "sCount",
 731         "isI",
 732         "isP",
 733         "isB",
 734         "qComp",
 735         "avgIITex",
 736         "avgPITex",
 737         "avgPPTex",
 738         "avgBPTex",
 739         "avgTex",
 740         "blurCplx",
 741         NULL
 742     };
 743     static double (*func1[])(void *, double)={
 744 //      (void *)bits2qscale,
 745         (void *)qscale2bits,
 746         NULL
 747     };
 748     static const char *func1_names[]={
 749 //      "bits2qp",
 750         "qp2bits",
 751         NULL
 752     };
 753
 754     q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
 755
 756     // avoid NaN's in the rc_eq
 757     if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
 758         q = rcc->last_qscale;
 759     else {
 760         rcc->last_rceq = q;
 761         q /= rate_factor;
 762         rcc->last_qscale = q;
 763     }
 764
 765     for( i = rcc->i_zones-1; i >= 0; i-- )
 766     {
 767         x264_zone_t *z = &rcc->zones[i];
 768         if( frame_num >= z->i_start && frame_num <= z->i_end )
 769         {
 770             if( z->b_force_qp )
 771                 q = qp2qscale(z->i_qp);
 772             else
 773                 q /= z->f_bitrate_factor;
 774             break;
 775         }
 776     }
 777
 778     return q;
 779 }
 780
 781 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 782 {
 783     x264_ratecontrol_t *rcc = h->rc;
 784     const int pict_type = rce->pict_type;
 785
 786     // force I/B quants as a function of P quants
 787     const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
 788     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
 789     if( pict_type == SLICE_TYPE_I )
 790     {
 791         double iq = q;
 792         double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 793         double ip_factor = fabs( h->param.rc.f_ip_factor );
 794         /* don't apply ip_factor if the following frame is also I */
 795         if( rcc->accum_p_norm <= 0 )
 796             q = iq;
 797         else if( h->param.rc.f_ip_factor < 0 )
 798             q = iq / ip_factor;
 799         else if( rcc->accum_p_norm >= 1 )
 800             q = pq / ip_factor;
 801         else
 802             q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
 803     }
 804     else if( pict_type == SLICE_TYPE_B )
 805     {
 806         if( h->param.rc.f_pb_factor > 0 )
 807             q = last_non_b_q;
 808         if( !rce->kept_as_ref )
 809             q *= fabs( h->param.rc.f_pb_factor );
 810     }
 811     else if( pict_type == SLICE_TYPE_P
 812              && rcc->last_non_b_pict_type == SLICE_TYPE_P
 813              && rce->i_tex_bits + rce->p_tex_bits == 0 )
 814     {
 815         q = last_p_q;
 816     }
 817
 818     /* last qscale / qdiff stuff */
 819     if(rcc->last_non_b_pict_type==pict_type
 820        && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
 821     {
 822         double last_q = rcc->last_qscale_for[pict_type];
 823         double max_qscale = last_q * rcc->lstep;
 824         double min_qscale = last_q / rcc->lstep;
 825
 826         if     (q > max_qscale) q = max_qscale;
 827         else if(q < min_qscale) q = min_qscale;
 828     }
 829
 830     rcc->last_qscale_for[pict_type] = q;
 831     if(pict_type!=SLICE_TYPE_B)
 832         rcc->last_non_b_pict_type = pict_type;
 833     if(pict_type==SLICE_TYPE_I)
 834     {
 835         rcc->last_accum_p_norm = rcc->accum_p_norm;
 836         rcc->accum_p_norm = 0;
 837         rcc->accum_p_qp = 0;
 838     }
 839     if(pict_type==SLICE_TYPE_P)
 840     {
 841         float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
 842         rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
 843         rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
 844     }
 845     return q;
 846 }
 847
 848 static double predict_size( predictor_t *p, double q, double var )
 849 {
 850      return p->coeff*var / (q*p->count);
 851 }
 852
 853 static void update_predictor( predictor_t *p, double q, double var, double bits )
 854 {
 855     p->count *= p->decay;
 856     p->coeff *= p->decay;
 857     p->count ++;
 858     p->coeff += bits*q / var;
 859 }
 860
 861 static void update_vbv( x264_t *h, int bits )
 862 {
 863     x264_ratecontrol_t *rcc = h->rc;
 864     if( !rcc->buffer_size )
 865         return;
 866
 867     rcc->buffer_fill += rcc->buffer_rate - bits;
 868     if( rcc->buffer_fill < 0 && !rcc->b_2pass )
 869         x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rcc->buffer_fill );
 870     rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size );
 871
 872     if(rcc->last_satd > 100)
 873         update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits );
 874 }
 875
 876 // apply VBV constraints and clip qscale to between lmin and lmax
 877 static double clip_qscale( x264_t *h, int pict_type, double q )
 878 {
 879     x264_ratecontrol_t *rcc = h->rc;
 880     double lmin = rcc->lmin[pict_type];
 881     double lmax = rcc->lmax[pict_type];
 882     double q0 = q;
 883
 884     /* B-frames are not directly subject to VBV,
 885      * since they are controlled by the P-frames' QPs.
 886      * FIXME: in 2pass we could modify previous frames' QP too,
 887      *        instead of waiting for the buffer to fill */
 888     if( rcc->buffer_size &&
 889         ( pict_type == SLICE_TYPE_P ||
 890           ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
 891     {
 892         if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
 893             q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
 894     }
 895     /* Now a hard threshold to make sure the frame fits in VBV.
 896      * This one is mostly for I-frames. */
 897     if( rcc->buffer_size && rcc->last_satd > 0 )
 898     {
 899         double bits = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd );
 900         double qf = 1.0;
 901         if( bits > rcc->buffer_fill/2 )
 902             qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
 903         q /= qf;
 904         bits *= qf;
 905         if( bits < rcc->buffer_rate/2 )
 906             q *= bits*2/rcc->buffer_rate;
 907         q = X264_MAX( q0, q );
 908     }
 909
 910     if(lmin==lmax)
 911         return lmin;
 912     else if(rcc->b_2pass)
 913     {
 914         double min2 = log(lmin);
 915         double max2 = log(lmax);
 916         q = (log(q) - min2)/(max2-min2) - 0.5;
 917         q = 1.0/(1.0 + exp(-4*q));
 918         q = q*(max2-min2) + min2;
 919         return exp(q);
 920     }
 921     else
 922         return x264_clip3f(q, lmin, lmax);
 923 }
 924
 925 // update qscale for 1 frame based on actual bits used so far
 926 static float rate_estimate_qscale(x264_t *h, int pict_type)
 927 {
 928     float q;
 929     x264_ratecontrol_t *rcc = h->rc;
 930     ratecontrol_entry_t rce;
 931     double lmin = rcc->lmin[pict_type];
 932     double lmax = rcc->lmax[pict_type];
 933     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
 934                           + h->stat.i_slice_size[SLICE_TYPE_P]
 935                           + h->stat.i_slice_size[SLICE_TYPE_B]);
 936
 937     if( rcc->b_2pass )
 938     {
 939         rce = *rcc->rce;
 940         if(pict_type != rce.pict_type)
 941         {
 942             x264_log(h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
 943                      slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type]);
 944         }
 945     }
 946
 947     if( pict_type == SLICE_TYPE_B )
 948     {
 949         rcc->last_satd = 0;
 950         if(h->fenc->b_kept_as_ref)
 951             q = rcc->last_qscale * sqrtf(h->param.rc.f_pb_factor);
 952         else
 953             q = rcc->last_qscale * h->param.rc.f_pb_factor;
 954         return x264_clip3f(q, lmin, lmax);
 955     }
 956     else
 957     {
 958         double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
 959         if( rcc->b_2pass )
 960         {
 961             //FIXME adjust abr_buffer based on distance to the end of the video
 962             int64_t diff = total_bits - (int64_t)rce.expected_bits;
 963             q = rce.new_qscale;
 964             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
 965             if( h->fenc->i_frame > 30 )
 966             {
 967                 /* Adjust quant based on the difference between
 968                  * achieved and expected bitrate so far */
 969                 double time = (double)h->fenc->i_frame / rcc->num_entries;
 970                 double w = x264_clip3f( time*100, 0.0, 1.0 );
 971                 q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
 972             }
 973             q = x264_clip3f( q, lmin, lmax );
 974         }
 975         else /* 1pass ABR */
 976         {
 977             /* Calculate the quantizer which would have produced the desired
 978              * average bitrate if it had been applied to all frames so far.
 979              * Then modulate that quant based on the current frame's complexity
 980              * relative to the average complexity so far (using the 2pass RCEQ).
 981              * Then bias the quant up or down if total size so far was far from
 982              * the target.
 983              * Result: Depending on the value of rate_tolerance, there is a
 984              * tradeoff between quality and bitrate precision. But at large
 985              * tolerances, the bit distribution approaches that of 2pass. */
 986
 987             double wanted_bits, overflow, lmin, lmax;
 988
 989             rcc->last_satd = x264_rc_analyse_slice( h );
 990             rcc->short_term_cplxsum *= 0.5;
 991             rcc->short_term_cplxcount *= 0.5;
 992             rcc->short_term_cplxsum += rcc->last_satd;
 993             rcc->short_term_cplxcount ++;
 994
 995             rce.p_tex_bits = rcc->last_satd;
 996             rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
 997             rce.i_tex_bits = 0;
 998             rce.mv_bits = 0;
 999             rce.p_count = rcc->nmb;
1000             rce.i_count = 0;
1001             rce.s_count = 0;
1002             rce.qscale = 1;
1003             rce.pict_type = pict_type;
1004
1005             if( h->param.rc.i_rf_constant )
1006             {
1007                 q = get_qscale( h, &rce, rcc->rate_factor_constant, h->fenc->i_frame );
1008                 overflow = 1;
1009             }
1010             else
1011             {
1012                 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );
1013
1014                 wanted_bits = h->fenc->i_frame * rcc->bitrate / rcc->fps;
1015                 abr_buffer *= X264_MAX( 1, sqrt(h->fenc->i_frame/25) );
1016                 overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
1017                 q *= overflow;
1018             }
1019
1020             if( pict_type == SLICE_TYPE_I && h->param.i_keyint_max > 1
1021                 /* should test _next_ pict type, but that isn't decided yet */
1022                 && rcc->last_non_b_pict_type != SLICE_TYPE_I )
1023             {
1024                 q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
1025                 q /= fabs( h->param.rc.f_ip_factor );
1026                 q = clip_qscale( h, pict_type, q );
1027             }
1028             else
1029             {
1030                 if( h->stat.i_slice_count[h->param.i_keyint_max > 1 ? SLICE_TYPE_P : SLICE_TYPE_I] < 5 )
1031                 {
1032                     float w = h->stat.i_slice_count[SLICE_TYPE_P] / 5.;
1033                     float q2 = qp2qscale(ABR_INIT_QP);
1034                     q = q*w + q2*(1-w);
1035                 }
1036
1037                 /* Asymmetric clipping, because symmetric would prevent
1038                  * overflow control in areas of rapidly oscillating complexity */
1039                 lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
1040                 lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
1041                 if( overflow > 1.1 )
1042                     lmax *= rcc->lstep;
1043                 else if( overflow < 0.9 )
1044                     lmin /= rcc->lstep;
1045
1046                 q = x264_clip3f(q, lmin, lmax);
1047                 q = clip_qscale(h, pict_type, q);
1048                 //FIXME use get_diff_limited_q() ?
1049             }
1050         }
1051
1052         rcc->last_qscale_for[pict_type] =
1053         rcc->last_qscale = q;
1054
1055         return q;
1056     }
1057 }
1058
1059 static int init_pass2( x264_t *h )
1060 {
1061     x264_ratecontrol_t *rcc = h->rc;
1062     uint64_t all_const_bits = 0;
1063     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
1064     double rate_factor, step, step_mult;
1065     double qblur = h->param.rc.f_qblur;
1066     double cplxblur = h->param.rc.f_complexity_blur;
1067     const int filter_size = (int)(qblur*4) | 1;
1068     double expected_bits;
1069     double *qscale, *blurred_qscale;
1070     int i;
1071
1072     /* find total/average complexity & const_bits */
1073     for(i=0; i<rcc->num_entries; i++){
1074         ratecontrol_entry_t *rce = &rcc->entry[i];
1075         all_const_bits += rce->misc_bits;
1076         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
1077         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
1078         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
1079         rcc->frame_count[rce->pict_type] ++;
1080     }
1081
1082     if( all_available_bits < all_const_bits)
1083     {
1084         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
1085                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
1086         return -1;
1087     }
1088
1089     /* Blur complexities, to reduce local fluctuation of QP.
1090      * We don't blur the QPs directly, because then one very simple frame
1091      * could drag down the QP of a nearby complex frame and give it more
1092      * bits than intended. */
1093     for(i=0; i<rcc->num_entries; i++){
1094         ratecontrol_entry_t *rce = &rcc->entry[i];
1095         double weight_sum = 0;
1096         double cplx_sum = 0;
1097         double weight = 1.0;
1098         int j;
1099         /* weighted average of cplx of future frames */
1100         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
1101             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
1102             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1103             if(weight < .0001)
1104                 break;
1105             weight_sum += weight;
1106             cplx_sum += weight * qscale2bits(rcj, 1);
1107         }
1108         /* weighted average of cplx of past frames */
1109         weight = 1.0;
1110         for(j=0; j<=cplxblur*2 && j<=i; j++){
1111             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
1112             weight_sum += weight;
1113             cplx_sum += weight * qscale2bits(rcj, 1);
1114             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
1115             if(weight < .0001)
1116                 break;
1117         }
1118         rce->blurred_complexity = cplx_sum / weight_sum;
1119     }
1120
1121     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1122     if(filter_size > 1)
1123         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
1124     else
1125         blurred_qscale = qscale;
1126
1127     /* Search for a factor which, when multiplied by the RCEQ values from
1128      * each frame, adds up to the desired total size.
1129      * There is no exact closed-form solution because of VBV constraints and
1130      * because qscale2bits is not invertible, but we can start with the simple
1131      * approximation of scaling the 1st pass by the ratio of bitrates.
1132      * The search range is probably overkill, but speed doesn't matter here. */
1133
1134     expected_bits = 1;
1135     for(i=0; i<rcc->num_entries; i++)
1136         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0, i));
1137     step_mult = all_available_bits / expected_bits;
1138
1139     rate_factor = 0;
1140     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
1141         expected_bits = 0;
1142         rate_factor += step;
1143
1144         rcc->last_non_b_pict_type = -1;
1145         rcc->last_accum_p_norm = 1;
1146         rcc->accum_p_norm = 0;
1147         rcc->buffer_fill = rcc->buffer_size * h->param.rc.f_vbv_buffer_init;
1148
1149         /* find qscale */
1150         for(i=0; i<rcc->num_entries; i++){
1151             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor, i);
1152         }
1153
1154         /* fixed I/B qscale relative to P */
1155         for(i=rcc->num_entries-1; i>=0; i--){
1156             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
1157             assert(qscale[i] >= 0);
1158         }
1159
1160         /* smooth curve */
1161         if(filter_size > 1){
1162             assert(filter_size%2==1);
1163             for(i=0; i<rcc->num_entries; i++){
1164                 ratecontrol_entry_t *rce = &rcc->entry[i];
1165                 int j;
1166                 double q=0.0, sum=0.0;
1167
1168                 for(j=0; j<filter_size; j++){
1169                     int index = i+j-filter_size/2;
1170                     double d = index-i;
1171                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
1172                     if(index < 0 || index >= rcc->num_entries) continue;
1173                     if(rce->pict_type != rcc->entry[index].pict_type) continue;
1174                     q += qscale[index] * coeff;
1175                     sum += coeff;
1176                 }
1177                 blurred_qscale[i] = q/sum;
1178             }
1179         }
1180
1181         /* find expected bits */
1182         for(i=0; i<rcc->num_entries; i++){
1183             ratecontrol_entry_t *rce = &rcc->entry[i];
1184             double bits;
1185             rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]);
1186             assert(rce->new_qscale >= 0);
1187             bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
1188
1189             rce->expected_bits = expected_bits;
1190             expected_bits += bits;
1191             update_vbv(h, bits);
1192         }
1193
1194 //printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor);
1195         if(expected_bits > all_available_bits) rate_factor -= step;
1196     }
1197
1198     x264_free(qscale);
1199     if(filter_size > 1)
1200         x264_free(blurred_qscale);
1201
1202     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
1203     {
1204         double avgq = 0;
1205         for(i=0; i<rcc->num_entries; i++)
1206             avgq += rcc->entry[i].new_qscale;
1207         avgq = qscale2qp(avgq / rcc->num_entries);
1208
1209         x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
1210         x264_log(h, X264_LOG_ERROR, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
1211                  (float)h->param.rc.i_bitrate,
1212                  expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1213                  avgq);
1214         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1215         {
1216             if(h->param.rc.i_qp_min > 0)
1217                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1218             else
1219                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate\n");
1220         }
1221         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1222         {
1223             if(h->param.rc.i_qp_max < 51)
1224                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1225             else
1226                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate\n");
1227         }
1228         else
1229             x264_log(h, X264_LOG_ERROR, "internal error\n");
1230     }
1231
1232     return 0;
1233 }
1234
1235