git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2005 x264 project
   5  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Loren Merritt <lorenm@u.washington.edu>
   8  *          Michael Niedermayer <michaelni@gmx.at>
   9  *          Måns Rullgård <mru@mru.ath.cx>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <math.h>
  32 #include <limits.h>
  33 #include <assert.h>
  34
  35 #include "common/common.h"
  36 #include "common/cpu.h"
  37 #include "common/macroblock.h"
  38 #include "ratecontrol.h"
  39
  40 #if defined(SYS_FREEBSD) || defined(SYS_BEOS)
  41 #define exp2f(x) powf( 2, (x) )
  42 #endif
  43 #ifdef _MSC_VER
  44 #define exp2f(x) pow( 2, (x) )
  45 #define sqrtf sqrt
  46 #endif
  47 #ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
  48 #define rename(src,dst) (unlink(dst), rename(src,dst))
  49 #endif
  50
  51 typedef struct
  52 {
  53     int pict_type;
  54     int kept_as_ref;
  55     float qscale;
  56     int mv_bits;
  57     int i_tex_bits;
  58     int p_tex_bits;
  59     int misc_bits;
  60     uint64_t expected_bits;
  61     float new_qscale;
  62     int new_qp;
  63     int i_count;
  64     int p_count;
  65     int s_count;
  66     float blurred_complexity;
  67 } ratecontrol_entry_t;
  68
  69 typedef struct
  70 {
  71     double coeff;
  72     double count;
  73     double decay;
  74 } predictor_t;
  75
  76 struct x264_ratecontrol_t
  77 {
  78     /* constants */
  79     int b_abr;
  80     int b_2pass;
  81     double fps;
  82     double bitrate;
  83     double rate_tolerance;
  84     int nmb;                    /* number of macroblocks in a frame */
  85     int qp_constant[5];
  86
  87     /* current frame */
  88     ratecontrol_entry_t *rce;
  89     int qp;                     /* qp for current frame */
  90     float qpa;                  /* average of macroblocks' qp (same as qp if no adaptive quant) */
  91     int slice_type;
  92     int qp_force;
  93
  94     /* VBV stuff */
  95     double buffer_size;
  96     double buffer_fill;
  97     double buffer_rate;         /* # of bits added to buffer_fill after each frame */
  98     predictor_t pred[5];        /* predict frame size from satd */
  99
 100     /* ABR stuff */
 101     int    last_satd;
 102     double last_rceq;
 103     double cplxr_sum;           /* sum of bits*qscale/rceq */
 104     double expected_bits_sum;   /* sum of qscale2bits after rceq, ratefactor, and overflow */
 105     double wanted_bits_window;  /* target bitrate * window */
 106     double cbr_decay;
 107     double short_term_cplxsum;
 108     double short_term_cplxcount;
 109
 110     /* 2pass stuff */
 111     FILE *p_stat_file_out;
 112     char *psz_stat_file_tmpname;
 113
 114     int num_entries;            /* number of ratecontrol_entry_ts */
 115     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
 116     double last_qscale;
 117     double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
 118     int last_non_b_pict_type;
 119     double accum_p_qp;          /* for determining I-frame quant */
 120     double accum_p_norm;
 121     double last_accum_p_norm;
 122     double lmin[5];             /* min qscale by frame type */
 123     double lmax[5];
 124     double lstep;               /* max change (multiply) in qscale per frame */
 125     double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
 126     double p_cplx_sum[5];
 127     double mv_bits_sum[5];
 128     int frame_count[5];         /* number of frames of each type */
 129
 130     int i_zones;
 131     x264_zone_t *zones;
 132 };
 133
 134
 135 static int init_pass2(x264_t *);
 136 static float rate_estimate_qscale( x264_t *h, int pict_type );
 137 static void update_vbv( x264_t *h, int bits );
 138 int  x264_rc_analyse_slice( x264_t *h );
 139
 140 /* Terminology:
 141  * qp = h.264's quantizer
 142  * qscale = linearized quantizer = Lagrange multiplier
 143  */
 144 static inline double qp2qscale(double qp)
 145 {
 146     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 147 }
 148 static inline double qscale2qp(double qscale)
 149 {
 150     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 151 }
 152
 153 /* Texture bitrate is not quite inversely proportional to qscale,
 154  * probably due the the changing number of SKIP blocks.
 155  * MV bits level off at about qp<=12, because the lambda used
 156  * for motion estimation is constant there. */
 157 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 158 {
 159     if(qscale<0.1)
 160         qscale = 0.1;
 161     return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
 162            + rce->mv_bits * pow( X264_MAX(rce->qscale, 12) / X264_MAX(qscale, 12), 0.5 );
 163 }
 164
 165 /* There is no analytical inverse to the above formula. */
 166 #if 0
 167 static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
 168 {
 169     if(bits<1.0)
 170         bits = 1.0;
 171     return (rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits + .1) * rce->qscale / bits;
 172 }
 173 #endif
 174
 175
 176 int x264_ratecontrol_new( x264_t *h )
 177 {
 178     x264_ratecontrol_t *rc;
 179     int i;
 180
 181     x264_cpu_restore( h->param.cpu );
 182
 183     h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
 184     memset(rc, 0, sizeof(*rc));
 185
 186     rc->b_abr = h->param.rc.b_cbr && !h->param.rc.b_stat_read;
 187     rc->b_2pass = h->param.rc.b_cbr && h->param.rc.b_stat_read;
 188     h->mb.b_variable_qp = 0;
 189
 190     /* FIXME: use integers */
 191     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 192         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 193     else
 194         rc->fps = 25.0;
 195
 196     rc->bitrate = h->param.rc.i_bitrate * 1000;
 197     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
 198     rc->nmb = h->mb.i_mb_count;
 199     rc->last_non_b_pict_type = -1;
 200     rc->cbr_decay = 1.0;
 201
 202     if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
 203         h->param.rc.i_vbv_max_bitrate > 0)
 204         x264_log(h, X264_LOG_ERROR, "max bitrate less than average bitrate, ignored.\n");
 205     else if( h->param.rc.i_vbv_max_bitrate > 0 &&
 206              h->param.rc.i_vbv_buffer_size > 0 )
 207     {
 208         if( h->param.rc.i_vbv_buffer_size < 10 * h->param.rc.i_vbv_max_bitrate / rc->fps ) {
 209             h->param.rc.i_vbv_buffer_size = 10 * h->param.rc.i_vbv_max_bitrate / rc->fps;
 210             x264_log( h, X264_LOG_ERROR, "VBV buffer size too small, using %d kbit\n",
 211                       h->param.rc.i_vbv_buffer_size );
 212         }
 213         rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000 / rc->fps;
 214         rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
 215         rc->buffer_fill = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
 216         rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
 217                       * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
 218     }
 219     else if( h->param.rc.i_vbv_max_bitrate || h->param.rc.i_vbv_buffer_size )
 220         x264_log(h, X264_LOG_ERROR, "VBV maxrate or buffer size specified, but not both.\n");
 221
 222     if( rc->b_abr )
 223     {
 224         /* FIXME shouldn't need to arbitrarily specify a QP,
 225          * but this is more robust than BPP measures */
 226 #define ABR_INIT_QP 24
 227         rc->accum_p_norm = .01;
 228         rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
 229         rc->cplxr_sum = .01;
 230         rc->wanted_bits_window = .01;
 231     }
 232
 233     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 234     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 235     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 236
 237     rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
 238     rc->last_qscale = qp2qscale(26);
 239     for( i = 0; i < 5; i++ )
 240     {
 241         rc->last_qscale_for[i] = qp2qscale(26);
 242         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 243         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 244         rc->pred[i].coeff= 2.0;
 245         rc->pred[i].count= 1.0;
 246         rc->pred[i].decay= 0.5;
 247     }
 248 #if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
 249     rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 250     rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 251     rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 252     rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 253 #endif
 254
 255     if( h->param.rc.i_zones > 0 )
 256     {
 257         for( i = 0; i < h->param.rc.i_zones; i++ )
 258         {
 259             x264_zone_t z = h->param.rc.zones[i];
 260             if( z.i_start < 0 || z.i_start > z.i_end )
 261             {
 262                 x264_log( h, X264_LOG_ERROR, "invalid zone: start=%d end=%d\n",
 263                           z.i_start, z.i_end );
 264                 return -1;
 265             }
 266             else if( !z.b_force_qp && z.f_bitrate_factor <= 0 )
 267             {
 268                 x264_log( h, X264_LOG_ERROR, "invalid zone: bitrate_factor=%f\n",
 269                           z.f_bitrate_factor );
 270                 return -1;
 271             }
 272         }
 273
 274         rc->i_zones = h->param.rc.i_zones;
 275         rc->zones = x264_malloc( rc->i_zones * sizeof(x264_zone_t) );
 276         memcpy( rc->zones, h->param.rc.zones, rc->i_zones * sizeof(x264_zone_t) );
 277     }
 278
 279     /* Load stat file and init 2pass algo */
 280     if( h->param.rc.b_stat_read )
 281     {
 282         int stats_size;
 283         char *p, *stats_in;
 284         FILE *stats_file;
 285
 286         /* read 1st pass stats */
 287         assert( h->param.rc.psz_stat_in );
 288         stats_file = fopen( h->param.rc.psz_stat_in, "rb");
 289         if(!stats_file)
 290         {
 291             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 292             return -1;
 293         }
 294         // FIXME: error checking
 295         fseek(stats_file, 0, SEEK_END);
 296         stats_size = ftell(stats_file);
 297         fseek(stats_file, 0, SEEK_SET);
 298         stats_in = x264_malloc(stats_size+10);
 299         fread(stats_in, 1, stats_size, stats_file);
 300         fclose(stats_file);
 301
 302         /* find number of pics */
 303         p = stats_in;
 304         for(i=-1; p; i++)
 305             p = strchr(p+1, ';');
 306         if(i==0)
 307         {
 308             x264_log(h, X264_LOG_ERROR, "empty stats file\n");
 309             return -1;
 310         }
 311         i += h->param.i_bframe;
 312         rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
 313         memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
 314         /* FIXME: num_entries is sometimes treated as number of frames in the video */
 315         rc->num_entries= i;
 316
 317         /* init all to skipped p frames */
 318         for(i=0; i<rc->num_entries; i++){
 319             ratecontrol_entry_t *rce = &rc->entry[i];
 320             rce->pict_type = SLICE_TYPE_P;
 321             rce->qscale = rce->new_qscale = qp2qscale(20);
 322             rce->misc_bits = rc->nmb + 10;
 323             rce->new_qp = 0;
 324         }
 325
 326         /* read stats */
 327         p = stats_in;
 328         for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
 329             ratecontrol_entry_t *rce;
 330             int frame_number;
 331             char pict_type;
 332             int e;
 333             char *next;
 334             float qp;
 335
 336             next= strchr(p, ';');
 337             if(next){
 338                 (*next)=0; //sscanf is unbelievably slow on looong strings
 339                 next++;
 340             }
 341             e = sscanf(p, " in:%d ", &frame_number);
 342
 343             if(frame_number < 0 || frame_number >= rc->num_entries)
 344             {
 345                 x264_log(h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i);
 346                 return -1;
 347             }
 348             rce = &rc->entry[frame_number];
 349
 350             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
 351                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 352                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
 353
 354             switch(pict_type){
 355                 case 'I': rce->kept_as_ref = 1;
 356                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 357                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 358                 case 'B': rce->kept_as_ref = 1;
 359                 case 'b': rce->pict_type = SLICE_TYPE_B; break;
 360                 default:  e = -1; break;
 361             }
 362             if(e != 10){
 363                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 364                 return -1;
 365             }
 366             rce->qscale = qp2qscale(qp);
 367             p = next;
 368         }
 369
 370         x264_free(stats_in);
 371
 372         if(h->param.rc.b_cbr)
 373         {
 374             if(init_pass2(h) < 0) return -1;
 375         } /* else we're using constant quant, so no need to run the bitrate allocation */
 376     }
 377
 378     /* Open output file */
 379     /* If input and output files are the same, output to a temp file
 380      * and move it to the real name only when it's complete */
 381     if( h->param.rc.b_stat_write )
 382     {
 383         rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
 384         strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 385         strcat( rc->psz_stat_file_tmpname, ".temp" );
 386
 387         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 388         if( rc->p_stat_file_out == NULL )
 389         {
 390             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 391             return -1;
 392         }
 393     }
 394
 395     return 0;
 396 }
 397
 398 void x264_ratecontrol_delete( x264_t *h )
 399 {
 400     x264_ratecontrol_t *rc = h->rc;
 401
 402     if( rc->p_stat_file_out )
 403     {
 404         fclose( rc->p_stat_file_out );
 405         if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 406             if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
 407             {
 408                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
 409                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 410             }
 411         x264_free( rc->psz_stat_file_tmpname );
 412     }
 413     x264_free( rc->entry );
 414     x264_free( rc->zones );
 415     x264_free( rc );
 416 }
 417
 418 /* Before encoding a frame, choose a QP for it */
 419 void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
 420 {
 421     x264_ratecontrol_t *rc = h->rc;
 422
 423     x264_cpu_restore( h->param.cpu );
 424
 425     rc->qp_force = i_force_qp;
 426     rc->slice_type = i_slice_type;
 427
 428     if( i_force_qp )
 429     {
 430         rc->qpa = rc->qp = i_force_qp - 1;
 431     }
 432     else if( rc->b_abr )
 433     {
 434         rc->qpa = rc->qp =
 435             x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 );
 436     }
 437     else if( rc->b_2pass )
 438     {
 439         int frame = h->fenc->i_frame;
 440         ratecontrol_entry_t *rce;
 441         assert( frame >= 0 && frame < rc->num_entries );
 442         rce = h->rc->rce = &h->rc->entry[frame];
 443
 444         rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
 445         rc->qpa = rc->qp = rce->new_qp =
 446             x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 );
 447     }
 448     else /* CQP */
 449     {
 450         int q;
 451         if( i_slice_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
 452             q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
 453         else
 454             q = rc->qp_constant[ i_slice_type ];
 455         rc->qpa = rc->qp = q;
 456     }
 457 }
 458
 459 void x264_ratecontrol_mb( x264_t *h, int bits )
 460 {
 461     /* currently no adaptive quant */
 462 }
 463
 464 int x264_ratecontrol_qp( x264_t *h )
 465 {
 466     return h->rc->qp;
 467 }
 468
 469 /* In 2pass, force the same frame types as in the 1st pass */
 470 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 471 {
 472     if( h->param.rc.b_stat_read )
 473     {
 474         if( frame_num >= h->rc->num_entries )
 475         {
 476             x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
 477             return X264_TYPE_P;
 478         }
 479         switch( h->rc->entry[frame_num].pict_type )
 480         {
 481             case SLICE_TYPE_I:
 482                 return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
 483
 484             case SLICE_TYPE_B:
 485                 return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
 486
 487             case SLICE_TYPE_P:
 488             default:
 489                 return X264_TYPE_P;
 490         }
 491     }
 492     else
 493     {
 494         return X264_TYPE_AUTO;
 495     }
 496 }
 497
 498 /* After encoding one frame, save stats and update ratecontrol state */
 499 void x264_ratecontrol_end( x264_t *h, int bits )
 500 {
 501     x264_ratecontrol_t *rc = h->rc;
 502     int i;
 503
 504     x264_cpu_restore( h->param.cpu );
 505
 506     h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
 507     h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
 508     for( i = B_DIRECT; i < B_8x8; i++ )
 509         h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];
 510
 511     if( h->param.rc.b_stat_write )
 512     {
 513         char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
 514                     : rc->slice_type==SLICE_TYPE_P ? 'P'
 515                     : h->fenc->b_kept_as_ref ? 'B' : 'b';
 516         fprintf( rc->p_stat_file_out,
 517                  "in:%d out:%d type:%c q:%.2f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
 518                  h->fenc->i_frame, h->i_frame-1,
 519                  c_type, rc->qpa,
 520                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
 521                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
 522                  h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
 523                  h->stat.frame.i_mb_count_p,
 524                  h->stat.frame.i_mb_count_skip);
 525     }
 526
 527     if( rc->b_abr )
 528     {
 529         if( rc->slice_type != SLICE_TYPE_B )
 530             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / rc->last_rceq;
 531         else
 532         {
 533             /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
 534              * Not perfectly accurate with B-refs, but good enough. */
 535             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / (rc->last_rceq * fabs(h->param.rc.f_pb_factor));
 536         }
 537         rc->cplxr_sum *= rc->cbr_decay;
 538         rc->wanted_bits_window += rc->bitrate / rc->fps;
 539         rc->wanted_bits_window *= rc->cbr_decay;
 540
 541         rc->accum_p_qp   *= .95;
 542         rc->accum_p_norm *= .95;
 543         rc->accum_p_norm += 1;
 544         if( rc->slice_type == SLICE_TYPE_I )
 545             rc->accum_p_qp += rc->qpa * fabs(h->param.rc.f_ip_factor);
 546         else
 547             rc->accum_p_qp += rc->qpa;
 548     }
 549
 550     if( rc->b_2pass )
 551     {
 552         rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
 553     }
 554
 555     update_vbv( h, bits );
 556
 557     if( rc->slice_type != SLICE_TYPE_B )
 558         rc->last_non_b_pict_type = rc->slice_type;
 559 }
 560
 561 /****************************************************************************
 562  * 2 pass functions
 563  ***************************************************************************/
 564
 565 double x264_eval( char *s, double *const_value, const char **const_name,
 566                   double (**func1)(void *, double), const char **func1_name,
 567                   double (**func2)(void *, double, double), char **func2_name,
 568                   void *opaque );
 569
 570 /**
 571  * modify the bitrate curve from pass1 for one frame
 572  */
 573 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor, int frame_num)
 574 {
 575     x264_ratecontrol_t *rcc= h->rc;
 576     const int pict_type = rce->pict_type;
 577     double q;
 578     int i;
 579
 580     double const_values[]={
 581         rce->i_tex_bits * rce->qscale,
 582         rce->p_tex_bits * rce->qscale,
 583         (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
 584         rce->mv_bits * rce->qscale,
 585         (double)rce->i_count / rcc->nmb,
 586         (double)rce->p_count / rcc->nmb,
 587         (double)rce->s_count / rcc->nmb,
 588         rce->pict_type == SLICE_TYPE_I,
 589         rce->pict_type == SLICE_TYPE_P,
 590         rce->pict_type == SLICE_TYPE_B,
 591         h->param.rc.f_qcompress,
 592         rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
 593         rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 594         rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 595         rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
 596         (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
 597         rce->blurred_complexity,
 598         0
 599     };
 600     static const char *const_names[]={
 601         "iTex",
 602         "pTex",
 603         "tex",
 604         "mv",
 605         "iCount",
 606         "pCount",
 607         "sCount",
 608         "isI",
 609         "isP",
 610         "isB",
 611         "qComp",
 612         "avgIITex",
 613         "avgPITex",
 614         "avgPPTex",
 615         "avgBPTex",
 616         "avgTex",
 617         "blurCplx",
 618         NULL
 619     };
 620     static double (*func1[])(void *, double)={
 621 //      (void *)bits2qscale,
 622         (void *)qscale2bits,
 623         NULL
 624     };
 625     static const char *func1_names[]={
 626 //      "bits2qp",
 627         "qp2bits",
 628         NULL
 629     };
 630
 631     q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
 632
 633     // avoid NaN's in the rc_eq
 634     if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
 635         q = rcc->last_qscale;
 636     else {
 637         rcc->last_rceq = q;
 638         q /= rate_factor;
 639         rcc->last_qscale = q;
 640     }
 641
 642     for( i = rcc->i_zones-1; i >= 0; i-- )
 643     {
 644         x264_zone_t *z = &rcc->zones[i];
 645         if( frame_num >= z->i_start && frame_num <= z->i_end )
 646         {
 647             if( z->b_force_qp )
 648                 q = qp2qscale(z->i_qp);
 649             else
 650                 q /= z->f_bitrate_factor;
 651             break;
 652         }
 653     }
 654
 655     return q;
 656 }
 657
 658 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 659 {
 660     x264_ratecontrol_t *rcc = h->rc;
 661     const int pict_type = rce->pict_type;
 662
 663     // force I/B quants as a function of P quants
 664     const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
 665     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
 666     if( pict_type == SLICE_TYPE_I )
 667     {
 668         double iq = q;
 669         double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 670         double ip_factor = fabs( h->param.rc.f_ip_factor );
 671         /* don't apply ip_factor if the following frame is also I */
 672         if( rcc->accum_p_norm <= 0 )
 673             q = iq;
 674         else if( h->param.rc.f_ip_factor < 0 )
 675             q = iq / ip_factor;
 676         else if( rcc->accum_p_norm >= 1 )
 677             q = pq / ip_factor;
 678         else
 679             q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
 680     }
 681     else if( pict_type == SLICE_TYPE_B )
 682     {
 683         if( h->param.rc.f_pb_factor > 0 )
 684             q = last_non_b_q;
 685         if( !rce->kept_as_ref )
 686             q *= fabs( h->param.rc.f_pb_factor );
 687     }
 688     else if( pict_type == SLICE_TYPE_P
 689              && rcc->last_non_b_pict_type == SLICE_TYPE_P
 690              && rce->i_tex_bits + rce->p_tex_bits == 0 )
 691     {
 692         q = last_p_q;
 693     }
 694
 695     /* last qscale / qdiff stuff */
 696     if(rcc->last_non_b_pict_type==pict_type
 697        && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
 698     {
 699         double last_q = rcc->last_qscale_for[pict_type];
 700         double max_qscale = last_q * rcc->lstep;
 701         double min_qscale = last_q / rcc->lstep;
 702
 703         if     (q > max_qscale) q = max_qscale;
 704         else if(q < min_qscale) q = min_qscale;
 705     }
 706
 707     rcc->last_qscale_for[pict_type] = q;
 708     if(pict_type!=SLICE_TYPE_B)
 709         rcc->last_non_b_pict_type = pict_type;
 710     if(pict_type==SLICE_TYPE_I)
 711     {
 712         rcc->last_accum_p_norm = rcc->accum_p_norm;
 713         rcc->accum_p_norm = 0;
 714         rcc->accum_p_qp = 0;
 715     }
 716     if(pict_type==SLICE_TYPE_P)
 717     {
 718         float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
 719         rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
 720         rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
 721     }
 722     return q;
 723 }
 724
 725 static double predict_size( predictor_t *p, double q, double var )
 726 {
 727      return p->coeff*var / (q*p->count);
 728 }
 729
 730 static void update_predictor( predictor_t *p, double q, double var, double bits )
 731 {
 732     p->count *= p->decay;
 733     p->coeff *= p->decay;
 734     p->count ++;
 735     p->coeff += bits*q / var;
 736 }
 737
 738 static void update_vbv( x264_t *h, int bits )
 739 {
 740     x264_ratecontrol_t *rcc = h->rc;
 741     if( !rcc->buffer_size )
 742         return;
 743
 744     rcc->buffer_fill += rcc->buffer_rate - bits;
 745     if( rcc->buffer_fill < 0 && !rcc->b_2pass )
 746         x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rcc->buffer_fill );
 747     rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size );
 748
 749     if(rcc->last_satd > 100)
 750         update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits );
 751 }
 752
 753 // apply VBV constraints and clip qscale to between lmin and lmax
 754 static double clip_qscale( x264_t *h, int pict_type, double q )
 755 {
 756     x264_ratecontrol_t *rcc = h->rc;
 757     double lmin = rcc->lmin[pict_type];
 758     double lmax = rcc->lmax[pict_type];
 759     double q0 = q;
 760
 761     /* B-frames are not directly subject to VBV,
 762      * since they are controlled by the P-frames' QPs.
 763      * FIXME: in 2pass we could modify previous frames' QP too,
 764      *        instead of waiting for the buffer to fill */
 765     if( rcc->buffer_size &&
 766         ( pict_type == SLICE_TYPE_P ||
 767           ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
 768     {
 769         if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
 770             q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
 771     }
 772     /* Now a hard threshold to make sure the frame fits in VBV.
 773      * This one is mostly for I-frames. */
 774     if( rcc->buffer_size && rcc->last_satd > 0 )
 775     {
 776         double bits = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd );
 777         double qf = 1.0;
 778         if( bits > rcc->buffer_fill/2 )
 779             qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
 780         q /= qf;
 781         bits *= qf;
 782         if( bits < rcc->buffer_rate/2 )
 783             q *= bits*2/rcc->buffer_rate;
 784         q = X264_MAX( q0, q );
 785     }
 786
 787     if(lmin==lmax)
 788         return lmin;
 789     else if(rcc->b_2pass)
 790     {
 791         double min2 = log(lmin);
 792         double max2 = log(lmax);
 793         q = (log(q) - min2)/(max2-min2) - 0.5;
 794         q = 1.0/(1.0 + exp(-4*q));
 795         q = q*(max2-min2) + min2;
 796         return exp(q);
 797     }
 798     else
 799         return x264_clip3f(q, lmin, lmax);
 800 }
 801
 802 // update qscale for 1 frame based on actual bits used so far
 803 static float rate_estimate_qscale(x264_t *h, int pict_type)
 804 {
 805     float q;
 806     x264_ratecontrol_t *rcc = h->rc;
 807     ratecontrol_entry_t rce;
 808     double lmin = rcc->lmin[pict_type];
 809     double lmax = rcc->lmax[pict_type];
 810     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
 811                           + h->stat.i_slice_size[SLICE_TYPE_P]
 812                           + h->stat.i_slice_size[SLICE_TYPE_B]);
 813
 814     if( rcc->b_2pass )
 815     {
 816         rce = *rcc->rce;
 817         if(pict_type != rce.pict_type)
 818         {
 819             x264_log(h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
 820                      slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type]);
 821         }
 822     }
 823
 824     if( pict_type == SLICE_TYPE_B )
 825     {
 826         rcc->last_satd = 0;
 827         if(h->fenc->b_kept_as_ref)
 828             q = rcc->last_qscale * sqrtf(h->param.rc.f_pb_factor);
 829         else
 830             q = rcc->last_qscale * h->param.rc.f_pb_factor;
 831         return x264_clip3f(q, lmin, lmax);
 832     }
 833     else
 834     {
 835         double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
 836         if( rcc->b_2pass )
 837         {
 838             //FIXME adjust abr_buffer based on distance to the end of the video
 839             int64_t diff = total_bits - (int64_t)rce.expected_bits;
 840             q = rce.new_qscale;
 841             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
 842             if( h->fenc->i_frame > 30 )
 843             {
 844                 /* Adjust quant based on the difference between
 845                  * achieved and expected bitrate so far */
 846                 double time = (double)h->fenc->i_frame / rcc->num_entries;
 847                 double w = x264_clip3f( time*100, 0.0, 1.0 );
 848                 q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
 849             }
 850             q = x264_clip3f( q, lmin, lmax );
 851         }
 852         else /* 1pass ABR */
 853         {
 854             /* Calculate the quantizer which would have produced the desired
 855              * average bitrate if it had been applied to all frames so far.
 856              * Then modulate that quant based on the current frame's complexity
 857              * relative to the average complexity so far (using the 2pass RCEQ).
 858              * Then bias the quant up or down if total size so far was far from
 859              * the target.
 860              * Result: Depending on the value of rate_tolerance, there is a
 861              * tradeoff between quality and bitrate precision. But at large
 862              * tolerances, the bit distribution approaches that of 2pass. */
 863
 864             double wanted_bits, overflow, lmin, lmax;
 865
 866             rcc->last_satd = x264_rc_analyse_slice( h );
 867             rcc->short_term_cplxsum *= 0.5;
 868             rcc->short_term_cplxcount *= 0.5;
 869             rcc->short_term_cplxsum += rcc->last_satd;
 870             rcc->short_term_cplxcount ++;
 871
 872             rce.p_tex_bits = rcc->last_satd;
 873             rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
 874             rce.i_tex_bits = 0;
 875             rce.mv_bits = 0;
 876             rce.p_count = rcc->nmb;
 877             rce.i_count = 0;
 878             rce.s_count = 0;
 879             rce.qscale = 1;
 880             rce.pict_type = pict_type;
 881             q = get_qscale(h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame);
 882
 883             wanted_bits = h->fenc->i_frame * rcc->bitrate / rcc->fps;
 884             abr_buffer *= X264_MAX( 1, sqrt(h->fenc->i_frame/25) );
 885             overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
 886             q *= overflow;
 887
 888             if( pict_type == SLICE_TYPE_I
 889                 /* should test _next_ pict type, but that isn't decided yet */
 890                 && rcc->last_non_b_pict_type != SLICE_TYPE_I )
 891             {
 892                 q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 893                 q /= fabs( h->param.rc.f_ip_factor );
 894                 q = clip_qscale( h, pict_type, q );
 895             }
 896             else
 897             {
 898                 if( h->stat.i_slice_count[SLICE_TYPE_P] < 5 )
 899                 {
 900                     float w = h->stat.i_slice_count[SLICE_TYPE_P] / 5.;
 901                     float q2 = qp2qscale(ABR_INIT_QP);
 902                     q = q*w + q2*(1-w);
 903                 }
 904
 905                 /* Asymmetric clipping, because symmetric would prevent
 906                  * overflow control in areas of rapidly oscillating complexity */
 907                 lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
 908                 lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
 909                 if( overflow > 1.1 )
 910                     lmax *= rcc->lstep;
 911                 else if( overflow < 0.9 )
 912                     lmin /= rcc->lstep;
 913
 914                 q = x264_clip3f(q, lmin, lmax);
 915                 q = clip_qscale(h, pict_type, q);
 916                 //FIXME use get_diff_limited_q() ?
 917             }
 918         }
 919
 920         rcc->last_qscale_for[pict_type] =
 921         rcc->last_qscale = q;
 922
 923         return q;
 924     }
 925 }
 926
 927 static int init_pass2( x264_t *h )
 928 {
 929     x264_ratecontrol_t *rcc = h->rc;
 930     uint64_t all_const_bits = 0;
 931     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
 932     double rate_factor, step, step_mult;
 933     double qblur = h->param.rc.f_qblur;
 934     double cplxblur = h->param.rc.f_complexity_blur;
 935     const int filter_size = (int)(qblur*4) | 1;
 936     double expected_bits;
 937     double *qscale, *blurred_qscale;
 938     int i;
 939
 940     /* find total/average complexity & const_bits */
 941     for(i=0; i<rcc->num_entries; i++){
 942         ratecontrol_entry_t *rce = &rcc->entry[i];
 943         all_const_bits += rce->misc_bits;
 944         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
 945         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
 946         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
 947         rcc->frame_count[rce->pict_type] ++;
 948     }
 949
 950     if( all_available_bits < all_const_bits)
 951     {
 952         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
 953                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
 954         return -1;
 955     }
 956
 957     /* Blur complexities, to reduce local fluctuation of QP.
 958      * We don't blur the QPs directly, because then one very simple frame
 959      * could drag down the QP of a nearby complex frame and give it more
 960      * bits than intended. */
 961     for(i=0; i<rcc->num_entries; i++){
 962         ratecontrol_entry_t *rce = &rcc->entry[i];
 963         double weight_sum = 0;
 964         double cplx_sum = 0;
 965         double weight = 1.0;
 966         int j;
 967         /* weighted average of cplx of future frames */
 968         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
 969             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
 970             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 971             if(weight < .0001)
 972                 break;
 973             weight_sum += weight;
 974             cplx_sum += weight * qscale2bits(rcj, 1);
 975         }
 976         /* weighted average of cplx of past frames */
 977         weight = 1.0;
 978         for(j=0; j<=cplxblur*2 && j<=i; j++){
 979             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
 980             weight_sum += weight;
 981             cplx_sum += weight * qscale2bits(rcj, 1);
 982             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 983             if(weight < .0001)
 984                 break;
 985         }
 986         rce->blurred_complexity = cplx_sum / weight_sum;
 987     }
 988
 989     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 990     if(filter_size > 1)
 991         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 992     else
 993         blurred_qscale = qscale;
 994
 995     /* Search for a factor which, when multiplied by the RCEQ values from
 996      * each frame, adds up to the desired total size.
 997      * There is no exact closed-form solution because of VBV constraints and
 998      * because qscale2bits is not invertible, but we can start with the simple
 999      * approximation of scaling the 1st pass by the ratio of bitrates.
1000      * The search range is probably overkill, but speed doesn't matter here. */
1001
1002     expected_bits = 1;
1003     for(i=0; i<rcc->num_entries; i++)
1004         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0, i));
1005     step_mult = all_available_bits / expected_bits;
1006
1007     rate_factor = 0;
1008     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
1009         expected_bits = 0;
1010         rate_factor += step;
1011
1012         rcc->last_non_b_pict_type = -1;
1013         rcc->last_accum_p_norm = 1;
1014         rcc->accum_p_norm = 0;
1015         rcc->buffer_fill = rcc->buffer_size * h->param.rc.f_vbv_buffer_init;
1016
1017         /* find qscale */
1018         for(i=0; i<rcc->num_entries; i++){
1019             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor, i);
1020         }
1021
1022         /* fixed I/B qscale relative to P */
1023         for(i=rcc->num_entries-1; i>=0; i--){
1024             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
1025             assert(qscale[i] >= 0);
1026         }
1027
1028         /* smooth curve */
1029         if(filter_size > 1){
1030             assert(filter_size%2==1);
1031             for(i=0; i<rcc->num_entries; i++){
1032                 ratecontrol_entry_t *rce = &rcc->entry[i];
1033                 int j;
1034                 double q=0.0, sum=0.0;
1035
1036                 for(j=0; j<filter_size; j++){
1037                     int index = i+j-filter_size/2;
1038                     double d = index-i;
1039                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
1040                     if(index < 0 || index >= rcc->num_entries) continue;
1041                     if(rce->pict_type != rcc->entry[index].pict_type) continue;
1042                     q += qscale[index] * coeff;
1043                     sum += coeff;
1044                 }
1045                 blurred_qscale[i] = q/sum;
1046             }
1047         }
1048
1049         /* find expected bits */
1050         for(i=0; i<rcc->num_entries; i++){
1051             ratecontrol_entry_t *rce = &rcc->entry[i];
1052             double bits;
1053             rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]);
1054             assert(rce->new_qscale >= 0);
1055             bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
1056
1057             rce->expected_bits = expected_bits;
1058             expected_bits += bits;
1059             update_vbv(h, bits);
1060         }
1061
1062 //printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor);
1063         if(expected_bits > all_available_bits) rate_factor -= step;
1064     }
1065
1066     x264_free(qscale);
1067     if(filter_size > 1)
1068         x264_free(blurred_qscale);
1069
1070     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
1071     {
1072         double avgq = 0;
1073         for(i=0; i<rcc->num_entries; i++)
1074             avgq += rcc->entry[i].new_qscale;
1075         avgq = qscale2qp(avgq / rcc->num_entries);
1076
1077         x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
1078         x264_log(h, X264_LOG_ERROR, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
1079                  (float)h->param.rc.i_bitrate,
1080                  expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1081                  avgq);
1082         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1083         {
1084             if(h->param.rc.i_qp_min > 0)
1085                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1086             else
1087                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate\n");
1088         }
1089         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1090         {
1091             if(h->param.rc.i_qp_max < 51)
1092                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1093             else
1094                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate\n");
1095         }
1096         else
1097             x264_log(h, X264_LOG_ERROR, "internal error\n");
1098     }
1099
1100     return 0;
1101 }
1102
1103