git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Måns Rullgård <mru@mru.ath.cx>
   8  * 2 pass code: Michael Niedermayer <michaelni@gmx.at>
   9  *              Loren Merritt <lorenm@u.washington.edu>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <math.h>
  32 #include <limits.h>
  33 #include <assert.h>
  34
  35 #include "common/common.h"
  36 #include "common/cpu.h"
  37 #include "common/macroblock.h"
  38 #include "ratecontrol.h"
  39
/* exp2f() fallbacks: each definition below is only active when the
 * corresponding platform macro is defined. */
#ifdef SYS_MACOSX
/* Built on the double-precision exp2(), narrowed back to float. */
#define exp2f(x) ( (float) exp2( (x) ) )
#endif
#ifdef SYS_FREEBSD
/* Built on powf(). */
#define exp2f(x) powf( 2, (x) )
#endif
#ifdef _MSC_VER
/* Built on pow(); note that this variant yields a double, not a float. */
#define exp2f(x) pow( 2, (x) )
#endif
  49
/* Per-frame record used by the 2pass code. One entry exists per frame and is
 * indexed by the frame's "in:" number from the 1st pass stats file. */
typedef struct
{
    int pict_type;              /* SLICE_TYPE_I/P/B, derived from the stats "type:" character */
    int idr;                    /* set to 1 when the stats "type:" character is 'I' */
    float qscale;               /* qp2qscale() of the stats "q:" value */
    int mv_bits;                /* stats "mv:" value */
    int i_tex_bits;             /* stats "itex:" value */
    int p_tex_bits;             /* stats "ptex:" value */
    int misc_bits;              /* stats "misc:" value */
    uint64_t expected_bits;     /* compared against the bits actually used so far in rate_estimate_qscale() */
    float new_qscale;           /* qscale chosen for this frame in the current pass */
    int new_qp;                 /* new_qscale converted to a qp and rounded */
    int i_count;                /* stats "imb:" value */
    int p_count;                /* stats "pmb:" value */
    int s_count;                /* stats "smb:" value */
    float blurred_complexity;   /* exposed to the rate control equation as "blurCplx" */
} ratecontrol_entry_t;
  67
/* Rate control state attached to an encoder as h->rc. It holds the 1pass CBR
 * state as well as the 2pass (stats file driven) state. */
struct x264_ratecontrol_t
{
    /* constants */
    double fps;                 /* i_fps_num / i_fps_den, or 25.0 when those are not positive */
    int gop_size;               /* copied from i_keyint_max */
    int bitrate;                /* rc.i_bitrate * 1000 */
    int nmb;                    /* number of macroblocks in a frame */
    int buffer_size;            /* rc.i_rc_buffer_size * 1000, or bitrate / 2 as fallback */
    int rcbufrate;              /* bitrate / fps */
    int init_qp;                /* starting qp picked from the bits-per-pixel estimate */
    int qp_constant[5];         /* constant qp, indexed by slice type */

    /* 1st pass stuff */
    int gop_qp;                 /* qp assigned to I frames */
    int buffer_fullness;
    int frames;                 /* frames in current gop */
    int pframes;                /* P frames counted since the last I frame */
    int slice_type;             /* slice type passed to x264_ratecontrol_start() */
    int mb;                     /* MBs processed in current frame */
    int bits_gop;               /* allocated bits current gop */
    int bits_last_gop;          /* bits consumed in gop */
    int qp;                     /* qp for current frame */
    int qpm;                    /* qp for next MB */
    float qpa;                  /* average qp for last frame */
    int qps;                    /* sum of qpm over the MBs processed in the current frame */
    float qp_avg_p;             /* average QP for P frames (kept as a running sum, divided by pframes on use) */
    float qp_last_p;            /* qpa of the most recent P frame */
    int fbits;                  /* bits allocated for current frame */
    int ufbits;                 /* bits used for current frame */
    int nzcoeffs;               /* # of 0-quantized coefficients */
    int ncoeffs;                /* total # of coefficients */
    int overhead;               /* frame bits minus ufbits, measured on the previous frame */

    /* 2pass stuff */
    FILE *p_stat_file_out;      /* stats output stream, NULL when stats are not written */
    char *psz_stat_file_tmpname;/* name the stats are written to; differs from rc.psz_stat_out
                                 * only when input and output stats files are the same */

    int num_entries;            /* number of ratecontrol_entry_ts */
    ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
    double last_qscale;         /* last qscale produced for a frame */
    double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
    int last_non_b_pict_type;   /* pict type of the most recent non-B frame */
    double accum_p_qp;          /* for determining I-frame quant */
    double accum_p_norm;        /* weight accumulated alongside accum_p_qp */
    double last_accum_p_norm;   /* accum_p_norm as it was at the previous I frame */
    double lmin[5];             /* min qscale by frame type */
    double lmax[5];             /* max qscale by frame type */
    double lstep;               /* max change (multiply) in qscale per frame */
    double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
    double p_cplx_sum[5];       /* sum of p_tex_bits * qscale, by frame type */
    double mv_bits_sum[5];      /* sum of mv_bits * qscale, by frame type */
    int frame_count[5];         /* number of frames of each type */
};
 121
 122
/* Forward declarations of 2pass helpers that are defined further down. */
static int init_pass2(x264_t *);
static float rate_estimate_qscale( x264_t *h, int pict_type );
 125
 126 /* Terminology:
 127  * qp = h.264's quantizer
 128  * qscale = linearized quantizer = Lagrange multiplier
 129  */
 130 static inline double qp2qscale(double qp)
 131 {
 132     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 133 }
 134 static inline double qscale2qp(double qscale)
 135 {
 136     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 137 }
 138
 139 /* FIXME: The multiplier actually seems to be closer to
 140  * bits = tex * pow(qscale, 1.25) + mv * pow(qscale, 0.5)
 141  * MV bits levels off at about qp<=10, but that's only due to inaccuracy in
 142  * the qscale used for motion estimation. */
 143 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 144 {
 145     if(qscale<0.1)
 146         qscale = 0.1;
 147     return (rce->i_tex_bits + rce->p_tex_bits + .1) * rce->qscale / qscale
 148            + rce->mv_bits * pow( rce->qscale / qscale, 0.5 );
 149 }
 150
// there is no analytical inverse to the above
#if 0
/* Disabled. Approximate inverse of qscale2bits() that treats mv bits as if
 * they scaled with 1/qscale like texture bits; bits is floored at 1.0. */
static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
{
    if(bits<1.0)
        bits = 1.0;
    return (rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits + .1) * rce->qscale / bits;
}
#endif
 160
 161
 162 int x264_ratecontrol_new( x264_t *h )
 163 {
 164     x264_ratecontrol_t *rc;
 165     float bpp;
 166     int i;
 167
 168     /* Needed(?) for 2 pass */
 169     x264_cpu_restore( h->param.cpu );
 170
 171     h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
 172     memset(rc, 0, sizeof(*rc));
 173
 174     /* FIXME: use integers */
 175     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 176         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 177     else
 178         rc->fps = 25.0;
 179
 180     rc->gop_size = h->param.i_keyint_max;
 181     rc->bitrate = h->param.rc.i_bitrate * 1000;
 182     rc->nmb = h->mb.i_mb_count;
 183
 184     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 185     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 186     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 187
 188     /* Init 1pass CBR algo */
 189     if( h->param.rc.b_cbr ){
 190         rc->buffer_size = h->param.rc.i_rc_buffer_size * 1000;
 191         rc->buffer_fullness = h->param.rc.i_rc_init_buffer;
 192         rc->rcbufrate = rc->bitrate / rc->fps;
 193
 194         if(rc->buffer_size < rc->rcbufrate){
 195             x264_log(h, X264_LOG_WARNING, "rc buffer size %i too small\n",
 196                      rc->buffer_size);
 197             rc->buffer_size = 0;
 198         }
 199
 200         if(rc->buffer_size <= 0)
 201             rc->buffer_size = rc->bitrate / 2;
 202
 203         if(rc->buffer_fullness > rc->buffer_size || rc->buffer_fullness < 0){
 204             x264_log(h, X264_LOG_WARNING, "invalid initial buffer fullness %i\n",
 205                      rc->buffer_fullness);
 206             rc->buffer_fullness = 0;
 207         }
 208
 209         bpp = rc->bitrate / (rc->fps * h->param.i_width * h->param.i_height);
 210         if(bpp <= 0.6)
 211             rc->init_qp = 31;
 212         else if(bpp <= 1.4)
 213             rc->init_qp = 25;
 214         else if(bpp <= 2.4)
 215             rc->init_qp = 20;
 216         else
 217             rc->init_qp = 10;
 218         rc->gop_qp = rc->init_qp;
 219
 220         rc->bits_last_gop = 0;
 221
 222         x264_log(h, X264_LOG_DEBUG, "%f fps, %i bps, bufsize %i\n",
 223                  rc->fps, rc->bitrate, rc->buffer_size);
 224     }
 225
 226
 227     rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
 228     rc->last_qscale = qp2qscale(26);
 229     for( i = 0; i < 5; i++ )
 230     {
 231         rc->last_qscale_for[i] = qp2qscale(26);
 232         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 233         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 234     }
 235 #if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
 236     rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 237     rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 238     rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 239     rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 240 #endif
 241
 242     /* Load stat file and init 2pass algo */
 243     if( h->param.rc.b_stat_read )
 244     {
 245         int stats_size;
 246         char *p, *stats_in;
 247         FILE *stats_file;
 248
 249         /* read 1st pass stats */
 250         assert( h->param.rc.psz_stat_in );
 251         stats_file = fopen( h->param.rc.psz_stat_in, "rb");
 252         if(!stats_file)
 253         {
 254             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 255             return -1;
 256         }
 257         // FIXME: error checking
 258         fseek(stats_file, 0, SEEK_END);
 259         stats_size = ftell(stats_file);
 260         fseek(stats_file, 0, SEEK_SET);
 261         stats_in = x264_malloc(stats_size+10);
 262         fread(stats_in, 1, stats_size, stats_file);
 263         fclose(stats_file);
 264
 265         /* find number of pics */
 266         p = stats_in;
 267         for(i=-1; p; i++){
 268             p = strchr(p+1, ';');
 269         }
 270         i += h->param.i_bframe;
 271         rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
 272         memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
 273         rc->num_entries= i;
 274
 275         /* init all to skipped p frames */
 276         for(i=0; i<rc->num_entries; i++){
 277             ratecontrol_entry_t *rce = &rc->entry[i];
 278             rce->pict_type = SLICE_TYPE_P;
 279             rce->qscale = rce->new_qscale = qp2qscale(20);
 280             rce->misc_bits = rc->nmb + 10;
 281             rce->new_qp = 0;
 282         }
 283
 284         /* read stats */
 285         p = stats_in;
 286         for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
 287             ratecontrol_entry_t *rce;
 288             int frame_number;
 289             char pict_type;
 290             int e;
 291             char *next;
 292             float qp;
 293
 294             next= strchr(p, ';');
 295             if(next){
 296                 (*next)=0; //sscanf is unbelievably slow on looong strings
 297                 next++;
 298             }
 299             e = sscanf(p, " in:%d ", &frame_number);
 300
 301             assert(frame_number >= 0);
 302             assert(frame_number < rc->num_entries);
 303             rce = &rc->entry[frame_number];
 304
 305             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
 306                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 307                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
 308
 309             switch(pict_type){
 310                 case 'I': rce->idr = 1;
 311                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 312                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 313                 case 'B': rce->pict_type = SLICE_TYPE_B; break;
 314                 default:  e = -1; break;
 315             }
 316             if(e != 10){
 317                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 318                 return -1;
 319             }
 320             rce->qscale = qp2qscale(qp);
 321             p = next;
 322         }
 323
 324         x264_free(stats_in);
 325
 326         /* If using 2pass with constant quant, no need to run the bitrate allocation */
 327         if(h->param.rc.b_cbr)
 328         {
 329             if(init_pass2(h) < 0) return -1;
 330         }
 331     }
 332
 333     /* Open output file */
 334     /* If input and output files are the same, output to a temp file
 335      * and move it to the real name only when it's complete */
 336     if( h->param.rc.b_stat_write )
 337     {
 338         if( h->param.rc.b_stat_read && !strcmp( h->param.rc.psz_stat_in, h->param.rc.psz_stat_out ) )
 339         {
 340             rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 5 );
 341             strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 342             strcat( rc->psz_stat_file_tmpname, ".new" );
 343         }
 344         else
 345             rc->psz_stat_file_tmpname = h->param.rc.psz_stat_out;
 346
 347         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 348         if( rc->p_stat_file_out == NULL )
 349         {
 350             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 351             return -1;
 352         }
 353     }
 354
 355     return 0;
 356 }
 357
 358 void x264_ratecontrol_delete( x264_t *h )
 359 {
 360     x264_ratecontrol_t *rc = h->rc;
 361
 362     if( rc->p_stat_file_out )
 363     {
 364         fclose( rc->p_stat_file_out );
 365         if( rc->psz_stat_file_tmpname != h->param.rc.psz_stat_out )
 366         {
 367             if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 368                 rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 369             x264_free( rc->psz_stat_file_tmpname );
 370         }
 371     }
 372     if( rc->entry )
 373         x264_free(rc->entry);
 374     x264_free( rc );
 375 }
 376
 377 void x264_ratecontrol_start( x264_t *h, int i_slice_type )
 378 {
 379     x264_ratecontrol_t *rc = h->rc;
 380     int gframes, iframes, pframes, bframes;
 381     int minbits, maxbits;
 382     int gbits, fbits;
 383     int zn = 0;
 384     float kp;
 385     int gbuf;
 386
 387     rc->slice_type = i_slice_type;
 388
 389     x264_cpu_restore( h->param.cpu );
 390
 391     if( !h->param.rc.b_cbr )
 392     {
 393         rc->qpm = rc->qpa = rc->qp =
 394             rc->qp_constant[ i_slice_type ];
 395         return;
 396     }
 397     else if( h->param.rc.b_stat_read )
 398     {
 399         int frame = h->fenc->i_frame;
 400         ratecontrol_entry_t *rce;
 401         assert( frame >= 0 && frame < rc->num_entries );
 402         rce = &h->rc->entry[frame];
 403
 404         rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
 405         rc->qpm = rc->qpa = rc->qp = rce->new_qp =
 406             (int)(qscale2qp(rce->new_qscale) + 0.5);
 407         return;
 408     }
 409
 410     switch(i_slice_type){
 411     case SLICE_TYPE_I:
 412         gbuf = rc->buffer_fullness + (rc->gop_size-1) * rc->rcbufrate;
 413         rc->bits_gop = gbuf - rc->buffer_size / 2;
 414
 415         if(!rc->mb && rc->pframes){
 416             int qp = rc->qp_avg_p / rc->pframes + 0.5;
 417 #if 0 /* JM does this without explaining why */
 418             int gdq = (float) rc->gop_size / 15 + 0.5;
 419             if(gdq > 2)
 420                 gdq = 2;
 421             qp -= gdq;
 422             if(qp > rc->qp_last_p - 2)
 423                 qp--;
 424 #endif
 425             qp = x264_clip3(qp, rc->gop_qp - 4, rc->gop_qp + 4);
 426             qp = x264_clip3(qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 427             rc->gop_qp = qp;
 428         } else if(rc->frames > 4){
 429             rc->gop_qp = rc->init_qp;
 430         }
 431
 432         kp = h->param.rc.f_ip_factor * h->param.rc.f_pb_factor;
 433
 434         x264_log(h, X264_LOG_DEBUG,"gbuf=%i bits_gop=%i frames=%i gop_qp=%i\n",
 435                  gbuf, rc->bits_gop, rc->frames, rc->gop_qp);
 436
 437         rc->bits_last_gop = 0;
 438         rc->frames = 0;
 439         rc->pframes = 0;
 440         rc->qp_avg_p = 0;
 441         break;
 442
 443     case SLICE_TYPE_P:
 444         kp = h->param.rc.f_pb_factor;
 445         break;
 446
 447     case SLICE_TYPE_B:
 448         kp = 1.0;
 449         break;
 450
 451     default:
 452         x264_log(h, X264_LOG_WARNING, "ratecontrol: unknown slice type %i\n",
 453                  i_slice_type);
 454         kp = 1.0;
 455         break;
 456     }
 457
 458     gframes = rc->gop_size - rc->frames;
 459     iframes = gframes / rc->gop_size;
 460     pframes = gframes / (h->param.i_bframe + 1) - iframes;
 461     bframes = gframes - pframes - iframes;
 462
 463     gbits = rc->bits_gop - rc->bits_last_gop;
 464     fbits = kp * gbits /
 465         (h->param.rc.f_ip_factor * h->param.rc.f_pb_factor * iframes +
 466          h->param.rc.f_pb_factor * pframes + bframes);
 467
 468     minbits = rc->buffer_fullness + rc->rcbufrate - rc->buffer_size;
 469     if(minbits < 0)
 470         minbits = 0;
 471     maxbits = rc->buffer_fullness;
 472     rc->fbits = x264_clip3(fbits, minbits, maxbits);
 473
 474     if(i_slice_type == SLICE_TYPE_I){
 475         rc->qp = rc->gop_qp;
 476     } else if(rc->ncoeffs && rc->ufbits){
 477         int dqp, nonzc;
 478
 479         nonzc = (rc->ncoeffs - rc->nzcoeffs);
 480         if(nonzc == 0)
 481             zn = rc->ncoeffs;
 482         else if(rc->fbits < INT_MAX / nonzc)
 483             zn = rc->ncoeffs - rc->fbits * nonzc / rc->ufbits;
 484         else
 485             zn = 0;
 486         zn = x264_clip3(zn, 0, rc->ncoeffs);
 487         dqp = h->param.rc.i_rc_sens * exp2f(rc->qpa / 6) *
 488             (zn - rc->nzcoeffs) / rc->nzcoeffs;
 489         dqp = x264_clip3(dqp, -h->param.rc.i_qp_step, h->param.rc.i_qp_step);
 490         rc->qp = (int)(rc->qpa + dqp + .5);
 491     }
 492
 493     if(rc->fbits > 0.9 * maxbits)
 494         rc->qp += 2;
 495     else if(rc->fbits > 0.8 * maxbits)
 496         rc->qp += 1;
 497     else if(rc->fbits < 1.1 * minbits)
 498         rc->qp -= 2;
 499     else if(rc->fbits < 1.2 * minbits)
 500         rc->qp -= 1;
 501
 502     rc->qp = x264_clip3(rc->qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 503     rc->qpm = rc->qp;
 504
 505     x264_log(h, X264_LOG_DEBUG, "fbits=%i, qp=%i, z=%i, min=%i, max=%i\n",
 506              rc->fbits, rc->qpm, zn, minbits, maxbits);
 507
 508     rc->fbits -= rc->overhead;
 509     rc->ufbits = 0;
 510     rc->ncoeffs = 0;
 511     rc->nzcoeffs = 0;
 512     rc->mb = 0;
 513     rc->qps = 0;
 514 }
 515
 516 void x264_ratecontrol_mb( x264_t *h, int bits )
 517 {
 518     x264_ratecontrol_t *rc = h->rc;
 519     int rbits;
 520     int zn, enz, nonz;
 521     int rcoeffs;
 522     int dqp;
 523     int i;
 524
 525     if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
 526         return;
 527
 528     x264_cpu_restore( h->param.cpu );
 529
 530     rc->qps += rc->qpm;
 531     rc->ufbits += bits;
 532     rc->mb++;
 533
 534     for(i = 0; i < 16 + 8; i++)
 535         rc->nzcoeffs += 16 - h->mb.cache.non_zero_count[x264_scan8[i]];
 536     rc->ncoeffs += 16 * (16 + 8);
 537
 538     if(rc->mb < rc->nmb / 16)
 539         return;
 540     else if(rc->mb == rc->nmb)
 541         return;
 542
 543     rcoeffs = (rc->nmb - rc->mb) * 16 * 24;
 544     rbits = rc->fbits - rc->ufbits;
 545 /*     if(rbits < 0) */
 546 /*      rbits = 0; */
 547
 548 /*     zn = (rc->nmb - rc->mb) * 16 * 24; */
 549     nonz = (rc->ncoeffs - rc->nzcoeffs);
 550     if(nonz == 0)
 551         zn = rcoeffs;
 552     else if(rc->ufbits && rbits < INT_MAX / nonz)
 553         zn = rcoeffs - rbits * nonz / rc->ufbits;
 554     else
 555         zn = 0;
 556     zn = x264_clip3(zn, 0, rcoeffs);
 557     enz = rc->nzcoeffs * (rc->nmb - rc->mb) / rc->mb;
 558     dqp = (float) 2*h->param.rc.i_rc_sens * exp2f((float) rc->qps / rc->mb / 6) *
 559         (zn - enz) / enz;
 560     rc->qpm = x264_clip3(rc->qpm + dqp, rc->qp - 3, rc->qp + 3);
 561     if(rbits <= 0)
 562         rc->qpm++;
 563     rc->qpm = x264_clip3(rc->qpm, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 564 }
 565
 566 int  x264_ratecontrol_qp( x264_t *h )
 567 {
 568     return h->rc->qpm;
 569 }
 570
 571 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 572 {
 573     if( h->param.rc.b_stat_read )
 574     {
 575         if( frame_num >= h->rc->num_entries )
 576         {
 577             x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
 578             return X264_TYPE_P;
 579         }
 580         switch( h->rc->entry[frame_num].pict_type )
 581         {
 582             case SLICE_TYPE_I:
 583                 return h->rc->entry[frame_num].idr ? X264_TYPE_IDR : X264_TYPE_I;
 584
 585             case SLICE_TYPE_B:
 586                 return X264_TYPE_B;
 587
 588             case SLICE_TYPE_P:
 589             default:
 590                 return X264_TYPE_P;
 591         }
 592     }
 593     else
 594     {
 595         return X264_TYPE_AUTO;
 596     }
 597 }
 598
 599 void x264_ratecontrol_end( x264_t *h, int bits )
 600 {
 601     x264_ratecontrol_t *rc = h->rc;
 602     int i;
 603
 604     x264_cpu_restore( h->param.cpu );
 605
 606     h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
 607     h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
 608     for( i = B_DIRECT; i < B_8x8; i++ )
 609         h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];
 610
 611     if( h->param.rc.b_stat_write )
 612     {
 613         char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
 614                     : rc->slice_type==SLICE_TYPE_P ? 'P' : 'B';
 615         fprintf( rc->p_stat_file_out,
 616                  "in:%d out:%d type:%c q:%.3f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
 617                  h->fenc->i_frame, h->i_frame-1,
 618                  c_type, rc->qpa,
 619                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
 620                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
 621                  h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
 622                  h->stat.frame.i_mb_count_p,
 623                  h->stat.frame.i_mb_count_skip);
 624     }
 625
 626     if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
 627         return;
 628
 629     rc->buffer_fullness += rc->rcbufrate - bits;
 630     if(rc->buffer_fullness < 0){
 631         x264_log(h, X264_LOG_WARNING, "buffer underflow %i\n",
 632                  rc->buffer_fullness);
 633         rc->buffer_fullness = 0;
 634     }
 635
 636     rc->qpa = (float)rc->qps / rc->mb;
 637     if(rc->slice_type == SLICE_TYPE_P){
 638         rc->qp_avg_p += rc->qpa;
 639         rc->qp_last_p = rc->qpa;
 640         rc->pframes++;
 641     } else if(rc->slice_type == SLICE_TYPE_I){
 642         float err = (float) rc->ufbits / rc->fbits;
 643         if(err > 1.1)
 644             rc->gop_qp++;
 645         else if(err < 0.9)
 646             rc->gop_qp--;
 647     }
 648
 649     rc->overhead = bits - rc->ufbits;
 650
 651     x264_log(h, X264_LOG_DEBUG, "bits=%i, qp=%.1f, z=%i, zr=%6.3f, buf=%i\n",
 652              bits, rc->qpa, rc->nzcoeffs, (float) rc->nzcoeffs / rc->ncoeffs,
 653              rc->buffer_fullness);
 654
 655     rc->bits_last_gop += bits;
 656     rc->frames++;
 657     rc->mb = 0;
 658 }
 659
/****************************************************************************
 * 2 pass functions
 ***************************************************************************/
/* Expression evaluator, defined outside this file. It is used below to
 * evaluate the rate control equation given as a string, with tables of
 * named constants and named one- and two-argument functions; `opaque` is
 * handed to those functions as their first argument. */
double x264_eval( char *s, double *const_value, const char **const_name,
                  double (**func1)(void *, double), const char **func1_name,
                  double (**func2)(void *, double, double), char **func2_name,
                  void *opaque );
 667
 668 /**
 669  * modifies the bitrate curve from pass1 for one frame
 670  */
 671 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor)
 672 {
 673     x264_ratecontrol_t *rcc= h->rc;
 674     const int pict_type = rce->pict_type;
 675     double q;
 676
 677     double const_values[]={
 678         rce->i_tex_bits * rce->qscale,
 679         rce->p_tex_bits * rce->qscale,
 680         (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
 681         rce->mv_bits * rce->qscale,
 682         (double)rce->i_count / rcc->nmb,
 683         (double)rce->p_count / rcc->nmb,
 684         (double)rce->s_count / rcc->nmb,
 685         rce->pict_type == SLICE_TYPE_I,
 686         rce->pict_type == SLICE_TYPE_P,
 687         rce->pict_type == SLICE_TYPE_B,
 688         h->param.rc.f_qcompress,
 689         rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
 690         rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 691         rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 692         rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
 693         (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
 694         rce->blurred_complexity,
 695         0
 696     };
 697     static const char *const_names[]={
 698         "iTex",
 699         "pTex",
 700         "tex",
 701         "mv",
 702         "iCount",
 703         "pCount",
 704         "sCount",
 705         "isI",
 706         "isP",
 707         "isB",
 708         "qComp",
 709         "avgIITex",
 710         "avgPITex",
 711         "avgPPTex",
 712         "avgBPTex",
 713         "avgTex",
 714         "blurCplx",
 715         NULL
 716     };
 717     static double (*func1[])(void *, double)={
 718 //      (void *)bits2qscale,
 719         (void *)qscale2bits,
 720         NULL
 721     };
 722     static const char *func1_names[]={
 723 //      "bits2qp",
 724         "qp2bits",
 725         NULL
 726     };
 727
 728     q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
 729     q /= rate_factor;
 730
 731     // avoid NaN's in the rc_eq
 732     if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
 733         q = rcc->last_qscale;
 734     else
 735         rcc->last_qscale = q;
 736
 737     return q;
 738 }
 739
 740 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 741 {
 742     x264_ratecontrol_t *rcc = h->rc;
 743     const int pict_type = rce->pict_type;
 744
 745     // force I/B quants as a function of P quants
 746     const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
 747     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
 748     if( pict_type == SLICE_TYPE_I )
 749     {
 750         double iq = q;
 751         double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 752         double ip_factor = fabs( h->param.rc.f_ip_factor );
 753         /* don't apply ip_factor if the following frame is also I */
 754         if( rcc->accum_p_norm <= 0 )
 755             q = iq;
 756         else if( h->param.rc.f_ip_factor < 0 )
 757             q = iq / ip_factor;
 758         else if( rcc->accum_p_norm >= 1 )
 759             q = pq / ip_factor;
 760         else
 761             q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
 762     }
 763     else if( pict_type == SLICE_TYPE_B )
 764     {
 765         if( h->param.rc.f_pb_factor > 0 )
 766             q = last_non_b_q;
 767         q *= fabs( h->param.rc.f_pb_factor );
 768     }
 769     else if( pict_type == SLICE_TYPE_P
 770              && rcc->last_non_b_pict_type == SLICE_TYPE_P
 771              && rce->i_tex_bits + rce->p_tex_bits == 0 )
 772     {
 773         q = last_p_q;
 774     }
 775
 776     /* last qscale / qdiff stuff */
 777     /* TODO take intro account whether the I-frame is a scene cut
 778      * or just a seek point */
 779     if(rcc->last_non_b_pict_type==pict_type
 780        && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
 781     {
 782         double last_q = rcc->last_qscale_for[pict_type];
 783         double max_qscale = last_q * rcc->lstep;
 784         double min_qscale = last_q / rcc->lstep;
 785
 786         if     (q > max_qscale) q = max_qscale;
 787         else if(q < min_qscale) q = min_qscale;
 788     }
 789
 790     rcc->last_qscale_for[pict_type] = q;
 791     if(pict_type!=SLICE_TYPE_B)
 792         rcc->last_non_b_pict_type = pict_type;
 793     if(pict_type==SLICE_TYPE_I)
 794     {
 795         rcc->last_accum_p_norm = rcc->accum_p_norm;
 796         rcc->accum_p_norm = 0;
 797         rcc->accum_p_qp = 0;
 798     }
 799     if(pict_type==SLICE_TYPE_P)
 800     {
 801         float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
 802         rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
 803         rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
 804     }
 805     return q;
 806 }
 807
 808 // clip a qscale to between lmin and lmax
 809 static double clip_qscale( x264_t *h, ratecontrol_entry_t *rce, double q )
 810 {
 811     double lmin = h->rc->lmin[rce->pict_type];
 812     double lmax = h->rc->lmax[rce->pict_type];
 813
 814     if(lmin==lmax){
 815         return lmin;
 816     }else{
 817         double min2 = log(lmin);
 818         double max2 = log(lmax);
 819         q = (log(q) - min2)/(max2-min2) - 0.5;
 820         q = 1.0/(1.0 + exp(-4*q));
 821         q = q*(max2-min2) + min2;
 822         return exp(q);
 823     }
 824 }
 825
 826 // update qscale for 1 frame based on actual bits used so far
 827 static float rate_estimate_qscale(x264_t *h, int pict_type)
 828 {
 829     float q;
 830     float br_compensation;
 831     double diff;
 832     int picture_number = h->fenc->i_frame;
 833     x264_ratecontrol_t *rcc = h->rc;
 834     ratecontrol_entry_t *rce;
 835     double lmin = rcc->lmin[pict_type];
 836     double lmax = rcc->lmax[pict_type];
 837     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
 838                           + h->stat.i_slice_size[SLICE_TYPE_P]
 839                           + h->stat.i_slice_size[SLICE_TYPE_B]);
 840
 841 //printf("input_pic_num:%d pic_num:%d frame_rate:%d\n", s->input_picture_number, s->picture_number, s->frame_rate);
 842
 843     rce = &rcc->entry[picture_number];
 844
 845     assert(pict_type == rce->pict_type);
 846
 847     if(rce->pict_type == SLICE_TYPE_B)
 848     {
 849         return rcc->last_qscale * h->param.rc.f_pb_factor;
 850     }
 851     else
 852     {
 853         diff = (int64_t)total_bits - (int64_t)rce->expected_bits;
 854         br_compensation = (rcc->buffer_size - diff) / rcc->buffer_size;
 855         br_compensation = x264_clip3f(br_compensation, .5, 2);
 856
 857         q = rce->new_qscale / br_compensation;
 858         q = x264_clip3f(q, lmin, lmax);
 859         rcc->last_qscale = q;
 860         return q;
 861     }
 862 }
 863
 864 static int init_pass2( x264_t *h )
 865 {
 866     x264_ratecontrol_t *rcc = h->rc;
 867     uint64_t all_const_bits = 0;
 868     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
 869     double rate_factor, step, step_mult;
 870     double qblur = h->param.rc.f_qblur;
 871     double cplxblur = h->param.rc.f_complexity_blur;
 872     const int filter_size = (int)(qblur*4) | 1;
 873     double expected_bits;
 874     double *qscale, *blurred_qscale;
 875     int i;
 876
 877     /* find total/average complexity & const_bits */
 878     for(i=0; i<rcc->num_entries; i++){
 879         ratecontrol_entry_t *rce = &rcc->entry[i];
 880         all_const_bits += rce->misc_bits;
 881         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
 882         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
 883         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
 884         rcc->frame_count[rce->pict_type] ++;
 885     }
 886
 887     if( all_available_bits < all_const_bits)
 888     {
 889         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
 890                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
 891         return -1;
 892     }
 893
 894     for(i=0; i<rcc->num_entries; i++){
 895         ratecontrol_entry_t *rce = &rcc->entry[i];
 896         double weight_sum = 0;
 897         double cplx_sum = 0;
 898         double weight = 1.0;
 899         int j;
 900         /* weighted average of cplx of future frames */
 901         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
 902             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
 903             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 904             if(weight < .0001)
 905                 break;
 906             weight_sum += weight;
 907             cplx_sum += weight * qscale2bits(rcj, 1);
 908         }
 909         /* weighted average of cplx of past frames */
 910         weight = 1.0;
 911         for(j=0; j<cplxblur*2 && j<=i; j++){
 912             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
 913             weight_sum += weight;
 914             cplx_sum += weight * qscale2bits(rcj, 1);
 915             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 916             if(weight < .0001)
 917                 break;
 918         }
 919         rce->blurred_complexity = cplx_sum / weight_sum;
 920     }
 921
 922     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 923     if(filter_size > 1)
 924         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 925     else
 926         blurred_qscale = qscale;
 927
 928     expected_bits = 1;
 929     for(i=0; i<rcc->num_entries; i++)
 930         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
 931     step_mult = all_available_bits / expected_bits;
 932
 933     rate_factor = 0;
 934     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
 935         expected_bits = 0;
 936         rate_factor += step;
 937
 938         rcc->last_non_b_pict_type = -1;
 939         rcc->last_accum_p_norm = 1;
 940
 941         /* find qscale */
 942         for(i=0; i<rcc->num_entries; i++){
 943             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
 944         }
 945
 946         /* fixed I/B QP relative to P mode */
 947         for(i=rcc->num_entries-1; i>=0; i--){
 948             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
 949             assert(qscale[i] >= 0);
 950         }
 951
 952         /* smooth curve */
 953         if(filter_size > 1){
 954             assert(filter_size%2==1);
 955             for(i=0; i<rcc->num_entries; i++){
 956                 ratecontrol_entry_t *rce = &rcc->entry[i];
 957                 int j;
 958                 double q=0.0, sum=0.0;
 959
 960                 for(j=0; j<filter_size; j++){
 961                     int index = i+j-filter_size/2;
 962                     double d = index-i;
 963                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
 964                     if(index < 0 || index >= rcc->num_entries) continue;
 965                     if(rce->pict_type != rcc->entry[index].pict_type) continue;
 966                     q += qscale[index] * coeff;
 967                     sum += coeff;
 968                 }
 969                 blurred_qscale[i] = q/sum;
 970             }
 971         }
 972
 973         /* find expected bits */
 974         for(i=0; i<rcc->num_entries; i++){
 975             ratecontrol_entry_t *rce = &rcc->entry[i];
 976             double bits;
 977             rce->new_qscale = clip_qscale(h, rce, blurred_qscale[i]);
 978             assert(rce->new_qscale >= 0);
 979             bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
 980
 981             rce->expected_bits = expected_bits;
 982             expected_bits += bits;
 983         }
 984
 985 //printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor);
 986         if(expected_bits > all_available_bits) rate_factor -= step;
 987     }
 988
 989     x264_free(qscale);
 990     if(filter_size > 1)
 991         x264_free(blurred_qscale);
 992
 993     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
 994     {
 995         double avgq = 0;
 996         for(i=0; i<rcc->num_entries; i++)
 997             avgq += rcc->entry[i].new_qscale;
 998         avgq = qscale2qp(avgq / rcc->num_entries);
 999
1000         x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
1001         x264_log(h, X264_LOG_ERROR, "expected: %.0f KiB, available: %.0f KiB, avg QP: %.4f\n", expected_bits/8192., all_available_bits/8192., avgq);
1002         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1003             x264_log(h, X264_LOG_ERROR, "try reducing bitrate or reducing qp_min\n");
1004         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_min - 2)
1005             x264_log(h, X264_LOG_ERROR, "try increasing bitrate or increasing qp_max\n");
1006         else
1007             x264_log(h, X264_LOG_ERROR, "internal error\n");
1008         return -1;
1009     }
1010
1011     return 0;
1012 }
1013
1014