git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Måns Rullgård <mru@mru.ath.cx>
   8  * 2 pass code: Michael Niedermayer <michaelni@gmx.at>
   9  *              Loren Merritt <lorenm@u.washington.edu>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <math.h>
  32 #include <limits.h>
  33 #include <assert.h>
  34
  35 #include "common/common.h"
  36 #include "common/cpu.h"
  37 #include "common/macroblock.h"
  38 #include "ratecontrol.h"
  39
  40 #ifdef SYS_MACOSX
  41 #define exp2f(x) ( (float) exp2( (x) ) )
  42 #endif
  43 #ifdef SYS_FREEBSD
  44 #define exp2f(x) powf( 2, (x) )
  45 #endif
  46 #ifdef _MSC_VER
  47 #define exp2f(x) pow( 2, (x) )
  48 #endif
  49 #ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
  50 #define rename(src,dst) (unlink(dst), rename(src,dst))
  51 #endif
  52
  53 typedef struct
  54 {
  55     int pict_type;
  56     int idr;
  57     float qscale;
  58     int mv_bits;
  59     int i_tex_bits;
  60     int p_tex_bits;
  61     int misc_bits;
  62     uint64_t expected_bits;
  63     float new_qscale;
  64     int new_qp;
  65     int i_count;
  66     int p_count;
  67     int s_count;
  68     float blurred_complexity;
  69 } ratecontrol_entry_t;
  70
  71 struct x264_ratecontrol_t
  72 {
  73     /* constants */
  74     double fps;
  75     int gop_size;
  76     int bitrate;
  77     int nmb;                    /* number of macroblocks in a frame */
  78     int buffer_size;
  79     int rcbufrate;
  80     int init_qp;
  81     int qp_constant[5];
  82
  83     /* 1st pass stuff */
  84     int gop_qp;
  85     int buffer_fullness;
  86     int frames;                 /* frames in current gop */
  87     int pframes;
  88     int slice_type;
  89     int mb;                     /* MBs processed in current frame */
  90     int bits_gop;               /* allocated bits current gop */
  91     int bits_last_gop;          /* bits consumed in gop */
  92     int qp;                     /* qp for current frame */
  93     int qpm;                    /* qp for next MB */
  94     float qpa;                  /* average qp for last frame */
  95     int qps;
  96     float qp_avg_p;             /* average QP for P frames */
  97     float qp_last_p;
  98     int fbits;                  /* bits allocated for current frame */
  99     int ufbits;                 /* bits used for current frame */
 100     int nzcoeffs;               /* # of 0-quantized coefficients */
 101     int ncoeffs;                /* total # of coefficients */
 102     int overhead;
 103
 104     /* 2pass stuff */
 105     FILE *p_stat_file_out;
 106     char *psz_stat_file_tmpname;
 107
 108     int num_entries;            /* number of ratecontrol_entry_ts */
 109     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
 110     double last_qscale;
 111     double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
 112     int last_non_b_pict_type;
 113     double accum_p_qp;          /* for determining I-frame quant */
 114     double accum_p_norm;
 115     double last_accum_p_norm;
 116     double lmin[5];             /* min qscale by frame type */
 117     double lmax[5];
 118     double lstep;               /* max change (multiply) in qscale per frame */
 119     double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
 120     double p_cplx_sum[5];
 121     double mv_bits_sum[5];
 122     int frame_count[5];         /* number of frames of each type */
 123 };
 124
 125
 126 static int init_pass2(x264_t *);
 127 static float rate_estimate_qscale( x264_t *h, int pict_type );
 128
 129 /* Terminology:
 130  * qp = h.264's quantizer
 131  * qscale = linearized quantizer = Lagrange multiplier
 132  */
 133 static inline double qp2qscale(double qp)
 134 {
 135     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 136 }
 137 static inline double qscale2qp(double qscale)
 138 {
 139     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 140 }
 141
 142 /* Texture bitrate is not quite inversely proportional to qscale,
 143  * probably due the the changing number of SKIP blocks.
 144  * MV bits level off at about qp<=12, because the lambda used
 145  * for motion estimation is constant there. */
 146 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 147 {
 148     if(qscale<0.1)
 149         qscale = 0.1;
 150     return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
 151            + rce->mv_bits * pow( X264_MAX(rce->qscale, 12) / X264_MAX(qscale, 12), 0.5 );
 152 }
 153
 154 /* There is no analytical inverse to the above formula. */
 155 #if 0
 156 static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
 157 {
 158     if(bits<1.0)
 159         bits = 1.0;
 160     return (rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits + .1) * rce->qscale / bits;
 161 }
 162 #endif
 163
 164
 165 int x264_ratecontrol_new( x264_t *h )
 166 {
 167     x264_ratecontrol_t *rc;
 168     float bpp;
 169     int i;
 170
 171     /* Needed(?) for 2 pass */
 172     x264_cpu_restore( h->param.cpu );
 173
 174     h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
 175     memset(rc, 0, sizeof(*rc));
 176
 177     /* FIXME: use integers */
 178     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 179         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 180     else
 181         rc->fps = 25.0;
 182
 183     rc->gop_size = h->param.i_keyint_max;
 184     rc->bitrate = h->param.rc.i_bitrate * 1000;
 185     rc->nmb = h->mb.i_mb_count;
 186
 187     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 188     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 189     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 190
 191     /* Init 1pass CBR algo */
 192     if( h->param.rc.b_cbr ){
 193         rc->buffer_size = h->param.rc.i_rc_buffer_size * 1000;
 194         rc->buffer_fullness = h->param.rc.i_rc_init_buffer;
 195         rc->rcbufrate = rc->bitrate / rc->fps;
 196
 197         if(rc->buffer_size < rc->rcbufrate){
 198             x264_log(h, X264_LOG_WARNING, "rc buffer size %i too small\n",
 199                      rc->buffer_size);
 200             rc->buffer_size = 0;
 201         }
 202
 203         if(rc->buffer_size <= 0)
 204             rc->buffer_size = rc->bitrate / 2;
 205
 206         if(rc->buffer_fullness > rc->buffer_size || rc->buffer_fullness < 0){
 207             x264_log(h, X264_LOG_WARNING, "invalid initial buffer fullness %i\n",
 208                      rc->buffer_fullness);
 209             rc->buffer_fullness = 0;
 210         }
 211
 212         bpp = rc->bitrate / (rc->fps * h->param.i_width * h->param.i_height);
 213         if(bpp <= 0.6)
 214             rc->init_qp = 31;
 215         else if(bpp <= 1.4)
 216             rc->init_qp = 25;
 217         else if(bpp <= 2.4)
 218             rc->init_qp = 20;
 219         else
 220             rc->init_qp = 10;
 221         rc->gop_qp = rc->init_qp;
 222
 223         rc->bits_last_gop = 0;
 224
 225         x264_log(h, X264_LOG_DEBUG, "%f fps, %i bps, bufsize %i\n",
 226                  rc->fps, rc->bitrate, rc->buffer_size);
 227     }
 228
 229
 230     rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
 231     rc->last_qscale = qp2qscale(26);
 232     for( i = 0; i < 5; i++ )
 233     {
 234         rc->last_qscale_for[i] = qp2qscale(26);
 235         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 236         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 237     }
 238 #if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
 239     rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 240     rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 241     rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 242     rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 243 #endif
 244
 245     /* Load stat file and init 2pass algo */
 246     if( h->param.rc.b_stat_read )
 247     {
 248         int stats_size;
 249         char *p, *stats_in;
 250         FILE *stats_file;
 251
 252         /* read 1st pass stats */
 253         assert( h->param.rc.psz_stat_in );
 254         stats_file = fopen( h->param.rc.psz_stat_in, "rb");
 255         if(!stats_file)
 256         {
 257             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 258             return -1;
 259         }
 260         // FIXME: error checking
 261         fseek(stats_file, 0, SEEK_END);
 262         stats_size = ftell(stats_file);
 263         fseek(stats_file, 0, SEEK_SET);
 264         stats_in = x264_malloc(stats_size+10);
 265         fread(stats_in, 1, stats_size, stats_file);
 266         fclose(stats_file);
 267
 268         /* find number of pics */
 269         p = stats_in;
 270         for(i=-1; p; i++){
 271             p = strchr(p+1, ';');
 272         }
 273         i += h->param.i_bframe;
 274         rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
 275         memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
 276         rc->num_entries= i;
 277
 278         /* init all to skipped p frames */
 279         for(i=0; i<rc->num_entries; i++){
 280             ratecontrol_entry_t *rce = &rc->entry[i];
 281             rce->pict_type = SLICE_TYPE_P;
 282             rce->qscale = rce->new_qscale = qp2qscale(20);
 283             rce->misc_bits = rc->nmb + 10;
 284             rce->new_qp = 0;
 285         }
 286
 287         /* read stats */
 288         p = stats_in;
 289         for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
 290             ratecontrol_entry_t *rce;
 291             int frame_number;
 292             char pict_type;
 293             int e;
 294             char *next;
 295             float qp;
 296
 297             next= strchr(p, ';');
 298             if(next){
 299                 (*next)=0; //sscanf is unbelievably slow on looong strings
 300                 next++;
 301             }
 302             e = sscanf(p, " in:%d ", &frame_number);
 303
 304             assert(frame_number >= 0);
 305             assert(frame_number < rc->num_entries);
 306             rce = &rc->entry[frame_number];
 307
 308             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
 309                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 310                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
 311
 312             switch(pict_type){
 313                 case 'I': rce->idr = 1;
 314                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 315                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 316                 case 'B': rce->pict_type = SLICE_TYPE_B; break;
 317                 default:  e = -1; break;
 318             }
 319             if(e != 10){
 320                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 321                 return -1;
 322             }
 323             rce->qscale = qp2qscale(qp);
 324             p = next;
 325         }
 326
 327         x264_free(stats_in);
 328
 329         /* If using 2pass with constant quant, no need to run the bitrate allocation */
 330         if(h->param.rc.b_cbr)
 331         {
 332             if(init_pass2(h) < 0) return -1;
 333         }
 334     }
 335
 336     /* Open output file */
 337     /* If input and output files are the same, output to a temp file
 338      * and move it to the real name only when it's complete */
 339     if( h->param.rc.b_stat_write )
 340     {
 341         rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
 342         strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 343         strcat( rc->psz_stat_file_tmpname, ".temp" );
 344
 345         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 346         if( rc->p_stat_file_out == NULL )
 347         {
 348             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 349             return -1;
 350         }
 351     }
 352
 353     return 0;
 354 }
 355
 356 void x264_ratecontrol_delete( x264_t *h )
 357 {
 358     x264_ratecontrol_t *rc = h->rc;
 359
 360     if( rc->p_stat_file_out )
 361     {
 362         fclose( rc->p_stat_file_out );
 363         if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 364             if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
 365             {
 366                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
 367                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 368             }
 369         x264_free( rc->psz_stat_file_tmpname );
 370     }
 371     if( rc->entry )
 372         x264_free(rc->entry);
 373     x264_free( rc );
 374 }
 375
 376 void x264_ratecontrol_start( x264_t *h, int i_slice_type )
 377 {
 378     x264_ratecontrol_t *rc = h->rc;
 379     int gframes, iframes, pframes, bframes;
 380     int minbits, maxbits;
 381     int gbits, fbits;
 382     int zn = 0;
 383     float kp;
 384     int gbuf;
 385
 386     rc->slice_type = i_slice_type;
 387
 388     x264_cpu_restore( h->param.cpu );
 389
 390     if( !h->param.rc.b_cbr )
 391     {
 392         rc->qpm = rc->qpa = rc->qp =
 393             rc->qp_constant[ i_slice_type ];
 394         return;
 395     }
 396     else if( h->param.rc.b_stat_read )
 397     {
 398         int frame = h->fenc->i_frame;
 399         ratecontrol_entry_t *rce;
 400         assert( frame >= 0 && frame < rc->num_entries );
 401         rce = &h->rc->entry[frame];
 402
 403         rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
 404         rc->qpm = rc->qpa = rc->qp = rce->new_qp =
 405             (int)(qscale2qp(rce->new_qscale) + 0.5);
 406         return;
 407     }
 408
 409     switch(i_slice_type){
 410     case SLICE_TYPE_I:
 411         gbuf = rc->buffer_fullness + (rc->gop_size-1) * rc->rcbufrate;
 412         rc->bits_gop = gbuf - rc->buffer_size / 2;
 413
 414         if(!rc->mb && rc->pframes){
 415             int qp = rc->qp_avg_p / rc->pframes + 0.5;
 416 #if 0 /* JM does this without explaining why */
 417             int gdq = (float) rc->gop_size / 15 + 0.5;
 418             if(gdq > 2)
 419                 gdq = 2;
 420             qp -= gdq;
 421             if(qp > rc->qp_last_p - 2)
 422                 qp--;
 423 #endif
 424             qp = x264_clip3(qp, rc->gop_qp - 4, rc->gop_qp + 4);
 425             qp = x264_clip3(qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 426             rc->gop_qp = qp;
 427         } else if(rc->frames > 4){
 428             rc->gop_qp = rc->init_qp;
 429         }
 430
 431         kp = h->param.rc.f_ip_factor * h->param.rc.f_pb_factor;
 432
 433         x264_log(h, X264_LOG_DEBUG,"gbuf=%i bits_gop=%i frames=%i gop_qp=%i\n",
 434                  gbuf, rc->bits_gop, rc->frames, rc->gop_qp);
 435
 436         rc->bits_last_gop = 0;
 437         rc->frames = 0;
 438         rc->pframes = 0;
 439         rc->qp_avg_p = 0;
 440         break;
 441
 442     case SLICE_TYPE_P:
 443         kp = h->param.rc.f_pb_factor;
 444         break;
 445
 446     case SLICE_TYPE_B:
 447         kp = 1.0;
 448         break;
 449
 450     default:
 451         x264_log(h, X264_LOG_WARNING, "ratecontrol: unknown slice type %i\n",
 452                  i_slice_type);
 453         kp = 1.0;
 454         break;
 455     }
 456
 457     gframes = rc->gop_size - rc->frames;
 458     iframes = gframes / rc->gop_size;
 459     pframes = gframes / (h->param.i_bframe + 1) - iframes;
 460     bframes = gframes - pframes - iframes;
 461
 462     gbits = rc->bits_gop - rc->bits_last_gop;
 463     fbits = kp * gbits /
 464         (h->param.rc.f_ip_factor * h->param.rc.f_pb_factor * iframes +
 465          h->param.rc.f_pb_factor * pframes + bframes);
 466
 467     minbits = rc->buffer_fullness + rc->rcbufrate - rc->buffer_size;
 468     if(minbits < 0)
 469         minbits = 0;
 470     maxbits = rc->buffer_fullness;
 471     rc->fbits = x264_clip3(fbits, minbits, maxbits);
 472
 473     if(i_slice_type == SLICE_TYPE_I){
 474         rc->qp = rc->gop_qp;
 475     } else if(rc->ncoeffs && rc->ufbits){
 476         int dqp, nonzc;
 477
 478         nonzc = (rc->ncoeffs - rc->nzcoeffs);
 479         if(nonzc == 0)
 480             zn = rc->ncoeffs;
 481         else if(rc->fbits < INT_MAX / nonzc)
 482             zn = rc->ncoeffs - rc->fbits * nonzc / rc->ufbits;
 483         else
 484             zn = 0;
 485         zn = x264_clip3(zn, 0, rc->ncoeffs);
 486         dqp = h->param.rc.i_rc_sens * exp2f(rc->qpa / 6) *
 487             (zn - rc->nzcoeffs) / rc->nzcoeffs;
 488         dqp = x264_clip3(dqp, -h->param.rc.i_qp_step, h->param.rc.i_qp_step);
 489         rc->qp = (int)(rc->qpa + dqp + .5);
 490     }
 491
 492     if(rc->fbits > 0.9 * maxbits)
 493         rc->qp += 2;
 494     else if(rc->fbits > 0.8 * maxbits)
 495         rc->qp += 1;
 496     else if(rc->fbits < 1.1 * minbits)
 497         rc->qp -= 2;
 498     else if(rc->fbits < 1.2 * minbits)
 499         rc->qp -= 1;
 500
 501     rc->qp = x264_clip3(rc->qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 502     rc->qpm = rc->qp;
 503
 504     x264_log(h, X264_LOG_DEBUG, "fbits=%i, qp=%i, z=%i, min=%i, max=%i\n",
 505              rc->fbits, rc->qpm, zn, minbits, maxbits);
 506
 507     rc->fbits -= rc->overhead;
 508     rc->ufbits = 0;
 509     rc->ncoeffs = 0;
 510     rc->nzcoeffs = 0;
 511     rc->mb = 0;
 512     rc->qps = 0;
 513 }
 514
 515 void x264_ratecontrol_mb( x264_t *h, int bits )
 516 {
 517     x264_ratecontrol_t *rc = h->rc;
 518     int rbits;
 519     int zn, enz, nonz;
 520     int rcoeffs;
 521     int dqp;
 522     int i;
 523
 524     if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
 525         return;
 526
 527     x264_cpu_restore( h->param.cpu );
 528
 529     rc->qps += rc->qpm;
 530     rc->ufbits += bits;
 531     rc->mb++;
 532
 533     for(i = 0; i < 16 + 8; i++)
 534         rc->nzcoeffs += 16 - h->mb.cache.non_zero_count[x264_scan8[i]];
 535     rc->ncoeffs += 16 * (16 + 8);
 536
 537     if(rc->mb < rc->nmb / 16)
 538         return;
 539     else if(rc->mb == rc->nmb)
 540         return;
 541
 542     rcoeffs = (rc->nmb - rc->mb) * 16 * 24;
 543     rbits = rc->fbits - rc->ufbits;
 544 /*     if(rbits < 0) */
 545 /*      rbits = 0; */
 546
 547 /*     zn = (rc->nmb - rc->mb) * 16 * 24; */
 548     nonz = (rc->ncoeffs - rc->nzcoeffs);
 549     if(nonz == 0)
 550         zn = rcoeffs;
 551     else if(rc->ufbits && rbits < INT_MAX / nonz)
 552         zn = rcoeffs - rbits * nonz / rc->ufbits;
 553     else
 554         zn = 0;
 555     zn = x264_clip3(zn, 0, rcoeffs);
 556     enz = rc->nzcoeffs * (rc->nmb - rc->mb) / rc->mb;
 557     dqp = (float) 2*h->param.rc.i_rc_sens * exp2f((float) rc->qps / rc->mb / 6) *
 558         (zn - enz) / enz;
 559     rc->qpm = x264_clip3(rc->qpm + dqp, rc->qp - 3, rc->qp + 3);
 560     if(rbits <= 0)
 561         rc->qpm++;
 562     rc->qpm = x264_clip3(rc->qpm, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 563 }
 564
 565 int  x264_ratecontrol_qp( x264_t *h )
 566 {
 567     return h->rc->qpm;
 568 }
 569
 570 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 571 {
 572     if( h->param.rc.b_stat_read )
 573     {
 574         if( frame_num >= h->rc->num_entries )
 575         {
 576             x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
 577             return X264_TYPE_P;
 578         }
 579         switch( h->rc->entry[frame_num].pict_type )
 580         {
 581             case SLICE_TYPE_I:
 582                 return h->rc->entry[frame_num].idr ? X264_TYPE_IDR : X264_TYPE_I;
 583
 584             case SLICE_TYPE_B:
 585                 return X264_TYPE_B;
 586
 587             case SLICE_TYPE_P:
 588             default:
 589                 return X264_TYPE_P;
 590         }
 591     }
 592     else
 593     {
 594         return X264_TYPE_AUTO;
 595     }
 596 }
 597
 598 void x264_ratecontrol_end( x264_t *h, int bits )
 599 {
 600     x264_ratecontrol_t *rc = h->rc;
 601     int i;
 602
 603     x264_cpu_restore( h->param.cpu );
 604
 605     h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
 606     h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
 607     for( i = B_DIRECT; i < B_8x8; i++ )
 608         h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];
 609
 610     if( h->param.rc.b_stat_write )
 611     {
 612         char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
 613                     : rc->slice_type==SLICE_TYPE_P ? 'P' : 'B';
 614         fprintf( rc->p_stat_file_out,
 615                  "in:%d out:%d type:%c q:%.3f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
 616                  h->fenc->i_frame, h->i_frame-1,
 617                  c_type, rc->qpa,
 618                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
 619                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
 620                  h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
 621                  h->stat.frame.i_mb_count_p,
 622                  h->stat.frame.i_mb_count_skip);
 623     }
 624
 625     if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
 626         return;
 627
 628     rc->buffer_fullness += rc->rcbufrate - bits;
 629     if(rc->buffer_fullness < 0){
 630         x264_log(h, X264_LOG_WARNING, "buffer underflow %i\n",
 631                  rc->buffer_fullness);
 632         rc->buffer_fullness = 0;
 633     }
 634
 635     rc->qpa = (float)rc->qps / rc->mb;
 636     if(rc->slice_type == SLICE_TYPE_P){
 637         rc->qp_avg_p += rc->qpa;
 638         rc->qp_last_p = rc->qpa;
 639         rc->pframes++;
 640     } else if(rc->slice_type == SLICE_TYPE_I){
 641         float err = (float) rc->ufbits / rc->fbits;
 642         if(err > 1.1)
 643             rc->gop_qp++;
 644         else if(err < 0.9)
 645             rc->gop_qp--;
 646     }
 647
 648     rc->overhead = bits - rc->ufbits;
 649
 650     x264_log(h, X264_LOG_DEBUG, "bits=%i, qp=%.1f, z=%i, zr=%6.3f, buf=%i\n",
 651              bits, rc->qpa, rc->nzcoeffs, (float) rc->nzcoeffs / rc->ncoeffs,
 652              rc->buffer_fullness);
 653
 654     rc->bits_last_gop += bits;
 655     rc->frames++;
 656     rc->mb = 0;
 657 }
 658
 659 /****************************************************************************
 660  * 2 pass functions
 661  ***************************************************************************/
 662 double x264_eval( char *s, double *const_value, const char **const_name,
 663                   double (**func1)(void *, double), const char **func1_name,
 664                   double (**func2)(void *, double, double), char **func2_name,
 665                   void *opaque );
 666
 667 /**
 668  * modifies the bitrate curve from pass1 for one frame
 669  */
 670 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor)
 671 {
 672     x264_ratecontrol_t *rcc= h->rc;
 673     const int pict_type = rce->pict_type;
 674     double q;
 675
 676     double const_values[]={
 677         rce->i_tex_bits * rce->qscale,
 678         rce->p_tex_bits * rce->qscale,
 679         (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
 680         rce->mv_bits * rce->qscale,
 681         (double)rce->i_count / rcc->nmb,
 682         (double)rce->p_count / rcc->nmb,
 683         (double)rce->s_count / rcc->nmb,
 684         rce->pict_type == SLICE_TYPE_I,
 685         rce->pict_type == SLICE_TYPE_P,
 686         rce->pict_type == SLICE_TYPE_B,
 687         h->param.rc.f_qcompress,
 688         rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
 689         rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 690         rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 691         rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
 692         (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
 693         rce->blurred_complexity,
 694         0
 695     };
 696     static const char *const_names[]={
 697         "iTex",
 698         "pTex",
 699         "tex",
 700         "mv",
 701         "iCount",
 702         "pCount",
 703         "sCount",
 704         "isI",
 705         "isP",
 706         "isB",
 707         "qComp",
 708         "avgIITex",
 709         "avgPITex",
 710         "avgPPTex",
 711         "avgBPTex",
 712         "avgTex",
 713         "blurCplx",
 714         NULL
 715     };
 716     static double (*func1[])(void *, double)={
 717 //      (void *)bits2qscale,
 718         (void *)qscale2bits,
 719         NULL
 720     };
 721     static const char *func1_names[]={
 722 //      "bits2qp",
 723         "qp2bits",
 724         NULL
 725     };
 726
 727     q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
 728     q /= rate_factor;
 729
 730     // avoid NaN's in the rc_eq
 731     if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
 732         q = rcc->last_qscale;
 733     else
 734         rcc->last_qscale = q;
 735
 736     return q;
 737 }
 738
 739 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 740 {
 741     x264_ratecontrol_t *rcc = h->rc;
 742     const int pict_type = rce->pict_type;
 743
 744     // force I/B quants as a function of P quants
 745     const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
 746     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
 747     if( pict_type == SLICE_TYPE_I )
 748     {
 749         double iq = q;
 750         double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 751         double ip_factor = fabs( h->param.rc.f_ip_factor );
 752         /* don't apply ip_factor if the following frame is also I */
 753         if( rcc->accum_p_norm <= 0 )
 754             q = iq;
 755         else if( h->param.rc.f_ip_factor < 0 )
 756             q = iq / ip_factor;
 757         else if( rcc->accum_p_norm >= 1 )
 758             q = pq / ip_factor;
 759         else
 760             q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
 761     }
 762     else if( pict_type == SLICE_TYPE_B )
 763     {
 764         if( h->param.rc.f_pb_factor > 0 )
 765             q = last_non_b_q;
 766         q *= fabs( h->param.rc.f_pb_factor );
 767     }
 768     else if( pict_type == SLICE_TYPE_P
 769              && rcc->last_non_b_pict_type == SLICE_TYPE_P
 770              && rce->i_tex_bits + rce->p_tex_bits == 0 )
 771     {
 772         q = last_p_q;
 773     }
 774
 775     /* last qscale / qdiff stuff */
 776     /* TODO take intro account whether the I-frame is a scene cut
 777      * or just a seek point */
 778     if(rcc->last_non_b_pict_type==pict_type
 779        && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
 780     {
 781         double last_q = rcc->last_qscale_for[pict_type];
 782         double max_qscale = last_q * rcc->lstep;
 783         double min_qscale = last_q / rcc->lstep;
 784
 785         if     (q > max_qscale) q = max_qscale;
 786         else if(q < min_qscale) q = min_qscale;
 787     }
 788
 789     rcc->last_qscale_for[pict_type] = q;
 790     if(pict_type!=SLICE_TYPE_B)
 791         rcc->last_non_b_pict_type = pict_type;
 792     if(pict_type==SLICE_TYPE_I)
 793     {
 794         rcc->last_accum_p_norm = rcc->accum_p_norm;
 795         rcc->accum_p_norm = 0;
 796         rcc->accum_p_qp = 0;
 797     }
 798     if(pict_type==SLICE_TYPE_P)
 799     {
 800         float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
 801         rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
 802         rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
 803     }
 804     return q;
 805 }
 806
 807 // clip a qscale to between lmin and lmax
 808 static double clip_qscale( x264_t *h, ratecontrol_entry_t *rce, double q )
 809 {
 810     double lmin = h->rc->lmin[rce->pict_type];
 811     double lmax = h->rc->lmax[rce->pict_type];
 812
 813     if(lmin==lmax){
 814         return lmin;
 815     }else{
 816         double min2 = log(lmin);
 817         double max2 = log(lmax);
 818         q = (log(q) - min2)/(max2-min2) - 0.5;
 819         q = 1.0/(1.0 + exp(-4*q));
 820         q = q*(max2-min2) + min2;
 821         return exp(q);
 822     }
 823 }
 824
 825 // update qscale for 1 frame based on actual bits used so far
 826 static float rate_estimate_qscale(x264_t *h, int pict_type)
 827 {
 828     float q;
 829     float br_compensation;
 830     double diff;
 831     int picture_number = h->fenc->i_frame;
 832     x264_ratecontrol_t *rcc = h->rc;
 833     ratecontrol_entry_t *rce;
 834     double lmin = rcc->lmin[pict_type];
 835     double lmax = rcc->lmax[pict_type];
 836     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
 837                           + h->stat.i_slice_size[SLICE_TYPE_P]
 838                           + h->stat.i_slice_size[SLICE_TYPE_B]);
 839
 840 //printf("input_pic_num:%d pic_num:%d frame_rate:%d\n", s->input_picture_number, s->picture_number, s->frame_rate);
 841
 842     rce = &rcc->entry[picture_number];
 843
 844     assert(pict_type == rce->pict_type);
 845
 846     if(rce->pict_type == SLICE_TYPE_B)
 847     {
 848         return rcc->last_qscale * h->param.rc.f_pb_factor;
 849     }
 850     else
 851     {
 852         diff = (int64_t)total_bits - (int64_t)rce->expected_bits;
 853         br_compensation = (rcc->buffer_size - diff) / rcc->buffer_size;
 854         br_compensation = x264_clip3f(br_compensation, .5, 2);
 855
 856         q = rce->new_qscale / br_compensation;
 857         q = x264_clip3f(q, lmin, lmax);
 858         rcc->last_qscale = q;
 859         return q;
 860     }
 861 }
 862
 863 static int init_pass2( x264_t *h )
 864 {
 865     x264_ratecontrol_t *rcc = h->rc;
 866     uint64_t all_const_bits = 0;
 867     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
 868     double rate_factor, step, step_mult;
 869     double qblur = h->param.rc.f_qblur;
 870     double cplxblur = h->param.rc.f_complexity_blur;
 871     const int filter_size = (int)(qblur*4) | 1;
 872     double expected_bits;
 873     double *qscale, *blurred_qscale;
 874     int i;
 875
 876     /* find total/average complexity & const_bits */
 877     for(i=0; i<rcc->num_entries; i++){
 878         ratecontrol_entry_t *rce = &rcc->entry[i];
 879         all_const_bits += rce->misc_bits;
 880         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
 881         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
 882         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
 883         rcc->frame_count[rce->pict_type] ++;
 884     }
 885
 886     if( all_available_bits < all_const_bits)
 887     {
 888         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
 889                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
 890         return -1;
 891     }
 892
 893     for(i=0; i<rcc->num_entries; i++){
 894         ratecontrol_entry_t *rce = &rcc->entry[i];
 895         double weight_sum = 0;
 896         double cplx_sum = 0;
 897         double weight = 1.0;
 898         int j;
 899         /* weighted average of cplx of future frames */
 900         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
 901             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
 902             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 903             if(weight < .0001)
 904                 break;
 905             weight_sum += weight;
 906             cplx_sum += weight * qscale2bits(rcj, 1);
 907         }
 908         /* weighted average of cplx of past frames */
 909         weight = 1.0;
 910         for(j=0; j<cplxblur*2 && j<=i; j++){
 911             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
 912             weight_sum += weight;
 913             cplx_sum += weight * qscale2bits(rcj, 1);
 914             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 915             if(weight < .0001)
 916                 break;
 917         }
 918         rce->blurred_complexity = cplx_sum / weight_sum;
 919     }
 920
 921     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 922     if(filter_size > 1)
 923         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 924     else
 925         blurred_qscale = qscale;
 926
 927     expected_bits = 1;
 928     for(i=0; i<rcc->num_entries; i++)
 929         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
 930     step_mult = all_available_bits / expected_bits;
 931
 932     rate_factor = 0;
 933     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
 934         expected_bits = 0;
 935         rate_factor += step;
 936
 937         rcc->last_non_b_pict_type = -1;
 938         rcc->last_accum_p_norm = 1;
 939
 940         /* find qscale */
 941         for(i=0; i<rcc->num_entries; i++){
 942             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
 943         }
 944
 945         /* fixed I/B QP relative to P mode */
 946         for(i=rcc->num_entries-1; i>=0; i--){
 947             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
 948             assert(qscale[i] >= 0);
 949         }
 950
 951         /* smooth curve */
 952         if(filter_size > 1){
 953             assert(filter_size%2==1);
 954             for(i=0; i<rcc->num_entries; i++){
 955                 ratecontrol_entry_t *rce = &rcc->entry[i];
 956                 int j;
 957                 double q=0.0, sum=0.0;
 958
 959                 for(j=0; j<filter_size; j++){
 960                     int index = i+j-filter_size/2;
 961                     double d = index-i;
 962                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
 963                     if(index < 0 || index >= rcc->num_entries) continue;
 964                     if(rce->pict_type != rcc->entry[index].pict_type) continue;
 965                     q += qscale[index] * coeff;
 966                     sum += coeff;
 967                 }
 968                 blurred_qscale[i] = q/sum;
 969             }
 970         }
 971
 972         /* find expected bits */
 973         for(i=0; i<rcc->num_entries; i++){
 974             ratecontrol_entry_t *rce = &rcc->entry[i];
 975             double bits;
 976             rce->new_qscale = clip_qscale(h, rce, blurred_qscale[i]);
 977             assert(rce->new_qscale >= 0);
 978             bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
 979
 980             rce->expected_bits = expected_bits;
 981             expected_bits += bits;
 982         }
 983
 984 //printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor);
 985         if(expected_bits > all_available_bits) rate_factor -= step;
 986     }
 987
 988     x264_free(qscale);
 989     if(filter_size > 1)
 990         x264_free(blurred_qscale);
 991
 992     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
 993     {
 994         double avgq = 0;
 995         for(i=0; i<rcc->num_entries; i++)
 996             avgq += rcc->entry[i].new_qscale;
 997         avgq = qscale2qp(avgq / rcc->num_entries);
 998
 999         x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
1000         x264_log(h, X264_LOG_ERROR, "target: %.2f kbit/s, got: %.2f kbit/s, avg QP: %.4f\n",
1001                  (float)h->param.rc.i_bitrate,
1002                  expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1003                  avgq);
1004         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1005             x264_log(h, X264_LOG_ERROR, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1006         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1007         {
1008             if(h->param.rc.i_qp_max < 51)
1009                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1010             else
1011                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate\n");
1012         }
1013         else
1014             x264_log(h, X264_LOG_ERROR, "internal error\n");
1015         return -1;
1016     }
1017
1018     return 0;
1019 }
1020
1021