git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Måns Rullgård <mru@mru.ath.cx>
   8  * 2 pass code: Michael Niedermayer <michaelni@gmx.at>
   9  *              Loren Merritt <lorenm@u.washington.edu>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <math.h>
  32 #include <limits.h>
  33 #include <assert.h>
  34
  35 #include "common/common.h"
  36 #include "common/cpu.h"
  37 #include "common/macroblock.h"
  38 #include "ratecontrol.h"
  39
  40 #if defined(SYS_FREEBSD) || defined(SYS_BEOS)
  41 #define exp2f(x) powf( 2, (x) )
  42 #endif
  43 #ifdef _MSC_VER
  44 #define exp2f(x) pow( 2, (x) )
  45 #define sqrtf sqrt
  46 #endif
  47 #ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
  48 #define rename(src,dst) (unlink(dst), rename(src,dst))
  49 #endif
  50
  51 typedef struct
  52 {
  53     int pict_type;
  54     int kept_as_ref;
  55     float qscale;
  56     int mv_bits;
  57     int i_tex_bits;
  58     int p_tex_bits;
  59     int misc_bits;
  60     uint64_t expected_bits;
  61     float new_qscale;
  62     int new_qp;
  63     int i_count;
  64     int p_count;
  65     int s_count;
  66     float blurred_complexity;
  67 } ratecontrol_entry_t;
  68
  69 struct x264_ratecontrol_t
  70 {
  71     /* constants */
  72     double fps;
  73     int gop_size;
  74     int bitrate;
  75     int nmb;                    /* number of macroblocks in a frame */
  76     int buffer_size;
  77     int rcbufrate;
  78     int init_qp;
  79     int qp_constant[5];
  80
  81     /* 1st pass stuff */
  82     int gop_qp;
  83     int buffer_fullness;
  84     int frames;                 /* frames in current gop */
  85     int pframes;
  86     int slice_type;
  87     int mb;                     /* MBs processed in current frame */
  88     int bits_gop;               /* allocated bits current gop */
  89     int bits_last_gop;          /* bits consumed in gop */
  90     int qp;                     /* qp for current frame */
  91     int qpm;                    /* qp for next MB */
  92     float qpa;                  /* average qp for last frame */
  93     int qps;
  94     float qp_avg_p;             /* average QP for P frames */
  95     float qp_last_p;
  96     int fbits;                  /* bits allocated for current frame */
  97     int ufbits;                 /* bits used for current frame */
  98     int nzcoeffs;               /* # of 0-quantized coefficients */
  99     int ncoeffs;                /* total # of coefficients */
 100     int overhead;
 101     int qp_force;
 102
 103     /* 2pass stuff */
 104     FILE *p_stat_file_out;
 105     char *psz_stat_file_tmpname;
 106
 107     int num_entries;            /* number of ratecontrol_entry_ts */
 108     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
 109     double last_qscale;
 110     double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
 111     int last_non_b_pict_type;
 112     double accum_p_qp;          /* for determining I-frame quant */
 113     double accum_p_norm;
 114     double last_accum_p_norm;
 115     double lmin[5];             /* min qscale by frame type */
 116     double lmax[5];
 117     double lstep;               /* max change (multiply) in qscale per frame */
 118     double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
 119     double p_cplx_sum[5];
 120     double mv_bits_sum[5];
 121     int frame_count[5];         /* number of frames of each type */
 122 };
 123
 124
 125 static int init_pass2(x264_t *);
 126 static float rate_estimate_qscale( x264_t *h, int pict_type );
 127
 128 /* Terminology:
 129  * qp = h.264's quantizer
 130  * qscale = linearized quantizer = Lagrange multiplier
 131  */
 132 static inline double qp2qscale(double qp)
 133 {
 134     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 135 }
 136 static inline double qscale2qp(double qscale)
 137 {
 138     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 139 }
 140
 141 /* Texture bitrate is not quite inversely proportional to qscale,
 142  * probably due the the changing number of SKIP blocks.
 143  * MV bits level off at about qp<=12, because the lambda used
 144  * for motion estimation is constant there. */
 145 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 146 {
 147     if(qscale<0.1)
 148         qscale = 0.1;
 149     return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
 150            + rce->mv_bits * pow( X264_MAX(rce->qscale, 12) / X264_MAX(qscale, 12), 0.5 );
 151 }
 152
 153 /* There is no analytical inverse to the above formula. */
 154 #if 0
 155 static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
 156 {
 157     if(bits<1.0)
 158         bits = 1.0;
 159     return (rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits + .1) * rce->qscale / bits;
 160 }
 161 #endif
 162
 163
 164 int x264_ratecontrol_new( x264_t *h )
 165 {
 166     x264_ratecontrol_t *rc;
 167     float bpp;
 168     int i;
 169
 170     /* Needed(?) for 2 pass */
 171     x264_cpu_restore( h->param.cpu );
 172
 173     h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
 174     memset(rc, 0, sizeof(*rc));
 175
 176     /* FIXME: use integers */
 177     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 178         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 179     else
 180         rc->fps = 25.0;
 181
 182     rc->gop_size = h->param.i_keyint_max;
 183     rc->bitrate = h->param.rc.i_bitrate * 1000;
 184     rc->nmb = h->mb.i_mb_count;
 185
 186     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 187     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 188     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 189
 190     /* Currently there is no adaptive quant, and per-MB ratecontrol is used only in CBR. */
 191     h->mb.b_variable_qp = h->param.rc.b_cbr && !h->param.rc.b_stat_read;
 192
 193     /* Init 1pass CBR algo */
 194     if( h->param.rc.b_cbr ){
 195         rc->buffer_size = h->param.rc.i_rc_buffer_size * 1000;
 196         rc->buffer_fullness = h->param.rc.i_rc_init_buffer;
 197         rc->rcbufrate = rc->bitrate / rc->fps;
 198
 199         if(rc->buffer_size < rc->rcbufrate){
 200             x264_log(h, X264_LOG_WARNING, "rc buffer size %i too small\n",
 201                      rc->buffer_size);
 202             rc->buffer_size = 0;
 203         }
 204
 205         if(rc->buffer_size <= 0)
 206             rc->buffer_size = rc->bitrate / 2;
 207
 208         if(rc->buffer_fullness > rc->buffer_size || rc->buffer_fullness < 0){
 209             x264_log(h, X264_LOG_WARNING, "invalid initial buffer fullness %i\n",
 210                      rc->buffer_fullness);
 211             rc->buffer_fullness = 0;
 212         }
 213
 214         bpp = rc->bitrate / (rc->fps * h->param.i_width * h->param.i_height);
 215         if(bpp <= 0.6)
 216             rc->init_qp = 31;
 217         else if(bpp <= 1.4)
 218             rc->init_qp = 25;
 219         else if(bpp <= 2.4)
 220             rc->init_qp = 20;
 221         else
 222             rc->init_qp = 10;
 223         rc->gop_qp = rc->init_qp;
 224
 225         rc->bits_last_gop = 0;
 226
 227         x264_log(h, X264_LOG_DEBUG, "%f fps, %i bps, bufsize %i\n",
 228                  rc->fps, rc->bitrate, rc->buffer_size);
 229     }
 230
 231
 232     rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
 233     rc->last_qscale = qp2qscale(26);
 234     for( i = 0; i < 5; i++ )
 235     {
 236         rc->last_qscale_for[i] = qp2qscale(26);
 237         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 238         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 239     }
 240 #if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
 241     rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 242     rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 243     rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 244     rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 245 #endif
 246
 247     /* Load stat file and init 2pass algo */
 248     if( h->param.rc.b_stat_read )
 249     {
 250         int stats_size;
 251         char *p, *stats_in;
 252         FILE *stats_file;
 253
 254         /* read 1st pass stats */
 255         assert( h->param.rc.psz_stat_in );
 256         stats_file = fopen( h->param.rc.psz_stat_in, "rb");
 257         if(!stats_file)
 258         {
 259             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 260             return -1;
 261         }
 262         // FIXME: error checking
 263         fseek(stats_file, 0, SEEK_END);
 264         stats_size = ftell(stats_file);
 265         fseek(stats_file, 0, SEEK_SET);
 266         stats_in = x264_malloc(stats_size+10);
 267         fread(stats_in, 1, stats_size, stats_file);
 268         fclose(stats_file);
 269
 270         /* find number of pics */
 271         p = stats_in;
 272         for(i=-1; p; i++){
 273             p = strchr(p+1, ';');
 274         }
 275         i += h->param.i_bframe;
 276         rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
 277         memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
 278         rc->num_entries= i;
 279
 280         /* init all to skipped p frames */
 281         for(i=0; i<rc->num_entries; i++){
 282             ratecontrol_entry_t *rce = &rc->entry[i];
 283             rce->pict_type = SLICE_TYPE_P;
 284             rce->qscale = rce->new_qscale = qp2qscale(20);
 285             rce->misc_bits = rc->nmb + 10;
 286             rce->new_qp = 0;
 287         }
 288
 289         /* read stats */
 290         p = stats_in;
 291         for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
 292             ratecontrol_entry_t *rce;
 293             int frame_number;
 294             char pict_type;
 295             int e;
 296             char *next;
 297             float qp;
 298
 299             next= strchr(p, ';');
 300             if(next){
 301                 (*next)=0; //sscanf is unbelievably slow on looong strings
 302                 next++;
 303             }
 304             e = sscanf(p, " in:%d ", &frame_number);
 305
 306             assert(frame_number >= 0);
 307             assert(frame_number < rc->num_entries);
 308             rce = &rc->entry[frame_number];
 309
 310             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
 311                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 312                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
 313
 314             switch(pict_type){
 315                 case 'I': rce->kept_as_ref = 1;
 316                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 317                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 318                 case 'B': rce->kept_as_ref = 1;
 319                 case 'b': rce->pict_type = SLICE_TYPE_B; break;
 320                 default:  e = -1; break;
 321             }
 322             if(e != 10){
 323                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 324                 return -1;
 325             }
 326             rce->qscale = qp2qscale(qp);
 327             p = next;
 328         }
 329
 330         x264_free(stats_in);
 331
 332         /* If using 2pass with constant quant, no need to run the bitrate allocation */
 333         if(h->param.rc.b_cbr)
 334         {
 335             if(init_pass2(h) < 0) return -1;
 336         }
 337     }
 338
 339     /* Open output file */
 340     /* If input and output files are the same, output to a temp file
 341      * and move it to the real name only when it's complete */
 342     if( h->param.rc.b_stat_write )
 343     {
 344         rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
 345         strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 346         strcat( rc->psz_stat_file_tmpname, ".temp" );
 347
 348         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 349         if( rc->p_stat_file_out == NULL )
 350         {
 351             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 352             return -1;
 353         }
 354     }
 355
 356     return 0;
 357 }
 358
 359 void x264_ratecontrol_delete( x264_t *h )
 360 {
 361     x264_ratecontrol_t *rc = h->rc;
 362
 363     if( rc->p_stat_file_out )
 364     {
 365         fclose( rc->p_stat_file_out );
 366         if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 367             if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
 368             {
 369                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
 370                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 371             }
 372         x264_free( rc->psz_stat_file_tmpname );
 373     }
 374     if( rc->entry )
 375         x264_free(rc->entry);
 376     x264_free( rc );
 377 }
 378
 379 void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
 380 {
 381     x264_ratecontrol_t *rc = h->rc;
 382     int gframes, iframes, pframes, bframes;
 383     int minbits, maxbits;
 384     int gbits, fbits;
 385     int zn = 0;
 386     float kp;
 387     int gbuf;
 388
 389     rc->slice_type = i_slice_type;
 390
 391     x264_cpu_restore( h->param.cpu );
 392
 393     rc->qp_force = i_force_qp;
 394
 395     if( !h->param.rc.b_cbr )
 396     {
 397         int q;
 398         if( i_force_qp )
 399             q = i_force_qp - 1;
 400         else if( i_slice_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
 401             q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
 402         else
 403             q = rc->qp_constant[ i_slice_type ];
 404         rc->qpm = rc->qpa = rc->qp = q;
 405         return;
 406     }
 407     else if( h->param.rc.b_stat_read )
 408     {
 409         int frame = h->fenc->i_frame;
 410         ratecontrol_entry_t *rce;
 411         assert( frame >= 0 && frame < rc->num_entries );
 412         rce = &h->rc->entry[frame];
 413
 414         rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
 415         rc->qpm = rc->qpa = rc->qp = rce->new_qp =
 416             (int)(qscale2qp(rce->new_qscale) + 0.5);
 417         return;
 418     }
 419
 420     switch(i_slice_type){
 421     case SLICE_TYPE_I:
 422         gbuf = rc->buffer_fullness + (rc->gop_size-1) * rc->rcbufrate;
 423         rc->bits_gop = gbuf - rc->buffer_size / 2;
 424
 425         if(!rc->mb && rc->pframes){
 426             int qp = rc->qp_avg_p / rc->pframes + 0.5;
 427 #if 0 /* JM does this without explaining why */
 428             int gdq = (float) rc->gop_size / 15 + 0.5;
 429             if(gdq > 2)
 430                 gdq = 2;
 431             qp -= gdq;
 432             if(qp > rc->qp_last_p - 2)
 433                 qp--;
 434 #endif
 435             qp = x264_clip3(qp, rc->gop_qp - 4, rc->gop_qp + 4);
 436             qp = x264_clip3(qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 437             rc->gop_qp = qp;
 438         } else if(rc->frames > 4){
 439             rc->gop_qp = rc->init_qp;
 440         }
 441
 442         kp = h->param.rc.f_ip_factor * h->param.rc.f_pb_factor;
 443
 444         x264_log(h, X264_LOG_DEBUG,"gbuf=%i bits_gop=%i frames=%i gop_qp=%i\n",
 445                  gbuf, rc->bits_gop, rc->frames, rc->gop_qp);
 446
 447         rc->bits_last_gop = 0;
 448         rc->frames = 0;
 449         rc->pframes = 0;
 450         rc->qp_avg_p = 0;
 451         break;
 452
 453     case SLICE_TYPE_P:
 454         kp = h->param.rc.f_pb_factor;
 455         break;
 456
 457     case SLICE_TYPE_B:
 458         kp = 1.0;
 459         break;
 460
 461     default:
 462         x264_log(h, X264_LOG_WARNING, "ratecontrol: unknown slice type %i\n",
 463                  i_slice_type);
 464         kp = 1.0;
 465         break;
 466     }
 467
 468     gframes = rc->gop_size - rc->frames;
 469     iframes = gframes / rc->gop_size;
 470     pframes = gframes / (h->param.i_bframe + 1) - iframes;
 471     bframes = gframes - pframes - iframes;
 472
 473     gbits = rc->bits_gop - rc->bits_last_gop;
 474     fbits = kp * gbits /
 475         (h->param.rc.f_ip_factor * h->param.rc.f_pb_factor * iframes +
 476          h->param.rc.f_pb_factor * pframes + bframes);
 477
 478     minbits = rc->buffer_fullness + rc->rcbufrate - rc->buffer_size;
 479     if(minbits < 0)
 480         minbits = 0;
 481     maxbits = rc->buffer_fullness;
 482     rc->fbits = x264_clip3(fbits, minbits, maxbits);
 483
 484     if(i_slice_type == SLICE_TYPE_I){
 485         rc->qp = rc->gop_qp;
 486     } else if(rc->ncoeffs && rc->ufbits){
 487         int dqp, nonzc;
 488
 489         nonzc = (rc->ncoeffs - rc->nzcoeffs);
 490         if(nonzc == 0)
 491             zn = rc->ncoeffs;
 492         else if(rc->fbits < INT_MAX / nonzc)
 493             zn = rc->ncoeffs - rc->fbits * nonzc / rc->ufbits;
 494         else
 495             zn = 0;
 496         zn = x264_clip3(zn, 0, rc->ncoeffs);
 497         dqp = h->param.rc.i_rc_sens * exp2f(rc->qpa / 6) *
 498             (zn - rc->nzcoeffs) / rc->nzcoeffs;
 499         dqp = x264_clip3(dqp, -h->param.rc.i_qp_step, h->param.rc.i_qp_step);
 500         rc->qp = (int)(rc->qpa + dqp + .5);
 501     }
 502
 503     if(rc->fbits > 0.9 * maxbits)
 504         rc->qp += 2;
 505     else if(rc->fbits > 0.8 * maxbits)
 506         rc->qp += 1;
 507     else if(rc->fbits < 1.1 * minbits)
 508         rc->qp -= 2;
 509     else if(rc->fbits < 1.2 * minbits)
 510         rc->qp -= 1;
 511
 512     if( i_force_qp > 0 ) {
 513         rc->qpm = rc->qpa = rc->qp = i_force_qp - 1;
 514     } else {
 515         rc->qp = rc->qpm =
 516             x264_clip3(rc->qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 517     }
 518
 519     x264_log(h, X264_LOG_DEBUG, "fbits=%i, qp=%i, z=%i, min=%i, max=%i\n",
 520              rc->fbits, rc->qpm, zn, minbits, maxbits);
 521
 522     rc->fbits -= rc->overhead;
 523     rc->ufbits = 0;
 524     rc->ncoeffs = 0;
 525     rc->nzcoeffs = 0;
 526     rc->mb = 0;
 527     rc->qps = 0;
 528 }
 529
 530 void x264_ratecontrol_mb( x264_t *h, int bits )
 531 {
 532     x264_ratecontrol_t *rc = h->rc;
 533     int rbits;
 534     int zn, enz, nonz;
 535     int rcoeffs;
 536     int dqp;
 537     int i;
 538
 539     x264_cpu_restore( h->param.cpu );
 540
 541     rc->qps += rc->qpm;
 542     rc->ufbits += bits;
 543     rc->mb++;
 544
 545     for(i = 0; i < 16 + 8; i++)
 546         rc->nzcoeffs += 16 - h->mb.cache.non_zero_count[x264_scan8[i]];
 547     rc->ncoeffs += 16 * (16 + 8);
 548
 549     if(rc->mb < rc->nmb / 16)
 550         return;
 551     else if(rc->mb == rc->nmb)
 552         return;
 553     else if(rc->qp_force > 0)
 554         return;
 555
 556     rcoeffs = (rc->nmb - rc->mb) * 16 * 24;
 557     rbits = rc->fbits - rc->ufbits;
 558 /*     if(rbits < 0) */
 559 /*      rbits = 0; */
 560
 561 /*     zn = (rc->nmb - rc->mb) * 16 * 24; */
 562     nonz = (rc->ncoeffs - rc->nzcoeffs);
 563     if(nonz == 0)
 564         zn = rcoeffs;
 565     else if(rc->ufbits && rbits < INT_MAX / nonz)
 566         zn = rcoeffs - rbits * nonz / rc->ufbits;
 567     else
 568         zn = 0;
 569     zn = x264_clip3(zn, 0, rcoeffs);
 570     enz = rc->nzcoeffs * (rc->nmb - rc->mb) / rc->mb;
 571     dqp = (float) 2*h->param.rc.i_rc_sens * exp2f((float) rc->qps / rc->mb / 6) *
 572         (zn - enz) / enz;
 573     rc->qpm = x264_clip3(rc->qpm + dqp, rc->qp - 3, rc->qp + 3);
 574     if(rbits <= 0)
 575         rc->qpm++;
 576     rc->qpm = x264_clip3(rc->qpm, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 577 }
 578
 579 int  x264_ratecontrol_qp( x264_t *h )
 580 {
 581     return h->rc->qpm;
 582 }
 583
 584 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 585 {
 586     if( h->param.rc.b_stat_read )
 587     {
 588         if( frame_num >= h->rc->num_entries )
 589         {
 590             x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
 591             return X264_TYPE_P;
 592         }
 593         switch( h->rc->entry[frame_num].pict_type )
 594         {
 595             case SLICE_TYPE_I:
 596                 return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
 597
 598             case SLICE_TYPE_B:
 599                 return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
 600
 601             case SLICE_TYPE_P:
 602             default:
 603                 return X264_TYPE_P;
 604         }
 605     }
 606     else
 607     {
 608         return X264_TYPE_AUTO;
 609     }
 610 }
 611
 612 void x264_ratecontrol_end( x264_t *h, int bits )
 613 {
 614     x264_ratecontrol_t *rc = h->rc;
 615     int i;
 616
 617     x264_cpu_restore( h->param.cpu );
 618
 619     h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
 620     h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
 621     for( i = B_DIRECT; i < B_8x8; i++ )
 622         h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];
 623
 624     if( h->param.rc.b_stat_write )
 625     {
 626         char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
 627                     : rc->slice_type==SLICE_TYPE_P ? 'P'
 628                     : h->fenc->b_kept_as_ref ? 'B' : 'b';
 629         fprintf( rc->p_stat_file_out,
 630                  "in:%d out:%d type:%c q:%.3f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
 631                  h->fenc->i_frame, h->i_frame-1,
 632                  c_type, rc->qpa,
 633                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
 634                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
 635                  h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
 636                  h->stat.frame.i_mb_count_p,
 637                  h->stat.frame.i_mb_count_skip);
 638     }
 639
 640     if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
 641         return;
 642
 643     rc->buffer_fullness += rc->rcbufrate - bits;
 644     if(rc->buffer_fullness < 0){
 645         x264_log(h, X264_LOG_WARNING, "buffer underflow %i\n",
 646                  rc->buffer_fullness);
 647         rc->buffer_fullness = 0;
 648     }
 649
 650     rc->qpa = (float)rc->qps / rc->mb;
 651     if(rc->slice_type == SLICE_TYPE_P){
 652         rc->qp_avg_p += rc->qpa;
 653         rc->qp_last_p = rc->qpa;
 654         rc->pframes++;
 655     } else if(rc->slice_type == SLICE_TYPE_I){
 656         float err = (float) rc->ufbits / rc->fbits;
 657         if(err > 1.1)
 658             rc->gop_qp++;
 659         else if(err < 0.9)
 660             rc->gop_qp--;
 661     }
 662
 663     rc->overhead = bits - rc->ufbits;
 664
 665     x264_log(h, X264_LOG_DEBUG, "bits=%i, qp=%.1f, z=%i, zr=%6.3f, buf=%i\n",
 666              bits, rc->qpa, rc->nzcoeffs, (float) rc->nzcoeffs / rc->ncoeffs,
 667              rc->buffer_fullness);
 668
 669     rc->bits_last_gop += bits;
 670     rc->frames++;
 671     rc->mb = 0;
 672 }
 673
 674 /****************************************************************************
 675  * 2 pass functions
 676  ***************************************************************************/
 677 double x264_eval( char *s, double *const_value, const char **const_name,
 678                   double (**func1)(void *, double), const char **func1_name,
 679                   double (**func2)(void *, double, double), char **func2_name,
 680                   void *opaque );
 681
 682 /**
 683  * modifies the bitrate curve from pass1 for one frame
 684  */
 685 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor)
 686 {
 687     x264_ratecontrol_t *rcc= h->rc;
 688     const int pict_type = rce->pict_type;
 689     double q;
 690
 691     double const_values[]={
 692         rce->i_tex_bits * rce->qscale,
 693         rce->p_tex_bits * rce->qscale,
 694         (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
 695         rce->mv_bits * rce->qscale,
 696         (double)rce->i_count / rcc->nmb,
 697         (double)rce->p_count / rcc->nmb,
 698         (double)rce->s_count / rcc->nmb,
 699         rce->pict_type == SLICE_TYPE_I,
 700         rce->pict_type == SLICE_TYPE_P,
 701         rce->pict_type == SLICE_TYPE_B,
 702         h->param.rc.f_qcompress,
 703         rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
 704         rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 705         rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 706         rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
 707         (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
 708         rce->blurred_complexity,
 709         0
 710     };
 711     static const char *const_names[]={
 712         "iTex",
 713         "pTex",
 714         "tex",
 715         "mv",
 716         "iCount",
 717         "pCount",
 718         "sCount",
 719         "isI",
 720         "isP",
 721         "isB",
 722         "qComp",
 723         "avgIITex",
 724         "avgPITex",
 725         "avgPPTex",
 726         "avgBPTex",
 727         "avgTex",
 728         "blurCplx",
 729         NULL
 730     };
 731     static double (*func1[])(void *, double)={
 732 //      (void *)bits2qscale,
 733         (void *)qscale2bits,
 734         NULL
 735     };
 736     static const char *func1_names[]={
 737 //      "bits2qp",
 738         "qp2bits",
 739         NULL
 740     };
 741
 742     q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
 743     q /= rate_factor;
 744
 745     // avoid NaN's in the rc_eq
 746     if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
 747         q = rcc->last_qscale;
 748     else
 749         rcc->last_qscale = q;
 750
 751     return q;
 752 }
 753
 754 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 755 {
 756     x264_ratecontrol_t *rcc = h->rc;
 757     const int pict_type = rce->pict_type;
 758
 759     // force I/B quants as a function of P quants
 760     const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
 761     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
 762     if( pict_type == SLICE_TYPE_I )
 763     {
 764         double iq = q;
 765         double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 766         double ip_factor = fabs( h->param.rc.f_ip_factor );
 767         /* don't apply ip_factor if the following frame is also I */
 768         if( rcc->accum_p_norm <= 0 )
 769             q = iq;
 770         else if( h->param.rc.f_ip_factor < 0 )
 771             q = iq / ip_factor;
 772         else if( rcc->accum_p_norm >= 1 )
 773             q = pq / ip_factor;
 774         else
 775             q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
 776     }
 777     else if( pict_type == SLICE_TYPE_B )
 778     {
 779         if( h->param.rc.f_pb_factor > 0 )
 780             q = last_non_b_q;
 781         if( !rce->kept_as_ref )
 782             q *= fabs( h->param.rc.f_pb_factor );
 783     }
 784     else if( pict_type == SLICE_TYPE_P
 785              && rcc->last_non_b_pict_type == SLICE_TYPE_P
 786              && rce->i_tex_bits + rce->p_tex_bits == 0 )
 787     {
 788         q = last_p_q;
 789     }
 790
 791     /* last qscale / qdiff stuff */
 792     /* TODO take intro account whether the I-frame is a scene cut
 793      * or just a seek point */
 794     if(rcc->last_non_b_pict_type==pict_type
 795        && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
 796     {
 797         double last_q = rcc->last_qscale_for[pict_type];
 798         double max_qscale = last_q * rcc->lstep;
 799         double min_qscale = last_q / rcc->lstep;
 800
 801         if     (q > max_qscale) q = max_qscale;
 802         else if(q < min_qscale) q = min_qscale;
 803     }
 804
 805     rcc->last_qscale_for[pict_type] = q;
 806     if(pict_type!=SLICE_TYPE_B)
 807         rcc->last_non_b_pict_type = pict_type;
 808     if(pict_type==SLICE_TYPE_I)
 809     {
 810         rcc->last_accum_p_norm = rcc->accum_p_norm;
 811         rcc->accum_p_norm = 0;
 812         rcc->accum_p_qp = 0;
 813     }
 814     if(pict_type==SLICE_TYPE_P)
 815     {
 816         float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
 817         rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
 818         rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
 819     }
 820     return q;
 821 }
 822
 823 // clip a qscale to between lmin and lmax
 824 static double clip_qscale( x264_t *h, ratecontrol_entry_t *rce, double q )
 825 {
 826     double lmin = h->rc->lmin[rce->pict_type];
 827     double lmax = h->rc->lmax[rce->pict_type];
 828
 829     if(lmin==lmax){
 830         return lmin;
 831     }else{
 832         double min2 = log(lmin);
 833         double max2 = log(lmax);
 834         q = (log(q) - min2)/(max2-min2) - 0.5;
 835         q = 1.0/(1.0 + exp(-4*q));
 836         q = q*(max2-min2) + min2;
 837         return exp(q);
 838     }
 839 }
 840
 841 // update qscale for 1 frame based on actual bits used so far
 842 static float rate_estimate_qscale(x264_t *h, int pict_type)
 843 {
 844     float q;
 845     float br_compensation;
 846     double diff;
 847     int picture_number = h->fenc->i_frame;
 848     x264_ratecontrol_t *rcc = h->rc;
 849     ratecontrol_entry_t *rce;
 850     double lmin = rcc->lmin[pict_type];
 851     double lmax = rcc->lmax[pict_type];
 852     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
 853                           + h->stat.i_slice_size[SLICE_TYPE_P]
 854                           + h->stat.i_slice_size[SLICE_TYPE_B]);
 855
 856 //printf("input_pic_num:%d pic_num:%d frame_rate:%d\n", s->input_picture_number, s->picture_number, s->frame_rate);
 857
 858     rce = &rcc->entry[picture_number];
 859
 860     assert(pict_type == rce->pict_type);
 861
 862     if(rce->pict_type == SLICE_TYPE_B)
 863     {
 864         if(h->fenc->b_kept_as_ref)
 865             return rcc->last_qscale * sqrtf(h->param.rc.f_pb_factor);
 866         else
 867             return rcc->last_qscale * h->param.rc.f_pb_factor;
 868     }
 869     else
 870     {
 871         diff = (int64_t)total_bits - (int64_t)rce->expected_bits;
 872         br_compensation = (rcc->buffer_size - diff) / rcc->buffer_size;
 873         br_compensation = x264_clip3f(br_compensation, .5, 2);
 874
 875         q = rce->new_qscale / br_compensation;
 876         q = x264_clip3f(q, lmin, lmax);
 877         rcc->last_qscale = q;
 878         return q;
 879     }
 880 }
 881
 882 static int init_pass2( x264_t *h )
 883 {
 884     x264_ratecontrol_t *rcc = h->rc;
 885     uint64_t all_const_bits = 0;
 886     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
 887     double rate_factor, step, step_mult;
 888     double qblur = h->param.rc.f_qblur;
 889     double cplxblur = h->param.rc.f_complexity_blur;
 890     const int filter_size = (int)(qblur*4) | 1;
 891     double expected_bits;
 892     double *qscale, *blurred_qscale;
 893     int i;
 894
 895     /* find total/average complexity & const_bits */
 896     for(i=0; i<rcc->num_entries; i++){
 897         ratecontrol_entry_t *rce = &rcc->entry[i];
 898         all_const_bits += rce->misc_bits;
 899         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
 900         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
 901         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
 902         rcc->frame_count[rce->pict_type] ++;
 903     }
 904
 905     if( all_available_bits < all_const_bits)
 906     {
 907         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
 908                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
 909         return -1;
 910     }
 911
 912     for(i=0; i<rcc->num_entries; i++){
 913         ratecontrol_entry_t *rce = &rcc->entry[i];
 914         double weight_sum = 0;
 915         double cplx_sum = 0;
 916         double weight = 1.0;
 917         int j;
 918         /* weighted average of cplx of future frames */
 919         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
 920             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
 921             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 922             if(weight < .0001)
 923                 break;
 924             weight_sum += weight;
 925             cplx_sum += weight * qscale2bits(rcj, 1);
 926         }
 927         /* weighted average of cplx of past frames */
 928         weight = 1.0;
 929         for(j=0; j<=cplxblur*2 && j<=i; j++){
 930             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
 931             weight_sum += weight;
 932             cplx_sum += weight * qscale2bits(rcj, 1);
 933             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 934             if(weight < .0001)
 935                 break;
 936         }
 937         rce->blurred_complexity = cplx_sum / weight_sum;
 938     }
 939
 940     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 941     if(filter_size > 1)
 942         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 943     else
 944         blurred_qscale = qscale;
 945
 946     expected_bits = 1;
 947     for(i=0; i<rcc->num_entries; i++)
 948         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
 949     step_mult = all_available_bits / expected_bits;
 950
 951     rate_factor = 0;
 952     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
 953         expected_bits = 0;
 954         rate_factor += step;
 955
 956         rcc->last_non_b_pict_type = -1;
 957         rcc->last_accum_p_norm = 1;
 958
 959         /* find qscale */
 960         for(i=0; i<rcc->num_entries; i++){
 961             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
 962         }
 963
 964         /* fixed I/B QP relative to P mode */
 965         for(i=rcc->num_entries-1; i>=0; i--){
 966             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
 967             assert(qscale[i] >= 0);
 968         }
 969
 970         /* smooth curve */
 971         if(filter_size > 1){
 972             assert(filter_size%2==1);
 973             for(i=0; i<rcc->num_entries; i++){
 974                 ratecontrol_entry_t *rce = &rcc->entry[i];
 975                 int j;
 976                 double q=0.0, sum=0.0;
 977
 978                 for(j=0; j<filter_size; j++){
 979                     int index = i+j-filter_size/2;
 980                     double d = index-i;
 981                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
 982                     if(index < 0 || index >= rcc->num_entries) continue;
 983                     if(rce->pict_type != rcc->entry[index].pict_type) continue;
 984                     q += qscale[index] * coeff;
 985                     sum += coeff;
 986                 }
 987                 blurred_qscale[i] = q/sum;
 988             }
 989         }
 990
 991         /* find expected bits */
 992         for(i=0; i<rcc->num_entries; i++){
 993             ratecontrol_entry_t *rce = &rcc->entry[i];
 994             double bits;
 995             rce->new_qscale = clip_qscale(h, rce, blurred_qscale[i]);
 996             assert(rce->new_qscale >= 0);
 997             bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
 998
 999             rce->expected_bits = expected_bits;
1000             expected_bits += bits;
1001         }
1002
1003 //printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor);
1004         if(expected_bits > all_available_bits) rate_factor -= step;
1005     }
1006
1007     x264_free(qscale);
1008     if(filter_size > 1)
1009         x264_free(blurred_qscale);
1010
1011     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
1012     {
1013         double avgq = 0;
1014         for(i=0; i<rcc->num_entries; i++)
1015             avgq += rcc->entry[i].new_qscale;
1016         avgq = qscale2qp(avgq / rcc->num_entries);
1017
1018         x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
1019         x264_log(h, X264_LOG_ERROR, "target: %.2f kbit/s, got: %.2f kbit/s, avg QP: %.4f\n",
1020                  (float)h->param.rc.i_bitrate,
1021                  expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1022                  avgq);
1023         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1024             x264_log(h, X264_LOG_ERROR, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1025         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1026         {
1027             if(h->param.rc.i_qp_max < 51)
1028                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1029             else
1030                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate\n");
1031         }
1032         else
1033             x264_log(h, X264_LOG_ERROR, "internal error\n");
1034         return -1;
1035     }
1036
1037     return 0;
1038 }
1039
1040