git.sesse.net Git - x264/blob - encoder/ratecontrol.c

   1 /***************************************************-*- coding: iso-8859-1 -*-
   2  * ratecontrol.c: h264 encoder library (Rate Control)
   3  *****************************************************************************
   4  * Copyright (C) 2005 x264 project
   5  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Loren Merritt <lorenm@u.washington.edu>
   8  *          Michael Niedermayer <michaelni@gmx.at>
   9  *          Måns Rullgård <mru@mru.ath.cx>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <math.h>
  32 #include <limits.h>
  33 #include <assert.h>
  34
  35 #include "common/common.h"
  36 #include "common/cpu.h"
  37 #include "common/macroblock.h"
  38 #include "ratecontrol.h"
  39
  40 #if defined(SYS_FREEBSD) || defined(SYS_BEOS)
  41 #define exp2f(x) powf( 2, (x) )
  42 #endif
  43 #ifdef _MSC_VER
  44 #define exp2f(x) pow( 2, (x) )
  45 #define sqrtf sqrt
  46 #endif
  47 #ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
  48 #define rename(src,dst) (unlink(dst), rename(src,dst))
  49 #endif
  50
  /* Per-frame ratecontrol record.
   * In 2pass mode one entry per frame is filled from the 1st pass stats file;
   * in 1pass ABR a temporary entry is filled from the current frame's
   * analysis before evaluating the rate equation. */
  typedef struct
  {
      int pict_type;              /* SLICE_TYPE_* of the frame */
      int kept_as_ref;            /* set for stats types 'I' and 'B' */
      float qscale;               /* qscale the bit counts below were measured at */
      int mv_bits;                /* "mv:" field of the stats line */
      int i_tex_bits;             /* "itex:" field of the stats line */
      int p_tex_bits;             /* "ptex:" field of the stats line */
      int misc_bits;              /* "misc:" field of the stats line */
      uint64_t expected_bits;     /* compared against the bits actually produced so far in 2pass */
      float new_qscale;           /* qscale selected for this frame in the 2nd pass */
      int new_qp;                 /* new_qscale converted to a qp and clipped to 0..51 */
      int i_count;                /* "imb:" field of the stats line */
      int p_count;                /* "pmb:" field of the stats line */
      int s_count;                /* "smb:" field of the stats line */
      float blurred_complexity;   /* exposed to the rate equation as "blurCplx" */
  } ratecontrol_entry_t;
  68
  /* Running model used to predict a frame's size from its complexity:
   * predicted bits = coeff * var / (q * count). */
  typedef struct
  {
      double coeff;   /* decayed sum of bits*q/var over the observed frames */
      double count;   /* decayed number of observed frames */
      double decay;   /* factor applied to coeff and count before each new sample */
  } predictor_t;
  75
  76 struct x264_ratecontrol_t
  77 {
  78     /* constants */
  79     int b_abr;
  80     int b_2pass;
  81     double fps;
  82     double bitrate;
  83     double rate_tolerance;
  84     int nmb;                    /* number of macroblocks in a frame */
  85     int qp_constant[5];
  86
  87     /* current frame */
  88     ratecontrol_entry_t *rce;
  89     int qp;                     /* qp for current frame */
  90     float qpa;                  /* average of macroblocks' qp (same as qp if no adaptive quant) */
  91     int slice_type;
  92     int qp_force;
  93
  94     /* VBV stuff */
  95     double buffer_size;
  96     double buffer_fill;
  97     double buffer_rate;         /* # of bits added to buffer_fill after each frame */
  98     predictor_t pred[5];        /* predict frame size from satd */
  99
 100     /* ABR stuff */
 101     int    last_satd;
 102     double last_rceq;
 103     double cplxr_sum;           /* sum of bits*qscale/rceq */
 104     double expected_bits_sum;   /* sum of qscale2bits after rceq, ratefactor, and overflow */
 105     double wanted_bits_window;  /* target bitrate * window */
 106     double cbr_decay;
 107     double short_term_cplxsum;
 108     double short_term_cplxcount;
 109
 110     /* 2pass stuff */
 111     FILE *p_stat_file_out;
 112     char *psz_stat_file_tmpname;
 113
 114     int num_entries;            /* number of ratecontrol_entry_ts */
 115     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
 116     double last_qscale;
 117     double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
 118     int last_non_b_pict_type;
 119     double accum_p_qp;          /* for determining I-frame quant */
 120     double accum_p_norm;
 121     double last_accum_p_norm;
 122     double lmin[5];             /* min qscale by frame type */
 123     double lmax[5];
 124     double lstep;               /* max change (multiply) in qscale per frame */
 125     double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
 126     double p_cplx_sum[5];
 127     double mv_bits_sum[5];
 128     int frame_count[5];         /* number of frames of each type */
 129 };
 130
 131
 132 static int init_pass2(x264_t *);
 133 static float rate_estimate_qscale( x264_t *h, int pict_type );
 134 static void update_vbv( x264_t *h, int bits );
 135 int  x264_rc_analyse_slice( x264_t *h );
 136
 137 /* Terminology:
 138  * qp = h.264's quantizer
 139  * qscale = linearized quantizer = Lagrange multiplier
 140  */
 141 static inline double qp2qscale(double qp)
 142 {
 143     return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
 144 }
 145 static inline double qscale2qp(double qscale)
 146 {
 147     return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
 148 }
 149
 150 /* Texture bitrate is not quite inversely proportional to qscale,
 151  * probably due the the changing number of SKIP blocks.
 152  * MV bits level off at about qp<=12, because the lambda used
 153  * for motion estimation is constant there. */
 154 static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 155 {
 156     if(qscale<0.1)
 157         qscale = 0.1;
 158     return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
 159            + rce->mv_bits * pow( X264_MAX(rce->qscale, 12) / X264_MAX(qscale, 12), 0.5 );
 160 }
 161
 /* There is no analytical inverse to the above formula.
  * The simple proportional approximation below is kept for reference only
  * and is compiled out. */
 #if 0
 static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
 {
     const double target = bits < 1.0 ? 1.0 : bits;
     const double total = rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits + .1;

     return total * rce->qscale / target;
 }
 #endif
 171
 172
 173 int x264_ratecontrol_new( x264_t *h )
 174 {
 175     x264_ratecontrol_t *rc;
 176     int i;
 177
 178     x264_cpu_restore( h->param.cpu );
 179
 180     h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
 181     memset(rc, 0, sizeof(*rc));
 182
 183     rc->b_abr = h->param.rc.b_cbr && !h->param.rc.b_stat_read;
 184     rc->b_2pass = h->param.rc.b_cbr && h->param.rc.b_stat_read;
 185     h->mb.b_variable_qp = 0;
 186
 187     /* FIXME: use integers */
 188     if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
 189         rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
 190     else
 191         rc->fps = 25.0;
 192
 193     rc->bitrate = h->param.rc.i_bitrate * 1000;
 194     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
 195     rc->nmb = h->mb.i_mb_count;
 196     rc->last_non_b_pict_type = -1;
 197     rc->cbr_decay = 1.0;
 198
 199     if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
 200         h->param.rc.i_vbv_max_bitrate > 0)
 201         x264_log(h, X264_LOG_ERROR, "max bitrate less than average bitrate, ignored.\n");
 202     else if( h->param.rc.i_vbv_max_bitrate > 0 &&
 203              h->param.rc.i_vbv_buffer_size > 0 )
 204     {
 205         if( h->param.rc.i_vbv_buffer_size < 10 * h->param.rc.i_vbv_max_bitrate / rc->fps ) {
 206             h->param.rc.i_vbv_buffer_size = 10 * h->param.rc.i_vbv_max_bitrate / rc->fps;
 207             x264_log( h, X264_LOG_ERROR, "VBV buffer size too small, using %d kbit\n",
 208                       h->param.rc.i_vbv_buffer_size );
 209         }
 210         rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000 / rc->fps;
 211         rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
 212         rc->buffer_fill = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
 213         rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
 214                       * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
 215     }
 216     else if( h->param.rc.i_vbv_max_bitrate || h->param.rc.i_vbv_buffer_size )
 217         x264_log(h, X264_LOG_ERROR, "VBV maxrate or buffer size specified, but not both.\n");
 218
 219     if( rc->b_abr )
 220     {
 221         /* FIXME shouldn't need to arbitrarily specify a QP,
 222          * but this is more robust than BPP measures */
 223 #define ABR_INIT_QP 24
 224         rc->accum_p_norm = .01;
 225         rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
 226         rc->cplxr_sum = .01;
 227         rc->wanted_bits_window = .01;
 228     }
 229
 230     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
 231     rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
 232     rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
 233
 234     rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
 235     rc->last_qscale = qp2qscale(26);
 236     for( i = 0; i < 5; i++ )
 237     {
 238         rc->last_qscale_for[i] = qp2qscale(26);
 239         rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
 240         rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
 241         rc->pred[i].coeff= 2.0;
 242         rc->pred[i].count= 1.0;
 243         rc->pred[i].decay= 0.5;
 244     }
 245 #if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
 246     rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 247     rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
 248     rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 249     rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
 250 #endif
 251
 252     /* Load stat file and init 2pass algo */
 253     if( h->param.rc.b_stat_read )
 254     {
 255         int stats_size;
 256         char *p, *stats_in;
 257         FILE *stats_file;
 258
 259         /* read 1st pass stats */
 260         assert( h->param.rc.psz_stat_in );
 261         stats_file = fopen( h->param.rc.psz_stat_in, "rb");
 262         if(!stats_file)
 263         {
 264             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 265             return -1;
 266         }
 267         // FIXME: error checking
 268         fseek(stats_file, 0, SEEK_END);
 269         stats_size = ftell(stats_file);
 270         fseek(stats_file, 0, SEEK_SET);
 271         stats_in = x264_malloc(stats_size+10);
 272         fread(stats_in, 1, stats_size, stats_file);
 273         fclose(stats_file);
 274
 275         /* find number of pics */
 276         p = stats_in;
 277         for(i=-1; p; i++)
 278             p = strchr(p+1, ';');
 279         if(i==0)
 280         {
 281             x264_log(h, X264_LOG_ERROR, "empty stats file\n");
 282             return -1;
 283         }
 284         i += h->param.i_bframe;
 285         rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
 286         memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
 287         /* FIXME: num_entries is sometimes treated as number of frames in the video */
 288         rc->num_entries= i;
 289
 290         /* init all to skipped p frames */
 291         for(i=0; i<rc->num_entries; i++){
 292             ratecontrol_entry_t *rce = &rc->entry[i];
 293             rce->pict_type = SLICE_TYPE_P;
 294             rce->qscale = rce->new_qscale = qp2qscale(20);
 295             rce->misc_bits = rc->nmb + 10;
 296             rce->new_qp = 0;
 297         }
 298
 299         /* read stats */
 300         p = stats_in;
 301         for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
 302             ratecontrol_entry_t *rce;
 303             int frame_number;
 304             char pict_type;
 305             int e;
 306             char *next;
 307             float qp;
 308
 309             next= strchr(p, ';');
 310             if(next){
 311                 (*next)=0; //sscanf is unbelievably slow on looong strings
 312                 next++;
 313             }
 314             e = sscanf(p, " in:%d ", &frame_number);
 315
 316             if(frame_number < 0 || frame_number >= rc->num_entries)
 317             {
 318                 x264_log(h, X264_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frame_number, i);
 319                 return -1;
 320             }
 321             rce = &rc->entry[frame_number];
 322
 323             e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
 324                    &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
 325                    &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
 326
 327             switch(pict_type){
 328                 case 'I': rce->kept_as_ref = 1;
 329                 case 'i': rce->pict_type = SLICE_TYPE_I; break;
 330                 case 'P': rce->pict_type = SLICE_TYPE_P; break;
 331                 case 'B': rce->kept_as_ref = 1;
 332                 case 'b': rce->pict_type = SLICE_TYPE_B; break;
 333                 default:  e = -1; break;
 334             }
 335             if(e != 10){
 336                 x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
 337                 return -1;
 338             }
 339             rce->qscale = qp2qscale(qp);
 340             p = next;
 341         }
 342
 343         x264_free(stats_in);
 344
 345         if(h->param.rc.b_cbr)
 346         {
 347             if(init_pass2(h) < 0) return -1;
 348         } /* else we're using constant quant, so no need to run the bitrate allocation */
 349     }
 350
 351     /* Open output file */
 352     /* If input and output files are the same, output to a temp file
 353      * and move it to the real name only when it's complete */
 354     if( h->param.rc.b_stat_write )
 355     {
 356         rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
 357         strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 358         strcat( rc->psz_stat_file_tmpname, ".temp" );
 359
 360         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
 361         if( rc->p_stat_file_out == NULL )
 362         {
 363             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
 364             return -1;
 365         }
 366     }
 367
 368     return 0;
 369 }
 370
 371 void x264_ratecontrol_delete( x264_t *h )
 372 {
 373     x264_ratecontrol_t *rc = h->rc;
 374
 375     if( rc->p_stat_file_out )
 376     {
 377         fclose( rc->p_stat_file_out );
 378         if( h->i_frame >= rc->num_entries - h->param.i_bframe )
 379             if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
 380             {
 381                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
 382                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
 383             }
 384         x264_free( rc->psz_stat_file_tmpname );
 385     }
 386     if( rc->entry )
 387         x264_free(rc->entry);
 388     x264_free( rc );
 389 }
 390
 391 /* Before encoding a frame, choose a QP for it */
 392 void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
 393 {
 394     x264_ratecontrol_t *rc = h->rc;
 395
 396     x264_cpu_restore( h->param.cpu );
 397
 398     rc->qp_force = i_force_qp;
 399     rc->slice_type = i_slice_type;
 400
 401     if( i_force_qp )
 402     {
 403         rc->qpa = rc->qp = i_force_qp - 1;
 404     }
 405     else if( rc->b_abr )
 406     {
 407         rc->qpa = rc->qp =
 408             x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 );
 409     }
 410     else if( rc->b_2pass )
 411     {
 412         int frame = h->fenc->i_frame;
 413         ratecontrol_entry_t *rce;
 414         assert( frame >= 0 && frame < rc->num_entries );
 415         rce = h->rc->rce = &h->rc->entry[frame];
 416
 417         rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
 418         rc->qpa = rc->qp = rce->new_qp =
 419             x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 );
 420     }
 421     else /* CQP */
 422     {
 423         int q;
 424         if( i_slice_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
 425             q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
 426         else
 427             q = rc->qp_constant[ i_slice_type ];
 428         rc->qpa = rc->qp = q;
 429     }
 430 }
 431
 432 void x264_ratecontrol_mb( x264_t *h, int bits )
 433 {
 434     /* currently no adaptive quant */
 435 }
 436
 437 int x264_ratecontrol_qp( x264_t *h )
 438 {
 439     return h->rc->qp;
 440 }
 441
 442 /* In 2pass, force the same frame types as in the 1st pass */
 443 int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 444 {
 445     if( h->param.rc.b_stat_read )
 446     {
 447         if( frame_num >= h->rc->num_entries )
 448         {
 449             x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
 450             return X264_TYPE_P;
 451         }
 452         switch( h->rc->entry[frame_num].pict_type )
 453         {
 454             case SLICE_TYPE_I:
 455                 return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
 456
 457             case SLICE_TYPE_B:
 458                 return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
 459
 460             case SLICE_TYPE_P:
 461             default:
 462                 return X264_TYPE_P;
 463         }
 464     }
 465     else
 466     {
 467         return X264_TYPE_AUTO;
 468     }
 469 }
 470
 471 /* After encoding one frame, save stats and update ratecontrol state */
 472 void x264_ratecontrol_end( x264_t *h, int bits )
 473 {
 474     x264_ratecontrol_t *rc = h->rc;
 475     int i;
 476
 477     x264_cpu_restore( h->param.cpu );
 478
 479     h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
 480     h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
 481     for( i = B_DIRECT; i < B_8x8; i++ )
 482         h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];
 483
 484     if( h->param.rc.b_stat_write )
 485     {
 486         char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
 487                     : rc->slice_type==SLICE_TYPE_P ? 'P'
 488                     : h->fenc->b_kept_as_ref ? 'B' : 'b';
 489         fprintf( rc->p_stat_file_out,
 490                  "in:%d out:%d type:%c q:%.2f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
 491                  h->fenc->i_frame, h->i_frame-1,
 492                  c_type, rc->qpa,
 493                  h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
 494                  h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
 495                  h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
 496                  h->stat.frame.i_mb_count_p,
 497                  h->stat.frame.i_mb_count_skip);
 498     }
 499
 500     if( rc->b_abr )
 501     {
 502         if( rc->slice_type != SLICE_TYPE_B )
 503             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / rc->last_rceq;
 504         else
 505         {
 506             /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
 507              * Not perfectly accurate with B-refs, but good enough. */
 508             rc->cplxr_sum += bits * qp2qscale(rc->qpa) / (rc->last_rceq * fabs(h->param.rc.f_pb_factor));
 509         }
 510         rc->cplxr_sum *= rc->cbr_decay;
 511         rc->wanted_bits_window += rc->bitrate / rc->fps;
 512         rc->wanted_bits_window *= rc->cbr_decay;
 513
 514         rc->accum_p_qp   *= .95;
 515         rc->accum_p_norm *= .95;
 516         rc->accum_p_norm += 1;
 517         if( rc->slice_type == SLICE_TYPE_I )
 518             rc->accum_p_qp += rc->qpa * fabs(h->param.rc.f_ip_factor);
 519         else
 520             rc->accum_p_qp += rc->qpa;
 521     }
 522
 523     if( rc->b_2pass )
 524     {
 525         rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
 526     }
 527
 528     update_vbv( h, bits );
 529
 530     if( rc->slice_type != SLICE_TYPE_B )
 531         rc->last_non_b_pict_type = rc->slice_type;
 532 }
 533
 534 /****************************************************************************
 535  * 2 pass functions
 536  ***************************************************************************/
 537
 538 double x264_eval( char *s, double *const_value, const char **const_name,
 539                   double (**func1)(void *, double), const char **func1_name,
 540                   double (**func2)(void *, double, double), char **func2_name,
 541                   void *opaque );
 542
 543 /**
 544  * modify the bitrate curve from pass1 for one frame
 545  */
 546 static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor)
 547 {
 548     x264_ratecontrol_t *rcc= h->rc;
 549     const int pict_type = rce->pict_type;
 550     double q;
 551
 552     double const_values[]={
 553         rce->i_tex_bits * rce->qscale,
 554         rce->p_tex_bits * rce->qscale,
 555         (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
 556         rce->mv_bits * rce->qscale,
 557         (double)rce->i_count / rcc->nmb,
 558         (double)rce->p_count / rcc->nmb,
 559         (double)rce->s_count / rcc->nmb,
 560         rce->pict_type == SLICE_TYPE_I,
 561         rce->pict_type == SLICE_TYPE_P,
 562         rce->pict_type == SLICE_TYPE_B,
 563         h->param.rc.f_qcompress,
 564         rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
 565         rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 566         rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
 567         rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
 568         (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
 569         rce->blurred_complexity,
 570         0
 571     };
 572     static const char *const_names[]={
 573         "iTex",
 574         "pTex",
 575         "tex",
 576         "mv",
 577         "iCount",
 578         "pCount",
 579         "sCount",
 580         "isI",
 581         "isP",
 582         "isB",
 583         "qComp",
 584         "avgIITex",
 585         "avgPITex",
 586         "avgPPTex",
 587         "avgBPTex",
 588         "avgTex",
 589         "blurCplx",
 590         NULL
 591     };
 592     static double (*func1[])(void *, double)={
 593 //      (void *)bits2qscale,
 594         (void *)qscale2bits,
 595         NULL
 596     };
 597     static const char *func1_names[]={
 598 //      "bits2qp",
 599         "qp2bits",
 600         NULL
 601     };
 602
 603     q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
 604     q /= rate_factor;
 605
 606     // avoid NaN's in the rc_eq
 607     if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
 608         q = rcc->last_qscale;
 609     else
 610         rcc->last_qscale = q;
 611
 612     return q;
 613 }
 614
 615 static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 616 {
 617     x264_ratecontrol_t *rcc = h->rc;
 618     const int pict_type = rce->pict_type;
 619
 620     // force I/B quants as a function of P quants
 621     const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
 622     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
 623     if( pict_type == SLICE_TYPE_I )
 624     {
 625         double iq = q;
 626         double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 627         double ip_factor = fabs( h->param.rc.f_ip_factor );
 628         /* don't apply ip_factor if the following frame is also I */
 629         if( rcc->accum_p_norm <= 0 )
 630             q = iq;
 631         else if( h->param.rc.f_ip_factor < 0 )
 632             q = iq / ip_factor;
 633         else if( rcc->accum_p_norm >= 1 )
 634             q = pq / ip_factor;
 635         else
 636             q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
 637     }
 638     else if( pict_type == SLICE_TYPE_B )
 639     {
 640         if( h->param.rc.f_pb_factor > 0 )
 641             q = last_non_b_q;
 642         if( !rce->kept_as_ref )
 643             q *= fabs( h->param.rc.f_pb_factor );
 644     }
 645     else if( pict_type == SLICE_TYPE_P
 646              && rcc->last_non_b_pict_type == SLICE_TYPE_P
 647              && rce->i_tex_bits + rce->p_tex_bits == 0 )
 648     {
 649         q = last_p_q;
 650     }
 651
 652     /* last qscale / qdiff stuff */
 653     if(rcc->last_non_b_pict_type==pict_type
 654        && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
 655     {
 656         double last_q = rcc->last_qscale_for[pict_type];
 657         double max_qscale = last_q * rcc->lstep;
 658         double min_qscale = last_q / rcc->lstep;
 659
 660         if     (q > max_qscale) q = max_qscale;
 661         else if(q < min_qscale) q = min_qscale;
 662     }
 663
 664     rcc->last_qscale_for[pict_type] = q;
 665     if(pict_type!=SLICE_TYPE_B)
 666         rcc->last_non_b_pict_type = pict_type;
 667     if(pict_type==SLICE_TYPE_I)
 668     {
 669         rcc->last_accum_p_norm = rcc->accum_p_norm;
 670         rcc->accum_p_norm = 0;
 671         rcc->accum_p_qp = 0;
 672     }
 673     if(pict_type==SLICE_TYPE_P)
 674     {
 675         float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
 676         rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
 677         rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
 678     }
 679     return q;
 680 }
 681
 682 static double predict_size( predictor_t *p, double q, double var )
 683 {
 684      return p->coeff*var / (q*p->count);
 685 }
 686
 687 static void update_predictor( predictor_t *p, double q, double var, double bits )
 688 {
 689     p->count *= p->decay;
 690     p->coeff *= p->decay;
 691     p->count ++;
 692     p->coeff += bits*q / var;
 693 }
 694
 695 static void update_vbv( x264_t *h, int bits )
 696 {
 697     x264_ratecontrol_t *rcc = h->rc;
 698     if( !rcc->buffer_size )
 699         return;
 700
 701     rcc->buffer_fill += rcc->buffer_rate - bits;
 702     if( rcc->buffer_fill < 0 && !rcc->b_2pass )
 703         x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rcc->buffer_fill );
 704     rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size );
 705
 706     if(rcc->last_satd > 100)
 707         update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits );
 708 }
 709
 710 // apply VBV constraints and clip qscale to between lmin and lmax
 711 static double clip_qscale( x264_t *h, int pict_type, double q )
 712 {
 713     x264_ratecontrol_t *rcc = h->rc;
 714     double lmin = rcc->lmin[pict_type];
 715     double lmax = rcc->lmax[pict_type];
 716     double q0 = q;
 717
 718     /* B-frames are not directly subject to VBV,
 719      * since they are controlled by the P-frames' QPs.
 720      * FIXME: in 2pass we could modify previous frames' QP too,
 721      *        instead of waiting for the buffer to fill */
 722     if( rcc->buffer_size &&
 723         ( pict_type == SLICE_TYPE_P ||
 724           ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
 725     {
 726         if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
 727             q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
 728     }
 729     /* Now a hard threshold to make sure the frame fits in VBV.
 730      * This one is mostly for I-frames. */
 731     if( rcc->buffer_size && rcc->last_satd > 0 )
 732     {
 733         double bits = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd );
 734         double qf = 1.0;
 735         if( bits > rcc->buffer_fill/2 )
 736             qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
 737         q /= qf;
 738         bits *= qf;
 739         if( bits < rcc->buffer_rate/2 )
 740             q *= bits*2/rcc->buffer_rate;
 741         q = X264_MAX( q0, q );
 742     }
 743
 744     if(lmin==lmax)
 745         return lmin;
 746     else if(rcc->b_2pass)
 747     {
 748         double min2 = log(lmin);
 749         double max2 = log(lmax);
 750         q = (log(q) - min2)/(max2-min2) - 0.5;
 751         q = 1.0/(1.0 + exp(-4*q));
 752         q = q*(max2-min2) + min2;
 753         return exp(q);
 754     }
 755     else
 756         return x264_clip3f(q, lmin, lmax);
 757 }
 758
 759 // update qscale for 1 frame based on actual bits used so far
 760 static float rate_estimate_qscale(x264_t *h, int pict_type)
 761 {
 762     float q;
 763     x264_ratecontrol_t *rcc = h->rc;
 764     ratecontrol_entry_t rce;
 765     double lmin = rcc->lmin[pict_type];
 766     double lmax = rcc->lmax[pict_type];
 767     int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
 768                           + h->stat.i_slice_size[SLICE_TYPE_P]
 769                           + h->stat.i_slice_size[SLICE_TYPE_B]);
 770
 771     if( rcc->b_2pass )
 772     {
 773         rce = *rcc->rce;
 774         if(pict_type != rce.pict_type)
 775         {
 776             x264_log(h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
 777                      slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type]);
 778         }
 779     }
 780
 781     if( pict_type == SLICE_TYPE_B )
 782     {
 783         rcc->last_satd = 0;
 784         if(h->fenc->b_kept_as_ref)
 785             q = rcc->last_qscale * sqrtf(h->param.rc.f_pb_factor);
 786         else
 787             q = rcc->last_qscale * h->param.rc.f_pb_factor;
 788         return x264_clip3f(q, lmin, lmax);
 789     }
 790     else
 791     {
 792         double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
 793         if( rcc->b_2pass )
 794         {
 795             //FIXME adjust abr_buffer based on distance to the end of the video
 796             int64_t diff = total_bits - (int64_t)rce.expected_bits;
 797             q = rce.new_qscale;
 798             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
 799             if( h->fenc->i_frame > 30 )
 800             {
 801                 /* Adjust quant based on the difference between
 802                  * achieved and expected bitrate so far */
 803                 double time = (double)h->fenc->i_frame / rcc->num_entries;
 804                 double w = x264_clip3f( time*100, 0.0, 1.0 );
 805                 q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
 806             }
 807             q = x264_clip3f( q, lmin, lmax );
 808         }
 809         else /* 1pass ABR */
 810         {
 811             /* Calculate the quantizer which would have produced the desired
 812              * average bitrate if it had been applied to all frames so far.
 813              * Then modulate that quant based on the current frame's complexity
 814              * relative to the average complexity so far (using the 2pass RCEQ).
 815              * Then bias the quant up or down if total size so far was far from
 816              * the target.
 817              * Result: Depending on the value of rate_tolerance, there is a
 818              * tradeoff between quality and bitrate precision. But at large
 819              * tolerances, the bit distribution approaches that of 2pass. */
 820
 821             double wanted_bits, overflow, lmin, lmax;
 822
 823             rcc->last_satd = x264_rc_analyse_slice( h );
 824             rcc->short_term_cplxsum *= 0.5;
 825             rcc->short_term_cplxcount *= 0.5;
 826             rcc->short_term_cplxsum += rcc->last_satd;
 827             rcc->short_term_cplxcount ++;
 828
 829             rce.p_tex_bits = rcc->last_satd;
 830             rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
 831             rce.i_tex_bits = 0;
 832             rce.mv_bits = 0;
 833             rce.p_count = rcc->nmb;
 834             rce.i_count = 0;
 835             rce.s_count = 0;
 836             rce.qscale = 1;
 837             rce.pict_type = pict_type;
 838             rcc->last_rceq = get_qscale(h, &rce, 1);
 839
 840             wanted_bits = h->fenc->i_frame * rcc->bitrate / rcc->fps;
 841             abr_buffer *= X264_MAX( 1, sqrt(h->fenc->i_frame/25) );
 842             overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
 843
 844             q = rcc->last_rceq * overflow * rcc->cplxr_sum / rcc->wanted_bits_window;
 845
 846             if( pict_type == SLICE_TYPE_I
 847                 /* should test _next_ pict type, but that isn't decided yet */
 848                 && rcc->last_non_b_pict_type != SLICE_TYPE_I )
 849             {
 850                 q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
 851                 q /= fabs( h->param.rc.f_ip_factor );
 852                 q = clip_qscale( h, pict_type, q );
 853             }
 854             else
 855             {
 856                 if( h->stat.i_slice_count[SLICE_TYPE_P] < 5 )
 857                 {
 858                     float w = h->stat.i_slice_count[SLICE_TYPE_P] / 5.;
 859                     float q2 = qp2qscale(ABR_INIT_QP);
 860                     q = q*w + q2*(1-w);
 861                 }
 862
 863                 /* Asymmetric clipping, because symmetric would prevent
 864                  * overflow control in areas of rapidly oscillating complexity */
 865                 lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
 866                 lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
 867                 if( overflow > 1.1 )
 868                     lmax *= rcc->lstep;
 869                 else if( overflow < 0.9 )
 870                     lmin /= rcc->lstep;
 871
 872                 q = x264_clip3f(q, lmin, lmax);
 873                 q = clip_qscale(h, pict_type, q);
 874                 //FIXME use get_diff_limited_q() ?
 875             }
 876         }
 877
 878         rcc->last_qscale_for[pict_type] =
 879         rcc->last_qscale = q;
 880
 881         return q;
 882     }
 883 }
 884
 885 static int init_pass2( x264_t *h )
 886 {
 887     x264_ratecontrol_t *rcc = h->rc;
 888     uint64_t all_const_bits = 0;
 889     uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
 890     double rate_factor, step, step_mult;
 891     double qblur = h->param.rc.f_qblur;
 892     double cplxblur = h->param.rc.f_complexity_blur;
 893     const int filter_size = (int)(qblur*4) | 1;
 894     double expected_bits;
 895     double *qscale, *blurred_qscale;
 896     int i;
 897
 898     /* find total/average complexity & const_bits */
 899     for(i=0; i<rcc->num_entries; i++){
 900         ratecontrol_entry_t *rce = &rcc->entry[i];
 901         all_const_bits += rce->misc_bits;
 902         rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
 903         rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
 904         rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
 905         rcc->frame_count[rce->pict_type] ++;
 906     }
 907
 908     if( all_available_bits < all_const_bits)
 909     {
 910         x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
 911                  (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
 912         return -1;
 913     }
 914
 915     /* Blur complexities, to reduce local fluctuation of QP.
 916      * We don't blur the QPs directly, because then one very simple frame
 917      * could drag down the QP of a nearby complex frame and give it more
 918      * bits than intended. */
 919     for(i=0; i<rcc->num_entries; i++){
 920         ratecontrol_entry_t *rce = &rcc->entry[i];
 921         double weight_sum = 0;
 922         double cplx_sum = 0;
 923         double weight = 1.0;
 924         int j;
 925         /* weighted average of cplx of future frames */
 926         for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
 927             ratecontrol_entry_t *rcj = &rcc->entry[i+j];
 928             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 929             if(weight < .0001)
 930                 break;
 931             weight_sum += weight;
 932             cplx_sum += weight * qscale2bits(rcj, 1);
 933         }
 934         /* weighted average of cplx of past frames */
 935         weight = 1.0;
 936         for(j=0; j<=cplxblur*2 && j<=i; j++){
 937             ratecontrol_entry_t *rcj = &rcc->entry[i-j];
 938             weight_sum += weight;
 939             cplx_sum += weight * qscale2bits(rcj, 1);
 940             weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
 941             if(weight < .0001)
 942                 break;
 943         }
 944         rce->blurred_complexity = cplx_sum / weight_sum;
 945     }
 946
 947     qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 948     if(filter_size > 1)
 949         blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
 950     else
 951         blurred_qscale = qscale;
 952
 953     /* Search for a factor which, when multiplied by the RCEQ values from
 954      * each frame, adds up to the desired total size.
 955      * There is no exact closed-form solution because of VBV constraints and
 956      * because qscale2bits is not invertible, but we can start with the simple
 957      * approximation of scaling the 1st pass by the ratio of bitrates.
 958      * The search range is probably overkill, but speed doesn't matter here. */
 959
 960     expected_bits = 1;
 961     for(i=0; i<rcc->num_entries; i++)
 962         expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
 963     step_mult = all_available_bits / expected_bits;
 964
 965     rate_factor = 0;
 966     for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
 967         expected_bits = 0;
 968         rate_factor += step;
 969
 970         rcc->last_non_b_pict_type = -1;
 971         rcc->last_accum_p_norm = 1;
 972         rcc->accum_p_norm = 0;
 973         rcc->buffer_fill = rcc->buffer_size * h->param.rc.f_vbv_buffer_init;
 974
 975         /* find qscale */
 976         for(i=0; i<rcc->num_entries; i++){
 977             qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
 978         }
 979
 980         /* fixed I/B qscale relative to P */
 981         for(i=rcc->num_entries-1; i>=0; i--){
 982             qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
 983             assert(qscale[i] >= 0);
 984         }
 985
 986         /* smooth curve */
 987         if(filter_size > 1){
 988             assert(filter_size%2==1);
 989             for(i=0; i<rcc->num_entries; i++){
 990                 ratecontrol_entry_t *rce = &rcc->entry[i];
 991                 int j;
 992                 double q=0.0, sum=0.0;
 993
 994                 for(j=0; j<filter_size; j++){
 995                     int index = i+j-filter_size/2;
 996                     double d = index-i;
 997                     double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
 998                     if(index < 0 || index >= rcc->num_entries) continue;
 999                     if(rce->pict_type != rcc->entry[index].pict_type) continue;
1000                     q += qscale[index] * coeff;
1001                     sum += coeff;
1002                 }
1003                 blurred_qscale[i] = q/sum;
1004             }
1005         }
1006
1007         /* find expected bits */
1008         for(i=0; i<rcc->num_entries; i++){
1009             ratecontrol_entry_t *rce = &rcc->entry[i];
1010             double bits;
1011             rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]);
1012             assert(rce->new_qscale >= 0);
1013             bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
1014
1015             rce->expected_bits = expected_bits;
1016             expected_bits += bits;
1017             update_vbv(h, bits);
1018         }
1019
1020 //printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor);
1021         if(expected_bits > all_available_bits) rate_factor -= step;
1022     }
1023
1024     x264_free(qscale);
1025     if(filter_size > 1)
1026         x264_free(blurred_qscale);
1027
1028     if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
1029     {
1030         double avgq = 0;
1031         for(i=0; i<rcc->num_entries; i++)
1032             avgq += rcc->entry[i].new_qscale;
1033         avgq = qscale2qp(avgq / rcc->num_entries);
1034
1035         x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
1036         x264_log(h, X264_LOG_ERROR, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
1037                  (float)h->param.rc.i_bitrate,
1038                  expected_bits * rcc->fps / (rcc->num_entries * 1000.),
1039                  avgq);
1040         if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 2)
1041         {
1042             if(h->param.rc.i_qp_min > 0)
1043                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate or reducing qp_min (currently %d)\n", h->param.rc.i_qp_min);
1044             else
1045                 x264_log(h, X264_LOG_ERROR, "try reducing target bitrate\n");
1046         }
1047         else if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2)
1048         {
1049             if(h->param.rc.i_qp_max < 51)
1050                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max);
1051             else
1052                 x264_log(h, X264_LOG_ERROR, "try increasing target bitrate\n");
1053         }
1054         else
1055             x264_log(h, X264_LOG_ERROR, "internal error\n");
1056     }
1057
1058     return 0;
1059 }
1060
1061