Mark some local functions as static, cosmetics

[x264] / encoder / ratecontrol.c
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index 13b115ff5545b0d9d53c4da4fddf77c5f2fa213d..7e77accf9774ec8c0317b1c8a73a77f4ac9d359d 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -1,7 +1,7 @@
  /*****************************************************************************
   * ratecontrol.c: ratecontrol
   *****************************************************************************
- * Copyright (C) 2005-2010 x264 project
+ * Copyright (C) 2005-2011 x264 project
   *
   * Authors: Loren Merritt <lorenm@u.washington.edu>
   *          Michael Niedermayer <michaelni@gmx.at>
@@ -29,7 +29,6 @@
  
  #define _ISOC99_SOURCE
  #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
-#include <math.h>
  
  #include "common/common.h"
  #include "ratecontrol.h"
@@ -57,16 +56,16 @@ typedef struct
      int16_t i_weight_denom[2];
      int refcount[16];
      int refs;
-    int i_duration;
-    int i_cpb_duration;
+    int64_t i_duration;
+    int64_t i_cpb_duration;
  } ratecontrol_entry_t;
  
  typedef struct
  {
-    double coeff;
-    double count;
-    double decay;
-    double offset;
+    float coeff;
+    float count;
+    float decay;
+    float offset;
  } predictor_t;
  
  struct x264_ratecontrol_t
@@ -88,7 +87,7 @@ struct x264_ratecontrol_t
      int qp;                     /* qp for current frame */
      float qpm;                  /* qp for current macroblock: precise float for AQ */
      float qpa_rc;               /* average of macroblocks' qp before aq */
-    float qpa_aq;               /* average of macroblocks' qp after aq */
+    int   qpa_aq;               /* average of macroblocks' qp after aq */
      float qp_novbv;             /* QP for the current frame if 1-pass VBV was disabled. */
  
      /* VBV stuff */
@@ -168,8 +167,8 @@ static int init_pass2(x264_t *);
  static float rate_estimate_qscale( x264_t *h );
  static int update_vbv( x264_t *h, int bits );
  static void update_vbv_plan( x264_t *h, int overhead );
-static double predict_size( predictor_t *p, double q, double var );
-static void update_predictor( predictor_t *p, double q, double var, double bits );
+static float predict_size( predictor_t *p, float q, float var );
+static void update_predictor( predictor_t *p, float q, float var, float bits );
  
  #define CMP_OPT_FIRST_PASS( opt, param_val )\
  {\
@@ -184,13 +183,13 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
   * qp = h.264's quantizer
   * qscale = linearized quantizer = Lagrange multiplier
   */
-static inline double qp2qscale( double qp )
+static inline float qp2qscale( float qp )
  {
-    return 0.85 * pow( 2.0, ( qp - 12.0 ) / 6.0 );
+    return 0.85f * powf( 2.0f, ( qp - 12.0f ) / 6.0f );
  }
-static inline double qscale2qp( double qscale )
+static inline float qscale2qp( float qscale )
  {
-    return 12.0 + 6.0 * log2( qscale/0.85 );
+    return 12.0f + 6.0f * log2f( qscale/0.85f );
  }
  
  /* Texture bitrate is not quite inversely proportional to qscale,
@@ -206,32 +205,38 @@ static inline double qscale2bits( ratecontrol_entry_t *rce, double qscale )
             + rce->misc_bits;
  }
  
-static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i )
+static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i, int b_store )
  {
      uint32_t sum = sum_ssd;
      uint32_t ssd = sum_ssd >> 32;
-    frame->i_pixel_sum[i] += sum;
-    frame->i_pixel_ssd[i] += ssd;
+    if( b_store )
+    {
+        frame->i_pixel_sum[i] += sum;
+        frame->i_pixel_ssd[i] += ssd;
+    }
      return ssd - ((uint64_t)sum * sum >> shift);
  }
  
-static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i )
+static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i, int b_chroma, int b_field, int b_store )
  {
-    int w = i ? 8 : 16;
+    int height = b_chroma ? 16>>CHROMA_V_SHIFT : 16;
      int stride = frame->i_stride[i];
-    int offset = h->mb.b_interlaced
-        ? 16 * mb_x + w * (mb_y&~1) * stride + (mb_y&1) * stride
-        : 16 * mb_x + w * mb_y * stride;
-    stride <<= h->mb.b_interlaced;
-    if( i )
+    int offset = b_field
+        ? 16 * mb_x + height * (mb_y&~1) * stride + (mb_y&1) * stride
+        : 16 * mb_x + height * mb_y * stride;
+    stride <<= b_field;
+    if( b_chroma )
      {
-        ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
-        h->mc.load_deinterleave_8x8x2_fenc( pix, frame->plane[1] + offset, stride );
-        return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1 )
-             + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2 );
+        ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*16] );
+        int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
+        int shift = 7 - CHROMA_V_SHIFT;
+
+        h->mc.load_deinterleave_chroma_fenc( pix, frame->plane[1] + offset, stride, height );
+        return ac_energy_var( h->pixf.var[chromapix]( pix,               FENC_STRIDE ), shift, frame, 1, b_store )
+             + ac_energy_var( h->pixf.var[chromapix]( pix+FENC_STRIDE/2, FENC_STRIDE ), shift, frame, 2, b_store );
      }
      else
-        return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, 0 );
+        return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[i] + offset, stride ), 8, frame, i, b_store );
  }
  
  // Find the total AC energy of the block in all planes.
@@ -241,8 +246,40 @@ static NOINLINE uint32_t x264_ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_
       * and putting it after floating point ops.  As a result, we put the emms at the end of the
       * function and make sure that it's always called before the float math.  Noinline makes
       * sure no reordering goes on. */
-    uint32_t var = ac_energy_plane( h, mb_x, mb_y, frame, 0 );
-    var         += ac_energy_plane( h, mb_x, mb_y, frame, 1 );
+    uint32_t var;
+    x264_prefetch_fenc( h, frame, mb_x, mb_y );
+    if( h->mb.b_adaptive_mbaff )
+    {
+        /* We don't know the super-MB mode we're going to pick yet, so
+         * simply try both and pick the lower of the two. */
+        uint32_t var_interlaced, var_progressive;
+        var_interlaced   = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 1, 1 );
+        var_progressive  = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0, 0 );
+        if( CHROMA444 )
+        {
+            var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 1, 1 );
+            var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0, 0 );
+            var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 );
+            var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 );
+        }
+        else
+        {
+            var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 );
+            var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 );
+        }
+        var = X264_MIN( var_interlaced, var_progressive );
+    }
+    else
+    {
+        var  = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, PARAM_INTERLACED, 1 );
+        if( CHROMA444 )
+        {
+            var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 );
+            var += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 );
+        }
+        else
+            var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 );
+    }
      x264_emms();
      return var;
  }
@@ -346,8 +383,8 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
      {
          uint64_t ssd = frame->i_pixel_ssd[i];
          uint64_t sum = frame->i_pixel_sum[i];
-        int width = h->mb.i_mb_width*16>>!!i;
-        int height = h->mb.i_mb_height*16>>!!i;
+        int width  = 16*h->mb.i_mb_width  >> (i && CHROMA_H_SHIFT);
+        int height = 16*h->mb.i_mb_height >> (i && CHROMA_V_SHIFT);
          frame->i_pixel_ssd[i] = ssd - (sum * sum + width * height / 2) / (width * height);
      }
  }
@@ -373,7 +410,7 @@ int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offs
  
                  if( i_type != i_type_actual && rc->qpbuf_pos == 1 )
                  {
-                    x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type, i_type_actual);
+                    x264_log( h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type, i_type_actual );
                      return -1;
                  }
              } while( i_type != i_type_actual );
@@ -391,7 +428,7 @@ int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offs
          x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets );
      return 0;
  fail:
-    x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
+    x264_log( h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n" );
      return -1;
  }
  
@@ -402,22 +439,22 @@ int x264_reference_build_list_optimal( x264_t *h )
      x264_weight_t weights[16][3];
      int refcount[16];
  
-    if( rce->refs != h->i_ref0 )
+    if( rce->refs != h->i_ref[0] )
          return -1;
  
-    memcpy( frames, h->fref0, sizeof(frames) );
+    memcpy( frames, h->fref[0], sizeof(frames) );
      memcpy( refcount, rce->refcount, sizeof(refcount) );
      memcpy( weights, h->fenc->weight, sizeof(weights) );
      memset( &h->fenc->weight[1][0], 0, sizeof(x264_weight_t[15][3]) );
  
      /* For now don't reorder ref 0; it seems to lower quality
         in most cases due to skips. */
-    for( int ref = 1; ref < h->i_ref0; ref++ )
+    for( int ref = 1; ref < h->i_ref[0]; ref++ )
      {
          int max = -1;
          int bestref = 1;
  
-        for( int i = 1; i < h->i_ref0; i++ )
+        for( int i = 1; i < h->i_ref[0]; i++ )
              /* Favor lower POC as a tiebreaker. */
              COPY2_IF_GT( max, refcount[i], bestref, i );
  
@@ -425,7 +462,7 @@ int x264_reference_build_list_optimal( x264_t *h )
           * that the optimal ordering doesn't place every duplicate. */
  
          refcount[bestref] = -1;
-        h->fref0[ref] = frames[bestref];
+        h->fref[0][ref] = frames[bestref];
          memcpy( h->fenc->weight[ref], weights[bestref], sizeof(weights[bestref]) );
      }
  
@@ -460,6 +497,11 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
  
      if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
      {
+        /* We don't support changing the ABR bitrate right now,
+           so if the stream starts as CBR, keep it CBR. */
+        if( rc->b_vbv_min_rate )
+            h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
+
          if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
          {
              h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
@@ -467,17 +509,10 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
                        h->param.rc.i_vbv_buffer_size );
          }
  
-        /* We don't support changing the ABR bitrate right now,
-           so if the stream starts as CBR, keep it CBR. */
-        if( rc->b_vbv_min_rate )
-            h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
-
          int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
          int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
  
          /* Init HRD */
-        h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
-        h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
          if( h->param.i_nal_hrd && b_init )
          {
              h->sps->vui.hrd.i_cpb_cnt = 1;
@@ -522,7 +557,11 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
              x264_log( h, X264_LOG_WARNING, "VBV parameters cannot be changed when NAL HRD is in use\n" );
              return;
          }
+        h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
+        h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
  
+        if( rc->b_vbv_min_rate )
+            rc->bitrate = h->param.rc.i_bitrate * 1000.;
          rc->buffer_rate = vbv_max_bitrate / rc->fps;
          rc->vbv_max_rate = vbv_max_bitrate;
          rc->buffer_size = vbv_buffer_size;
@@ -586,7 +625,7 @@ int x264_ratecontrol_new( x264_t *h )
  
      if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read )
      {
-        x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
+        x264_log( h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n" );
          return -1;
      }
  
@@ -611,7 +650,7 @@ int x264_ratecontrol_new( x264_t *h )
  
      if( rc->rate_tolerance < 0.01 )
      {
-        x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
+        x264_log( h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n" );
          rc->rate_tolerance = 0.01;
      }
  
@@ -679,7 +718,7 @@ int x264_ratecontrol_new( x264_t *h )
          stats_buf = stats_in = x264_slurp_file( h->param.rc.psz_stat_in );
          if( !stats_buf )
          {
-            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
+            x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
              return -1;
          }
          if( h->param.rc.b_mb_tree )
@@ -691,13 +730,19 @@ int x264_ratecontrol_new( x264_t *h )
              x264_free( mbtree_stats_in );
              if( !rc->p_mbtree_stat_file_in )
              {
-                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
+                x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
                  return -1;
              }
          }
  
          /* check whether 1st pass options were compatible with current options */
-        if( !strncmp( stats_buf, "#options:", 9 ) )
+        if( strncmp( stats_buf, "#options:", 9 ) )
+        {
+            x264_log( h, X264_LOG_ERROR, "options list in stats file not valid\n" );
+            return -1;
+        }
+
+        float res_factor, res_factor_bits;
          {
              int i, j;
              uint32_t k, l;
@@ -718,6 +763,10 @@ int x264_ratecontrol_new( x264_t *h )
                            h->param.i_width, h->param.i_height, i, j );
                  return -1;
              }
+            res_factor = (float)h->param.i_width * h->param.i_height / (i*j);
+            /* Change in bits relative to resolution isn't quite linear on typical sources,
+             * so we'll at least try to roughly approximate this effect. */
+            res_factor_bits = powf( res_factor, 0.7 );
  
              if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
              {
@@ -736,7 +785,20 @@ int x264_ratecontrol_new( x264_t *h )
              CMP_OPT_FIRST_PASS( "bframes", h->param.i_bframe );
              CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
              CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
-            CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop );
+            CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop );
+            CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat );
+
+            if( (p = strstr( opts, "interlaced=" )) )
+            {
+                char *current = h->param.b_interlaced ? h->param.b_tff ? "tff" : "bff" : h->param.b_fake_interlaced ? "fake" : "0";
+                char buf[5];
+                sscanf( p, "interlaced=%4s", buf );
+                if( strcmp( current, buf ) )
+                {
+                    x264_log( h, X264_LOG_ERROR, "different interlaced setting than first pass (%s vs %s)\n", current, buf );
+                    return -1;
+                }
+            }
  
              if( (p = strstr( opts, "keyint=" )) )
              {
@@ -780,7 +842,7 @@ int x264_ratecontrol_new( x264_t *h )
              p = strchr( p + 1, ';' );
          if( !num_entries )
          {
-            x264_log(h, X264_LOG_ERROR, "empty stats file\n");
+            x264_log( h, X264_LOG_ERROR, "empty stats file\n" );
              return -1;
          }
          rc->num_entries = num_entries;
@@ -834,10 +896,16 @@ int x264_ratecontrol_new( x264_t *h )
              rce = &rc->entry[frame_number];
              rce->direct_mode = 0;
  
-            e += sscanf( p, " in:%*d out:%*d type:%c dur:%d cpbdur:%d q:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c",
+            e += sscanf( p, " in:%*d out:%*d type:%c dur:%"SCNd64" cpbdur:%"SCNd64" q:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c",
                     &pict_type, &rce->i_duration, &rce->i_cpb_duration, &qp, &rce->tex_bits,
                     &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count,
                     &rce->s_count, &rce->direct_mode );
+            rce->tex_bits  *= res_factor_bits;
+            rce->mv_bits   *= res_factor_bits;
+            rce->misc_bits *= res_factor_bits;
+            rce->i_count   *= res_factor;
+            rce->p_count   *= res_factor;
+            rce->s_count   *= res_factor;
  
              p = strstr( p, "ref:" );
              if( !p )
@@ -926,7 +994,7 @@ parse_error:
          rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
          if( rc->p_stat_file_out == NULL )
          {
-            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
+            x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
              return -1;
          }
  
@@ -944,7 +1012,7 @@ parse_error:
              rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
              if( rc->p_mbtree_stat_file_out == NULL )
              {
-                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
+                x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
                  return -1;
              }
          }
@@ -966,6 +1034,7 @@ parse_error:
              rc[i] = rc[0];
              h->thread[i]->param = h->param;
              h->thread[i]->mb.b_variable_qp = h->mb.b_variable_qp;
+            h->thread[i]->mb.ip_offset = h->mb.ip_offset;
          }
      }
  
@@ -980,11 +1049,11 @@ static int parse_zone( x264_t *h, x264_zone_t *z, char *p )
      char *tok, UNUSED *saveptr=NULL;
      z->param = NULL;
      z->f_bitrate_factor = 1;
-    if( 3 <= sscanf(p, "%u,%u,q=%u%n", &z->i_start, &z->i_end, &z->i_qp, &len) )
+    if( 3 <= sscanf(p, "%d,%d,q=%d%n", &z->i_start, &z->i_end, &z->i_qp, &len) )
          z->b_force_qp = 1;
-    else if( 3 <= sscanf(p, "%u,%u,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) )
+    else if( 3 <= sscanf(p, "%d,%d,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) )
          z->b_force_qp = 0;
-    else if( 2 <= sscanf(p, "%u,%u%n", &z->i_start, &z->i_end, &len) )
+    else if( 2 <= sscanf(p, "%d,%d%n", &z->i_start, &z->i_end, &len) )
          z->b_force_qp = 0;
      else
      {
@@ -1198,6 +1267,8 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
      if( rc->b_vbv )
      {
          memset( h->fdec->i_row_bits, 0, h->mb.i_mb_height * sizeof(int) );
+        memset( h->fdec->f_row_qp, 0, h->mb.i_mb_height * sizeof(float) );
+        memset( h->fdec->f_row_qscale, 0, h->mb.i_mb_height * sizeof(float) );
          rc->row_pred = &rc->row_preds[h->sh.i_type];
          rc->buffer_rate = h->fenc->i_cpb_duration * rc->vbv_max_rate * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
          update_vbv_plan( h, overhead );
@@ -1208,12 +1279,11 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
  
          int mincr = l->mincr;
  
-        /* Blu-ray requires this */
-        if( l->level_idc == 41 && h->param.i_nal_hrd )
+        if( h->param.b_bluray_compat )
              mincr = 4;
  
-        /* High 10 doesn't require minCR, so just set the maximum to a large value. */
-        if( h->sps->i_profile_idc == PROFILE_HIGH10 )
+        /* Profiles above High don't require minCR, so just set the maximum to a large value. */
+        if( h->sps->i_profile_idc > PROFILE_HIGH )
              rc->frame_size_maximum = 1e9;
          else
          {
@@ -1236,11 +1306,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
      if( h->sh.i_type != SLICE_TYPE_B )
          rc->bframes = h->fenc->i_bframes;
  
-    if( i_force_qp != X264_QP_AUTO )
-    {
-        q = i_force_qp - 1;
-    }
-    else if( rc->b_abr )
+    if( rc->b_abr )
      {
          q = qscale2qp( rate_estimate_qscale( h ) );
      }
@@ -1264,12 +1330,14 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
                  q -= 6*log2f( zone->f_bitrate_factor );
          }
      }
+    if( i_force_qp != X264_QP_AUTO )
+        q = i_force_qp - 1;
  
      q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  
      rc->qpa_rc =
      rc->qpa_aq = 0;
-    rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
+    rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX );
      h->fdec->f_qp_avg_rc =
      h->fdec->f_qp_avg_aq =
      rc->qpm = q;
@@ -1282,72 +1350,84 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
          rc->last_non_b_pict_type = h->sh.i_type;
  }
  
-static double predict_row_size( x264_t *h, int y, double qp )
+static float predict_row_size( x264_t *h, int y, float qscale )
  {
      /* average between two predictors:
       * absolute SATD, and scaled bit cost of the colocated row in the previous frame */
      x264_ratecontrol_t *rc = h->rc;
-    double pred_s = predict_size( rc->row_pred[0], qp2qscale( qp ), h->fdec->i_row_satd[y] );
-    double pred_t = 0;
-    if( h->sh.i_type == SLICE_TYPE_I || qp >= h->fref0[0]->f_row_qp[y] )
+    float pred_s = predict_size( rc->row_pred[0], qscale, h->fdec->i_row_satd[y] );
+    if( h->sh.i_type == SLICE_TYPE_I || qscale >= h->fref[0][0]->f_row_qscale[y] )
      {
          if( h->sh.i_type == SLICE_TYPE_P
-            && h->fref0[0]->i_type == h->fdec->i_type
-            && h->fref0[0]->i_row_satd[y] > 0
-            && (abs(h->fref0[0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2))
+            && h->fref[0][0]->i_type == h->fdec->i_type
+            && h->fref[0][0]->f_row_qscale[y] > 0
+            && h->fref[0][0]->i_row_satd[y] > 0
+            && (abs(h->fref[0][0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2))
          {
-            pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y]
-                     * qp2qscale( h->fref0[0]->f_row_qp[y] ) / qp2qscale( qp );
+            float pred_t = h->fref[0][0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref[0][0]->i_row_satd[y]
+                         * h->fref[0][0]->f_row_qscale[y] / qscale;
+            return (pred_s + pred_t) * 0.5f;
          }
-        if( pred_t == 0 )
-            pred_t = pred_s;
-        return (pred_s + pred_t) / 2;
+        return pred_s;
      }
      /* Our QP is lower than the reference! */
      else
      {
-        double pred_intra = predict_size( rc->row_pred[1], qp2qscale( qp ), h->fdec->i_row_satds[0][0][y] );
+        float pred_intra = predict_size( rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y] );
          /* Sum: better to overestimate than underestimate by using only one of the two predictors. */
          return pred_intra + pred_s;
      }
  }
  
-static double row_bits_so_far( x264_t *h, int y )
+static int row_bits_so_far( x264_t *h, int y )
  {
-    double bits = 0;
+    int bits = 0;
      for( int i = h->i_threadslice_start; i <= y; i++ )
          bits += h->fdec->i_row_bits[i];
      return bits;
  }
  
-static double predict_row_size_sum( x264_t *h, int y, double qp )
+static float predict_row_size_sum( x264_t *h, int y, float qp )
  {
-    double bits = row_bits_so_far(h, y);
+    float qscale = qp2qscale( qp );
+    float bits = row_bits_so_far( h, y );
      for( int i = y+1; i < h->i_threadslice_end; i++ )
-        bits += predict_row_size( h, i, qp );
+        bits += predict_row_size( h, i, qscale );
      return bits;
  }
  
-
+/* TODO:
+ *  eliminate all use of qp in row ratecontrol: make it entirely qscale-based.
+ *  make this function stop being needlessly O(N^2)
+ *  update more often than once per row? */
  void x264_ratecontrol_mb( x264_t *h, int bits )
  {
      x264_ratecontrol_t *rc = h->rc;
      const int y = h->mb.i_mb_y;
  
-    x264_emms();
-
      h->fdec->i_row_bits[y] += bits;
-    rc->qpa_rc += rc->qpm;
      rc->qpa_aq += h->mb.i_qp;
  
-    if( h->mb.i_mb_x != h->mb.i_mb_width - 1 || !rc->b_vbv )
+    if( h->mb.i_mb_x != h->mb.i_mb_width - 1 )
+        return;
+
+    x264_emms();
+    rc->qpa_rc += rc->qpm * h->mb.i_mb_width;
+
+    if( !rc->b_vbv )
          return;
  
+    float qscale = qp2qscale( rc->qpm );
      h->fdec->f_row_qp[y] = rc->qpm;
+    h->fdec->f_row_qscale[y] = qscale;
  
-    update_predictor( rc->row_pred[0], qp2qscale( rc->qpm ), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
-    if( h->sh.i_type == SLICE_TYPE_P && rc->qpm < h->fref0[0]->f_row_qp[y] )
-        update_predictor( rc->row_pred[1], qp2qscale( rc->qpm ), h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] );
+    update_predictor( rc->row_pred[0], qscale, h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
+    if( h->sh.i_type == SLICE_TYPE_P && rc->qpm < h->fref[0][0]->f_row_qp[y] )
+        update_predictor( rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] );
+
+    /* update ratecontrol per-mbpair in MBAFF */
+    if( SLICE_MBAFF && !(y&1) )
+        return;
  
      /* tweak quality based on difference from predicted size */
      if( y < h->i_threadslice_end-1 )
@@ -1358,18 +1438,18 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
          if( rc->rate_factor_max_increment )
              qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
          float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
-        float step_size = 0.5;
+        float step_size = 0.5f;
  
          /* B-frames shouldn't use lower QP than their reference frames. */
          if( h->sh.i_type == SLICE_TYPE_B )
          {
-            qp_min = X264_MAX( qp_min, X264_MAX( h->fref0[0]->f_row_qp[y+1], h->fref1[0]->f_row_qp[y+1] ) );
+            qp_min = X264_MAX( qp_min, X264_MAX( h->fref[0][0]->f_row_qp[y+1], h->fref[1][0]->f_row_qp[y+1] ) );
              rc->qpm = X264_MAX( rc->qpm, qp_min );
          }
  
          float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
          float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
-        float max_frame_error = X264_MAX( 0.05, 1.0 / (h->mb.i_mb_height) );
+        float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
          float size_of_other_slices = 0;
          if( h->param.b_sliced_threads )
          {
@@ -1386,22 +1466,22 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  
          /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
          float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
-        int b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
+        float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
  
          /* Don't modify the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
          /* area at the top of the frame was measured inaccurately. */
-        if( row_bits_so_far( h, y ) < 0.05 * slice_size_planned )
+        if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned )
              return;
  
          if( h->sh.i_type != SLICE_TYPE_I )
-            rc_tol /= 2;
+            rc_tol *= 0.5f;
  
          if( !rc->b_vbv_min_rate )
              qp_min = X264_MAX( qp_min, rc->qp_novbv );
  
          while( rc->qpm < qp_max
                 && ((b1 > rc->frame_size_planned + rc_tol) ||
-                   (rc->buffer_fill - b1 < buffer_left_planned * 0.5) ||
+                   (rc->buffer_fill - b1 < buffer_left_planned * 0.5f) ||
                     (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv)) )
          {
              rc->qpm += step_size;
@@ -1410,8 +1490,8 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  
          while( rc->qpm > qp_min
                 && (rc->qpm > h->fdec->f_row_qp[0] || rc->single_frame_vbv)
-               && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp)
-               || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
+               && ((b1 < rc->frame_size_planned * 0.8f && rc->qpm <= prev_row_qp)
+               || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1f) )
          {
              rc->qpm -= step_size;
              b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
@@ -1426,14 +1506,16 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
              b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
          }
  
-        h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
+        h->rc->frame_size_estimated = b1 - size_of_other_slices;
      }
+    else
+        h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
  }
  
  int x264_ratecontrol_qp( x264_t *h )
  {
      x264_emms();
-    return x264_clip3( h->rc->qpm + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+    return x264_clip3( h->rc->qpm + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  }
  
  int x264_ratecontrol_mb_qp( x264_t *h )
@@ -1441,9 +1523,15 @@ int x264_ratecontrol_mb_qp( x264_t *h )
      x264_emms();
      float qp = h->rc->qpm;
      if( h->param.rc.i_aq_mode )
-        /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
-        qp += h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
-    return x264_clip3( qp + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+    {
+         /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
+        float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
+        /* Scale AQ's effect towards zero in emergency mode. */
+        if( qp > QP_MAX_SPEC )
+            qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
+        qp += qp_offset;
+    }
+    return x264_clip3( qp + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  }
  
  /* In 2pass, force the same frame types as in the 1st pass */
@@ -1463,10 +1551,10 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
              rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX );
              rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX );
  
-            x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
-            x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
+            x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries );
+            x264_log( h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant );
              if( h->param.i_bframe_adaptive )
-                x264_log(h, X264_LOG_ERROR, "disabling adaptive B-frames\n");
+                x264_log( h, X264_LOG_ERROR, "disabling adaptive B-frames\n" );
  
              for( int i = 0; i < h->param.i_threads; i++ )
              {
@@ -1519,7 +1607,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
          h->stat.frame.i_mb_count_p += mbs[i];
  
      h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
-    h->fdec->f_qp_avg_aq = rc->qpa_aq /= h->mb.i_mb_count;
+    h->fdec->f_qp_avg_aq = (float)rc->qpa_aq / h->mb.i_mb_count;
  
      if( h->param.rc.b_stat_write )
      {
@@ -1533,7 +1621,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
                            dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
                          : '-';
          if( fprintf( rc->p_stat_file_out,
-                 "in:%d out:%d type:%c dur:%d cpbdur:%d q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c ref:",
+                 "in:%d out:%d type:%c dur:%"PRId64" cpbdur:%"PRId64" q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c ref:",
                   h->fenc->i_frame, h->i_frame,
                   c_type, h->fenc->i_duration,
                   h->fenc->i_cpb_duration, rc->qpa_rc,
@@ -1548,10 +1636,10 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
  
          /* Only write information for reference reordering once. */
          int use_old_stats = h->param.rc.b_stat_read && rc->rce->refs > 1;
-        for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref0); i++ )
+        for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref[0]); i++ )
          {
              int refcount = use_old_stats         ? rc->rce->refcount[i]
-                         : h->param.b_interlaced ? h->stat.frame.i_mb_count_ref[0][i*2]
+                         : PARAM_INTERLACED      ? h->stat.frame.i_mb_count_ref[0][i*2]
                                                   + h->stat.frame.i_mb_count_ref[0][i*2+1]
                           :                         h->stat.frame.i_mb_count_ref[0][i];
              if( fprintf( rc->p_stat_file_out, "%d ", refcount ) < 0 )
@@ -1565,12 +1653,12 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
                  goto fail;
              if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
              {
-                if( fprintf( rc->p_stat_file_out, ",%d,%d,%d,%d,%d\n",
+                if( fprintf( rc->p_stat_file_out, ",%d,%d,%d,%d,%d ",
                               h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
                               h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
                      goto fail;
              }
-            else if( fprintf( rc->p_stat_file_out, "\n" ) < 0 )
+            else if( fprintf( rc->p_stat_file_out, " " ) < 0 )
                  goto fail;
          }
  
@@ -1602,9 +1690,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
              rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / (rc->last_rceq * fabs( h->param.rc.f_pb_factor ));
          }
          rc->cplxr_sum *= rc->cbr_decay;
-        double frame_duration = (double)h->fenc->i_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
-
-        rc->wanted_bits_window += frame_duration * rc->bitrate;
+        rc->wanted_bits_window += h->fenc->f_duration * rc->bitrate;
          rc->wanted_bits_window *= rc->cbr_decay;
      }
  
@@ -1619,7 +1705,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
              if( h->fenc->b_last_minigop_bframe )
              {
                  update_predictor( rc->pred_b_from_p, qp2qscale( rc->qpa_rc ),
-                                  h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
+                                  h->fref[1][h->i_ref[1]-1]->i_satd, rc->bframe_bits / rc->bframes );
                  rc->bframe_bits = 0;
              }
          }
@@ -1669,7 +1755,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
  
      return 0;
  fail:
-    x264_log(h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n");
+    x264_log( h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n" );
      return -1;
  }
  
@@ -1684,7 +1770,14 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
  {
      x264_ratecontrol_t *rcc= h->rc;
      x264_zone_t *zone = get_zone( h, frame_num );
-    double q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
+    double q;
+    if( h->param.rc.b_mb_tree )
+    {
+        double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+        q = pow( BASE_FRAME_DURATION / CLIP_DURATION(rce->i_duration * timescale), 1 - h->param.rc.f_qcompress );
+    }
+    else
+        q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
  
      // avoid NaN's in the rc_eq
      if( !isfinite(q) || rce->tex_bits + rce->mv_bits == 0 )
@@ -1707,10 +1800,11 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
      return q;
  }
  
-static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
+static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q, int frame_num)
  {
      x264_ratecontrol_t *rcc = h->rc;
      const int pict_type = rce->pict_type;
+    x264_zone_t *zone = get_zone( h, frame_num );
  
      // force I/B quants as a function of P quants
      const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
@@ -1771,23 +1865,32 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
          rcc->accum_p_qp   = mask * (qscale2qp( q ) + rcc->accum_p_qp);
          rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
      }
+
+    if( zone )
+    {
+        if( zone->b_force_qp )
+            q = qp2qscale( zone->i_qp );
+        else
+            q /= zone->f_bitrate_factor;
+    }
+
      return q;
  }
  
-static double predict_size( predictor_t *p, double q, double var )
+static float predict_size( predictor_t *p, float q, float var )
  {
-     return (p->coeff*var + p->offset) / (q*p->count);
+    return (p->coeff*var + p->offset) / (q*p->count);
  }
  
-static void update_predictor( predictor_t *p, double q, double var, double bits )
+static void update_predictor( predictor_t *p, float q, float var, float bits )
  {
-    const double range = 1.5;
+    float range = 1.5;
      if( var < 10 )
          return;
-    double old_coeff = p->coeff / p->count;
-    double new_coeff = bits*q / var;
-    double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
-    double new_offset = bits*q - new_coeff_clipped * var;
+    float old_coeff = p->coeff / p->count;
+    float new_coeff = bits*q / var;
+    float new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
+    float new_offset = bits*q - new_coeff_clipped * var;
      if( new_offset >= 0 )
          new_coeff = new_coeff_clipped;
      else
@@ -1824,7 +1927,8 @@ static int update_vbv( x264_t *h, int bits )
  
      if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
      {
-        filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
+        int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
+        filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
          bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
          rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
      }
@@ -1866,7 +1970,7 @@ static void update_vbv_plan( x264_t *h, int overhead )
              double bits = t->rc->frame_size_planned;
              if( !t->b_thread_active )
                  continue;
-            bits  = X264_MAX(bits, t->rc->frame_size_estimated);
+            bits = X264_MAX(bits, t->rc->frame_size_estimated);
              rcc->buffer_fill -= bits;
              rcc->buffer_fill = X264_MAX( rcc->buffer_fill, 0 );
              rcc->buffer_fill += t->rc->buffer_rate;
@@ -2027,7 +2131,7 @@ static float rate_estimate_qscale( x264_t *h )
  {
      float q;
      x264_ratecontrol_t *rcc = h->rc;
-    ratecontrol_entry_t rce;
+    ratecontrol_entry_t UNINIT(rce);
      int pict_type = h->sh.i_type;
      int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I]
                            + h->stat.i_frame_size[SLICE_TYPE_P]
@@ -2049,16 +2153,16 @@ static float rate_estimate_qscale( x264_t *h )
          /* B-frames don't have independent ratecontrol, but rather get the
           * average QP of the two adjacent P-frames + an offset */
  
-        int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type);
-        int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type);
-        int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc);
-        int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc);
-        float q0 = h->fref0[0]->f_qp_avg_rc;
-        float q1 = h->fref1[0]->f_qp_avg_rc;
+        int i0 = IS_X264_TYPE_I(h->fref_nearest[0]->i_type);
+        int i1 = IS_X264_TYPE_I(h->fref_nearest[1]->i_type);
+        int dt0 = abs(h->fenc->i_poc - h->fref_nearest[0]->i_poc);
+        int dt1 = abs(h->fenc->i_poc - h->fref_nearest[1]->i_poc);
+        float q0 = h->fref_nearest[0]->f_qp_avg_rc;
+        float q1 = h->fref_nearest[1]->f_qp_avg_rc;
  
-        if( h->fref0[0]->i_type == X264_TYPE_BREF )
+        if( h->fref_nearest[0]->i_type == X264_TYPE_BREF )
              q0 -= rcc->pb_offset/2;
-        if( h->fref1[0]->i_type == X264_TYPE_BREF )
+        if( h->fref_nearest[1]->i_type == X264_TYPE_BREF )
              q1 -= rcc->pb_offset/2;
  
          if( i0 && i1 )
@@ -2078,7 +2182,10 @@ static float rate_estimate_qscale( x264_t *h )
          if( rcc->b_2pass && rcc->b_vbv )
              rcc->frame_size_planned = qscale2bits( &rce, qp2qscale( q ) );
          else
-            rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, qp2qscale( q ), h->fref1[h->i_ref1-1]->i_satd );
+            rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, qp2qscale( q ), h->fref[1][h->i_ref[1]-1]->i_satd );
+        /* Limit planned size by MinCR */
+        if( rcc->b_vbv )
+            rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
          h->rc->frame_size_estimated = rcc->frame_size_planned;
  
          /* For row SATDs */
@@ -2109,7 +2216,7 @@ static float rate_estimate_qscale( x264_t *h )
                          double bits = t->rc->frame_size_planned;
                          if( !t->b_thread_active )
                              continue;
-                        bits  = X264_MAX(bits, t->rc->frame_size_estimated);
+                        bits = X264_MAX(bits, t->rc->frame_size_estimated);
                          predicted_bits += (int64_t)bits;
                      }
                  }
@@ -2183,7 +2290,7 @@ static float rate_estimate_qscale( x264_t *h )
              rcc->last_satd = x264_rc_analyse_slice( h );
              rcc->short_term_cplxsum *= 0.5;
              rcc->short_term_cplxcount *= 0.5;
-            rcc->short_term_cplxsum += rcc->last_satd;
+            rcc->short_term_cplxsum += rcc->last_satd / (CLIP_DURATION(h->fenc->f_duration) / BASE_FRAME_DURATION);
              rcc->short_term_cplxcount ++;
  
              rce.tex_bits = rcc->last_satd;
@@ -2194,6 +2301,7 @@ static float rate_estimate_qscale( x264_t *h )
              rce.s_count = 0;
              rce.qscale = 1;
              rce.pict_type = pict_type;
+            rce.i_duration = h->fenc->i_duration;
  
              if( h->param.rc.i_rc_method == X264_RC_CRF )
              {
@@ -2269,12 +2377,15 @@ static float rate_estimate_qscale( x264_t *h )
          /* Always use up the whole VBV in this case. */
          if( rcc->single_frame_vbv )
              rcc->frame_size_planned = rcc->buffer_rate;
+        /* Limit planned size by MinCR */
+        if( rcc->b_vbv )
+            rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
          h->rc->frame_size_estimated = rcc->frame_size_planned;
          return q;
      }
  }
  
-void x264_threads_normalize_predictors( x264_t *h )
+static void x264_threads_normalize_predictors( x264_t *h )
  {
      double totalsize = 0;
      for( int i = 0; i < h->param.i_threads; i++ )
@@ -2379,13 +2490,14 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
          COPY(prev_zone);
          COPY(qpbuf_pos);
          /* these vars can be updated by x264_ratecontrol_init_reconfigurable */
-        COPY(buffer_rate);
+        COPY(bitrate);
          COPY(buffer_size);
+        COPY(buffer_rate);
+        COPY(vbv_max_rate);
          COPY(single_frame_vbv);
          COPY(cbr_decay);
-        COPY(b_vbv_min_rate);
          COPY(rate_factor_constant);
-        COPY(bitrate);
+        COPY(rate_factor_max_increment);
  #undef COPY
      }
      if( cur != next )
@@ -2540,10 +2652,11 @@ static int init_pass2( x264_t *h )
  {
      x264_ratecontrol_t *rcc = h->rc;
      uint64_t all_const_bits = 0;
+    double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
      double duration = 0;
      for( int i = 0; i < rcc->num_entries; i++ )
          duration += rcc->entry[i].i_duration;
-    duration *= (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+    duration *= timescale;
      uint64_t all_available_bits = h->param.rc.i_bitrate * 1000. * duration;
      double rate_factor, step_mult;
      double qblur = h->param.rc.f_qblur;
@@ -2582,21 +2695,23 @@ static int init_pass2( x264_t *h )
          for( int j = 1; j < cplxblur*2 && j < rcc->num_entries-i; j++ )
          {
              ratecontrol_entry_t *rcj = &rcc->entry[i+j];
+            double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION;
              weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
              if( weight < .0001 )
                  break;
              gaussian_weight = weight * exp( -j*j/200.0 );
              weight_sum += gaussian_weight;
-            cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
+            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration;
          }
          /* weighted average of cplx of past frames */
          weight = 1.0;
          for( int j = 0; j <= cplxblur*2 && j <= i; j++ )
          {
              ratecontrol_entry_t *rcj = &rcc->entry[i-j];
+            double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION;
              gaussian_weight = weight * exp( -j*j/200.0 );
              weight_sum += gaussian_weight;
-            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits);
+            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration;
              weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
              if( weight < .0001 )
                  break;
@@ -2643,14 +2758,14 @@ static int init_pass2( x264_t *h )
          /* find qscale */
          for( int i = 0; i < rcc->num_entries; i++ )
          {
-            qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, i );
+            qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, -1 );
              rcc->last_qscale_for[rcc->entry[i].pict_type] = qscale[i];
          }
  
          /* fixed I/B qscale relative to P */
          for( int i = rcc->num_entries-1; i >= 0; i-- )
          {
-            qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i] );
+            qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i], i );
              assert(qscale[i] >= 0);
          }