Fix crash with VBV + forced QP

[x264] / encoder / ratecontrol.c
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index f1d6260dadf15a5af4d2a1b827192dec1802cf0e..43763c3db9554e2fcde283a1f68bf055158bdcde 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -1,7 +1,7 @@
  /*****************************************************************************
   * ratecontrol.c: ratecontrol
   *****************************************************************************
- * Copyright (C) 2005-2010 x264 project
+ * Copyright (C) 2005-2011 x264 project
   *
   * Authors: Loren Merritt <lorenm@u.washington.edu>
   *          Michael Niedermayer <michaelni@gmx.at>
@@ -29,7 +29,6 @@
  
  #define _ISOC99_SOURCE
  #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
-#include <math.h>
  
  #include "common/common.h"
  #include "ratecontrol.h"
@@ -53,12 +52,12 @@ typedef struct
      int s_count;
      float blurred_complexity;
      char direct_mode;
-    int16_t weight[2];
-    int16_t i_weight_denom;
+    int16_t weight[3][2];
+    int16_t i_weight_denom[2];
      int refcount[16];
      int refs;
-    int i_duration;
-    int i_cpb_duration;
+    int64_t i_duration;
+    int64_t i_cpb_duration;
  } ratecontrol_entry_t;
  
  typedef struct
@@ -206,32 +205,35 @@ static inline double qscale2bits( ratecontrol_entry_t *rce, double qscale )
             + rce->misc_bits;
  }
  
-static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i )
+static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i, int b_store )
  {
      uint32_t sum = sum_ssd;
      uint32_t ssd = sum_ssd >> 32;
-    frame->i_pixel_sum[i] += sum;
-    frame->i_pixel_ssd[i] += ssd;
+    if( b_store )
+    {
+        frame->i_pixel_sum[i] += sum;
+        frame->i_pixel_ssd[i] += ssd;
+    }
      return ssd - ((uint64_t)sum * sum >> shift);
  }
  
-static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i )
+static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i, int field, int b_store )
  {
      int w = i ? 8 : 16;
      int stride = frame->i_stride[i];
-    int offset = h->mb.b_interlaced
+    int offset = field
          ? 16 * mb_x + w * (mb_y&~1) * stride + (mb_y&1) * stride
          : 16 * mb_x + w * mb_y * stride;
-    stride <<= h->mb.b_interlaced;
+    stride <<= field;
      if( i )
      {
          ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
          h->mc.load_deinterleave_8x8x2_fenc( pix, frame->plane[1] + offset, stride );
-        return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, i )
-             + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, i );
+        return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1, b_store )
+             + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2, b_store );
      }
      else
-        return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, i );
+        return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, 0, b_store );
  }
  
  // Find the total AC energy of the block in all planes.
@@ -241,8 +243,23 @@ static NOINLINE uint32_t x264_ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_
       * and putting it after floating point ops.  As a result, we put the emms at the end of the
       * function and make sure that its always called before the float math.  Noinline makes
       * sure no reordering goes on. */
-    uint32_t var = ac_energy_plane( h, mb_x, mb_y, frame, 0 );
-    var         += ac_energy_plane( h, mb_x, mb_y, frame, 1 );
+    uint32_t var;
+    if( h->mb.b_adaptive_mbaff )
+    {
+        /* We don't know the super-MB mode we're going to pick yet, so
+         * simply try both and pick the lower of the two. */
+        uint32_t var_interlaced, var_progressive;
+        var_interlaced   = ac_energy_plane( h, mb_x, mb_y, frame, 0, 1, 1 );
+        var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1 );
+        var_progressive  = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0 );
+        var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0 );
+        var = X264_MIN( var_interlaced, var_progressive );
+    }
+    else
+    {
+        var  = ac_energy_plane( h, mb_x, mb_y, frame, 0, PARAM_INTERLACED, 1 );
+        var += ac_energy_plane( h, mb_x, mb_y, frame, 1, PARAM_INTERLACED, 1 );
+    }
      x264_emms();
      return var;
  }
@@ -284,7 +301,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
              }
          }
          /* Need variance data for weighted prediction */
-        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE || h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+        if( h->param.analyse.i_weighted_pred )
          {
              for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
                  for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
@@ -402,22 +419,22 @@ int x264_reference_build_list_optimal( x264_t *h )
      x264_weight_t weights[16][3];
      int refcount[16];
  
-    if( rce->refs != h->i_ref0 )
+    if( rce->refs != h->i_ref[0] )
          return -1;
  
-    memcpy( frames, h->fref0, sizeof(frames) );
+    memcpy( frames, h->fref[0], sizeof(frames) );
      memcpy( refcount, rce->refcount, sizeof(refcount) );
      memcpy( weights, h->fenc->weight, sizeof(weights) );
      memset( &h->fenc->weight[1][0], 0, sizeof(x264_weight_t[15][3]) );
  
      /* For now don't reorder ref 0; it seems to lower quality
         in most cases due to skips. */
-    for( int ref = 1; ref < h->i_ref0; ref++ )
+    for( int ref = 1; ref < h->i_ref[0]; ref++ )
      {
          int max = -1;
          int bestref = 1;
  
-        for( int i = 1; i < h->i_ref0; i++ )
+        for( int i = 1; i < h->i_ref[0]; i++ )
              /* Favor lower POC as a tiebreaker. */
              COPY2_IF_GT( max, refcount[i], bestref, i );
  
@@ -425,7 +442,7 @@ int x264_reference_build_list_optimal( x264_t *h )
           * that the optimal ordering doesnt place every duplicate. */
  
          refcount[bestref] = -1;
-        h->fref0[ref] = frames[bestref];
+        h->fref[0][ref] = frames[bestref];
          memcpy( h->fenc->weight[ref], weights[bestref], sizeof(weights[bestref]) );
      }
  
@@ -460,6 +477,11 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
  
      if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
      {
+        /* We don't support changing the ABR bitrate right now,
+           so if the stream starts as CBR, keep it CBR. */
+        if( rc->b_vbv_min_rate )
+            h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
+
          if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
          {
              h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
@@ -467,17 +489,10 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
                        h->param.rc.i_vbv_buffer_size );
          }
  
-        /* We don't support changing the ABR bitrate right now,
-           so if the stream starts as CBR, keep it CBR. */
-        if( rc->b_vbv_min_rate )
-            h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
-
          int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
          int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
  
          /* Init HRD */
-        h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
-        h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
          if( h->param.i_nal_hrd && b_init )
          {
              h->sps->vui.hrd.i_cpb_cnt = 1;
@@ -522,7 +537,11 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
              x264_log( h, X264_LOG_WARNING, "VBV parameters cannot be changed when NAL HRD is in use\n" );
              return;
          }
+        h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
+        h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
  
+        if( rc->b_vbv_min_rate )
+            rc->bitrate = h->param.rc.i_bitrate * 1000.;
          rc->buffer_rate = vbv_max_bitrate / rc->fps;
          rc->vbv_max_rate = vbv_max_bitrate;
          rc->buffer_size = vbv_buffer_size;
@@ -736,7 +755,8 @@ int x264_ratecontrol_new( x264_t *h )
              CMP_OPT_FIRST_PASS( "bframes", h->param.i_bframe );
              CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
              CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
-            CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop );
+            CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop );
+            CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat );
  
              if( (p = strstr( opts, "keyint=" )) )
              {
@@ -834,7 +854,7 @@ int x264_ratecontrol_new( x264_t *h )
              rce = &rc->entry[frame_number];
              rce->direct_mode = 0;
  
-            e += sscanf( p, " in:%*d out:%*d type:%c dur:%d cpbdur:%d q:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c",
+            e += sscanf( p, " in:%*d out:%*d type:%c dur:%"SCNd64" cpbdur:%"SCNd64" q:%f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c",
                     &pict_type, &rce->i_duration, &rce->i_cpb_duration, &qp, &rce->tex_bits,
                     &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count,
                     &rce->s_count, &rce->direct_mode );
@@ -854,11 +874,19 @@ int x264_ratecontrol_new( x264_t *h )
              rce->refs = ref;
  
              /* find weights */
-            rce->i_weight_denom = -1;
+            rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
              char *w = strchr( p, 'w' );
              if( w )
-                if( sscanf( w, "w:%hd,%hd,%hd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
-                    rce->i_weight_denom = -1;
+            {
+                int count = sscanf( w, "w:%hd,%hd,%hd,%hd,%hd,%hd,%hd,%hd",
+                                    &rce->i_weight_denom[0], &rce->weight[0][0], &rce->weight[0][1],
+                                    &rce->i_weight_denom[1], &rce->weight[1][0], &rce->weight[1][1],
+                                    &rce->weight[2][0], &rce->weight[2][1] );
+                if( count == 3 )
+                    rce->i_weight_denom[1] = -1;
+                else if ( count != 8 )
+                    rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
+            }
  
              if( pict_type != 'b' )
                  rce->kept_as_ref = 1;
@@ -958,6 +986,7 @@ parse_error:
              rc[i] = rc[0];
              h->thread[i]->param = h->param;
              h->thread[i]->mb.b_variable_qp = h->mb.b_variable_qp;
+            h->thread[i]->mb.ip_offset = h->mb.ip_offset;
          }
      }
  
@@ -972,11 +1001,11 @@ static int parse_zone( x264_t *h, x264_zone_t *z, char *p )
      char *tok, UNUSED *saveptr=NULL;
      z->param = NULL;
      z->f_bitrate_factor = 1;
-    if( 3 <= sscanf(p, "%u,%u,q=%u%n", &z->i_start, &z->i_end, &z->i_qp, &len) )
+    if( 3 <= sscanf(p, "%d,%d,q=%d%n", &z->i_start, &z->i_end, &z->i_qp, &len) )
          z->b_force_qp = 1;
-    else if( 3 <= sscanf(p, "%u,%u,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) )
+    else if( 3 <= sscanf(p, "%d,%d,b=%f%n", &z->i_start, &z->i_end, &z->f_bitrate_factor, &len) )
          z->b_force_qp = 0;
-    else if( 2 <= sscanf(p, "%u,%u%n", &z->i_start, &z->i_end, &len) )
+    else if( 2 <= sscanf(p, "%d,%d%n", &z->i_start, &z->i_end, &len) )
          z->b_force_qp = 0;
      else
      {
@@ -1200,8 +1229,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
  
          int mincr = l->mincr;
  
-        /* Blu-ray requires this */
-        if( l->level_idc == 41 && h->param.i_nal_hrd )
+        if( h->param.b_bluray_compat )
              mincr = 4;
  
          /* High 10 doesn't require minCR, so just set the maximum to a large value. */
@@ -1228,11 +1256,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
      if( h->sh.i_type != SLICE_TYPE_B )
          rc->bframes = h->fenc->i_bframes;
  
-    if( i_force_qp != X264_QP_AUTO )
-    {
-        q = i_force_qp - 1;
-    }
-    else if( rc->b_abr )
+    if( rc->b_abr )
      {
          q = qscale2qp( rate_estimate_qscale( h ) );
      }
@@ -1256,12 +1280,14 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
                  q -= 6*log2f( zone->f_bitrate_factor );
          }
      }
+    if( i_force_qp != X264_QP_AUTO )
+        q = i_force_qp - 1;
  
      q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  
      rc->qpa_rc =
      rc->qpa_aq = 0;
-    rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
+    rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX );
      h->fdec->f_qp_avg_rc =
      h->fdec->f_qp_avg_aq =
      rc->qpm = q;
@@ -1281,15 +1307,15 @@ static double predict_row_size( x264_t *h, int y, double qp )
      x264_ratecontrol_t *rc = h->rc;
      double pred_s = predict_size( rc->row_pred[0], qp2qscale( qp ), h->fdec->i_row_satd[y] );
      double pred_t = 0;
-    if( h->sh.i_type == SLICE_TYPE_I || qp >= h->fref0[0]->f_row_qp[y] )
+    if( h->sh.i_type == SLICE_TYPE_I || qp >= h->fref[0][0]->f_row_qp[y] )
      {
          if( h->sh.i_type == SLICE_TYPE_P
-            && h->fref0[0]->i_type == h->fdec->i_type
-            && h->fref0[0]->i_row_satd[y] > 0
-            && (abs(h->fref0[0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2))
+            && h->fref[0][0]->i_type == h->fdec->i_type
+            && h->fref[0][0]->i_row_satd[y] > 0
+            && (abs(h->fref[0][0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2))
          {
-            pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y]
-                     * qp2qscale( h->fref0[0]->f_row_qp[y] ) / qp2qscale( qp );
+            pred_t = h->fref[0][0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref[0][0]->i_row_satd[y]
+                     * qp2qscale( h->fref[0][0]->f_row_qp[y] ) / qp2qscale( qp );
          }
          if( pred_t == 0 )
              pred_t = pred_s;
@@ -1307,7 +1333,7 @@ static double predict_row_size( x264_t *h, int y, double qp )
  static double row_bits_so_far( x264_t *h, int y )
  {
      double bits = 0;
-    for( int i = h->i_threadslice_start; i <= y; i++ )
+    for( int i = h->i_threadslice_start+SLICE_MBAFF; i <= y; i+=(SLICE_MBAFF+1) )
          bits += h->fdec->i_row_bits[i];
      return bits;
  }
@@ -1315,7 +1341,7 @@ static double row_bits_so_far( x264_t *h, int y )
  static double predict_row_size_sum( x264_t *h, int y, double qp )
  {
      double bits = row_bits_so_far(h, y);
-    for( int i = y+1; i < h->i_threadslice_end; i++ )
+    for( int i = y+1+SLICE_MBAFF; i < h->i_threadslice_end; i+=(1+SLICE_MBAFF) )
          bits += predict_row_size( h, i, qp );
      return bits;
  }
@@ -1329,8 +1355,16 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
      x264_emms();
  
      h->fdec->i_row_bits[y] += bits;
-    rc->qpa_rc += rc->qpm;
-    rc->qpa_aq += h->mb.i_qp;
+    if( SLICE_MBAFF )
+    {
+        rc->qpa_rc += rc->qpm*2.0f;
+        rc->qpa_aq += h->mb.i_qp + h->mb.i_last_qp;
+    }
+    else
+    {
+        rc->qpa_rc += rc->qpm;
+        rc->qpa_aq += h->mb.i_qp;
+    }
  
      if( h->mb.i_mb_x != h->mb.i_mb_width - 1 || !rc->b_vbv )
          return;
@@ -1338,7 +1372,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
      h->fdec->f_row_qp[y] = rc->qpm;
  
      update_predictor( rc->row_pred[0], qp2qscale( rc->qpm ), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
-    if( h->sh.i_type == SLICE_TYPE_P && rc->qpm < h->fref0[0]->f_row_qp[y] )
+    if( h->sh.i_type == SLICE_TYPE_P && rc->qpm < h->fref[0][0]->f_row_qp[y] )
          update_predictor( rc->row_pred[1], qp2qscale( rc->qpm ), h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] );
  
      /* tweak quality based on difference from predicted size */
@@ -1355,7 +1389,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
          /* B-frames shouldn't use lower QP than their reference frames. */
          if( h->sh.i_type == SLICE_TYPE_B )
          {
-            qp_min = X264_MAX( qp_min, X264_MAX( h->fref0[0]->f_row_qp[y+1], h->fref1[0]->f_row_qp[y+1] ) );
+            qp_min = X264_MAX( qp_min, X264_MAX( h->fref[0][0]->f_row_qp[y+1+SLICE_MBAFF], h->fref[1][0]->f_row_qp[y+1+SLICE_MBAFF] ) );
              rc->qpm = X264_MAX( rc->qpm, qp_min );
          }
  
@@ -1425,7 +1459,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  int x264_ratecontrol_qp( x264_t *h )
  {
      x264_emms();
-    return x264_clip3( h->rc->qpm + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+    return x264_clip3( h->rc->qpm + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  }
  
  int x264_ratecontrol_mb_qp( x264_t *h )
@@ -1433,9 +1467,15 @@ int x264_ratecontrol_mb_qp( x264_t *h )
      x264_emms();
      float qp = h->rc->qpm;
      if( h->param.rc.i_aq_mode )
-        /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
-        qp += h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
-    return x264_clip3( qp + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+    {
+         /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
+        float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
+        /* Scale AQ's effect towards zero in emergency mode. */
+        if( qp > QP_MAX_SPEC )
+            qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
+        qp += qp_offset;
+    }
+    return x264_clip3( qp + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  }
  
  /* In 2pass, force the same frame types as in the 1st pass */
@@ -1485,8 +1525,15 @@ void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
      ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame];
      if( h->param.analyse.i_weighted_pred <= 0 )
          return;
-    if( rce->i_weight_denom >= 0 )
-        SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0], rce->i_weight_denom, rce->weight[1] );
+
+    if( rce->i_weight_denom[0] >= 0 )
+        SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0][0], rce->i_weight_denom[0], rce->weight[0][1] );
+
+    if( rce->i_weight_denom[1] >= 0 )
+    {
+        SET_WEIGHT( frm->weight[0][1], 1, rce->weight[1][0], rce->i_weight_denom[1], rce->weight[1][1] );
+        SET_WEIGHT( frm->weight[0][2], 1, rce->weight[2][0], rce->i_weight_denom[1], rce->weight[2][1] );
+    }
  }
  
  /* After encoding one frame, save stats and update ratecontrol state */
@@ -1518,7 +1565,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
                            dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
                          : '-';
          if( fprintf( rc->p_stat_file_out,
-                 "in:%d out:%d type:%c dur:%d cpbdur:%d q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c ref:",
+                 "in:%d out:%d type:%c dur:%"PRId64" cpbdur:%"PRId64" q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c ref:",
                   h->fenc->i_frame, h->i_frame,
                   c_type, h->fenc->i_duration,
                   h->fenc->i_cpb_duration, rc->qpa_rc,
@@ -1533,19 +1580,29 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
  
          /* Only write information for reference reordering once. */
          int use_old_stats = h->param.rc.b_stat_read && rc->rce->refs > 1;
-        for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref0); i++ )
+        for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref[0]); i++ )
          {
              int refcount = use_old_stats         ? rc->rce->refcount[i]
-                         : h->param.b_interlaced ? h->stat.frame.i_mb_count_ref[0][i*2]
+                         : PARAM_INTERLACED      ? h->stat.frame.i_mb_count_ref[0][i*2]
                                                   + h->stat.frame.i_mb_count_ref[0][i*2+1]
                           :                         h->stat.frame.i_mb_count_ref[0][i];
              if( fprintf( rc->p_stat_file_out, "%d ", refcount ) < 0 )
                  goto fail;
          }
  
-        if( h->sh.weight[0][0].weightfn )
+        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->sh.weight[0][0].weightfn )
          {
-            if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32, h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
+            if( fprintf( rc->p_stat_file_out, "w:%d,%d,%d",
+                         h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
+                goto fail;
+            if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
+            {
+                if( fprintf( rc->p_stat_file_out, ",%d,%d,%d,%d,%d ",
+                             h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
+                             h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
+                    goto fail;
+            }
+            else if( fprintf( rc->p_stat_file_out, " " ) < 0 )
                  goto fail;
          }
  
@@ -1577,9 +1634,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
              rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / (rc->last_rceq * fabs( h->param.rc.f_pb_factor ));
          }
          rc->cplxr_sum *= rc->cbr_decay;
-        double frame_duration = (double)h->fenc->i_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
-
-        rc->wanted_bits_window += frame_duration * rc->bitrate;
+        rc->wanted_bits_window += h->fenc->f_duration * rc->bitrate;
          rc->wanted_bits_window *= rc->cbr_decay;
      }
  
@@ -1594,7 +1649,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
              if( h->fenc->b_last_minigop_bframe )
              {
                  update_predictor( rc->pred_b_from_p, qp2qscale( rc->qpa_rc ),
-                                  h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
+                                  h->fref[1][h->i_ref[1]-1]->i_satd, rc->bframe_bits / rc->bframes );
                  rc->bframe_bits = 0;
              }
          }
@@ -1659,7 +1714,14 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
  {
      x264_ratecontrol_t *rcc= h->rc;
      x264_zone_t *zone = get_zone( h, frame_num );
-    double q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
+    double q;
+    if( h->param.rc.b_mb_tree )
+    {
+        double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+        q = pow( BASE_FRAME_DURATION / CLIP_DURATION(rce->i_duration * timescale), 1 - h->param.rc.f_qcompress );
+    }
+    else
+        q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
  
      // avoid NaN's in the rc_eq
      if( !isfinite(q) || rce->tex_bits + rce->mv_bits == 0 )
@@ -1682,10 +1744,11 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
      return q;
  }
  
-static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
+static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q, int frame_num)
  {
      x264_ratecontrol_t *rcc = h->rc;
      const int pict_type = rce->pict_type;
+    x264_zone_t *zone = get_zone( h, frame_num );
  
      // force I/B quants as a function of P quants
      const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
@@ -1746,6 +1809,15 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
          rcc->accum_p_qp   = mask * (qscale2qp( q ) + rcc->accum_p_qp);
          rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
      }
+
+    if( zone )
+    {
+        if( zone->b_force_qp )
+            q = qp2qscale( zone->i_qp );
+        else
+            q /= zone->f_bitrate_factor;
+    }
+
      return q;
  }
  
@@ -1799,7 +1871,8 @@ static int update_vbv( x264_t *h, int bits )
  
      if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
      {
-        filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
+        int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
+        filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
          bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
          rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
      }
@@ -2002,7 +2075,7 @@ static float rate_estimate_qscale( x264_t *h )
  {
      float q;
      x264_ratecontrol_t *rcc = h->rc;
-    ratecontrol_entry_t rce;
+    ratecontrol_entry_t UNINIT(rce);
      int pict_type = h->sh.i_type;
      int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I]
                            + h->stat.i_frame_size[SLICE_TYPE_P]
@@ -2024,16 +2097,16 @@ static float rate_estimate_qscale( x264_t *h )
          /* B-frames don't have independent ratecontrol, but rather get the
           * average QP of the two adjacent P-frames + an offset */
  
-        int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type);
-        int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type);
-        int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc);
-        int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc);
-        float q0 = h->fref0[0]->f_qp_avg_rc;
-        float q1 = h->fref1[0]->f_qp_avg_rc;
+        int i0 = IS_X264_TYPE_I(h->fref_nearest[0]->i_type);
+        int i1 = IS_X264_TYPE_I(h->fref_nearest[1]->i_type);
+        int dt0 = abs(h->fenc->i_poc - h->fref_nearest[0]->i_poc);
+        int dt1 = abs(h->fenc->i_poc - h->fref_nearest[1]->i_poc);
+        float q0 = h->fref_nearest[0]->f_qp_avg_rc;
+        float q1 = h->fref_nearest[1]->f_qp_avg_rc;
  
-        if( h->fref0[0]->i_type == X264_TYPE_BREF )
+        if( h->fref_nearest[0]->i_type == X264_TYPE_BREF )
              q0 -= rcc->pb_offset/2;
-        if( h->fref1[0]->i_type == X264_TYPE_BREF )
+        if( h->fref_nearest[1]->i_type == X264_TYPE_BREF )
              q1 -= rcc->pb_offset/2;
  
          if( i0 && i1 )
@@ -2053,7 +2126,10 @@ static float rate_estimate_qscale( x264_t *h )
          if( rcc->b_2pass && rcc->b_vbv )
              rcc->frame_size_planned = qscale2bits( &rce, qp2qscale( q ) );
          else
-            rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, qp2qscale( q ), h->fref1[h->i_ref1-1]->i_satd );
+            rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, qp2qscale( q ), h->fref[1][h->i_ref[1]-1]->i_satd );
+        /* Limit planned size by MinCR */
+        if( rcc->b_vbv )
+            rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
          h->rc->frame_size_estimated = rcc->frame_size_planned;
  
          /* For row SATDs */
@@ -2158,7 +2234,7 @@ static float rate_estimate_qscale( x264_t *h )
              rcc->last_satd = x264_rc_analyse_slice( h );
              rcc->short_term_cplxsum *= 0.5;
              rcc->short_term_cplxcount *= 0.5;
-            rcc->short_term_cplxsum += rcc->last_satd;
+            rcc->short_term_cplxsum += rcc->last_satd / (CLIP_DURATION(h->fenc->f_duration) / BASE_FRAME_DURATION);
              rcc->short_term_cplxcount ++;
  
              rce.tex_bits = rcc->last_satd;
@@ -2169,6 +2245,7 @@ static float rate_estimate_qscale( x264_t *h )
              rce.s_count = 0;
              rce.qscale = 1;
              rce.pict_type = pict_type;
+            rce.i_duration = h->fenc->i_duration;
  
              if( h->param.rc.i_rc_method == X264_RC_CRF )
              {
@@ -2244,6 +2321,9 @@ static float rate_estimate_qscale( x264_t *h )
          /* Always use up the whole VBV in this case. */
          if( rcc->single_frame_vbv )
              rcc->frame_size_planned = rcc->buffer_rate;
+        /* Limit planned size by MinCR */
+        if( rcc->b_vbv )
+            rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
          h->rc->frame_size_estimated = rcc->frame_size_planned;
          return q;
      }
@@ -2354,13 +2434,14 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
          COPY(prev_zone);
          COPY(qpbuf_pos);
          /* these vars can be updated by x264_ratecontrol_init_reconfigurable */
-        COPY(buffer_rate);
+        COPY(bitrate);
          COPY(buffer_size);
+        COPY(buffer_rate);
+        COPY(vbv_max_rate);
          COPY(single_frame_vbv);
          COPY(cbr_decay);
-        COPY(b_vbv_min_rate);
          COPY(rate_factor_constant);
-        COPY(bitrate);
+        COPY(rate_factor_max_increment);
  #undef COPY
      }
      if( cur != next )
@@ -2515,10 +2596,11 @@ static int init_pass2( x264_t *h )
  {
      x264_ratecontrol_t *rcc = h->rc;
      uint64_t all_const_bits = 0;
+    double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
      double duration = 0;
      for( int i = 0; i < rcc->num_entries; i++ )
          duration += rcc->entry[i].i_duration;
-    duration *= (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+    duration *= timescale;
      uint64_t all_available_bits = h->param.rc.i_bitrate * 1000. * duration;
      double rate_factor, step_mult;
      double qblur = h->param.rc.f_qblur;
@@ -2557,21 +2639,23 @@ static int init_pass2( x264_t *h )
          for( int j = 1; j < cplxblur*2 && j < rcc->num_entries-i; j++ )
          {
              ratecontrol_entry_t *rcj = &rcc->entry[i+j];
+            double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION;
              weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
              if( weight < .0001 )
                  break;
              gaussian_weight = weight * exp( -j*j/200.0 );
              weight_sum += gaussian_weight;
-            cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
+            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration;
          }
          /* weighted average of cplx of past frames */
          weight = 1.0;
          for( int j = 0; j <= cplxblur*2 && j <= i; j++ )
          {
              ratecontrol_entry_t *rcj = &rcc->entry[i-j];
+            double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION;
              gaussian_weight = weight * exp( -j*j/200.0 );
              weight_sum += gaussian_weight;
-            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits);
+            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration;
              weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
              if( weight < .0001 )
                  break;
@@ -2618,14 +2702,14 @@ static int init_pass2( x264_t *h )
          /* find qscale */
          for( int i = 0; i < rcc->num_entries; i++ )
          {
-            qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, i );
+            qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, -1 );
              rcc->last_qscale_for[rcc->entry[i].pict_type] = qscale[i];
          }
  
          /* fixed I/B qscale relative to P */
          for( int i = rcc->num_entries-1; i >= 0; i-- )
          {
-            qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i] );
+            qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i], i );
              assert(qscale[i] >= 0);
          }