Fix 2pass ratecontrol with --nal-hrd cbr (exclude filler bits from the total bit count used for rate estimation)

[x264] / encoder / ratecontrol.c
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index 9b19511f0876c5b0ed49aa672470f5ba05155fdc..e0e6761148693490d719f3ccba6fe67c217851fe 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -1,7 +1,7 @@
  /*****************************************************************************
- * ratecontrol.c: h264 encoder library (Rate Control)
+ * ratecontrol.c: ratecontrol
   *****************************************************************************
- * Copyright (C) 2005-2008 x264 project
+ * Copyright (C) 2005-2010 x264 project
   *
   * Authors: Loren Merritt <lorenm@u.washington.edu>
   *          Michael Niedermayer <michaelni@gmx.at>
@@ -22,6 +22,9 @@
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
   *****************************************************************************/
  
  #define _ISOC99_SOURCE
@@ -78,7 +81,7 @@ struct x264_ratecontrol_t
      double rate_tolerance;
      double qcompress;
      int nmb;                    /* number of macroblocks in a frame */
-    int qp_constant[5];
+    int qp_constant[3];
  
      /* current frame */
      ratecontrol_entry_t *rce;
@@ -104,6 +107,7 @@ struct x264_ratecontrol_t
      double last_rceq;
      double cplxr_sum;           /* sum of bits*qscale/rceq */
      double expected_bits_sum;   /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
+    int64_t filler_bits_sum;    /* sum in bits of finished frames' filler data */
      double wanted_bits_window;  /* target bitrate * window */
      double cbr_decay;
      double short_term_cplxsum;
@@ -123,13 +127,13 @@ struct x264_ratecontrol_t
      int num_entries;            /* number of ratecontrol_entry_ts */
      ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
      double last_qscale;
-    double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
+    double last_qscale_for[3];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff */
      int last_non_b_pict_type;
      double accum_p_qp;          /* for determining I-frame quant */
      double accum_p_norm;
      double last_accum_p_norm;
-    double lmin[5];             /* min qscale by frame type */
-    double lmax[5];
+    double lmin[3];             /* min qscale by frame type */
+    double lmax[3];
      double lstep;               /* max change (multiply) in qscale per frame */
      uint16_t *qp_buffer[2];     /* Global buffers for converting MB-tree quantizer data. */
      int qpbuf_pos;              /* In order to handle pyramid reordering, QP buffer acts as a stack.
@@ -141,9 +145,8 @@ struct x264_ratecontrol_t
      double frame_size_maximum;  /* Maximum frame size due to MinCR */
      double frame_size_planned;
      double slice_size_planned;
-    double max_frame_error;
      predictor_t (*row_pred)[2];
-    predictor_t row_preds[5][2];
+    predictor_t row_preds[3][2];
      predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
      int bframes;                /* # consecutive B-frames before this P-frame */
      int bframe_bits;            /* total cost of those frames */
@@ -386,7 +389,7 @@ int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offs
          rc->qpbuf_pos--;
      }
      else
-        x264_adaptive_quant_frame( h, frame, quant_offsets );
+        x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets );
      return 0;
  fail:
      x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
@@ -639,7 +642,7 @@ int x264_ratecontrol_new( x264_t *h )
      int num_preds = h->param.b_sliced_threads * h->param.i_threads + 1;
      CHECKED_MALLOC( rc->pred, 5 * sizeof(predictor_t) * num_preds );
      CHECKED_MALLOC( rc->pred_b_from_p, sizeof(predictor_t) );
-    for( int i = 0; i < 5; i++ )
+    for( int i = 0; i < 3; i++ )
      {
          rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP );
          rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
@@ -1360,15 +1363,20 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  
          float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
          float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
+        float max_frame_error = X264_MAX( 0.05, 1.0 / (h->mb.i_mb_height) );
          float size_of_other_slices = 0;
          if( h->param.b_sliced_threads )
          {
+            float size_of_other_slices_planned = 0;
              for( int i = 0; i < h->param.i_threads; i++ )
                  if( h != h->thread[i] )
+                {
                      size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
+                    size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
+                }
+            float weight = rc->slice_size_planned / rc->frame_size_planned;
+            size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
          }
-        else
-            rc->max_frame_error = X264_MAX( 0.05, 1.0 / (h->mb.i_mb_width) );
  
          /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
          float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
@@ -1405,8 +1413,8 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  
          /* avoid VBV underflow or MinCR violation */
          while( (rc->qpm < qp_absolute_max)
-               && ((rc->buffer_fill - b1 < rc->buffer_rate * rc->max_frame_error) ||
-                   (rc->frame_size_maximum - b1 < rc->frame_size_maximum * rc->max_frame_error)))
+               && ((rc->buffer_fill - b1 < rc->buffer_rate * max_frame_error) ||
+                   (rc->frame_size_maximum - b1 < rc->frame_size_maximum * max_frame_error)))
          {
              rc->qpm += step_size;
              b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
@@ -1595,6 +1603,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
      }
  
      *filler = update_vbv( h, bits );
+    rc->filler_bits_sum += *filler * 8;
  
      if( h->sps->vui.b_nal_hrd_parameters_present )
      {
@@ -1999,7 +2008,8 @@ static float rate_estimate_qscale( x264_t *h )
      int pict_type = h->sh.i_type;
      int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I]
                            + h->stat.i_frame_size[SLICE_TYPE_P]
-                          + h->stat.i_frame_size[SLICE_TYPE_B]);
+                          + h->stat.i_frame_size[SLICE_TYPE_B])
+                       - rcc->filler_bits_sum;
  
      if( rcc->b_2pass )
      {
@@ -2287,8 +2297,8 @@ void x264_threads_distribute_ratecontrol( x264_t *h )
              for( int i = 0; i < h->param.i_threads; i++ )
              {
                  x264_t *t = h->thread[i];
-                t->rc->max_frame_error = X264_MAX( 0.05, 1.0 / (t->i_threadslice_end - t->i_threadslice_start) );
-                t->rc->slice_size_planned += 2 * t->rc->max_frame_error * rc->frame_size_planned;
+                float max_frame_error = X264_MAX( 0.05, 1.0 / (t->i_threadslice_end - t->i_threadslice_start) );
+                t->rc->slice_size_planned += 2 * max_frame_error * rc->frame_size_planned;
              }
              x264_threads_normalize_predictors( h );
          }
@@ -2360,6 +2370,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
           * to the context that's about to end (next) */
          COPY(cplxr_sum);
          COPY(expected_bits_sum);
+        COPY(filler_bits_sum);
          COPY(wanted_bits_window);
          COPY(bframe_bits);
          COPY(initial_cpb_removal_delay);
@@ -2514,6 +2525,7 @@ static int init_pass2( x264_t *h )
      const int filter_size = (int)(qblur*4) | 1;
      double expected_bits;
      double *qscale, *blurred_qscale;
+    double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  
      /* find total/average complexity & const_bits */
      for( int i = 0; i < rcc->num_entries; i++ )
@@ -2598,6 +2610,10 @@ static int init_pass2( x264_t *h )
          rcc->last_accum_p_norm = 1;
          rcc->accum_p_norm = 0;
  
+        rcc->last_qscale_for[0] =
+        rcc->last_qscale_for[1] =
+        rcc->last_qscale_for[2] = pow( base_cplx, 1 - rcc->qcompress ) / rate_factor;
+
          /* find qscale */
          for( int i = 0; i < rcc->num_entries; i++ )
          {