macroblock-level ratecontrol: improved vbv strictness, and improved quality when...

author Loren Merritt <pengvado@videolan.org>

Thu, 9 Mar 2006 15:59:08 +0000 (15:59 +0000)

committer Loren Merritt <pengvado@videolan.org>

Thu, 9 Mar 2006 15:59:08 +0000 (15:59 +0000)
author Loren Merritt <pengvado@videolan.org>
Thu, 9 Mar 2006 15:59:08 +0000 (15:59 +0000)
committer Loren Merritt <pengvado@videolan.org>
Thu, 9 Mar 2006 15:59:08 +0000 (15:59 +0000)
diff --git a/common/common.h b/common/common.h

index 8a9207bbc8303d9c8edaa929325cc4a0f16ce661..b0ae118fdf4d20632a0d5ab1460e50f6b38bb19d 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -55,6 +55,10 @@
  #define UNUSED
  #endif
  
+#define X264_BFRAME_MAX 16
+#define X264_SLICE_MAX 4
+#define X264_NAL_MAX (4 + X264_SLICE_MAX)
+
  /****************************************************************************
   * Includes
   ****************************************************************************/
@@ -217,10 +221,6 @@ static const int x264_scan8[16+2*4] =
   5   R R
  */
  
-#define X264_BFRAME_MAX 16
-#define X264_SLICE_MAX 4
-#define X264_NAL_MAX (4 + X264_SLICE_MAX)
-
  typedef struct x264_ratecontrol_t   x264_ratecontrol_t;
  typedef struct x264_vlc_table_t     x264_vlc_table_t;
  
diff --git a/common/frame.c b/common/frame.c

index 098256b80148857353a0259c0984dd6be023453f..ed73bb67da94519868dd89726a7163fc6d15f1c3 100644 (file)
--- a/common/frame.c
+++ b/common/frame.c
@@ -29,7 +29,7 @@
  x264_frame_t *x264_frame_new( x264_t *h )
  {
      x264_frame_t   *frame = x264_malloc( sizeof( x264_frame_t ) );
-    int i;
+    int i, j;
  
      int i_mb_count = h->mb.i_mb_count;
      int i_stride;
@@ -116,20 +116,27 @@ x264_frame_t *x264_frame_new( x264_t *h )
          frame->ref[1] = NULL;
      }
  
+    frame->i_row_bits = x264_malloc( i_lines/16 * sizeof( int ) );
+    frame->i_row_qp   = x264_malloc( i_lines/16 * sizeof( int ) );
+    for( i = 0; i < h->param.i_bframe + 2; i++ )
+        for( j = 0; j < h->param.i_bframe + 2; j++ )
+            frame->i_row_satds[i][j] = x264_malloc( i_lines/16 * sizeof( int ) );
+
      return frame;
  }
  
  void x264_frame_delete( x264_frame_t *frame )
  {
-    int i;
+    int i, j;
      for( i = 0; i < frame->i_plane; i++ )
-    {
          x264_free( frame->buffer[i] );
-    }
      for( i = 4; i < 12; i++ ) /* filtered planes */
-    {
          x264_free( frame->buffer[i] );
-    }
+    for( i = 0; i < X264_BFRAME_MAX+2; i++ )
+        for( j = 0; j < X264_BFRAME_MAX+2; j++ )
+            x264_free( frame->i_row_satds[i][j] );
+    x264_free( frame->i_row_bits );
+    x264_free( frame->i_row_qp );
      x264_free( frame->mb_type );
      x264_free( frame->mv[0] );
      x264_free( frame->mv[1] );
diff --git a/common/frame.h b/common/frame.h

index 13b0e6329e24a088599976a95863ca80b2c2a3f8..e682569a04195076213e626c78c23b8da71bdb9f 100644 (file)
--- a/common/frame.h
+++ b/common/frame.h
@@ -36,6 +36,7 @@ typedef struct
      int     i_frame;    /* Presentation frame number */
      int     i_frame_num; /* Coded frame number */
      int     b_kept_as_ref;
+    float   f_qp_avg;
  
      /* YUV buffer */
      int     i_plane;
@@ -62,8 +63,13 @@ typedef struct
      /* for adaptive B-frame decision.
       * contains the SATD cost of the lowres frame encoded in various modes
       * FIXME: how big an array do we need? */
-    int     i_cost_est[16][16];
-    int     i_intra_mbs[16];
+    int     i_cost_est[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
+    int     i_satd; // the i_cost_est of the selected frametype
+    int     i_intra_mbs[X264_BFRAME_MAX+2];
+    int     *i_row_satds[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
+    int     *i_row_satd;
+    int     *i_row_bits;
+    int     *i_row_qp;
  
  } x264_frame_t;
  
diff --git a/encoder/cavlc.c b/encoder/cavlc.c

index 3c863422fea7617b5edc306f8c0700e585d317fd..893d90dfc17c982eeaae96e052f0fe21379bbcde 100644 (file)
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -254,6 +254,18 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int *l, i
      }
  }
  
+static void cavlc_qp_delta( x264_t *h, bs_t *s ) /* Write the macroblock QP change (h->mb.i_qp - h->mb.i_last_qp) to s as one unsigned code number via bs_write_ue(). */
+{
+    int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
+    if( i_dqp ) /* a zero delta is written as code number 0 without any mapping */
+    {
+        i_dqp = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1); /* signed delta d -> code number: d > 0 gives 2d-1, d < 0 gives -2d */
+        if( i_dqp > 52 )
+            i_dqp = 103 - i_dqp; /* 103 - code is the code number of the delta moved by 52 towards zero (e.g. +27 -> code 53 -> 50, the code of -25); presumably valid because the decoder wraps QP modulo 52 -- TODO confirm against the mb_qp_delta semantics */
+    }
+    bs_write_ue( s, i_dqp );
+}
+
  static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list )
  {
      int i;
@@ -676,7 +688,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
      /* write residual */
      if( i_mb_type == I_16x16 )
      {
-        bs_write_se( s, h->mb.i_qp - h->mb.i_last_qp );
+        cavlc_qp_delta( h, s );
  
          /* DC Luma */
          block_residual_write_cavlc( h, s, BLOCK_INDEX_LUMA_DC , h->dct.luma16x16_dc, 16 );
@@ -688,7 +700,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
      }
      else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 )
      {
-        bs_write_se( s, h->mb.i_qp - h->mb.i_last_qp );
+        cavlc_qp_delta( h, s );
          x264_macroblock_luma_write_cavlc( h, s );
      }
      if( h->mb.i_cbp_chroma != 0 )
diff --git a/encoder/encoder.c b/encoder/encoder.c

index 15aa22f4ccdfa2d220738fc41d18ed662fb8f85d..6ed7862e13991c314251de81d023b5b02106cb6b 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -189,7 +189,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
      /* If effective qp <= 15, deblocking would have no effect anyway */
      if( param->b_deblocking_filter
          && ( h->mb.b_variable_qp
-        || 15 < i_qp + X264_MAX(param->i_deblocking_filter_alphac0, param->i_deblocking_filter_beta) ) )
+        || 15 < i_qp + 2 * X264_MAX(param->i_deblocking_filter_alphac0, param->i_deblocking_filter_beta) ) )
      {
          sh->i_disable_deblocking_filter_idc = 0;
      }
@@ -633,6 +633,7 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
               param->cpu&X264_CPU_ALTIVEC ? "Altivec " : "" );
  
      h->thread[0] = h;
+    h->i_thread_num = 0;
      for( i = 1; i < param->i_threads; i++ )
          h->thread[i] = x264_malloc( sizeof(x264_t) );
  
@@ -1087,6 +1088,7 @@ static inline int x264_slices_write( x264_t *h )
  
      if( h->param.i_threads == 1 )
      {
+        x264_ratecontrol_threads_start( h );
          x264_slice_write( h );
          i_frame_size = h->out.nal[h->out.i_nal-1].i_payload;
      }
@@ -1104,11 +1106,13 @@ static inline int x264_slices_write( x264_t *h )
                  memcpy( t, h, sizeof(x264_t) );
                  t->out.p_bitstream += i*i_bs_size;
                  bs_init( &t->out.bs, t->out.p_bitstream, i_bs_size );
+                t->i_thread_num = i;
              }
              t->sh.i_first_mb = (i    * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width;
              t->sh.i_last_mb = ((i+1) * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width;
              t->out.i_nal = i_nal + i;
          }
+        x264_ratecontrol_threads_start( h );
  
          /* dispatch */
  #if HAVE_PTHREAD
@@ -1498,24 +1502,21 @@ do_encode:
  
      /* ---------------------- Update encoder state ------------------------- */
  
+    /* update rc */
+    x264_cpu_restore( h->param.cpu );
+    x264_ratecontrol_end( h, i_frame_size * 8 );
+
      /* handle references */
      if( i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE )
-    {
          x264_reference_update( h );
-    }
+    x264_frame_put( h->frames.unused, h->fenc );
  
      /* increase frame count */
      h->i_frame++;
  
      /* restore CPU state (before using float again) */
-    /* XXX: not needed? (done above) */
      x264_cpu_restore( h->param.cpu );
  
-    /* update rc */
-    x264_ratecontrol_end( h, i_frame_size * 8 );
-
-    x264_frame_put( h->frames.unused, h->fenc );
-
      x264_noise_reduction_update( h );
  
      TIMER_STOP( i_mtime_encode_frame );
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index c54437262962eda317e5e089609a48f039fd9033..578f2d7e8f6ca19495b7db20bfeadab7189f2719 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -84,6 +84,7 @@ struct x264_ratecontrol_t
      /* constants */
      int b_abr;
      int b_2pass;
+    int b_vbv;
      double fps;
      double bitrate;
      double rate_tolerance;
@@ -93,7 +94,8 @@ struct x264_ratecontrol_t
      /* current frame */
      ratecontrol_entry_t *rce;
      int qp;                     /* qp for current frame */
-    float qpa;                  /* average of macroblocks' qp (same as qp if no adaptive quant) */
+    int qpm;                    /* qp for current macroblock */
+    float qpa;                  /* average of macroblocks' qp */
      int slice_type;
      int qp_force;
  
@@ -113,6 +115,8 @@ struct x264_ratecontrol_t
      double short_term_cplxsum;
      double short_term_cplxcount;
      double rate_factor_constant;
+    double ip_offset;
+    double pb_offset;
  
      /* 2pass stuff */
      FILE *p_stat_file_out;
@@ -134,6 +138,15 @@ struct x264_ratecontrol_t
      double mv_bits_sum[5];
      int frame_count[5];         /* number of frames of each type */
  
+    /* MBRC stuff */
+    double frame_size_planned;
+    int first_row, last_row;    /* region of the frame to be encoded by this thread */
+    predictor_t *row_pred;
+    predictor_t row_preds[5];
+    predictor_t pred_b_from_p;  /* predict B-frame size from P-frame satd */
+    int bframes;                /* # consecutive B-frames before this P-frame */
+    int bframe_bits;            /* total cost of those frames */
+
      int i_zones;
      x264_zone_t *zones;
  };
@@ -143,6 +156,8 @@ static int parse_zones( x264_t *h );
  static int init_pass2(x264_t *);
  static float rate_estimate_qscale( x264_t *h, int pict_type );
  static void update_vbv( x264_t *h, int bits );
+static double predict_size( predictor_t *p, double q, double var );
+static void update_predictor( predictor_t *p, double q, double var, double bits );
  int  x264_rc_analyse_slice( x264_t *h );
  
  /* Terminology:
@@ -179,12 +194,11 @@ int x264_ratecontrol_new( x264_t *h )
  
      x264_cpu_restore( h->param.cpu );
  
-    h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
-    memset(rc, 0, sizeof(*rc));
+    h->rc = rc = x264_malloc( h->param.i_threads * sizeof(x264_ratecontrol_t) );
+    memset( rc, 0, h->param.i_threads * sizeof(x264_ratecontrol_t) );
  
      rc->b_abr = ( h->param.rc.b_cbr || h->param.rc.i_rf_constant ) && !h->param.rc.b_stat_read;
      rc->b_2pass = h->param.rc.b_cbr && h->param.rc.b_stat_read;
-    h->mb.b_variable_qp = 0;
      
      /* FIXME: use integers */
      if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
@@ -217,8 +231,8 @@ int x264_ratecontrol_new( x264_t *h )
      else if( h->param.rc.i_vbv_max_bitrate > 0 &&
               h->param.rc.i_vbv_buffer_size > 0 )
      {
-        if( h->param.rc.i_vbv_buffer_size < 10 * h->param.rc.i_vbv_max_bitrate / rc->fps ) {
-            h->param.rc.i_vbv_buffer_size = 10 * h->param.rc.i_vbv_max_bitrate / rc->fps;
+        if( h->param.rc.i_vbv_buffer_size < 3 * h->param.rc.i_vbv_max_bitrate / rc->fps ) {
+            h->param.rc.i_vbv_buffer_size = 3 * h->param.rc.i_vbv_max_bitrate / rc->fps;
              x264_log( h, X264_LOG_ERROR, "VBV buffer size too small, using %d kbit\n",
                        h->param.rc.i_vbv_buffer_size );
          }
@@ -227,6 +241,7 @@ int x264_ratecontrol_new( x264_t *h )
          rc->buffer_fill = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
          rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
                        * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
+        rc->b_vbv = 1;
      }
      else if( h->param.rc.i_vbv_max_bitrate )
          x264_log(h, X264_LOG_ERROR, "VBV maxrate specified, but no bufsize.\n");
@@ -235,6 +250,8 @@ int x264_ratecontrol_new( x264_t *h )
          rc->rate_tolerance = 0.01;
      }
  
+    h->mb.b_variable_qp = rc->b_vbv;
+
      if( rc->b_abr )
      {
          /* FIXME shouldn't need to arbitrarily specify a QP,
@@ -254,9 +271,11 @@ int x264_ratecontrol_new( x264_t *h )
                                   / qp2qscale( h->param.rc.i_rf_constant );
      }
  
+    rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
+    rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
      rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
-    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
-    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
+    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
+    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
  
      rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
      rc->last_qscale = qp2qscale(26);
@@ -268,7 +287,11 @@ int x264_ratecontrol_new( x264_t *h )
          rc->pred[i].coeff= 2.0;
          rc->pred[i].count= 1.0;
          rc->pred[i].decay= 0.5;
+        rc->row_preds[i].coeff= .25;
+        rc->row_preds[i].count= 1.0;
+        rc->row_preds[i].decay= 0.5;
      }
+    rc->pred_b_from_p = rc->pred[0];
  
      if( parse_zones( h ) < 0 )
          return -1;
@@ -552,19 +575,34 @@ void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
          }
      }
  
+    if( h->fdec->i_row_bits )
+    {
+        memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) );
+    }
+
+    if( i_slice_type != SLICE_TYPE_B )
+    {
+        rc->bframe_bits = 0;
+        rc->bframes = 0;
+        while( h->frames.current[rc->bframes] && IS_X264_TYPE_B(h->frames.current[rc->bframes]->i_type) )
+            rc->bframes++;
+    }
+
+    rc->qpa = 0;
+
      if( i_force_qp )
      {
-        rc->qpa = rc->qp = i_force_qp - 1;
+        rc->qpm = rc->qp = i_force_qp - 1;
      }
      else if( rc->b_abr )
      {
-        rc->qpa = rc->qp =
+        rc->qpm = rc->qp =
              x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 );
      }
      else if( rc->b_2pass )
      {
          rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
-        rc->qpa = rc->qp = rce->new_qp =
+        rc->qpm = rc->qp = rce->new_qp =
              x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 );
      }
      else /* CQP */
@@ -574,18 +612,103 @@ void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
              q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
          else
              q = rc->qp_constant[ i_slice_type ];
-        rc->qpa = rc->qp = q;
+        rc->qpm = rc->qp = q;
      }
  }
  
+double predict_row_size( x264_t *h, int y, int qp ) /* Estimate the bit cost of macroblock row y of h->fdec when coded at quantizer qp. */
+{
+    /* average between two predictors:
+     * absolute SATD, and scaled bit cost of the colocated row in the previous frame */
+    x264_ratecontrol_t *rc = h->rc;
+    double pred_s = predict_size( rc->row_pred, qp2qscale(qp), h->fdec->i_row_satd[y] ); /* estimate from this row's SATD through the row predictor */
+    double pred_t = 0;
+    if( rc->slice_type != SLICE_TYPE_I /* the reference-based estimate is not used for I slices, ... */
+        && h->fref0[0]->i_type == h->fdec->i_type /* ... nor when h->fref0[0] has a different frame type, ... */
+        && h->fref0[0]->i_row_satd[y] > 0 ) /* ... nor when its row SATD is not positive (it is the divisor below) */
+    {
+        pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y] /* NOTE(review): these three operands are int, so the product and quotient are evaluated in integer arithmetic before the qscale scaling -- confirm overflow/truncation is acceptable */
+                 * qp2qscale(h->fref0[0]->i_row_qp[y]) / qp2qscale(qp); /* rescale from the QP recorded for the reference row to the requested qp */
+    }
+    if( pred_t == 0 )
+        pred_t = pred_s; /* no usable reference estimate: the average below collapses to pred_s */
+
+    return (pred_s + pred_t) / 2;
+}
+
+double predict_row_size_sum( x264_t *h, int y, int qp ) /* Projected bit total for rows h->rc->first_row..h->rc->last_row: recorded bits for rows up to y plus predictions at qp for the rest. */
+{
+    int i;
+    double bits = 0;
+    for( i = h->rc->first_row; i <= y; i++ )
+        bits += h->fdec->i_row_bits[i]; /* rows first_row..y: bits accumulated in i_row_bits */
+    for( i = y+1; i <= h->rc->last_row; i++ )
+        bits += predict_row_size( h, i, qp ); /* rows y+1..last_row: estimated cost at qp */
+    return bits;
+}
+
  void x264_ratecontrol_mb( x264_t *h, int bits )
  {
-    /* currently no adaptive quant */
+    x264_ratecontrol_t *rc = h->rc; /* Per-macroblock hook: records bits for the current MB and, after the last MB of a row with VBV active, picks rc->qpm for the following row. */
+    const int y = h->mb.i_mb_y;
+
+    x264_cpu_restore( h->param.cpu );
+
+    h->fdec->i_row_bits[y] += bits; /* accumulate this MB's bits into its row total */
+    rc->qpa += rc->qpm; /* running sum of the QP used for each MB */
+
+    if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv ) /* the rest runs only after the last MB of a row, and only when b_vbv is set */
+        return;
+
+    h->fdec->i_row_qp[y] = rc->qpm; /* remember the QP in effect at the end of this row */
+
+    if( rc->slice_type == SLICE_TYPE_B )
+    {
+        /* B-frames shouldn't use lower QP than their reference frames */
+        if( y < rc->last_row )
+        {
+            rc->qpm = X264_MAX( rc->qp, /* QP for row y+1: at least the frame QP and at least the lower of the two references' QP for that row */
+                      X264_MIN( h->fref0[0]->i_row_qp[y+1],
+                                h->fref1[0]->i_row_qp[y+1] ));
+        }
+    }
+    else
+    {
+        update_predictor( rc->row_pred, qp2qscale(rc->qpm), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] ); /* feed the finished row (qscale, SATD, actual bits) into the row predictor */
+
+        /* tweak quality based on difference from predicted size */
+        if( y < rc->last_row && h->stat.i_slice_count[rc->slice_type] > 0 ) /* presumably the slice-count test skips adaptation until a slice of this type has been coded -- TODO confirm */
+        {
+            int prev_row_qp = h->fdec->i_row_qp[y];
+            int b0 = predict_row_size_sum( h, y, rc->qpm ); /* the double result is truncated to int here */
+            int b1 = b0; /* b0 is not read again after this line */
+            int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max ); /* the next row may go at most i_qp_step above this row's QP, capped by the configured maximum */
+            int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min ); /* and at most i_qp_step below it, floored by the configured minimum */
+            float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned; /* buffer fill minus the size planned for this frame */
+
+            while( rc->qpm < i_qp_max /* raise QP while the projection exceeds the plan by more than 15%, or would leave less than half of the planned buffer margin */
+                   && (b1 > rc->frame_size_planned * 1.15
+                    || (rc->buffer_fill - b1 < buffer_left_planned * 0.5)))
+            {
+                rc->qpm ++;
+                b1 = predict_row_size_sum( h, y, rc->qpm ); /* re-project the total at the new QP */
+            }
+
+            while( rc->qpm > i_qp_min /* lower QP only while the planned margin exceeds 40% of the buffer and the projection is either under 80% of the plan (without going above the previous row's QP) or below 1.1 * (buffer_fill - buffer_size + buffer_rate) */
+                   && buffer_left_planned > rc->buffer_size * 0.4
+                   && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp)
+                     || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
+            {
+                rc->qpm --;
+                b1 = predict_row_size_sum( h, y, rc->qpm ); /* re-project the total at the new QP */
+            }
+        }
+    }
  }
  
  int x264_ratecontrol_qp( x264_t *h )
  {
-    return h->rc->qp;
+    return h->rc->qpm; /* the current-macroblock QP (rc->qpm) instead of the frame-level rc->qp */
  }
  
  /* In 2pass, force the same frame types as in the 1st pass */
@@ -654,6 +777,16 @@ void x264_ratecontrol_end( x264_t *h, int bits )
      for( i = B_DIRECT; i < B_8x8; i++ )
          h->stat.frame.i_mb_count_p += mbs[i];
  
+    if( rc->b_vbv )
+    {
+        for( i = 1; i < h->param.i_threads; i++ )
+            rc->qpa += rc[i].qpa;
+        rc->qpa /= h->mb.i_mb_count;
+    }
+    else
+        rc->qpa = rc->qp;
+    h->fdec->f_qp_avg = rc->qpa;
+
      if( h->param.rc.b_stat_write )
      {
          char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
@@ -667,7 +800,7 @@ void x264_ratecontrol_end( x264_t *h, int bits )
                          : '-';
          fprintf( rc->p_stat_file_out,
                   "in:%d out:%d type:%c q:%.2f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
-                 h->fenc->i_frame, h->i_frame-1,
+                 h->fenc->i_frame, h->i_frame,
                   c_type, rc->qpa,
                   h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
                   h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
@@ -705,6 +838,24 @@ void x264_ratecontrol_end( x264_t *h, int bits )
          rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) );
      }
  
+    if( rc->b_vbv )
+    {
+        if( rc->slice_type == SLICE_TYPE_B )
+        {
+            rc->bframe_bits += bits;
+            if( !h->frames.current[0] || !IS_X264_TYPE_B(h->frames.current[0]->i_type) )
+                update_predictor( &rc->pred_b_from_p, qp2qscale(rc->qpa), h->fref1[0]->i_satd, rc->bframe_bits / rc->bframes );
+        }
+        else
+        {
+            /* Update row predictor based on data collected by other threads. */
+            int y;
+            for( y = rc->last_row+1; y < h->sps->i_mb_height; y++ )
+                update_predictor( rc->row_pred, qp2qscale(h->fdec->i_row_qp[y]), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
+            rc->row_preds[rc->slice_type] = *rc->row_pred;
+        }
+    }
+
      update_vbv( h, bits );
  
      if( rc->slice_type != SLICE_TYPE_B )
@@ -891,16 +1042,17 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
  static void update_vbv( x264_t *h, int bits )
  {
      x264_ratecontrol_t *rcc = h->rc;
-    if( !rcc->buffer_size )
+
+    if( rcc->last_satd >= h->mb.i_mb_count )
+        update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits );
+
+    if( !rcc->b_vbv )
          return;
  
      rcc->buffer_fill += rcc->buffer_rate - bits;
      if( rcc->buffer_fill < 0 && !rcc->b_2pass )
          x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rcc->buffer_fill );
      rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size );
-
-    if(rcc->last_satd > 100)
-        update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits );
  }
  
  // apply VBV constraints and clip qscale to between lmin and lmax
@@ -915,17 +1067,18 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
       * since they are controlled by the P-frames' QPs.
       * FIXME: in 2pass we could modify previous frames' QP too,
       *        instead of waiting for the buffer to fill */
-    if( rcc->buffer_size &&
+    if( rcc->b_vbv &&
          ( pict_type == SLICE_TYPE_P ||
            ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
      {
          if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
              q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
      }
-    /* Now a hard threshold to make sure the frame fits in VBV.
-     * This one is mostly for I-frames. */
-    if( rcc->buffer_size && rcc->last_satd > 0 )
+
+    if( rcc->b_vbv && rcc->last_satd > 0 )
      {
+        /* Now a hard threshold to make sure the frame fits in VBV.
+         * This one is mostly for I-frames. */
          double bits = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd );
          double qf = 1.0;
          if( bits > rcc->buffer_fill/2 )
@@ -935,6 +1088,28 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
          if( bits < rcc->buffer_rate/2 )
              q *= bits*2/rcc->buffer_rate;
          q = X264_MAX( q0, q );
+
+        /* Check B-frame complexity, and use up any bits that would
+         * overflow before the next P-frame. */
+        if( rcc->slice_type == SLICE_TYPE_P )
+        {
+            int nb = rcc->bframes;
+            double pbbits = bits;
+            double bbits = predict_size( &rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd );
+            double space;
+
+            if( bbits > rcc->buffer_rate )
+                nb = 0;
+            pbbits += nb * bbits;
+
+            space = rcc->buffer_fill + (1+nb)*rcc->buffer_rate - rcc->buffer_size;
+            if( pbbits < space )
+            {
+                q *= X264_MAX( pbbits / space,
+                               bits / (0.5 * rcc->buffer_size) );
+            }
+            q = X264_MAX( q0-5, q );
+        }
      }
  
      if(lmin==lmax)
@@ -976,12 +1151,37 @@ static float rate_estimate_qscale(x264_t *h, int pict_type)
  
      if( pict_type == SLICE_TYPE_B )
      {
-        rcc->last_satd = 0;
+        /* B-frames don't have independent ratecontrol, but rather get the
+         * average QP of the two adjacent P-frames + an offset */
+
+        int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type);
+        int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type);
+        int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc);
+        int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc);
+        float q0 = h->fref0[0]->f_qp_avg;
+        float q1 = h->fref1[0]->f_qp_avg;
+
+        if( h->fref0[0]->i_type == X264_TYPE_BREF )
+            q0 -= rcc->pb_offset/2;
+        if( h->fref1[0]->i_type == X264_TYPE_BREF )
+            q1 -= rcc->pb_offset/2;
+
+        if(i0 && i1)
+            q = (q0 + q1) / 2 + rcc->ip_offset;
+        else if(i0)
+            q = q1;
+        else if(i1)
+            q = q0;
+        else
+            q = (q0*dt1 + q1*dt0) / (dt0 + dt1);
+
          if(h->fenc->b_kept_as_ref)
-            q = rcc->last_qscale * sqrtf(h->param.rc.f_pb_factor);
+            q += rcc->pb_offset/2;
          else
-            q = rcc->last_qscale * h->param.rc.f_pb_factor;
-        return x264_clip3f(q, lmin, lmax);
+            q += rcc->pb_offset;
+
+        rcc->last_satd = 0;
+        return qp2qscale(q);
      }
      else
      {
@@ -1082,10 +1282,47 @@ static float rate_estimate_qscale(x264_t *h, int pict_type)
          rcc->last_qscale_for[pict_type] =
          rcc->last_qscale = q;
  
+        rcc->frame_size_planned = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd );
+
          return q;
      }
  }
  
+/* Distribute bits among the slices, proportional to their estimated complexity */
+void x264_ratecontrol_threads_start( x264_t *h )
+{
+    x264_ratecontrol_t *rc = h->rc; /* used as an array with one element per thread (rc[t]) */
+    int t, y;
+    double den = 0; /* sum of the per-thread weights computed below */
+    double frame_size_planned = rc->frame_size_planned; /* whole-frame plan, saved before rc[0].frame_size_planned is overwritten */
+
+    for( t = 0; t < h->param.i_threads; t++ )
+    {
+        h->thread[t]->rc = &rc[t]; /* each thread context points at its own ratecontrol element */
+        if( t > 0 )
+            rc[t] = rc[0]; /* the other elements start as copies of rc[0] */
+    }
+
+    if( !rc->b_vbv || rc->slice_type == SLICE_TYPE_B ) /* no per-thread planning without VBV or for B slices */
+        return;
+
+    for( t = 0; t < h->param.i_threads; t++ )
+    {
+        rc[t].first_row = h->thread[t]->sh.i_first_mb / h->sps->i_mb_width;
+        rc[t].last_row = (h->thread[t]->sh.i_last_mb-1) / h->sps->i_mb_width; /* i_last_mb is treated here as one past the slice's final MB */
+        rc[t].frame_size_planned = 1; /* with a single thread this stays 1, so den == 1 and the scaling below restores the saved whole-frame plan */
+        rc[t].row_pred = &rc[t].row_preds[rc->slice_type];
+        if( h->param.i_threads > 1 )
+        {
+            for( y = rc[t].first_row; y<= rc[t].last_row; y++ )
+                rc[t].frame_size_planned += predict_row_size( h, y, qscale2qp(rc[t].qp) ); /* NOTE(review): rc->qp is documented in the struct as a QP, yet it is passed through qscale2qp() before reaching predict_row_size()'s qp argument -- confirm intent; the sums are only used as relative weights */
+        }
+        den += rc[t].frame_size_planned;
+    }
+    for( t = 0; t < h->param.i_threads; t++ )
+        rc[t].frame_size_planned *= frame_size_planned / den; /* normalise so the per-thread plans add up to the whole-frame plan */
+}
+
  static int init_pass2( x264_t *h )
  {
      x264_ratecontrol_t *rcc = h->rc;
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h

index ceb50830fbecadf5b69182867fec701169a81d3f..a18c4922bfabbf770520e96219ab409df2f08c51 100644 (file)
--- a/encoder/ratecontrol.h
+++ b/encoder/ratecontrol.h
@@ -28,6 +28,7 @@ int  x264_ratecontrol_new   ( x264_t * );
  void x264_ratecontrol_delete( x264_t * );
  
  void x264_ratecontrol_start( x264_t *, int i_slice_type, int i_force_qp );
+void x264_ratecontrol_threads_start( x264_t * );
  int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
  void x264_ratecontrol_mb( x264_t *, int bits );
  int  x264_ratecontrol_qp( x264_t * );
diff --git a/encoder/slicetype_decision.c b/encoder/slicetype_decision.c

index f7dc52dad5776bae6946fd367954c80be088da5b..bc3effa598660d8fa13c1ffda242e248e98e500c 100644 (file)
--- a/encoder/slicetype_decision.c
+++ b/encoder/slicetype_decision.c
@@ -57,7 +57,6 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  
      uint8_t pix1[9*9], pix2[8*8];
      x264_me_t m[2];
-    int mvc[4][2], i_mvc;
      int i_bcost = COST_MAX;
      int i_cost_bak;
      int l, i;
@@ -73,7 +72,7 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
      h->mb.mv_max_fpel[0] = 8*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 4;
      h->mb.mv_min_spel[0] = 4*( h->mb.mv_min_fpel[0] - 8 );
      h->mb.mv_max_spel[0] = 4*( h->mb.mv_max_fpel[0] + 8 );
-    if( h->mb.i_mb_x <= 1)
+    if( h->mb.i_mb_x <= 1 )
      {
          h->mb.mv_min_fpel[1] = -8*h->mb.i_mb_y - 4;
          h->mb.mv_max_fpel[1] = 8*( h->sps->i_mb_height - h->mb.i_mb_y - 1 ) + 4;
@@ -154,18 +153,35 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
      i_cost_bak = i_bcost;
      for( l = 0; l < 1 + b_bidir; l++ )
      {
+        int mvc[4][2] = {{0}}, i_mvc;
          int16_t (*fenc_mv)[2] = &fenc->mv[l][i_mb_xy];
-        mvc[0][0] = fenc_mv[-1][0];
-        mvc[0][1] = fenc_mv[-1][1];
-        mvc[1][0] = fenc_mv[-i_mb_stride][0];
-        mvc[1][1] = fenc_mv[-i_mb_stride][1];
-        mvc[2][0] = fenc_mv[-i_mb_stride+1][0];
-        mvc[2][1] = fenc_mv[-i_mb_stride+1][1];
-        mvc[3][0] = fenc_mv[-i_mb_stride-1][0];
-        mvc[3][1] = fenc_mv[-i_mb_stride-1][1];
+        i_mvc = 0;
+        if( i_mb_x > 0 )
+        {
+            mvc[i_mvc][0] = fenc_mv[-1][0];
+            mvc[i_mvc][1] = fenc_mv[-1][1];
+            i_mvc++;
+        }
+        if( i_mb_y > 0 )
+        {
+            mvc[i_mvc][0] = fenc_mv[-i_mb_stride][0];
+            mvc[i_mvc][1] = fenc_mv[-i_mb_stride][1];
+            i_mvc++;
+            if( i_mb_x < h->sps->i_mb_width - 1 )
+            {
+                mvc[i_mvc][0] = fenc_mv[-i_mb_stride+1][0];
+                mvc[i_mvc][1] = fenc_mv[-i_mb_stride+1][1];
+                i_mvc++;
+            }
+            if( i_mb_x > 0 )
+            {
+                mvc[i_mvc][0] = fenc_mv[-i_mb_stride-1][0];
+                mvc[i_mvc][1] = fenc_mv[-i_mb_stride-1][1];
+                i_mvc++;
+            }
+        }
          m[l].mvp[0] = x264_median( mvc[0][0], mvc[1][0], mvc[2][0] );
          m[l].mvp[1] = x264_median( mvc[0][1], mvc[1][1], mvc[2][1] );
-        i_mvc = 4;
  
          x264_me_search( h, &m[l], mvc, i_mvc );
  
@@ -200,8 +216,12 @@ lowres_intra_mb:
          }
          if( i_bcost != i_cost_bak )
          {
-            if( !b_bidir )
+            if( !b_bidir
+                && i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
+                && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1 )
+            {
                  fenc->i_intra_mbs[b-p0]++;
+            }
              if( p1 > p0+1 )
                  i_bcost = i_bcost * 9 / 8; // arbitray penalty for I-blocks in and after B-frames
          }
@@ -217,6 +237,7 @@ int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  {
      int i_score = 0;
      int dist_scale_factor = 128;
+    int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
  
      /* Check whether we already evaluated this frame
       * If we have tried this frame as P, then we have also tried
@@ -235,10 +256,31 @@ int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
      if( p1 != p0 )
          dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
  
-    /* Skip the outermost ring of macroblocks, to simplify mv range and intra prediction. */
-    for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ )
-        for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ )
-            i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+    /* the edge mbs seem to reduce the predictive quality of the
+     * whole frame's score, but are needed for a spatial distribution. */
+    if( h->param.rc.i_vbv_buffer_size )
+    {
+        for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+        {
+            row_satd[ h->mb.i_mb_y ] = 0;
+            for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+            {
+                int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+                row_satd[ h->mb.i_mb_y ] += i_mb_cost;
+                if( h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
+                    h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 )
+                {
+                    i_score += i_mb_cost;
+                }
+            }
+        }
+    }
+    else
+    {
+        for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ )
+            for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ )
+                i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+    }
  
      if( b != p1 )
          i_score = i_score * 100 / (120 + h->param.i_bframe_bias);
@@ -376,22 +418,38 @@ void x264_slicetype_decide( x264_t *h )
  
  int x264_rc_analyse_slice( x264_t *h )
  {
-    int p1 = 0;
      x264_mb_analysis_t a;
      x264_frame_t *frames[X264_BFRAME_MAX+2] = { NULL, };
+    int p0=0, p1, b; /* indices into frames[]: p0 holds h->fref0[0], b holds h->fenc, p1 holds h->fref1[0] for B-frames and equals b otherwise */
+    int cost;
  
-    if( IS_X264_TYPE_I(h->fenc->i_type) )
-        return x264_slicetype_frame_cost( h, &a, &h->fenc, 0, 0, 0 );
-
-    while( h->frames.current[p1] && IS_X264_TYPE_B( h->frames.current[p1]->i_type ) )
-        p1++;
-    p1++;
-    if( h->fenc->i_cost_est[p1][0] >= 0 )
-        return h->fenc->i_cost_est[p1][0];
-
-    frames[0] = h->fref0[0];
-    frames[p1] = h->fenc;
      x264_lowres_context_init( h, &a );
  
-    return x264_slicetype_frame_cost( h, &a, frames, 0, p1, p1 );
+    if( IS_X264_TYPE_I(h->fenc->i_type) )
+    {
+        p1 = b = 0; /* I-frame: all three indices are 0 */
+    }
+    else if( X264_TYPE_P == h->fenc->i_type )
+    {
+        p1 = 0;
+        while( h->frames.current[p1] && IS_X264_TYPE_B( h->frames.current[p1]->i_type ) ) /* count the leading B-type entries of h->frames.current */
+            p1++;
+        p1++; /* index of the P-frame is that count plus one */
+        b = p1;
+    }
+    else //B
+    {
+        p1 = (h->fref1[0]->i_poc - h->fref0[0]->i_poc)/2; /* presumably POC advances by 2 per frame, hence the /2 -- TODO confirm */
+        b  = (h->fref1[0]->i_poc - h->fenc->i_poc)/2; /* NOTE(review): this is the distance from fenc to fref1, while p1 is measured from fref0; the two conventions agree only when fenc is midway -- confirm intent */
+        frames[p1] = h->fref1[0];
+    }
+    frames[p0] = h->fref0[0];
+    frames[b] = h->fenc; /* written after frames[p0], so when b == p0 (the I-frame case) slot 0 ends up holding h->fenc */
+
+    cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b );
+    h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b]; /* same [b-p0][p1-b] slot that x264_slicetype_frame_cost() takes its row_satd pointer from */
+    h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
+    h->fdec->i_satd = cost;
+    memcpy( h->fdec->i_row_satd, h->fenc->i_row_satd, h->sps->i_mb_height * sizeof(int) ); /* copy one int per macroblock row from fenc to fdec */
+    return cost;
  }
author	Loren Merritt <pengvado@videolan.org>
	Thu, 9 Mar 2006 15:59:08 +0000 (15:59 +0000)
committer	Loren Merritt <pengvado@videolan.org>
	Thu, 9 Mar 2006 15:59:08 +0000 (15:59 +0000)
common/common.h		patch \| blob \| history
common/frame.c		patch \| blob \| history
common/frame.h		patch \| blob \| history
encoder/cavlc.c		patch \| blob \| history
encoder/encoder.c		patch \| blob \| history
encoder/ratecontrol.c		patch \| blob \| history
encoder/ratecontrol.h		patch \| blob \| history
encoder/slicetype_decision.c		patch \| blob \| history