Merge some of adaptive quant and weightp

author Fiona Glaser <fiona@x264.com>

Wed, 26 May 2010 19:55:35 +0000 (12:55 -0700)

committer Fiona Glaser <fiona@x264.com>

Mon, 31 May 2010 18:43:05 +0000 (11:43 -0700)
author Fiona Glaser <fiona@x264.com>
Wed, 26 May 2010 19:55:35 +0000 (12:55 -0700)
committer Fiona Glaser <fiona@x264.com>
Mon, 31 May 2010 18:43:05 +0000 (11:43 -0700)
diff --git a/common/frame.h b/common/frame.h

index 91d27b5125d3239debfbed952500f8f38c622757..ca5cb7a1ef4b4ff2c112db76a2a04ae0a6e9b987 100644 (file)
--- a/common/frame.h
+++ b/common/frame.h
@@ -118,8 +118,8 @@ typedef struct x264_frame
      uint16_t *i_inv_qscale_factor;
      int     b_scenecut; /* Set to zero if the frame cannot possibly be part of a real scenecut. */
      float   f_weighted_cost_delta[X264_BFRAME_MAX+2];
-    uint32_t i_pixel_sum;
-    uint64_t i_pixel_ssd;
+    uint32_t i_pixel_sum[3];
+    uint64_t i_pixel_ssd[3];
  
      /* hrd */
      x264_hrd_t hrd_timing;
diff --git a/encoder/analyse.h b/encoder/analyse.h

index 7c2c22c9ec7e2ec8bd8b38a8fb39cb2cea51c392..53e4c2e9362bd2a09e2173dc3bd134771278896a 100644 (file)
--- a/encoder/analyse.h
+++ b/encoder/analyse.h
@@ -33,7 +33,6 @@ void x264_slicetype_decide( x264_t *h );
  void x264_slicetype_analyse( x264_t *h, int keyframe );
  
  int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
-void x264_weight_plane_analyse( x264_t *h, x264_frame_t *frame );
  
  int  x264_lookahead_init( x264_t *h, int i_slicetype_length );
  int  x264_lookahead_is_empty( x264_t *h );
diff --git a/encoder/encoder.c b/encoder/encoder.c

index c1cf92e98d6dd325fb1ffb938fea93e1bca6459e..b5a90686f283eba6e0ff81f8224ffa0fc204d78c 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -2246,21 +2246,17 @@ int     x264_encoder_encode( x264_t *h,
                  fenc->i_pic_struct = PIC_STRUCT_PROGRESSIVE;
          }
  
-        if( h->frames.b_have_lowres )
-        {
-            if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE || h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
-                x264_weight_plane_analyse( h, fenc );
-            x264_frame_init_lowres( h, fenc );
-        }
-
          if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
          {
              if( x264_macroblock_tree_read( h, fenc ) )
                  return -1;
          }
-        else if( h->param.rc.i_aq_mode )
+        else
              x264_adaptive_quant_frame( h, fenc );
  
+        if( h->frames.b_have_lowres )
+            x264_frame_init_lowres( h, fenc );
+
          /* 2: Place the frame into the queue for its slice type decision */
          x264_lookahead_put_frame( h, fenc );
  
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index cf1e283ce586a2f22ebb790b0e1a1a2dc8cc6972..ecf07748f4b3fd605f155ba1e8e098dea6bc3f1b 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -215,12 +215,14 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
      stride <<= h->mb.b_interlaced;
      uint64_t res = h->pixf.var[pix]( frame->plane[i] + offset, stride );
      uint32_t sum = (uint32_t)res;
-    uint32_t sqr = res >> 32;
-    return sqr - (sum * sum >> shift);
+    uint32_t ssd = res >> 32;
+    frame->i_pixel_sum[i] += sum;
+    frame->i_pixel_ssd[i] += ssd;
+    return ssd - (sum * sum >> shift);
  }
  
  // Find the total AC energy of the block in all planes.
-static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
+static NOINLINE uint32_t x264_ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
  {
      /* This function contains annoying hacks because GCC has a habit of reordering emms
       * and putting it after floating point ops.  As a result, we put the emms at the end of the
@@ -239,56 +241,90 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
       * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */
      float strength;
      float avg_adj = 0.f;
-    /* Need to init it anyways for MB tree. */
-    if( h->param.rc.f_aq_strength == 0 )
-    {
-        memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
-        memset( frame->f_qp_offset_aq, 0, h->mb.i_mb_count * sizeof(float) );
-        if( h->frames.b_have_lowres )
-            for( int mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
-                frame->i_inv_qscale_factor[mb_xy] = 256;
-        return;
+    int width = h->sps->i_mb_width;
+    int height = h->sps->i_mb_height;
+    /* Initialize frame stats */
+    for( int i = 0; i < 3; i++ )
+    {
+        frame->i_pixel_sum[i] = 0;
+        frame->i_pixel_ssd[i] = 0;
      }
  
-    if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
+    /* Degenerate cases */
+    if( h->param.rc.i_aq_mode == X264_AQ_NONE || h->param.rc.f_aq_strength == 0 )
      {
-        float avg_adj_pow2 = 0.f;
-        for( int mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
-            for( int mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
-            {
-                uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
-                float qp_adj = powf( energy + 1, 0.125f );
-                frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
-                avg_adj += qp_adj;
-                avg_adj_pow2 += qp_adj * qp_adj;
-            }
-        avg_adj /= h->mb.i_mb_count;
-        avg_adj_pow2 /= h->mb.i_mb_count;
-        strength = h->param.rc.f_aq_strength * avg_adj;
-        avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
+        /* Need to init it anyways for MB tree */
+        if( h->param.rc.f_aq_strength == 0 )
+        {
+            memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
+            memset( frame->f_qp_offset_aq, 0, h->mb.i_mb_count * sizeof(float) );
+            if( h->frames.b_have_lowres )
+                for( int mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
+                    frame->i_inv_qscale_factor[mb_xy] = 256;
+        }
+        /* Need variance data for weighted prediction */
+        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE || h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+        {
+            for( int mb_y = 0; mb_y < height; mb_y++ )
+                for( int mb_x = 0; mb_x < width; mb_x++ )
+                    x264_ac_energy_mb( h, mb_x, mb_y, frame );
+        }
+        else
+            return;
      }
+    /* Actual adaptive quantization */
      else
-        strength = h->param.rc.f_aq_strength * 1.0397f;
-
-    for( int mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
-        for( int mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
+    {
+        if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
          {
-            float qp_adj;
-            if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
-            {
-                qp_adj = frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride];
-                qp_adj = strength * (qp_adj - avg_adj);
-            }
-            else
+            float avg_adj_pow2 = 0.f;
+            for( int mb_y = 0; mb_y < height; mb_y++ )
+                for( int mb_x = 0; mb_x < width; mb_x++ )
+                {
+                    uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
+                    float qp_adj = powf( energy + 1, 0.125f );
+                    frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
+                    avg_adj += qp_adj;
+                    avg_adj_pow2 += qp_adj * qp_adj;
+                }
+            avg_adj /= h->mb.i_mb_count;
+            avg_adj_pow2 /= h->mb.i_mb_count;
+            strength = h->param.rc.f_aq_strength * avg_adj;
+            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
+        }
+        else
+            strength = h->param.rc.f_aq_strength * 1.0397f;
+
+        for( int mb_y = 0; mb_y < height; mb_y++ )
+            for( int mb_x = 0; mb_x < width; mb_x++ )
              {
-                uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
-                qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
+                float qp_adj;
+                if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
+                {
+                    qp_adj = frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride];
+                    qp_adj = strength * (qp_adj - avg_adj);
+                }
+                else
+                {
+                    uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
+                    qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
+                }
+                frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] =
+                frame->f_qp_offset_aq[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
+                if( h->frames.b_have_lowres )
+                    frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj);
              }
-            frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] =
-            frame->f_qp_offset_aq[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
-            if( h->frames.b_have_lowres )
-                frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj);
-        }
+    }
+
+    /* Remove mean from SSD calculation */
+    for( int i = 0; i < 3; i++ )
+    {
+        uint64_t ssd = frame->i_pixel_ssd[i];
+        uint64_t sum = frame->i_pixel_sum[i];
+        int w = width*16>>!!i;
+        int h = height*16>>!!i;
+        frame->i_pixel_ssd[i] = ssd - (sum * sum + w * h / 2) / (w * h);
+    }
  }
  
  int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index e050f95aee9a117a18e96a285befcc7a7feba596..30b416ba4176da6ccee4d956a2e6b377c5fb1295 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -67,25 +67,6 @@ static void x264_weight_get_h264( unsigned int weight_nonh264, int offset, x264_
      w->i_scale = X264_MIN( w->i_scale, 127 );
  }
  
-void x264_weight_plane_analyse( x264_t *h, x264_frame_t *frame )
-{
-    uint32_t sad = 0;
-    uint64_t ssd = 0;
-    uint8_t *p = frame->plane[0];
-    int stride = frame->i_stride[0];
-    int width = frame->i_width[0];
-    int height = frame->i_lines[0];
-    for( int y = 0; y < height>>4; y++, p += stride*16 )
-        for( int x = 0; x < width; x += 16 )
-        {
-            uint64_t res = h->pixf.var[PIXEL_16x16]( p + x, stride );
-            sad += (uint32_t)res;
-            ssd += res >> 32;
-        }
-    frame->i_pixel_sum = sad;
-    frame->i_pixel_ssd = ssd - ((uint64_t)sad * sad + width * height / 2) / (width * height);
-}
-
  static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest )
  {
      int ref0_distance = fenc->i_frame - ref->i_frame - 1;
@@ -167,10 +148,10 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
      int found;
      x264_weight_t *weights = fenc->weight[0];
  
-    fenc_var = round( sqrt( fenc->i_pixel_ssd ) );
-    ref_var  = round( sqrt(  ref->i_pixel_ssd ) );
-    fenc_mean = (float)fenc->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
-    ref_mean  = (float) ref->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
+    fenc_var = round( sqrt( fenc->i_pixel_ssd[0] ) );
+    ref_var  = round( sqrt(  ref->i_pixel_ssd[0] ) );
+    fenc_mean = (float)fenc->i_pixel_sum[0] / (fenc->i_lines[0] * fenc->i_width[0]);
+    ref_mean  = (float) ref->i_pixel_sum[0] / (fenc->i_lines[0] * fenc->i_width[0]);
  
      //early termination
      if( fabs( ref_mean - fenc_mean ) < 0.5 && fabs( 1 - fenc_var / ref_var ) < epsilon )
@@ -534,8 +515,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
          do_search[1] = b != p1 && frames[b]->lowres_mvs[1][p1-b-1][0][0] == 0x7FFF;
          if( do_search[0] )
          {
-            if( ( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART
-                  || h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE ) && b == p1 )
+            if( ( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART ||
+                  h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE ) && b == p1 )
              {
                  x264_emms();
                  x264_weights_analyse( h, frames[b], frames[p0], 1 );
author	Fiona Glaser <fiona@x264.com>
	Wed, 26 May 2010 19:55:35 +0000 (12:55 -0700)
committer	Fiona Glaser <fiona@x264.com>
	Mon, 31 May 2010 18:43:05 +0000 (11:43 -0700)
common/frame.h		patch | blob | history
encoder/analyse.h		patch | blob | history
encoder/encoder.c		patch | blob | history
encoder/ratecontrol.c		patch | blob | history
encoder/slicetype.c		patch | blob | history