Cache ref costs and use more accurate MV costs

author Fiona Glaser <fiona@x264.com>

Wed, 14 Jan 2009 02:11:50 +0000 (21:11 -0500)

committer Fiona Glaser <fiona@x264.com>

Wed, 14 Jan 2009 02:11:50 +0000 (21:11 -0500)
author Fiona Glaser <fiona@x264.com>
Wed, 14 Jan 2009 02:11:50 +0000 (21:11 -0500)
committer Fiona Glaser <fiona@x264.com>
Wed, 14 Jan 2009 02:11:50 +0000 (21:11 -0500)
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 96574333ba8bcd1b15f8ac8041fda66d911d7cab..bd53ebfe487e505495960ce8d49787d9e7874f47 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -22,6 +22,7 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
   *****************************************************************************/
  
+#define _ISOC99_SOURCE
  #include <math.h>
  #include <limits.h>
  #ifndef _MSC_VER
@@ -29,6 +30,7 @@
  #endif
  
  #include "common/common.h"
+#include "common/cpu.h"
  #include "macroblock.h"
  #include "me.h"
  #include "ratecontrol.h"
@@ -77,6 +79,8 @@ typedef struct
      int i_lambda2;
      int i_qp;
      int16_t *p_cost_mv;
+    uint16_t *p_cost_ref0;
+    uint16_t *p_cost_ref1;
      int i_mbrd;
  
  
@@ -168,6 +172,7 @@ static const int i_sub_mb_p_cost_table[4] = {
  static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
  
  uint16_t *x264_cost_mv_fpel[52][4];
+uint16_t x264_cost_ref[52][3][33];
  
  /* initialize an array of lambda*nbits for all possible mvs */
  static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
@@ -177,6 +182,7 @@ static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
  
      if( !p_cost_mv[a->i_qp] )
      {
+        x264_emms();
          /* could be faster, but isn't called many times */
          /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
          p_cost_mv[a->i_qp] = x264_malloc( (4*4*2048 + 1) * sizeof(int16_t) );
@@ -184,10 +190,15 @@ static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
          for( i = 0; i <= 2*4*2048; i++ )
          {
              p_cost_mv[a->i_qp][-i] =
-            p_cost_mv[a->i_qp][i]  = a->i_lambda * bs_size_se( i );
+            p_cost_mv[a->i_qp][i]  = a->i_lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
          }
+        for( i = 0; i < 3; i++ )
+            for( j = 0; j < 33; j++ )
+                x264_cost_ref[a->i_qp][i][j] = a->i_lambda * bs_size_te( i, j );
      }
      a->p_cost_mv = p_cost_mv[a->i_qp];
+    a->p_cost_ref0 = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
+    a->p_cost_ref1 = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
  
      /* FIXME is this useful for all me methods? */
      if( h->param.analyse.i_me_method >= X264_ME_ESA && !x264_cost_mv_fpel[a->i_qp][0] )
@@ -1038,7 +1049,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
      (m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]];
  
  #define REF_COST(list, ref) \
-    (a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l##list##_active - 1, ref ))
+    (a->p_cost_ref##list[ref])
  
  static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  {
author	Fiona Glaser <fiona@x264.com>
	Wed, 14 Jan 2009 02:11:50 +0000 (21:11 -0500)
committer	Fiona Glaser <fiona@x264.com>
	Wed, 14 Jan 2009 02:11:50 +0000 (21:11 -0500)