Clip log2_max_frame_num

[x264] / encoder / analyse.c
diff --git a/encoder/analyse.c b/encoder/analyse.c

index daf7e67e760f027c61ec84a2bf1e70730a48e8ab..c1c9314a0a38d2958179e1a63e6aa5c19026a52d 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -77,7 +77,7 @@ typedef struct
      int i_lambda;
      int i_lambda2;
      int i_qp;
-    int16_t *p_cost_mv;
+    uint16_t *p_cost_mv;
      uint16_t *p_cost_ref0;
      uint16_t *p_cost_ref1;
      int i_mbrd;
@@ -237,46 +237,36 @@ static const int i_sub_mb_p_cost_table[4] = {
  
  static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
  
-/* Indexed by lambda instead of qp because, due to rounding,
- * some quantizers share lambdas.  This saves memory. */
-uint16_t *x264_cost_mv_fpel[92][4];
-uint16_t x264_cost_ref[92][3][33];
+static uint16_t x264_cost_ref[92][3][33];
+static x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
  
-/* initialize an array of lambda*nbits for all possible mvs */
-static int x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
+int x264_analyse_init_costs( x264_t *h, int qp )
  {
-    static int16_t *p_cost_mv[92];
      int i, j;
-
-    if( !p_cost_mv[a->i_lambda] )
-    {
-        x264_emms();
-        /* could be faster, but isn't called many times */
-        /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
-        CHECKED_MALLOC( p_cost_mv[a->i_lambda], (4*4*2048 + 1) * sizeof(int16_t) );
-        p_cost_mv[a->i_lambda] += 2*4*2048;
-        for( i = 0; i <= 2*4*2048; i++ )
-        {
-            p_cost_mv[a->i_lambda][-i] =
-            p_cost_mv[a->i_lambda][i]  = a->i_lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
-        }
-        for( i = 0; i < 3; i++ )
-            for( j = 0; j < 33; j++ )
-                x264_cost_ref[a->i_lambda][i][j] = i ? a->i_lambda * bs_size_te( i, j ) : 0;
-    }
-    a->p_cost_mv = p_cost_mv[a->i_lambda];
-    a->p_cost_ref0 = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
-    a->p_cost_ref1 = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
-
-    /* FIXME is this useful for all me methods? */
-    if( h->param.analyse.i_me_method >= X264_ME_ESA && !x264_cost_mv_fpel[a->i_lambda][0] )
+    int lambda = x264_lambda_tab[qp];
+    if( h->cost_mv[lambda] )
+        return 0;
+    /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
+    CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) );
+    h->cost_mv[lambda] += 2*4*2048;
+    for( i = 0; i <= 2*4*2048; i++ )
+    {
+        h->cost_mv[lambda][-i] =
+        h->cost_mv[lambda][i]  = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
+    }
+    x264_pthread_mutex_lock( &cost_ref_mutex );
+    for( i = 0; i < 3; i++ )
+        for( j = 0; j < 33; j++ )
+            x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0;
+    x264_pthread_mutex_unlock( &cost_ref_mutex );
+    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] )
      {
          for( j=0; j<4; j++ )
          {
-            CHECKED_MALLOC( x264_cost_mv_fpel[a->i_lambda][j], (4*2048 + 1) * sizeof(int16_t) );
-            x264_cost_mv_fpel[a->i_lambda][j] += 2*2048;
+            CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) );
+            h->cost_mv_fpel[lambda][j] += 2*2048;
              for( i = -2*2048; i < 2*2048; i++ )
-                x264_cost_mv_fpel[a->i_lambda][j][i] = p_cost_mv[a->i_lambda][i*4+j];
+                h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j];
          }
      }
      return 0;
@@ -284,6 +274,27 @@ fail:
      return -1;
  }
  
+void x264_analyse_free_costs( x264_t *h )
+{
+    int i, j;
+    for( i = 0; i < 92; i++ )
+    {
+        if( h->cost_mv[i] )
+            x264_free( h->cost_mv[i] - 2*4*2048 );
+        if( h->cost_mv_fpel[i][0] )
+            for( j = 0; j < 4; j++ )
+                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
+    }
+}
+
+/* initialize an array of lambda*nbits for all possible mvs */
+static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
+{
+    a->p_cost_mv = h->cost_mv[a->i_lambda];
+    a->p_cost_ref0 = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
+    a->p_cost_ref1 = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
+}
+
  static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  {
      int i = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);
@@ -2317,7 +2328,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
  /*****************************************************************************
   * x264_macroblock_analyse:
   *****************************************************************************/
-int x264_macroblock_analyse( x264_t *h )
+void x264_macroblock_analyse( x264_t *h )
  {
      x264_mb_analysis_t analysis;
      int i_cost = COST_MAX;
@@ -2392,13 +2403,12 @@ int x264_macroblock_analyse( x264_t *h )
              int i_thresh16x8;
              int i_satd_inter, i_satd_intra;
  
-            if( x264_mb_analyse_load_costs( h, &analysis ) )
-                return -1;
+            x264_mb_analyse_load_costs( h, &analysis );
  
              x264_mb_analyse_inter_p16x16( h, &analysis );
  
              if( h->mb.i_type == P_SKIP )
-                return 0;
+                return;
  
              if( flags & X264_ANALYSE_PSUB16x16 )
              {
@@ -2574,7 +2584,7 @@ int x264_macroblock_analyse( x264_t *h )
                  else if( i_partition == D_16x16 )
                  {
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );
-                    analysis.l0.me16x16.cost = analysis.l0.i_rd16x16;
+                    analysis.l0.me16x16.cost = i_cost;
                      x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );
                  }
                  else if( i_partition == D_16x8 )
@@ -2607,20 +2617,20 @@ int x264_macroblock_analyse( x264_t *h )
                          }
                          else if( h->mb.i_sub_partition[i8x8] == D_L0_8x4 )
                          {
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 );
                          }
                          else if( h->mb.i_sub_partition[i8x8] == D_L0_4x8 )
                          {
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                          }
                          else if( h->mb.i_sub_partition[i8x8] == D_L0_4x4 )
                          {
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 );
-                           x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 );
+                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 );
                          }
                      }
                  }
@@ -2686,8 +2696,7 @@ int x264_macroblock_analyse( x264_t *h )
              int i_satd_inter;
              h->mb.b_skip_mc = 0;
  
-            if( x264_mb_analyse_load_costs( h, &analysis ) )
-                return -1;
+            x264_mb_analyse_load_costs( h, &analysis );
  
              /* select best inter mode */
              /* direct must be first */
@@ -2713,7 +2722,7 @@ int x264_macroblock_analyse( x264_t *h )
                  {
                      h->mb.i_type = B_SKIP;
                      x264_analyse_update_cache( h, &analysis );
-                    return 0;
+                    return;
                  }
              }
  
@@ -2878,12 +2887,12 @@ int x264_macroblock_analyse( x264_t *h )
                  {
                      if( i_type == B_L0_L0 )
                      {
-                        analysis.l0.me16x16.cost = analysis.l0.i_rd16x16;
+                        analysis.l0.me16x16.cost = i_cost;
                          x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );
                      }
                      else if( i_type == B_L1_L1 )
                      {
-                        analysis.l1.me16x16.cost = analysis.l1.i_rd16x16;
+                        analysis.l1.me16x16.cost = i_cost;
                          x264_me_refine_qpel_rd( h, &analysis.l1.me16x16, analysis.i_lambda2, 0, 1 );
                      }
                      else if( i_type == B_BI_BI )
@@ -2933,6 +2942,19 @@ int x264_macroblock_analyse( x264_t *h )
  
      x264_analyse_update_cache( h, &analysis );
  
+    /* In rare cases we can end up qpel-RDing our way back to a larger partition size
+     * without realizing it.  Check for this and account for it if necessary. */
+    if( analysis.i_mbrd >= 2 )
+    {
+        /* Don't bother with bipred or 8x8-and-below, the odds are incredibly low. */
+        static const uint8_t check_mv_lists[X264_MBTYPE_MAX] = {[P_L0]=1, [B_L0_L0]=1, [B_L1_L1]=2};
+        int list = check_mv_lists[h->mb.i_type] - 1;
+        if( list >= 0 && h->mb.i_partition != D_16x16 &&
+            *(uint32_t*)&h->mb.cache.mv[list][x264_scan8[0]] == *(uint32_t*)&h->mb.cache.mv[list][x264_scan8[12]] &&
+            h->mb.cache.ref[list][x264_scan8[0]] == h->mb.cache.ref[list][x264_scan8[12]] )
+                h->mb.i_partition = D_16x16;
+    }
+
      if( !analysis.i_mbrd )
          x264_mb_analyse_transform( h );
  
@@ -2945,7 +2967,6 @@ int x264_macroblock_analyse( x264_t *h )
          x264_psy_trellis_init( h, 0 );
      if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
          h->mb.i_skip_intra = 0;
-    return 0;
  }
  
  /*-------------------- Update MB from the analysis ----------------------*/