/*****************************************************************************
* macroblock.c: macroblock common functions
*****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2016 x264 project
*
* Authors: Fiona Glaser <fiona@x264.com>
* Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
- * Henrik Gramner <hengar-6@student.ltu.se>
+ * Henrik Gramner <henrik@gramner.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
h->mb.b_interlaced = PARAM_INTERLACED;
- CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
- memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+ PREALLOC_INIT
+
+ PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+ PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
- CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+ PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
/* all coeffs */
- CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+ PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
if( h->param.b_cabac )
{
- CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+ PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
if( h->param.i_bframe )
- CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+ PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
}
for( int i = 0; i < 2; i++ )
i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
for( int j = !i; j < i_refs; j++ )
- {
- CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
- M32( h->mb.mvr[i][j][0] ) = 0;
- h->mb.mvr[i][j]++;
- }
+ PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
}
if( h->param.analyse.i_weighted_pred )
}
for( int i = 0; i < numweightbuf; i++ )
- CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ }
+
+ PREALLOC_END( h->mb.base );
+
+ memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+ for( int i = 0; i < 2; i++ )
+ {
+ int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+ for( int j = !i; j < i_refs; j++ )
+ {
+ M32( h->mb.mvr[i][j][0] ) = 0;
+ h->mb.mvr[i][j]++;
+ }
}
return 0;
}
void x264_macroblock_cache_free( x264_t *h )
{
- for( int i = 0; i < 2; i++ )
- for( int j = !i; j < X264_REF_MAX*2; j++ )
- if( h->mb.mvr[i][j] )
- x264_free( h->mb.mvr[i][j]-1 );
- for( int i = 0; i < X264_REF_MAX; i++ )
- x264_free( h->mb.p_weight_buf[i] );
-
- if( h->param.b_cabac )
- {
- x264_free( h->mb.skipbp );
- x264_free( h->mb.chroma_pred_mode );
- x264_free( h->mb.mvd[0] );
- x264_free( h->mb.mvd[1] );
- }
- x264_free( h->mb.slice_table );
- x264_free( h->mb.intra4x4_pred_mode );
- x264_free( h->mb.non_zero_count );
- x264_free( h->mb.mb_transform_size );
- x264_free( h->mb.cbp );
- x264_free( h->mb.qp );
+ x264_free( h->mb.base );
}
int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
}
- int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
+ int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
scratch_size = X264_MAX( scratch_size, buf_mbtree );
if( scratch_size )
CHECKED_MALLOC( h->scratch_buffer, scratch_size );
h->scratch_buffer = NULL;
int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
- CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
+ int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
+ scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
+ CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
return 0;
fail:
{
// Looking at the bottom field so always take the bottom macroblock of the pair.
h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
- h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
+ h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[1]];
h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]];
CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] );
CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] );
}
}
- if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
- h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+ if( b_mbaff && mb_x == 0 && !(mb_y&1) )
+ {
+ if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
+ h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
+ else
+ h->mb.field_decoding_flag = 0;
+ }
/* Check whether skip here would cause decoder to predict interlace mode incorrectly.
* FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
if( b_mbaff )
{
if( MB_INTERLACED != h->mb.field_decoding_flag &&
- h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+ (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
h->mb.b_allow_skip = 0;
- if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
- {
- if( h->mb.i_neighbour & MB_LEFT )
- {
- if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- else if( h->mb.i_neighbour & MB_TOP )
- {
- if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- else // Frame mb pair is predicted
- {
- if( MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- }
}
if( h->param.b_cabac )
uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength;
if( IS_INTRA( h->mb.i_type ) )
{
- memset( bs[0][1], 3, 3*4*sizeof(uint8_t) );
- memset( bs[1][1], 3, 3*4*sizeof(uint8_t) );
+ M32( bs[0][1] ) = 0x03030303;
+ M64( bs[0][2] ) = 0x0303030303030303ULL;
+ M32( bs[1][1] ) = 0x03030303;
+ M64( bs[1][2] ) = 0x0303030303030303ULL;
return;
}
M32( bs[0][0] ) = 0x02020202;
M32( bs[0][2] ) = 0x02020202;
M32( bs[0][4] ) = 0x02020202;
- memset( bs[1][0], 2, 5*4*sizeof(uint8_t) ); /* [1][1] and [1][3] has to be set for 4:2:2 */
+ M64( bs[1][0] ) = 0x0202020202020202ULL; /* [1][1] and [1][3] have to be set for 4:2:2 */
+ M64( bs[1][2] ) = 0x0202020202020202ULL;
+ M32( bs[1][4] ) = 0x02020202;
return;
}
}