Clean up MBAFF deblocking code

author Fiona Glaser <fiona@x264.com>

Thu, 14 Jul 2011 21:04:11 +0000 (14:04 -0700)

committer Fiona Glaser <fiona@x264.com>

Fri, 22 Jul 2011 11:03:22 +0000 (04:03 -0700)
author Fiona Glaser <fiona@x264.com>
Thu, 14 Jul 2011 21:04:11 +0000 (14:04 -0700)
committer Fiona Glaser <fiona@x264.com>
Fri, 22 Jul 2011 11:03:22 +0000 (04:03 -0700)
diff --git a/common/deblock.c b/common/deblock.c

index a14ecc273a41dcf867d693aa813bdb326c8f72e6..0626daab43be688b9faf13cbc008f7901ac3409c 100644 (file)
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -269,7 +269,7 @@ static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int bet
  
  static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
-                                int bframe, x264_t *h )
+                                int bframe )
  {
      for( int dir = 0; dir < 2; dir++ )
      {
@@ -296,162 +296,6 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
      }
  }
  
-void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                               int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                               int mvy_limit, int bframe, x264_t *h )
-{ /* MBAFF variant: generic nnz/ref/mv pass for non-intra MBs, then overrides for the left and top MB edges. */
-    int neighbour_field[2]; /* [0]: left MB exists and has its field flag set; [1]: same for the top MB */
-    neighbour_field[0] = h->mb.i_mb_left_xy[0] >= 0 && h->mb.field[h->mb.i_mb_left_xy[0]];
-    neighbour_field[1] = h->mb.i_mb_top_xy >= 0 && h->mb.field[h->mb.i_mb_top_xy];
-    int intra_cur = IS_INTRA( h->mb.i_type );
-
-    if( !intra_cur ) /* the generic pass is skipped for intra MBs; only the edge overrides below can write bs */
-    {
-        for( int dir = 0; dir < 2; dir++ )
-        {
-            int edge_stride = dir ? 8 : 1; /* cache index step from one edge to the next */
-            int part_stride = dir ? 1 : 8; /* cache index step along an edge */
-            for( int edge = 0; edge < 4; edge++ )
-            {
-                for( int i = 0, q = X264_SCAN8_0+edge*edge_stride; i < 4; i++, q += part_stride )
-                {
-                    int p = q - edge_stride; /* cache entry on the other side of the edge from q */
-                    if( nnz_cache[q] || nnz_cache[p] )
-                    {
-                        bs[dir][edge][i] = 2;
-                    }
-                    else if( (edge == 0 && MB_INTERLACED != neighbour_field[dir]) ||
-                             ref[0][q] != ref[0][p] ||
-                             abs( mv[0][q][0] - mv[0][p][0] ) >= 4 ||
-                             abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit ||
-                            (bframe && (ref[1][q] != ref[1][p] ||
-                             abs( mv[1][q][0] - mv[1][p][0] ) >= 4 ||
-                             abs( mv[1][q][1] - mv[1][p][1] ) >= mvy_limit )) )
-                    {
-                        bs[dir][edge][i] = 1; /* field-flag mismatch on edge 0, or differing ref / large mv delta */
-                    }
-                    else
-                        bs[dir][edge][i] = 0;
-                }
-            }
-        }
-    }
-
-    if( h->mb.i_neighbour & MB_LEFT )
-    {
-        if( h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
-        {
-            static const uint8_t offset[2][2][8] = { /* [MB_INTERLACED][i_mb_y&1][i]: picks entry 3 + 4*offset of the left MB's nnz */
-                {   { 0, 0, 0, 0, 1, 1, 1, 1 },
-                    { 2, 2, 2, 2, 3, 3, 3, 3 }, },
-                {   { 0, 1, 2, 3, 0, 1, 2, 3 },
-                    { 0, 1, 2, 3, 0, 1, 2, 3 }, }
-            };
-            uint8_t bS[8]; /* eight left-edge strengths, distributed into bs[0][0] and bs[0][4] below */
-
-            if( intra_cur )
-                memset( bS, 4, 8 ); /* current MB is intra: strength 4 for all eight entries */
-            else
-            {
-                const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1];
-                uint8_t (*nnz)[48] = h->mb.non_zero_count;
-
-                for( int i = 0; i < 8; i++ )
-                {
-                    int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1]; /* which of the two left MBs entry i faces */
-                    int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
-                    int nnz_left = nnz[left][3 + 4*off[i]];
-                    if( !h->param.b_cabac && h->pps->b_transform_8x8_mode ) /* non-CABAC with 8x8 transform enabled */
-                    {
-                        int j = off[i]&~1;
-                        if( h->mb.mb_transform_size[left] )
-                            nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] ));
-                    }
-                    if( IS_INTRA( h->mb.type[left] ) )
-                        bS[i] = 4;
-                    else if( nnz_left || nnz_this )
-                        bS[i] = 2;
-                    else // Left MB's field flag differs from the current MB's (checked above), so never 0.
-                        bS[i] = 1;
-                }
-            }
-
-            if( MB_INTERLACED )
-            {
-                for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[i]; /* first half of bS -> edge 0 */
-                for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[4+i]; /* second half of bS -> edge 4 */
-            }
-            else
-            {
-                for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[2*i]; /* even entries of bS -> edge 0 */
-                for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[1+2*i]; /* odd entries of bS -> edge 4 */
-            }
-        }
-    }
-
-    if( h->mb.i_neighbour & MB_TOP )
-    {
-        if( !(h->mb.i_mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] )
-        {
-            /* Need to filter both fields (even for frame macroblocks).
-             * Filter top two rows using the top macroblock of the above
-             * pair and then the bottom one. */
-            int mbn_xy = h->mb.i_mb_xy - 2 * h->mb.i_mb_stride; /* MB two rows up; advanced by one row per j iteration */
-            uint32_t nnz_cur[4];
-            nnz_cur[0] = h->mb.cache.non_zero_count[x264_scan8[0]+0];
-            nnz_cur[1] = h->mb.cache.non_zero_count[x264_scan8[0]+1];
-            nnz_cur[2] = h->mb.cache.non_zero_count[x264_scan8[0]+2];
-            nnz_cur[3] = h->mb.cache.non_zero_count[x264_scan8[0]+3];
-            /* Munge NNZ for cavlc + 8x8dct */
-            if( !h->param.b_cabac && h->pps->b_transform_8x8_mode &&
-                h->mb.mb_transform_size[h->mb.i_mb_xy] )
-            {
-                int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
-                int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] );
-                nnz_cur[0] = nnz_cur[1] = !!nnz0;
-                nnz_cur[2] = nnz_cur[3] = !!nnz1;
-            }
-
-            for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
-            {
-                int mbn_intra = IS_INTRA( h->mb.type[mbn_xy] );
-                uint8_t (*nnz)[48] = h->mb.non_zero_count;
-
-                uint32_t nnz_top[4];
-                nnz_top[0] = nnz[mbn_xy][3*4+0];
-                nnz_top[1] = nnz[mbn_xy][3*4+1];
-                nnz_top[2] = nnz[mbn_xy][3*4+2];
-                nnz_top[3] = nnz[mbn_xy][3*4+3];
-
-                if( !h->param.b_cabac && h->pps->b_transform_8x8_mode &&
-                    (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[mbn_xy] )
-                {
-                    int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] );
-                    int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] );
-                    nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0; /* only zero vs non-zero is tested below */
-                    nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
-                }
-
-                uint8_t bS[4];
-                if( intra_cur || mbn_intra )
-                    M32( bS ) = 0x03030303; /* strength 3 in every byte (assuming M32 is a 32-bit access) */
-                else
-                {
-                    for( int i = 0; i < 4; i++ )
-                    {
-                        if( nnz_cur[i] || nnz_top[i] )
-                            bS[i] = 2;
-                        else
-                            bS[i] = 1;
-                    }
-                }
-                for( int i = 0; i < 4; i++ )
-                    bs[1][4*j][i] = bS[i]; /* j = 0 writes bs[1][0], j = 1 writes bs[1][4] */
-            }
-        }
-    }
-}
-
  static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int a, int b, int b_chroma, x264_deblock_inter_t pf_inter )
  {
      int index_a = i_qp + a;
@@ -557,7 +401,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
                  left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
                  luma_qp[0] = (qp + left_qp[0] + 1) >> 1;
                  chroma_qp[0] = (qpc + h->chroma_qp_table[left_qp[0]] + 1) >> 1;
-                if( bs[0][0][0] == 4 )
+                if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] ) )
                  {
                      deblock_edge_intra( h, pixy,           2*stridey,  bs[0][0], luma_qp[0],   a, b, 0, luma_intra_deblock );
                      deblock_edge_intra( h, pixuv,          2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
@@ -576,7 +420,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
                  left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
                  luma_qp[1] = (qp + left_qp[1] + 1) >> 1;
                  chroma_qp[1] = (qpc + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
-                if( bs[0][4][0] == 4)
+                if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[1]] ) )
                  {
                      deblock_edge_intra( h, pixy           + (stridey<<offy),   2*stridey,  bs[0][4], luma_qp[1],   a, b, 0, luma_intra_deblock );
                      deblock_edge_intra( h, pixuv          + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
@@ -615,11 +459,14 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
              {
                  int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
  
-                for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride)
+                for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
                  {
                      int qpt = h->mb.qp[mbn_xy];
                      int qp_top = (qp + qpt + 1) >> 1;
                      int qpc_top = (qpc + h->chroma_qp_table[qpt] + 1) >> 1;
+                    int intra_top = IS_INTRA( h->mb.type[mbn_xy] );
+                    if( intra_cur || intra_top )
+                        M32( bs[1][4*j] ) = 0x03030303;
  
                      // deblock the first horizontal edge of the even rows, then the first horizontal edge of the odd rows
                      deblock_edge( h, pixy      + j*stridey,  2* stridey, bs[1][4*j], qp_top, a, b, 0, deblock_v_luma_c );
@@ -646,7 +493,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
                  }
                  else
                  {
-                    if( intra_top )
+                    if( intra_cur || intra_top )
                          M32( bs[1][0] ) = 0x03030303;
                      FILTER(       , 1, 0, qp_top, qpc_top );
                  }
@@ -691,7 +538,7 @@ void x264_macroblock_deblock( x264_t *h )
      }
      else
          h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
-                                   bs, 4 >> SLICE_MBAFF, h->sh.i_type == SLICE_TYPE_B, h );
+                                   bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B );
  
      int transform_8x8 = h->mb.b_transform_8x8;
  
@@ -742,16 +589,16 @@ void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int be
  void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
  void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                     int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe, x264_t *h );
+                                   int mvy_limit, int bframe );
  void x264_deblock_strength_sse2  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                     int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe, x264_t *h );
+                                   int mvy_limit, int bframe );
  void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                     int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe, x264_t *h );
+                                   int mvy_limit, int bframe );
  void x264_deblock_strength_avx   ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                     int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe, x264_t *h );
+                                   int mvy_limit, int bframe );
  #if ARCH_X86
  void x264_deblock_h_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
  void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
@@ -873,6 +720,4 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
     }
  #endif
  #endif // !HIGH_BIT_DEPTH
-
-    if( b_mbaff ) pf->deblock_strength = deblock_strength_mbaff_c;
  }
diff --git a/common/frame.h b/common/frame.h

index db066f3971cac246c65ccfa2437dd0bd9f45c18c..fb27df8f491ae0c2cf28fac6bf73ed4487086935 100644 (file)
--- a/common/frame.h
+++ b/common/frame.h
@@ -184,7 +184,7 @@ typedef struct
      x264_deblock_intra_t deblock_chroma_intra[2];
      void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                 int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
-                               int bframe, x264_t *h );
+                               int bframe );
  } x264_deblock_function_t;
  
  void          x264_frame_delete( x264_frame_t *frame );
diff --git a/common/macroblock.c b/common/macroblock.c

index 8d1c157eea6eef154f15de65e1ddaf3732dd8546..7ac6490866596409817414d5077dd34ee7c8a6cf 100644 (file)
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -1375,14 +1375,90 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
          h->mb.i_neighbour |= MB_TOP;
  }
  
+static void x264_macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][4] )
+{ /* MBAFF-only fixup of bs: rewrites left/top MB-edge strengths when the neighbour's field flag differs from MB_INTERLACED. */
+    if( (h->mb.i_neighbour & MB_LEFT) && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
+    {
+        static const uint8_t offset[2][2][8] = /* [MB_INTERLACED][i_mb_y&1][i]: picks entry 3 + 4*offset of the left MB's nnz */
+        {   {   { 0, 0, 0, 0, 1, 1, 1, 1 },
+                { 2, 2, 2, 2, 3, 3, 3, 3 }, },
+            {   { 0, 1, 2, 3, 0, 1, 2, 3 },
+                { 0, 1, 2, 3, 0, 1, 2, 3 }, }
+        };
+        ALIGNED_ARRAY_8( uint8_t, tmpbs, [8] ); /* eight left-edge strengths, distributed into bs[0][0] and bs[0][4] below */
+
+        const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1];
+        uint8_t (*nnz)[48] = h->mb.non_zero_count;
+
+        for( int i = 0; i < 8; i++ )
+        {
+            int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1]; /* which of the two left MBs entry i faces */
+            int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
+            int nnz_left = nnz[left][3 + 4*off[i]];
+            if( !h->param.b_cabac && h->pps->b_transform_8x8_mode ) /* non-CABAC with 8x8 transform enabled */
+            {
+                int j = off[i]&~1;
+                if( h->mb.mb_transform_size[left] )
+                    nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] ));
+            }
+            tmpbs[i] = (nnz_left || nnz_this) ? 2 : 1; /* never 0 here; no intra check is made in this function */
+        }
+
+        if( MB_INTERLACED )
+        {
+            CP32( bs[0][0], &tmpbs[0] ); /* first half of tmpbs -> edge 0 (CP32 presumably copies 4 bytes) */
+            CP32( bs[0][4], &tmpbs[4] ); /* second half of tmpbs -> edge 4 */
+        }
+        else
+        {
+            for( int i = 0; i < 4; i++ ) bs[0][0][i] = tmpbs[2*i]; /* even entries of tmpbs -> edge 0 */
+            for( int i = 0; i < 4; i++ ) bs[0][4][i] = tmpbs[1+2*i]; /* odd entries of tmpbs -> edge 4 */
+        }
+    }
+
+    if( (h->mb.i_neighbour & MB_TOP) && MB_INTERLACED != h->mb.field[h->mb.i_mb_top_xy] )
+    {
+        if( !(h->mb.i_mb_y&1) && !MB_INTERLACED )
+        {
+            /* Need to filter both fields (even for frame macroblocks).
+             * Filter top two rows using the top macroblock of the above
+             * pair and then the bottom one. */
+            int mbn_xy = h->mb.i_mb_xy - 2 * h->mb.i_mb_stride; /* MB two rows up; advanced by one row per j iteration */
+            uint8_t *nnz_cur = &h->mb.cache.non_zero_count[x264_scan8[0]];
+
+            for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
+            {
+                uint8_t (*nnz)[48] = h->mb.non_zero_count;
+
+                ALIGNED_4( uint8_t nnz_top[4] );
+                CP32( nnz_top, &nnz[mbn_xy][3*4] ); /* entries 12..15 of the upper MB's nnz (CP32 presumably copies 4 bytes) */
+
+                if( !h->param.b_cabac && h->pps->b_transform_8x8_mode && h->mb.mb_transform_size[mbn_xy] )
+                {
+                    int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] );
+                    int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] );
+                    nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0; /* NOTE(review): nnz_top is uint8_t, so 0x0101 truncates to 0x01; harmless, only zero/non-zero is tested */
+                    nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
+                }
+
+                for( int i = 0; i < 4; i++ )
+                    bs[1][4*j][i] = (nnz_cur[i] || nnz_top[i]) ? 2 : 1; /* j = 0 writes bs[1][0], j = 1 writes bs[1][4] */
+            }
+        }
+        else /* odd MB row or MB_INTERLACED set: keep the computed strengths but raise each to at least 1 */
+            for( int i = 0; i < 4; i++ )
+                bs[1][0][i] = X264_MAX( bs[1][0][i], 1 );
+    }
+}
+
  void x264_macroblock_deblock_strength( x264_t *h )
  {
      uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
      if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
      {
-        memset( bs[0], 3, 4*4*sizeof(uint8_t) );
-        memset( bs[1], 3, 4*4*sizeof(uint8_t) );
-        if( !SLICE_MBAFF ) return;
+        memset( bs[0][1], 3, 3*4*sizeof(uint8_t) );
+        memset( bs[1][1], 3, 3*4*sizeof(uint8_t) );
+        return;
      }
  
      /* If we have multiple slices and we're deblocking on slice edges, we
@@ -1528,9 +1604,11 @@ void x264_macroblock_deblock_strength( x264_t *h )
          }
      }
  
-    int mvy_limit = 4 >> MB_INTERLACED;
      h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
-                               bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B, h );
+                               bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B );
+
+    if( SLICE_MBAFF )
+        x264_macroblock_deblock_strength_mbaff( h, bs );
  }
  
  static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
diff --git a/tools/checkasm.c b/tools/checkasm.c

index 61b0f4b3d2145c7527f484f2a8af2ffa799f0446..8a30e325510fb40342525136c8a06fe30f1c3cd3 100644 (file)
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -1396,8 +1396,8 @@ static int check_deblock( int cpu_ref, int cpu_new )
                          mv[j][k][l] = ((rand()&7) != 7) ? (rand()&7) - 3 : (rand()&1023) - 512;
                  }
              set_func_name( "deblock_strength" );
-            call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1), NULL );
-            call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1), NULL );
+            call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1) );
+            call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1) );
              if( memcmp( bs[0], bs[1], sizeof(bs[0]) ) )
              {
                  ok = 0;
author	Fiona Glaser <fiona@x264.com>
	Thu, 14 Jul 2011 21:04:11 +0000 (14:04 -0700)
committer	Fiona Glaser <fiona@x264.com>
	Fri, 22 Jul 2011 11:03:22 +0000 (04:03 -0700)
common/deblock.c		patch | blob | history
common/frame.h		patch | blob | history
common/macroblock.c		patch | blob | history
tools/checkasm.c		patch | blob | history