static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
- int bframe, x264_t *h )
+ int bframe )
{
for( int dir = 0; dir < 2; dir++ )
{
}
}
-void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
- int mvy_limit, int bframe, x264_t *h )
-{
- int neighbour_field[2];
- neighbour_field[0] = h->mb.i_mb_left_xy[0] >= 0 && h->mb.field[h->mb.i_mb_left_xy[0]];
- neighbour_field[1] = h->mb.i_mb_top_xy >= 0 && h->mb.field[h->mb.i_mb_top_xy];
- int intra_cur = IS_INTRA( h->mb.i_type );
-
- if( !intra_cur )
- {
- for( int dir = 0; dir < 2; dir++ )
- {
- int edge_stride = dir ? 8 : 1;
- int part_stride = dir ? 1 : 8;
- for( int edge = 0; edge < 4; edge++ )
- {
- for( int i = 0, q = X264_SCAN8_0+edge*edge_stride; i < 4; i++, q += part_stride )
- {
- int p = q - edge_stride;
- if( nnz_cache[q] || nnz_cache[p] )
- {
- bs[dir][edge][i] = 2;
- }
- else if( (edge == 0 && MB_INTERLACED != neighbour_field[dir]) ||
- ref[0][q] != ref[0][p] ||
- abs( mv[0][q][0] - mv[0][p][0] ) >= 4 ||
- abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit ||
- (bframe && (ref[1][q] != ref[1][p] ||
- abs( mv[1][q][0] - mv[1][p][0] ) >= 4 ||
- abs( mv[1][q][1] - mv[1][p][1] ) >= mvy_limit )) )
- {
- bs[dir][edge][i] = 1;
- }
- else
- bs[dir][edge][i] = 0;
- }
- }
- }
- }
-
- if( h->mb.i_neighbour & MB_LEFT )
- {
- if( h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
- {
- static const uint8_t offset[2][2][8] = {
- { { 0, 0, 0, 0, 1, 1, 1, 1 },
- { 2, 2, 2, 2, 3, 3, 3, 3 }, },
- { { 0, 1, 2, 3, 0, 1, 2, 3 },
- { 0, 1, 2, 3, 0, 1, 2, 3 }, }
- };
- uint8_t bS[8];
-
- if( intra_cur )
- memset( bS, 4, 8 );
- else
- {
- const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1];
- uint8_t (*nnz)[48] = h->mb.non_zero_count;
-
- for( int i = 0; i < 8; i++ )
- {
- int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1];
- int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
- int nnz_left = nnz[left][3 + 4*off[i]];
- if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
- {
- int j = off[i]&~1;
- if( h->mb.mb_transform_size[left] )
- nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] ));
- }
- if( IS_INTRA( h->mb.type[left] ) )
- bS[i] = 4;
- else if( nnz_left || nnz_this )
- bS[i] = 2;
- else // As left is different interlaced.
- bS[i] = 1;
- }
- }
-
- if( MB_INTERLACED )
- {
- for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[i];
- for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[4+i];
- }
- else
- {
- for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[2*i];
- for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[1+2*i];
- }
- }
- }
-
- if( h->mb.i_neighbour & MB_TOP )
- {
- if( !(h->mb.i_mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] )
- {
- /* Need to filter both fields (even for frame macroblocks).
- * Filter top two rows using the top macroblock of the above
- * pair and then the bottom one. */
- int mbn_xy = h->mb.i_mb_xy - 2 * h->mb.i_mb_stride;
- uint32_t nnz_cur[4];
- nnz_cur[0] = h->mb.cache.non_zero_count[x264_scan8[0]+0];
- nnz_cur[1] = h->mb.cache.non_zero_count[x264_scan8[0]+1];
- nnz_cur[2] = h->mb.cache.non_zero_count[x264_scan8[0]+2];
- nnz_cur[3] = h->mb.cache.non_zero_count[x264_scan8[0]+3];
- /* Munge NNZ for cavlc + 8x8dct */
- if( !h->param.b_cabac && h->pps->b_transform_8x8_mode &&
- h->mb.mb_transform_size[h->mb.i_mb_xy] )
- {
- int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
- int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] );
- nnz_cur[0] = nnz_cur[1] = !!nnz0;
- nnz_cur[2] = nnz_cur[3] = !!nnz1;
- }
-
- for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
- {
- int mbn_intra = IS_INTRA( h->mb.type[mbn_xy] );
- uint8_t (*nnz)[48] = h->mb.non_zero_count;
-
- uint32_t nnz_top[4];
- nnz_top[0] = nnz[mbn_xy][3*4+0];
- nnz_top[1] = nnz[mbn_xy][3*4+1];
- nnz_top[2] = nnz[mbn_xy][3*4+2];
- nnz_top[3] = nnz[mbn_xy][3*4+3];
-
- if( !h->param.b_cabac && h->pps->b_transform_8x8_mode &&
- (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[mbn_xy] )
- {
- int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] );
- int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] );
- nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0;
- nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
- }
-
- uint8_t bS[4];
- if( intra_cur || mbn_intra )
- M32( bS ) = 0x03030303;
- else
- {
- for( int i = 0; i < 4; i++ )
- {
- if( nnz_cur[i] || nnz_top[i] )
- bS[i] = 2;
- else
- bS[i] = 1;
- }
- }
- for( int i = 0; i < 4; i++ )
- bs[1][4*j][i] = bS[i];
- }
- }
- }
-}
-
static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int a, int b, int b_chroma, x264_deblock_inter_t pf_inter )
{
int index_a = i_qp + a;
left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
luma_qp[0] = (qp + left_qp[0] + 1) >> 1;
chroma_qp[0] = (qpc + h->chroma_qp_table[left_qp[0]] + 1) >> 1;
- if( bs[0][0][0] == 4 )
+ if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] ) )
{
deblock_edge_intra( h, pixy, 2*stridey, bs[0][0], luma_qp[0], a, b, 0, luma_intra_deblock );
deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
luma_qp[1] = (qp + left_qp[1] + 1) >> 1;
chroma_qp[1] = (qpc + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
- if( bs[0][4][0] == 4)
+ if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[1]] ) )
{
deblock_edge_intra( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], a, b, 0, luma_intra_deblock );
deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
{
int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
- for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride)
+ for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
{
int qpt = h->mb.qp[mbn_xy];
int qp_top = (qp + qpt + 1) >> 1;
int qpc_top = (qpc + h->chroma_qp_table[qpt] + 1) >> 1;
+ int intra_top = IS_INTRA( h->mb.type[mbn_xy] );
+ if( intra_cur || intra_top )
+ M32( bs[1][4*j] ) = 0x03030303;
// deblock the first horizontal edge of the even rows, then the first horizontal edge of the odd rows
deblock_edge( h, pixy + j*stridey, 2* stridey, bs[1][4*j], qp_top, a, b, 0, deblock_v_luma_c );
}
else
{
- if( intra_top )
+ if( intra_cur || intra_top )
M32( bs[1][0] ) = 0x03030303;
FILTER( , 1, 0, qp_top, qpc_top );
}
}
else
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
- bs, 4 >> SLICE_MBAFF, h->sh.i_type == SLICE_TYPE_B, h );
+ bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B );
int transform_8x8 = h->mb.b_transform_8x8;
void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
- int mvy_limit, int bframe, x264_t *h );
+ int mvy_limit, int bframe );
void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
- int mvy_limit, int bframe, x264_t *h );
+ int mvy_limit, int bframe );
void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
- int mvy_limit, int bframe, x264_t *h );
+ int mvy_limit, int bframe );
void x264_deblock_strength_avx ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
- int mvy_limit, int bframe, x264_t *h );
+ int mvy_limit, int bframe );
#if ARCH_X86
void x264_deblock_h_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
}
#endif
#endif // !HIGH_BIT_DEPTH
-
- if( b_mbaff ) pf->deblock_strength = deblock_strength_mbaff_c;
}
h->mb.i_neighbour |= MB_TOP;
}
+static void x264_macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][4] )
+{
+ if( (h->mb.i_neighbour & MB_LEFT) && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
+ {
+ static const uint8_t offset[2][2][8] =
+ { { { 0, 0, 0, 0, 1, 1, 1, 1 },
+ { 2, 2, 2, 2, 3, 3, 3, 3 }, },
+ { { 0, 1, 2, 3, 0, 1, 2, 3 },
+ { 0, 1, 2, 3, 0, 1, 2, 3 }, }
+ };
+ ALIGNED_ARRAY_8( uint8_t, tmpbs, [8] );
+
+ const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1];
+ uint8_t (*nnz)[48] = h->mb.non_zero_count;
+
+ for( int i = 0; i < 8; i++ )
+ {
+ int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1];
+ int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
+ int nnz_left = nnz[left][3 + 4*off[i]];
+ if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
+ {
+ int j = off[i]&~1;
+ if( h->mb.mb_transform_size[left] )
+ nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] ));
+ }
+ tmpbs[i] = (nnz_left || nnz_this) ? 2 : 1;
+ }
+
+ if( MB_INTERLACED )
+ {
+ CP32( bs[0][0], &tmpbs[0] );
+ CP32( bs[0][4], &tmpbs[4] );
+ }
+ else
+ {
+ for( int i = 0; i < 4; i++ ) bs[0][0][i] = tmpbs[2*i];
+ for( int i = 0; i < 4; i++ ) bs[0][4][i] = tmpbs[1+2*i];
+ }
+ }
+
+ if( (h->mb.i_neighbour & MB_TOP) && MB_INTERLACED != h->mb.field[h->mb.i_mb_top_xy] )
+ {
+ if( !(h->mb.i_mb_y&1) && !MB_INTERLACED )
+ {
+ /* Need to filter both fields (even for frame macroblocks).
+ * Filter top two rows using the top macroblock of the above
+ * pair and then the bottom one. */
+ int mbn_xy = h->mb.i_mb_xy - 2 * h->mb.i_mb_stride;
+ uint8_t *nnz_cur = &h->mb.cache.non_zero_count[x264_scan8[0]];
+
+ for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
+ {
+ uint8_t (*nnz)[48] = h->mb.non_zero_count;
+
+ ALIGNED_4( uint8_t nnz_top[4] );
+ CP32( nnz_top, &nnz[mbn_xy][3*4] );
+
+ if( !h->param.b_cabac && h->pps->b_transform_8x8_mode && h->mb.mb_transform_size[mbn_xy] )
+ {
+ int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] );
+ int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] );
+ nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0;
+ nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
+ }
+
+ for( int i = 0; i < 4; i++ )
+ bs[1][4*j][i] = (nnz_cur[i] || nnz_top[i]) ? 2 : 1;
+ }
+ }
+ else
+ for( int i = 0; i < 4; i++ )
+ bs[1][0][i] = X264_MAX( bs[1][0][i], 1 );
+ }
+}
+
void x264_macroblock_deblock_strength( x264_t *h )
{
uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
{
- memset( bs[0], 3, 4*4*sizeof(uint8_t) );
- memset( bs[1], 3, 4*4*sizeof(uint8_t) );
- if( !SLICE_MBAFF ) return;
+ memset( bs[0][1], 3, 3*4*sizeof(uint8_t) );
+ memset( bs[1][1], 3, 3*4*sizeof(uint8_t) );
+ return;
}
/* If we have multiple slices and we're deblocking on slice edges, we
}
}
- int mvy_limit = 4 >> MB_INTERLACED;
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
- bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B, h );
+ bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B );
+
+ if( SLICE_MBAFF )
+ x264_macroblock_deblock_strength_mbaff( h, bs );
}
static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )