/*****************************************************************************
* macroblock.c: macroblock common functions
*****************************************************************************
- * Copyright (C) 2003-2011 x264 project
+ * Copyright (C) 2003-2016 x264 project
*
* Authors: Fiona Glaser <fiona@x264.com>
* Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
+ * Henrik Gramner <henrik@gramner.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
}
else
{
- // chroma is offset if MCing from a field of opposite parity
- if( MB_INTERLACED & i_ref )
+ int v_shift = CHROMA_V_SHIFT;
+ // Chroma in 4:2:0 is offset if MCing from a field of opposite parity
+ if( v_shift & MB_INTERLACED & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
- h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
- &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ int offset = (4*FDEC_STRIDE>>v_shift)*y + 2*x;
+ height = 4*height >> v_shift;
+
+ h->mc.mc_chroma( &h->mb.pic.p_fdec[1][offset],
+ &h->mb.pic.p_fdec[2][offset], FDEC_STRIDE,
h->mb.pic.p_fref[0][i_ref][4], h->mb.pic.i_stride[1],
- mvx, mvy, 2*width, 2*height );
+ mvx, 2*mvy>>v_shift, 2*width, height );
if( h->sh.weight[i_ref][1].weightfn )
- h->sh.weight[i_ref][1].weightfn[width>>1]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->sh.weight[i_ref][1], height*2 );
+ h->sh.weight[i_ref][1].weightfn[width>>1]( &h->mb.pic.p_fdec[1][offset], FDEC_STRIDE,
+ &h->mb.pic.p_fdec[1][offset], FDEC_STRIDE,
+ &h->sh.weight[i_ref][1], height );
if( h->sh.weight[i_ref][2].weightfn )
- h->sh.weight[i_ref][2].weightfn[width>>1]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->sh.weight[i_ref][2],height*2 );
+ h->sh.weight[i_ref][2].weightfn[width>>1]( &h->mb.pic.p_fdec[2][offset], FDEC_STRIDE,
+ &h->mb.pic.p_fdec[2][offset], FDEC_STRIDE,
+ &h->sh.weight[i_ref][2], height );
}
}
static NOINLINE void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
}
else
{
- if( MB_INTERLACED & i_ref )
+ int v_shift = CHROMA_V_SHIFT;
+ if( v_shift & MB_INTERLACED & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
- h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
- &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ int offset = (4*FDEC_STRIDE>>v_shift)*y + 2*x;
+ h->mc.mc_chroma( &h->mb.pic.p_fdec[1][offset],
+ &h->mb.pic.p_fdec[2][offset], FDEC_STRIDE,
h->mb.pic.p_fref[1][i_ref][4], h->mb.pic.i_stride[1],
- mvx, mvy, 2*width, 2*height );
+ mvx, 2*mvy>>v_shift, 2*width, 4*height>>v_shift );
}
}
int mvy0 = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
int i_mode = x264_size2pixel[height][width];
- int i_stride0 = 16, i_stride1 = 16;
- ALIGNED_ARRAY_16( pixel, tmp0,[16*16] );
- ALIGNED_ARRAY_16( pixel, tmp1,[16*16] );
+ intptr_t i_stride0 = 16, i_stride1 = 16;
+ ALIGNED_ARRAY_N( pixel, tmp0,[16*16] );
+ ALIGNED_ARRAY_N( pixel, tmp1,[16*16] );
pixel *src0, *src1;
MC_LUMA_BI( 0 );
}
else
{
- if( MB_INTERLACED & i_ref0 )
+ int v_shift = CHROMA_V_SHIFT;
+ if( v_shift & MB_INTERLACED & i_ref0 )
mvy0 += (h->mb.i_mb_y & 1)*4 - 2;
- if( MB_INTERLACED & i_ref1 )
+ if( v_shift & MB_INTERLACED & i_ref1 )
mvy1 += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( tmp0, tmp0+8, 16, h->mb.pic.p_fref[0][i_ref0][4], h->mb.pic.i_stride[1],
- mvx0, mvy0, 2*width, 2*height );
+ mvx0, 2*mvy0>>v_shift, 2*width, 4*height>>v_shift );
h->mc.mc_chroma( tmp1, tmp1+8, 16, h->mb.pic.p_fref[1][i_ref1][4], h->mb.pic.i_stride[1],
- mvx1, mvy1, 2*width, 2*height );
- h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0, 16, tmp1, 16, weight );
- h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0+8, 16, tmp1+8, 16, weight );
+ mvx1, 2*mvy1>>v_shift, 2*width, 4*height>>v_shift );
+
+ int chromapix = h->luma2chroma_pixel[i_mode];
+ int offset = (4*FDEC_STRIDE>>v_shift)*y + 2*x;
+ h->mc.avg[chromapix]( &h->mb.pic.p_fdec[1][offset], FDEC_STRIDE, tmp0, 16, tmp1, 16, weight );
+ h->mc.avg[chromapix]( &h->mb.pic.p_fdec[2][offset], FDEC_STRIDE, tmp0+8, 16, tmp1+8, 16, weight );
}
}
h->mb.b_interlaced = PARAM_INTERLACED;
- CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
- memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+ PREALLOC_INIT
+
+ PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+ PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
- CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+ PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
/* all coeffs */
- CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+ PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
if( h->param.b_cabac )
{
- CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
- CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+ PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+ if( h->param.i_bframe )
+ PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
}
for( int i = 0; i < 2; i++ )
i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
for( int j = !i; j < i_refs; j++ )
- {
- CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
- M32( h->mb.mvr[i][j][0] ) = 0;
- h->mb.mvr[i][j]++;
- }
+ PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
}
if( h->param.analyse.i_weighted_pred )
}
else
{
- luma_plane_size = h->fdec->i_stride[0] * (h->mb.i_mb_height*16+2*i_padv);
+ /* Both ref and fenc are stored for 4:2:0 and 4:2:2, which means that 4:2:0 and 4:4:4
+ * need the same amount of space and 4:2:2 needs twice that much */
+ luma_plane_size = h->fdec->i_stride[0] * (h->mb.i_mb_height*(16<<(CHROMA_FORMAT==CHROMA_422))+2*i_padv);
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
//smart can weight one ref and one offset -1 in 8-bit
}
for( int i = 0; i < numweightbuf; i++ )
- CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ }
+
+ PREALLOC_END( h->mb.base );
+
+ memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+ for( int i = 0; i < 2; i++ )
+ {
+ int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+ for( int j = !i; j < i_refs; j++ )
+ {
+ M32( h->mb.mvr[i][j][0] ) = 0;
+ h->mb.mvr[i][j]++;
+ }
}
return 0;
}
void x264_macroblock_cache_free( x264_t *h )
{
- for( int i = 0; i < 2; i++ )
- for( int j = !i; j < X264_REF_MAX*2; j++ )
- if( h->mb.mvr[i][j] )
- x264_free( h->mb.mvr[i][j]-1 );
- for( int i = 0; i < X264_REF_MAX; i++ )
- x264_free( h->mb.p_weight_buf[i] );
-
- if( h->param.b_cabac )
- {
- x264_free( h->mb.chroma_pred_mode );
- x264_free( h->mb.mvd[0] );
- x264_free( h->mb.mvd[1] );
- }
- x264_free( h->mb.slice_table );
- x264_free( h->mb.intra4x4_pred_mode );
- x264_free( h->mb.non_zero_count );
- x264_free( h->mb.mb_transform_size );
- x264_free( h->mb.skipbp );
- x264_free( h->mb.cbp );
- x264_free( h->mb.qp );
+ x264_free( h->mb.base );
}
int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
{
if( !b_lookahead )
{
- for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
+ for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
{
- /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
- CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
+ CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
h->intra_border_backup[i][j] += 16;
- if( !PARAM_INTERLACED )
- h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
}
for( int i = 0; i <= PARAM_INTERLACED; i++ )
{
- CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
+ if( h->param.b_sliced_threads )
+ {
+ /* Only allocate the first one, and allocate it for the whole frame, because we
+ * won't be deblocking until after the frame is fully encoded. */
+ if( h == h->thread[0] && !i )
+ CHECKED_MALLOC( h->deblock_strength[0], sizeof(**h->deblock_strength) * h->mb.i_mb_count );
+ else
+ h->deblock_strength[i] = h->thread[0]->deblock_strength[0];
+ }
+ else
+ CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
h->deblock_strength[1] = h->deblock_strength[i];
}
}
int scratch_size = 0;
if( !b_lookahead )
{
- int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(int16_t);
+ int buf_hpel = (h->thread[0]->fdec->i_width[0]+48+32) * sizeof(int16_t);
int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
}
- int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
+ int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
scratch_size = X264_MAX( scratch_size, buf_mbtree );
if( scratch_size )
CHECKED_MALLOC( h->scratch_buffer, scratch_size );
else
h->scratch_buffer = NULL;
+ int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
+ int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
+ scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
+ CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
+
return 0;
fail:
return -1;
if( !b_lookahead )
{
for( int i = 0; i <= PARAM_INTERLACED; i++ )
- x264_free( h->deblock_strength[i] );
- for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
+ if( !h->param.b_sliced_threads || (h == h->thread[0] && !i) )
+ x264_free( h->deblock_strength[i] );
+ for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
x264_free( h->intra_border_backup[i][j] - 16 );
}
x264_free( h->scratch_buffer );
+ x264_free( h->scratch_buffer2 );
}
void x264_macroblock_slice_init( x264_t *h )
}
else if( h->sh.i_type == SLICE_TYPE_P )
{
- memset( h->mb.cache.skip, 0, sizeof( h->mb.cache.skip ) );
-
if( h->sh.i_disable_deblocking_filter_idc != 1 && h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
{
deblock_ref_table(-2) = -2;
(h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);
h->mb.i_mb_prev_xy = -1;
+ /* 4:2:0 4:2:2 4:4:4
+ * fdec fenc fdec fenc fdec fenc
+ * y y y y y y y Y Y Y Y y y y y y y y Y Y Y Y y y y y y y y Y Y Y Y
+ * y Y Y Y Y Y Y Y Y y Y Y Y Y Y Y Y Y y Y Y Y Y Y Y Y Y
+ * y Y Y Y Y Y Y Y Y y Y Y Y Y Y Y Y Y y Y Y Y Y Y Y Y Y
+ * y Y Y Y Y Y Y Y Y y Y Y Y Y Y Y Y Y y Y Y Y Y Y Y Y Y
+ * y Y Y Y Y U U V V y Y Y Y Y U U V V y Y Y Y Y U U U U
+ * u u u v v v U U V V u u u v v v U U V V u u u u u u u U U U U
+ * u U U v V V u U U v V V U U V V u U U U U U U U U
+ * u U U v V V u U U v V V U U V V u U U U U U U U U
+ * u U U v V V u U U U U V V V V
+ * u U U v V V u U U U U V V V V
+ * v v v v v v v V V V V
+ * v V V V V V V V V
+ * v V V V V
+ * v V V V V
+ * v V V V V
+ */
h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE;
h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 36*FDEC_STRIDE;
}
- /* fdec: fenc:
- * yyyyyyy
- * yYYYY YYYY
- * yYYYY YYYY
- * yYYYY YYYY
- * yYYYY YYYY
- * uuu vvv UUVV
- * uUU vVV UUVV
- * uUU vVV
- */
else
{
h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
int stride_y = fenc->i_stride[0];
int stride_uv = fenc->i_stride[1];
int off_y = 16 * i_mb_x + 16 * i_mb_y * stride_y;
- int off_uv = 16 * i_mb_x + 8 * i_mb_y * stride_uv;
+ int off_uv = 16 * i_mb_x + (16 * i_mb_y * stride_uv >> CHROMA_V_SHIFT);
h->mc.prefetch_fenc( fenc->plane[0]+off_y, stride_y,
fenc->plane[1]+off_uv, stride_uv, i_mb_x );
}
static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
{
int mb_interlaced = b_mbaff && MB_INTERLACED;
- int w = b_chroma ? 8 : 16;
+ int height = b_chroma ? 16 >> CHROMA_V_SHIFT : 16;
int i_stride = h->fdec->i_stride[i];
int i_stride2 = i_stride << mb_interlaced;
int i_pix_offset = mb_interlaced
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
+ ? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
+ : 16 * mb_x + height * mb_y * i_stride;
pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
+ int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : !(mb_y&1);
pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
/* ref_pix_offset[0] references the current field and [1] the opposite field. */
h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
if( b_chroma )
{
- h->mc.load_deinterleave_8x8x2_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2 );
+ h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height );
memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
- if( b_mbaff )
- {
- h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
- h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
- }
+ h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
+ h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
}
else
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 );
memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
- if( b_mbaff )
- h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
+ h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
}
- if( b_mbaff )
+ if( b_mbaff || h->mb.b_reencode_mb )
{
- for( int j = 0; j < w; j++ )
+ for( int j = 0; j < height; j++ )
if( b_chroma )
{
h->mb.pic.p_fdec[1][-1+j*FDEC_STRIDE] = plane_fdec[-2+j*i_stride2];
const x264_left_table_t *left_index_table = h->mb.left_index_table;
+ h->mb.cache.deblock_strength = h->deblock_strength[mb_y&1][h->param.b_sliced_threads?h->mb.i_mb_xy:mb_x];
+
/* load cache */
if( h->mb.i_neighbour & MB_TOP )
{
/* load non_zero_count */
CP32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8], &nnz[top][12] );
- CP32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8], &nnz[top][16+4 + 8*CHROMA444] );
- CP32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8], &nnz[top][32+4 + 8*CHROMA444] );
+ CP32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8], &nnz[top][16-4 + (16>>CHROMA_V_SHIFT)] );
+ CP32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8], &nnz[top][32-4 + (16>>CHROMA_V_SHIFT)] );
/* Finish the prefetching */
for( int l = 0; l < lists; l++ )
h->mb.cache.non_zero_count[x264_scan8[ 8] - 1] = nnz[lbot][left_index_table->nnz[2]];
h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[lbot][left_index_table->nnz[3]];
- if( CHROMA444 )
+ if( CHROMA_FORMAT >= CHROMA_422 )
{
- h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+16];
- h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+16];
- h->mb.cache.non_zero_count[x264_scan8[16+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+16];
- h->mb.cache.non_zero_count[x264_scan8[16+10] - 1] = nnz[lbot][left_index_table->nnz[3]+16];
- h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+32];
- h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+32];
- h->mb.cache.non_zero_count[x264_scan8[32+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+32];
- h->mb.cache.non_zero_count[x264_scan8[32+10] - 1] = nnz[lbot][left_index_table->nnz[3]+32];
+ int offset = (4>>CHROMA_H_SHIFT) - 4;
+ h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+16+offset];
+ h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+16+offset];
+ h->mb.cache.non_zero_count[x264_scan8[16+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+16+offset];
+ h->mb.cache.non_zero_count[x264_scan8[16+10] - 1] = nnz[lbot][left_index_table->nnz[3]+16+offset];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+32+offset];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+32+offset];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+32+offset];
+ h->mb.cache.non_zero_count[x264_scan8[32+10] - 1] = nnz[lbot][left_index_table->nnz[3]+32+offset];
}
else
{
h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] =
h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] =
h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = 0x80;
- if( CHROMA444 )
+ if( CHROMA_FORMAT >= CHROMA_422 )
{
h->mb.cache.non_zero_count[x264_scan8[16+ 8] - 1] =
h->mb.cache.non_zero_count[x264_scan8[16+10] - 1] =
{
x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+ 4*FDEC_STRIDE );
x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+ 4*FDEC_STRIDE );
+ if( CHROMA_FORMAT == CHROMA_422 )
+ {
+ x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+12*FDEC_STRIDE );
+ x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+12*FDEC_STRIDE );
+ }
x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 0 );
}
}
{
// Looking at the bottom field so always take the bottom macroblock of the pair.
h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
- h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
+ h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[1]];
h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]];
CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] );
CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] );
}
}
- if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
- h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+ if( b_mbaff && mb_x == 0 && !(mb_y&1) )
+ {
+ if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
+ h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
+ else
+ h->mb.field_decoding_flag = 0;
+ }
/* Check whether skip here would cause decoder to predict interlace mode incorrectly.
* FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
if( b_mbaff )
{
if( MB_INTERLACED != h->mb.field_decoding_flag &&
- h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+ (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
h->mb.b_allow_skip = 0;
- if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
- {
- if( h->mb.i_neighbour & MB_LEFT )
- {
- if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- else if( h->mb.i_neighbour & MB_TOP )
- {
- if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- else // Frame mb pair is predicted
- {
- if( MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- }
}
if( h->param.b_cabac )
if( !h->param.b_cabac && h->pps->b_transform_8x8_mode && h->mb.mb_transform_size[mbn_xy] )
{
- int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] );
- int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] );
- nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0;
- nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
+ nnz_top[0] = nnz_top[1] = M16( &nnz[mbn_xy][ 8] ) || M16( &nnz[mbn_xy][12] );
+ nnz_top[2] = nnz_top[3] = M16( &nnz[mbn_xy][10] ) || M16( &nnz[mbn_xy][14] );
}
for( int i = 0; i < 4; i++ )
void x264_macroblock_deblock_strength( x264_t *h )
{
- uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
+ uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength;
if( IS_INTRA( h->mb.i_type ) )
{
- memset( bs[0][1], 3, 3*4*sizeof(uint8_t) );
- memset( bs[1][1], 3, 3*4*sizeof(uint8_t) );
+ M32( bs[0][1] ) = 0x03030303;
+ M64( bs[0][2] ) = 0x0303030303030303ULL;
+ M32( bs[1][1] ) = 0x03030303;
+ M64( bs[1][2] ) = 0x0303030303030303ULL;
return;
}
/* Early termination: in this case, nnz guarantees all edges use strength 2.*/
- if( h->mb.b_transform_8x8 && (h->mb.i_cbp_luma&7) == 7 && !CHROMA444 )
+ if( h->mb.b_transform_8x8 && !CHROMA444 )
{
- M32( bs[0][0] ) = 0x02020202;
- M32( bs[0][2] ) = 0x02020202;
- M32( bs[0][4] ) = 0x02020202;
- M32( bs[1][0] ) = 0x02020202;
- M32( bs[1][2] ) = 0x02020202;
- M32( bs[1][4] ) = 0x02020202;
- return;
+ int cbp_mask = 0xf >> CHROMA_V_SHIFT;
+ if( (h->mb.i_cbp_luma&cbp_mask) == cbp_mask )
+ {
+ M32( bs[0][0] ) = 0x02020202;
+ M32( bs[0][2] ) = 0x02020202;
+ M32( bs[0][4] ) = 0x02020202;
+ M64( bs[1][0] ) = 0x0202020202020202ULL; /* [1][1] and [1][3] has to be set for 4:2:2 */
+ M64( bs[1][2] ) = 0x0202020202020202ULL;
+ M32( bs[1][4] ) = 0x02020202;
+ return;
+ }
}
int neighbour_changed = 0;
static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
{
- int w = b_chroma ? 8 : 16;
+ int height = b_chroma ? 16>>CHROMA_V_SHIFT : 16;
int i_stride = h->fdec->i_stride[i];
int i_stride2 = i_stride << (b_mbaff && MB_INTERLACED);
int i_pix_offset = (b_mbaff && MB_INTERLACED)
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
+ ? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
+ : 16 * mb_x + height * mb_y * i_stride;
if( b_chroma )
- h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
+ h->mc.store_interleave_chroma( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], height );
else
h->mc.copy[PIXEL_16x16]( &h->fdec->plane[i][i_pix_offset], i_stride2, h->mb.pic.p_fdec[i], FDEC_STRIDE, 16 );
}
* For progressive mbs this is the bottom two rows, and for interlaced the
* bottom row of each field. We also store samples needed for the next
* mbpair in intra_border_backup[2]. */
- int backup_dst = !b_mbaff ? 0 : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
+ int backup_dst = !b_mbaff ? (mb_y&1) : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
if( CHROMA444 )
{
}
else
{
- memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
+ int backup_src = (15>>CHROMA_V_SHIFT) * FDEC_STRIDE;
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*sizeof(pixel) );
}
if( b_mbaff )
{
}
else
{
- backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE;
+ if( CHROMA_FORMAT == CHROMA_420 )
+ backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE;
memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) );
memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*sizeof(pixel) );
}
}
}
- else
- {
- /* In progressive we update intra_border_backup in-place, so the topleft neighbor will
- * no longer exist there when load_pic_pointers wants it. Move it within p_fdec instead. */
- h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[0][-FDEC_STRIDE+15];
- h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+7 + 8*CHROMA444];
- h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+7 + 8*CHROMA444];
- }
}
void x264_macroblock_cache_save( x264_t *h )
CP32( &nnz[16+1*4], &h->mb.cache.non_zero_count[x264_scan8[16+2]] );
CP32( &nnz[32+0*4], &h->mb.cache.non_zero_count[x264_scan8[32+0]] );
CP32( &nnz[32+1*4], &h->mb.cache.non_zero_count[x264_scan8[32+2]] );
- if( CHROMA444 )
+ if( CHROMA_FORMAT >= CHROMA_422 )
{
CP32( &nnz[16+2*4], &h->mb.cache.non_zero_count[x264_scan8[16+ 8]] );
CP32( &nnz[16+3*4], &h->mb.cache.non_zero_count[x264_scan8[16+10]] );
uint8_t (*mvd0)[2] = h->mb.mvd[0][i_mb_xy];
uint8_t (*mvd1)[2] = h->mb.mvd[1][i_mb_xy];
if( IS_INTRA(i_mb_type) && i_mb_type != I_PCM )
- h->mb.chroma_pred_mode[i_mb_xy] = x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ];
+ h->mb.chroma_pred_mode[i_mb_xy] = x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode];
else
h->mb.chroma_pred_mode[i_mb_xy] = I_PRED_CHROMA_DC;