X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=sidebyside;f=common%2Fmacroblock.h;h=7974f5ab1bf21a067898747b25f6206d73ae6111;hb=5c6570495f8f1c716b294aee1430d8766a4beb9c;hp=a2d7db4e92b6824b4a84a63567b452a5a7f30364;hpb=8761805b8240c0da5f9d6d79b1a2affe3b5213ad;p=x264 diff --git a/common/macroblock.h b/common/macroblock.h index a2d7db4e..7974f5ab 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -1,7 +1,7 @@ /***************************************************************************** - * macroblock.h: h264 encoder library + * macroblock.h: macroblock common functions ***************************************************************************** - * Copyright (C) 2005-2008 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -20,6 +20,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. *****************************************************************************/ #ifndef X264_MACROBLOCK_H @@ -167,7 +170,11 @@ static const uint8_t x264_mb_partition_count_table[17] = }; static const uint8_t x264_mb_partition_pixel_table[17] = { - 6, 4, 5, 3, 6, 4, 5, 3, 6, 4, 5, 3, 3, 3, 1, 2, 0 + PIXEL_4x4, PIXEL_8x4, PIXEL_4x8, PIXEL_8x8, /* D_L0_* */ + PIXEL_4x4, PIXEL_8x4, PIXEL_4x8, PIXEL_8x8, /* D_L1_* */ + PIXEL_4x4, PIXEL_8x4, PIXEL_4x8, PIXEL_8x8, /* D_BI_* */ + PIXEL_8x8, /* D_DIRECT_8x8 */ + PIXEL_8x8, PIXEL_16x8, PIXEL_8x16, PIXEL_16x16, /* 8x8 .. 16x16 */ }; /* zigzags are transposed with respect to the tables in the standard */ @@ -238,39 +245,80 @@ static const uint16_t block_idx_xy_fdec[16] = 2*4 + 3*4*FDEC_STRIDE, 3*4 + 3*4*FDEC_STRIDE }; -static const uint8_t i_chroma_qp_table[52+12*2] = +#define QP(qP) ( (qP)+QP_BD_OFFSET ) +static const uint8_t i_chroma_qp_table[QP_MAX+1+12*2] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 29, 30, 31, 32, 32, 33, 34, 34, 35, 35, - 36, 36, 37, 37, 37, 38, 38, 38, 39, 39, - 39, 39, - 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, +#if BIT_DEPTH > 9 + QP(-12),QP(-11),QP(-10), QP(-9), QP(-8), QP(-7), +#endif +#if BIT_DEPTH > 8 + QP(-6), QP(-5), QP(-4), QP(-3), QP(-2), QP(-1), +#endif + QP(0), QP(1), QP(2), QP(3), QP(4), QP(5), + QP(6), QP(7), QP(8), QP(9), QP(10), QP(11), + QP(12), QP(13), QP(14), QP(15), QP(16), QP(17), + QP(18), QP(19), QP(20), QP(21), QP(22), QP(23), + QP(24), QP(25), QP(26), QP(27), QP(28), QP(29), + QP(29), QP(30), QP(31), QP(32), QP(32), QP(33), + QP(34), QP(34), QP(35), QP(35), QP(36), QP(36), + QP(37), QP(37), QP(37), QP(38), QP(38), QP(38), + QP(39), QP(39), QP(39), QP(39), + QP(39), QP(39), QP(39), QP(39), QP(39), QP(39), + QP(39), QP(39), QP(39), QP(39), QP(39), QP(39), }; +#undef QP enum cabac_ctx_block_cat_e { - DCT_LUMA_DC = 0, - DCT_LUMA_AC = 1, - DCT_LUMA_4x4 = 2, - DCT_CHROMA_DC = 3, - DCT_CHROMA_AC = 4, - DCT_LUMA_8x8 = 5, + DCT_LUMA_DC = 0, + DCT_LUMA_AC = 1, + DCT_LUMA_4x4 = 2, + DCT_CHROMA_DC = 3, + DCT_CHROMA_AC = 4, + DCT_LUMA_8x8 = 5, + DCT_CHROMAU_DC = 6, + DCT_CHROMAU_AC = 7, + DCT_CHROMAU_4x4 = 8, + DCT_CHROMAU_8x8 = 9, + DCT_CHROMAV_DC = 10, + DCT_CHROMAV_AC = 11, + DCT_CHROMAV_4x4 = 12, + DCT_CHROMAV_8x8 = 13, }; +static const uint8_t ctx_cat_plane[6][3] = +{ + { DCT_LUMA_DC, DCT_CHROMAU_DC, DCT_CHROMAV_DC}, + { DCT_LUMA_AC, DCT_CHROMAU_AC, DCT_CHROMAV_AC}, + {DCT_LUMA_4x4, DCT_CHROMAU_4x4, DCT_CHROMAV_4x4}, + {0}, + {0}, + {DCT_LUMA_8x8, DCT_CHROMAU_8x8, DCT_CHROMAV_8x8} +}; + +/* Per-frame allocation: is allocated per-thread only in frame-threads mode. */ +int x264_macroblock_cache_allocate( x264_t *h ); +void x264_macroblock_cache_free( x264_t *h ); + +/* Per-thread allocation: is allocated per-thread even in sliced-threads mode. */ +int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead ); +void x264_macroblock_thread_free( x264_t *h, int b_lookahead ); -int x264_macroblock_cache_init( x264_t *h ); void x264_macroblock_slice_init( x264_t *h ); -void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ); +void x264_macroblock_thread_init( x264_t *h ); +void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y ); +void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y ); +void x264_macroblock_deblock_strength( x264_t *h ); void x264_macroblock_cache_save( x264_t *h ); -void x264_macroblock_cache_end( x264_t *h ); void x264_macroblock_bipred_init( x264_t *h ); void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y ); +void x264_copy_column8( pixel *dst, pixel *src ); + /* x264_mb_predict_mv_16x16: * set mvp with predicted mv for D_16x16 block * h->mb. need only valid values from other blocks */ @@ -291,10 +339,6 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv * if b_changed != NULL, set it to whether refs or mvs differ from * before this functioncall. */ int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed ); -/* x264_mb_load_mv_direct8x8: - * set h->mb.cache.mv and h->mb.cache.ref for B_DIRECT - * must be called only after x264_mb_predict_mv_direct16x16 */ -void x264_mb_load_mv_direct8x8( x264_t *h, int idx ); /* x264_mb_predict_mv_ref16x16: * set mvc with D_16x16 prediction. * uses all neighbors, even those that didn't end up using this ref. @@ -304,25 +348,25 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[ void x264_mb_mc( x264_t *h ); void x264_mb_mc_8x8( x264_t *h, int i8 ); -static ALWAYS_INLINE uint32_t pack16to32( int a, int b ) +static ALWAYS_INLINE uint32_t pack16to32( uint32_t a, uint32_t b ) { -#ifdef WORDS_BIGENDIAN +#if WORDS_BIGENDIAN return b + (a<<16); #else return a + (b<<16); #endif } -static ALWAYS_INLINE uint32_t pack8to16( int a, int b ) +static ALWAYS_INLINE uint32_t pack8to16( uint32_t a, uint32_t b ) { -#ifdef WORDS_BIGENDIAN +#if WORDS_BIGENDIAN return b + (a<<8); #else return a + (b<<8); #endif } -static ALWAYS_INLINE uint32_t pack8to32( int a, int b, int c, int d ) +static ALWAYS_INLINE uint32_t pack8to32( uint32_t a, uint32_t b, uint32_t c, uint32_t d ) { -#ifdef WORDS_BIGENDIAN +#if WORDS_BIGENDIAN return d + (c<<8) + (b<<16) + (a<<24); #else return a + (b<<8) + (c<<16) + (d<<24); @@ -330,97 +374,30 @@ static ALWAYS_INLINE uint32_t pack8to32( int a, int b, int c, int d ) } static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b ) { -#ifdef WORDS_BIGENDIAN +#if WORDS_BIGENDIAN return (b&0xFFFF) + (a<<16); #else return (a&0xFFFF) + (b<<16); #endif } -static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int height, uint8_t val ) -{ - if( width == 4 ) - { - uint32_t val2 = val * 0x01010101; - ((uint32_t*)dst)[0] = val2; - if( height >= 2 ) ((uint32_t*)dst)[2] = val2; - if( height == 4 ) ((uint32_t*)dst)[4] = val2; - if( height == 4 ) ((uint32_t*)dst)[6] = val2; - } - else // 2 - { - uint32_t val2 = val * 0x0101; - ((uint16_t*)dst)[ 0] = val2; - if( height >= 2 ) ((uint16_t*)dst)[ 4] = val2; - if( height == 4 ) ((uint16_t*)dst)[ 8] = val2; - if( height == 4 ) ((uint16_t*)dst)[12] = val2; - } -} -static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val ) -{ - int dy; - if( width == 1 || WORD_SIZE < 8 ) - { - for( dy = 0; dy < height; dy++ ) - { - ((uint32_t*)dst)[8*dy+0] = val; - if( width >= 2 ) ((uint32_t*)dst)[8*dy+1] = val; - if( width == 4 ) ((uint32_t*)dst)[8*dy+2] = val; - if( width == 4 ) ((uint32_t*)dst)[8*dy+3] = val; - } - } - else - { - uint64_t val64 = val + ((uint64_t)val<<32); - for( dy = 0; dy < height; dy++ ) - { - ((uint64_t*)dst)[4*dy+0] = val64; - if( width == 4 ) ((uint64_t*)dst)[4*dy+1] = val64; - } - } -} -#define x264_macroblock_cache_mv_ptr(a,x,y,w,h,l,mv) x264_macroblock_cache_mv(a,x,y,w,h,l,*(uint32_t*)mv) -static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv ) -{ - x264_macroblock_cache_rect4( &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y], width, height, mv ); -} -static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv ) -{ - x264_macroblock_cache_rect4( &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y], width, height, mv ); -} -static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref ) -{ - x264_macroblock_cache_rect1( &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y], width, height, ref ); -} -static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip ) -{ - x264_macroblock_cache_rect1( &h->mb.cache.skip[X264_SCAN8_0+x+8*y], width, height, b_skip ); -} -static ALWAYS_INLINE void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode ) -{ - int8_t *cache = &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y]; - cache[0] = cache[1] = cache[8] = cache[9] = i_mode; -} -#define array_non_zero(a) array_non_zero_int(a, sizeof(a)) -#define array_non_zero_int array_non_zero_int_c -static ALWAYS_INLINE int array_non_zero_int_c( void *v, int i_count ) +static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b ) { - uint64_t *x = v; - if(i_count == 8) - return !!x[0]; - else if(i_count == 16) - return !!(x[0]|x[1]); - else if(i_count == 32) - return !!(x[0]|x[1]|x[2]|x[3]); - else - { - int i; - i_count /= sizeof(uint64_t); - for( i = 0; i < i_count; i++ ) - if( x[i] ) return 1; - return 0; - } +#if WORDS_BIGENDIAN + return b + ((uint64_t)a<<32); +#else + return a + ((uint64_t)b<<32); +#endif } -static inline int x264_mb_predict_intra4x4_mode( x264_t *h, int idx ) + +#if HIGH_BIT_DEPTH +# define pack_pixel_1to2 pack16to32 +# define pack_pixel_2to4 pack32to64 +#else +# define pack_pixel_1to2 pack8to16 +# define pack_pixel_2to4 pack16to32 +#endif + +static ALWAYS_INLINE int x264_mb_predict_intra4x4_mode( x264_t *h, int idx ) { const int ma = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 1]; const int mb = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 8]; @@ -432,7 +409,7 @@ static inline int x264_mb_predict_intra4x4_mode( x264_t *h, int idx ) return m; } -static inline int x264_mb_predict_non_zero_code( x264_t *h, int idx ) +static ALWAYS_INLINE int x264_mb_predict_non_zero_code( x264_t *h, int idx ) { const int za = h->mb.cache.non_zero_count[x264_scan8[idx] - 1]; const int zb = h->mb.cache.non_zero_count[x264_scan8[idx] - 8]; @@ -440,51 +417,27 @@ static inline int x264_mb_predict_non_zero_code( x264_t *h, int idx ) int i_ret = za + zb; if( i_ret < 0x80 ) - { i_ret = ( i_ret + 1 ) >> 1; - } return i_ret & 0x7f; } + +/* intra and skip are disallowed, p8x8 is conditional. */ +static const uint8_t x264_transform_allowed[X264_MBTYPE_MAX] = +{ + 0,0,0,0,1,2,0,1,1,1,1,1,1,1,1,1,1,1,0 +}; + /* x264_mb_transform_8x8_allowed: * check whether any partition is smaller than 8x8 (or at least * might be, according to just partition type.) * doesn't check for cbp */ -static inline int x264_mb_transform_8x8_allowed( x264_t *h ) +static ALWAYS_INLINE int x264_mb_transform_8x8_allowed( x264_t *h ) { - // intra and skip are disallowed - // large partitions are allowed - // direct and 8x8 are conditional - static const uint8_t partition_tab[X264_MBTYPE_MAX] = { - 0,0,0,0,1,2,0,2,1,1,1,1,1,1,1,1,1,2,0, - }; - int p, i; - if( !h->pps->b_transform_8x8_mode ) return 0; - p = partition_tab[h->mb.i_type]; - if( p < 2 ) - return p; - else if( h->mb.i_type == B_DIRECT ) - return h->sps->b_direct8x8_inference; - else if( h->mb.i_type == P_8x8 ) - { - if( !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8) ) - return 1; - for( i=0; i<4; i++ ) - if( h->mb.i_sub_partition[i] != D_L0_8x8 ) - return 0; - return 1; - } - else // B_8x8 - { - // x264 currently doesn't use sub-8x8 B partitions, so don't check for them - if( h->sps->b_direct8x8_inference ) - return 1; - for( i=0; i<4; i++ ) - if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) - return 0; - return 1; - } + if( h->mb.i_type != P_8x8 ) + return x264_transform_allowed[h->mb.i_type]; + return M32( h->mb.i_sub_partition ) == D_L0_8x8*0x01010101; } #endif