diff --git a/encoder/macroblock.h b/encoder/macroblock.h
index 49d13a25..585d5858 100644
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
- * macroblock.h: h264 encoder library
+ * macroblock.h: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2008 x264 project
+ * Copyright (C) 2003-2016 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -19,6 +19,9 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
 #ifndef X264_ENCODER_MACROBLOCK_H
@@ -26,35 +29,167 @@
 
 #include "common/macroblock.h"
 
-extern const int x264_lambda2_tab[52];
-extern const int x264_lambda_tab[52];
+extern const int x264_lambda2_tab[QP_MAX_MAX+1];
+extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
 
-void x264_rdo_init( );
+void x264_rdo_init( void );
 
 int x264_macroblock_probe_skip( x264_t *h, int b_bidir );
 
-static inline int x264_macroblock_probe_pskip( x264_t *h )
-    { return x264_macroblock_probe_skip( h, 0 ); }
-static inline int x264_macroblock_probe_bskip( x264_t *h )
-    { return x264_macroblock_probe_skip( h, 1 ); }
+#define x264_macroblock_probe_pskip( h )\
+    x264_macroblock_probe_skip( h, 0 )
+#define x264_macroblock_probe_bskip( h )\
+    x264_macroblock_probe_skip( h, 1 )
+
+void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode );
+void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] );
+void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode );
+void x264_predict_lossless_chroma( x264_t *h, int i_mode );
 
 void x264_macroblock_encode      ( x264_t *h );
 void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
-void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s );
+void x264_macroblock_write_cavlc ( x264_t *h );
 
 void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
-void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale );
-void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale );
-void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale );
+void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
+void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp );
 
 void x264_cabac_mb_skip( x264_t *h, int b_skip );
 
-void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
-                             int i_qp, int i_ctxBlockCat, int b_intra );
-void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
-                             int i_qp, int b_intra );
+int x264_quant_luma_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp,
+                                int ctx_block_cat, int b_intra, int idx );
+int x264_quant_chroma_dc_trellis( x264_t *h, dctcoef *dct, int i_qp, int b_intra, int idx );
+int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
+                            int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
+int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
+                            int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
 
 void x264_noise_reduction_update( x264_t *h );
 
+static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
+{
+    int i_quant_cat = b_intra ? (p?CQM_4IC:CQM_4IY) : (p?CQM_4PC:CQM_4PY);
+    if( h->mb.b_noise_reduction )
+        h->quantf.denoise_dct( dct, h->nr_residual_sum[0+!!p*2], h->nr_offset[0+!!p*2], 16 );
+    if( h->mb.b_trellis )
+        return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, !!p, idx+p*16 );
+    else
+        return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
+}
+
+static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
+{
+    int i_quant_cat = b_intra ? (p?CQM_8IC:CQM_8IY) : (p?CQM_8PC:CQM_8PY);
+    if( h->mb.b_noise_reduction )
+        h->quantf.denoise_dct( dct, h->nr_residual_sum[1+!!p*2], h->nr_offset[1+!!p*2], 64 );
+    if( h->mb.b_trellis )
+        return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, !!p, idx+p*4 );
+    else
+        return h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
+}
+
+#define STORE_8x8_NNZ( p, idx, nz )\
+do\
+{\
+    M16( &h->mb.cache.non_zero_count[x264_scan8[p*16+idx*4]+0] ) = (nz) * 0x0101;\
+    M16( &h->mb.cache.non_zero_count[x264_scan8[p*16+idx*4]+8] ) = (nz) * 0x0101;\
+} while(0)
+
+#define CLEAR_16x16_NNZ( p ) \
+do\
+{\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
+} while(0)
+
+/* A special for loop that iterates branchlessly over each set
+ * bit in a 4-bit input. */
+#define FOREACH_BIT(idx,start,mask) for( int idx = start, msk = mask, skip; msk && (skip = x264_ctz_4bit(msk), idx += skip, msk >>= skip+1, 1); idx++ )
+
+static ALWAYS_INLINE void x264_mb_encode_i4x4( x264_t *h, int p, int idx, int i_qp, int i_mode, int b_predict )
+{
+    int nz;
+    pixel *p_src = &h->mb.pic.p_fenc[p][block_idx_xy_fenc[idx]];
+    pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[idx]];
+    ALIGNED_ARRAY_N( dctcoef, dct4x4,[16] );
+
+    if( b_predict )
+    {
+        if( h->mb.b_lossless )
+            x264_predict_lossless_4x4( h, p_dst, p, idx, i_mode );
+        else
+            h->predict_4x4[i_mode]( p_dst );
+    }
+
+    if( h->mb.b_lossless )
+    {
+        nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+idx], p_src, p_dst );
+        h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = nz;
+        h->mb.i_cbp_luma |= nz<<(idx>>2);
+        return;
+    }
+
+    h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
+
+    nz = x264_quant_4x4( h, dct4x4, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 1, p, idx );
+    h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = nz;
+    if( nz )
+    {
+        h->mb.i_cbp_luma |= 1<<(idx>>2);
+        h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4 );
+        h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[p?CQM_4IC:CQM_4IY], i_qp );
+        h->dctf.add4x4_idct( p_dst, dct4x4 );
+    }
+}
+
+static ALWAYS_INLINE void x264_mb_encode_i8x8( x264_t *h, int p, int idx, int i_qp, int i_mode, pixel *edge, int b_predict )
+{
+    int x = idx&1;
+    int y = idx>>1;
+    int nz;
+    pixel *p_src = &h->mb.pic.p_fenc[p][8*x + 8*y*FENC_STRIDE];
+    pixel *p_dst = &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE];
+    ALIGNED_ARRAY_N( dctcoef, dct8x8,[64] );
+    ALIGNED_ARRAY_32( pixel, edge_buf,[36] );
+
+    if( b_predict )
+    {
+        if( !edge )
+        {
+            h->predict_8x8_filter( p_dst, edge_buf, h->mb.i_neighbour8[idx], x264_pred_i4x4_neighbors[i_mode] );
+            edge = edge_buf;
+        }
+
+        if( h->mb.b_lossless )
+            x264_predict_lossless_8x8( h, p_dst, p, idx, i_mode, edge );
+        else
+            h->predict_8x8[i_mode]( p_dst, edge );
+    }
+
+    if( h->mb.b_lossless )
+    {
+        nz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+idx], p_src, p_dst );
+        STORE_8x8_NNZ( p, idx, nz );
+        h->mb.i_cbp_luma |= nz<<idx;
+        return;
+    }
+
+    h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
+
+    nz = x264_quant_8x8( h, dct8x8, i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 1, p, idx );
+    if( nz )
+    {
+        h->mb.i_cbp_luma |= 1<<idx;
+        h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], dct8x8 );
+        h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[p?CQM_8IC:CQM_8IY], i_qp );
+        h->dctf.add8x8_idct8( p_dst, dct8x8 );
+        STORE_8x8_NNZ( p, idx, 1 );
+    }
+    else
+        STORE_8x8_NNZ( p, idx, 0 );
+}
+
 #endif
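
Note on the FOREACH_BIT macro added above: the sketch below is a minimal, self-contained illustration of how it visits each set bit of a 4-bit mask (e.g. a luma CBP, one bit per 8x8 block) without branching on individual bits. The local ctz_4bit is a hypothetical stand-in for x264_ctz_4bit, which this header gets from common/; its LUT is an assumption written to match the behavior the name implies (count trailing zeros of a 4-bit value), not a copy of the x264 definition.

#include <stdio.h>

/* Hypothetical stand-in for x264_ctz_4bit: count trailing zeros of a
 * 4-bit value via a 16-entry LUT. The entry for 0 is never used, since
 * the loop condition tests msk before calling this; 4 is a safe sentinel. */
static inline int ctz_4bit( unsigned x )
{
    static const unsigned char lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
    return lut[x & 0xf];
}

/* Same shape as the FOREACH_BIT in the diff: each iteration advances idx
 * by the number of trailing zeros left in the mask, shifts the consumed
 * bits out, and terminates once the mask is empty -- no per-bit branch
 * on whether a given bit is set. */
#define FOREACH_BIT(idx,start,mask) \
    for( int idx = start, msk = mask, skip; \
         msk && (skip = ctz_4bit(msk), idx += skip, msk >>= skip+1, 1); idx++ )

int main( void )
{
    /* Mask 0xb = 1011b: blocks 0, 1 and 3 are coded, block 2 is skipped. */
    FOREACH_BIT( i8, 0, 0xb )
        printf( "encode 8x8 block %d\n", i8 );  /* prints 0, 1, 3 */
    return 0;
}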