X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fcommon.h;h=5ee3ef8aad14349c60d54cc9c2e96b6196a305d3;hb=7a125e4a89b6c1cfd5066706939b7dee5a755254;hp=e748116c84ac1a7280cd2e0b0696661988b8b458;hpb=8614594835ce25879c0d01ca88625ea444d577f2;p=x264 diff --git a/common/common.h b/common/common.h index e748116c..5ee3ef8a 100644 --- a/common/common.h +++ b/common/common.h @@ -24,18 +24,46 @@ #ifndef _COMMON_H #define _COMMON_H 1 -#ifdef HAVE_STDINT_H -#include -#else -#include -#endif -#include +/**************************************************************************** + * Macros + ****************************************************************************/ +#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) ) +#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) ) +#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c))) +#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c))) +#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d))) +#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d))) +#define XCHG(type,a,b) { type t = a; a = b; b = t; } +#define FIX8(f) ((int)(f*(1<<8)+.5)) + +#define CHECKED_MALLOC( var, size )\ +{\ + var = x264_malloc( size );\ + if( !var )\ + {\ + x264_log( h, X264_LOG_ERROR, "malloc failed\n" );\ + goto fail;\ + }\ +} -#ifdef _MSC_VER -#define snprintf _snprintf -#define X264_VERSION "" // no configure script for msvc -#endif +#define X264_BFRAME_MAX 16 +#define X264_THREAD_MAX 128 +#define X264_SLICE_MAX 4 +#define X264_NAL_MAX (4 + X264_SLICE_MAX) + +// number of pixels (per thread) in progress at any given time. +// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety +#define X264_THREAD_HEIGHT 24 +/**************************************************************************** + * Includes + ****************************************************************************/ +#include "osdep.h" +#include +#include +#include +#include +#include #include "x264.h" #include "bs.h" #include "set.h" @@ -45,40 +73,43 @@ #include "frame.h" #include "dct.h" #include "cabac.h" -#include "csp.h" - -/**************************************************************************** - * Macros - ****************************************************************************/ -#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) ) -#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) ) -#define X264_ABS(a) ( (a)< 0 ? -(a) : (a) ) -#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c))) -#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d))) +#include "quant.h" /**************************************************************************** * Generals functions ****************************************************************************/ /* x264_malloc : will do or emulate a memalign - * XXX you HAVE TO use x264_free for buffer allocated - * with x264_malloc - */ + * you have to use x264_free for buffers allocated with x264_malloc */ void *x264_malloc( int ); void *x264_realloc( void *p, int i_size ); void x264_free( void * ); +/* x264_slurp_file: malloc space for the whole file and read it */ +char *x264_slurp_file( const char *filename ); + /* mdate: return the current date in microsecond */ int64_t x264_mdate( void ); +/* x264_param2string: return a (malloced) string containing most of + * the encoding options */ +char *x264_param2string( x264_param_t *p, int b_res ); + /* log */ void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... ); +void x264_reduce_fraction( int *n, int *d ); + +static inline uint8_t x264_clip_uint8( int x ) +{ + return x&(~255) ? (-x)>>31 : x; +} + static inline int x264_clip3( int v, int i_min, int i_max ) { return ( (v < i_min) ? i_min : (v > i_max) ? i_max : v ); } -static inline float x264_clip3f( float v, float f_min, float f_max ) +static inline double x264_clip3f( double v, double f_min, double f_max ) { return ( (v < f_min) ? f_min : (v > f_max) ? f_max : v ); } @@ -127,6 +158,7 @@ typedef struct int i_frame_num; + int b_mbaff; int b_field_pic; int b_bottom_field; @@ -153,6 +185,7 @@ typedef struct int i_cabac_init_idc; + int i_qp; int i_qp_delta; int b_sp_for_swidth; int i_qs_delta; @@ -195,10 +228,6 @@ static const int x264_scan8[16+2*4] = 5 R R */ -#define X264_BFRAME_MAX 16 -#define X264_SLICE_MAX 4 -#define X264_NAL_MAX (4 + X264_SLICE_MAX) - typedef struct x264_ratecontrol_t x264_ratecontrol_t; typedef struct x264_vlc_table_t x264_vlc_table_t; @@ -207,7 +236,10 @@ struct x264_t /* encoder parameters */ x264_param_t param; - x264_t *thread[X264_SLICE_MAX]; + x264_t *thread[X264_THREAD_MAX]; + x264_pthread_t thread_handle; + int b_thread_active; + int i_thread_phase; /* which thread to use for the next frame */ /* bitstream output */ struct @@ -217,6 +249,7 @@ struct x264_t int i_bitstream; /* size of p_bitstream */ uint8_t *p_bitstream; /* will hold data for all nal */ bs_t bs; + int i_frame_size; } out; /* frame number/poc */ @@ -239,6 +272,22 @@ struct x264_t x264_pps_t *pps; int i_idr_pic_id; + /* quantization matrix for decoding, [cqm][qp%6][coef_y][coef_x] */ + int (*dequant4_mf[4])[4][4]; /* [4][6][4][4] */ + int (*dequant8_mf[2])[8][8]; /* [2][6][8][8] */ + /* quantization matrix for trellis, [cqm][qp][coef] */ + int (*unquant4_mf[4])[16]; /* [4][52][16] */ + int (*unquant8_mf[2])[64]; /* [2][52][64] */ + /* quantization matrix for deadzone */ + uint16_t (*quant4_mf[4])[16]; /* [4][52][16] */ + uint16_t (*quant8_mf[2])[64]; /* [2][52][64] */ + uint16_t (*quant4_bias[4])[16]; /* [4][52][16] */ + uint16_t (*quant8_bias[2])[64]; /* [2][52][64] */ + + uint32_t nr_residual_sum[2][64]; + uint32_t nr_offset[2][64]; + uint32_t nr_count[2]; + /* Slice header */ x264_slice_header_t sh; @@ -252,12 +301,12 @@ struct x264_t /* Temporary buffer (frames types not yet decided) */ x264_frame_t *next[X264_BFRAME_MAX+3]; /* Unused frames */ - x264_frame_t *unused[X264_BFRAME_MAX+3]; + x264_frame_t *unused[X264_BFRAME_MAX + X264_THREAD_MAX*2 + 16+4]; /* For adaptive B decision */ x264_frame_t *last_nonb; - /* frames used for reference +1 for decoding + sentinels */ - x264_frame_t *reference[16+2+1+2]; + /* frames used for reference + sentinels */ + x264_frame_t *reference[16+2]; int i_last_idr; /* Frame number of the last IDR */ @@ -267,6 +316,7 @@ struct x264_t int i_max_ref0; int i_max_ref1; int i_delay; /* Number of frames buffered for B reordering */ + int b_have_lowres; /* Whether 1/2 resolution luma planes are being used */ } frames; /* current frame being encoded */ @@ -319,9 +369,18 @@ struct x264_t int i_me_method; int i_subpel_refine; int b_chroma_me; + int b_trellis; + int b_noise_reduction; + + int b_interlaced; + /* Allowed qpel MV range to stay within the picture + emulated edge pixels */ int mv_min[2]; int mv_max[2]; + /* Subpel MV range for motion search. + * same mv_min/max but includes levels' i_mv_range. */ + int mv_min_spel[2]; + int mv_max_spel[2]; /* Fullpel MV range for motion search */ int mv_min_fpel[2]; int mv_max_fpel[2]; @@ -334,6 +393,8 @@ struct x264_t int i_mb_type_left; int i_mb_type_topleft; int i_mb_type_topright; + int i_mb_prev_xy; + int i_mb_top_xy; /* mb table */ int8_t *type; /* mb type */ @@ -345,9 +406,11 @@ struct x264_t int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */ int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */ int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */ - int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */ + int16_t (*mvr[2][32])[2]; /* 16x16 mv for each possible ref */ int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */ int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */ + uint8_t *intra_border_backup[2][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */ + uint8_t (*nnz_backup)[16]; /* when using cavlc + 8x8dct, the deblocker uses a modified nnz */ /* current value */ int i_type; @@ -361,18 +424,38 @@ struct x264_t int i_intra16x16_pred_mode; int i_chroma_pred_mode; + /* skip flags for i4x4 and i8x8 + * 0 = encode as normal. + * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction. + * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */ + int i_skip_intra; + struct { + /* space for p_fenc and p_fdec */ +#define FENC_STRIDE 16 +#define FDEC_STRIDE 32 + DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 ); + DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 ); + + /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */ + DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 ); + DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 ); + DECLARE_ALIGNED( int, i8x8_dct_buf[3][64], 16 ); + DECLARE_ALIGNED( int, i4x4_dct_buf[15][16], 16 ); + /* pointer over mb of the frame to be compressed */ uint8_t *p_fenc[3]; - /* pointer over mb of the frame to be reconstrucated */ + /* pointer over mb of the frame to be reconstructed */ uint8_t *p_fdec[3]; /* pointer over mb of the references */ - uint8_t *p_fref[2][16][4+2]; /* last: lN, lH, lV, lHV, cU, cV */ + int i_fref[2]; + uint8_t *p_fref[2][32][4+2]; /* last: lN, lH, lV, lHV, cU, cV */ + uint16_t *p_integral[2][16]; - /* common stride */ + /* fref stride */ int i_stride[3]; } pic; @@ -380,49 +463,51 @@ struct x264_t struct { /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */ - int intra4x4_pred_mode[X264_SCAN8_SIZE]; + int8_t intra4x4_pred_mode[X264_SCAN8_SIZE]; - /* i_non_zero_count if availble else 0x80 */ - int non_zero_count[X264_SCAN8_SIZE]; + /* i_non_zero_count if available else 0x80 */ + int8_t non_zero_count[X264_SCAN8_SIZE]; - /* -1 if unused, -2 if unavaible */ - int8_t ref[2][X264_SCAN8_SIZE]; + /* -1 if unused, -2 if unavailable */ + DECLARE_ALIGNED( int8_t, ref[2][X264_SCAN8_SIZE], 4 ); - /* 0 if non avaible */ - int16_t mv[2][X264_SCAN8_SIZE][2]; - int16_t mvd[2][X264_SCAN8_SIZE][2]; + /* 0 if not available */ + DECLARE_ALIGNED( int16_t, mv[2][X264_SCAN8_SIZE][2], 16 ); + DECLARE_ALIGNED( int16_t, mvd[2][X264_SCAN8_SIZE][2], 4 ); /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */ - int8_t skip[X264_SCAN8_SIZE]; + DECLARE_ALIGNED( int8_t, skip[X264_SCAN8_SIZE], 4 ); - int16_t direct_mv[2][X264_SCAN8_SIZE][2]; + DECLARE_ALIGNED( int16_t, direct_mv[2][X264_SCAN8_SIZE][2], 16 ) ; int8_t direct_ref[2][X264_SCAN8_SIZE]; + int pskip_mv[2]; /* number of neighbors (top and left) that used 8x8 dct */ int i_neighbour_transform_size; - int b_transform_8x8_allowed; + int i_neighbour_interlaced; } cache; /* */ + int i_qp; /* current qp */ + int i_chroma_qp; int i_last_qp; /* last qp */ int i_last_dqp; /* last delta qp */ int b_variable_qp; /* whether qp is allowed to vary per macroblock */ + int b_lossless; + int b_direct_auto_read; /* take stats for --direct auto from the 2pass log */ + int b_direct_auto_write; /* analyse direct modes, to use and/or save */ /* B_direct and weighted prediction */ - int dist_scale_factor[16][16]; - int bipred_weight[16][16]; + int16_t dist_scale_factor[16][2]; + int16_t bipred_weight[32][4]; /* maps fref1[0]'s ref indices into the current list0 */ - int map_col_to_list0_buf[2]; // for negative indices - int map_col_to_list0[16]; + int8_t map_col_to_list0_buf[2]; // for negative indices + int8_t map_col_to_list0[16]; } mb; /* rate control encoding only */ x264_ratecontrol_t *rc; - int i_last_inter_size; - int i_last_intra_size; - int i_last_intra_qp; - /* stats */ struct { @@ -441,30 +526,43 @@ struct x264_t int i_mb_count_i; int i_mb_count_p; int i_mb_count_skip; + int i_mb_count_8x8dct[2]; + int i_mb_count_size[7]; + int i_mb_count_ref[32]; /* Estimated (SATD) cost as Intra/Predicted frame */ /* XXX: both omit the cost of MBs coded as P_SKIP */ int i_intra_cost; int i_inter_cost; + int i_mbs_analysed; + /* Adaptive direct mv pred */ + int i_direct_score[2]; } frame; - /* Cummulated stats */ + /* Cumulated stats */ /* per slice info */ - int i_slice_count[5]; + int i_slice_count[5]; int64_t i_slice_size[5]; - int i_slice_qp[5]; + double f_slice_qp[5]; /* */ int64_t i_sqe_global[5]; - float f_psnr_average[5]; - float f_psnr_mean_y[5]; - float f_psnr_mean_u[5]; - float f_psnr_mean_v[5]; + double f_psnr_average[5]; + double f_psnr_mean_y[5]; + double f_psnr_mean_u[5]; + double f_psnr_mean_v[5]; + double f_ssim_mean_y[5]; /* */ int64_t i_mb_count[5][19]; + int64_t i_mb_count_8x8dct[2]; + int64_t i_mb_count_size[2][7]; + int64_t i_mb_count_ref[2][32]; + /* */ + int i_direct_score[2]; + int i_direct_frames[2]; } stat; - /* CPU functions dependants */ + /* CPU functions dependents */ x264_predict_t predict_16x16[4+3]; x264_predict_t predict_8x8c[4+3]; x264_predict8x8_t predict_8x8[9+3]; @@ -473,7 +571,9 @@ struct x264_t x264_pixel_function_t pixf; x264_mc_functions_t mc; x264_dct_function_t dctf; - x264_csp_function_t csp; + x264_zigzag_function_t zigzagf; + x264_quant_function_t quantf; + x264_deblock_function_t loopf; /* vlc table for decoding purpose only */ x264_vlc_table_t *x264_coeff_token_lookup[5]; @@ -487,5 +587,8 @@ struct x264_t #endif }; +// included at the end because it needs x264_t +#include "macroblock.h" + #endif