X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fcommon.h;h=53a6ff03c0b834f592867fbcb348e3a8e8a89649;hb=8ecdeb2709b4b7095237330e68e9a76ea8060a2f;hp=5763c2ed7813d391678272923734484c97807153;hpb=dd713cae59c062440b046fe75d60af83d049de3c;p=x264 diff --git a/common/common.h b/common/common.h index 5763c2ed..53a6ff03 100644 --- a/common/common.h +++ b/common/common.h @@ -1,7 +1,7 @@ /***************************************************************************** * common.h: misc common functions ***************************************************************************** - * Copyright (C) 2003-2011 x264 project + * Copyright (C) 2003-2013 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -40,6 +40,7 @@ #define IS_DISPOSABLE(type) ( type == X264_TYPE_B ) #define FIX8(f) ((int)(f*(1<<8)+.5)) #define ALIGN(x,a) (((x)+((a)-1))&~((a)-1)) +#define ARRAY_ELEMS(a) ((sizeof(a))/(sizeof(a[0]))) #define CHECKED_MALLOC( var, size )\ do {\ @@ -53,9 +54,12 @@ do {\ memset( var, 0, size );\ } while( 0 ) +#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) + #define X264_BFRAME_MAX 16 #define X264_REF_MAX 16 #define X264_THREAD_MAX 128 +#define X264_LOOKAHEAD_THREAD_MAX 16 #define X264_PCM_COST (FRAME_SIZE(256*BIT_DEPTH)+16) #define X264_LOOKAHEAD_MAX 250 #define QP_BD_OFFSET (6*(BIT_DEPTH-8)) @@ -92,6 +96,10 @@ do {\ #include <assert.h> #include <limits.h> +#if HAVE_OPENCL +#include "opencl.h" +#endif + #if HAVE_INTERLACED # define MB_INTERLACED h->mb.b_interlaced # define SLICE_MBAFF h->sh.b_mbaff @@ -201,6 +209,7 @@ static const uint8_t x264_scan8[16*3 + 3] = }; #include "x264.h" +#include "cabac.h" #include "bitstream.h" #include "set.h" #include "predict.h" @@ -208,7 +217,6 @@ static const uint8_t x264_scan8[16*3 + 3] = #include "mc.h" #include "frame.h" #include "dct.h" -#include "cabac.h" #include "quant.h" #include "cpu.h" #include "threadpool.h" @@ -236,7 +244,7 @@ void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... 
); void x264_reduce_fraction( uint32_t *n, uint32_t *d ); void x264_reduce_fraction64( uint64_t *n, uint64_t *d ); -void x264_cavlc_init( void ); +void x264_cavlc_init( x264_t *h ); void x264_cabac_init( x264_t *h ); static ALWAYS_INLINE pixel x264_clip_pixel( int x ) @@ -290,17 +298,6 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd return amvd0 + (amvd1<<8); } -static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max ) -{ - for( int i = 0; i < i_mvc; i++ ) - { - int mx = (mvc[i][0] + 2) >> 2; - int my = (mvc[i][1] + 2) >> 2; - dst[i][0] = x264_clip3( mx, mv_x_min, mv_x_max ); - dst[i][1] = x264_clip3( my, mv_y_min, mv_y_max ); - } -} - extern const uint8_t x264_exp2_lut[64]; extern const float x264_log2_lut[128]; extern const float x264_log2_lz_lut[32]; @@ -385,6 +382,7 @@ typedef struct } ref_pic_list_order[2][X264_REF_MAX]; /* P-frame weighting */ + int b_weighted_pred; x264_weight_t weight[X264_REF_MAX*2][3]; int i_mmco_remove_from_end; @@ -434,17 +432,51 @@ typedef struct x264_left_table_t uint8_t ref[4]; } x264_left_table_t; +/* Current frame stats */ +typedef struct +{ + /* MV bits (MV+Ref+Block Type) */ + int i_mv_bits; + /* Texture bits (DCT coefs) */ + int i_tex_bits; + /* ? 
*/ + int i_misc_bits; + /* MB type counts */ + int i_mb_count[19]; + int i_mb_count_i; + int i_mb_count_p; + int i_mb_count_skip; + int i_mb_count_8x8dct[2]; + int i_mb_count_ref[2][X264_REF_MAX*2]; + int i_mb_partition[17]; + int i_mb_cbp[6]; + int i_mb_pred_mode[4][13]; + int i_mb_field[3]; + /* Adaptive direct mv pred */ + int i_direct_score[2]; + /* Metrics */ + int64_t i_ssd[3]; + double f_ssim; + int i_ssim_cnt; +} x264_frame_stat_t; + struct x264_t { /* encoder parameters */ x264_param_t param; x264_t *thread[X264_THREAD_MAX+1]; + x264_t *lookahead_thread[X264_LOOKAHEAD_THREAD_MAX]; int b_thread_active; int i_thread_phase; /* which thread to use for the next frame */ + int i_thread_idx; /* which thread this is */ int i_threadslice_start; /* first row in this thread slice */ int i_threadslice_end; /* row after the end of this thread slice */ + int i_threadslice_pass; /* which pass of encoding we are on */ x264_threadpool_t *threadpool; + x264_threadpool_t *lookaheadpool; + x264_pthread_mutex_t mutex; + x264_pthread_cond_t cv; /* bitstream output */ struct @@ -481,6 +513,7 @@ struct x264_t int64_t i_cpb_delay_lookahead; int64_t i_cpb_delay_pir_offset; + int64_t i_cpb_delay_pir_offset_next; int b_queued_intra_refresh; int64_t i_last_idr_pts; @@ -498,6 +531,8 @@ struct x264_t udctcoef (*quant8_mf[4])[64]; /* [4][52][64] */ udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */ udctcoef (*quant8_bias[4])[64]; /* [4][52][64] */ + udctcoef (*quant4_bias0[4])[16]; /* [4][52][16] */ + udctcoef (*quant8_bias0[4])[64]; /* [4][52][64] */ udctcoef (*nr_offset_emergency)[4][64]; /* mv/ref cost arrays. */ @@ -575,11 +610,11 @@ struct x264_t /* Current MB DCT coeffs */ struct { - ALIGNED_16( dctcoef luma16x16_dc[3][16] ); + ALIGNED_N( dctcoef luma16x16_dc[3][16] ); ALIGNED_16( dctcoef chroma_dc[2][8] ); // FIXME share memory? 
- ALIGNED_16( dctcoef luma8x8[12][64] ); - ALIGNED_16( dctcoef luma4x4[16*3][16] ); + ALIGNED_N( dctcoef luma8x8[12][64] ); + ALIGNED_N( dctcoef luma4x4[16*3][16] ); } dct; /* MB table and cache for current frame/mb */ @@ -632,8 +667,7 @@ struct x264_t int mv_miny_spel_row[3]; int mv_maxy_spel_row[3]; /* Fullpel MV range for motion search */ - int mv_min_fpel[2]; - int mv_max_fpel[2]; + ALIGNED_8( int16_t mv_limit_fpel[2][2] ); /* min_x, min_y, max_x, max_y */ int mv_miny_fpel_row[3]; int mv_maxy_fpel_row[3]; @@ -719,7 +753,7 @@ struct x264_t #define FENC_STRIDE 16 #define FDEC_STRIDE 32 ALIGNED_16( pixel fenc_buf[48*FENC_STRIDE] ); - ALIGNED_16( pixel fdec_buf[52*FDEC_STRIDE] ); + ALIGNED_N( pixel fdec_buf[52*FDEC_STRIDE] ); /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */ ALIGNED_16( pixel i4x4_fdec_buf[16*16] ); @@ -793,6 +827,9 @@ struct x264_t /* extra data required for mbaff in mv prediction */ int16_t topright_mv[2][3][2]; int8_t topright_ref[2][3]; + + /* current mb deblock strength */ + uint8_t (*deblock_strength)[8][4]; } cache; /* */ @@ -830,32 +867,7 @@ struct x264_t struct { /* Current frame stats */ - struct - { - /* MV bits (MV+Ref+Block Type) */ - int i_mv_bits; - /* Texture bits (DCT coefs) */ - int i_tex_bits; - /* ? */ - int i_misc_bits; - /* MB type counts */ - int i_mb_count[19]; - int i_mb_count_i; - int i_mb_count_p; - int i_mb_count_skip; - int i_mb_count_8x8dct[2]; - int i_mb_count_ref[2][X264_REF_MAX*2]; - int i_mb_partition[17]; - int i_mb_cbp[6]; - int i_mb_pred_mode[4][13]; - int i_mb_field[3]; - /* Adaptive direct mv pred */ - int i_direct_score[2]; - /* Metrics */ - int64_t i_ssd[3]; - double f_ssim; - int i_ssim_cnt; - } frame; + x264_frame_stat_t frame; /* Cumulated stats */ @@ -901,6 +913,7 @@ struct x264_t /* Buffers that are allocated per-thread even in sliced threads. 
*/ void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */ + void *scratch_buffer2; /* if the first one's already in use */ pixel *intra_border_backup[5][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */ /* Deblock strength values are stored for each 4x4 partition. In MBAFF * there are four extra values that need to be stored, located in [4][i]. */ @@ -929,11 +942,48 @@ struct x264_t struct visualize_t *visualize; #endif x264_lookahead_t *lookahead; + +#if HAVE_OPENCL + x264_opencl_t opencl; +#endif }; // included at the end because it needs x264_t #include "macroblock.h" +static int ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) +{ + int cnt = 0; + for( int i = 0; i < i_mvc; i++ ) + { + int mx = (mvc[i][0] + 2) >> 2; + int my = (mvc[i][1] + 2) >> 2; + uint32_t mv = pack16to32_mask(mx, my); + if( !mv || mv == pmv ) continue; + dst[cnt][0] = x264_clip3( mx, mv_limit[0][0], mv_limit[1][0] ); + dst[cnt][1] = x264_clip3( my, mv_limit[0][1], mv_limit[1][1] ); + cnt++; + } + return cnt; +} + +static int ALWAYS_INLINE x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) +{ + int cnt = 0; + int qpel_limit[4] = {mv_limit[0][0] << 2, mv_limit[0][1] << 2, mv_limit[1][0] << 2, mv_limit[1][1] << 2}; + for( int i = 0; i < i_mvc; i++ ) + { + uint32_t mv = M32( mvc[i] ); + int mx = mvc[i][0]; + int my = mvc[i][1]; + if( !mv || mv == pmv ) continue; + dst[cnt][0] = x264_clip3( mx, qpel_limit[0], qpel_limit[2] ); + dst[cnt][1] = x264_clip3( my, qpel_limit[1], qpel_limit[3] ); + cnt++; + } + return cnt; +} + #if ARCH_X86 || ARCH_X86_64 #include "x86/util.h" #endif