Output bit depth is specified at compile time via --bit-depth.
There is currently almost no assembly code available for high-bit-depth modes, so encoding will be very slow.
Input is still 8-bit only; this will change in the future.
Note that very few H.264 decoders currently support bit depths greater than 8.
Also note that the quantizer scale differs for higher bit depth. For example, for 10-bit, the quantizer (and crf) ranges from 0 to 63 instead of 0 to 51.
MC_WEIGHT(_offsetadd)
MC_WEIGHT(_offsetsub)
+void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
+void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
+void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
+void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
+
+void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
+void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
+
+void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
+void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
+void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
+
+#if !X264_HIGH_BIT_DEPTH
/* Select the NEON weighted-prediction function table for this weight.
 * Installs x264_mc_wtab_neon only when i_scale == 1<<i_denom, i.e. the
 * scale is exactly cancelled by the denominator shift (NOTE(review):
 * presumably the only case the NEON routines handle — confirm against
 * the NEON weight code). Otherwise the previously assigned weight
 * function is left untouched. */
static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
{
/* scale == 1<<denom: the shift cancels the scaling */
if( w->i_scale == 1<<w->i_denom )
w->weightfn = x264_mc_wtab_neon;
}
-void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
-
-void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
-void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
-
static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
{
NULL,
}
}
-void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
-void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
-void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
-
static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
int stride, int width, int height, int16_t *buf )
{
src += stride;
}
}
+#endif // !X264_HIGH_BIT_DEPTH
void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
{
if( !(cpu&X264_CPU_ARMV6) )
return;
+#if !X264_HIGH_BIT_DEPTH
pf->prefetch_fenc = x264_prefetch_fenc_arm;
pf->prefetch_ref = x264_prefetch_ref_arm;
+#endif // !X264_HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_NEON) )
return;
+#if !X264_HIGH_BIT_DEPTH
pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon;
pf->offsetsub = x264_mc_offsetsub_wtab_neon;
pf->weight_cache = x264_weight_cache_neon;
-// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
-#ifndef SYS_MACOSX
- pf->memcpy_aligned = x264_memcpy_aligned_neon;
-#endif
- pf->memzero_aligned = x264_memzero_aligned_neon;
-
pf->mc_chroma = x264_mc_chroma_neon;
pf->mc_luma = mc_luma_neon;
pf->get_ref = get_ref_neon;
pf->hpel_filter = hpel_filter_neon;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
+#endif // !X264_HIGH_BIT_DEPTH
+
+// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
+#ifndef SYS_MACOSX
+ pf->memcpy_aligned = x264_memcpy_aligned_neon;
+#endif
+ pf->memzero_aligned = x264_memzero_aligned_neon;
}
if (!(cpu&X264_CPU_ARMV6))
return;
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_4x4_H] = x264_predict_4x4_h_armv6;
pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_armv6;
pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_armv6;
return;
pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_neon;
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
if (!(cpu&X264_CPU_NEON))
return;
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_neon;
pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_neon;
pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_neon;
pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_neon;
pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_neon;
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_neon;
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
if (!(cpu&X264_CPU_NEON))
return;
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_neon;
pf[I_PRED_8x8_H] = x264_predict_8x8_h_neon;
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
if (!(cpu&X264_CPU_NEON))
return;
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_16x16_DC ] = x264_predict_16x16_dc_neon;
pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_neon;
pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_neon;
pf[I_PRED_16x16_H ] = x264_predict_16x16_h_neon;
pf[I_PRED_16x16_V ] = x264_predict_16x16_v_neon;
pf[I_PRED_16x16_P ] = x264_predict_16x16_p_neon;
+#endif // !X264_HIGH_BIT_DEPTH
}
/* Run/level pairs for one coded block, as produced by zigzag scanning.
 * 'level' must be dctcoef (not int16_t): at high bit depth, transform
 * coefficients are 32-bit, so hard-coding int16_t would truncate them.
 * This span was an unresolved diff hunk; resolved to the post-patch form. */
typedef struct
{
    int last;           /* index of the last nonzero coefficient */
    dctcoef level[16];  /* coefficient values, widened with bit depth */
    uint8_t run[16];    /* zero-run lengths preceding each level */
} x264_run_level_t;
param->rc.i_vbv_max_bitrate = 0;
param->rc.i_vbv_buffer_size = 0;
param->rc.f_vbv_buffer_init = 0.9;
- param->rc.i_qp_constant = 23;
- param->rc.f_rf_constant = 23;
+ param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
+ param->rc.f_rf_constant = 23 + QP_BD_OFFSET;
param->rc.i_qp_min = 10;
- param->rc.i_qp_max = 51;
+ param->rc.i_qp_max = QP_MAX;
param->rc.i_qp_step = 4;
param->rc.f_ip_factor = 1.4;
param->rc.f_pb_factor = 1.3;
if( !profile )
return 0;
+#if BIT_DEPTH > 8
+ if( !strcasecmp( profile, "baseline" ) || !strcasecmp( profile, "main" ) ||
+ !strcasecmp( profile, "high" ) )
+ {
+ x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d.\n", profile, BIT_DEPTH );
+ return -1;
+ }
+#endif
+
if( !strcasecmp( profile, "baseline" ) )
{
param->analyse.b_transform_8x8 = 0;
param->analyse.b_transform_8x8 = 0;
param->i_cqm_preset = X264_CQM_FLAT;
}
- else if( !strcasecmp( profile, "high" ) )
+ else if( !strcasecmp( profile, "high" ) || !strcasecmp( profile, "high10" ) )
{
/* Default */
}
#define X264_BFRAME_MAX 16
#define X264_THREAD_MAX 128
-#define X264_PCM_COST (386*8)
+#define X264_PCM_COST (384*BIT_DEPTH+16)
#define X264_LOOKAHEAD_MAX 250
+#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
+#define QP_MAX (51+QP_BD_OFFSET)
+#define QP_MAX_MAX (51+2*6)
+#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
+#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
// arbitrary, but low because SATD scores are 1/4 normal
-#define X264_LOOKAHEAD_QP 12
+#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
#define CP64(dst,src) M64(dst) = M64(src)
#define CP128(dst,src) M128(dst) = M128(src)
/* Core pixel/coefficient types, selected by bit depth.
 * High bit depth: 16-bit pixels, 64-bit 4-pixel groups, 32-bit coefficients.
 * 8-bit: the classic 8/32/16-bit layout.
 * PIXEL_SPLAT_X4 replicates one pixel value across a pixel4;
 * MPIXEL_X4 is an lvalue-capable 4-pixel load/store.
 * CPPIXEL_X4 is defined once in terms of MPIXEL_X4 so it works at
 * either depth. (This span was an unresolved diff hunk; resolved to
 * the post-patch form.) */
#if X264_HIGH_BIT_DEPTH
    typedef uint16_t pixel;
    typedef uint64_t pixel4;
    typedef int32_t  dctcoef;

#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
#   define MPIXEL_X4(src) M64(src)
#else
    typedef uint8_t  pixel;
    typedef uint32_t pixel4;
    typedef int16_t  dctcoef;

#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#   define MPIXEL_X4(src) M32(src)
#endif

#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
#define X264_SCAN8_SIZE (6*8)
#define X264_SCAN8_LUMA_SIZE (5*8)
/* Clamp an int to the valid pixel range [0, PIXEL_MAX].
 * Branch-free trick: if x has any bits outside PIXEL_MAX it is out of
 * range; (-x)>>31 is then all-ones for overflow (x > PIXEL_MAX) and
 * zero for underflow (x < 0), masked to PIXEL_MAX. Relies on
 * arithmetic right shift of negative ints (true on supported targets).
 * (This span was an unresolved diff hunk; resolved to the post-patch,
 * bit-depth-aware form.) */
static ALWAYS_INLINE pixel x264_clip_pixel( int x )
{
    return ( (x & ~PIXEL_MAX) ? (-x)>>31 & PIXEL_MAX : x );
}
static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
/* mv/ref cost arrays. Indexed by lambda instead of
* qp because, due to rounding, some quantizers share
* lambdas. This saves memory. */
- uint16_t *cost_mv[92];
- uint16_t *cost_mv_fpel[92][4];
+ uint16_t *cost_mv[LAMBDA_MAX+1];
+ uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
dctf->dct4x4dc = dct4x4dc;
dctf->idct4x4dc = idct4x4dc;
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
dctf->add16x16_idct8= x264_add16x16_idct8_neon;
}
#endif
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_dct_init_weights( void )
/* Field (interlaced) zigzag scan of a 4x4 coefficient block.
 * The runs [0,1] and [6..15] are already in scan order, so they are
 * bulk-copied with memcpy (which works at any dctcoef width, unlike
 * the old fixed-size CPDCT_Xn macros); only positions 2..5 need
 * explicit reordering via ZIG. (This span was an unresolved diff hunk;
 * resolved to the post-patch form.) */
static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
{
    memcpy( level, dct, 2 * sizeof(dctcoef) );
    ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
    memcpy( level+6, dct+6, 10 * sizeof(dctcoef) );
}
#undef ZIG
CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
+#define CPPIXEL_X8(dst,src) ( CPPIXEL_X4(dst,src), CPPIXEL_X4(dst+4,src+4) )
#define COPY8x8\
CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
pf->sub_8x8 = zigzag_sub_8x8_field;
pf->sub_4x4 = zigzag_sub_4x4_field;
pf->sub_4x4ac = zigzag_sub_4x4ac_field;
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMXEXT )
{
if( cpu&X264_CPU_ALTIVEC )
pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
#endif
+#endif // !X264_HIGH_BIT_DEPTH
}
else
{
pf->sub_8x8 = zigzag_sub_8x8_frame;
pf->sub_4x4 = zigzag_sub_4x4_frame;
pf->sub_4x4ac = zigzag_sub_4x4ac_frame;
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
if( cpu&X264_CPU_NEON )
pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
#endif
+#endif // !X264_HIGH_BIT_DEPTH
}
pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
if( cpu&X264_CPU_SHUFFLE_IS_FAST )
pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
#endif
+#endif // !X264_HIGH_BIT_DEPTH
}
#include "common.h"
/* Deblocking filter */
-static const uint8_t i_alpha_table[52+12*2] =
+static const uint8_t i_alpha_table[52+12*3] =
{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
255,255,
255,255,255,255,255,255,255,255,255,255,255,255,
};
-static const uint8_t i_beta_table[52+12*2] =
+static const uint8_t i_beta_table[52+12*3] =
{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
-static const int8_t i_tc0_table[52+12*2][4] =
+static const int8_t i_tc0_table[52+12*3][4] =
{
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
{-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
{-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
-#define alpha_table(x) i_alpha_table[(x)+12]
-#define beta_table(x) i_beta_table[(x)+12]
-#define tc0_table(x) i_tc0_table[(x)+12]
+#define alpha_table(x) i_alpha_table[(x)+24]
+#define beta_table(x) i_beta_table[(x)+24]
+#define tc0_table(x) i_tc0_table[(x)+24]
/* From ffmpeg */
static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
{
- int index_a = i_qp + h->sh.i_alpha_c0_offset;
- int alpha = alpha_table(index_a);
- int beta = beta_table(i_qp + h->sh.i_beta_offset);
+ int index_a = i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset;
+ int index_b = i_qp-QP_BD_OFFSET + h->sh.i_beta_offset;
+ int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
+ int beta = beta_table(index_b) << (BIT_DEPTH-8);
int8_t tc[4];
if( !M32(bS) || !alpha || !beta )
return;
- tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
- tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
- tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
- tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
+ tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
+ tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
+ tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
+ tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
pf_inter( pix1, i_stride, alpha, beta, tc );
if( b_chroma )
static inline void deblock_edge_intra( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
{
- int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
- int beta = beta_table(i_qp + h->sh.i_beta_offset);
+ int index_a = i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset;
+ int index_b = i_qp-QP_BD_OFFSET + h->sh.i_beta_offset;
+ int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
+ int beta = beta_table(index_b) << (BIT_DEPTH-8);
if( !alpha || !beta )
return;
#if HAVE_MMX
if( cpu&X264_CPU_MMXEXT )
{
+#if !X264_HIGH_BIT_DEPTH
pf->deblock_chroma[1] = x264_deblock_v_chroma_mmxext;
pf->deblock_chroma[0] = x264_deblock_h_chroma_mmxext;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmxext;
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmxext;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmxext;
#endif
+#endif // !X264_HIGH_BIT_DEPTH
pf->deblock_strength = x264_deblock_strength_mmxext;
if( cpu&X264_CPU_SSE2 )
{
pf->deblock_strength = x264_deblock_strength_sse2;
+#if !X264_HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_STACK_MOD4) )
{
pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
}
+#endif // !X264_HIGH_BIT_DEPTH
}
if( cpu&X264_CPU_SSSE3 )
pf->deblock_strength = x264_deblock_strength_ssse3;
}
#endif
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
{
pf->deblock_chroma[0] = x264_deblock_h_chroma_neon;
}
#endif
+#endif // !X264_HIGH_BIT_DEPTH
}
int scratch_size = 0;
if( !b_lookahead )
{
- int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(int16_t);
+ int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(dctcoef);
int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
2*4 + 3*4*FDEC_STRIDE, 3*4 + 3*4*FDEC_STRIDE
};
/* Luma QP -> chroma QP mapping (H.264 Table 8-15), padded by 12 entries
 * on each side so it can be indexed with a +/-12 QP offset without
 * bounds checks. QP() rebases each entry by QP_BD_OFFSET so the table
 * covers the extended QP range at higher bit depths; the extra negative
 * rows only exist when BIT_DEPTH makes QP_BD_OFFSET large enough for
 * them to be non-negative. (This span was an unresolved diff hunk;
 * resolved to the post-patch form.) */
#define QP(qP) ( (qP)+QP_BD_OFFSET )
static const uint8_t i_chroma_qp_table[QP_MAX+1+12*2] =
{
         0,      0,      0,      0,      0,      0,
         0,      0,      0,      0,      0,      0,
#if BIT_DEPTH > 9
   QP(-12),QP(-11),QP(-10), QP(-9), QP(-8), QP(-7),
#endif
#if BIT_DEPTH > 8
    QP(-6), QP(-5), QP(-4), QP(-3), QP(-2), QP(-1),
#endif
     QP(0),  QP(1),  QP(2),  QP(3),  QP(4),  QP(5),
     QP(6),  QP(7),  QP(8),  QP(9), QP(10), QP(11),
    QP(12), QP(13), QP(14), QP(15), QP(16), QP(17),
    QP(18), QP(19), QP(20), QP(21), QP(22), QP(23),
    QP(24), QP(25), QP(26), QP(27), QP(28), QP(29),
    QP(29), QP(30), QP(31), QP(32), QP(32), QP(33),
    QP(34), QP(34), QP(35), QP(35), QP(36), QP(36),
    QP(37), QP(37), QP(37), QP(38), QP(38), QP(38),
    QP(39), QP(39), QP(39), QP(39),
    QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
    QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
};
#undef QP
enum cabac_ctx_block_cat_e
{
return (a&0xFFFF) + (b<<16);
#endif
}
/* Pack two 32-bit values into one 64-bit word such that, when the
 * result is stored to memory, 'a' lands at the lower address and 'b'
 * at the higher one — hence the operand swap on big-endian hosts.
 * Companion to pack8to16/pack16to32; used as pack_pixel_2to4 at high
 * bit depth. (This span carried stray '+' diff markers; resolved.) */
static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
{
#ifdef WORDS_BIGENDIAN
    return b + ((uint64_t)a<<32);
#else
    return a + ((uint64_t)b<<32);
#endif
}
-#define pack_pixel_1to2 pack8to16
-#define pack_pixel_2to4 pack16to32
+#if X264_HIGH_BIT_DEPTH
+# define pack_pixel_1to2 pack16to32
+# define pack_pixel_2to4 pack32to64
+#else
+# define pack_pixel_1to2 pack8to16
+# define pack_pixel_2to4 pack16to32
+#endif
/* Test whether a coefficient array contains any nonzero value.
 * The element count is sizeof(a)/sizeof(dctcoef) — dividing by the
 * element size, not assuming bytes — so the macro stays correct when
 * dctcoef widens to 32 bits at high bit depth. The old M64-based
 * word-at-a-time scan was dropped for the same reason: it hard-coded
 * the 16-bit element layout. (This span was an unresolved diff hunk;
 * resolved to the post-patch form.) */
#define array_non_zero(a) array_non_zero_int(a, sizeof(a)/sizeof(dctcoef))
#define array_non_zero_int array_non_zero_int
static ALWAYS_INLINE int array_non_zero_int( dctcoef *v, int i_count )
{
    for( int i = 0; i < i_count; i++ )
        if( v[i] )
            return 1;
    return 0;
}
static ALWAYS_INLINE int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
{
{
w->weightfn = h->mc.weight;
}
-#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
-#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * weight->i_scale + weight->i_offset )
-static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
+#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
+#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
+static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
{
- if( weight->i_denom >= 1 )
+ int offset = weight->i_offset << (BIT_DEPTH-8);
+ int scale = weight->i_scale;
+ int denom = weight->i_denom;
+ if( denom >= 1 )
{
for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
for( int x = 0; x < i_width; x++ )
}
}
/* Generate a fixed-width weighting wrapper named 'name' that forwards
 * to the generic mc_weight() with a compile-time constant width.
 * The old expansion duplicated the whole scale/offset loop per width;
 * delegating keeps one copy of the logic (the compiler can still
 * specialize on the constant width). (This span was an unresolved
 * diff hunk; resolved to the post-patch form.) */
#define MC_WEIGHT_C( name, width ) \
static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \
{ \
    mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
}
MC_WEIGHT_C( mc_weight_w20, 20 )
#define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
- int stride, int width, int height, int16_t *buf )
+ int stride, int width, int height, dctcoef *buf )
{
for( int y = 0; y < height; y++ )
{
{
while( h-- )
{
+#if X264_HIGH_BIT_DEPTH
+ for( int i = 0; i < w; i++ )
+ dst[i] = src[i] << (BIT_DEPTH-8);
+#else
memcpy( dst, src, w );
+#endif
dst += i_dst;
src += i_src;
}
uint8_t *src, int i_src, int w, int h);
void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
- int i_stride, int i_width, int i_height, int16_t *buf );
+ int i_stride, int i_width, int i_height, dctcoef *buf );
/* prefetch the next few macroblocks of fenc or fdec */
void (*prefetch_fenc)( pixel *pix_y, int stride_y,
pix2 += i_stride2;
}
sum = abs(sum);
- var = sqr - (sum * sum >> 6);
+ var = sqr - ((uint64_t)sum * sum >> 6);
*ssd = sqr;
return var;
}
SAD_X( 4x8 )
SAD_X( 4x4 )
+#if !X264_HIGH_BIT_DEPTH
#if ARCH_UltraSparc
SAD_X( 16x16_vis )
SAD_X( 16x8_vis )
SAD_X( 8x16_vis )
SAD_X( 8x8_vis )
#endif
+#endif // !X264_HIGH_BIT_DEPTH
/****************************************************************************
* pixel_satd_x4
SATD_X( 4x4, cpu )
SATD_X_DECL7()
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_MMX
SATD_X_DECL7( _mmxext )
SATD_X_DECL6( _sse2 )
#if HAVE_ARMV6
SATD_X_DECL7( _neon )
#endif
+#endif // !X264_HIGH_BIT_DEPTH
#define INTRA_MBCMP_8x8( mbcmp )\
void x264_intra_##mbcmp##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
static float ssim_end1( int s1, int s2, int ss, int s12 )
{
- static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
- static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
+ static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
+ static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
int vars = ss*64 - s1*s1 - s2*s2;
int covar = s12*64 - s1*s2;
return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16;
pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
}
}
#endif
+#endif // !X264_HIGH_BIT_DEPTH
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
{
x264_pixel_altivec_init( pixf );
}
#endif
+#if !X264_HIGH_BIT_DEPTH
#if ARCH_UltraSparc
INIT4( sad, _vis );
INIT4( sad_x3, _vis );
INIT4( sad_x4, _vis );
#endif
+#endif // !X264_HIGH_BIT_DEPTH
pixf->ads[PIXEL_8x16] =
pixf->ads[PIXEL_8x4] =
#include "common/common.h"
#include "ppccommon.h"
+#if !X264_HIGH_BIT_DEPTH
#define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \
b1 = vec_add( a0, a3 ); \
b3 = vec_add( a1, a2 ); \
vec_st( tmp0v, 0x00, level );
vec_st( tmp1v, 0x10, level );
}
+#endif // !X264_HIGH_BIT_DEPTH
#include "common/common.h"
#include "ppccommon.h"
+#if !X264_HIGH_BIT_DEPTH
#define transpose4x16(r0, r1, r2, r3) \
{ \
register vec_u8_t r4; \
transpose4x16(line1, line2, line3, line4);
write16x4(pix-2, stride, line1, line2, line3, line4);
}
+#endif // !X264_HIGH_BIT_DEPTH
#include "mc.h"
#include "ppccommon.h"
+#if !X264_HIGH_BIT_DEPTH
typedef void (*pf_mc_t)( uint8_t *src, int i_src,
uint8_t *dst, int i_dst, int i_height );
dstc += dst_stride;
}
}
+#endif // !X264_HIGH_BIT_DEPTH
void x264_mc_altivec_init( x264_mc_functions_t *pf )
{
+#if !X264_HIGH_BIT_DEPTH
pf->mc_luma = mc_luma_altivec;
pf->get_ref = get_ref_altivec;
pf->mc_chroma = mc_chroma_altivec;
pf->hpel_filter = x264_hpel_filter_altivec;
pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
+#endif // !X264_HIGH_BIT_DEPTH
}
#include "common/common.h"
#include "ppccommon.h"
+#if !X264_HIGH_BIT_DEPTH
/***********************************************************************
* SAD routines
**********************************************************************/
sums[0][3] = temp[0];
sums[1][3] = temp[1];
}
+#endif // !X264_HIGH_BIT_DEPTH
/****************************************************************************
* x264_pixel_init:
****************************************************************************/
void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
{
+#if !X264_HIGH_BIT_DEPTH
pixf->sad[PIXEL_16x16] = pixel_sad_16x16_altivec;
pixf->sad[PIXEL_8x16] = pixel_sad_8x16_altivec;
pixf->sad[PIXEL_16x8] = pixel_sad_16x8_altivec;
pixf->hadamard_ac[PIXEL_8x8] = x264_pixel_hadamard_ac_8x8_altivec;
pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec;
+#endif // !X264_HIGH_BIT_DEPTH
}
#include "pixel.h"
#include "ppccommon.h"
+#if !X264_HIGH_BIT_DEPTH
static void predict_8x8c_p_altivec( uint8_t *src )
{
int H = 0, V = 0;
src += FDEC_STRIDE;
}
}
+#endif // !X264_HIGH_BIT_DEPTH
/****************************************************************************
****************************************************************************/
void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
{
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_16x16_V ] = predict_16x16_v_altivec;
pf[I_PRED_16x16_H ] = predict_16x16_h_altivec;
pf[I_PRED_16x16_DC] = predict_16x16_dc_altivec;
pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_altivec;
pf[I_PRED_16x16_DC_TOP ] = predict_16x16_dc_top_altivec;
pf[I_PRED_16x16_DC_128 ] = predict_16x16_dc_128_altivec;
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] )
{
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_CHROMA_P] = predict_8x8c_p_altivec;
+#endif // !X264_HIGH_BIT_DEPTH
}
#include "ppccommon.h"
#include "quant.h"
+#if !X264_HIGH_BIT_DEPTH
// quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
#define QUANT_16_U( idx0, idx1 ) \
{ \
DEQUANT_SHR();
}
}
+#endif // !X264_HIGH_BIT_DEPTH
/* 16x16 DC intra prediction: average of the 16 left and 16 top
 * neighbours. The sum accumulates in a plain int — using pixel4 as the
 * accumulator (the old code) breaks at high bit depth, where pixel4 is
 * a packed multi-pixel type, not an integer accumulator. The rounded
 * average is then splatted into a pixel4 for the 4-at-a-time fill.
 * (This span was an unresolved diff hunk; resolved.) */
void x264_predict_16x16_dc_c( pixel *src )
{
    int dc = 0;

    for( int i = 0; i < 16; i++ )
    {
        dc += src[-1 + i * FDEC_STRIDE]; /* left column */
        dc += src[i - FDEC_STRIDE];      /* top row */
    }
    /* 32 samples: round with +16, shift by 5. */
    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );

    PREDICT_16x16_DC( dcsplat );
}
/* 16x16 DC prediction from the left column only (top unavailable).
 * int accumulator + separate pixel4 splat, as in the full DC variant.
 * (Resolved from an unresolved diff hunk.) */
static void x264_predict_16x16_dc_left_c( pixel *src )
{
    int dc = 0;

    for( int i = 0; i < 16; i++ )
        dc += src[-1 + i * FDEC_STRIDE];
    /* 16 samples: round with +8, shift by 4. */
    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );

    PREDICT_16x16_DC( dcsplat );
}
/* 16x16 DC prediction from the top row only (left unavailable).
 * int accumulator + separate pixel4 splat, as in the full DC variant.
 * (Resolved from an unresolved diff hunk.) */
static void x264_predict_16x16_dc_top_c( pixel *src )
{
    int dc = 0;

    for( int i = 0; i < 16; i++ )
        dc += src[i - FDEC_STRIDE];
    /* 16 samples: round with +8, shift by 4. */
    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );

    PREDICT_16x16_DC( dcsplat );
}
/* 16x16 DC prediction with no neighbours available: fill with the
 * mid-scale value for the current bit depth (128 at 8-bit) instead of
 * a hard-coded 0x80. (Resolved from an unresolved diff hunk.) */
static void x264_predict_16x16_dc_128_c( pixel *src )
{
    PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
}
void x264_predict_16x16_h_c( pixel *src )
{
{
for( int y = 0; y < 8; y++ )
{
- MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 0x80 );
- MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 0x80 );
+ MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
+ MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
src += FDEC_STRIDE;
}
}
/* 8x8 chroma DC prediction from the left column only: the top 4x8 half
 * is filled with the average of left rows 0-3, the bottom half with the
 * average of left rows 4-7. Sums accumulate in ints; the old code
 * summed into pixel4, which is wrong at high bit depth where pixel4 is
 * a packed type. (Resolved from an unresolved diff hunk.) */
static void x264_predict_8x8c_dc_left_c( pixel *src )
{
    int dc0 = 0, dc1 = 0;

    for( int y = 0; y < 4; y++ )
    {
        dc0 += src[y * FDEC_STRIDE     - 1];
        dc1 += src[(y+4) * FDEC_STRIDE - 1];
    }
    pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
    pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );

    for( int y = 0; y < 4; y++ )
    {
        MPIXEL_X4( src+0 ) = dc0splat;
        MPIXEL_X4( src+4 ) = dc0splat;
        src += FDEC_STRIDE;
    }
    for( int y = 0; y < 4; y++ )
    {
        MPIXEL_X4( src+0 ) = dc1splat;
        MPIXEL_X4( src+4 ) = dc1splat;
        src += FDEC_STRIDE;
    }
}
/* 8x8 chroma DC prediction from the top row only: the left 8x4 half is
 * filled with the average of top columns 0-3, the right half with the
 * average of top columns 4-7. int accumulators, pixel4 splats — same
 * high-bit-depth fix as the dc_left variant. (Resolved from an
 * unresolved diff hunk.) */
static void x264_predict_8x8c_dc_top_c( pixel *src )
{
    int dc0 = 0, dc1 = 0;

    for( int x = 0; x < 4; x++ )
    {
        dc0 += src[x     - FDEC_STRIDE];
        dc1 += src[x + 4 - FDEC_STRIDE];
    }
    pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
    pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );

    for( int y = 0; y < 8; y++ )
    {
        MPIXEL_X4( src+0 ) = dc0splat;
        MPIXEL_X4( src+4 ) = dc1splat;
        src += FDEC_STRIDE;
    }
}
/* 4x4 DC prediction with no neighbours: fill with the mid-scale value
 * for the current bit depth (128 at 8-bit) instead of hard-coded 0x80.
 * (Resolved from an unresolved diff hunk.) */
static void x264_predict_4x4_dc_128_c( pixel *src )
{
    PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
}
static void x264_predict_4x4_dc_left_c( pixel *src )
{
}
else
{
- M64( edge+24 ) = SRC(7,-1) * 0x0101010101010101ULL;
+ MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
+ MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
edge[32] = SRC(7,-1);
}
}
/* 8x8 DC prediction with no neighbours: fill with the mid-scale value
 * for the current bit depth (128 at 8-bit) instead of hard-coded 0x80.
 * (Resolved from an unresolved diff hunk.) */
static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[33] )
{
    PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
}
static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[33] )
{
}
/* 8x8 vertical prediction: replicate the 8 top-edge pixels
 * (edge[16..23]) into every row. The row is moved as two pixel4
 * groups rather than one M64, because at high bit depth 8 pixels no
 * longer fit in 64 bits. (Resolved from an unresolved diff hunk.) */
void x264_predict_8x8_v_c( pixel *src, pixel edge[33] )
{
    pixel4 top[2] = { MPIXEL_X4( edge+16 ),
                      MPIXEL_X4( edge+20 ) };
    for( int y = 0; y < 8; y++ )
    {
        MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top[0];
        MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1];
    }
}
static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[33] )
{
for( int i = 1; i < size; i++ )
{
int level = dct[i];
- int sign = level>>15;
+ int sign = level>>31;
level = (level+sign)^sign;
sum[i] += level;
level -= offset[i];
int i_score = 0;
int idx = i_max - 1;
- /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */
- while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 )
- idx -= 2;
- if( idx >= 0 && dct[idx] == 0 )
+ while( idx >= 0 && dct[idx] == 0 )
idx--;
while( idx >= 0 )
{
/* Return the index of the last nonzero coefficient in l[0..i_count-1],
 * or -1 if all are zero. The old M64 word-at-a-time pre-scan assumed
 * 16-bit coefficients and 64-bit-aligned tails, which no longer holds
 * when dctcoef is 32-bit; a plain backwards scan is correct at any
 * width. (Resolved from an unresolved diff hunk; the closing brace was
 * elided by the patch and has been restored.) */
static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count )
{
    int i_last = i_count-1;
    while( i_last >= 0 && l[i_last] == 0 )
        i_last--;
    return i_last;
}
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15;
pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
+#if !X264_HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
}
#endif
+#endif // !X264_HIGH_BIT_DEPTH
pf->coeff_last[ DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
pf->coeff_level_run[ DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
32 - 11, 32 - 21 };
int max_qp_err = -1;
int max_chroma_qp_err = -1;
+ int min_qp_err = QP_MAX+1;
for( int i = 0; i < 6; i++ )
{
}
else
{
- CHECKED_MALLOC( h-> quant4_mf[i], 52*size*sizeof(uint16_t) );
+ CHECKED_MALLOC( h-> quant4_mf[i], (QP_MAX+1)*size*sizeof(uint16_t) );
CHECKED_MALLOC( h->dequant4_mf[i], 6*size*sizeof(int) );
- CHECKED_MALLOC( h->unquant4_mf[i], 52*size*sizeof(int) );
+ CHECKED_MALLOC( h->unquant4_mf[i], (QP_MAX+1)*size*sizeof(int) );
}
for( j = (i<4 ? 0 : 4); j < i; j++ )
if( j < i )
h->quant4_bias[i] = h->quant4_bias[j];
else
- CHECKED_MALLOC( h->quant4_bias[i], 52*size*sizeof(uint16_t) );
+ CHECKED_MALLOC( h->quant4_bias[i], (QP_MAX+1)*size*sizeof(uint16_t) );
}
for( int q = 0; q < 6; q++ )
quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
}
}
- for( int q = 0; q < 52; q++ )
+ for( int q = 0; q < QP_MAX+1; q++ )
{
int j;
for( int i_list = 0; i_list < 4; i_list++ )
{
h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
+ if( !j )
+ {
+ min_qp_err = X264_MIN( min_qp_err, q );
+ continue;
+ }
// round to nearest, unless that would cause the deadzone to be negative
h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
{
h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
h->quant8_mf[i_list][q][i] = j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
+ if( !j )
+ {
+ min_qp_err = X264_MIN( min_qp_err, q );
+ continue;
+ }
h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
if( j > 0xffff && q > max_qp_err )
max_qp_err = q;
x264_log( h, X264_LOG_ERROR, "but min chroma QP is implied to be %d.\n", h->chroma_qp_table[h->param.rc.i_qp_min] );
return -1;
}
+ if( !h->mb.b_lossless && min_qp_err <= h->param.rc.i_qp_max )
+ {
+ x264_log( h, X264_LOG_ERROR, "Quantization underflow. Your CQM is incompatible with QP > %d,\n", min_qp_err-1 );
+ x264_log( h, X264_LOG_ERROR, "but max QP is implied to be %d.\n", h->param.rc.i_qp_max );
+ return -1;
+ }
return 0;
fail:
x264_cqm_delete( h );
PIXEL_AVG_WALL(sse2_misalign)
PIXEL_AVG_WALL(cache64_ssse3)
+#if !X264_HIGH_BIT_DEPTH
#define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
static void (* const x264_pixel_avg_wtab_##instr[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =\
{\
x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 );
}
}
+#endif // !X264_HIGH_BIT_DEPTH
void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
{
if( !(cpu&X264_CPU_MMX) )
return;
+ pf->memcpy_aligned = x264_memcpy_aligned_mmx;
+ pf->memzero_aligned = x264_memzero_aligned_mmx;
+#if !X264_HIGH_BIT_DEPTH
pf->copy_16x16_unaligned = x264_mc_copy_w16_mmx;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
- pf->memcpy_aligned = x264_memcpy_aligned_mmx;
- pf->memzero_aligned = x264_memzero_aligned_mmx;
pf->integral_init4v = x264_integral_init4v_mmx;
pf->integral_init8v = x264_integral_init8v_mmx;
+#endif // !X264_HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_MMXEXT) )
return;
+#if !X264_HIGH_BIT_DEPTH
pf->mc_luma = mc_luma_mmxext;
pf->get_ref = get_ref_mmxext;
pf->mc_chroma = x264_mc_chroma_mmxext;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_cache32_mmxext;
}
#endif
+#endif // !X264_HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_SSE2) )
return;
pf->memcpy_aligned = x264_memcpy_aligned_sse2;
pf->memzero_aligned = x264_memzero_aligned_sse2;
+#if !X264_HIGH_BIT_DEPTH
pf->integral_init4v = x264_integral_init4v_sse2;
pf->integral_init8v = x264_integral_init8v_sse2;
pf->hpel_filter = x264_hpel_filter_sse2_amd;
pf->integral_init4h = x264_integral_init4h_sse4;
pf->integral_init8h = x264_integral_init8h_sse4;
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_16x16_v_sse2( uint8_t *src );
void x264_predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
+#if !X264_HIGH_BIT_DEPTH
ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#else
INTRA_SA8D_X3(mmxext)
#endif
+#endif // !X264_HIGH_BIT_DEPTH
/****************************************************************************
* Exported functions:
{
if( !(cpu&X264_CPU_MMX) )
return;
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx;
if( !(cpu&X264_CPU_MMXEXT) )
return;
#ifdef __GNUC__
pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
#endif
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
{
if( !(cpu&X264_CPU_MMX) )
return;
+#if !X264_HIGH_BIT_DEPTH
#if ARCH_X86_64
pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
#endif
#ifdef __GNUC__
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
#endif
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
{
if( !(cpu&X264_CPU_MMXEXT) )
return;
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_mmxext;
pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
*predict_8x8_filter = x264_predict_8x8_filter_ssse3;
+#endif // !X264_HIGH_BIT_DEPTH
}
void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
{
if( !(cpu&X264_CPU_MMXEXT) )
return;
+#if !X264_HIGH_BIT_DEPTH
pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
+#endif // !X264_HIGH_BIT_DEPTH
}
echo " --enable-visualize enables visualization (X11 only)"
echo " --enable-pic build position-independent code"
echo " --enable-shared build libx264.so"
+echo " --bit-depth=BIT_DEPTH sets output bit depth (8-10), default 8"
echo " --extra-asflags=EASFLAGS add EASFLAGS to ASFLAGS"
echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS"
echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS"
pic="no"
vis="no"
shared="no"
+bit_depth="8"
CFLAGS="$CFLAGS -Wall -I."
LDFLAGS="$LDFLAGS"
CFLAGS="$CFLAGS --sysroot=${opt#--sysroot=}"
LDFLAGS="$LDFLAGS --sysroot=${opt#--sysroot=}"
;;
+ --bit-depth=*)
+ bit_depth="${opt#--bit-depth=}"
+ if [ "$bit_depth" -lt "8" -o "$bit_depth" -gt "10" ]; then
+ echo "Supplied bit depth must be in range [8,10]."
+ exit 1
+ fi
+ bit_depth=`expr $bit_depth + 0`
+ ;;
*)
echo "Unknown option $opt, ignored"
;;
CFLAGS="-Wshadow $CFLAGS"
fi
+if [ "$bit_depth" -gt "8" ]; then
+ define X264_HIGH_BIT_DEPTH
+fi
+
+define BIT_DEPTH $bit_depth
+
rm -f conftest*
# generate config files
PIC: $pic
shared: $shared
visualize: $vis
+bit depth: $bit_depth
EOF
echo >> config.log
} x264_mb_analysis_t;
/* lambda = pow(2,qp/6-2) */
-const uint8_t x264_lambda_tab[52] = {
- 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
- 1, 1, 1, 1, /* 8-11 */
- 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
- 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
- 6, 7, 8, 9,10,11,13,14, /* 28-35 */
- 16,18,20,23,25,29,32,36, /* 36-43 */
- 40,45,51,57,64,72,81,91 /* 44-51 */
+const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 0- 7 */
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 8-15 */
+ 2, 2, 2, 2, 3, 3, 3, 4, /* 16-23 */
+ 4, 4, 5, 6, 6, 7, 8, 9, /* 24-31 */
+ 10, 11, 13, 14, 16, 18, 20, 23, /* 32-39 */
+ 25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */
+ 64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */
+ 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
};
/* lambda2 = pow(lambda,2) * .9 * 256 */
-const int x264_lambda2_tab[52] = {
- 14, 18, 22, 28, 36, 45, 57, 72, /* 0 - 7 */
- 91, 115, 145, 182, 230, 290, 365, 460, /* 8 - 15 */
- 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16 - 23 */
- 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24 - 31 */
- 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32 - 39 */
-148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40 - 47 */
-943718, 1189010, 1498059, 1887436 /* 48 - 51 */
+const int x264_lambda2_tab[QP_MAX_MAX+1] = {
+ 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
+ 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
+ 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
+ 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
+ 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
+ 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
+ 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
+5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
};
const uint8_t x264_exp2_lut[64] = {
// should the intra and inter lambdas be different?
// I'm just matching the behaviour of deadzone quant.
-static const int x264_trellis_lambda2_tab[2][52] = {
+static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
// inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
- { 46, 58, 73, 92, 117, 147,
- 185, 233, 294, 370, 466, 587,
- 740, 932, 1174, 1480, 1864, 2349,
- 2959, 3728, 4697, 5918, 7457, 9395,
- 11837, 14914, 18790, 23674, 29828, 37581,
- 47349, 59656, 75163, 94699, 119313, 150326,
- 189399, 238627, 300652, 378798, 477255, 601304,
- 757596, 954511, 1202608, 1515192, 1909022, 2405217,
- 3030384, 3818045, 4810435, 6060769 },
+ { 46, 58, 73, 92, 117, 147,
+ 185, 233, 294, 370, 466, 587,
+ 740, 932, 1174, 1480, 1864, 2349,
+ 2959, 3728, 4697, 5918, 7457, 9395,
+ 11837, 14914, 18790, 23674, 29828, 37581,
+ 47349, 59656, 75163, 94699, 119313, 150326,
+ 189399, 238627, 300652, 378798, 477255, 601304,
+ 757596, 954511, 1202608, 1515192, 1909022, 2405217,
+ 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
+ 12121539,15272182,19241743,24243077,30544363,38483486,
+ 48486154,61088726,76966972,96972308 },
// intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
- { 27, 34, 43, 54, 68, 86,
- 108, 136, 172, 216, 273, 343,
- 433, 545, 687, 865, 1090, 1374,
- 1731, 2180, 2747, 3461, 4361, 5494,
- 6922, 8721, 10988, 13844, 17442, 21976,
- 27688, 34885, 43953, 55377, 69771, 87906,
- 110755, 139543, 175813, 221511, 279087, 351627,
- 443023, 558174, 703255, 886046, 1116348, 1406511,
- 1772093, 2232697, 2813022, 3544186 }
+ { 27, 34, 43, 54, 68, 86,
+ 108, 136, 172, 216, 273, 343,
+ 433, 545, 687, 865, 1090, 1374,
+ 1731, 2180, 2747, 3461, 4361, 5494,
+ 6922, 8721, 10988, 13844, 17442, 21976,
+ 27688, 34885, 43953, 55377, 69771, 87906,
+ 110755, 139543, 175813, 221511, 279087, 351627,
+ 443023, 558174, 703255, 886046, 1116348, 1406511,
+ 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
+ 7088374, 8930791,11252092,14176748,17861583,22504184,
+ 28353495,35723165,45008368,56706990 }
};
static const uint16_t x264_chroma_lambda2_offset_tab[] = {
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
-static uint16_t x264_cost_ref[92][3][33];
+static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
int x264_analyse_init_costs( x264_t *h, int qp )
void x264_analyse_free_costs( x264_t *h )
{
- for( int i = 0; i < 92; i++ )
+ for( int i = 0; i < LAMBDA_MAX+1; i++ )
{
if( h->cost_mv[i] )
x264_free( h->cost_mv[i] - 2*4*2048 );
if( i_dqp != 0 )
{
int val = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1);
- /* dqp is interpreted modulo 52 */
- if( val >= 51 && val != 52 )
- val = 103 - val;
+ /* dqp is interpreted modulo (QP_MAX+1) */
+ if( val >= QP_MAX && val != QP_MAX+1 )
+ val = 2*QP_MAX+1 - val;
do
{
x264_cabac_encode_decision( cb, 60 + ctx, 1 );
i_mb_pos_tex = x264_cabac_pos( cb );
h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
- cb->p += 256;
- for( int i = 0; i < 8; i++ )
- memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- cb->p += 64;
- for( int i = 0; i < 8; i++ )
- memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- cb->p += 64;
+ bs_t s;
+ bs_init( &s, cb->p, cb->p_end - cb->p );
+ for( int i = 0; i < 256; i++ )
+ bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
+ for( int ch = 0; ch < 2; ch++ )
+ for( int i = 0; i < 8; i++ )
+ for( int j = 0; j < 8; j++ )
+ bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
+
+ bs_flush( &s );
+ cb->p = s.p;
x264_cabac_encode_init_core( cb );
h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
bs_t *s = &h->out.bs;
static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
int i_level_prefix = 15;
- int mask = level >> 15;
+ int mask = level >> 31;
int abs_level = (level^mask)-mask;
int i_level_code = abs_level*2-mask-2;
if( ( i_level_code >> i_suffix_length ) < 15 )
if( i_dqp )
{
- if( i_dqp < -26 )
- i_dqp += 52;
- else if( i_dqp > 25 )
- i_dqp -= 52;
+ if( i_dqp < -(QP_MAX+1)/2 )
+ i_dqp += QP_MAX+1;
+ else if( i_dqp > QP_MAX/2 )
+ i_dqp -= QP_MAX+1;
}
bs_write_se( s, i_dqp );
}
bs_align_0( s );
- memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
- s->p += 256;
- for( int i = 0; i < 8; i++ )
- memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- s->p += 64;
- for( int i = 0; i < 8; i++ )
- memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- s->p += 64;
+ for( int i = 0; i < 256; i++ )
+ bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
+ for( int ch = 0; ch < 2; ch++ )
+ for( int i = 0; i < 8; i++ )
+ for( int j = 0; j < 8; j++ )
+ bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
bs_init( s, s->p, s->p_end - s->p );
s->p_start = p_start;
****************************************************************************/
static float x264_psnr( int64_t i_sqe, int64_t i_size )
{
- double f_mse = (double)i_sqe / ((double)65025.0 * (double)i_size);
+ double f_mse = (double)i_sqe / (PIXEL_MAX*PIXEL_MAX * (double)i_size);
if( f_mse <= 0.0000000001 ) /* Max 100dB */
return 100;
FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
if( !f )
return;
+ int bytes_per_pixel = (BIT_DEPTH+7)/8;
/* Write the frame in display order */
- fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2, SEEK_SET );
+ fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2 * bytes_per_pixel, SEEK_SET );
for( int i = 0; i < h->fdec->i_plane; i++ )
for( int y = 0; y < h->param.i_height >> !!i; y++ )
- fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f );
+ for( int j = 0; j < h->param.i_width >> !!i; j++ )
+ fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]]+j, bytes_per_pixel, 1, f );
fclose( f );
}
x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
return -1;
}
- h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, 51 );
- h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
+ h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, QP_MAX );
+ h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
if( h->param.rc.i_rc_method == X264_RC_CRF )
{
h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
float qp_p = h->param.rc.i_qp_constant;
float qp_i = qp_p - 6*log2f( h->param.rc.f_ip_factor );
float qp_b = qp_p + 6*log2f( h->param.rc.f_pb_factor );
- h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
- h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
+ h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX );
+ h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
h->param.rc.i_aq_mode = 0;
h->param.rc.b_mb_tree = 0;
}
- h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
+ h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
if( h->param.rc.i_vbv_buffer_size )
{
if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
goto fail;
+ static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
/* Checks for known miscompilation issues. */
- if( h->cost_mv[1][2013] != 24 )
+ if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
{
x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
goto fail;
fclose( f );
}
- x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
- h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
- h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
- h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
- "High 4:4:4 Predictive", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
+ const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
+ h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
+ h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
+ h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
+ "High 4:4:4 Predictive";
+
+ if( h->sps->i_profile_idc < PROFILE_HIGH10 )
+ {
+ x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
+ profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
+ }
+ else
+ {
+ x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d, bit depth %d\n",
+ profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10, BIT_DEPTH );
+ }
return h;
fail:
bs_align_1( &h->out.bs );
/* init cabac */
- x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc );
+ x264_cabac_context_init( &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
}
h->mb.i_last_qp = h->sh.i_qp;
for( int i = 0; i < 3; i++ )
{
pic_out->img.i_stride[i] = h->fdec->i_stride[i];
+ // FIXME This breaks the API when pixel != uint8_t.
pic_out->img.plane[i] = h->fdec->plane[i];
}
#include "common/macroblock.h"
-extern const int x264_lambda2_tab[52];
-extern const uint8_t x264_lambda_tab[52];
+extern const int x264_lambda2_tab[QP_MAX_MAX+1];
+extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
void x264_rdo_init( void );
void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
-extern uint16_t *x264_cost_mv_fpel[92][4];
+extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
#define COPY1_IF_LT(x,y)\
if((y)<(x))\
uint32_t ssd = res >> 32;
frame->i_pixel_sum[i] += sum;
frame->i_pixel_ssd[i] += ssd;
- return ssd - (sum * sum >> shift);
+ return ssd - ((uint64_t)sum * sum >> shift);
}
// Find the total AC energy of the block in all planes.
{
if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
{
+ float bit_depth_correction = powf(1 << (BIT_DEPTH-8), 0.5f);
float avg_adj_pow2 = 0.f;
for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
}
avg_adj /= h->mb.i_mb_count;
avg_adj_pow2 /= h->mb.i_mb_count;
- strength = h->param.rc.f_aq_strength * avg_adj;
- avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
+ strength = h->param.rc.f_aq_strength * avg_adj / bit_depth_correction;
+ avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f * bit_depth_correction)) / avg_adj;
}
else
strength = h->param.rc.f_aq_strength * 1.0397f;
else
{
uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
- qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
+ qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8)));
}
if( quant_offsets )
qp_adj += quant_offsets[mb_xy];
rc->ip_offset = 6.0 * log2f( h->param.rc.f_ip_factor );
rc->pb_offset = 6.0 * log2f( h->param.rc.f_pb_factor );
rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
- rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
- rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
+ rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, QP_MAX );
+ rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, QP_MAX );
h->mb.ip_offset = rc->ip_offset + 0.5;
rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
if( l->level_idc == 41 && h->param.i_nal_hrd )
mincr = 4;
- /* The spec has a bizarre special case for the first frame. */
- if( h->i_frame == 0 )
- {
- //384 * ( Max( PicSizeInMbs, fR * MaxMBPS ) + MaxMBPS * ( tr( 0 ) - tr,n( 0 ) ) ) / MinCR
- double fr = 1. / 172;
- int pic_size_in_mbs = h->mb.i_mb_width * h->mb.i_mb_height;
- rc->frame_size_maximum = 384 * 8 * X264_MAX( pic_size_in_mbs, fr*l->mbps ) / mincr;
- }
+ /* High 10 doesn't require minCR, so just set the maximum to a large value. */
+ if( h->sps->i_profile_idc == PROFILE_HIGH10 )
+ rc->frame_size_maximum = 1e9;
else
{
- //384 * MaxMBPS * ( tr( n ) - tr( n - 1 ) ) / MinCR
- rc->frame_size_maximum = 384 * 8 * ((double)h->fenc->i_cpb_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale) * l->mbps / mincr;
+ /* The spec has a bizarre special case for the first frame. */
+ if( h->i_frame == 0 )
+ {
+ //384 * ( Max( PicSizeInMbs, fR * MaxMBPS ) + MaxMBPS * ( tr( 0 ) - tr,n( 0 ) ) ) / MinCR
+ double fr = 1. / 172;
+ int pic_size_in_mbs = h->mb.i_mb_width * h->mb.i_mb_height;
+ rc->frame_size_maximum = 384 * BIT_DEPTH * X264_MAX( pic_size_in_mbs, fr*l->mbps ) / mincr;
+ }
+ else
+ {
+ //384 * MaxMBPS * ( tr( n ) - tr( n - 1 ) ) / MinCR
+ rc->frame_size_maximum = 384 * BIT_DEPTH * ((double)h->fenc->i_cpb_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale) * l->mbps / mincr;
+ }
}
}
rc->qpa_rc =
rc->qpa_aq = 0;
- rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
+ rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
h->fdec->f_qp_avg_rc =
h->fdec->f_qp_avg_aq =
rc->qpm = q;
* So just calculate the average QP used so far. */
h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24
: 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P];
- rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
- rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
- rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
+ rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
+ rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX );
+ rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX );
x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
}
else if( expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2 )
{
- if( h->param.rc.i_qp_max < 51 )
+ if( h->param.rc.i_qp_max < QP_MAX )
x264_log( h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max );
else
x264_log( h, X264_LOG_WARNING, "try increasing target bitrate\n");
/* We only need to zero an empty 4x4 block. 8x8 can be
implicitly emptied via zero nnz, as can dc. */
if( i_coefs == 16 && !dc )
- {
- M128( &dct[0] ) = M128_ZERO;
- M128( &dct[8] ) = M128_ZERO;
- }
+ memset( dct, 0, 16 * sizeof(dctcoef) );
return 0;
}
if( bnode == &nodes_cur[0] )
{
if( i_coefs == 16 && !dc )
- {
- M128( &dct[0] ) = M128_ZERO;
- M128( &dct[8] ) = M128_ZERO;
- }
+ memset( dct, 0, 16 * sizeof(dctcoef) );
return 0;
}
sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
if( sps->b_qpprime_y_zero_transform_bypass )
sps->i_profile_idc = PROFILE_HIGH444_PREDICTIVE;
+ else if( BIT_DEPTH > 8 )
+ sps->i_profile_idc = PROFILE_HIGH10;
else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT )
sps->i_profile_idc = PROFILE_HIGH;
else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 )
if( sps->i_profile_idc >= PROFILE_HIGH )
{
bs_write_ue( s, 1 ); // chroma_format_idc = 4:2:0
- bs_write_ue( s, 0 ); // bit_depth_luma_minus8
- bs_write_ue( s, 0 ); // bit_depth_chroma_minus8
+ bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
+ bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
bs_write( s, 1, sps->b_qpprime_y_zero_transform_bypass );
bs_write( s, 1, 0 ); // seq_scaling_matrix_present_flag
}
bs_write( s, 1, pps->b_weighted_pred );
bs_write( s, 2, pps->b_weighted_bipred );
- bs_write_se( s, pps->i_pic_init_qp - 26 );
+ bs_write_se( s, pps->i_pic_init_qp - 26 - QP_BD_OFFSET );
bs_write_se( s, pps->i_pic_init_qs - 26 );
bs_write_se( s, pps->i_chroma_qp_index_offset );
int ret = 0;
int mbs = h->sps->i_mb_width * h->sps->i_mb_height;
int dpb = mbs * 384 * h->sps->vui.i_max_dec_frame_buffering;
- int cbp_factor = h->sps->i_profile_idc==PROFILE_HIGH ? 5 : 4;
+ int cbp_factor = h->sps->i_profile_idc==PROFILE_HIGH10 ? 12 :
+ h->sps->i_profile_idc==PROFILE_HIGH ? 5 : 4;
const x264_level_t *l = x264_levels;
while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc )
(mv1)[0], (mv1)[1], 8, 8, w ); \
h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
} \
- i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
+ i_cost = penalty * a->i_lambda + h->pixf.mbcmp[PIXEL_8x8]( \
m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
}
}
x264_me_search( h, &m[l], mvc, i_mvc );
- m[l].cost -= 2; // remove mvcost from skip mbs
+ m[l].cost -= 2 * a->i_lambda; // remove mvcost from skip mbs
if( M32( m[l].mv ) )
- m[l].cost += 5;
+ m[l].cost += 5 * a->i_lambda;
skip_motionest:
CP32( fenc_mvs[l], m[l].mv );
ALIGNED_ARRAY_16( pixel, edge,[33] );
pixel *pix = &pix1[8+FDEC_STRIDE - 1];
pixel *src = &fenc->lowres[0][i_pel_offset - 1];
- const int intra_penalty = 5;
+ const int intra_penalty = 5 * a->i_lambda;
int satds[3];
memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
}
}
- fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost + (list_used << LOWRES_COST_SHIFT);
+ fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = X264_MIN( i_bcost, LOWRES_COST_MASK ) + (list_used << LOWRES_COST_SHIFT);
}
#undef TRY_BIDIR
uint8_t *buf1, *buf2;
/* buf3, buf4: used to store output */
uint8_t *buf3, *buf4;
-/* pbuf*: point to the same memory as above, just for type convenience */
-pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
+/* pbuf1, pbuf2: initialised to random pixel data; callers shouldn't write into them. */
+pixel *pbuf1, *pbuf2;
+/* pbuf3, pbuf4: point to buf3, buf4, just for type convenience */
+pixel *pbuf3, *pbuf4;
int quiet = 0;
int z = i|(i>>4);
z ^= z>>2;
z ^= z>>1;
- buf3[i] = ~(buf4[i] = -(z&1));
+ pbuf4[i] = -(z&1) & PIXEL_MAX;
+ pbuf3[i] = ~pbuf4[i] & PIXEL_MAX;
}
// random pattern made of maxed pixel differences, in case an intermediate value overflows
for( int i = 256; i < 0x1000; i++ )
- buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
+ {
+ pbuf4[i] = -(pbuf1[i&~0x88]&1) & PIXEL_MAX;
+ pbuf3[i] = ~(pbuf4[i]) & PIXEL_MAX;
+ }
#define TEST_PIXEL( name, align ) \
ok = 1, used_asm = 0; \
used_asm = 1; \
call_c( dct_c.name, t1, pbuf1, pbuf2 ); \
call_a( dct_asm.name, t2, pbuf1, pbuf2 ); \
- if( memcmp( t1, t2, size ) ) \
+ if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
{ \
ok = 0; \
fprintf( stderr, #name " [FAILED]\n" ); \
} \
}
ok = 1; used_asm = 0;
- TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
- TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
- TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4*2 );
- TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
+ TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16 );
+ TEST_DCT( sub8x8_dct, dct1, dct2, 16*4 );
+ TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4 );
+ TEST_DCT( sub16x16_dct, dct1, dct2, 16*16 );
report( "sub_dct4 :" );
ok = 1; used_asm = 0;
- TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
- TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
+ TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64 );
+ TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*4 );
report( "sub_dct8 :" );
#undef TEST_DCT
{ \
set_func_name( #name ); \
used_asm = 1; \
- memcpy( buf3, buf1, 32*32 * sizeof(pixel) ); \
- memcpy( buf4, buf1, 32*32 * sizeof(pixel) ); \
- memcpy( dct1, src, 512 * sizeof(pixel) ); \
- memcpy( dct2, src, 512 * sizeof(pixel) ); \
+ memcpy( pbuf3, pbuf1, 32*32 * sizeof(pixel) ); \
+ memcpy( pbuf4, pbuf1, 32*32 * sizeof(pixel) ); \
+ memcpy( dct1, src, 256 * sizeof(dctcoef) ); \
+ memcpy( dct2, src, 256 * sizeof(dctcoef) ); \
call_c1( dct_c.name, pbuf3, (void*)dct1 ); \
call_a1( dct_asm.name, pbuf4, (void*)dct2 ); \
- if( memcmp( buf3, buf4, 32*32 * sizeof(pixel) ) ) \
+ if( memcmp( pbuf3, pbuf4, 32*32 * sizeof(pixel) ) ) \
{ \
ok = 0; \
fprintf( stderr, #name " [FAILED]\n" ); \
dct1[0][j] = !i ? (j^j>>1^j>>2^j>>3)&1 ? 4080 : -4080 /* max dc */\
: i<8 ? (*p++)&1 ? 4080 : -4080 /* max elements */\
: ((*p++)&0x1fff)-0x1000; /* general case */\
- memcpy( dct2, dct1, 32 );\
+ memcpy( dct2, dct1, 16 * sizeof(dctcoef) );\
call_c1( dct_c.name, dct1[0] );\
call_a1( dct_asm.name, dct2[0] );\
- if( memcmp( dct1, dct2, 32 ) )\
+ if( memcmp( dct1, dct2, 16 * sizeof(dctcoef) ) )\
ok = 0;\
}\
call_c2( dct_c.name, dct1[0] );\
int nz_a, nz_c; \
set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
used_asm = 1; \
- memcpy( buf3, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
- memcpy( buf4, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
+ memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
+ memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \
nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
- if( memcmp( t1, t2, size*sizeof(dctcoef) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
+ if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*sizeof(pixel) ) || nz_c != nz_a ) \
{ \
ok = 0; \
fprintf( stderr, #name " [FAILED]\n" ); \
used_asm = 1; \
for( int i = 0; i < 2; i++ ) \
{ \
- memcpy( buf3, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
- memcpy( buf4, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
+ memcpy( pbuf3, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
+ memcpy( pbuf4, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
for( int j = 0; j < 4; j++ ) \
{ \
memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
} \
nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
- if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
+ if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
{ \
ok = 0; \
fprintf( stderr, #name " [FAILED]\n" ); \
const x264_weight_t *weight = weight_none; \
set_func_name( "mc_luma_%dx%d", w, h ); \
used_asm = 1; \
- memset( buf3, 0xCD, 1024 ); \
- memset( buf4, 0xCD, 1024 ); \
+ for( int i = 0; i < 1024; i++ ) \
+ pbuf3[i] = pbuf4[i] = 0xCD; \
call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h, weight ); \
- if( memcmp( buf3, buf4, 1024 ) ) \
+ if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
{ \
fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
ok = 0; \
const x264_weight_t *weight = weight_none; \
set_func_name( "get_ref_%dx%d", w, h ); \
used_asm = 1; \
- memset( buf3, 0xCD, 1024 ); \
- memset( buf4, 0xCD, 1024 ); \
+ for( int i = 0; i < 1024; i++ ) \
+ pbuf3[i] = pbuf4[i] = 0xCD; \
call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
for( int i = 0; i < h; i++ ) \
{ \
set_func_name( "mc_chroma_%dx%d", w, h ); \
used_asm = 1; \
- memset( buf3, 0xCD, 1024 ); \
- memset( buf4, 0xCD, 1024 ); \
+ for( int i = 0; i < 1024; i++ ) \
+ pbuf3[i] = pbuf4[i] = 0xCD; \
call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
/* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
for( int j = 0; j < h; j++ ) \
for( int i = w; i < 4; i++ ) \
dst2[i+j*16] = dst1[i+j*16]; \
- if( memcmp( buf3, buf4, 1024 ) ) \
+ if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
{ \
fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
ok = 0; \
ok = 1, used_asm = 0; \
for( int i = 0; i < 10; i++ ) \
{ \
- memcpy( buf3, pbuf1+320, 320 * sizeof(pixel) ); \
- memcpy( buf4, pbuf1+320, 320 * sizeof(pixel) ); \
+ memcpy( pbuf3, pbuf1+320, 320 * sizeof(pixel) ); \
+ memcpy( pbuf4, pbuf1+320, 320 * sizeof(pixel) ); \
if( mc_a.name[i] != mc_ref.name[i] ) \
{ \
set_func_name( "%s_%s", #name, pixel_names[i] ); \
used_asm = 1; \
call_c1( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
call_a1( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
- if( memcmp( buf3, buf4, 320 * sizeof(pixel) ) ) \
+ if( memcmp( pbuf3, pbuf4, 320 * sizeof(pixel) ) ) \
{ \
ok = 0; \
fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
void *tmp = pbuf3+49*64;
set_func_name( "hpel_filter" );
ok = 1; used_asm = 1;
- memset( buf3, 0, 4096 * sizeof(pixel) );
- memset( buf4, 0, 4096 * sizeof(pixel) );
+ memset( pbuf3, 0, 4096 * sizeof(pixel) );
+ memset( pbuf4, 0, 4096 * sizeof(pixel) );
call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, 64, 48, 10, tmp );
call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, 64, 48, 10, tmp );
for( int i = 0; i < 3; i++ )
int stride = 80;\
set_func_name( #name );\
used_asm = 1;\
- memcpy( buf3, buf1, size*2*stride * sizeof(pixel) );\
- memcpy( buf4, buf1, size*2*stride * sizeof(pixel) );\
- uint16_t *sum = (uint16_t*)buf3;\
+ memcpy( pbuf3, pbuf1, size*2*stride * sizeof(pixel) );\
+ memcpy( pbuf4, pbuf1, size*2*stride * sizeof(pixel) );\
+ uint16_t *sum = (uint16_t*)pbuf3;\
call_c1( mc_c.name, __VA_ARGS__ );\
- sum = (uint16_t*)buf4;\
+ sum = (uint16_t*)pbuf4;\
call_a1( mc_a.name, __VA_ARGS__ );\
- if( memcmp( buf3, buf4, (stride-8)*2 * sizeof(pixel) )\
+ if( memcmp( pbuf3, pbuf4, (stride-8)*2 * sizeof(pixel) )\
|| (size>9 && memcmp( pbuf3+18*stride, pbuf4+18*stride, (stride-8)*2 * sizeof(pixel) )))\
ok = 0;\
call_c2( mc_c.name, __VA_ARGS__ );\
/* not exactly the real values of a,b,tc but close enough */
for( int i = 35, a = 255, c = 250; i >= 0; i-- )
{
- alphas[i] = a;
- betas[i] = (i+1)/2;
- tcs[i][0] = tcs[i][3] = (c+6)/10;
- tcs[i][1] = (c+7)/15;
- tcs[i][2] = (c+9)/20;
+ alphas[i] = a << (BIT_DEPTH-8);
+ betas[i] = (i+1)/2 << (BIT_DEPTH-8);
+ tcs[i][0] = tcs[i][3] = (c+6)/10 << (BIT_DEPTH-8);
+ tcs[i][1] = (c+7)/15 << (BIT_DEPTH-8);
+ tcs[i][2] = (c+9)/20 << (BIT_DEPTH-8);
a = a*9/10;
c = c*9/10;
}
int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
for( int j = 0; j < 1024; j++ ) \
/* two distributions of random to excersize different failure modes */ \
- buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
- memcpy( buf4, buf3, 1024 * sizeof(pixel) ); \
+ pbuf3[j] = rand() & (i&1 ? 0xf : PIXEL_MAX ); \
+ memcpy( pbuf4, pbuf3, 1024 * sizeof(pixel) ); \
if( db_a.name != db_ref.name ) \
{ \
set_func_name( #name ); \
used_asm = 1; \
call_c1( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
call_a1( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
- if( memcmp( buf3, buf4, 1024 * sizeof(pixel) ) ) \
+ if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
{ \
ok = 0; \
fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
h->pps = h->pps_array;
x264_param_default( &h->param );
h->chroma_qp_table = i_chroma_qp_table + 12;
- h->param.rc.i_qp_min = 26;
+ h->param.rc.i_qp_min = 26 + QP_BD_OFFSET;
h->param.analyse.b_transform_8x8 = 1;
for( int i_cqm = 0; i_cqm < 4; i_cqm++ )
}
else
{
+ int max_scale = BIT_DEPTH < 10 ? 255 : 228;
if( i_cqm == 2 )
for( int i = 0; i < 64; i++ )
- cqm_buf[i] = 10 + rand() % 246;
+ cqm_buf[i] = 10 + rand() % (max_scale - 9);
else
for( int i = 0; i < 64; i++ )
cqm_buf[i] = 1;
{ \
set_func_name( #name ); \
used_asms[0] = 1; \
- for( int qp = 51; qp > 0; qp-- ) \
+ for( int qp = QP_MAX; qp > 0; qp-- ) \
{ \
for( int j = 0; j < 2; j++ ) \
{ \
dct1[i] = dct2[i] = j ? (rand() & 0x1fff) - 0xfff : 0; \
result_c = call_c1( qf_c.name, dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
result_a = call_a1( qf_a.name, dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
- if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
+ if( memcmp( dct1, dct2, 16*sizeof(dctcoef) ) || result_c != result_a ) \
{ \
oks[0] = 0; \
fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
{ \
set_func_name( #qname ); \
used_asms[0] = 1; \
- for( int qp = 51; qp > 0; qp-- ) \
+ for( int qp = QP_MAX; qp > 0; qp-- ) \
{ \
for( int j = 0; j < 2; j++ ) \
{ \
INIT_QUANT##w(j) \
int result_c = call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
int result_a = call_a1( qf_a.qname, dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
- if( memcmp( dct1, dct2, w*w*2 ) || result_c != result_a ) \
+ if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) || result_c != result_a ) \
{ \
oks[0] = 0; \
fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
{ \
set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
used_asms[1] = 1; \
- for( int qp = 51; qp > 0; qp-- ) \
+ for( int qp = QP_MAX; qp > 0; qp-- ) \
{ \
INIT_QUANT##w(1) \
call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
- memcpy( dct2, dct1, w*w*2 ); \
+ memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
- if( memcmp( dct1, dct2, w*w*2 ) ) \
+ if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
{ \
oks[1] = 0; \
fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
{ \
set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
used_asms[1] = 1; \
- for( int qp = 51; qp > 0; qp-- ) \
+ for( int qp = QP_MAX; qp > 0; qp-- ) \
{ \
for( int i = 0; i < 16; i++ ) \
dct1[i] = rand(); \
call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp][0]>>1, h->quant##w##_bias[block][qp][0]>>1 ); \
- memcpy( dct2, dct1, w*w*2 ); \
+ memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
- if( memcmp( dct1, dct2, w*w*2 ) ) \
+ if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
{ \
oks[1] = 0; \
fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
for( int size = 16; size <= 64; size += 48 )
{
set_func_name( "denoise_dct" );
- memcpy( dct1, buf1, size*2 );
- memcpy( dct2, buf1, size*2 );
+ memcpy( dct1, buf1, size*sizeof(dctcoef) );
+ memcpy( dct2, buf1, size*sizeof(dctcoef) );
memcpy( buf3+256, buf3, 256 );
call_c1( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
call_a1( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
- if( memcmp( dct1, dct2, size*2 ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
+ if( memcmp( dct1, dct2, size*sizeof(dctcoef) ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
ok = 0;
call_c2( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
call_a2( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
{ \
int nnz = 0; \
int max = rand() & (w*w-1); \
- memset( dct1, 0, w*w*2 ); \
+ memset( dct1, 0, w*w*sizeof(dctcoef) ); \
for( int idx = ac; idx < max; idx++ ) \
nnz |= dct1[idx] = !(rand()&3) + (!(rand()&15))*rand(); \
if( !nnz ) \
x264_run_level_t runlevel_c, runlevel_a; \
int nnz = 0; \
int max = rand() & (w*w-1); \
- memset( dct1, 0, w*w*2 ); \
+ memset( dct1, 0, w*w*sizeof(dctcoef) ); \
memcpy( &runlevel_a, buf1+i, sizeof(x264_run_level_t) ); \
memcpy( &runlevel_c, buf1+i, sizeof(x264_run_level_t) ); \
for( int idx = ac; idx < max; idx++ ) \
int result_c = call_c( qf_c.lastname, dct1+ac, &runlevel_c ); \
int result_a = call_a( qf_a.lastname, dct1+ac, &runlevel_a ); \
if( result_c != result_a || runlevel_c.last != runlevel_a.last || \
- memcmp(runlevel_c.level, runlevel_a.level, sizeof(int16_t)*result_c) || \
+ memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c) || \
memcmp(runlevel_c.run, runlevel_a.run, sizeof(uint8_t)*(result_c-1)) ) \
{ \
ok = 0; \
{\
set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
used_asm = 1;\
- memcpy( buf3, buf1, 32*20 * sizeof(pixel) );\
- memcpy( buf4, buf1, 32*20 * sizeof(pixel) );\
+ memcpy( pbuf3, pbuf1, 32*20 * sizeof(pixel) );\
+ memcpy( pbuf4, pbuf1, 32*20 * sizeof(pixel) );\
call_c( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\
call_a( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\
- if( memcmp( buf3, buf4, 32*20 * sizeof(pixel) ) )\
+ if( memcmp( pbuf3, pbuf4, 32*20 * sizeof(pixel) ) )\
{\
fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
ok = 0;\
{\
printf( "%2x ", edge[14-j] );\
for( int k = 0; k < w; k++ )\
- printf( "%2x ", buf4[48+k+j*32] );\
+ printf( "%2x ", pbuf4[48+k+j*32] );\
printf( "\n" );\
}\
printf( "\n" );\
{\
printf( " " );\
for( int k = 0; k < w; k++ )\
- printf( "%2x ", buf3[48+k+j*32] );\
+ printf( "%2x ", pbuf3[48+k+j*32] );\
printf( "\n" );\
}\
}\
fprintf( stderr, "x264: using random seed %u\n", seed );
srand( seed );
- buf1 = x264_malloc( 0x3e00 + 16*BENCH_ALIGNS );
- if( !buf1 )
+ buf1 = x264_malloc( 0x1e00 + 0x2000*sizeof(pixel) + 16*BENCH_ALIGNS );
+ pbuf1 = x264_malloc( 0x1e00*sizeof(pixel) + 16*BENCH_ALIGNS );
+ if( !buf1 || !pbuf1 )
{
fprintf( stderr, "malloc failed, unable to initiate tests!\n" );
return -1;
#define INIT_POINTER_OFFSETS\
buf2 = buf1 + 0xf00;\
buf3 = buf2 + 0xf00;\
- buf4 = buf3 + 0x1000;\
- pbuf1 = (pixel*)buf1;\
- pbuf2 = (pixel*)buf2;\
+ buf4 = buf3 + 0x1000*sizeof(pixel);\
+ pbuf2 = pbuf1 + 0xf00;\
pbuf3 = (pixel*)buf3;\
pbuf4 = (pixel*)buf4;
INIT_POINTER_OFFSETS;
for( int i = 0; i < 0x1e00; i++ )
+ {
buf1[i] = rand() & 0xFF;
- memset( buf1+0x1e00, 0, 0x2000 );
+ pbuf1[i] = rand() & PIXEL_MAX;
+ }
+ memset( buf1+0x1e00, 0, 0x2000*sizeof(pixel) );
/* 16-byte alignment is guaranteed whenever it's useful, but some functions also vary in speed depending on %64 */
if( do_bench )
INIT_POINTER_OFFSETS;
ret |= x264_stack_pagealign( check_all_flags, i*16 );
buf1 += 16;
+ pbuf1 += 16;
quiet = 1;
fprintf( stderr, "%d/%d\r", i+1, BENCH_ALIGNS );
}
" .mkv -> Matroska\n"
" .flv -> Flash Video\n"
" .mp4 -> MP4 if compiled with GPAC support (%s)\n"
+ "Output bit depth: %d (configured at compile time)\n"
"\n"
"Options:\n"
"\n"
"no",
#endif
#if HAVE_GPAC
- "yes"
+ "yes",
#else
- "no"
+ "no",
#endif
+ BIT_DEPTH
);
H0( "Example usage:\n" );
H0( "\n" );
H0( "\n" );
H0( "Presets:\n" );
H0( "\n" );
- H0( " --profile Force the limits of an H.264 profile [high]\n"
+ H0( " --profile Force the limits of an H.264 profile\n"
" Overrides all settings.\n" );
H2( " - baseline:\n"
" --no-8x8dct --bframes 0 --no-cabac\n"
" --no-8x8dct --cqm flat\n"
" No lossless.\n"
" - high:\n"
- " No lossless.\n" );
- else H0( " - baseline,main,high\n" );
+ " No lossless.\n"
+ " - high10:\n"
+ " No lossless.\n"
+ " Support for bit depth 8-10.\n" );
+ else H0( " - baseline,main,high,high10\n" );
H0( " --preset Use a preset to select encoding settings [medium]\n"
" Overridden by user settings.\n" );
H2( " - ultrafast:\n"
H0( "\n" );
H0( "Ratecontrol:\n" );
H0( "\n" );
- H1( " -q, --qp <integer> Force constant QP (0-51, 0=lossless)\n" );
+ H1( " -q, --qp <integer> Force constant QP (0-%d, 0=lossless)\n", QP_MAX );
H0( " -B, --bitrate <integer> Set bitrate (kbit/s)\n" );
- H0( " --crf <float> Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
+ H0( " --crf <float> Quality-based VBR (0-%d, 0=lossless) [%.1f]\n", QP_MAX, defaults->rc.f_rf_constant );
H1( " --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
H0( " --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
H0( " --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
#else
printf( "using a non-gcc compiler\n" );
#endif
+ printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
exit(0);
case OPT_FRAMES:
param->i_frame_total = X264_MAX( atoi( optarg ), 0 );
else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
else if( type == 'b' ) pic->i_type = X264_TYPE_B;
else ret = 0;
- if( ret != 3 || qp < -1 || qp > 51 )
+ if( ret != 3 || qp < -1 || qp > QP_MAX )
{
x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
fclose( opt->qpfile );
{
int i_rc_method; /* X264_RC_* */
- int i_qp_constant; /* 0-51 */
+ int i_qp_constant; /* 0 to (51 + 6*(BIT_DEPTH-8)) */
int i_qp_min; /* min allowed QP value */
int i_qp_max; /* max allowed QP value */
int i_qp_step; /* max QP step between frames */
/* x264_param_apply_profile:
* Applies the restrictions of the given profile.
* Currently available profiles are, from most to least restrictive: */
-static const char * const x264_profile_names[] = { "baseline", "main", "high", 0 };
+static const char * const x264_profile_names[] = { "baseline", "main", "high", "high10", 0 };
/* (can be NULL, in which case the function will do nothing)
*