DECL_SUF( x264_pixel_avg_8x16, ( pixel *, int, pixel *, int, pixel *, int, int ))
DECL_SUF( x264_pixel_avg_8x8, ( pixel *, int, pixel *, int, pixel *, int, int ))
DECL_SUF( x264_pixel_avg_8x4, ( pixel *, int, pixel *, int, pixel *, int, int ))
+DECL_SUF( x264_pixel_avg_4x16, ( pixel *, int, pixel *, int, pixel *, int, int )) // 4x16 size; same argument list as the other pixel_avg declarations; its _mmx2/_sse2/_ssse3 versions are stored in pf->avg[PIXEL_4x16]
DECL_SUF( x264_pixel_avg_4x8, ( pixel *, int, pixel *, int, pixel *, int, int ))
DECL_SUF( x264_pixel_avg_4x4, ( pixel *, int, pixel *, int, pixel *, int, int ))
DECL_SUF( x264_pixel_avg_4x2, ( pixel *, int, pixel *, int, pixel *, int, int ))
void x264_mc_copy_w16_mmx( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_sse2( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_aligned_sse2( pixel *, int, pixel *, int, int );
-void x264_prefetch_fenc_mmx2( uint8_t *, int, uint8_t *, int, int );
-void x264_prefetch_ref_mmx2( uint8_t *, int, int );
+void x264_prefetch_fenc_420_mmx2( pixel *, int, pixel *, int, int ); // installed as pf->prefetch_fenc_420 once X264_CPU_MMX2 is confirmed
+void x264_prefetch_fenc_422_mmx2( pixel *, int, pixel *, int, int ); // installed as pf->prefetch_fenc_422; argument list identical to the 420 version
+void x264_prefetch_ref_mmx2( pixel *, int, int ); // installed as pf->prefetch_ref once X264_CPU_MMX2 is confirmed
void x264_plane_copy_core_mmx2( pixel *, int, pixel *, int, int w, int h);
void x264_plane_copy_c( pixel *, int, pixel *, int, int w, int h );
void x264_plane_copy_interleave_core_mmx2( pixel *dst, int i_dst,
void x264_plane_copy_deinterleave_avx( uint16_t *dstu, int i_dstu,
uint16_t *dstv, int i_dstv,
uint16_t *src, int i_src, int w, int h );
-void x264_store_interleave_8x8x2_mmx2( pixel *dst, int i_dst, pixel *srcu, pixel *srcv );
-void x264_store_interleave_8x8x2_sse2( pixel *dst, int i_dst, pixel *srcu, pixel *srcv );
-void x264_store_interleave_8x8x2_avx( pixel *dst, int i_dst, pixel *srcu, pixel *srcv );
-void x264_load_deinterleave_8x8x2_fenc_mmx( pixel *dst, pixel *src, int i_src );
-void x264_load_deinterleave_8x8x2_fenc_sse2( pixel *dst, pixel *src, int i_src );
-void x264_load_deinterleave_8x8x2_fenc_ssse3( uint8_t *dst, uint8_t *src, int i_src );
-void x264_load_deinterleave_8x8x2_fenc_avx( uint16_t *dst, uint16_t *src, int i_src );
-void x264_load_deinterleave_8x8x2_fdec_mmx( pixel *dst, pixel *src, int i_src );
-void x264_load_deinterleave_8x8x2_fdec_sse2( pixel *dst, pixel *src, int i_src );
-void x264_load_deinterleave_8x8x2_fdec_ssse3( uint8_t *dst, uint8_t *src, int i_src );
-void x264_load_deinterleave_8x8x2_fdec_avx( uint16_t *dst, uint16_t *src, int i_src );
+void x264_store_interleave_chroma_mmx2( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height ); // NOTE(review): height is presumably the number of rows to process -- confirm against the asm
+void x264_store_interleave_chroma_sse2( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height );
+void x264_store_interleave_chroma_avx( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height );
+void x264_load_deinterleave_chroma_fenc_mmx( pixel *dst, pixel *src, int i_src, int height ); // fenc loaders: installed as pf->load_deinterleave_chroma_fenc
+void x264_load_deinterleave_chroma_fenc_sse2( pixel *dst, pixel *src, int i_src, int height );
+void x264_load_deinterleave_chroma_fenc_ssse3( uint8_t *dst, uint8_t *src, int i_src, int height ); // declared with uint8_t rather than pixel; NOTE(review): appears to be assigned only in the !HIGH_BIT_DEPTH branch -- confirm
+void x264_load_deinterleave_chroma_fenc_avx( uint16_t *dst, uint16_t *src, int i_src, int height ); // declared with uint16_t rather than pixel; NOTE(review): appears to be assigned only in the HIGH_BIT_DEPTH branch -- confirm
+void x264_load_deinterleave_chroma_fdec_mmx( pixel *dst, pixel *src, int i_src, int height ); // fdec loaders: installed as pf->load_deinterleave_chroma_fdec
+void x264_load_deinterleave_chroma_fdec_sse2( pixel *dst, pixel *src, int i_src, int height );
+void x264_load_deinterleave_chroma_fdec_ssse3( uint8_t *dst, uint8_t *src, int i_src, int height ); // uint8_t signature, matching the fenc ssse3 declaration
+void x264_load_deinterleave_chroma_fdec_avx( uint16_t *dst, uint16_t *src, int i_src, int height ); // uint16_t signature, matching the fenc avx declaration
void *x264_memcpy_aligned_mmx( void * dst, const void * src, size_t n );
void *x264_memcpy_aligned_sse2( void * dst, const void * src, size_t n );
void x264_memzero_aligned_mmx( void * dst, int n );
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
void x264_mbtree_propagate_cost_avx( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+ uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); // FMA4 version; same argument list as the AVX declaration, and replaces it in pf->mbtree_propagate_cost when X264_CPU_FMA4 is set
#define MC_CHROMA(cpu)\
void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, int i_dst,\
};
MC_COPY_WTAB(mmx,mmx,mmx,mmx)
+#if HIGH_BIT_DEPTH // the two branches differ only in the third MC_COPY_WTAB argument
+MC_COPY_WTAB(sse2,mmx,sse2,sse2) // third argument is sse2 here; NOTE(review): the arguments presumably pick the copy routine per block width -- confirm against the macro body
+#else // !HIGH_BIT_DEPTH: third argument stays mmx
MC_COPY_WTAB(sse2,mmx,mmx,sse2)
+#endif
#define MC_WEIGHT_WTAB(function, instr, name1, name2, w12version)\
static void (* x264_mc_##function##_wtab_##instr[6])( pixel *, int, pixel *, int, const x264_weight_t *, int ) =\
if( !(cpu&X264_CPU_MMX) )
return;
- pf->load_deinterleave_8x8x2_fenc = x264_load_deinterleave_8x8x2_fenc_mmx;
- pf->load_deinterleave_8x8x2_fdec = x264_load_deinterleave_8x8x2_fdec_mmx;
+ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_mmx;
+ pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_mmx;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_mmx;
if( !(cpu&X264_CPU_MMX2) )
return;
+ pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2;
+ pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2;
+ pf->prefetch_ref = x264_prefetch_ref_mmx2;
+
pf->plane_copy = x264_plane_copy_mmx2;
pf->plane_copy_interleave = x264_plane_copy_interleave_mmx2;
- pf->store_interleave_8x8x2 = x264_store_interleave_8x8x2_mmx2;
+ pf->store_interleave_chroma = x264_store_interleave_chroma_mmx2;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmx2;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_mmx2;
pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_mmx2;
pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_mmx2;
pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_mmx2;
+ pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_mmx2;
pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_mmx2;
pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_mmx2;
pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_mmx2;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_sse2;
- pf->load_deinterleave_8x8x2_fenc = x264_load_deinterleave_8x8x2_fenc_sse2;
- pf->load_deinterleave_8x8x2_fdec = x264_load_deinterleave_8x8x2_fdec_sse2;
+ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_sse2;
+ pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_sse2;
pf->plane_copy_interleave = x264_plane_copy_interleave_sse2;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2;
pf->integral_init4v = x264_integral_init4v_sse2;
pf->integral_init8v = x264_integral_init8v_sse2;
pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
- pf->store_interleave_8x8x2 = x264_store_interleave_8x8x2_sse2;
+ pf->store_interleave_chroma = x264_store_interleave_chroma_sse2;
pf->offsetadd = x264_mc_offsetadd_wtab_sse2;
pf->offsetsub = x264_mc_offsetsub_wtab_sse2;
pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_sse2;
pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_sse2;
pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_sse2;
+ pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_sse2;
pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_sse2;
pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_sse2;
pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_sse2;
if( !(cpu&X264_CPU_AVX) )
return;
- pf->load_deinterleave_8x8x2_fenc = x264_load_deinterleave_8x8x2_fenc_avx;
- pf->load_deinterleave_8x8x2_fdec = x264_load_deinterleave_8x8x2_fdec_avx;
+ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx;
+ pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx;
pf->plane_copy_interleave = x264_plane_copy_interleave_avx;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx;
- pf->store_interleave_8x8x2 = x264_store_interleave_8x8x2_avx;
+ pf->store_interleave_chroma = x264_store_interleave_chroma_avx;
if( !(cpu&X264_CPU_STACK_MOD4) )
pf->mc_chroma = x264_mc_chroma_avx;
#else // !HIGH_BIT_DEPTH
- pf->prefetch_fenc = x264_prefetch_fenc_mmx2;
- pf->prefetch_ref = x264_prefetch_ref_mmx2;
#if ARCH_X86 // all x86_64 cpus with cacheline split issues use sse2 instead
if( cpu&X264_CPU_CACHELINE_32 )
if( cpu&X264_CPU_SSE2_IS_FAST )
{
- pf->store_interleave_8x8x2 = x264_store_interleave_8x8x2_sse2; // FIXME sse2fast? sse2medium?
- pf->load_deinterleave_8x8x2_fenc = x264_load_deinterleave_8x8x2_fenc_sse2;
- pf->load_deinterleave_8x8x2_fdec = x264_load_deinterleave_8x8x2_fdec_sse2;
+ pf->store_interleave_chroma = x264_store_interleave_chroma_sse2; // FIXME sse2fast? sse2medium?
+ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_sse2;
+ pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_sse2;
pf->plane_copy_interleave = x264_plane_copy_interleave_sse2;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2;
pf->mc_luma = mc_luma_sse2;
pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_ssse3;
pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_ssse3;
pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_ssse3;
+ pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_ssse3;
pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_ssse3;
pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_ssse3;
pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_ssse3;
- pf->load_deinterleave_8x8x2_fenc = x264_load_deinterleave_8x8x2_fenc_ssse3;
- pf->load_deinterleave_8x8x2_fdec = x264_load_deinterleave_8x8x2_fdec_ssse3;
+ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_ssse3;
+ pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_ssse3;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_ssse3;
pf->hpel_filter = x264_hpel_filter_ssse3;
if( !(cpu&X264_CPU_AVX) )
return;
pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx;
+
+ if( !(cpu&X264_CPU_FMA4) )
+ return;
+ pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4;
}