#include "common/common.h"
#include "mc.h"
-/* NASM functions */
-extern void x264_pixel_avg_16x16_sse2( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_16x8_sse2( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_16x16_mmxext( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_16x8_mmxext( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_8x16_mmxext( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_8x8_mmxext( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_8x4_mmxext( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_4x8_mmxext( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_4x4_mmxext( uint8_t *, int, uint8_t *, int );
-extern void x264_pixel_avg_4x2_mmxext( uint8_t *, int, uint8_t *, int );
+#define DECL_SUF( func, args ) /* declares func_mmxext, func_sse2 and func_ssse3, each returning void with parameter list args */\
+ void func##_mmxext args;\
+ void func##_sse2 args;\
+ void func##_ssse3 args; /* the expansion already ends in ';', so invocations below take no trailing semicolon */
+
+DECL_SUF( x264_pixel_avg_16x16, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int )) /* one DECL_SUF per block size; all sizes share this 7-argument prototype */
+DECL_SUF( x264_pixel_avg_16x8, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
+DECL_SUF( x264_pixel_avg_8x16, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
+DECL_SUF( x264_pixel_avg_8x8, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
+DECL_SUF( x264_pixel_avg_8x4, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
+DECL_SUF( x264_pixel_avg_4x8, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
+DECL_SUF( x264_pixel_avg_4x4, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
+DECL_SUF( x264_pixel_avg_4x2, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
extern void x264_mc_copy_w4_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w8_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_sse2( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_sse3( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_aligned_sse2( uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
-extern void x264_pixel_avg_weight_w8_sse2( uint8_t *, int, uint8_t *, int, int, int );
-extern void x264_pixel_avg_weight_w16_sse2( uint8_t *, int, uint8_t *, int, int, int );
-extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
extern void x264_prefetch_fenc_mmxext( uint8_t *, int, uint8_t *, int, int );
extern void x264_prefetch_ref_mmxext( uint8_t *, int, int );
extern void x264_mc_chroma_mmxext( uint8_t *src, int i_src_stride,
PIXEL_AVG_WALL(cache64_sse2)
PIXEL_AVG_WALL(sse2)
-#define AVG_WEIGHT(W,H,name) \
-static void x264_pixel_avg_weight_ ## W ## x ## H ## _##name( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_weight_dst ) \
-{ \
- x264_pixel_avg_weight_w ## W ## _##name( dst, i_dst, src, i_src, i_weight_dst, H ); \
-}
-
-AVG_WEIGHT(16,16,mmxext)
-AVG_WEIGHT(16,8,mmxext)
-AVG_WEIGHT(8,16,mmxext)
-AVG_WEIGHT(8,8,mmxext)
-AVG_WEIGHT(8,4,mmxext)
-AVG_WEIGHT(16,16,sse2)
-AVG_WEIGHT(16,8,sse2)
-AVG_WEIGHT(8,16,sse2)
-AVG_WEIGHT(8,8,sse2)
-AVG_WEIGHT(8,4,sse2)
-
#define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
static void (* const x264_pixel_avg_wtab_##instr[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =\
{\
PIXEL_AVG_WTAB(mmxext, mmxext, mmxext, mmxext, mmxext, mmxext)
#ifdef ARCH_X86
PIXEL_AVG_WTAB(cache32_mmxext, mmxext, cache32_mmxext, cache32_mmxext, cache32_mmxext, cache32_mmxext)
-#endif
PIXEL_AVG_WTAB(cache64_mmxext, mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext)
+#endif /* ARCH_X86: the guard now covers both the cache32_mmxext and cache64_mmxext tables */
PIXEL_AVG_WTAB(sse2, mmxext, mmxext, mmxext, sse2, sse2)
PIXEL_AVG_WTAB(cache64_sse2, mmxext, cache64_mmxext, cache64_sse2, cache64_sse2, cache64_sse2)
HPEL(8, mmxext, mmxext, mmxext, mmxext)
HPEL(16, sse2_amd, mmxext, mmxext, sse2)
+#ifdef ARCH_X86_64 /* x86_64: the sse2/ssse3 hpel filters are only declared here; no HPEL() invocation for them */
+void x264_hpel_filter_sse2( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, int stride, int width, int height );
+void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, int stride, int width, int height );
+#else /* !ARCH_X86_64: presumably HPEL() supplies x264_hpel_filter_sse2/_ssse3 here; HPEL is defined outside this view, TODO confirm */
HPEL(16, sse2, sse2, sse2, sse2)
-HPEL(16, ssse3, sse2, ssse3, sse2)
+HPEL(16, ssse3, sse2, ssse3, ssse3)
+#endif /* ARCH_X86_64 */
void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
{
if( !(cpu&X264_CPU_MMX) )
return;
+ pf->copy_16x16_unaligned = x264_mc_copy_w16_mmx;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_mmxext;
pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_mmxext;
- pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_mmxext;
- pf->avg_weight[PIXEL_16x8] = x264_pixel_avg_weight_16x8_mmxext;
- pf->avg_weight[PIXEL_8x16] = x264_pixel_avg_weight_8x16_mmxext;
- pf->avg_weight[PIXEL_8x8] = x264_pixel_avg_weight_8x8_mmxext;
- pf->avg_weight[PIXEL_8x4] = x264_pixel_avg_weight_8x4_mmxext;
- pf->avg_weight[PIXEL_4x4] = x264_pixel_avg_weight_4x4_mmxext;
- // avg_weight_4x8 is rare and 4x2 is not used
-
pf->plane_copy = x264_plane_copy_mmxext;
pf->hpel_filter = x264_hpel_filter_mmxext;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_mmxext;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2;
- if( !(cpu&X264_CPU_STACK_MOD4) )
- {
- pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_sse2;
- pf->avg_weight[PIXEL_16x8] = x264_pixel_avg_weight_16x8_sse2;
- pf->avg_weight[PIXEL_8x16] = x264_pixel_avg_weight_8x16_sse2;
- pf->avg_weight[PIXEL_8x8] = x264_pixel_avg_weight_8x8_sse2;
- pf->avg_weight[PIXEL_8x4] = x264_pixel_avg_weight_8x4_sse2;
- }
+ pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_sse2;
+ pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_sse2;
+ pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_sse2;
pf->hpel_filter = x264_hpel_filter_sse2;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_sse2;
pf->mc_chroma = x264_mc_chroma_sse2;
if( !(cpu&X264_CPU_SSSE3) )
return;
+ pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_ssse3;
+ pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_ssse3;
+ pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_ssse3;
+ pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_ssse3;
+ pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_ssse3;
+ pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_ssse3;
+ pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_ssse3;
+ pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_ssse3;
+
pf->hpel_filter = x264_hpel_filter_ssse3;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3;
pf->mc_chroma = x264_mc_chroma_ssse3;