# define CPU_CAPABILITY_3DNOW (1<<4)
# define VLC_CPU_MMXEXT 32
# define VLC_CPU_SSE 64
-# define CPU_CAPABILITY_SSE2 (1<<7)
+# define VLC_CPU_SSE2 128
# define CPU_CAPABILITY_SSE3 (1<<8)
# define CPU_CAPABILITY_SSSE3 (1<<9)
# define CPU_CAPABILITY_SSE4_1 (1<<10)
# endif
# endif
+# ifdef __SSE2__
+# define vlc_CPU_SSE2() (1)
+# else
+# define vlc_CPU_SSE2() ((vlc_CPU() & VLC_CPU_SSE2) != 0)
+# endif
+
# elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
# define HAVE_FPU 1
# define VLC_CPU_ALTIVEC 2
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
- if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+ if( !vlc_CPU_SSE2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
/* Execute the instruction op only if SSE2 is supported. */
#ifdef CAN_COMPILE_SSE2
-# define ASM_SSE2(cpu, op) do { \
- if (cpu & CPU_CAPABILITY_SSE2) \
- asm volatile (op); \
+# ifdef __SSE2__
+# define ASM_SSE2(cpu, op) asm volatile (op)
+# else
+# define ASM_SSE2(cpu, op) do { \
+ if (cpu & VLC_CPU_SSE2) \
+ asm volatile (op); \
} while (0)
+# undef vlc_CPU_SSE2
+# define vlc_CPU_SSE2() ((cpu & VLC_CPU_SSE2) != 0)
+# endif
#else
-# define ASM_SSE2(cpu, op)
+# define ASM_SSE2(cpu, op)
#endif
/* Optimized copy from "Uncacheable Speculative Write Combining" memory
} else
#endif
#ifdef CAN_COMPILE_SSE2
- if (cpu & CPU_CAPABILITY_SSE2) {
+ if (vlc_CPU_SSE2()) {
if (!unaligned) {
for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movdqa", "movdqa");
bool unaligned = ((intptr_t)dst & 0x0f) != 0;
#ifdef CAN_COMPILE_SSE2
- if (cpu & CPU_CAPABILITY_SSE2) {
+ if (vlc_CPU_SSE2()) {
if (!unaligned) {
for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movdqa", "movntdq");
} else
#endif
#ifdef CAN_COMPILE_SSE2
- if (cpu & CPU_CAPABILITY_SSE2) {
+ if (vlc_CPU_SSE2()) {
for (x = 0; x < (width & ~31); x += 32) {
asm volatile (
"movdqu (%[mask]), %%xmm7\n"
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
- if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+ if( !vlc_CPU_SSE2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
p_sys->param.cpu &= ~X264_CPU_MMXEXT;
if( !vlc_CPU_SSE() )
p_sys->param.cpu &= ~X264_CPU_SSE;
- if( !(vlc_CPU() & CPU_CAPABILITY_SSE2) )
+ if( !vlc_CPU_SSE2() )
p_sys->param.cpu &= ~X264_CPU_SSE2;
#endif
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
- if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+ if( !vlc_CPU_SSE2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
- if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+ if( !vlc_CPU_SSE2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
set_description( N_( "SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 120 )
-# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0)
+# define vlc_CPU_capable() vlc_CPU_SSE2()
#endif
set_callbacks( Activate, Deactivate )
vlc_module_end ()
#elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
set_capability( "video filter2", 250 )
-# define vlc_CPU_capable() (vlc_CPU() & CPU_CAPABILITY_SSE2)
+# define vlc_CPU_capable() vlc_CPU_SSE2()
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
set_description(
_("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
set_capability( "video filter2", 250 )
-# define vlc_CPU_capable() (vlc_CPU_ALTIVEC())
+# define vlc_CPU_capable() vlc_CPU_ALTIVEC()
#endif
set_callbacks( Activate, NULL )
vlc_module_end ()
#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
set_capability( "video filter2", 120 )
-# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0)
+# define vlc_CPU_capable() vlc_CPU_SSE2()
# define VLC_TARGET VLC_SSE
#endif
set_callbacks( Activate, NULL )
void (*filter)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode);
- filter = yadif_filter_line_c;
-#if defined(HAVE_YADIF_MMX)
- if( vlc_CPU_MMX() )
- filter = yadif_filter_line_mmx;
+#if defined(HAVE_YADIF_SSSE3)
+ if( vlc_CPU() & CPU_CAPABILITY_SSSE3 )
+ filter = yadif_filter_line_ssse3;
+ else
#endif
#if defined(HAVE_YADIF_SSE2)
- if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
+ if( vlc_CPU_SSE2() )
filter = yadif_filter_line_sse2;
+ else
#endif
-#if defined(HAVE_YADIF_SSSE3)
- if( vlc_CPU() & CPU_CAPABILITY_SSSE3 )
- filter = yadif_filter_line_ssse3;
+#if defined(HAVE_YADIF_MMX)
+ if( vlc_CPU_MMX() )
+ filter = yadif_filter_line_mmx;
+ else
#endif
+ filter = yadif_filter_line_c;
for( int n = 0; n < p_dst->i_planes; n++ )
{
p_sys->pf_merge = MergeAltivec;
else
#endif
-#if defined(CAN_COMPILE_SSE)
- if( (vlc_CPU() & CPU_CAPABILITY_SSE2) )
+#if defined(CAN_COMPILE_SSE2)
+ if( vlc_CPU_SSE2() )
{
p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2;
p_sys->pf_end_merge = EndMMX;
cfg->buf = NULL;
#if HAVE_SSE2 && HAVE_6REGS
- if (vlc_CPU() & CPU_CAPABILITY_SSE2)
+ if (vlc_CPU_SSE2())
cfg->blur_line = blur_line_sse2;
else
#endif
sys->blend = BlockBlendC;
sys->emms = NULL;
#if defined(CAN_COMPILE_SSE2) && 1
- if (vlc_CPU() & CPU_CAPABILITY_SSE2) {
+ if (vlc_CPU_SSE2()) {
sys->blend = BlockBlendSse2;
sys->emms = Emms;
}
const uint8_t filling_const_8v = 128 + i_intensity / 14;
#if defined(CAN_COMPILE_SSE2)
- if (vlc_CPU() & CPU_CAPABILITY_SSE2)
+ if (vlc_CPU_SSE2())
{
/* prepared value for faster broadcasting in xmm register */
int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
# endif
}
-# if defined (__SSE2__)
- i_capabilities |= CPU_CAPABILITY_SSE2;
-# elif defined (CAN_COMPILE_SSE2)
+# if defined (CAN_COMPILE_SSE2)
if ((i_edx & 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test))
- i_capabilities |= CPU_CAPABILITY_SSE2;
+ i_capabilities |= VLC_CPU_SSE2;
# endif
# if defined (__SSE3__)
if (vlc_CPU_MMX()) p += sprintf (p, "MMX ");
if (vlc_CPU_MMXEXT()) p += sprintf (p, "MMXEXT ");
if (vlc_CPU_SSE()) p += sprintf (p, "SSE ");;
- PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
+ if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
core_caps |= VLC_CPU_SSE | VLC_CPU_MMXEXT;
if (!strcmp (cap, "mmxext"))
core_caps |= VLC_CPU_MMXEXT;
-# ifndef __SSE2__
if (!strcmp (cap, "sse2"))
- core_caps |= CPU_CAPABILITY_SSE2;
-# endif
+ core_caps |= VLC_CPU_SSE2;
# ifndef __SSE3__
if (!strcmp (cap, "pni"))
core_caps |= CPU_CAPABILITY_SSE3;
/* Always enable capabilities that were forced during compilation */
#if defined (__i386__) || defined (__x86_64__)
-# ifdef __SSE2__
- all_caps |= CPU_CAPABILITY_SSE2;
-# endif
# ifdef __SSE3__
all_caps |= CPU_CAPABILITY_SSE3;
# endif