uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
- uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+void x264_mbtree_propagate_cost_avx2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+ uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
#define MC_CHROMA(cpu)\
void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, intptr_t i_dst, pixel *src, intptr_t i_src,\
if( !(cpu&X264_CPU_AVX2) )
return;
pf->get_ref = get_ref_avx2;
-
- if( cpu&X264_CPU_FMA3 )
- pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx2_fma3;
+ pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx2;
}
continue;
printf( "%s_%s%s: %"PRId64"\n", benchs[i].name,
#if HAVE_MMX
- b->cpu&X264_CPU_AVX2 && b->cpu&X264_CPU_FMA3 ? "avx2_fma3" :
b->cpu&X264_CPU_AVX2 ? "avx2" :
b->cpu&X264_CPU_FMA3 ? "fma3" :
b->cpu&X264_CPU_FMA4 ? "fma4" :
b->cpu&X264_CPU_XOP ? "xop" :
b->cpu&X264_CPU_AVX ? "avx" :
+ b->cpu&X264_CPU_SSE42 ? "sse42" :
b->cpu&X264_CPU_SSE4 ? "sse4" :
b->cpu&X264_CPU_SSSE3 ? "ssse3" :
b->cpu&X264_CPU_SSE3 ? "sse3" :
#endif
if( cpu_detect & X264_CPU_LZCNT )
{
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "MMX_LZCNT" );
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "MMX LZCNT" );
cpu1 &= ~X264_CPU_LZCNT;
}
ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "MMX SlowCTZ" );
cpu1 &= ~X264_CPU_SLOW_SHUFFLE;
ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" );
cpu1 &= ~X264_CPU_SLOW_CTZ;
- }
- if( cpu_detect & X264_CPU_LZCNT )
- {
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" );
- cpu1 &= ~X264_CPU_LZCNT;
+ if( cpu_detect & X264_CPU_LZCNT )
+ {
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE2 LZCNT" );
+ cpu1 &= ~X264_CPU_LZCNT;
+ }
}
if( cpu_detect & X264_CPU_SSE3 )
{
ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_64, "SSSE3 Cache64 SlowAtom" );
cpu1 &= ~X264_CPU_CACHELINE_64;
cpu1 &= ~X264_CPU_SLOW_ATOM;
+ if( cpu_detect & X264_CPU_LZCNT )
+ {
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSSE3 LZCNT" );
+ cpu1 &= ~X264_CPU_LZCNT;
+ }
}
if( cpu_detect & X264_CPU_SSE4 )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE4, "SSE4" );
+ if( cpu_detect & X264_CPU_SSE42 )
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE42, "SSE4.2" );
if( cpu_detect & X264_CPU_AVX )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_AVX, "AVX" );
if( cpu_detect & X264_CPU_XOP )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_FMA4, "FMA4" );
cpu1 &= ~X264_CPU_FMA4;
}
- if( cpu_detect & X264_CPU_BMI1 )
+ if( cpu_detect & X264_CPU_FMA3 )
{
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_BMI1, "BMI1" );
- cpu1 &= ~X264_CPU_BMI1;
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_FMA3, "FMA3" );
+ cpu1 &= ~X264_CPU_FMA3;
}
if( cpu_detect & X264_CPU_AVX2 )
{
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_AVX2, "AVX2" );
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_FMA3 | X264_CPU_AVX2, "AVX2" );
if( cpu_detect & X264_CPU_LZCNT )
{
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "AVX2_LZCNT" );
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "AVX2 LZCNT" );
cpu1 &= ~X264_CPU_LZCNT;
}
}
+ if( cpu_detect & X264_CPU_BMI1 )
+ {
+ ret |= add_flags( &cpu0, &cpu1, X264_CPU_BMI1, "BMI1" );
+ cpu1 &= ~X264_CPU_BMI1;
+ }
if( cpu_detect & X264_CPU_BMI2 )
{
ret |= add_flags( &cpu0, &cpu1, X264_CPU_BMI1|X264_CPU_BMI2, "BMI2" );
cpu1 &= ~(X264_CPU_BMI1|X264_CPU_BMI2);
}
- if( cpu_detect & X264_CPU_FMA3 )
- {
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_FMA3, "FMA3" );
- cpu1 &= ~X264_CPU_FMA3;
- }
#elif ARCH_PPC
if( cpu_detect & X264_CPU_ALTIVEC )
{