git.sesse.net Git - ffmpeg/commitdiff
x86: check for AV_CPU_FLAG_AVXSLOW where useful
author: James Almer <jamrial@gmail.com>
Sun, 31 May 2015 17:20:29 +0000 (14:20 -0300)
committer: Michael Niedermayer <michaelni@gmx.at>
Sun, 31 May 2015 22:15:35 +0000 (00:15 +0200)
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
libavcodec/x86/dcadsp_init.c
libavcodec/x86/dct_init.c
libavcodec/x86/fft_init.c
libavcodec/x86/vp9dsp_init.c
libavfilter/x86/af_volume_init.c
libavutil/x86/float_dsp_init.c
libavutil/x86/lls_init.c
libswresample/x86/audio_convert_init.c
libswresample/x86/rematrix_init.c
libswresample/x86/resample_init.c

index bb86c260371d595692ebccb7ade89ba641c7c531..1a19f6b8079017b2d70357b5b1099f62e599228b 100644 (file)
@@ -103,10 +103,10 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
     if (EXTERNAL_SSE2(cpu_flags)) {
         s->synth_filter_float = synth_filter_sse2;
     }
-    if (EXTERNAL_AVX(cpu_flags)) {
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
         s->synth_filter_float = synth_filter_avx;
     }
-    if (EXTERNAL_FMA3(cpu_flags)) {
+    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
         s->synth_filter_float = synth_filter_fma3;
     }
 #endif /* HAVE_YASM */
index 30c8f12bf2080a94b541432fb6b8bf5ba9bc287b..daf2bb4e5d211af4358a027ff6d50de3384ee185 100644 (file)
@@ -34,6 +34,6 @@ av_cold void ff_dct_init_x86(DCTContext *s)
         s->dct32 = ff_dct32_float_sse;
     if (EXTERNAL_SSE2(cpu_flags))
         s->dct32 = ff_dct32_float_sse2;
-    if (EXTERNAL_AVX(cpu_flags))
+    if (EXTERNAL_AVX_FAST(cpu_flags))
         s->dct32 = ff_dct32_float_avx;
 }
index 5682230c8ef05c3d7edf209cbbbac708a19f761b..5085f113801442d289066d0a5bbfde839a9fc09c 100644 (file)
@@ -48,7 +48,7 @@ av_cold void ff_fft_init_x86(FFTContext *s)
         s->fft_calc    = ff_fft_calc_sse;
         s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
     }
-    if (EXTERNAL_AVX(cpu_flags) && s->nbits >= 5) {
+    if (EXTERNAL_AVX_FAST(cpu_flags) && s->nbits >= 5) {
         /* AVX for SB */
         s->imdct_half      = ff_imdct_half_avx;
         s->fft_calc        = ff_fft_calc_avx;
index 979bd936ace5b5f70c5f11a84281c0a87b55ac1f..00e7125a0c2b2c90fb58a1524a2d20ed8b033096 100644 (file)
@@ -483,12 +483,14 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
         dsp->itxfm_add[TX_32X32][ADST_DCT] =
         dsp->itxfm_add[TX_32X32][DCT_ADST] =
         dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
-        init_fpel(1, 0, 32, put, avx);
-        init_fpel(0, 0, 64, put, avx);
         init_lpf(avx);
         init_dir_tm_h_ipred(8, avx);
         init_dir_tm_h_ipred(16, avx);
         init_dir_tm_h_ipred(32, avx);
+    }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        init_fpel(1, 0, 32, put, avx);
+        init_fpel(0, 0, 64, put, avx);
         init_ipred(32, avx, v, VERT);
     }
 
index 57c7eab65f93e437e8e8a26463ad1b8b7e57d627..88f5a9679ad51fd2b4da1dded2b6d8775516d06b 100644 (file)
@@ -52,7 +52,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol)
             vol->scale_samples = ff_scale_samples_s32_ssse3_atom;
             vol->samples_align = 4;
         }
-        if (EXTERNAL_AVX(cpu_flags)) {
+        if (EXTERNAL_AVX_FAST(cpu_flags)) {
             vol->scale_samples = ff_scale_samples_s32_avx;
             vol->samples_align = 8;
         }
index 64b3a4d6bd63644397f41b531984ede586458f49..f211f2396b298ccd0994ad399d1690fa5a22d668 100644 (file)
@@ -85,14 +85,14 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
     if (EXTERNAL_SSE2(cpu_flags)) {
         fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
     }
-    if (EXTERNAL_AVX(cpu_flags)) {
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_avx;
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
         fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
         fdsp->vector_fmul_add    = ff_vector_fmul_add_avx;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
     }
-    if (EXTERNAL_FMA3(cpu_flags)) {
+    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
         fdsp->vector_fmul_add    = ff_vector_fmul_add_fma3;
     }
index f53190488a5616dff58f18c88cef8ab61c6b633d..81f141cbebef70f2202c976bdb966fed71821aed 100644 (file)
@@ -35,7 +35,7 @@ av_cold void ff_init_lls_x86(LLSModel *m)
         if (m->indep_count >= 4)
             m->evaluate_lls = ff_evaluate_lls_sse2;
     }
-    if (EXTERNAL_AVX(cpu_flags)) {
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
         m->update_lls = ff_update_lls_avx;
     }
 }
index 7f25d981a9c56b8530a98f4690c24ca62179b640..5e5e91d1422a36a0aed63906592884d29f7118bd 100644 (file)
@@ -145,9 +145,11 @@ MULTI_CAPS_FUNC(SSE2, sse2)
                 ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
         }
     }
-    if(EXTERNAL_AVX(mm_flags)) {
+    if(EXTERNAL_AVX_FAST(mm_flags)) {
         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
             ac->simd_f =  ff_int32_to_float_a_avx;
+    }
+    if(EXTERNAL_AVX(mm_flags)) {
         if(channels == 6) {
             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                 ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
index 918479a4a808f3c9f75042f8655c1c078976f92c..5f2c5fe170a49005fffd8c1facebd8db4dfcb72f 100644 (file)
@@ -73,7 +73,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
             s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
             s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
         }
-        if(EXTERNAL_AVX(mm_flags)) {
+        if(EXTERNAL_AVX_FAST(mm_flags)) {
             s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
             s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
         }
index 93001d65cbb620bc176df5220bc24d06ecfc49a4..bc444cfb8b5efaafd208b85278f1b3928e142b55 100644 (file)
@@ -67,11 +67,11 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
             c->dsp.resample = c->linear ? ff_resample_linear_float_sse
                                         : ff_resample_common_float_sse;
         }
-        if (EXTERNAL_AVX(mm_flags)) {
+        if (EXTERNAL_AVX_FAST(mm_flags)) {
             c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                         : ff_resample_common_float_avx;
         }
-        if (EXTERNAL_FMA3(mm_flags)) {
+        if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
             c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                         : ff_resample_common_float_fma3;
         }