]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/x86/dcadsp_init.c
x86: hpeldsp: Split off VP3-specific bits into a separate file
[ffmpeg] / libavcodec / x86 / dcadsp_init.c
index c234bd29f5502ecea3fb84caa003086cbeaa5bba..8632c4a98f07e37f0026db1d57dc346d9a71e622 100644 (file)
@@ -23,9 +23,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/dcadsp.h"
 
-void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale);
-void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale);
-void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale);
 void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
 
@@ -34,18 +31,58 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
     int cpu_flags = av_get_cpu_flags();
 
     if (EXTERNAL_SSE(cpu_flags)) {
-#if ARCH_X86_32
-        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse;
-#endif
         s->lfe_fir[0]        = ff_dca_lfe_fir0_sse;
         s->lfe_fir[1]        = ff_dca_lfe_fir1_sse;
     }
+}
+
+
+#define SYNTH_FILTER_FUNC(opt)                                                 \
+void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
+                                 const float window[512],                      \
+                                 float out[32], intptr_t offset, float scale); \
+static void synth_filter_##opt(FFTContext *imdct,                              \
+                               float *synth_buf_ptr, int *synth_buf_offset,    \
+                               float synth_buf2[32], const float window[512],  \
+                               float out[32], const float in[32], float scale) \
+{                                                                              \
+    float *synth_buf= synth_buf_ptr + *synth_buf_offset;                       \
+                                                                               \
+    imdct->imdct_half(imdct, synth_buf, in);                                   \
+                                                                               \
+    ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window,                 \
+                                out, *synth_buf_offset, scale);                \
+                                                                               \
+    *synth_buf_offset = (*synth_buf_offset - 32) & 511;                        \
+}                                                                              \
 
+#if HAVE_YASM
+#if ARCH_X86_32
+SYNTH_FILTER_FUNC(sse)
+#endif
+SYNTH_FILTER_FUNC(sse2)
+SYNTH_FILTER_FUNC(avx)
+SYNTH_FILTER_FUNC(fma3)
+#endif /* HAVE_YASM */
+
+av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags();
+
+#if ARCH_X86_32
+    if (EXTERNAL_SSE(cpu_flags)) {
+        s->synth_filter_float = synth_filter_sse;
+    }
+#endif
     if (EXTERNAL_SSE2(cpu_flags)) {
-        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2;
+        s->synth_filter_float = synth_filter_sse2;
     }
-
-    if (EXTERNAL_SSE4(cpu_flags)) {
-        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4;
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        s->synth_filter_float = synth_filter_avx;
+    }
+    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
+        s->synth_filter_float = synth_filter_fma3;
     }
+#endif /* HAVE_YASM */
 }