]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/x86/vc1dsp_init.c
x86: synth filter float: implement SSE2 version
[ffmpeg] / libavcodec / x86 / vc1dsp_init.c
index c359c4acdb38398b59577a69d92e69b1b887e5bd..9bd5ae3666eaa78dc697e1cab4b37f5357ba9be9 100644 (file)
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/vc1dsp.h"
+#include "dsputil_x86.h"
 #include "vc1dsp.h"
 #include "config.h"
 
@@ -60,12 +62,18 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
     ff_vc1_h_loop_filter8_sse4(src,          stride, pq);
     ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
 }
+
+static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
+                                      ptrdiff_t stride, int rnd)
+{
+    ff_avg_pixels8_mmxext(dst, src, stride, 8);
+}
 #endif /* HAVE_YASM */
 
 void ff_put_vc1_chroma_mc8_nornd_mmx  (uint8_t *dst, uint8_t *src,
                                        int stride, int h, int x, int y);
-void ff_avg_vc1_chroma_mc8_nornd_mmx(uint8_t *dst, uint8_t *src,
-                                       int stride, int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src,
+                                        int stride, int h, int x, int y);
 void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src,
                                        int stride, int h, int x, int y);
 void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
@@ -76,12 +84,12 @@ void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
 
 av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 {
-    int mm_flags = av_get_cpu_flags();
+    int cpu_flags = av_get_cpu_flags();
 
-    if (INLINE_MMX(mm_flags))
+    if (INLINE_MMX(cpu_flags))
         ff_vc1dsp_init_mmx(dsp);
 
-    if (INLINE_MMXEXT(mm_flags))
+    if (INLINE_MMXEXT(cpu_flags))
         ff_vc1dsp_init_mmxext(dsp);
 
 #define ASSIGN_LF(EXT) \
@@ -93,29 +101,30 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
 
 #if HAVE_YASM
-    if (mm_flags & AV_CPU_FLAG_MMX) {
+    if (EXTERNAL_MMX(cpu_flags)) {
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
     }
-
-    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-        ASSIGN_LF(mmxext);
-        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmx2;
-    } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
+    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
     }
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        ASSIGN_LF(mmxext);
+        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
 
-    if (mm_flags & AV_CPU_FLAG_SSE2) {
+        dsp->avg_vc1_mspel_pixels_tab[0]         = avg_vc1_mspel_mc00_mmxext;
+    }
+    if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
         dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
         dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
     }
-    if (mm_flags & AV_CPU_FLAG_SSSE3) {
+    if (EXTERNAL_SSSE3(cpu_flags)) {
         ASSIGN_LF(ssse3);
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
     }
-    if (mm_flags & AV_CPU_FLAG_SSE4) {
+    if (EXTERNAL_SSE4(cpu_flags)) {
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
         dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
     }