]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/x86/vc1dsp_mmx.c
x86: h264_intrapred: port to cpuflags
[ffmpeg] / libavcodec / x86 / vc1dsp_mmx.c
index f7ca714481491c1e4ce42ad7f1bd2ff79e119228..b02582f6153797a364a1a36aa4eccd785f1a81e3 100644 (file)
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
 #include "libavcodec/dsputil.h"
 #include "dsputil_mmx.h"
 #include "libavcodec/vc1dsp.h"
+#include "vc1dsp.h"
 
 #if HAVE_INLINE_ASM
 
@@ -465,7 +467,10 @@ VC1_MSPEL_MC(avg_)
 static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
      put_vc1_mspel_mc(dst, src, stride, a, b, rnd);                     \
 }\
-static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
+static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst,         \
+                                                  const uint8_t *src,   \
+                                                  int stride, int rnd)  \
+{                                                                       \
      avg_vc1_mspel_mc(dst, src, stride, a, b, rnd);                     \
 }
 
@@ -488,7 +493,8 @@ DECLARE_FUNCTION(3, 1)
 DECLARE_FUNCTION(3, 2)
 DECLARE_FUNCTION(3, 3)
 
-static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = (17 * dc +  4) >> 3;
@@ -526,7 +532,8 @@ static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
     );
 }
 
-static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = (17 * dc +  4) >> 3;
@@ -587,7 +594,8 @@ static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
     );
 }
 
-static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = ( 3 * dc +  1) >> 1;
@@ -625,7 +633,8 @@ static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
     );
 }
 
-static void vc1_inv_trans_8x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = (3 * dc +  1) >> 1;
@@ -686,58 +695,8 @@ static void vc1_inv_trans_8x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
     );
 }
 
-#endif /* HAVE_INLINE_ASM */
-
-#define LOOP_FILTER(EXT) \
-void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
-void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
-void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq); \
-void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq); \
-\
-static void vc1_v_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \
-{ \
-    ff_vc1_v_loop_filter8_ ## EXT(src,   stride, pq); \
-    ff_vc1_v_loop_filter8_ ## EXT(src+8, stride, pq); \
-} \
-\
-static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \
-{ \
-    ff_vc1_h_loop_filter8_ ## EXT(src,          stride, pq); \
-    ff_vc1_h_loop_filter8_ ## EXT(src+8*stride, stride, pq); \
-}
-
-#if HAVE_YASM
-LOOP_FILTER(mmx2)
-LOOP_FILTER(sse2)
-LOOP_FILTER(ssse3)
-
-void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq);
-
-static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
-{
-    ff_vc1_h_loop_filter8_sse4(src,          stride, pq);
-    ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
-}
-
-#endif
-
-void ff_put_vc1_chroma_mc8_mmx_nornd  (uint8_t *dst, uint8_t *src,
-                                       int stride, int h, int x, int y);
-void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
-                                       int stride, int h, int x, int y);
-void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
-                                       int stride, int h, int x, int y);
-void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
-                                       int stride, int h, int x, int y);
-void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
-                                       int stride, int h, int x, int y);
-
-void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
+av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 {
-    int mm_flags = av_get_cpu_flags();
-
-#if HAVE_INLINE_ASM
-    if (mm_flags & AV_CPU_FLAG_MMX) {
         dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
         dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
         dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
@@ -757,70 +716,33 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
         dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
         dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
         dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
-    }
-
-    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-        dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmx2;
-
-        dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmx2;
-
-        dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmx2;
-
-        dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmx2;
-        dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmx2;
-
-        dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2;
-        dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2;
-        dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2;
-        dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2;
-    }
-#endif /* HAVE_INLINE_ASM */
+}
 
-#define ASSIGN_LF(EXT) \
-        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
-        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
-        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
-        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
-        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
-        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
-
-#if HAVE_YASM
-    if (mm_flags & AV_CPU_FLAG_MMX) {
-        dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
-    }
-
-    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-        ASSIGN_LF(mmx2);
-        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
-    } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
-        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
-    }
-
-    if (mm_flags & AV_CPU_FLAG_SSE2) {
-        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
-        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
-        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
-        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
-    }
-    if (mm_flags & AV_CPU_FLAG_SSSE3) {
-        ASSIGN_LF(ssse3);
-        dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
-        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
-    }
-    if (mm_flags & AV_CPU_FLAG_SSE4) {
-        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
-        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
-    }
-#endif
+av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
+{
+        dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext;
+
+        dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmxext;
+
+        dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmxext;
+
+        dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmxext;
+
+        dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext;
+        dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext;
+        dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext;
+        dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext;
 }
+#endif /* HAVE_INLINE_ASM */