*/
#include "libavutil/cpu.h"
-#include "libavutil/x86_cpu.h"
+#include "libavutil/x86/asm.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/h264dsp.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/simple_idct.h"
-#include "libavcodec/ac3dec.h"
#include "dsputil_mmx.h"
#include "idct_xvid.h"
DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
+#if HAVE_INLINE_ASM
+
#define JUMPALIGN() __asm__ volatile (".p2align 3"::)
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
#define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e)
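+/* This template is included once per PAVGB/OP_AVG definition; each pass
+ * below redefines the macros first, so every CPU flavor (plain MMX,
+ * 3DNow!, MMXEXT) gets its own copy of the pixel put/avg functions. */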
-#include "dsputil_mmx_rnd_template.c"
+#include "dsputil_rnd_template.c"
#undef DEF
#undef SET_RND
#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
-#include "dsputil_mmx_rnd_template.c"
+#include "dsputil_rnd_template.c"
#undef DEF
#undef SET_RND
#define PAVGB "pavgusb"
#define OP_AVG PAVGB
-#include "dsputil_mmx_avg_template.c"
+#include "dsputil_avg_template.c"
#undef DEF
#undef PAVGB
#undef OP_AVG
/***********************************/
-/* MMX2 specific */
+/* MMXEXT specific */
-#define DEF(x) x ## _mmx2
+#define DEF(x) x ## _mmxext
-/* Introduced only in MMX2 set */
+/* Introduced only in MMXEXT set */
#define PAVGB "pavgb"
#define OP_AVG PAVGB
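+/* pavgb computes a correctly rounded unsigned byte average in a single
+ * instruction, so this instantiation needs none of the rounding
+ * emulation used by the plain MMX passes above. */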
-#include "dsputil_mmx_avg_template.c"
+#include "dsputil_avg_template.c"
#undef DEF
#undef PAVGB
#define put_no_rnd_pixels16_mmx put_pixels16_mmx
#define put_no_rnd_pixels8_mmx put_pixels8_mmx
-#define put_pixels16_mmx2 put_pixels16_mmx
-#define put_pixels8_mmx2 put_pixels8_mmx
-#define put_pixels4_mmx2 put_pixels4_mmx
-#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
-#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
+#define put_pixels16_mmxext put_pixels16_mmx
+#define put_pixels8_mmxext put_pixels8_mmx
+#define put_pixels4_mmxext put_pixels4_mmx
+#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
+#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx
#define put_pixels16_3dnow put_pixels16_mmx
#define put_pixels8_3dnow put_pixels8_mmx
#define put_pixels4_3dnow put_pixels4_mmx
pix = pixels;
/* unrolled loop */
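+ /* p is passed in a register and addressed with explicit displacements
+  * (0, 8, ..., 56); offsetting a single "m" operand as "8%3" relies on
+  * compiler-specific operand printing and is not portable. */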
__asm__ volatile (
- "movq %3, %%mm0 \n\t"
- "movq 8%3, %%mm1 \n\t"
- "movq 16%3, %%mm2 \n\t"
- "movq 24%3, %%mm3 \n\t"
- "movq 32%3, %%mm4 \n\t"
- "movq 40%3, %%mm5 \n\t"
- "movq 48%3, %%mm6 \n\t"
- "movq 56%3, %%mm7 \n\t"
+ "movq (%3), %%mm0 \n\t"
+ "movq 8(%3), %%mm1 \n\t"
+ "movq 16(%3), %%mm2 \n\t"
+ "movq 24(%3), %%mm3 \n\t"
+ "movq 32(%3), %%mm4 \n\t"
+ "movq 40(%3), %%mm5 \n\t"
+ "movq 48(%3), %%mm6 \n\t"
+ "movq 56(%3), %%mm7 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"packuswb %%mm3, %%mm2 \n\t"
"packuswb %%mm5, %%mm4 \n\t"
"movq %%mm4, (%0, %1, 2) \n\t"
"movq %%mm6, (%0, %2) \n\t"
:: "r"(pix), "r"((x86_reg)line_size), "r"((x86_reg)line_size * 3),
- "m"(*p)
+ "r"(p)
: "memory");
pix += line_size * 4;
p += 32;
"packuswb %%mm5, %%mm5 \n\t" \
OP(%%mm5, out, %%mm7, d)
-#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \
-static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
- uint8_t *src, \
- int dstStride, \
- int srcStride, \
- int h) \
+#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \
+static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, \
+ uint8_t *src, \
+ int dstStride, \
+ int srcStride, \
+ int h) \
{ \
uint64_t temp; \
\
"psraw $5, %%mm3 \n\t" \
"movq %5, %%mm1 \n\t" \
"packuswb %%mm3, %%mm1 \n\t" \
- OP_MMX2(%%mm1, (%1), %%mm4, q) \
+ OP_MMXEXT(%%mm1, (%1), %%mm4, q) \
/* mm0 = GHIJ, mm2 = FGHI, mm5 = HIJK, mm6 = IJKL, mm7 = 0 */ \
\
"movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */ \
"paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */ \
"psraw $5, %%mm4 \n\t" \
"packuswb %%mm4, %%mm0 \n\t" \
- OP_MMX2(%%mm0, 8(%1), %%mm4, q) \
+ OP_MMXEXT(%%mm0, 8(%1), %%mm4, q) \
\
"add %3, %0 \n\t" \
"add %4, %1 \n\t" \
} \
} \
\
-static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \
- uint8_t *src, \
- int dstStride, \
- int srcStride, \
- int h) \
+static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, \
+ uint8_t *src, \
+ int dstStride, \
+ int srcStride, \
+ int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */ \
"psraw $5, %%mm3 \n\t" \
"packuswb %%mm3, %%mm0 \n\t" \
- OP_MMX2(%%mm0, (%1), %%mm4, q) \
+ OP_MMXEXT(%%mm0, (%1), %%mm4, q) \
\
"add %3, %0 \n\t" \
"add %4, %1 \n\t" \
"pavgusb "#temp", "#a" \n\t" \
"mov"#size" "#a", "#b" \n\t"
-#define AVG_MMX2_OP(a, b, temp, size) \
+#define AVG_MMXEXT_OP(a, b, temp, size) \
"mov"#size" "#b", "#temp" \n\t" \
"pavgb "#temp", "#a" \n\t" \
"mov"#size" "#a", "#b" \n\t"
QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP)
-QPEL_BASE(avg_, ff_pw_16, _, AVG_MMX2_OP, AVG_3DNOW_OP)
+QPEL_BASE(avg_, ff_pw_16, _, AVG_MMXEXT_OP, AVG_3DNOW_OP)
QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow)
QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow)
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
-QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2)
-QPEL_OP(avg_, ff_pw_16, _, AVG_MMX2_OP, mmx2)
-QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
+QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmxext)
+QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmxext)
+QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmxext)
/***********************************/
/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1) \
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride + 1, -stride, -1) \
-QPEL_2TAP(put_, 16, mmx2)
-QPEL_2TAP(avg_, 16, mmx2)
-QPEL_2TAP(put_, 8, mmx2)
-QPEL_2TAP(avg_, 8, mmx2)
+QPEL_2TAP(put_, 16, mmxext)
+QPEL_2TAP(avg_, 16, mmxext)
+QPEL_2TAP(put_, 8, mmxext)
+QPEL_2TAP(avg_, 8, mmxext)
QPEL_2TAP(put_, 16, 3dnow)
QPEL_2TAP(avg_, 16, 3dnow)
QPEL_2TAP(put_, 8, 3dnow)
avg_pixels16_xy2_mmx(dst, src, stride, 16);
}
+#endif /* HAVE_INLINE_ASM */
+
#if HAVE_YASM
typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src,
x86_reg linesize, x86_reg start_y,
}
#endif /* HAVE_YASM */
-typedef void emulated_edge_mc_func(uint8_t *dst, const uint8_t *src,
- int linesize, int block_w, int block_h,
- int src_x, int src_y, int w, int h);
+#if HAVE_INLINE_ASM
-static av_always_inline void gmc(uint8_t *dst, uint8_t *src,
- int stride, int h, int ox, int oy,
- int dxx, int dxy, int dyx, int dyy,
- int shift, int r, int width, int height,
- emulated_edge_mc_func *emu_edge_fn)
+static void gmc_mmx(uint8_t *dst, uint8_t *src,
+ int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy,
+ int shift, int r, int width, int height)
{
const int w = 8;
const int ix = ox >> (16 + shift);
const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
const uint64_t shift2 = 2 * shift;
- uint8_t edge_buf[(h + 1) * stride];
int x, y;
const int dxw = (dxx - (1 << (16 + shift))) * (w - 1);
((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
(oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift)
// uses more than 16 bits of subpel mv (only at huge resolution)
- || (dxx | dxy | dyx | dyy) & 15) {
+ || (dxx | dxy | dyx | dyy) & 15 ||
+ (unsigned)ix >= width - w ||
+ (unsigned)iy >= height - h) {
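+ // the unsigned compares also catch negative ix/iy, so any block that
+ // would touch pixels outside the picture takes the C fallback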
// FIXME could still use mmx for some of the rows
ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy,
shift, r, width, height);
return;
}
src += ix + iy * stride;
- if ((unsigned)ix >= width - w ||
- (unsigned)iy >= height - h) {
- emu_edge_fn(edge_buf, src, stride, w + 1, h + 1, ix, iy, width, height);
- src = edge_buf;
- }
__asm__ volatile (
"movd %0, %%mm6 \n\t"
}
}
-#if HAVE_YASM
-#if ARCH_X86_32
-static void gmc_mmx(uint8_t *dst, uint8_t *src,
- int stride, int h, int ox, int oy,
- int dxx, int dxy, int dyx, int dyy,
- int shift, int r, int width, int height)
-{
- gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
- width, height, &emulated_edge_mc_mmx);
-}
-#endif
-static void gmc_sse(uint8_t *dst, uint8_t *src,
- int stride, int h, int ox, int oy,
- int dxx, int dxy, int dyx, int dyy,
- int shift, int r, int width, int height)
-{
- gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
- width, height, &emulated_edge_mc_sse);
-}
-#else
-static void gmc_mmx(uint8_t *dst, uint8_t *src,
- int stride, int h, int ox, int oy,
- int dxx, int dxy, int dyx, int dyy,
- int shift, int r, int width, int height)
-{
- gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
- width, height, &ff_emulated_edge_mc_8);
-}
-#endif
-
#define PREFETCH(name, op) \
static void name(void *mem, int stride, int h) \
{ \
} while (--h); \
}
-PREFETCH(prefetch_mmx2, prefetcht0)
+PREFETCH(prefetch_mmxext, prefetcht0)
PREFETCH(prefetch_3dnow, prefetch)
#undef PREFETCH
-#include "h264_qpel_mmx.c"
+#endif /* HAVE_INLINE_ASM */
+
+#include "h264_qpel.c"
void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
(uint8_t *dst, uint8_t *src, \
int stride, int h, int x, int y);
-CHROMA_MC(put, 2, 10, mmx2)
-CHROMA_MC(avg, 2, 10, mmx2)
-CHROMA_MC(put, 4, 10, mmx2)
-CHROMA_MC(avg, 4, 10, mmx2)
+CHROMA_MC(put, 2, 10, mmxext)
+CHROMA_MC(avg, 2, 10, mmxext)
+CHROMA_MC(put, 4, 10, mmxext)
+CHROMA_MC(avg, 4, 10, mmxext)
CHROMA_MC(put, 8, 10, sse2)
CHROMA_MC(avg, 8, 10, sse2)
CHROMA_MC(put, 8, 10, avx)
CHROMA_MC(avg, 8, 10, avx)
+#if HAVE_INLINE_ASM
+
/* CAVS-specific */
-void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
{
put_pixels8_mmx(dst, src, stride, 8);
}
-void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
{
avg_pixels8_mmx(dst, src, stride, 8);
}
-void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
{
put_pixels16_mmx(dst, src, stride, 16);
}
-void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
{
avg_pixels16_mmx(dst, src, stride, 16);
}
put_pixels8_mmx(dst, src, stride, 8);
}
-void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src,
- int stride, int rnd)
+void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
+ int stride, int rnd)
{
- avg_pixels8_mmx2(dst, src, stride, 8);
-}
-
-/* XXX: Those functions should be suppressed ASAP when all IDCTs are
- * converted. */
-#if CONFIG_GPL
-static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size,
- DCTELEM *block)
-{
- ff_mmx_idct(block);
- ff_put_pixels_clamped_mmx(block, dest, line_size);
-}
-
-static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size,
- DCTELEM *block)
-{
- ff_mmx_idct(block);
- ff_add_pixels_clamped_mmx(block, dest, line_size);
-}
-
-static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size,
- DCTELEM *block)
-{
- ff_mmxext_idct(block);
- ff_put_pixels_clamped_mmx(block, dest, line_size);
-}
-
-static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size,
- DCTELEM *block)
-{
- ff_mmxext_idct(block);
- ff_add_pixels_clamped_mmx(block, dest, line_size);
-}
-#endif
-
-static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx(block);
- ff_put_pixels_clamped_mmx(block, dest, line_size);
-}
-
-static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx(block);
- ff_add_pixels_clamped_mmx(block, dest, line_size);
-}
-
-static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx2(block);
- ff_put_pixels_clamped_mmx(block, dest, line_size);
-}
-
-static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx2(block);
- ff_add_pixels_clamped_mmx(block, dest, line_size);
+ avg_pixels8_mmxext(dst, src, stride, 8);
}
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
}
}
-#define IF1(x) x
-#define IF0(x)
-
-#define MIX5(mono, stereo) \
- __asm__ volatile ( \
- "movss 0(%2), %%xmm5 \n" \
- "movss 8(%2), %%xmm6 \n" \
- "movss 24(%2), %%xmm7 \n" \
- "shufps $0, %%xmm5, %%xmm5 \n" \
- "shufps $0, %%xmm6, %%xmm6 \n" \
- "shufps $0, %%xmm7, %%xmm7 \n" \
- "1: \n" \
- "movaps (%0, %1), %%xmm0 \n" \
- "movaps 0x400(%0, %1), %%xmm1 \n" \
- "movaps 0x800(%0, %1), %%xmm2 \n" \
- "movaps 0xc00(%0, %1), %%xmm3 \n" \
- "movaps 0x1000(%0, %1), %%xmm4 \n" \
- "mulps %%xmm5, %%xmm0 \n" \
- "mulps %%xmm6, %%xmm1 \n" \
- "mulps %%xmm5, %%xmm2 \n" \
- "mulps %%xmm7, %%xmm3 \n" \
- "mulps %%xmm7, %%xmm4 \n" \
- stereo("addps %%xmm1, %%xmm0 \n") \
- "addps %%xmm1, %%xmm2 \n" \
- "addps %%xmm3, %%xmm0 \n" \
- "addps %%xmm4, %%xmm2 \n" \
- mono("addps %%xmm2, %%xmm0 \n") \
- "movaps %%xmm0, (%0, %1) \n" \
- stereo("movaps %%xmm2, 0x400(%0, %1) \n") \
- "add $16, %0 \n" \
- "jl 1b \n" \
- : "+&r"(i) \
- : "r"(samples[0] + len), "r"(matrix) \
- : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", \
- "%xmm4", "%xmm5", "%xmm6", "%xmm7",) \
- "memory" \
- );
-
-#define MIX_MISC(stereo) \
- __asm__ volatile ( \
- "1: \n" \
- "movaps (%3, %0), %%xmm0 \n" \
- stereo("movaps %%xmm0, %%xmm1 \n") \
- "mulps %%xmm4, %%xmm0 \n" \
- stereo("mulps %%xmm5, %%xmm1 \n") \
- "lea 1024(%3, %0), %1 \n" \
- "mov %5, %2 \n" \
- "2: \n" \
- "movaps (%1), %%xmm2 \n" \
- stereo("movaps %%xmm2, %%xmm3 \n") \
- "mulps (%4, %2), %%xmm2 \n" \
- stereo("mulps 16(%4, %2), %%xmm3 \n") \
- "addps %%xmm2, %%xmm0 \n" \
- stereo("addps %%xmm3, %%xmm1 \n") \
- "add $1024, %1 \n" \
- "add $32, %2 \n" \
- "jl 2b \n" \
- "movaps %%xmm0, (%3, %0) \n" \
- stereo("movaps %%xmm1, 1024(%3, %0) \n") \
- "add $16, %0 \n" \
- "jl 1b \n" \
- : "+&r"(i), "=&r"(j), "=&r"(k) \
- : "r"(samples[0] + len), "r"(matrix_simd + in_ch), \
- "g"((intptr_t) - 32 * (in_ch - 1)) \
- : "memory" \
- );
-
-static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2],
- int out_ch, int in_ch, int len)
-{
- int (*matrix_cmp)[2] = (int(*)[2])matrix;
- intptr_t i, j, k;
-
- i = -len * sizeof(float);
- if (in_ch == 5 && out_ch == 2 &&
- !(matrix_cmp[0][1] | matrix_cmp[2][0] |
- matrix_cmp[3][1] | matrix_cmp[4][0] |
- (matrix_cmp[1][0] ^ matrix_cmp[1][1]) |
- (matrix_cmp[0][0] ^ matrix_cmp[2][1]))) {
- MIX5(IF0, IF1);
- } else if (in_ch == 5 && out_ch == 1 &&
- matrix_cmp[0][0] == matrix_cmp[2][0] &&
- matrix_cmp[3][0] == matrix_cmp[4][0]) {
- MIX5(IF1, IF0);
- } else {
- DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4];
- j = 2 * in_ch * sizeof(float);
- __asm__ volatile (
- "1: \n"
- "sub $8, %0 \n"
- "movss (%2, %0), %%xmm4 \n"
- "movss 4(%2, %0), %%xmm5 \n"
- "shufps $0, %%xmm4, %%xmm4 \n"
- "shufps $0, %%xmm5, %%xmm5 \n"
- "movaps %%xmm4, (%1, %0, 4) \n"
- "movaps %%xmm5, 16(%1, %0, 4) \n"
- "jg 1b \n"
- : "+&r"(j)
- : "r"(matrix_simd), "r"(matrix)
- : "memory"
- );
- if (out_ch == 2) {
- MIX_MISC(IF1);
- } else {
- MIX_MISC(IF0);
- }
- }
-}
-
#if HAVE_6REGS
-static void vector_fmul_window_3dnow2(float *dst, const float *src0,
- const float *src1, const float *win,
- int len)
+static void vector_fmul_window_3dnowext(float *dst, const float *src0,
+ const float *src1, const float *win,
+ int len)
{
x86_reg i = -len * 4;
x86_reg j = len * 4 - 8;
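+ /* i indexes the first half forward from -len*4, j the second half
+  * backward from len*4-8: the overlap window is applied to sample pairs
+  * taken from both ends of the buffer. */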
);
}
-void ff_vp3_idct_mmx(int16_t *input_data);
-void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-
-void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size,
- const DCTELEM *block);
-
-void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-
-void ff_vp3_idct_sse2(int16_t *input_data);
-void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+#endif /* HAVE_INLINE_ASM */
int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2,
int order);
static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
                             int mm_flags)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
+#if HAVE_INLINE_ASM
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
SET_HPEL_FUNCS(avg, 1, 8, mmx);
SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
+
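+ /* Select the 8x8 IDCT: FF_IDCT_AUTO maps to the simple MMX IDCT here,
+  * and the Xvid MMX IDCT can be requested explicitly. */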
+ switch (avctx->idct_algo) {
+ case FF_IDCT_AUTO:
+ case FF_IDCT_SIMPLEMMX:
+ c->idct_put = ff_simple_idct_put_mmx;
+ c->idct_add = ff_simple_idct_add_mmx;
+ c->idct = ff_simple_idct_mmx;
+ c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
+ break;
+ case FF_IDCT_XVIDMMX:
+ c->idct_put = ff_idct_xvid_mmx_put;
+ c->idct_add = ff_idct_xvid_mmx_add;
+ c->idct = ff_idct_xvid_mmx;
+ break;
+ }
}
-#if ARCH_X86_32 || !HAVE_YASM
c->gmc = gmc_mmx;
-#endif
-#if ARCH_X86_32 && HAVE_YASM
- if (!high_bit_depth)
- c->emulated_edge_mc = emulated_edge_mc_mmx;
-#endif
c->add_bytes = add_bytes_mmx;
c->h263_v_loop_filter = h263_v_loop_filter_mmx;
c->h263_h_loop_filter = h263_h_loop_filter_mmx;
}
+#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
+#if ARCH_X86_32
+ if (!high_bit_depth)
+ c->emulated_edge_mc = emulated_edge_mc_mmx;
+#endif
+
if (!high_bit_depth && CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmx_rnd;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
}
-static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
- int mm_flags)
+static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
+ int mm_flags)
{
const int bit_depth = avctx->bits_per_raw_sample;
const int high_bit_depth = bit_depth > 8;
- c->prefetch = prefetch_mmx2;
+#if HAVE_INLINE_ASM
+ c->prefetch = prefetch_mmxext;
if (!high_bit_depth) {
- c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
- c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
+ c->put_pixels_tab[0][1] = put_pixels16_x2_mmxext;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
- c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
+ c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
- c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
- c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
+ c->put_pixels_tab[1][1] = put_pixels8_x2_mmxext;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_mmxext;
- c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
- c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
- c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+ c->avg_pixels_tab[1][0] = avg_pixels8_mmxext;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmxext;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmxext;
}
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
if (!high_bit_depth) {
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmxext;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmxext;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
- c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmxext;
}
+ }
- if (CONFIG_VP3_DECODER && HAVE_YASM) {
- c->vp3_v_loop_filter = ff_vp3_v_loop_filter_mmx2;
- c->vp3_h_loop_filter = ff_vp3_h_loop_filter_mmx2;
- }
+ if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
+ c->idct_put = ff_idct_xvid_mmxext_put;
+ c->idct_add = ff_idct_xvid_mmxext_add;
+ c->idct = ff_idct_xvid_mmxext;
}
- if (CONFIG_VP3_DECODER && HAVE_YASM)
- c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
- if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 ||
- avctx->codec_id == CODEC_ID_THEORA)) {
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
+ if (CONFIG_VP3_DECODER && (avctx->codec_id == AV_CODEC_ID_VP3 ||
+ avctx->codec_id == AV_CODEC_ID_THEORA)) {
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmxext;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmxext;
}
+#endif /* HAVE_INLINE_ASM */
if (CONFIG_H264QPEL) {
- SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
- SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, );
- SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, );
- SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, );
+#if HAVE_INLINE_ASM
+ SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
+ SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
+#endif /* HAVE_INLINE_ASM */
if (!high_bit_depth) {
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, );
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, );
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
+#if HAVE_INLINE_ASM
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, );
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
+#endif /* HAVE_INLINE_ASM */
} else if (bit_depth == 10) {
#if HAVE_YASM
#if !ARCH_X86_64
#endif
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
-#endif
+#endif /* HAVE_YASM */
}
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
+#if HAVE_INLINE_ASM
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmxext, );
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmxext, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmxext, );
+#endif /* HAVE_INLINE_ASM */
}
#if HAVE_YASM
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmx2;
}
if (bit_depth == 10 && CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmx2;
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmx2;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmx2;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmx2;
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
}
- c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
+ /* slower than cmov version on AMD */
+ if (!(mm_flags & AV_CPU_FLAG_3DNOW))
+ c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2;
} else {
c->apply_window_int16 = ff_apply_window_int16_mmxext;
}
-#endif
+#endif /* HAVE_YASM */
}
static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
int mm_flags)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
+#if HAVE_INLINE_ASM
c->prefetch = prefetch_3dnow;
if (!high_bit_depth) {
}
}
- if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 ||
- avctx->codec_id == CODEC_ID_THEORA)) {
+ if (CONFIG_VP3_DECODER && (avctx->codec_id == AV_CODEC_ID_VP3 ||
+ avctx->codec_id == AV_CODEC_ID_THEORA)) {
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
}
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
}
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+#endif /* HAVE_INLINE_ASM */
+
#if HAVE_YASM
if (!high_bit_depth && CONFIG_H264CHROMA) {
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_3dnow_rnd;
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
}
-#endif
-
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
-
-#if HAVE_7REGS
- if (mm_flags & AV_CPU_FLAG_CMOV)
- c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
-#endif
+#endif /* HAVE_YASM */
}
-static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx,
- int mm_flags)
+static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx,
+ int mm_flags)
{
-#if HAVE_6REGS
- c->vector_fmul_window = vector_fmul_window_3dnow2;
+#if HAVE_AMD3DNOWEXT_INLINE && HAVE_6REGS
+ c->vector_fmul_window = vector_fmul_window_3dnowext;
#endif
}
static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
                             int mm_flags)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
+#if HAVE_INLINE_ASM
if (!high_bit_depth) {
if (!(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)) {
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
}
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
- c->ac3_downmix = ac3_downmix_sse;
-#if HAVE_YASM
- c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
- c->vector_fmul_add = ff_vector_fmul_add_sse;
-#endif
#if HAVE_6REGS
c->vector_fmul_window = vector_fmul_window_sse;
#endif
c->vector_clipf = vector_clipf_sse;
+#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
+ c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
+ c->vector_fmul_add = ff_vector_fmul_add_sse;
+
c->scalarproduct_float = ff_scalarproduct_float_sse;
c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_sse;
- c->gmc = gmc_sse;
-#endif
+#endif /* HAVE_YASM */
}
static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
int mm_flags)
{
const int bit_depth = avctx->bits_per_raw_sample;
+
+#if HAVE_INLINE_ASM
const int high_bit_depth = bit_depth > 8;
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
H264_QPEL_FUNCS(3, 3, sse2);
}
+ if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
+ c->idct_put = ff_idct_xvid_sse2_put;
+ c->idct_add = ff_idct_xvid_sse2_add;
+ c->idct = ff_idct_xvid_sse2;
+ c->idct_permutation_type = FF_SSE2_IDCT_PERM;
+ }
+#endif /* HAVE_INLINE_ASM */
+
#if HAVE_YASM
if (bit_depth == 10) {
if (CONFIG_H264QPEL) {
c->apply_window_int16 = ff_apply_window_int16_sse2;
}
c->bswap_buf = ff_bswap32_buf_sse2;
-#endif
+#endif /* HAVE_YASM */
}
static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
int mm_flags)
{
-#if HAVE_SSSE3
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
const int bit_depth = avctx->bits_per_raw_sample;
+#if HAVE_SSSE3_INLINE
if (!high_bit_depth && CONFIG_H264QPEL) {
H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
}
-#if HAVE_YASM
- else if (bit_depth == 10 && CONFIG_H264QPEL) {
+#endif /* HAVE_SSSE3_INLINE */
+
+#if HAVE_SSSE3_EXTERNAL
+ if (bit_depth == 10 && CONFIG_H264QPEL) {
H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) // cachesplit
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
c->bswap_buf = ff_bswap32_buf_ssse3;
-#endif
-#endif
+#endif /* HAVE_SSSE3_EXTERNAL */
}
static void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
int mm_flags)
{
-#if HAVE_YASM
+#if HAVE_SSE4_EXTERNAL
c->vector_clip_int32 = ff_vector_clip_int32_sse4;
-#endif
+#endif /* HAVE_SSE4_EXTERNAL */
}
static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
{
-#if HAVE_AVX && HAVE_YASM
+#if HAVE_AVX_EXTERNAL
const int bit_depth = avctx->bits_per_raw_sample;
if (bit_depth == 10) {
c->butterflies_float_interleave = ff_butterflies_float_interleave_avx;
c->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
c->vector_fmul_add = ff_vector_fmul_add_avx;
-#endif
+#endif /* HAVE_AVX_EXTERNAL */
}
void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
-#if 0
- av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
- if (mm_flags & AV_CPU_FLAG_MMX)
- av_log(avctx, AV_LOG_INFO, " mmx");
- if (mm_flags & AV_CPU_FLAG_MMX2)
- av_log(avctx, AV_LOG_INFO, " mmx2");
- if (mm_flags & AV_CPU_FLAG_3DNOW)
- av_log(avctx, AV_LOG_INFO, " 3dnow");
- if (mm_flags & AV_CPU_FLAG_SSE)
- av_log(avctx, AV_LOG_INFO, " sse");
- if (mm_flags & AV_CPU_FLAG_SSE2)
- av_log(avctx, AV_LOG_INFO, " sse2");
- av_log(avctx, AV_LOG_INFO, "\n");
-#endif
-
- if (mm_flags & AV_CPU_FLAG_MMX) {
- const int idct_algo = avctx->idct_algo;
-
- if (avctx->bits_per_raw_sample <= 8) {
- if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) {
- c->idct_put = ff_simple_idct_put_mmx;
- c->idct_add = ff_simple_idct_add_mmx;
- c->idct = ff_simple_idct_mmx;
- c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
-#if CONFIG_GPL
- } else if (idct_algo == FF_IDCT_LIBMPEG2MMX) {
- if (mm_flags & AV_CPU_FLAG_MMX2) {
- c->idct_put = ff_libmpeg2mmx2_idct_put;
- c->idct_add = ff_libmpeg2mmx2_idct_add;
- c->idct = ff_mmxext_idct;
- } else {
- c->idct_put = ff_libmpeg2mmx_idct_put;
- c->idct_add = ff_libmpeg2mmx_idct_add;
- c->idct = ff_mmx_idct;
- }
- c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
+#if HAVE_7REGS && HAVE_INLINE_ASM
+ if (mm_flags & AV_CPU_FLAG_CMOV)
+ c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
#endif
- } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER ||
- CONFIG_VP6_DECODER) &&
- idct_algo == FF_IDCT_VP3 && HAVE_YASM) {
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- c->idct_put = ff_vp3_idct_put_sse2;
- c->idct_add = ff_vp3_idct_add_sse2;
- c->idct = ff_vp3_idct_sse2;
- c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
- } else {
- c->idct_put = ff_vp3_idct_put_mmx;
- c->idct_add = ff_vp3_idct_add_mmx;
- c->idct = ff_vp3_idct_mmx;
- c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
- }
- } else if (idct_algo == FF_IDCT_CAVS) {
- c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
- } else if (idct_algo == FF_IDCT_XVIDMMX) {
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- c->idct_put = ff_idct_xvid_sse2_put;
- c->idct_add = ff_idct_xvid_sse2_add;
- c->idct = ff_idct_xvid_sse2;
- c->idct_permutation_type = FF_SSE2_IDCT_PERM;
- } else if (mm_flags & AV_CPU_FLAG_MMX2) {
- c->idct_put = ff_idct_xvid_mmx2_put;
- c->idct_add = ff_idct_xvid_mmx2_add;
- c->idct = ff_idct_xvid_mmx2;
- } else {
- c->idct_put = ff_idct_xvid_mmx_put;
- c->idct_add = ff_idct_xvid_mmx_add;
- c->idct = ff_idct_xvid_mmx;
- }
- }
- }
+ if (mm_flags & AV_CPU_FLAG_MMX)
dsputil_init_mmx(c, avctx, mm_flags);
- }
- if (mm_flags & AV_CPU_FLAG_MMX2)
- dsputil_init_mmx2(c, avctx, mm_flags);
+ if (mm_flags & AV_CPU_FLAG_MMXEXT)
+ dsputil_init_mmxext(c, avctx, mm_flags);
- if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW)
+ if (mm_flags & AV_CPU_FLAG_3DNOW)
dsputil_init_3dnow(c, avctx, mm_flags);
- if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT)
- dsputil_init_3dnow2(c, avctx, mm_flags);
+ if (mm_flags & AV_CPU_FLAG_3DNOWEXT)
+ dsputil_init_3dnowext(c, avctx, mm_flags);
- if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE)
+ if (mm_flags & AV_CPU_FLAG_SSE)
dsputil_init_sse(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_SSE2)
dsputil_init_sse2(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_SSSE3)
dsputil_init_ssse3(c, avctx, mm_flags);
- if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE)
+ if (mm_flags & AV_CPU_FLAG_SSE4)
dsputil_init_sse4(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_AVX)