2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/mem_internal.h"
23 #include "libavcodec/x86/fdct.h"
24 #include "libavcodec/x86/xvididct.h"
25 #include "libavcodec/x86/simple_idct.h"
27 #if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
/*
 * Prototype of the SSE2 ProRes IDCT routine exercised by this test table.
 * It is only declared when a ProRes decoder is configured and the build
 * targets x86-64 with external x86 assembly (see the enclosing #if).
 * NOTE(review): the name suggests a 10-bit "IDCT and store" on an 8x8 block;
 * the units of linesize and the layout of qmat are not visible here - confirm
 * against the assembly implementation.
 */
28 void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
29 int16_t *block, int16_t *qmat);
/*
 * PR_WRAP(INSN) generates a static adapter named
 * ff_prores_idct_put_10_<INSN>_wrap() that takes a single int16_t block
 * pointer, so that ff_prores_idct_put_10_<INSN>() can be listed in the IDCT
 * table alongside the other entries.
 *
 * The wrapper declares two 64-entry int16_t scratch arrays (qmat and tmp) via
 * LOCAL_ALIGNED(16, ...), runs one 64-iteration loop before the call, invokes
 * the real routine as (dst, 16, tmp, qmat), and runs a second 64-iteration
 * loop afterwards.
 * NOTE(review): the loop bodies are not visible in this excerpt; presumably
 * the first fills qmat/tmp from dst and the second post-processes dst -
 * confirm against the full source.
 * NOTE(review): dst is int16_t * here but the wrapped routine takes
 * uint16_t *dst; the output is written back over the input block.
 */
31 #define PR_WRAP(INSN) \
32 static void ff_prores_idct_put_10_##INSN##_wrap(int16_t *dst){ \
33 LOCAL_ALIGNED(16, int16_t, qmat, [64]); \
34 LOCAL_ALIGNED(16, int16_t, tmp, [64]); \
37 for(i=0; i<64; i++){ \
41 ff_prores_idct_put_10_##INSN (dst, 16, tmp, qmat); \
43 for(i=0; i<64; i++) { \
50 # if HAVE_AVX_EXTERNAL
/*
 * AVX counterpart of ff_prores_idct_put_10_sse2(), with the same parameter
 * list. It is declared only when HAVE_AVX_EXTERNAL is set (see the enclosing
 * "# if").
 */
51 void ff_prores_idct_put_10_avx(uint16_t *dst, int linesize,
52 int16_t *block, int16_t *qmat);
/*
 * x86-specific forward DCT implementations offered to the DCT test.
 * Each initializer lists, in order: a display name, the function under test,
 * an FF_IDCT_PERM_* value (always FF_IDCT_PERM_NONE here) and the
 * AV_CPU_FLAG_* bit associated with the entry.
 * NOTE(review): struct algo is declared outside this excerpt; the field
 * meanings above are inferred from the initializers - confirm there.
 * The MMXEXT entry is only present when HAVE_MMXEXT_INLINE is set.
 */
58 static const struct algo fdct_tab_arch[] = {
60 { "MMX", ff_fdct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
62 #if HAVE_MMXEXT_INLINE
63 { "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT },
66 { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 },
/*
 * x86-specific inverse DCT implementations offered to the DCT test.
 * Each initializer lists a display name, the function under test, the
 * FF_IDCT_PERM_* coefficient permutation that function expects on input, and
 * the AV_CPU_FLAG_* bit associated with the entry.
 * NOTE(review): several entries pass a trailing 1 as a fifth initializer; its
 * meaning is defined by struct algo, which is declared outside this excerpt -
 * confirm there.
 *
 * Availability of entries is controlled by the preprocessor:
 *  - XVID-*     : CONFIG_MPEG4_DECODER && HAVE_X86ASM
 *                 (XVID-SSE2 additionally needs HAVE_SSE2_EXTERNAL)
 *  - PR-*       : a ProRes decoder, ARCH_X86_64 and HAVE_X86ASM
 *                 (PR-AVX additionally needs HAVE_AVX_EXTERNAL); these use
 *                 the wrappers generated by PR_WRAP
 *  - SIMPLE*-SSE2: HAVE_SSE2_EXTERNAL
 */
71 static const struct algo idct_tab_arch[] = {
73 { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
75 #if CONFIG_MPEG4_DECODER && HAVE_X86ASM
77 { "XVID-MMX", ff_xvid_idct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX, 1 },
78 { "XVID-MMXEXT", ff_xvid_idct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 },
80 #if HAVE_SSE2_EXTERNAL
81 { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
83 #endif /* CONFIG_MPEG4_DECODER && HAVE_X86ASM */
84 #if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
85 { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
86 # if HAVE_AVX_EXTERNAL
87 { "PR-AVX", ff_prores_idct_put_10_avx_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
92 #if HAVE_SSE2_EXTERNAL
93 { "SIMPLE8-SSE2", ff_simple_idct8_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2},
94 { "SIMPLE10-SSE2", ff_simple_idct10_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2},
95 { "SIMPLE12-SSE2", ff_simple_idct12_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
98 { "SIMPLE8-AVX", ff_simple_idct8_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX},
99 { "SIMPLE10-AVX", ff_simple_idct10_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX},
100 { "SIMPLE12-AVX", ff_simple_idct12_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
/*
 * Scatter table for FF_IDCT_PERM_SIMPLE: permute_x86() stores source
 * coefficient i at destination index idct_simple_mmx_perm[i].
 * The table has 64 entries, one per coefficient of a 64-element block, and
 * every value is below 0x40 so it always indexes inside that block.
 */
107 static const uint8_t idct_simple_mmx_perm[64] = {
108 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
109 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
110 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
111 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
112 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
113 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
114 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
115 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
/*
 * Within-row scatter table for FF_IDCT_PERM_SSE2: permute_x86() keeps the row
 * bits of the index (i & 0x38) and replaces the low three bits (i & 7) with
 * idct_sse2_row_perm[i & 7], so the same reordering is applied to each group
 * of eight consecutive coefficients.
 */
118 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
120 static int permute_x86(int16_t dst[64], const int16_t src[64],
121 enum idct_permutation_type perm_type)
126 case FF_IDCT_PERM_SIMPLE:
127 for (i = 0; i < 64; i++)
128 dst[idct_simple_mmx_perm[i]] = src[i];
130 case FF_IDCT_PERM_SSE2:
131 for (i = 0; i < 64; i++)
132 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];