2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "simple_idct.h"
25 #if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_YASM
/*
 * Declaration only: SSE2 variant of the 10-bit ProRes IDCT "put" routine.
 * It is called with a uint16_t destination, a line size, a coefficient block
 * and a second int16_t array named qmat.
 * NOTE(review): presumably implemented in external assembly, given the
 * HAVE_YASM condition guarding this declaration -- confirm.
 */
26 void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
27 int16_t *block, int16_t *qmat);
/*
 * PR_WRAP(INSN) expands to a static wrapper named
 * ff_prores_idct_put_10_<INSN>_wrap() that takes a single int16_t pointer and
 * forwards to ff_prores_idct_put_10_<INSN>().
 *
 * Visible steps of the wrapper:
 *  - declare two 64-element int16_t scratch arrays (qmat, tmp) through
 *    LOCAL_ALIGNED(16, ...);
 *  - run a 64-iteration loop before the call (body not visible in this view);
 *  - call the wrapped routine as (dst, 16, tmp, qmat), i.e. with a linesize
 *    argument of 16;
 *  - run a second 64-iteration loop after the call (body not visible here).
 *
 * NOTE(review): dst is int16_t * in the wrapper but the wrapped prototype
 * declares uint16_t *dst -- confirm the pointer type mismatch is intended.
 */
29 #define PR_WRAP(INSN) \
30 static void ff_prores_idct_put_10_##INSN##_wrap(int16_t *dst){ \
31 LOCAL_ALIGNED(16, int16_t, qmat, [64]); \
32 LOCAL_ALIGNED(16, int16_t, tmp, [64]); \
35 for(i=0; i<64; i++){ \
39 ff_prores_idct_put_10_##INSN (dst, 16, tmp, qmat); \
41 for(i=0; i<64; i++) { \
48 # if HAVE_AVX_EXTERNAL
/*
 * Declaration only: AVX variant of the 10-bit ProRes IDCT "put" routine,
 * with the same parameter list as the SSE2 variant. Only declared when
 * HAVE_AVX_EXTERNAL is set.
 */
49 void ff_prores_idct_put_10_avx(uint16_t *dst, int linesize,
50 int16_t *block, int16_t *qmat);
/*
 * x86 forward-DCT entries. Each initializer lists, in order: a name string,
 * the function (ff_fdct_mmx / ff_fdct_mmxext / ff_fdct_sse2), an
 * FF_IDCT_PERM_* constant (FF_IDCT_PERM_NONE for every entry here) and an
 * AV_CPU_FLAG_* value.
 * NOTE(review): the exact field meanings come from struct algo, which is
 * declared outside this view -- confirm against its definition.
 */
56 static const struct algo fdct_tab_arch[] = {
58 { "MMX", ff_fdct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
60 #if HAVE_MMXEXT_INLINE
61 { "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT },
64 { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 },
/*
 * x86 inverse-DCT entries. Each initializer lists, in order: a name string,
 * the function, the FF_IDCT_PERM_* constant paired with that function, and an
 * AV_CPU_FLAG_* value. Groups of entries are compiled in conditionally
 * (MPEG-4 decoder + YASM for the XVID entries, ProRes decoder + x86-64 + YASM
 * for the PR-* wrappers generated by PR_WRAP, HAVE_SSE2_EXTERNAL /
 * HAVE_AVX_EXTERNAL for the SIMD-specific variants).
 *
 * Some entries carry a fifth initializer of 1; entries without it leave that
 * field zero-initialized.
 * NOTE(review): the meaning of the fifth field is defined by struct algo,
 * which is not visible here -- confirm before relying on it.
 */
69 static const struct algo idct_tab_arch[] = {
71 { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
73 #if CONFIG_MPEG4_DECODER && HAVE_YASM
75 { "XVID-MMX", ff_xvid_idct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX, 1 },
76 { "XVID-MMXEXT", ff_xvid_idct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 },
78 #if HAVE_SSE2_EXTERNAL
79 { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
81 #endif /* CONFIG_MPEG4_DECODER && HAVE_YASM */
82 #if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_YASM
83 { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
84 # if HAVE_AVX_EXTERNAL
85 { "PR-AVX", ff_prores_idct_put_10_avx_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
90 #if HAVE_SSE2_EXTERNAL
91 { "SIMPLE10-SSE2", ff_simple_idct10_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2},
92 { "SIMPLE12-SSE2", ff_simple_idct12_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
95 { "SIMPLE10-AVX", ff_simple_idct10_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX},
96 { "SIMPLE12-AVX", ff_simple_idct12_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
/*
 * Index table for the FF_IDCT_PERM_SIMPLE case of permute_x86(): source
 * coefficient i is stored at dst[idct_simple_mmx_perm[i]]. Laid out as
 * 8 rows of 8 entries, one per source index 0..63.
 */
103 static const uint8_t idct_simple_mmx_perm[64] = {
104 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
105 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
106 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
107 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
108 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
109 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
110 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
111 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
/*
 * Within-row reordering for the FF_IDCT_PERM_SSE2 case of permute_x86():
 * the low three bits of the source index (i & 7) are replaced by
 * idct_sse2_row_perm[i & 7], while the row bits (i & 0x38) are kept.
 */
114 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
116 static int permute_x86(int16_t dst[64], const int16_t src[64],
117 enum idct_permutation_type perm_type)
122 case FF_IDCT_PERM_SIMPLE:
123 for (i = 0; i < 64; i++)
124 dst[idct_simple_mmx_perm[i]] = src[i];
126 case FF_IDCT_PERM_SSE2:
127 for (i = 0; i < 64; i++)
128 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];