/*
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */


#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/crc.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/x86/asm.h"
#include "libavfilter/vf_spp.h"

#if HAVE_MMX_INLINE
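/* Hard-thresholding requantizer (MMX inline asm): AC coefficients whose
 * magnitude does not exceed the qp-derived threshold are zeroed, the
 * remaining ones are rounded and divided by 8. The DC coefficient is
 * handled separately in C after the asm block. */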
static void hardthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp * ((1<<4) - bias) - 1;

#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "psubw %%mm4, %%mm0         \n"                                     \
    "psubw %%mm4, %%mm1         \n"                                     \
    "psubw %%mm4, %%mm2         \n"                                     \
    "psubw %%mm4, %%mm3         \n"                                     \
    "paddusw %%mm5, %%mm0       \n"                                     \
    "paddusw %%mm5, %%mm1       \n"                                     \
    "paddusw %%mm5, %%mm2       \n"                                     \
    "paddusw %%mm5, %%mm3       \n"                                     \
    "paddw %%mm6, %%mm0         \n"                                     \
    "paddw %%mm6, %%mm1         \n"                                     \
    "paddw %%mm6, %%mm2         \n"                                     \
    "paddw %%mm6, %%mm3         \n"                                     \
    "psubusw %%mm6, %%mm0       \n"                                     \
    "psubusw %%mm6, %%mm1       \n"                                     \
    "psubusw %%mm6, %%mm2       \n"                                     \
    "psubusw %%mm6, %%mm3       \n"                                     \
    "psraw $3, %%mm0            \n"                                     \
    "psraw $3, %%mm1            \n"                                     \
    "psraw $3, %%mm2            \n"                                     \
    "psraw $3, %%mm3            \n"                                     \
                                                                        \
    "movq %%mm0, %%mm7          \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq %%mm1, %%mm2          \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq %%mm0, %%mm3          \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq %%mm0, " #dst0 "      \n"                                     \
    "movq %%mm7, " #dst1 "      \n"                                     \
    "movq %%mm3, " #dst2 "      \n"                                     \
    "movq %%mm1, " #dst3 "      \n"

    __asm__ volatile(
        "movd %2, %%mm4             \n"
        "movd %3, %%mm5             \n"
        "movd %4, %%mm6             \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate than needed?
    );
    dst[0] = (src[0] + 4) >> 3;
}

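/* Soft-thresholding requantizer (MMX inline asm): each AC coefficient's
 * magnitude is reduced by the qp-derived threshold (saturating at zero),
 * the sign is restored, and the result is rounded and divided by 8. The
 * DC coefficient is handled separately in C after the asm block. */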
static void softthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp * ((1<<4) - bias) - 1;

#undef REQUANT_CORE
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "pxor %%mm6, %%mm6          \n"                                     \
    "pxor %%mm7, %%mm7          \n"                                     \
    "pcmpgtw %%mm0, %%mm6       \n"                                     \
    "pcmpgtw %%mm1, %%mm7       \n"                                     \
    "pxor %%mm6, %%mm0          \n"                                     \
    "pxor %%mm7, %%mm1          \n"                                     \
    "psubusw %%mm4, %%mm0       \n"                                     \
    "psubusw %%mm4, %%mm1       \n"                                     \
    "pxor %%mm6, %%mm0          \n"                                     \
    "pxor %%mm7, %%mm1          \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "pxor %%mm6, %%mm6          \n"                                     \
    "pxor %%mm7, %%mm7          \n"                                     \
    "pcmpgtw %%mm2, %%mm6       \n"                                     \
    "pcmpgtw %%mm3, %%mm7       \n"                                     \
    "pxor %%mm6, %%mm2          \n"                                     \
    "pxor %%mm7, %%mm3          \n"                                     \
    "psubusw %%mm4, %%mm2       \n"                                     \
    "psubusw %%mm4, %%mm3       \n"                                     \
    "pxor %%mm6, %%mm2          \n"                                     \
    "pxor %%mm7, %%mm3          \n"                                     \
                                                                        \
    "paddsw %%mm5, %%mm0        \n"                                     \
    "paddsw %%mm5, %%mm1        \n"                                     \
    "paddsw %%mm5, %%mm2        \n"                                     \
    "paddsw %%mm5, %%mm3        \n"                                     \
    "psraw $3, %%mm0            \n"                                     \
    "psraw $3, %%mm1            \n"                                     \
    "psraw $3, %%mm2            \n"                                     \
    "psraw $3, %%mm3            \n"                                     \
                                                                        \
    "movq %%mm0, %%mm7          \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq %%mm1, %%mm2          \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq %%mm0, %%mm3          \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq %%mm0, " #dst0 "      \n"                                     \
    "movq %%mm7, " #dst1 "      \n"                                     \
    "movq %%mm3, " #dst2 "      \n"                                     \
    "movq %%mm1, " #dst3 "      \n"

    __asm__ volatile(
        "movd %2, %%mm4             \n"
        "movd %3, %%mm5             \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate than needed?
    );

    dst[0] = (src[0] + 4) >> 3;
}

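/* Store one slice of 16-bit intermediate samples as 8-bit pixels: the
 * per-line ordered-dither values (pre-shifted right by log2_scale) are
 * added, the sums are shifted right by MAX_LEVEL - log2_scale and packed
 * to bytes with unsigned saturation, eight pixels per iteration. */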
static void store_slice_mmx(uint8_t *dst, const int16_t *src,
                            int dst_stride, int src_stride,
                            int width, int height, int log2_scale,
                            const uint8_t dither[8][8])
{
    int y;

    for (y = 0; y < height; y++) {
        uint8_t *dst1 = dst;
        const int16_t *src1 = src;
        __asm__ volatile(
            "movq (%3), %%mm3           \n"
            "movq (%3), %%mm4           \n"
            "movd %4, %%mm2             \n"
            "pxor %%mm0, %%mm0          \n"
            "punpcklbw %%mm0, %%mm3     \n"
            "punpckhbw %%mm0, %%mm4     \n"
            "psraw %%mm2, %%mm3         \n"
            "psraw %%mm2, %%mm4         \n"
            "movd %5, %%mm2             \n"
            "1:                         \n"
            "movq (%0), %%mm0           \n"
            "movq 8(%0), %%mm1          \n"
            "paddw %%mm3, %%mm0         \n"
            "paddw %%mm4, %%mm1         \n"
            "psraw %%mm2, %%mm0         \n"
            "psraw %%mm2, %%mm1         \n"
            "packuswb %%mm1, %%mm0      \n"
            "movq %%mm0, (%1)           \n"
            "add $16, %0                \n"
            "add $8, %1                 \n"
            "cmp %2, %1                 \n"
            " jb 1b                     \n"
            : "+r" (src1), "+r"(dst1)
            : "r"(dst + width), "r"(dither[y]), "g"(log2_scale), "g"(MAX_LEVEL - log2_scale)
        );
        src += src_stride;
        dst += dst_stride;
    }
}

#endif /* HAVE_MMX_INLINE */

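/* Runtime dispatch: the inline-asm requantizers depend on the coefficient
 * order of a specific IDCT permutation, so they are only selected when the
 * CRC of the current idct_permutation matches the expected value and the
 * input is at most 8 bits per sample; store_slice_mmx has no such
 * dependency and is used whenever MMX is available. */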
av_cold void ff_spp_init_x86(SPPContext *s)
{
#if HAVE_MMX_INLINE
    int cpu_flags = av_get_cpu_flags();

    if (cpu_flags & AV_CPU_FLAG_MMX) {
        static const uint32_t mmx_idct_perm_crc = 0xe5e8adc4;
        uint32_t idct_perm_crc =
            av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0,
                   s->dct->idct_permutation,
                   sizeof(s->dct->idct_permutation));
        int64_t bps;
        s->store_slice = store_slice_mmx;
        av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
        if (bps <= 8 && idct_perm_crc == mmx_idct_perm_crc) {
            switch (s->mode) {
            case 0: s->requantize = hardthresh_mmx; break;
            case 1: s->requantize = softthresh_mmx; break;
            }
        }
    }
#endif /* HAVE_MMX_INLINE */
}