]> git.sesse.net Git - ffmpeg/blob - libavcodec/dsputil_template.c
Merge commit '59444c76e6d43529a12dbd80b6dd29c6ba4079a9'
[ffmpeg] / libavcodec / dsputil_template.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * DSP utils
28  */
29
30 #include "pixels.h"
31
32 #include "bit_depth_template.c"
33
34 #if BIT_DEPTH == 8
35 /* draw the edges of width 'w' of an image of size width, height */
36 // FIXME: Check that this is OK for MPEG-4 interlaced.
37 static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height,
38                               int w, int h, int sides)
39 {
40     pixel *buf = (pixel *) _buf;
41     int wrap   = _wrap / sizeof(pixel);
42     pixel *ptr = buf, *last_line;
43     int i;
44
45     /* left and right */
46     for (i = 0; i < height; i++) {
47         memset(ptr - w, ptr[0], w);
48         memset(ptr + width, ptr[width - 1], w);
49         ptr += wrap;
50     }
51
52     /* top and bottom + corners */
53     buf -= w;
54     last_line = buf + (height - 1) * wrap;
55     if (sides & EDGE_TOP)
56         for (i = 0; i < h; i++)
57             // top
58             memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel));
59     if (sides & EDGE_BOTTOM)
60         for (i = 0; i < h; i++)
61             // bottom
62             memcpy(last_line + (i + 1) * wrap, last_line,
63                    (width + w + w) * sizeof(pixel));
64 }
65 #endif
66
67 static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels,
68                               int line_size)
69 {
70     const pixel *pixels = (const pixel *) _pixels;
71     int i;
72
73     /* read the pixels */
74     for (i = 0; i < 8; i++) {
75         block[0] = pixels[0];
76         block[1] = pixels[1];
77         block[2] = pixels[2];
78         block[3] = pixels[3];
79         block[4] = pixels[4];
80         block[5] = pixels[5];
81         block[6] = pixels[6];
82         block[7] = pixels[7];
83         pixels  += line_size / sizeof(pixel);
84         block   += 8;
85     }
86 }
87
88 #if BIT_DEPTH == 8
89 static void FUNCC(clear_block)(int16_t *block)
90 {
91     memset(block, 0, sizeof(int16_t) * 64);
92 }
93
94 static void FUNCC(clear_blocks)(int16_t *blocks)
95 {
96     memset(blocks, 0, sizeof(int16_t) * 6 * 64);
97 }
98 #endif
99
100 #if BIT_DEPTH == 8
101 #include "hpel_template.c"
102 #endif
103
104 #define PIXOP2(OPNAME, OP)                                              \
105 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst,     \
106                                                       const uint8_t *src1, \
107                                                       const uint8_t *src2, \
108                                                       int dst_stride,   \
109                                                       int src_stride1,  \
110                                                       int src_stride2,  \
111                                                       int h)            \
112 {                                                                       \
113     int i;                                                              \
114                                                                         \
115     for (i = 0; i < h; i++) {                                           \
116         pixel4 a, b;                                                    \
117         a = AV_RN4P(&src1[i * src_stride1]);                            \
118         b = AV_RN4P(&src2[i * src_stride2]);                            \
119         OP(*((pixel4 *) &dst[i * dst_stride]),                          \
120            no_rnd_avg_pixel4(a, b));                                    \
121         a = AV_RN4P(&src1[i * src_stride1 + 4 * sizeof(pixel)]);        \
122         b = AV_RN4P(&src2[i * src_stride2 + 4 * sizeof(pixel)]);        \
123         OP(*((pixel4 *) &dst[i * dst_stride + 4 * sizeof(pixel)]),      \
124            no_rnd_avg_pixel4(a, b));                                    \
125     }                                                                   \
126 }                                                                       \
127                                                                         \
128 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst,    \
129                                                        const uint8_t *src1, \
130                                                        const uint8_t *src2, \
131                                                        int dst_stride,  \
132                                                        int src_stride1, \
133                                                        int src_stride2, \
134                                                        int h)           \
135 {                                                                       \
136     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst, src1, src2, dst_stride,     \
137                                        src_stride1, src_stride2, h);    \
138     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst  + 8 * sizeof(pixel),        \
139                                        src1 + 8 * sizeof(pixel),        \
140                                        src2 + 8 * sizeof(pixel),        \
141                                        dst_stride, src_stride1,         \
142                                        src_stride2, h);                 \
143 }                                                                       \
144                                                                         \
145 static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst,            \
146                                                const uint8_t *src1,     \
147                                                const uint8_t *src2,     \
148                                                const uint8_t *src3,     \
149                                                const uint8_t *src4,     \
150                                                int dst_stride,          \
151                                                int src_stride1,         \
152                                                int src_stride2,         \
153                                                int src_stride3,         \
154                                                int src_stride4,         \
155                                                int h)                   \
156 {                                                                       \
157     /* FIXME HIGH BIT DEPTH */                                          \
158     int i;                                                              \
159                                                                         \
160     for (i = 0; i < h; i++) {                                           \
161         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
162         a  = AV_RN32(&src1[i * src_stride1]);                           \
163         b  = AV_RN32(&src2[i * src_stride2]);                           \
164         c  = AV_RN32(&src3[i * src_stride3]);                           \
165         d  = AV_RN32(&src4[i * src_stride4]);                           \
166         l0 = (a & 0x03030303UL) +                                       \
167              (b & 0x03030303UL) +                                       \
168                   0x02020202UL;                                         \
169         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
170              ((b & 0xFCFCFCFCUL) >> 2);                                 \
171         l1 = (c & 0x03030303UL) +                                       \
172              (d & 0x03030303UL);                                        \
173         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
174              ((d & 0xFCFCFCFCUL) >> 2);                                 \
175         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
176            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
177         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
178         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
179         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
180         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
181         l0 = (a & 0x03030303UL) +                                       \
182              (b & 0x03030303UL) +                                       \
183                   0x02020202UL;                                         \
184         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
185              ((b & 0xFCFCFCFCUL) >> 2);                                 \
186         l1 = (c & 0x03030303UL) +                                       \
187              (d & 0x03030303UL);                                        \
188         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
189              ((d & 0xFCFCFCFCUL) >> 2);                                 \
190         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
191            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
192     }                                                                   \
193 }                                                                       \
194                                                                         \
195 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst,     \
196                                                       const uint8_t *src1, \
197                                                       const uint8_t *src2, \
198                                                       const uint8_t *src3, \
199                                                       const uint8_t *src4, \
200                                                       int dst_stride,   \
201                                                       int src_stride1,  \
202                                                       int src_stride2,  \
203                                                       int src_stride3,  \
204                                                       int src_stride4,  \
205                                                       int h)            \
206 {                                                                       \
207     /* FIXME HIGH BIT DEPTH */                                          \
208     int i;                                                              \
209                                                                         \
210     for (i = 0; i < h; i++) {                                           \
211         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
212         a  = AV_RN32(&src1[i * src_stride1]);                           \
213         b  = AV_RN32(&src2[i * src_stride2]);                           \
214         c  = AV_RN32(&src3[i * src_stride3]);                           \
215         d  = AV_RN32(&src4[i * src_stride4]);                           \
216         l0 = (a & 0x03030303UL) +                                       \
217              (b & 0x03030303UL) +                                       \
218                   0x01010101UL;                                         \
219         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
220              ((b & 0xFCFCFCFCUL) >> 2);                                 \
221         l1 = (c & 0x03030303UL) +                                       \
222              (d & 0x03030303UL);                                        \
223         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
224              ((d & 0xFCFCFCFCUL) >> 2);                                 \
225         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
226            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
227         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
228         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
229         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
230         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
231         l0 = (a & 0x03030303UL) +                                       \
232              (b & 0x03030303UL) +                                       \
233                   0x01010101UL;                                         \
234         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
235              ((b & 0xFCFCFCFCUL) >> 2);                                 \
236         l1 = (c & 0x03030303UL) +                                       \
237              (d & 0x03030303UL);                                        \
238         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
239              ((d & 0xFCFCFCFCUL) >> 2);                                 \
240         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
241            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
242     }                                                                   \
243 }                                                                       \
244 static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst,           \
245                                                 const uint8_t *src1,    \
246                                                 const uint8_t *src2,    \
247                                                 const uint8_t *src3,    \
248                                                 const uint8_t *src4,    \
249                                                 int dst_stride,         \
250                                                 int src_stride1,        \
251                                                 int src_stride2,        \
252                                                 int src_stride3,        \
253                                                 int src_stride4,        \
254                                                 int h)                  \
255 {                                                                       \
256     FUNC(OPNAME ## _pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, \
257                                 src_stride1, src_stride2, src_stride3,  \
258                                 src_stride4, h);                        \
259     FUNC(OPNAME ## _pixels8_l4)(dst  + 8 * sizeof(pixel),               \
260                                 src1 + 8 * sizeof(pixel),               \
261                                 src2 + 8 * sizeof(pixel),               \
262                                 src3 + 8 * sizeof(pixel),               \
263                                 src4 + 8 * sizeof(pixel),               \
264                                 dst_stride, src_stride1, src_stride2,   \
265                                 src_stride3, src_stride4, h);           \
266 }                                                                       \
267 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst,    \
268                                                        const uint8_t *src1, \
269                                                        const uint8_t *src2, \
270                                                        const uint8_t *src3, \
271                                                        const uint8_t *src4, \
272                                                        int dst_stride,  \
273                                                        int src_stride1, \
274                                                        int src_stride2, \
275                                                        int src_stride3, \
276                                                        int src_stride4, \
277                                                        int h)           \
278 {                                                                       \
279     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst, src1, src2, src3, src4,     \
280                                        dst_stride, src_stride1, src_stride2, \
281                                        src_stride3, src_stride4, h);    \
282     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst  + 8 * sizeof(pixel),        \
283                                        src1 + 8 * sizeof(pixel),        \
284                                        src2 + 8 * sizeof(pixel),        \
285                                        src3 + 8 * sizeof(pixel),        \
286                                        src4 + 8 * sizeof(pixel),        \
287                                        dst_stride, src_stride1, src_stride2, \
288                                        src_stride3, src_stride4, h);    \
289 }                                                                       \
290                                                                         \
291 static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block,        \
292                                                  const uint8_t *pixels, \
293                                                  ptrdiff_t line_size,   \
294                                                  int h)                 \
295 {                                                                       \
296     /* FIXME HIGH BIT DEPTH */                                          \
297     int j;                                                              \
298                                                                         \
299     for (j = 0; j < 2; j++) {                                           \
300         int i;                                                          \
301         const uint32_t a = AV_RN32(pixels);                             \
302         const uint32_t b = AV_RN32(pixels + 1);                         \
303         uint32_t l0 = (a & 0x03030303UL) +                              \
304                       (b & 0x03030303UL) +                              \
305                            0x02020202UL;                                \
306         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
307                       ((b & 0xFCFCFCFCUL) >> 2);                        \
308         uint32_t l1, h1;                                                \
309                                                                         \
310         pixels += line_size;                                            \
311         for (i = 0; i < h; i += 2) {                                    \
312             uint32_t a = AV_RN32(pixels);                               \
313             uint32_t b = AV_RN32(pixels + 1);                           \
314             l1 = (a & 0x03030303UL) +                                   \
315                  (b & 0x03030303UL);                                    \
316             h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
317                  ((b & 0xFCFCFCFCUL) >> 2);                             \
318             OP(*((uint32_t *) block),                                   \
319                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
320             pixels += line_size;                                        \
321             block  += line_size;                                        \
322             a = AV_RN32(pixels);                                        \
323             b = AV_RN32(pixels + 1);                                    \
324             l0 = (a & 0x03030303UL) +                                   \
325                  (b & 0x03030303UL) +                                   \
326                       0x02020202UL;                                     \
327             h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
328                  ((b & 0xFCFCFCFCUL) >> 2);                             \
329             OP(*((uint32_t *) block),                                   \
330                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
331             pixels += line_size;                                        \
332             block  += line_size;                                        \
333         }                                                               \
334         pixels += 4 - line_size * (h + 1);                              \
335         block  += 4 - line_size * h;                                    \
336     }                                                                   \
337 }                                                                       \
338                                                                         \
339 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2),                          \
340                FUNCC(OPNAME ## _pixels8_xy2),                           \
341                8 * sizeof(pixel))                                       \
342
/* Store operators handed to PIXOP2 as its OP argument: op_avg merges the
 * new value into the destination through rnd_avg_pixel4(), op_put simply
 * overwrites the destination. */
#define op_avg(lhs, rhs) lhs = rnd_avg_pixel4(lhs, rhs)
#define op_put(lhs, rhs) lhs = rhs
#if BIT_DEPTH == 8
/* Map the put_no_rnd_pixels8_8_c name onto put_pixels8_8_c. */
#define put_no_rnd_pixels8_8_c put_pixels8_8_c
/* Instantiate the "avg" and "put" families of the functions above. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#endif
#undef op_avg
#undef op_put