git.sesse.net Git - ffmpeg/blob - libavcodec/dsputil_template.c
dsputil_template: Detemplatize the code
[ffmpeg] / libavcodec / dsputil_template.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * DSP utils
28  */
29
30 #include "pixels.h"
31
32 /* draw the edges of width 'w' of an image of size width, height */
33 // FIXME: Check that this is OK for MPEG-4 interlaced.
34 static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
35                            int w, int h, int sides)
36 {
37     uint8_t *ptr = buf, *last_line;
38     int i;
39
40     /* left and right */
41     for (i = 0; i < height; i++) {
42         memset(ptr - w, ptr[0], w);
43         memset(ptr + width, ptr[width - 1], w);
44         ptr += wrap;
45     }
46
47     /* top and bottom + corners */
48     buf -= w;
49     last_line = buf + (height - 1) * wrap;
50     if (sides & EDGE_TOP)
51         for (i = 0; i < h; i++)
52             // top
53             memcpy(buf - (i + 1) * wrap, buf, width + w + w);
54     if (sides & EDGE_BOTTOM)
55         for (i = 0; i < h; i++)
56             // bottom
57             memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
58 }
59
/**
 * Set one block of 64 coefficients to zero.
 *
 * @param block array of at least 64 int16_t values; exactly the first 64
 *              are overwritten
 */
static void clear_block_8_c(int16_t *block)
{
    enum { COEFFS_PER_BLOCK = 64 };

    memset(block, 0, COEFFS_PER_BLOCK * sizeof(*block));
}
64
/**
 * Set six consecutive blocks of 64 coefficients each to zero.
 *
 * @param blocks array of at least 6 * 64 int16_t values; exactly the first
 *               6 * 64 are overwritten
 */
static void clear_blocks_8_c(int16_t *blocks)
{
    enum { BLOCK_COUNT = 6, COEFFS_PER_BLOCK = 64 };

    memset(blocks, 0, BLOCK_COUNT * COEFFS_PER_BLOCK * sizeof(*blocks));
}
69
/**
 * Define the 8-bit pixel averaging helpers for one store operation.
 *
 * OPNAME is the prefix of every generated function name; OP(dst, val) is
 * the macro used to store the 32-bit result val into the lvalue dst.
 * Each expansion defines:
 *  - OPNAME_no_rnd_pixels{8,16}_l2_8: two sources combined with
 *    no_rnd_avg32();
 *  - OPNAME_pixels{8,16}_l4_8 and OPNAME_no_rnd_pixels{8,16}_l4_8: sum of
 *    four sources divided by 4, with a per-byte bias of 2 resp. 1 added
 *    before the division;
 *  - OPNAME_pixels8_xy2_8_c: every pixel averaged with its right, lower and
 *    lower-right neighbour (bias 2); the 16 pixel wide variant is produced
 *    by CALL_2X_PIXELS, which is defined elsewhere.
 *
 * The kernels work on four packed bytes per 32-bit word. The low two bits
 * and the high six bits of every byte are summed separately, so no carry
 * can spill from one byte into the next.
 */
#define PIXOP2(OPNAME, OP)                                              \
static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst,         \
                                                  const uint8_t *src1,  \
                                                  const uint8_t *src2,  \
                                                  int dst_stride,       \
                                                  int src_stride1,      \
                                                  int src_stride2,      \
                                                  int h)                \
{                                                                       \
    int y, x;                                                           \
                                                                        \
    for (y = 0; y < h; y++) {                                           \
        /* an 8-pixel row is handled as two 32-bit words */             \
        for (x = 0; x < 8; x += 4) {                                    \
            const uint32_t p = AV_RN32(&src1[y * src_stride1 + x]);     \
            const uint32_t q = AV_RN32(&src2[y * src_stride2 + x]);     \
            OP(*((uint32_t *) &dst[y * dst_stride + x]),                \
               no_rnd_avg32(p, q));                                     \
        }                                                               \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst,        \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   int dst_stride,      \
                                                   int src_stride1,     \
                                                   int src_stride2,     \
                                                   int h)               \
{                                                                       \
    int x;                                                              \
                                                                        \
    /* left 8-pixel half first, then the right one */                   \
    for (x = 0; x < 16; x += 8)                                         \
        OPNAME ## _no_rnd_pixels8_l2_8(dst + x, src1 + x, src2 + x,     \
                                       dst_stride, src_stride1,         \
                                       src_stride2, h);                 \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst,                \
                                           const uint8_t *src1,         \
                                           const uint8_t *src2,         \
                                           const uint8_t *src3,         \
                                           const uint8_t *src4,         \
                                           int dst_stride,              \
                                           int src_stride1,             \
                                           int src_stride2,             \
                                           int src_stride3,             \
                                           int src_stride4,             \
                                           int h)                       \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int y, x;                                                           \
                                                                        \
    for (y = 0; y < h; y++) {                                           \
        for (x = 0; x < 8; x += 4) {                                    \
            const uint32_t v1 = AV_RN32(&src1[y * src_stride1 + x]);    \
            const uint32_t v2 = AV_RN32(&src2[y * src_stride2 + x]);    \
            const uint32_t v3 = AV_RN32(&src3[y * src_stride3 + x]);    \
            const uint32_t v4 = AV_RN32(&src4[y * src_stride4 + x]);    \
            /* low 2 bits of every byte, plus the bias */               \
            const uint32_t lo12 = (v1 & 0x03030303UL) +                 \
                                  (v2 & 0x03030303UL) +                 \
                                  0x02020202UL;                         \
            const uint32_t lo34 = (v3 & 0x03030303UL) +                 \
                                  (v4 & 0x03030303UL);                  \
            /* high 6 bits of every byte, pre-divided by 4 */           \
            const uint32_t hi12 = ((v1 & 0xFCFCFCFCUL) >> 2) +          \
                                  ((v2 & 0xFCFCFCFCUL) >> 2);           \
            const uint32_t hi34 = ((v3 & 0xFCFCFCFCUL) >> 2) +          \
                                  ((v4 & 0xFCFCFCFCUL) >> 2);           \
                                                                        \
            OP(*((uint32_t *) &dst[y * dst_stride + x]),                \
               hi12 + hi34 + (((lo12 + lo34) >> 2) & 0x0F0F0F0FUL));    \
        }                                                               \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst,         \
                                                  const uint8_t *src1,  \
                                                  const uint8_t *src2,  \
                                                  const uint8_t *src3,  \
                                                  const uint8_t *src4,  \
                                                  int dst_stride,       \
                                                  int src_stride1,      \
                                                  int src_stride2,      \
                                                  int src_stride3,      \
                                                  int src_stride4,      \
                                                  int h)                \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int y, x;                                                           \
                                                                        \
    for (y = 0; y < h; y++) {                                           \
        for (x = 0; x < 8; x += 4) {                                    \
            const uint32_t v1 = AV_RN32(&src1[y * src_stride1 + x]);    \
            const uint32_t v2 = AV_RN32(&src2[y * src_stride2 + x]);    \
            const uint32_t v3 = AV_RN32(&src3[y * src_stride3 + x]);    \
            const uint32_t v4 = AV_RN32(&src4[y * src_stride4 + x]);    \
            /* low 2 bits of every byte, plus the bias */               \
            const uint32_t lo12 = (v1 & 0x03030303UL) +                 \
                                  (v2 & 0x03030303UL) +                 \
                                  0x01010101UL;                         \
            const uint32_t lo34 = (v3 & 0x03030303UL) +                 \
                                  (v4 & 0x03030303UL);                  \
            /* high 6 bits of every byte, pre-divided by 4 */           \
            const uint32_t hi12 = ((v1 & 0xFCFCFCFCUL) >> 2) +          \
                                  ((v2 & 0xFCFCFCFCUL) >> 2);           \
            const uint32_t hi34 = ((v3 & 0xFCFCFCFCUL) >> 2) +          \
                                  ((v4 & 0xFCFCFCFCUL) >> 2);           \
                                                                        \
            OP(*((uint32_t *) &dst[y * dst_stride + x]),                \
               hi12 + hi34 + (((lo12 + lo34) >> 2) & 0x0F0F0F0FUL));    \
        }                                                               \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst,               \
                                            const uint8_t *src1,        \
                                            const uint8_t *src2,        \
                                            const uint8_t *src3,        \
                                            const uint8_t *src4,        \
                                            int dst_stride,             \
                                            int src_stride1,            \
                                            int src_stride2,            \
                                            int src_stride3,            \
                                            int src_stride4,            \
                                            int h)                      \
{                                                                       \
    int x;                                                              \
                                                                        \
    /* left 8-pixel half first, then the right one */                   \
    for (x = 0; x < 16; x += 8)                                         \
        OPNAME ## _pixels8_l4_8(dst + x, src1 + x, src2 + x,            \
                                src3 + x, src4 + x, dst_stride,         \
                                src_stride1, src_stride2, src_stride3,  \
                                src_stride4, h);                        \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst,        \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   const uint8_t *src3, \
                                                   const uint8_t *src4, \
                                                   int dst_stride,      \
                                                   int src_stride1,     \
                                                   int src_stride2,     \
                                                   int src_stride3,     \
                                                   int src_stride4,     \
                                                   int h)               \
{                                                                       \
    int x;                                                              \
                                                                        \
    /* left 8-pixel half first, then the right one */                   \
    for (x = 0; x < 16; x += 8)                                         \
        OPNAME ## _no_rnd_pixels8_l4_8(dst + x, src1 + x, src2 + x,     \
                                       src3 + x, src4 + x, dst_stride,  \
                                       src_stride1, src_stride2,        \
                                       src_stride3, src_stride4, h);    \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block,           \
                                              const uint8_t *pixels,    \
                                              ptrdiff_t line_size,      \
                                              int h)                    \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int half;                                                           \
                                                                        \
    /* the 8-pixel block is done as two 4-pixel wide columns */         \
    for (half = 0; half < 2; half++) {                                  \
        int y;                                                          \
        const uint32_t top_l = AV_RN32(pixels);                         \
        const uint32_t top_r = AV_RN32(pixels + 1);                     \
        /* sums for the even source line, rounding bias included */     \
        uint32_t lo_even = (top_l & 0x03030303UL) +                     \
                           (top_r & 0x03030303UL) +                     \
                           0x02020202UL;                                \
        uint32_t hi_even = ((top_l & 0xFCFCFCFCUL) >> 2) +              \
                           ((top_r & 0xFCFCFCFCUL) >> 2);               \
        /* sums for the odd source line, no bias */                     \
        uint32_t lo_odd, hi_odd;                                        \
                                                                        \
        pixels += line_size;                                            \
        for (y = 0; y < h; y += 2) {                                    \
            uint32_t cur_l = AV_RN32(pixels);                           \
            uint32_t cur_r = AV_RN32(pixels + 1);                       \
                                                                        \
            lo_odd = (cur_l & 0x03030303UL) +                           \
                     (cur_r & 0x03030303UL);                            \
            hi_odd = ((cur_l & 0xFCFCFCFCUL) >> 2) +                    \
                     ((cur_r & 0xFCFCFCFCUL) >> 2);                     \
            OP(*((uint32_t *) block),                                   \
               hi_even + hi_odd +                                       \
               (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL));             \
            pixels += line_size;                                        \
            block  += line_size;                                        \
                                                                        \
            cur_l   = AV_RN32(pixels);                                  \
            cur_r   = AV_RN32(pixels + 1);                              \
            lo_even = (cur_l & 0x03030303UL) +                          \
                      (cur_r & 0x03030303UL) +                          \
                      0x02020202UL;                                     \
            hi_even = ((cur_l & 0xFCFCFCFCUL) >> 2) +                   \
                      ((cur_r & 0xFCFCFCFCUL) >> 2);                    \
            OP(*((uint32_t *) block),                                   \
               hi_even + hi_odd +                                       \
               (((lo_even + lo_odd) >> 2) & 0x0F0F0F0FUL));             \
            pixels += line_size;                                        \
            block  += line_size;                                        \
        }                                                               \
        /* back to the top (for even h), 4 pixels further right */      \
        pixels += 4 - line_size * (h + 1);                              \
        block  += 4 - line_size * h;                                    \
    }                                                                   \
}                                                                       \
                                                                        \
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c,                             \
               OPNAME ## _pixels8_xy2_8_c,                              \
               8)
308
309 #define op_avg(a, b) a = rnd_avg32(a, b)
310 #define op_put(a, b) a = b
311 #define put_no_rnd_pixels8_8_c put_pixels8_8_c
312 PIXOP2(avg, op_avg)
313 PIXOP2(put, op_put)
314 #undef op_avg
315 #undef op_put