]> git.sesse.net Git - ffmpeg/blob - libavcodec/dsputil_template.c
Merge commit 'd3c3c1664a958923f234283e66fbcbfe69a6927f'
[ffmpeg] / libavcodec / dsputil_template.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * DSP utils
28  */
29
30 #include "pixels.h"
31
32 #include "bit_depth_template.c"
33
34 #if BIT_DEPTH == 8
35 /* draw the edges of width 'w' of an image of size width, height */
36 // FIXME: Check that this is OK for MPEG-4 interlaced.
37 static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height,
38                               int w, int h, int sides)
39 {
40     pixel *buf = (pixel *) _buf;
41     int wrap   = _wrap / sizeof(pixel);
42     pixel *ptr = buf, *last_line;
43     int i;
44
45     /* left and right */
46     for (i = 0; i < height; i++) {
47         memset(ptr - w, ptr[0], w);
48         memset(ptr + width, ptr[width - 1], w);
49         ptr += wrap;
50     }
51
52     /* top and bottom + corners */
53     buf -= w;
54     last_line = buf + (height - 1) * wrap;
55     if (sides & EDGE_TOP)
56         for (i = 0; i < h; i++)
57             // top
58             memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel));
59     if (sides & EDGE_BOTTOM)
60         for (i = 0; i < h; i++)
61             // bottom
62             memcpy(last_line + (i + 1) * wrap, last_line,
63                    (width + w + w) * sizeof(pixel));
64 }
65 #endif
66
67 static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels,
68                               int line_size)
69 {
70     const pixel *pixels = (const pixel *) _pixels;
71     int i;
72
73     /* read the pixels */
74     for (i = 0; i < 8; i++) {
75         block[0] = pixels[0];
76         block[1] = pixels[1];
77         block[2] = pixels[2];
78         block[3] = pixels[3];
79         block[4] = pixels[4];
80         block[5] = pixels[5];
81         block[6] = pixels[6];
82         block[7] = pixels[7];
83         pixels  += line_size / sizeof(pixel);
84         block   += 8;
85     }
86 }
87
88 #if BIT_DEPTH == 8
89 static void FUNCC(clear_block)(int16_t *block)
90 {
91     memset(block, 0, sizeof(int16_t) * 64);
92 }
93
94 static void FUNCC(clear_blocks)(int16_t *blocks)
95 {
96     memset(blocks, 0, sizeof(int16_t) * 6 * 64);
97 }
98 #endif
99
/*
 * PIXOP2(OPNAME, OP): generate a family of static inline pixel-block helpers.
 *
 * OPNAME is pasted into every generated function name (the names are further
 * wrapped by FUNC()/FUNCC()). OP is invoked as OP(destination_lvalue, value)
 * and decides how a computed value is stored into the destination.
 *
 * All strides are added to uint8_t pointers, i.e. they are byte offsets, and
 * every function processes h rows.
 *
 * Generated functions:
 *
 *  - <OPNAME>_no_rnd_pixels8_l2: per row, reads one pixel4 word from src1 and
 *    one from src2 with AV_RN4P() at byte offset 0, combines them with
 *    no_rnd_avg_pixel4() and stores the result through OP; then does the same
 *    at byte offset 4 * sizeof(pixel).
 *
 *  - <OPNAME>_no_rnd_pixels16_l2: calls the 8-wide l2 function twice, the
 *    second time with dst/src1/src2 advanced by 8 * sizeof(pixel) bytes.
 *
 *  - <OPNAME>_pixels8_l4: per row, combines 8 bytes from each of four sources
 *    using two 32-bit words. Each byte x is split into its low two bits
 *    (x & 3) and its upper six bits (x >> 2, taken via the 0xFC mask). The
 *    low parts are summed together with a per-byte constant of 2
 *    (0x02020202) and shifted right by 2; the upper parts are summed as is.
 *    Per byte this yields (a + b + c + d + 2) >> 2. The 0x0F0F0F0F mask
 *    discards the bits that the word-wide shift drags in from the neighboring
 *    byte.
 *
 *  - <OPNAME>_no_rnd_pixels8_l4: identical, except that the per-byte constant
 *    is 1 (0x01010101), i.e. (a + b + c + d + 1) >> 2.
 *
 *  - <OPNAME>_pixels16_l4 / <OPNAME>_no_rnd_pixels16_l4: call the matching
 *    8-wide l4 function twice, the second time with all pointers advanced by
 *    8 * sizeof(pixel) bytes.
 *
 *  - <OPNAME>_pixels8_xy2: for each output byte, combines the source bytes at
 *    positions x and x + 1 of two vertically adjacent rows, using the same
 *    low/high split as the l4 functions with a per-byte constant of 2. The
 *    block is handled as two 4-byte-wide columns (outer loop over j). The
 *    inner loop emits two rows per iteration and keeps the horizontal sums of
 *    the previous row (l0/h0 or l1/h1) so each source row is read only once.
 *    Only the l0 terms carry the constant, so every output contains it
 *    exactly once. block advances by line_size as well, so source and
 *    destination share one stride. After a column the pointers are rewound by
 *    line_size * (h + 1) and line_size * h respectively and moved 4 bytes to
 *    the right; since the inner loop advances two rows at a time, this rewind
 *    is exact only when h is even. Per column, h + 1 source rows are read.
 *
 *  - CALL_2X_PIXELS(<OPNAME>_pixels16_xy2, <OPNAME>_pixels8_xy2, ...):
 *    NOTE(review): CALL_2X_PIXELS is defined elsewhere; presumably it defines
 *    the 16-wide function by invoking the 8-wide one twice,
 *    8 * sizeof(pixel) bytes apart - confirm against its definition.
 *
 * The l4 and xy2 functions use AV_RN32(), byte-wise masks and literal +4 / +1
 * byte offsets, which is what their "FIXME HIGH BIT DEPTH" markers refer to.
 */
100 #define PIXOP2(OPNAME, OP)                                              \
101 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst,     \
102                                                       const uint8_t *src1, \
103                                                       const uint8_t *src2, \
104                                                       int dst_stride,   \
105                                                       int src_stride1,  \
106                                                       int src_stride2,  \
107                                                       int h)            \
108 {                                                                       \
109     int i;                                                              \
110                                                                         \
111     for (i = 0; i < h; i++) {                                           \
112         pixel4 a, b;                                                    \
113         a = AV_RN4P(&src1[i * src_stride1]);                            \
114         b = AV_RN4P(&src2[i * src_stride2]);                            \
115         OP(*((pixel4 *) &dst[i * dst_stride]),                          \
116            no_rnd_avg_pixel4(a, b));                                    \
117         a = AV_RN4P(&src1[i * src_stride1 + 4 * sizeof(pixel)]);        \
118         b = AV_RN4P(&src2[i * src_stride2 + 4 * sizeof(pixel)]);        \
119         OP(*((pixel4 *) &dst[i * dst_stride + 4 * sizeof(pixel)]),      \
120            no_rnd_avg_pixel4(a, b));                                    \
121     }                                                                   \
122 }                                                                       \
123                                                                         \
124 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst,    \
125                                                        const uint8_t *src1, \
126                                                        const uint8_t *src2, \
127                                                        int dst_stride,  \
128                                                        int src_stride1, \
129                                                        int src_stride2, \
130                                                        int h)           \
131 {                                                                       \
132     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst, src1, src2, dst_stride,     \
133                                        src_stride1, src_stride2, h);    \
134     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst  + 8 * sizeof(pixel),        \
135                                        src1 + 8 * sizeof(pixel),        \
136                                        src2 + 8 * sizeof(pixel),        \
137                                        dst_stride, src_stride1,         \
138                                        src_stride2, h);                 \
139 }                                                                       \
140                                                                         \
141 static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst,            \
142                                                const uint8_t *src1,     \
143                                                const uint8_t *src2,     \
144                                                const uint8_t *src3,     \
145                                                const uint8_t *src4,     \
146                                                int dst_stride,          \
147                                                int src_stride1,         \
148                                                int src_stride2,         \
149                                                int src_stride3,         \
150                                                int src_stride4,         \
151                                                int h)                   \
152 {                                                                       \
153     /* FIXME HIGH BIT DEPTH: byte-wise masks, +4 byte offsets */        \
154     int i;                                                              \
155                                                                         \
156     for (i = 0; i < h; i++) {                                           \
157         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
158         a  = AV_RN32(&src1[i * src_stride1]);                           \
159         b  = AV_RN32(&src2[i * src_stride2]);                           \
160         c  = AV_RN32(&src3[i * src_stride3]);                           \
161         d  = AV_RN32(&src4[i * src_stride4]);                           \
162         l0 = (a & 0x03030303UL) +                                       \
163              (b & 0x03030303UL) +                                       \
164                   0x02020202UL;                                         \
165         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
166              ((b & 0xFCFCFCFCUL) >> 2);                                 \
167         l1 = (c & 0x03030303UL) +                                       \
168              (d & 0x03030303UL);                                        \
169         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
170              ((d & 0xFCFCFCFCUL) >> 2);                                 \
171         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
172            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
173         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
174         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
175         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
176         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
177         l0 = (a & 0x03030303UL) +                                       \
178              (b & 0x03030303UL) +                                       \
179                   0x02020202UL;                                         \
180         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
181              ((b & 0xFCFCFCFCUL) >> 2);                                 \
182         l1 = (c & 0x03030303UL) +                                       \
183              (d & 0x03030303UL);                                        \
184         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
185              ((d & 0xFCFCFCFCUL) >> 2);                                 \
186         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
187            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
188     }                                                                   \
189 }                                                                       \
190                                                                         \
191 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst,     \
192                                                       const uint8_t *src1, \
193                                                       const uint8_t *src2, \
194                                                       const uint8_t *src3, \
195                                                       const uint8_t *src4, \
196                                                       int dst_stride,   \
197                                                       int src_stride1,  \
198                                                       int src_stride2,  \
199                                                       int src_stride3,  \
200                                                       int src_stride4,  \
201                                                       int h)            \
202 {                                                                       \
203     /* FIXME HIGH BIT DEPTH: byte-wise masks, +4 byte offsets */        \
204     int i;                                                              \
205                                                                         \
206     for (i = 0; i < h; i++) {                                           \
207         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
208         a  = AV_RN32(&src1[i * src_stride1]);                           \
209         b  = AV_RN32(&src2[i * src_stride2]);                           \
210         c  = AV_RN32(&src3[i * src_stride3]);                           \
211         d  = AV_RN32(&src4[i * src_stride4]);                           \
212         l0 = (a & 0x03030303UL) +                                       \
213              (b & 0x03030303UL) +                                       \
214                   0x01010101UL;                                         \
215         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
216              ((b & 0xFCFCFCFCUL) >> 2);                                 \
217         l1 = (c & 0x03030303UL) +                                       \
218              (d & 0x03030303UL);                                        \
219         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
220              ((d & 0xFCFCFCFCUL) >> 2);                                 \
221         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
222            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
223         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
224         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
225         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
226         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
227         l0 = (a & 0x03030303UL) +                                       \
228              (b & 0x03030303UL) +                                       \
229                   0x01010101UL;                                         \
230         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
231              ((b & 0xFCFCFCFCUL) >> 2);                                 \
232         l1 = (c & 0x03030303UL) +                                       \
233              (d & 0x03030303UL);                                        \
234         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
235              ((d & 0xFCFCFCFCUL) >> 2);                                 \
236         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
237            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
238     }                                                                   \
239 }                                                                       \
240 static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst,           \
241                                                 const uint8_t *src1,    \
242                                                 const uint8_t *src2,    \
243                                                 const uint8_t *src3,    \
244                                                 const uint8_t *src4,    \
245                                                 int dst_stride,         \
246                                                 int src_stride1,        \
247                                                 int src_stride2,        \
248                                                 int src_stride3,        \
249                                                 int src_stride4,        \
250                                                 int h)                  \
251 {                                                                       \
252     FUNC(OPNAME ## _pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, \
253                                 src_stride1, src_stride2, src_stride3,  \
254                                 src_stride4, h);                        \
255     FUNC(OPNAME ## _pixels8_l4)(dst  + 8 * sizeof(pixel),               \
256                                 src1 + 8 * sizeof(pixel),               \
257                                 src2 + 8 * sizeof(pixel),               \
258                                 src3 + 8 * sizeof(pixel),               \
259                                 src4 + 8 * sizeof(pixel),               \
260                                 dst_stride, src_stride1, src_stride2,   \
261                                 src_stride3, src_stride4, h);           \
262 }                                                                       \
263 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst,    \
264                                                        const uint8_t *src1, \
265                                                        const uint8_t *src2, \
266                                                        const uint8_t *src3, \
267                                                        const uint8_t *src4, \
268                                                        int dst_stride,  \
269                                                        int src_stride1, \
270                                                        int src_stride2, \
271                                                        int src_stride3, \
272                                                        int src_stride4, \
273                                                        int h)           \
274 {                                                                       \
275     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst, src1, src2, src3, src4,     \
276                                        dst_stride, src_stride1, src_stride2, \
277                                        src_stride3, src_stride4, h);    \
278     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst  + 8 * sizeof(pixel),        \
279                                        src1 + 8 * sizeof(pixel),        \
280                                        src2 + 8 * sizeof(pixel),        \
281                                        src3 + 8 * sizeof(pixel),        \
282                                        src4 + 8 * sizeof(pixel),        \
283                                        dst_stride, src_stride1, src_stride2, \
284                                        src_stride3, src_stride4, h);    \
285 }                                                                       \
286                                                                         \
287 static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block,        \
288                                                  const uint8_t *pixels, \
289                                                  ptrdiff_t line_size,   \
290                                                  int h)                 \
291 {                                                                       \
292     /* FIXME HIGH BIT DEPTH: byte-wise masks, +1/+4 byte offsets */     \
293     int j;                                                              \
294                                                                         \
295     for (j = 0; j < 2; j++) {                                           \
296         int i;                                                          \
297         const uint32_t a = AV_RN32(pixels);                             \
298         const uint32_t b = AV_RN32(pixels + 1);                         \
299         uint32_t l0 = (a & 0x03030303UL) +                              \
300                       (b & 0x03030303UL) +                              \
301                            0x02020202UL;                                \
302         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
303                       ((b & 0xFCFCFCFCUL) >> 2);                        \
304         uint32_t l1, h1;                                                \
305                                                                         \
306         pixels += line_size;                                            \
307         for (i = 0; i < h; i += 2) {                                    \
308             uint32_t a = AV_RN32(pixels);                               \
309             uint32_t b = AV_RN32(pixels + 1);                           \
310             l1 = (a & 0x03030303UL) +                                   \
311                  (b & 0x03030303UL);                                    \
312             h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
313                  ((b & 0xFCFCFCFCUL) >> 2);                             \
314             OP(*((uint32_t *) block),                                   \
315                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
316             pixels += line_size;                                        \
317             block  += line_size;                                        \
318             a = AV_RN32(pixels);                                        \
319             b = AV_RN32(pixels + 1);                                    \
320             l0 = (a & 0x03030303UL) +                                   \
321                  (b & 0x03030303UL) +                                   \
322                       0x02020202UL;                                     \
323             h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
324                  ((b & 0xFCFCFCFCUL) >> 2);                             \
325             OP(*((uint32_t *) block),                                   \
326                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
327             pixels += line_size;                                        \
328             block  += line_size;                                        \
329         }                                                               \
330         pixels += 4 - line_size * (h + 1);                              \
331         block  += 4 - line_size * h;                                    \
332     }                                                                   \
333 }                                                                       \
334                                                                         \
335 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2),                          \
336                FUNCC(OPNAME ## _pixels8_xy2),                           \
337                8 * sizeof(pixel))                                       \
338
/*
 * Store operators handed to PIXOP2 as its OP argument:
 *  - op_avg(a, b) replaces a with rnd_avg_pixel4(a, b), i.e. it combines the
 *    value already in the destination with the new value;
 *  - op_put(a, b) overwrites a with b.
 */
339 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
340 #define op_put(a, b) a = b
/* The PIXOP2 helpers are only instantiated when BIT_DEPTH is 8. */
341 #if BIT_DEPTH == 8
/*
 * From here on the identifier put_no_rnd_pixels8_8_c expands to
 * put_pixels8_8_c. NOTE(review): neither function is defined in this part of
 * the file; presumably a plain copy needs no separate no-rounding variant -
 * confirm where put_pixels8_8_c is defined.
 */
342 #define put_no_rnd_pixels8_8_c put_pixels8_8_c
343 PIXOP2(avg, op_avg)
344 PIXOP2(put, op_put)
345 #endif
/* The store operators are only needed for the instantiations above. */
346 #undef op_avg
347 #undef op_put