]> git.sesse.net Git - ffmpeg/blob - libavcodec/dsputil_template.c
libopenjpeg: Support rgba64 encoding
[ffmpeg] / libavcodec / dsputil_template.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * DSP utils
28  */
29
30 #include "bit_depth_template.c"
31
32 #if BIT_DEPTH == 8
33 /* draw the edges of width 'w' of an image of size width, height */
34 // FIXME: Check that this is OK for MPEG-4 interlaced.
35 static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height,
36                               int w, int h, int sides)
37 {
38     pixel *buf = (pixel *) _buf;
39     int wrap   = _wrap / sizeof(pixel);
40     pixel *ptr = buf, *last_line;
41     int i;
42
43     /* left and right */
44     for (i = 0; i < height; i++) {
45         memset(ptr - w, ptr[0], w);
46         memset(ptr + width, ptr[width - 1], w);
47         ptr += wrap;
48     }
49
50     /* top and bottom + corners */
51     buf -= w;
52     last_line = buf + (height - 1) * wrap;
53     if (sides & EDGE_TOP)
54         for (i = 0; i < h; i++)
55             // top
56             memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel));
57     if (sides & EDGE_BOTTOM)
58         for (i = 0; i < h; i++)
59             // bottom
60             memcpy(last_line + (i + 1) * wrap, last_line,
61                    (width + w + w) * sizeof(pixel));
62 }
63 #endif
64
65 static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
66                               int line_size)
67 {
68     const pixel *pixels = (const pixel *) _pixels;
69     int i;
70
71     /* read the pixels */
72     for (i = 0; i < 8; i++) {
73         block[0] = pixels[0];
74         block[1] = pixels[1];
75         block[2] = pixels[2];
76         block[3] = pixels[3];
77         block[4] = pixels[4];
78         block[5] = pixels[5];
79         block[6] = pixels[6];
80         block[7] = pixels[7];
81         pixels  += line_size / sizeof(pixel);
82         block   += 8;
83     }
84 }
85
86 #if BIT_DEPTH == 8
87 static void FUNCC(clear_block)(int16_t *block)
88 {
89     memset(block, 0, sizeof(int16_t) * 64);
90 }
91
92 static void FUNCC(clear_blocks)(int16_t *blocks)
93 {
94     memset(blocks, 0, sizeof(int16_t) * 6 * 64);
95 }
96 #endif
97
98 #if BIT_DEPTH == 8
99 #include "hpel_template.c"
100 #endif
101
/*
 * PIXOP2(OPNAME, OP): generate a family of static inline block-combining
 * helpers.
 *
 * OPNAME is pasted in front of every generated function name (the names are
 * further decorated by FUNC()/FUNCC(), which are defined outside this part of
 * the file). OP(dst, val) is the statement used to store each packed result
 * into the destination.
 *
 * In every generated function the strides are added directly to uint8_t
 * pointers, so they are byte distances between consecutive rows, and h is the
 * number of rows to produce.
 *
 * Generated functions:
 *
 *  - OPNAME_no_rnd_pixels8_l2(dst, src1, src2, ...):
 *    per row, loads two pixel4 groups (8 pixels) from each of src1 and src2
 *    with AV_RN4P(), merges them with no_rnd_avg_pixel4() and stores the
 *    result through OP. no_rnd_avg_pixel4() is defined elsewhere; presumably
 *    it is the per-pixel average without upward rounding -- confirm there.
 *
 *  - OPNAME_no_rnd_pixels16_l2: calls the 8-pixel version twice, the second
 *    time with all pointers advanced by 8 * sizeof(pixel) bytes.
 *
 *  - OPNAME_pixels8_l4(dst, src1..src4, ...):
 *    per row, combines four sources into (a + b + c + d + 2) >> 2 for each
 *    byte. Four bytes are processed at once in a uint32_t: the low two bits
 *    of every byte (mask 0x03030303) are summed together with the rounding
 *    constant 0x02 per byte, shifted right by two and masked with 0x0F0F0F0F
 *    to drop bits that crossed over from the neighbouring byte; the upper six
 *    bits (mask 0xFCFCFCFC) are shifted right by two first and then summed.
 *    The code uses AV_RN32() and a fixed "+ 4" byte offset, i.e. it is
 *    written for one-byte pixels (hence the FIXME HIGH BIT DEPTH marker).
 *    dst is written through a uint32_t pointer cast.
 *
 *  - OPNAME_no_rnd_pixels8_l4: same as above with the per-byte constant 0x01
 *    instead of 0x02, i.e. (a + b + c + d + 1) >> 2.
 *
 *  - OPNAME_pixels16_l4 / OPNAME_no_rnd_pixels16_l4: call the matching
 *    8-pixel version twice, the second time with all pointers advanced by
 *    8 * sizeof(pixel) bytes.
 *
 *  - OPNAME_pixels8_xy2(block, pixels, line_size, h):
 *    for every output position computes
 *        (p[y][x] + p[y][x + 1] + p[y + 1][x] + p[y + 1][x + 1] + 2) >> 2
 *    using the same packed low/high bit split as the l4 helpers. The outer
 *    loop runs twice, once per 4-byte column of the 8-byte wide block. The
 *    inner loop is unrolled to emit two rows per iteration, reusing the
 *    horizontal sums of the shared middle row; only the sums stored in
 *    l0/h0 carry the rounding constant, so each output gets it exactly once.
 *    After a column is done the pointers are rewound by line_size * (h + 1)
 *    (source) and line_size * h (destination) and moved 4 bytes to the
 *    right; this rewind matches the rows actually processed only when h is
 *    even. line_size is used as the stride of both source and destination.
 *    The source is read one row below and one byte to the right of the
 *    8 x h output area.
 *    NOTE(review): the a/b declared inside the inner loop shadow the const
 *    a/b declared just above it.
 *
 *  - OPNAME_pixels16_xy2: produced by CALL_2X_PIXELS() from the 8-pixel
 *    version with an offset of 8 * sizeof(pixel). CALL_2X_PIXELS is defined
 *    elsewhere; presumably it emits a wrapper that calls the 8-pixel function
 *    twice -- confirm at its definition.
 */
102 #define PIXOP2(OPNAME, OP)                                              \
103 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst,     \
104                                                       const uint8_t *src1, \
105                                                       const uint8_t *src2, \
106                                                       int dst_stride,   \
107                                                       int src_stride1,  \
108                                                       int src_stride2,  \
109                                                       int h)            \
110 {                                                                       \
111     int i;                                                              \
112                                                                         \
113     for (i = 0; i < h; i++) {                                           \
114         pixel4 a, b;                                                    \
115         a = AV_RN4P(&src1[i * src_stride1]);                            \
116         b = AV_RN4P(&src2[i * src_stride2]);                            \
117         OP(*((pixel4 *) &dst[i * dst_stride]),                          \
118            no_rnd_avg_pixel4(a, b));                                    \
119         a = AV_RN4P(&src1[i * src_stride1 + 4 * sizeof(pixel)]);        \
120         b = AV_RN4P(&src2[i * src_stride2 + 4 * sizeof(pixel)]);        \
121         OP(*((pixel4 *) &dst[i * dst_stride + 4 * sizeof(pixel)]),      \
122            no_rnd_avg_pixel4(a, b));                                    \
123     }                                                                   \
124 }                                                                       \
125                                                                         \
126 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst,    \
127                                                        const uint8_t *src1, \
128                                                        const uint8_t *src2, \
129                                                        int dst_stride,  \
130                                                        int src_stride1, \
131                                                        int src_stride2, \
132                                                        int h)           \
133 {                                                                       \
134     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst, src1, src2, dst_stride,     \
135                                        src_stride1, src_stride2, h);    \
136     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst  + 8 * sizeof(pixel),        \
137                                        src1 + 8 * sizeof(pixel),        \
138                                        src2 + 8 * sizeof(pixel),        \
139                                        dst_stride, src_stride1,         \
140                                        src_stride2, h);                 \
141 }                                                                       \
142                                                                         \
143 static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst,            \
144                                                const uint8_t *src1,     \
145                                                const uint8_t *src2,     \
146                                                const uint8_t *src3,     \
147                                                const uint8_t *src4,     \
148                                                int dst_stride,          \
149                                                int src_stride1,         \
150                                                int src_stride2,         \
151                                                int src_stride3,         \
152                                                int src_stride4,         \
153                                                int h)                   \
154 {                                                                       \
155     /* FIXME HIGH BIT DEPTH */                                          \
156     int i;                                                              \
157                                                                         \
158     for (i = 0; i < h; i++) {                                           \
159         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
160         a  = AV_RN32(&src1[i * src_stride1]);                           \
161         b  = AV_RN32(&src2[i * src_stride2]);                           \
162         c  = AV_RN32(&src3[i * src_stride3]);                           \
163         d  = AV_RN32(&src4[i * src_stride4]);                           \
164         l0 = (a & 0x03030303UL) +                                       \
165              (b & 0x03030303UL) +                                       \
166                   0x02020202UL;                                         \
167         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
168              ((b & 0xFCFCFCFCUL) >> 2);                                 \
169         l1 = (c & 0x03030303UL) +                                       \
170              (d & 0x03030303UL);                                        \
171         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
172              ((d & 0xFCFCFCFCUL) >> 2);                                 \
173         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
174            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
175         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
176         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
177         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
178         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
179         l0 = (a & 0x03030303UL) +                                       \
180              (b & 0x03030303UL) +                                       \
181                   0x02020202UL;                                         \
182         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
183              ((b & 0xFCFCFCFCUL) >> 2);                                 \
184         l1 = (c & 0x03030303UL) +                                       \
185              (d & 0x03030303UL);                                        \
186         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
187              ((d & 0xFCFCFCFCUL) >> 2);                                 \
188         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
189            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
190     }                                                                   \
191 }                                                                       \
192                                                                         \
193 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst,     \
194                                                       const uint8_t *src1, \
195                                                       const uint8_t *src2, \
196                                                       const uint8_t *src3, \
197                                                       const uint8_t *src4, \
198                                                       int dst_stride,   \
199                                                       int src_stride1,  \
200                                                       int src_stride2,  \
201                                                       int src_stride3,  \
202                                                       int src_stride4,  \
203                                                       int h)            \
204 {                                                                       \
205     /* FIXME HIGH BIT DEPTH */                                          \
206     int i;                                                              \
207                                                                         \
208     for (i = 0; i < h; i++) {                                           \
209         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
210         a  = AV_RN32(&src1[i * src_stride1]);                           \
211         b  = AV_RN32(&src2[i * src_stride2]);                           \
212         c  = AV_RN32(&src3[i * src_stride3]);                           \
213         d  = AV_RN32(&src4[i * src_stride4]);                           \
214         l0 = (a & 0x03030303UL) +                                       \
215              (b & 0x03030303UL) +                                       \
216                   0x01010101UL;                                         \
217         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
218              ((b & 0xFCFCFCFCUL) >> 2);                                 \
219         l1 = (c & 0x03030303UL) +                                       \
220              (d & 0x03030303UL);                                        \
221         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
222              ((d & 0xFCFCFCFCUL) >> 2);                                 \
223         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
224            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
225         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
226         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
227         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
228         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
229         l0 = (a & 0x03030303UL) +                                       \
230              (b & 0x03030303UL) +                                       \
231                   0x01010101UL;                                         \
232         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
233              ((b & 0xFCFCFCFCUL) >> 2);                                 \
234         l1 = (c & 0x03030303UL) +                                       \
235              (d & 0x03030303UL);                                        \
236         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
237              ((d & 0xFCFCFCFCUL) >> 2);                                 \
238         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
239            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
240     }                                                                   \
241 }                                                                       \
242 static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst,           \
243                                                 const uint8_t *src1,    \
244                                                 const uint8_t *src2,    \
245                                                 const uint8_t *src3,    \
246                                                 const uint8_t *src4,    \
247                                                 int dst_stride,         \
248                                                 int src_stride1,        \
249                                                 int src_stride2,        \
250                                                 int src_stride3,        \
251                                                 int src_stride4,        \
252                                                 int h)                  \
253 {                                                                       \
254     FUNC(OPNAME ## _pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, \
255                                 src_stride1, src_stride2, src_stride3,  \
256                                 src_stride4, h);                        \
257     FUNC(OPNAME ## _pixels8_l4)(dst  + 8 * sizeof(pixel),               \
258                                 src1 + 8 * sizeof(pixel),               \
259                                 src2 + 8 * sizeof(pixel),               \
260                                 src3 + 8 * sizeof(pixel),               \
261                                 src4 + 8 * sizeof(pixel),               \
262                                 dst_stride, src_stride1, src_stride2,   \
263                                 src_stride3, src_stride4, h);           \
264 }                                                                       \
265 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst,    \
266                                                        const uint8_t *src1, \
267                                                        const uint8_t *src2, \
268                                                        const uint8_t *src3, \
269                                                        const uint8_t *src4, \
270                                                        int dst_stride,  \
271                                                        int src_stride1, \
272                                                        int src_stride2, \
273                                                        int src_stride3, \
274                                                        int src_stride4, \
275                                                        int h)           \
276 {                                                                       \
277     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst, src1, src2, src3, src4,     \
278                                        dst_stride, src_stride1, src_stride2, \
279                                        src_stride3, src_stride4, h);    \
280     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst  + 8 * sizeof(pixel),        \
281                                        src1 + 8 * sizeof(pixel),        \
282                                        src2 + 8 * sizeof(pixel),        \
283                                        src3 + 8 * sizeof(pixel),        \
284                                        src4 + 8 * sizeof(pixel),        \
285                                        dst_stride, src_stride1, src_stride2, \
286                                        src_stride3, src_stride4, h);    \
287 }                                                                       \
288                                                                         \
289 static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block,        \
290                                                  const uint8_t *pixels, \
291                                                  ptrdiff_t line_size,   \
292                                                  int h)                 \
293 {                                                                       \
294     /* FIXME HIGH BIT DEPTH */                                          \
295     int j;                                                              \
296                                                                         \
297     for (j = 0; j < 2; j++) {                                           \
298         int i;                                                          \
299         const uint32_t a = AV_RN32(pixels);                             \
300         const uint32_t b = AV_RN32(pixels + 1);                         \
301         uint32_t l0 = (a & 0x03030303UL) +                              \
302                       (b & 0x03030303UL) +                              \
303                            0x02020202UL;                                \
304         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
305                       ((b & 0xFCFCFCFCUL) >> 2);                        \
306         uint32_t l1, h1;                                                \
307                                                                         \
308         pixels += line_size;                                            \
309         for (i = 0; i < h; i += 2) {                                    \
310             uint32_t a = AV_RN32(pixels);                               \
311             uint32_t b = AV_RN32(pixels + 1);                           \
312             l1 = (a & 0x03030303UL) +                                   \
313                  (b & 0x03030303UL);                                    \
314             h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
315                  ((b & 0xFCFCFCFCUL) >> 2);                             \
316             OP(*((uint32_t *) block),                                   \
317                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
318             pixels += line_size;                                        \
319             block  += line_size;                                        \
320             a = AV_RN32(pixels);                                        \
321             b = AV_RN32(pixels + 1);                                    \
322             l0 = (a & 0x03030303UL) +                                   \
323                  (b & 0x03030303UL) +                                   \
324                       0x02020202UL;                                     \
325             h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
326                  ((b & 0xFCFCFCFCUL) >> 2);                             \
327             OP(*((uint32_t *) block),                                   \
328                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
329             pixels += line_size;                                        \
330             block  += line_size;                                        \
331         }                                                               \
332         pixels += 4 - line_size * (h + 1);                              \
333         block  += 4 - line_size * h;                                    \
334     }                                                                   \
335 }                                                                       \
336                                                                         \
337 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2),                          \
338                FUNCC(OPNAME ## _pixels8_xy2),                           \
339                8 * sizeof(pixel))                                       \
340
/*
 * Store operations passed to PIXOP2 as its OP argument:
 *  - op_avg merges the new value with what is already stored in the
 *    destination using rnd_avg_pixel4() (defined elsewhere);
 *  - op_put overwrites the destination with the new value.
 */
341 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
342 #define op_put(a, b) a = b
/* The PIXOP2 families are only instantiated when BIT_DEPTH is 8. */
343 #if BIT_DEPTH == 8
/* Make put_no_rnd_pixels8_8_c another name for put_pixels8_8_c; presumably a
 * plain copy involves no averaging, so the rounding mode makes no difference
 * -- confirm where put_pixels8_8_c is defined. */
344 #define put_no_rnd_pixels8_8_c put_pixels8_8_c
/* Emit the "avg" and "put" variants of every PIXOP2 function. */
345 PIXOP2(avg, op_avg)
346 PIXOP2(put, op_put)
347 #endif
/* The store helpers are only needed for the instantiations above. */
348 #undef op_avg
349 #undef op_put