]> git.sesse.net Git - ffmpeg/blob - libavcodec/hpeldsp_template.c
Merge commit 'd0aabeab23755ee906440505ad2097c0f1493e80'
[ffmpeg] / libavcodec / hpeldsp_template.c
1 /*
2  * Half-pel DSP functions
3  *
4  * Copyright (c) 2000, 2001 Fabrice Bellard
5  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6  *
7  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 /**
27  * @file
28  * Half-pel DSP functions
29  */
30
31 #include "pixels.h"
32
33 #include "bit_depth_template.c"
34
35 #include "hpel_template.c"
36 #include "tpel_template.c"
37
/**
 * Instantiate the half-pel interpolation routines for one store operation.
 *
 * @param OPNAME token pasted in front of every generated function name
 * @param OP     macro invoked as OP(destination lvalue, value) to store
 *               each computed group of pixels
 */
#define PIXOP2(OPNAME, OP)                                              \
/* Blend two 8-pixel-wide sources with no_rnd_avg_pixel4() and pass     \
 * the result to OP, four pixels at a time, for h rows. Strides are     \
 * byte offsets between consecutive rows of the respective buffer. */   \
static inline void                                                      \
FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1,   \
                                   const uint8_t *src2, int dst_stride, \
                                   int src_stride1, int src_stride2,    \
                                   int h)                               \
{                                                                       \
    int row;                                                            \
                                                                        \
    for (row = 0; row < h; row++) {                                     \
        pixel4 p, q;                                                    \
                                                                        \
        /* first four pixels of the row */                              \
        p = AV_RN4P(&src1[row * src_stride1]);                          \
        q = AV_RN4P(&src2[row * src_stride2]);                          \
        OP(*((pixel4 *) &dst[row * dst_stride]),                        \
           no_rnd_avg_pixel4(p, q));                                    \
                                                                        \
        /* last four pixels of the row */                               \
        p = AV_RN4P(&src1[row * src_stride1 + 4 * sizeof(pixel)]);      \
        q = AV_RN4P(&src2[row * src_stride2 + 4 * sizeof(pixel)]);      \
        OP(*((pixel4 *) &dst[row * dst_stride + 4 * sizeof(pixel)]),    \
           no_rnd_avg_pixel4(p, q));                                    \
    }                                                                   \
}                                                                       \
61                                                                         \
62 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block,  \
63                                                        const uint8_t *pixels, \
64                                                        ptrdiff_t line_size, \
65                                                        int h)           \
66 {                                                                       \
67     FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels + sizeof(pixel), \
68                                        line_size, line_size, line_size, h); \
69 }                                                                       \
70                                                                         \
71 static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block,         \
72                                                 const uint8_t *pixels,  \
73                                                 ptrdiff_t line_size,    \
74                                                 int h)                  \
75 {                                                                       \
76     FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels + sizeof(pixel),  \
77                                 line_size, line_size, line_size, h);    \
78 }                                                                       \
79                                                                         \
80 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block,  \
81                                                        const uint8_t *pixels, \
82                                                        ptrdiff_t line_size, \
83                                                        int h)           \
84 {                                                                       \
85     FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels + line_size, \
86                                        line_size, line_size, line_size, h); \
87 }                                                                       \
88                                                                         \
89 static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block,         \
90                                                 const uint8_t *pixels,  \
91                                                 ptrdiff_t line_size,    \
92                                                 int h)                  \
93 {                                                                       \
94     FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels + line_size,      \
95                                 line_size, line_size, line_size, h);    \
96 }                                                                       \
97                                                                         \
98 static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block,         \
99                                                 const uint8_t *pixels,  \
100                                                 ptrdiff_t line_size,    \
101                                                 int h)                  \
102 {                                                                       \
103     FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels + sizeof(pixel),  \
104                                 line_size, line_size, line_size, h);    \
105 }                                                                       \
106                                                                         \
107 static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block,         \
108                                                 const uint8_t *pixels,  \
109                                                 ptrdiff_t line_size,    \
110                                                 int h)                  \
111 {                                                                       \
112     FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels + line_size,      \
113                                 line_size, line_size, line_size, h);    \
114 }                                                                       \
115                                                                         \
116 static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block,         \
117                                                 const uint8_t *pixels,  \
118                                                 ptrdiff_t line_size,    \
119                                                 int h)                  \
120 {                                                                       \
121     FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels + sizeof(pixel),  \
122                                 line_size, line_size, line_size, h);    \
123 }                                                                       \
124                                                                         \
125 static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block,         \
126                                                 const uint8_t *pixels,  \
127                                                 ptrdiff_t line_size,    \
128                                                 int h)                  \
129 {                                                                       \
130     FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels + line_size,      \
131                                 line_size, line_size, line_size, h);    \
132 }                                                                       \
133                                                                         \
134 static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block,       \
135                                                  const uint8_t *_pixels, \
136                                                  ptrdiff_t line_size,   \
137                                                  int h)                 \
138 {                                                                       \
139     pixel *block        = (pixel *) _block;                             \
140     const pixel *pixels = (const pixel *) _pixels;                      \
141     int i, a1, b1;                                                      \
142     int a0 = pixels[0];                                                 \
143     int b0 = pixels[1] + 2;                                             \
144                                                                         \
145     a0 += b0;                                                           \
146     b0 += pixels[2];                                                    \
147     line_size >>= sizeof(pixel)-1;                                      \
148     pixels += line_size;                                                \
149     for (i = 0; i < h; i += 2) {                                        \
150         a1  = pixels[0];                                                \
151         b1  = pixels[1];                                                \
152         a1 += b1;                                                       \
153         b1 += pixels[2];                                                \
154                                                                         \
155         block[0] = (a1 + a0) >> 2; /* FIXME non put */                  \
156         block[1] = (b1 + b0) >> 2;                                      \
157                                                                         \
158         pixels += line_size;                                            \
159         block  += line_size;                                            \
160                                                                         \
161         a0  = pixels[0];                                                \
162         b0  = pixels[1] + 2;                                            \
163         a0 += b0;                                                       \
164         b0 += pixels[2];                                                \
165                                                                         \
166         block[0] = (a1 + a0) >> 2;                                      \
167         block[1] = (b1 + b0) >> 2;                                      \
168         pixels  += line_size;                                           \
169         block   += line_size;                                           \
170     }                                                                   \
171 }                                                                       \
172                                                                         \
173 static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block,        \
174                                                  const uint8_t *pixels, \
175                                                  ptrdiff_t line_size,   \
176                                                  int h)                 \
177 {                                                                       \
178     /* FIXME HIGH BIT DEPTH */                                          \
179     int i;                                                              \
180     const uint32_t a = AV_RN32(pixels);                                 \
181     const uint32_t b = AV_RN32(pixels + 1);                             \
182     uint32_t l0 = (a & 0x03030303UL) +                                  \
183                   (b & 0x03030303UL) +                                  \
184                        0x02020202UL;                                    \
185     uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                           \
186                   ((b & 0xFCFCFCFCUL) >> 2);                            \
187     uint32_t l1, h1;                                                    \
188                                                                         \
189     pixels += line_size;                                                \
190     for (i = 0; i < h; i += 2) {                                        \
191         uint32_t a = AV_RN32(pixels);                                   \
192         uint32_t b = AV_RN32(pixels + 1);                               \
193         l1 = (a & 0x03030303UL) +                                       \
194              (b & 0x03030303UL);                                        \
195         h1 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
196              ((b & 0xFCFCFCFCUL) >> 2);                                 \
197         OP(*((uint32_t *) block), h0 + h1 +                             \
198            (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                          \
199         pixels += line_size;                                            \
200         block  += line_size;                                            \
201         a  = AV_RN32(pixels);                                           \
202         b  = AV_RN32(pixels + 1);                                       \
203         l0 = (a & 0x03030303UL) +                                       \
204              (b & 0x03030303UL) +                                       \
205                   0x02020202UL;                                         \
206         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
207              ((b & 0xFCFCFCFCUL) >> 2);                                 \
208         OP(*((uint32_t *) block), h0 + h1 +                             \
209            (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                          \
210         pixels += line_size;                                            \
211         block  += line_size;                                            \
212     }                                                                   \
213 }                                                                       \
214                                                                         \
215 static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block,        \
216                                                  const uint8_t *pixels, \
217                                                  ptrdiff_t line_size,   \
218                                                  int h)                 \
219 {                                                                       \
220     /* FIXME HIGH BIT DEPTH */                                          \
221     int j;                                                              \
222                                                                         \
223     for (j = 0; j < 2; j++) {                                           \
224         int i;                                                          \
225         const uint32_t a = AV_RN32(pixels);                             \
226         const uint32_t b = AV_RN32(pixels + 1);                         \
227         uint32_t l0 = (a & 0x03030303UL) +                              \
228                       (b & 0x03030303UL) +                              \
229                            0x02020202UL;                                \
230         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
231                       ((b & 0xFCFCFCFCUL) >> 2);                        \
232         uint32_t l1, h1;                                                \
233                                                                         \
234         pixels += line_size;                                            \
235         for (i = 0; i < h; i += 2) {                                    \
236             uint32_t a = AV_RN32(pixels);                               \
237             uint32_t b = AV_RN32(pixels + 1);                           \
238             l1 = (a & 0x03030303UL) +                                   \
239                  (b & 0x03030303UL);                                    \
240             h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
241                  ((b & 0xFCFCFCFCUL) >> 2);                             \
242             OP(*((uint32_t *) block), h0 + h1 +                         \
243                (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                      \
244             pixels += line_size;                                        \
245             block  += line_size;                                        \
246             a  = AV_RN32(pixels);                                       \
247             b  = AV_RN32(pixels + 1);                                   \
248             l0 = (a & 0x03030303UL) +                                   \
249                  (b & 0x03030303UL) +                                   \
250                       0x02020202UL;                                     \
251             h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
252                  ((b & 0xFCFCFCFCUL) >> 2);                             \
253             OP(*((uint32_t *) block), h0 + h1 +                         \
254                (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                      \
255             pixels += line_size;                                        \
256             block  += line_size;                                        \
257         }                                                               \
258         pixels += 4 - line_size * (h + 1);                              \
259         block  += 4 - line_size * h;                                    \
260     }                                                                   \
261 }                                                                       \
262                                                                         \
263 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, \
264                                                         const uint8_t *pixels, \
265                                                         ptrdiff_t line_size, \
266                                                         int h)          \
267 {                                                                       \
268     /* FIXME HIGH BIT DEPTH */                                          \
269     int j;                                                              \
270                                                                         \
271     for (j = 0; j < 2; j++) {                                           \
272         int i;                                                          \
273         const uint32_t a = AV_RN32(pixels);                             \
274         const uint32_t b = AV_RN32(pixels + 1);                         \
275         uint32_t l0 = (a & 0x03030303UL) +                              \
276                       (b & 0x03030303UL) +                              \
277                            0x01010101UL;                                \
278         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
279                       ((b & 0xFCFCFCFCUL) >> 2);                        \
280         uint32_t l1, h1;                                                \
281                                                                         \
282         pixels += line_size;                                            \
283         for (i = 0; i < h; i += 2) {                                    \
284             uint32_t a = AV_RN32(pixels);                               \
285             uint32_t b = AV_RN32(pixels + 1);                           \
286             l1 = (a & 0x03030303UL) +                                   \
287                  (b & 0x03030303UL);                                    \
288             h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
289                  ((b & 0xFCFCFCFCUL) >> 2);                             \
290             OP(*((uint32_t *) block), h0 + h1 +                         \
291                (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                      \
292             pixels += line_size;                                        \
293             block  += line_size;                                        \
294             a  = AV_RN32(pixels);                                       \
295             b  = AV_RN32(pixels + 1);                                   \
296             l0 = (a & 0x03030303UL) +                                   \
297                  (b & 0x03030303UL) +                                   \
298                       0x01010101UL;                                     \
299             h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
300                  ((b & 0xFCFCFCFCUL) >> 2);                             \
301             OP(*((uint32_t *) block), h0 + h1 +                         \
302                (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                      \
303             pixels += line_size;                                        \
304             block  += line_size;                                        \
305         }                                                               \
306         pixels += 4 - line_size * (h + 1);                              \
307         block  += 4 - line_size * h;                                    \
308     }                                                                   \
309 }                                                                       \
310                                                                         \
311 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2),                           \
312                FUNCC(OPNAME ## _pixels8_x2),                            \
313                8 * sizeof(pixel))                                       \
314 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2),                           \
315                FUNCC(OPNAME ## _pixels8_y2),                            \
316                8 * sizeof(pixel))                                       \
317 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2),                          \
318                FUNCC(OPNAME ## _pixels8_xy2),                           \
319                8 * sizeof(pixel))                                       \
320 av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16),             \
321                          FUNCC(OPNAME ## _pixels8),                     \
322                          8 * sizeof(pixel))                             \
323 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2),                    \
324                FUNCC(OPNAME ## _no_rnd_pixels8_x2),                     \
325                8 * sizeof(pixel))                                       \
326 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2),                    \
327                FUNCC(OPNAME ## _no_rnd_pixels8_y2),                     \
328                8 * sizeof(pixel))                                       \
329 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2),                   \
330                FUNCC(OPNAME ## _no_rnd_pixels8_xy2),                    \
331                8 * sizeof(pixel))                                       \
332
/* Store operations passed to PIXOP2 as its OP argument:
 * op_avg stores rnd_avg_pixel4() of the old destination and the new value,
 * op_put overwrites the destination with the new value. */
#define op_avg(a, b) (a) = rnd_avg_pixel4(a, b)
#define op_put(a, b) (a) = (b)
#if BIT_DEPTH == 8
/* Makes the name put_no_rnd_pixels8_8_c resolve to put_pixels8_8_c;
 * neither function is defined in this file. */
#define put_no_rnd_pixels8_8_c put_pixels8_8_c
/* The PIXOP2 functions are only instantiated when BIT_DEPTH is 8. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#endif
#undef op_avg
#undef op_put