]> git.sesse.net Git - ffmpeg/blob - libavcodec/dsputil_template.c
dsputil: remove unused functions copy_block{2, 4, 8, 16}.
[ffmpeg] / libavcodec / dsputil_template.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * DSP utils
28  */
29
30 #include "bit_depth_template.c"
31
32 /* draw the edges of width 'w' of an image of size width, height */
33 //FIXME check that this is ok for mpeg4 interlaced
34 static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
35 {
36     pixel *buf = (pixel*)_buf;
37     int wrap = _wrap / sizeof(pixel);
38     pixel *ptr, *last_line;
39     int i;
40
41     /* left and right */
42     ptr = buf;
43     for(i=0;i<height;i++) {
44 #if BIT_DEPTH > 8
45         int j;
46         for (j = 0; j < w; j++) {
47             ptr[j-w] = ptr[0];
48             ptr[j+width] = ptr[width-1];
49         }
50 #else
51         memset(ptr - w, ptr[0], w);
52         memset(ptr + width, ptr[width-1], w);
53 #endif
54         ptr += wrap;
55     }
56
57     /* top and bottom + corners */
58     buf -= w;
59     last_line = buf + (height - 1) * wrap;
60     if (sides & EDGE_TOP)
61         for(i = 0; i < h; i++)
62             memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
63     if (sides & EDGE_BOTTOM)
64         for (i = 0; i < h; i++)
65             memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
66 }
67
68 #define DCTELEM_FUNCS(dctcoef, suffix)                                  \
69 static void FUNCC(get_pixels ## suffix)(int16_t *restrict _block,       \
70                                         const uint8_t *_pixels,         \
71                                         int line_size)                  \
72 {                                                                       \
73     const pixel *pixels = (const pixel *) _pixels;                      \
74     dctcoef *restrict block = (dctcoef *) _block;                       \
75     int i;                                                              \
76                                                                         \
77     /* read the pixels */                                               \
78     for(i=0;i<8;i++) {                                                  \
79         block[0] = pixels[0];                                           \
80         block[1] = pixels[1];                                           \
81         block[2] = pixels[2];                                           \
82         block[3] = pixels[3];                                           \
83         block[4] = pixels[4];                                           \
84         block[5] = pixels[5];                                           \
85         block[6] = pixels[6];                                           \
86         block[7] = pixels[7];                                           \
87         pixels += line_size / sizeof(pixel);                            \
88         block += 8;                                                     \
89     }                                                                   \
90 }                                                                       \
91                                                                         \
92 static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels,     \
93                                          int16_t *_block,               \
94                                          int line_size)                 \
95 {                                                                       \
96     int i;                                                              \
97     pixel *restrict pixels = (pixel *restrict)_pixels;                  \
98     dctcoef *block = (dctcoef*)_block;                                  \
99     line_size /= sizeof(pixel);                                         \
100                                                                         \
101     for(i=0;i<8;i++) {                                                  \
102         pixels[0] += block[0];                                          \
103         pixels[1] += block[1];                                          \
104         pixels[2] += block[2];                                          \
105         pixels[3] += block[3];                                          \
106         pixels[4] += block[4];                                          \
107         pixels[5] += block[5];                                          \
108         pixels[6] += block[6];                                          \
109         pixels[7] += block[7];                                          \
110         pixels += line_size;                                            \
111         block += 8;                                                     \
112     }                                                                   \
113 }                                                                       \
114                                                                         \
115 static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels,     \
116                                          int16_t *_block,               \
117                                          int line_size)                 \
118 {                                                                       \
119     int i;                                                              \
120     pixel *restrict pixels = (pixel *restrict)_pixels;                  \
121     dctcoef *block = (dctcoef*)_block;                                  \
122     line_size /= sizeof(pixel);                                         \
123                                                                         \
124     for(i=0;i<4;i++) {                                                  \
125         pixels[0] += block[0];                                          \
126         pixels[1] += block[1];                                          \
127         pixels[2] += block[2];                                          \
128         pixels[3] += block[3];                                          \
129         pixels += line_size;                                            \
130         block += 4;                                                     \
131     }                                                                   \
132 }                                                                       \
133                                                                         \
134 static void FUNCC(clear_block ## suffix)(int16_t *block)                \
135 {                                                                       \
136     memset(block, 0, sizeof(dctcoef)*64);                               \
137 }                                                                       \
138                                                                         \
139 /**                                                                     \
140  * memset(blocks, 0, sizeof(int16_t)*6*64)                              \
141  */                                                                     \
142 static void FUNCC(clear_blocks ## suffix)(int16_t *blocks)              \
143 {                                                                       \
144     memset(blocks, 0, sizeof(dctcoef)*6*64);                            \
145 }
146
147 DCTELEM_FUNCS(int16_t, _16)
148 #if BIT_DEPTH > 8
149 DCTELEM_FUNCS(dctcoef, _32)
150 #endif
151
152 #include "hpel_template.c"
153
/**
 * PIXOP2(OPNAME, OP) generates a family of half-pel helpers for one store
 * operation. OPNAME is pasted into every generated function name and
 * OP(dst, val) is the statement used to write val to dst (see op_put and
 * op_avg defined after the macro).
 *
 * All pointers are byte pointers; strides and line sizes are applied to them
 * directly, i.e. they are byte offsets, and horizontal offsets are scaled
 * with sizeof(pixel) explicitly.
 *
 * Generated functions:
 *  - <OPNAME>_no_rnd_pixels8_l2 / _no_rnd_pixels16_l2:
 *    for each of h rows, load four pixels at a time from src1 and src2 with
 *    AV_RN4P, combine them with no_rnd_avg_pixel4() and store through OP.
 *    The 16 variant calls the 8 variant twice, 8 pixels apart.
 *  - <OPNAME>_pixels{8,4,2}_x2 / _y2 and <OPNAME>_no_rnd_pixels8_x2 / _y2:
 *    forward to the matching _l2 helper, using pixels + sizeof(pixel) (x2)
 *    or pixels + line_size (y2) as second source. The rounding _l2 helpers
 *    are not defined in this macro.
 *    NOTE(review): presumably they come from an included template - confirm.
 *  - <OPNAME>_pixels8_l4 / _no_rnd_pixels8_l4 (and the 16 wide wrappers):
 *    combine four sources, 4 bytes per 32-bit word. Each byte is split into
 *    its low two bits (l0, l1) and its high six bits shifted right by two
 *    (h0, h1), so the per-byte sums cannot carry into the neighbouring byte.
 *    The stored value is, per byte, (a + b + c + d + r) >> 2 with r = 2
 *    (0x02020202) in the rounding and r = 1 (0x01010101) in the no_rnd
 *    variant. These functions read bytes with AV_RN32 and are marked
 *    "FIXME HIGH BIT DEPTH" in the code.
 *  - <OPNAME>_pixels2_xy2:
 *    scalar 2 pixel wide version of the 2x2 average; it assigns the result
 *    directly and does not use OP (see the "FIXME non put" in the code).
 *  - <OPNAME>_pixels4_xy2, _pixels8_xy2, _no_rnd_pixels8_xy2:
 *    same byte-lane arithmetic as _l4, where the four inputs are the word at
 *    pixels, the word one byte further, and the same two words of the next
 *    row. The sums of one row (l0/h0 or l1/h1) are reused for the following
 *    output row, and two rows are produced per loop iteration (i += 2).
 *    The 8 wide variants process two 4 byte columns (j loop) and rewind the
 *    pointers by h rows between them, so the rewind is only exact for even h.
 *  - the trailing CALL_2X_PIXELS lines create the 16 wide _x2/_y2/_xy2
 *    functions from the 8 wide ones; CALL_2X_PIXELS itself is defined
 *    elsewhere. Note that <OPNAME>_no_rnd_pixels16 is built from
 *    <OPNAME>_pixels8.
 */
154 #define PIXOP2(OPNAME, OP) \
155 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
156                                                 int src_stride1, int src_stride2, int h){\
157     int i;\
158     for(i=0; i<h; i++){\
159         pixel4 a,b;\
160         a= AV_RN4P(&src1[i*src_stride1  ]);\
161         b= AV_RN4P(&src2[i*src_stride2  ]);\
162         OP(*((pixel4*)&dst[i*dst_stride  ]), no_rnd_avg_pixel4(a, b));\
163         a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
164         b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
165         OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
166     }\
167 }\
168 \
169 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
170                                                 int src_stride1, int src_stride2, int h){\
171     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
172     FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
173 }\
174 \
175 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
176     FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
177 }\
178 \
179 static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
180     FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
181 }\
182 \
183 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
184     FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
185 }\
186 \
187 static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
188     FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
189 }\
190 \
191 static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
192                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
193     /* FIXME HIGH BIT DEPTH */\
194     int i;\
195     for(i=0; i<h; i++){\
196         uint32_t a, b, c, d, l0, l1, h0, h1;\
197         a= AV_RN32(&src1[i*src_stride1]);\
198         b= AV_RN32(&src2[i*src_stride2]);\
199         c= AV_RN32(&src3[i*src_stride3]);\
200         d= AV_RN32(&src4[i*src_stride4]);\
201         l0=  (a&0x03030303UL)\
202            + (b&0x03030303UL)\
203            + 0x02020202UL;\
204         h0= ((a&0xFCFCFCFCUL)>>2)\
205           + ((b&0xFCFCFCFCUL)>>2);\
206         l1=  (c&0x03030303UL)\
207            + (d&0x03030303UL);\
208         h1= ((c&0xFCFCFCFCUL)>>2)\
209           + ((d&0xFCFCFCFCUL)>>2);\
210         OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
211         a= AV_RN32(&src1[i*src_stride1+4]);\
212         b= AV_RN32(&src2[i*src_stride2+4]);\
213         c= AV_RN32(&src3[i*src_stride3+4]);\
214         d= AV_RN32(&src4[i*src_stride4+4]);\
215         l0=  (a&0x03030303UL)\
216            + (b&0x03030303UL)\
217            + 0x02020202UL;\
218         h0= ((a&0xFCFCFCFCUL)>>2)\
219           + ((b&0xFCFCFCFCUL)>>2);\
220         l1=  (c&0x03030303UL)\
221            + (d&0x03030303UL);\
222         h1= ((c&0xFCFCFCFCUL)>>2)\
223           + ((d&0xFCFCFCFCUL)>>2);\
224         OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
225     }\
226 }\
227 \
228 static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
229     FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
230 }\
231 \
232 static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
233     FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
234 }\
235 \
236 static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
237     FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
238 }\
239 \
240 static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
241     FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
242 }\
243 \
244 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
245                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
246     /* FIXME HIGH BIT DEPTH*/\
247     int i;\
248     for(i=0; i<h; i++){\
249         uint32_t a, b, c, d, l0, l1, h0, h1;\
250         a= AV_RN32(&src1[i*src_stride1]);\
251         b= AV_RN32(&src2[i*src_stride2]);\
252         c= AV_RN32(&src3[i*src_stride3]);\
253         d= AV_RN32(&src4[i*src_stride4]);\
254         l0=  (a&0x03030303UL)\
255            + (b&0x03030303UL)\
256            + 0x01010101UL;\
257         h0= ((a&0xFCFCFCFCUL)>>2)\
258           + ((b&0xFCFCFCFCUL)>>2);\
259         l1=  (c&0x03030303UL)\
260            + (d&0x03030303UL);\
261         h1= ((c&0xFCFCFCFCUL)>>2)\
262           + ((d&0xFCFCFCFCUL)>>2);\
263         OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
264         a= AV_RN32(&src1[i*src_stride1+4]);\
265         b= AV_RN32(&src2[i*src_stride2+4]);\
266         c= AV_RN32(&src3[i*src_stride3+4]);\
267         d= AV_RN32(&src4[i*src_stride4+4]);\
268         l0=  (a&0x03030303UL)\
269            + (b&0x03030303UL)\
270            + 0x01010101UL;\
271         h0= ((a&0xFCFCFCFCUL)>>2)\
272           + ((b&0xFCFCFCFCUL)>>2);\
273         l1=  (c&0x03030303UL)\
274            + (d&0x03030303UL);\
275         h1= ((c&0xFCFCFCFCUL)>>2)\
276           + ((d&0xFCFCFCFCUL)>>2);\
277         OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
278     }\
279 }\
280 static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
281                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
282     FUNC(OPNAME ## _pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
283     FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
284 }\
285 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
286                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
287     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
288     FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
289 }\
290 \
291 static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
292 {\
293         int i, a0, b0, a1, b1;\
294         pixel *block = (pixel*)_block;\
295         const pixel *pixels = (const pixel*)_pixels;\
296         line_size /= sizeof(pixel);\
297         a0= pixels[0];\
298         b0= pixels[1] + 2;\
299         a0 += b0;\
300         b0 += pixels[2];\
301 \
302         pixels+=line_size;\
303         for(i=0; i<h; i+=2){\
304             a1= pixels[0];\
305             b1= pixels[1];\
306             a1 += b1;\
307             b1 += pixels[2];\
308 \
309             block[0]= (a1+a0)>>2; /* FIXME non put */\
310             block[1]= (b1+b0)>>2;\
311 \
312             pixels+=line_size;\
313             block +=line_size;\
314 \
315             a0= pixels[0];\
316             b0= pixels[1] + 2;\
317             a0 += b0;\
318             b0 += pixels[2];\
319 \
320             block[0]= (a1+a0)>>2;\
321             block[1]= (b1+b0)>>2;\
322             pixels+=line_size;\
323             block +=line_size;\
324         }\
325 }\
326 \
327 static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
328 {\
329         /* FIXME HIGH BIT DEPTH */\
330         int i;\
331         const uint32_t a= AV_RN32(pixels  );\
332         const uint32_t b= AV_RN32(pixels+1);\
333         uint32_t l0=  (a&0x03030303UL)\
334                     + (b&0x03030303UL)\
335                     + 0x02020202UL;\
336         uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
337                    + ((b&0xFCFCFCFCUL)>>2);\
338         uint32_t l1,h1;\
339 \
340         pixels+=line_size;\
341         for(i=0; i<h; i+=2){\
342             uint32_t a= AV_RN32(pixels  );\
343             uint32_t b= AV_RN32(pixels+1);\
344             l1=  (a&0x03030303UL)\
345                + (b&0x03030303UL);\
346             h1= ((a&0xFCFCFCFCUL)>>2)\
347               + ((b&0xFCFCFCFCUL)>>2);\
348             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
349             pixels+=line_size;\
350             block +=line_size;\
351             a= AV_RN32(pixels  );\
352             b= AV_RN32(pixels+1);\
353             l0=  (a&0x03030303UL)\
354                + (b&0x03030303UL)\
355                + 0x02020202UL;\
356             h0= ((a&0xFCFCFCFCUL)>>2)\
357               + ((b&0xFCFCFCFCUL)>>2);\
358             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
359             pixels+=line_size;\
360             block +=line_size;\
361         }\
362 }\
363 \
364 static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
365 {\
366     /* FIXME HIGH BIT DEPTH */\
367     int j;\
368     for(j=0; j<2; j++){\
369         int i;\
370         const uint32_t a= AV_RN32(pixels  );\
371         const uint32_t b= AV_RN32(pixels+1);\
372         uint32_t l0=  (a&0x03030303UL)\
373                     + (b&0x03030303UL)\
374                     + 0x02020202UL;\
375         uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
376                    + ((b&0xFCFCFCFCUL)>>2);\
377         uint32_t l1,h1;\
378 \
379         pixels+=line_size;\
380         for(i=0; i<h; i+=2){\
381             uint32_t a= AV_RN32(pixels  );\
382             uint32_t b= AV_RN32(pixels+1);\
383             l1=  (a&0x03030303UL)\
384                + (b&0x03030303UL);\
385             h1= ((a&0xFCFCFCFCUL)>>2)\
386               + ((b&0xFCFCFCFCUL)>>2);\
387             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
388             pixels+=line_size;\
389             block +=line_size;\
390             a= AV_RN32(pixels  );\
391             b= AV_RN32(pixels+1);\
392             l0=  (a&0x03030303UL)\
393                + (b&0x03030303UL)\
394                + 0x02020202UL;\
395             h0= ((a&0xFCFCFCFCUL)>>2)\
396               + ((b&0xFCFCFCFCUL)>>2);\
397             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
398             pixels+=line_size;\
399             block +=line_size;\
400         }\
401         pixels+=4-line_size*(h+1);\
402         block +=4-line_size*h;\
403     }\
404 }\
405 \
406 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
407 {\
408     /* FIXME HIGH BIT DEPTH */\
409     int j;\
410     for(j=0; j<2; j++){\
411         int i;\
412         const uint32_t a= AV_RN32(pixels  );\
413         const uint32_t b= AV_RN32(pixels+1);\
414         uint32_t l0=  (a&0x03030303UL)\
415                     + (b&0x03030303UL)\
416                     + 0x01010101UL;\
417         uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
418                    + ((b&0xFCFCFCFCUL)>>2);\
419         uint32_t l1,h1;\
420 \
421         pixels+=line_size;\
422         for(i=0; i<h; i+=2){\
423             uint32_t a= AV_RN32(pixels  );\
424             uint32_t b= AV_RN32(pixels+1);\
425             l1=  (a&0x03030303UL)\
426                + (b&0x03030303UL);\
427             h1= ((a&0xFCFCFCFCUL)>>2)\
428               + ((b&0xFCFCFCFCUL)>>2);\
429             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
430             pixels+=line_size;\
431             block +=line_size;\
432             a= AV_RN32(pixels  );\
433             b= AV_RN32(pixels+1);\
434             l0=  (a&0x03030303UL)\
435                + (b&0x03030303UL)\
436                + 0x01010101UL;\
437             h0= ((a&0xFCFCFCFCUL)>>2)\
438               + ((b&0xFCFCFCFCUL)>>2);\
439             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
440             pixels+=line_size;\
441             block +=line_size;\
442         }\
443         pixels+=4-line_size*(h+1);\
444         block +=4-line_size*h;\
445     }\
446 }\
447 \
448 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
449 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
450 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
451 av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16)    , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
452 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
453 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
454 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
455
/* Store operations handed to PIXOP2 as OP:
 * op_avg merges the new value into the destination with rnd_avg_pixel4(),
 * op_put overwrites the destination. */
456 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
457 #define op_put(a, b) a = b
/* The family is only generated for 8-bit pixels; put_no_rnd_pixels8_8_c is
 * made an alias of put_pixels8_8_c before the instantiation. */
458 #if BIT_DEPTH == 8
459 #define put_no_rnd_pixels8_8_c put_pixels8_8_c
460 PIXOP2(avg, op_avg)
461 PIXOP2(put, op_put)
462 #endif
463 #undef op_avg
464 #undef op_put
465
466 #define H264_CHROMA_MC(OPNAME, OP)\
467 static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
468     pixel *dst = (pixel*)_dst;\
469     pixel *src = (pixel*)_src;\
470     const int A=(8-x)*(8-y);\
471     const int B=(  x)*(8-y);\
472     const int C=(8-x)*(  y);\
473     const int D=(  x)*(  y);\
474     int i;\
475     stride /= sizeof(pixel);\
476     \
477     assert(x<8 && y<8 && x>=0 && y>=0);\
478 \
479     if(D){\
480         for(i=0; i<h; i++){\
481             OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
482             OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
483             dst+= stride;\
484             src+= stride;\
485         }\
486     }else{\
487         const int E= B+C;\
488         const int step= C ? stride : 1;\
489         for(i=0; i<h; i++){\
490             OP(dst[0], (A*src[0] + E*src[step+0]));\
491             OP(dst[1], (A*src[1] + E*src[step+1]));\
492             dst+= stride;\
493             src+= stride;\
494         }\
495     }\
496 }\
497 \
498 static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
499     pixel *dst = (pixel*)_dst;\
500     pixel *src = (pixel*)_src;\
501     const int A=(8-x)*(8-y);\
502     const int B=(  x)*(8-y);\
503     const int C=(8-x)*(  y);\
504     const int D=(  x)*(  y);\
505     int i;\
506     stride /= sizeof(pixel);\
507     \
508     assert(x<8 && y<8 && x>=0 && y>=0);\
509 \
510     if(D){\
511         for(i=0; i<h; i++){\
512             OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
513             OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
514             OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
515             OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
516             dst+= stride;\
517             src+= stride;\
518         }\
519     }else{\
520         const int E= B+C;\
521         const int step= C ? stride : 1;\
522         for(i=0; i<h; i++){\
523             OP(dst[0], (A*src[0] + E*src[step+0]));\
524             OP(dst[1], (A*src[1] + E*src[step+1]));\
525             OP(dst[2], (A*src[2] + E*src[step+2]));\
526             OP(dst[3], (A*src[3] + E*src[step+3]));\
527             dst+= stride;\
528             src+= stride;\
529         }\
530     }\
531 }\
532 \
533 static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
534     pixel *dst = (pixel*)_dst;\
535     pixel *src = (pixel*)_src;\
536     const int A=(8-x)*(8-y);\
537     const int B=(  x)*(8-y);\
538     const int C=(8-x)*(  y);\
539     const int D=(  x)*(  y);\
540     int i;\
541     stride /= sizeof(pixel);\
542     \
543     assert(x<8 && y<8 && x>=0 && y>=0);\
544 \
545     if(D){\
546         for(i=0; i<h; i++){\
547             OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
548             OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
549             OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
550             OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
551             OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
552             OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
553             OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
554             OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
555             dst+= stride;\
556             src+= stride;\
557         }\
558     }else{\
559         const int E= B+C;\
560         const int step= C ? stride : 1;\
561         for(i=0; i<h; i++){\
562             OP(dst[0], (A*src[0] + E*src[step+0]));\
563             OP(dst[1], (A*src[1] + E*src[step+1]));\
564             OP(dst[2], (A*src[2] + E*src[step+2]));\
565             OP(dst[3], (A*src[3] + E*src[step+3]));\
566             OP(dst[4], (A*src[4] + E*src[step+4]));\
567             OP(dst[5], (A*src[5] + E*src[step+5]));\
568             OP(dst[6], (A*src[6] + E*src[step+6]));\
569             OP(dst[7], (A*src[7] + E*src[step+7]));\
570             dst+= stride;\
571             src+= stride;\
572         }\
573     }\
574 }
575
576 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
577 #define op_put(a, b) a = (((b) + 32)>>6)
578
579 H264_CHROMA_MC(put_       , op_put)
580 H264_CHROMA_MC(avg_       , op_avg)
581 #undef op_avg
582 #undef op_put
583
/**
 * Non-static wrapper around the FUNCC-named put_pixels8: forwards dst, src
 * and stride unchanged and fixes the height argument to 8.
 */
584 void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
585     FUNCC(put_pixels8)(dst, src, stride, 8);
586 }
/**
 * Non-static wrapper around the FUNCC-named avg_pixels8: forwards dst, src
 * and stride unchanged and fixes the height argument to 8.
 */
587 void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
588     FUNCC(avg_pixels8)(dst, src, stride, 8);
589 }
/**
 * Non-static wrapper around the FUNCC-named put_pixels16: forwards dst, src
 * and stride unchanged and fixes the height argument to 16.
 */
590 void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
591     FUNCC(put_pixels16)(dst, src, stride, 16);
592 }
/**
 * Non-static wrapper around the FUNCC-named avg_pixels16: forwards dst, src
 * and stride unchanged and fixes the height argument to 16.
 */
593 void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
594     FUNCC(avg_pixels16)(dst, src, stride, 16);
595 }