3 * Copyright (c) 2000, 2001 Fabrice Bellard.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
/* DSP function pointers: filled in at init time with either the C
 * fallbacks below or platform-optimized (MMX/AltiVec/...) versions.
 * NOTE(review): surrounding context (e.g. whether these are struct
 * members or file-scope globals) is elided in this listing. */
24 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
25 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
26 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
27 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
/* gmc1: fast 1-vector global motion compensation (1/16-pel weights);
 * gmc: general affine GMC with per-axis deltas and rounding value r */
28 void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
29 void (*ff_gmc )(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
30 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
31 void (*clear_blocks)(DCTELEM *blocks);
32 int (*pix_sum)(UINT8 * pix, int line_size);
33 int (*pix_norm1)(UINT8 * pix, int line_size);
/* pixel-block comparison functions for motion estimation: 16x16 and
 * 8x8 variants at integer (_), half-pel horizontal (_x2), vertical
 * (_y2) and diagonal (_xy2) positions */
35 op_pixels_abs_func pix_abs16x16;
36 op_pixels_abs_func pix_abs16x16_x2;
37 op_pixels_abs_func pix_abs16x16_y2;
38 op_pixels_abs_func pix_abs16x16_xy2;
40 op_pixels_abs_func pix_abs8x8;
41 op_pixels_abs_func pix_abs8x8_x2;
42 op_pixels_abs_func pix_abs8x8_y2;
43 op_pixels_abs_func pix_abs8x8_xy2;
/* cropTbl: saturation lookup; indexed with an offset of MAX_NEG_CROP so
 * out-of-range ints clamp to 0..255 (used by the *_clamped functions).
 * squareTbl: x*x lookup, indexed via sq = squareTbl + 256 below so that
 * signed differences in [-256,255] can be squared by table lookup. */
47 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
48 UINT32 squareTbl[512];
/* Standard JPEG/MPEG zigzag scan order: maps coefficient scan position
 * to raster index within an 8x8 block. */
50 const UINT8 ff_zigzag_direct[64] = {
51 0, 1, 8, 16, 9, 2, 3, 10,
52 17, 24, 32, 25, 18, 11, 4, 5,
53 12, 19, 26, 33, 40, 48, 41, 34,
54 27, 20, 13, 6, 7, 14, 21, 28,
55 35, 42, 49, 56, 57, 50, 43, 36,
56 29, 22, 15, 23, 30, 37, 44, 51,
57 58, 59, 52, 45, 38, 31, 39, 46,
58 53, 60, 61, 54, 47, 55, 62, 63
61 /* not permuted inverse zigzag_direct + 1 for MMX quantizer */
62 UINT16 __align8 inv_zigzag_direct16[64];
/* MPEG-2/MPEG-4 alternate horizontal scan order (used instead of the
 * zigzag scan for some coding modes). */
64 const UINT8 ff_alternate_horizontal_scan[64] = {
65 0, 1, 2, 3, 8, 9, 16, 17,
66 10, 11, 4, 5, 6, 7, 15, 14,
67 13, 12, 19, 18, 24, 25, 32, 33,
68 26, 27, 20, 21, 22, 23, 28, 29,
69 30, 31, 34, 35, 40, 41, 48, 49,
70 42, 43, 36, 37, 38, 39, 44, 45,
71 46, 47, 50, 51, 56, 57, 58, 59,
72 52, 53, 54, 55, 60, 61, 62, 63,
/* MPEG-2/MPEG-4 alternate vertical scan order (typically selected for
 * interlaced content). */
75 const UINT8 ff_alternate_vertical_scan[64] = {
76 0, 8, 16, 24, 1, 9, 2, 10,
77 17, 25, 32, 40, 48, 56, 57, 49,
78 41, 33, 26, 18, 3, 11, 4, 12,
79 19, 27, 34, 42, 50, 58, 35, 43,
80 51, 59, 20, 28, 5, 13, 6, 14,
81 21, 29, 36, 44, 52, 60, 37, 45,
82 53, 61, 22, 30, 7, 15, 23, 31,
83 38, 46, 54, 62, 39, 47, 55, 63,
86 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
88 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
89 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
90 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
91 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
92 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
93 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
94 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
95 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
96 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
97 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
98 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
99 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
100 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
101 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
102 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
103 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
104 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
105 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
106 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
107 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
108 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
109 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
110 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
111 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
112 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
113 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
114 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
115 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
116 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
117 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
118 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
119 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
/* Sum of all pixel values of a 16x16 block (C reference for pix_sum).
 * The inner loop handles 8 pixels per iteration; after each row the
 * pointer is advanced by line_size-16 to reach the next row.
 * NOTE(review): accumulator declarations and the 8 adds per iteration
 * are elided in this listing. */
122 int pix_sum_c(UINT8 * pix, int line_size)
127     for (i = 0; i < 16; i++) {
128 	for (j = 0; j < 16; j += 8) {
139 	pix += line_size - 16;
/* Sum of squared pixel values of a 16x16 block (C reference for
 * pix_norm1). Uses the squareTbl lookup, biased by 256 so signed
 * indices in [-256,255] are valid.
 * NOTE(review): the per-pixel sq[] accumulation lines are elided. */
144 int pix_norm1_c(UINT8 * pix, int line_size)
147     UINT32 *sq = squareTbl + 256;
150     for (i = 0; i < 16; i++) {
151 	for (j = 0; j < 16; j += 8) {
162 	pix += line_size - 16;
/* Copy an 8x8 block of 8-bit pixels into a DCTELEM array (widening to
 * the DCT element type), one unrolled row shown per loop iteration.
 * NOTE(review): the enclosing row loop and the pixels/block pointer
 * advancement are elided in this listing. */
168 void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
172     /* read the pixels */
174         block[0] = pixels[0];
175         block[1] = pixels[1];
176         block[2] = pixels[2];
177         block[3] = pixels[3];
178         block[4] = pixels[4];
179         block[5] = pixels[5];
180         block[6] = pixels[6];
181         block[7] = pixels[7];
/* Compute the per-pixel difference s1 - s2 of two 8x8 blocks into a
 * DCTELEM array (residual for encoding); one unrolled row shown.
 * NOTE(review): the row loop and pointer stepping by stride are elided. */
187 void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
191     /* read the pixels */
193         block[0] = s1[0] - s2[0];
194         block[1] = s1[1] - s2[1];
195         block[2] = s1[2] - s2[2];
196         block[3] = s1[3] - s2[3];
197         block[4] = s1[4] - s2[4];
198         block[5] = s1[5] - s2[5];
199         block[6] = s1[6] - s2[6];
200         block[7] = s1[7] - s2[7];
/* Store an 8x8 DCTELEM block as 8-bit pixels, saturating each value to
 * 0..255 via the cropTbl lookup (cm is offset by MAX_NEG_CROP so
 * negative indices are legal). One unrolled row shown per iteration. */
208 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
212     UINT8 *cm = cropTbl + MAX_NEG_CROP;
214     /* read the pixels */
216         pixels[0] = cm[block[0]];
217         pixels[1] = cm[block[1]];
218         pixels[2] = cm[block[2]];
219         pixels[3] = cm[block[3]];
220         pixels[4] = cm[block[4]];
221         pixels[5] = cm[block[5]];
222         pixels[6] = cm[block[6]];
223         pixels[7] = cm[block[7]];
/* Add an 8x8 DCTELEM residual block onto existing 8-bit pixels with
 * saturation to 0..255 (IDCT add step); cm is the offset crop table.
 * One unrolled row shown per iteration. */
230 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
234     UINT8 *cm = cropTbl + MAX_NEG_CROP;
236     /* read the pixels */
238         pixels[0] = cm[pixels[0] + block[0]];
239         pixels[1] = cm[pixels[1] + block[1]];
240         pixels[2] = cm[pixels[2] + block[2]];
241         pixels[3] = cm[pixels[3] + block[3]];
242         pixels[4] = cm[pixels[4] + block[4]];
243         pixels[5] = cm[pixels[5] + block[5]];
244         pixels[6] = cm[pixels[6] + block[6]];
245         pixels[7] = cm[pixels[7] + block[7]];
/* 64-bit PIXOP2: generates the put/avg pixel-copy and half-pel
 * interpolation primitives using one 8-byte load per 8 pixels.
 * The x2/y2 averages use the classic SWAR trick: with rounding,
 * (a|b) - (((a^b)&0xFE..FE)>>1) == per-byte (a+b+1)>>1; without
 * rounding, (a&b) + (((a^b)&0xFE..FE)>>1) == per-byte (a+b)>>1.
 * The xy2 (diagonal) variants split each byte into low 2 bits (l0/l1)
 * and high 6 bits (h0/h1) so four pixels can be averaged per byte lane
 * without carry between lanes; the 0x02.. / 0x01.. constants are the
 * rounding terms for the rnd / no_rnd versions respectively.
 * Comments are kept outside the macro body so the backslash
 * continuations stay untouched. NOTE(review): loop headers, closing
 * braces and pointer-advance lines are elided in this listing. */
252 #define PIXOP2(OPNAME, OP) \
253 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
257 OP(*((uint64_t*)block), LD64(pixels));\
263 static void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
267 const uint64_t a= LD64(pixels  );\
268 const uint64_t b= LD64(pixels+1);\
269 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
275 static void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
279 const uint64_t a= LD64(pixels  );\
280 const uint64_t b= LD64(pixels+1);\
281 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
287 static void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
291 const uint64_t a= LD64(pixels          );\
292 const uint64_t b= LD64(pixels+line_size);\
293 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
299 static void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
303 const uint64_t a= LD64(pixels          );\
304 const uint64_t b= LD64(pixels+line_size);\
305 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
311 static void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
314 const uint64_t a= LD64(pixels  );\
315 const uint64_t b= LD64(pixels+1);\
316 uint64_t l0=  (a&0x0303030303030303ULL)\
317 + (b&0x0303030303030303ULL)\
318 + 0x0202020202020202ULL;\
319 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
320 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
324 for(i=0; i<h; i+=2){\
325 uint64_t a= LD64(pixels  );\
326 uint64_t b= LD64(pixels+1);\
327 l1=  (a&0x0303030303030303ULL)\
328 + (b&0x0303030303030303ULL);\
329 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
330 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
331 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
336 l0=  (a&0x0303030303030303ULL)\
337 + (b&0x0303030303030303ULL)\
338 + 0x0202020202020202ULL;\
339 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
340 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
341 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
347 static void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
350 const uint64_t a= LD64(pixels  );\
351 const uint64_t b= LD64(pixels+1);\
352 uint64_t l0=  (a&0x0303030303030303ULL)\
353 + (b&0x0303030303030303ULL)\
354 + 0x0101010101010101ULL;\
355 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
356 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
360 for(i=0; i<h; i+=2){\
361 uint64_t a= LD64(pixels  );\
362 uint64_t b= LD64(pixels+1);\
363 l1=  (a&0x0303030303030303ULL)\
364 + (b&0x0303030303030303ULL);\
365 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
366 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
367 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
372 l0=  (a&0x0303030303030303ULL)\
373 + (b&0x0303030303030303ULL)\
374 + 0x0101010101010101ULL;\
375 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
376 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
377 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
383 CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels    , 8)\
384 CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels_x2 , 8)\
385 CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels_y2 , 8)\
386 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels_xy2, 8)\
387 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels_x2 , 8)\
388 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels_y2 , 8)\
389 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels_xy2, 8)\
391 void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
394 OPNAME ## _pixels_x2,\
395 OPNAME ## _pixels_y2,\
396 OPNAME ## _pixels_xy2},\
398 OPNAME ## _pixels16,\
399 OPNAME ## _pixels16_x2,\
400 OPNAME ## _pixels16_y2,\
401 OPNAME ## _pixels16_xy2}\
404 void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
407 OPNAME ## _no_rnd_pixels_x2,\
408 OPNAME ## _no_rnd_pixels_y2,\
409 OPNAME ## _no_rnd_pixels_xy2},\
411 OPNAME ## _pixels16,\
412 OPNAME ## _no_rnd_pixels16_x2,\
413 OPNAME ## _no_rnd_pixels16_y2,\
414 OPNAME ## _no_rnd_pixels16_xy2}\
/* op_avg: rounded per-byte average of 8 packed bytes (SWAR form of
 * (a+b+1)>>1 in every lane). */
417 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
418 #else // 64 bit variant
/* 32-bit PIXOP2: same primitives as the 64-bit branch above but built
 * from two 4-byte loads per 8-pixel row, plus _l2/_l4 helpers that
 * average 2 or 4 source blocks (used by the qpel code further down).
 * Same SWAR identities apply with 32-bit masks (0xFEFEFEFE for the
 * half averages; 0x03030303/0xFCFCFCFC low/high split for the
 * four-way xy2 averages). Comments are kept outside the macro body so
 * the backslash continuations stay untouched. NOTE(review): loop
 * headers, closing braces and pointer stepping are elided here. */
420 #define PIXOP2(OPNAME, OP) \
421 static void OPNAME ## _pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
424 OP(*((uint32_t*)(block  )), LD32(pixels  ));\
425 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
430 static inline void OPNAME ## _no_rnd_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
431 OPNAME ## _pixels8(block, pixels, line_size, h);\
434 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
435 int src_stride1, int src_stride2, int h){\
439 a= LD32(&src1[i*src_stride1  ]);\
440 b= LD32(&src2[i*src_stride2  ]);\
441 OP(*((uint32_t*)&dst[i*dst_stride  ]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
442 a= LD32(&src1[i*src_stride1+4]);\
443 b= LD32(&src2[i*src_stride2+4]);\
444 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
448 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
449 int src_stride1, int src_stride2, int h){\
453 a= LD32(&src1[i*src_stride1  ]);\
454 b= LD32(&src2[i*src_stride2  ]);\
455 OP(*((uint32_t*)&dst[i*dst_stride  ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
456 a= LD32(&src1[i*src_stride1+4]);\
457 b= LD32(&src2[i*src_stride2+4]);\
458 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
462 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
463 int src_stride1, int src_stride2, int h){\
464 OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
465 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
468 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
469 int src_stride1, int src_stride2, int h){\
470 OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
471 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
474 static inline void OPNAME ## _no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
475 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
478 static inline void OPNAME ## _pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
479 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
482 static inline void OPNAME ## _no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
483 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
486 static inline void OPNAME ## _pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
487 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
490 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
491 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
494 uint32_t a, b, c, d, l0, l1, h0, h1;\
495 a= LD32(&src1[i*src_stride1]);\
496 b= LD32(&src2[i*src_stride2]);\
497 c= LD32(&src3[i*src_stride3]);\
498 d= LD32(&src4[i*src_stride4]);\
499 l0=  (a&0x03030303UL)\
502 h0= ((a&0xFCFCFCFCUL)>>2)\
503 + ((b&0xFCFCFCFCUL)>>2);\
504 l1=  (c&0x03030303UL)\
506 h1= ((c&0xFCFCFCFCUL)>>2)\
507 + ((d&0xFCFCFCFCUL)>>2);\
508 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
509 a= LD32(&src1[i*src_stride1+4]);\
510 b= LD32(&src2[i*src_stride2+4]);\
511 c= LD32(&src3[i*src_stride3+4]);\
512 d= LD32(&src4[i*src_stride4+4]);\
513 l0=  (a&0x03030303UL)\
516 h0= ((a&0xFCFCFCFCUL)>>2)\
517 + ((b&0xFCFCFCFCUL)>>2);\
518 l1=  (c&0x03030303UL)\
520 h1= ((c&0xFCFCFCFCUL)>>2)\
521 + ((d&0xFCFCFCFCUL)>>2);\
522 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
525 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
526 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
529 uint32_t a, b, c, d, l0, l1, h0, h1;\
530 a= LD32(&src1[i*src_stride1]);\
531 b= LD32(&src2[i*src_stride2]);\
532 c= LD32(&src3[i*src_stride3]);\
533 d= LD32(&src4[i*src_stride4]);\
534 l0=  (a&0x03030303UL)\
537 h0= ((a&0xFCFCFCFCUL)>>2)\
538 + ((b&0xFCFCFCFCUL)>>2);\
539 l1=  (c&0x03030303UL)\
541 h1= ((c&0xFCFCFCFCUL)>>2)\
542 + ((d&0xFCFCFCFCUL)>>2);\
543 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
544 a= LD32(&src1[i*src_stride1+4]);\
545 b= LD32(&src2[i*src_stride2+4]);\
546 c= LD32(&src3[i*src_stride3+4]);\
547 d= LD32(&src4[i*src_stride4+4]);\
548 l0=  (a&0x03030303UL)\
551 h0= ((a&0xFCFCFCFCUL)>>2)\
552 + ((b&0xFCFCFCFCUL)>>2);\
553 l1=  (c&0x03030303UL)\
555 h1= ((c&0xFCFCFCFCUL)>>2)\
556 + ((d&0xFCFCFCFCUL)>>2);\
557 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
560 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
561 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
562 OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
563 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
565 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
566 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
567 OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
568 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
571 static inline void OPNAME ## _pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
576 const uint32_t a= LD32(pixels  );\
577 const uint32_t b= LD32(pixels+1);\
578 uint32_t l0=  (a&0x03030303UL)\
581 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
582 + ((b&0xFCFCFCFCUL)>>2);\
586 for(i=0; i<h; i+=2){\
587 uint32_t a= LD32(pixels  );\
588 uint32_t b= LD32(pixels+1);\
589 l1=  (a&0x03030303UL)\
591 h1= ((a&0xFCFCFCFCUL)>>2)\
592 + ((b&0xFCFCFCFCUL)>>2);\
593 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
598 l0=  (a&0x03030303UL)\
601 h0= ((a&0xFCFCFCFCUL)>>2)\
602 + ((b&0xFCFCFCFCUL)>>2);\
603 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
607 pixels+=4-line_size*(h+1);\
608 block +=4-line_size*h;\
612 static inline void OPNAME ## _no_rnd_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
617 const uint32_t a= LD32(pixels  );\
618 const uint32_t b= LD32(pixels+1);\
619 uint32_t l0=  (a&0x03030303UL)\
622 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
623 + ((b&0xFCFCFCFCUL)>>2);\
627 for(i=0; i<h; i+=2){\
628 uint32_t a= LD32(pixels  );\
629 uint32_t b= LD32(pixels+1);\
630 l1=  (a&0x03030303UL)\
632 h1= ((a&0xFCFCFCFCUL)>>2)\
633 + ((b&0xFCFCFCFCUL)>>2);\
634 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
639 l0=  (a&0x03030303UL)\
642 h0= ((a&0xFCFCFCFCUL)>>2)\
643 + ((b&0xFCFCFCFCUL)>>2);\
644 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
648 pixels+=4-line_size*(h+1);\
649 block +=4-line_size*h;\
653 CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels8    , 8)\
654 CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels8_x2 , 8)\
655 CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels8_y2 , 8)\
656 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels8_xy2, 8)\
657 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16    , OPNAME ## _pixels8    , 8)\
658 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels8_x2 , 8)\
659 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels8_y2 , 8)\
660 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels8_xy2, 8)\
662 void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
664 OPNAME ## _pixels16,\
665 OPNAME ## _pixels16_x2,\
666 OPNAME ## _pixels16_y2,\
667 OPNAME ## _pixels16_xy2},\
670 OPNAME ## _pixels8_x2,\
671 OPNAME ## _pixels8_y2,\
672 OPNAME ## _pixels8_xy2},\
675 void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
677 OPNAME ## _pixels16,\
678 OPNAME ## _no_rnd_pixels16_x2,\
679 OPNAME ## _no_rnd_pixels16_y2,\
680 OPNAME ## _no_rnd_pixels16_xy2},\
683 OPNAME ## _no_rnd_pixels8_x2,\
684 OPNAME ## _no_rnd_pixels8_y2,\
685 OPNAME ## _no_rnd_pixels8_xy2},\
/* op_avg: rounded per-byte average of 4 packed bytes; op_put: plain
 * store. These parameterize PIXOP2 into the avg_* / put_* families. */
688 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
690 #define op_put(a, b) a = b
/* Legacy scalar PIXOP generator: one OP() per pixel using the avg2/avg4
 * macros below instead of packed SWAR arithmetic. Instantiated for
 * sub/avg/put and the no-rounding variants. NOTE(review): loop bodies,
 * braces and the INCR-based pointer stepping are elided in this
 * listing. */
698 /* FIXME this stuff could be removed as it's not really used anymore */
699 #define PIXOP(BTYPE, OPNAME, OP, INCR) \
701 static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
722 static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
730         OP(p[0], avg2(pix[0], pix[1])); \
731         OP(p[1], avg2(pix[1], pix[2])); \
732         OP(p[2], avg2(pix[2], pix[3])); \
733         OP(p[3], avg2(pix[3], pix[4])); \
734         OP(p[4], avg2(pix[4], pix[5])); \
735         OP(p[5], avg2(pix[5], pix[6])); \
736         OP(p[6], avg2(pix[6], pix[7])); \
737         OP(p[7], avg2(pix[7], pix[8])); \
743 static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
751     pix1 = pixels + line_size; \
753         OP(p[0], avg2(pix[0], pix1[0])); \
754         OP(p[1], avg2(pix[1], pix1[1])); \
755         OP(p[2], avg2(pix[2], pix1[2])); \
756         OP(p[3], avg2(pix[3], pix1[3])); \
757         OP(p[4], avg2(pix[4], pix1[4])); \
758         OP(p[5], avg2(pix[5], pix1[5])); \
759         OP(p[6], avg2(pix[6], pix1[6])); \
760         OP(p[7], avg2(pix[7], pix1[7])); \
767 static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
775     pix1 = pixels + line_size; \
777         OP(p[0], avg4(pix[0], pix[1], pix1[0], pix1[1])); \
778         OP(p[1], avg4(pix[1], pix[2], pix1[1], pix1[2])); \
779         OP(p[2], avg4(pix[2], pix[3], pix1[2], pix1[3])); \
780         OP(p[3], avg4(pix[3], pix[4], pix1[3], pix1[4])); \
781         OP(p[4], avg4(pix[4], pix[5], pix1[4], pix1[5])); \
782         OP(p[5], avg4(pix[5], pix[6], pix1[5], pix1[6])); \
783         OP(p[6], avg4(pix[6], pix[7], pix1[6], pix1[7])); \
784         OP(p[7], avg4(pix[7], pix[8], pix1[7], pix1[8])); \
791 void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
793     OPNAME ## _pixels_x2, \
794     OPNAME ## _pixels_y2, \
795     OPNAME ## _pixels_xy2, \
798 /* rounding primitives */
799 #define avg2(a,b) ((a+b+1)>>1)
800 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
802 #define op_avg(a, b) a = avg2(a, b)
803 #define op_sub(a, b) a -= b
804 #define op_put(a, b) a = b
806 PIXOP(DCTELEM, sub, op_sub, 8)
807 PIXOP(uint8_t, avg, op_avg, line_size)
808 PIXOP(uint8_t, put, op_put, line_size)
810 /* not rounding primitives */
813 #define avg2(a,b) ((a+b)>>1)
814 #define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
816 PIXOP(uint8_t, avg_no_rnd, op_avg, line_size)
817 PIXOP(uint8_t, put_no_rnd, op_put, line_size)
818 /* motion estimation */
/* restore the rounding avg2/avg4 for the motion-estimation code below */
824 #define avg2(a,b) ((a+b+1)>>1)
825 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
/* C reference for 1-vector global motion compensation (MPEG-4 GMC with
 * one warp point): bilinear interpolation at 1/16-pel precision.
 * x16/y16 are the fractional offsets (0..16); A..D are the four
 * bilinear weights, which sum to 256, hence the >>8 after adding the
 * caller-supplied rounder. NOTE(review): the row loop and the
 * dst/src += stride stepping are elided in this listing. */
828 static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder)
830     const int A=(16-x16)*(16-y16);
831     const int B=(   x16)*(16-y16);
832     const int C=(16-x16)*(   y16);
833     const int D=(   x16)*(   y16);
/* one unrolled 8-pixel row: each output is the weighted sum of the
 * 2x2 source neighborhood */
838         dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
839         dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
840         dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
841         dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
842         dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
843         dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
844         dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
845         dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
/* C reference for general (affine) global motion compensation.
 * (ox,oy) is the sub-pel start position, (dxx,dxy)/(dyx,dyy) the
 * per-pixel/per-line position deltas, all in 1/(1<<shift)-pel units;
 * r is the rounding constant. Each output pixel is bilinearly
 * interpolated; when the source coordinate falls outside the picture,
 * the edge is replicated via clip() on the offending axis.
 * NOTE(review): the outer y loop, the src_x/src_y/frac computation and
 * the final shift/rounding lines are elided in this listing. */
851 static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
852                   int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
855     const int s= 1<<shift;
865         for(x=0; x<8; x++){ //XXX FIXME optimize
866             int src_x, src_y, frac_x, frac_y, index;
/* unsigned compare performs the 0<=v<limit range check in one test */
875             if((unsigned)src_x < width){
876                 if((unsigned)src_y < height){
/* fully inside: 2x2 bilinear interpolation */
877                     index= src_x + src_y*stride;
878                     dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
879                                            + src[index       +1]*   frac_x )*(s-frac_y)
880                                         + (  src[index+stride  ]*(s-frac_x)
881                                            + src[index+stride+1]*   frac_x )*   frac_y
/* y out of range: clamp the row, interpolate horizontally only */
884                     index= src_x + clip(src_y, 0, height)*stride;
885                     dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
886                                            + src[index       +1]*   frac_x )*s
/* x out of range: clamp the column, interpolate vertically only */
890                 if((unsigned)src_y < height){
891                     index= clip(src_x, 0, width) + src_y*stride;
892                     dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
893                                            + src[index+stride  ]*   frac_y )*s
/* both out of range: replicate the clamped corner pixel */
896                     index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
897                     dst[y*stride + x]=    src[index         ];
/* Copy h rows of a 17-pixel-wide block (source area for 16-pixel qpel
 * filtering, which needs one extra column): 16 bytes per row via four
 * 32-bit loads/stores. NOTE(review): the row loop, the copy of the
 * 17th byte and the stride stepping are elided in this listing. */
909 static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
914         ST32(dst   , LD32(src   ));
915         ST32(dst+4 , LD32(src+4 ));
916         ST32(dst+8 , LD32(src+8 ));
917         ST32(dst+12, LD32(src+12));
/* Copy h rows of a 9-pixel-wide block (source area for 8-pixel qpel
 * filtering): 8 bytes per row via two 32-bit loads/stores.
 * NOTE(review): the row loop, the copy of the 9th byte and the stride
 * stepping are elided in this listing. */
924 static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
929         ST32(dst   , LD32(src   ));
930         ST32(dst+4 , LD32(src+4 ));
937 #define QPEL_MC(r, OPNAME, RND, OP) \
938 static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
939 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
943 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
944 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
945 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
946 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
947 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
948 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
949 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
950 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
956 static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
957 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
961 const int src0= src[0*srcStride];\
962 const int src1= src[1*srcStride];\
963 const int src2= src[2*srcStride];\
964 const int src3= src[3*srcStride];\
965 const int src4= src[4*srcStride];\
966 const int src5= src[5*srcStride];\
967 const int src6= src[6*srcStride];\
968 const int src7= src[7*srcStride];\
969 const int src8= src[8*srcStride];\
970 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
971 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
972 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
973 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
974 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
975 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
976 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
977 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
983 static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
984 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
988 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
989 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
990 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
991 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
992 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
993 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
994 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
995 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
996 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
997 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
998 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
999 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1000 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1001 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1002 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1003 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1009 static void OPNAME ## mpeg4_qpel16_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
1010 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
1014 const int src0= src[0*srcStride];\
1015 const int src1= src[1*srcStride];\
1016 const int src2= src[2*srcStride];\
1017 const int src3= src[3*srcStride];\
1018 const int src4= src[4*srcStride];\
1019 const int src5= src[5*srcStride];\
1020 const int src6= src[6*srcStride];\
1021 const int src7= src[7*srcStride];\
1022 const int src8= src[8*srcStride];\
1023 const int src9= src[9*srcStride];\
1024 const int src10= src[10*srcStride];\
1025 const int src11= src[11*srcStride];\
1026 const int src12= src[12*srcStride];\
1027 const int src13= src[13*srcStride];\
1028 const int src14= src[14*srcStride];\
1029 const int src15= src[15*srcStride];\
1030 const int src16= src[16*srcStride];\
1031 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1032 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1033 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1034 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1035 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1036 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1037 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1038 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1039 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1040 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1041 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1042 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1043 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1044 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1045 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1046 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
/* NOTE(review): token-pasted quarter-pel MC entry points for 8x8 blocks,
 * generated once per QPEL_MC(OPNAME, RND, OP) instantiation below.
 * qpel8_mcXY_c handles an (X/4, Y/4)-pel offset: X horizontal quarter
 * offset, Y vertical.  The local scratch buffers used here (half[],
 * full[], halfH[], halfV[], halfHV[]) are declared on lines elided from
 * this excerpt; sizes are implied by the 8/9/16-stride arguments. */\
/* mc00: integer-pel position -- plain 8x8 block copy/avg. */\
1052 static void OPNAME ## qpel8_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
1053 OPNAME ## pixels8(dst, src, stride, 8);\
/* mc10/mc30: horizontal half-pel lowpass into half[], then average with
 * the integer pixel on the left (src) resp. right (src+1). */\
1056 static void OPNAME ## qpel8_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
1058 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1059 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
/* mc20: horizontal half-pel, filtered directly into dst. */\
1062 static void OPNAME ## qpel8_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
1063 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1066 static void OPNAME ## qpel8_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
1068 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1069 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
/* Vertical cases first copy 9 source rows into full[] (stride 16) so the
 * vertical filter has its extra tap row available. */\
1072 static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
1075 copy_block9(full, src, 16, stride, 9);\
1076 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
1077 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1080 static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
1082 copy_block9(full, src, 16, stride, 9);\
1083 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16, 8);\
1086 static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
1089 copy_block9(full, src, 16, stride, 9);\
1090 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
1091 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
/* Corner cases (mc11/31/13/33): average four planes -- integer pixels,
 * H-filtered, V-filtered and HV-filtered -- via pixels8_l4().  The +1 /
 * +16 / +17 offsets select the right/bottom integer-pel anchor. */\
1093 static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
1098 copy_block9(full, src, 16, stride, 9);\
1099 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1100 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
1101 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1102 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1104 static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
1109 copy_block9(full, src, 16, stride, 9);\
1110 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1111 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
1112 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1113 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1115 static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
1120 copy_block9(full, src, 16, stride, 9);\
1121 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1122 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
1123 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1124 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1126 static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
1131 copy_block9(full, src, 16, stride, 9);\
1132 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1133 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
1134 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1135 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
/* Edge-center cases (mc21/23/12/32): average two filtered planes. */\
1137 static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
1140 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1141 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1142 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1144 static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
1147 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1148 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1149 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1151 static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1156 copy_block9(full, src, 16, stride, 9);\
1157 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1158 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
1159 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1160 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1162 static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1167 copy_block9(full, src, 16, stride, 9);\
1168 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1169 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
1170 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1171 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
/* mc22: exact center -- H filter then V filter, no extra averaging. */\
1173 static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
1175 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1176 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8, 8);\
/* NOTE(review): 16x16 counterparts of the qpel8_mcXY_c family above --
 * same quarter-pel position scheme, with copy_block17 / 17-row scratch
 * (stride 24) replacing copy_block9 / stride 16.  Scratch buffers
 * (half[], full[], halfH[], halfV[], halfHV[]) are declared on lines
 * elided from this excerpt. */\
1178 static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
1179 OPNAME ## pixels16(dst, src, stride, 16);\
/* mc10/mc30: H half-pel averaged with left/right integer pixel. */\
1182 static void OPNAME ## qpel16_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
1184 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1185 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1188 static void OPNAME ## qpel16_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
1189 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1192 static void OPNAME ## qpel16_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
1194 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1195 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
/* Vertical cases: stage 17 rows into full[] (stride 24) first. */\
1198 static void OPNAME ## qpel16_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
1201 copy_block17(full, src, 24, stride, 17);\
1202 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24, 16);\
1203 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1206 static void OPNAME ## qpel16_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
1208 copy_block17(full, src, 24, stride, 17);\
1209 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24, 16);\
1212 static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
1215 copy_block17(full, src, 24, stride, 17);\
1216 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24, 16);\
1217 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
/* Corner cases: 4-plane average via pixels16_l4(); +1/+24/+25 offsets
 * select the right/bottom integer-pel anchor inside full[]. */\
1219 static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
1224 copy_block17(full, src, 24, stride, 17);\
1225 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1226 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
1227 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1228 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1230 static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
1235 copy_block17(full, src, 24, stride, 17);\
1236 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1237 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
1238 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1239 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1241 static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
1246 copy_block17(full, src, 24, stride, 17);\
1247 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1248 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
1249 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1250 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1252 static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
1257 copy_block17(full, src, 24, stride, 17);\
1258 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1259 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
1260 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1261 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
/* Edge-center cases: 2-plane average via pixels16_l2(). */\
1263 static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
1266 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1267 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1268 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1270 static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
1273 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1274 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1275 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1277 static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1282 copy_block17(full, src, 24, stride, 17);\
1283 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1284 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
1285 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1286 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1288 static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1293 copy_block17(full, src, 24, stride, 17);\
1294 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1295 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
1296 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1297 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
/* mc22: exact center -- H filter then V filter directly to dst. */\
1299 static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
1301 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1302 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16, 16);\
/* NOTE(review): quarter-pel MC dispatch table, indexed [size][dxy]:
 * row 0 holds the 16x16 functions, row 1 the 8x8 ones; within a row the
 * entry index is x + 4*y for quarter-pel offset (x,y), as the mcXY
 * ordering below shows.  The rows' inner opening/closing braces sit on
 * lines elided from this excerpt. */\
1304 qpel_mc_func OPNAME ## qpel_pixels_tab[2][16]={ \
1306 OPNAME ## qpel16_mc00_c, \
1307 OPNAME ## qpel16_mc10_c, \
1308 OPNAME ## qpel16_mc20_c, \
1309 OPNAME ## qpel16_mc30_c, \
1310 OPNAME ## qpel16_mc01_c, \
1311 OPNAME ## qpel16_mc11_c, \
1312 OPNAME ## qpel16_mc21_c, \
1313 OPNAME ## qpel16_mc31_c, \
1314 OPNAME ## qpel16_mc02_c, \
1315 OPNAME ## qpel16_mc12_c, \
1316 OPNAME ## qpel16_mc22_c, \
1317 OPNAME ## qpel16_mc32_c, \
1318 OPNAME ## qpel16_mc03_c, \
1319 OPNAME ## qpel16_mc13_c, \
1320 OPNAME ## qpel16_mc23_c, \
1321 OPNAME ## qpel16_mc33_c, \
1323 OPNAME ## qpel8_mc00_c, \
1324 OPNAME ## qpel8_mc10_c, \
1325 OPNAME ## qpel8_mc20_c, \
1326 OPNAME ## qpel8_mc30_c, \
1327 OPNAME ## qpel8_mc01_c, \
1328 OPNAME ## qpel8_mc11_c, \
1329 OPNAME ## qpel8_mc21_c, \
1330 OPNAME ## qpel8_mc31_c, \
1331 OPNAME ## qpel8_mc02_c, \
1332 OPNAME ## qpel8_mc12_c, \
1333 OPNAME ## qpel8_mc22_c, \
1334 OPNAME ## qpel8_mc32_c, \
1335 OPNAME ## qpel8_mc03_c, \
1336 OPNAME ## qpel8_mc13_c, \
1337 OPNAME ## qpel8_mc23_c, \
1338 OPNAME ## qpel8_mc33_c, \
/* OP() back-ends for QPEL_MC: `b` is the filter sum scaled by 32, so
 * (b+16)>>5 rounds to nearest while (b+15)>>5 is the "no rounding"
 * variant; cm[] is presumably the pixel clamping table (cropTbl offset
 * by MAX_NEG_CROP, declared at the top of the file) -- confirm against
 * the elided macro prologue.  op_avg additionally averages with the
 * existing dst pixel, with its own +1 rounding bias. */
1342 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1343 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1344 #define op_put(a, b) a = cm[((b) + 16)>>5]
1345 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the full qpel MC set for put, put_no_rnd and avg. */
1347 QPEL_MC(0, put_ , _ , op_put)
1348 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1349 QPEL_MC(0, avg_ , _ , op_avg)
/* avg_no_rnd instantiation intentionally left disabled. */
1350 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1352 #undef op_avg_no_rnd
1354 #undef op_put_no_rnd
1356 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* Sum of absolute differences (SAD) of a 16x16 block.  NOTE(review):
 * the opening brace, the `s` accumulator init, the 16-iteration row
 * loop, the per-row pointer advances and the final `return s;` are on
 * lines elided from this excerpt; what remains is the unrolled SAD of
 * one 16-pixel row. */
1362 s += abs(pix1[0] - pix2[0]);
1363 s += abs(pix1[1] - pix2[1]);
1364 s += abs(pix1[2] - pix2[2]);
1365 s += abs(pix1[3] - pix2[3]);
1366 s += abs(pix1[4] - pix2[4]);
1367 s += abs(pix1[5] - pix2[5]);
1368 s += abs(pix1[6] - pix2[6]);
1369 s += abs(pix1[7] - pix2[7]);
1370 s += abs(pix1[8] - pix2[8]);
1371 s += abs(pix1[9] - pix2[9]);
1372 s += abs(pix1[10] - pix2[10]);
1373 s += abs(pix1[11] - pix2[11]);
1374 s += abs(pix1[12] - pix2[12]);
1375 s += abs(pix1[13] - pix2[13]);
1376 s += abs(pix1[14] - pix2[14]);
1377 s += abs(pix1[15] - pix2[15]);
1384 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* 16x16 SAD against pix2 interpolated a half pel to the right: each
 * reference sample is avg2() of two horizontal neighbours (avg2 is
 * defined on elided lines -- presumably a rounding average; confirm).
 * Loop scaffolding (s init, row loop, return) is elided as above.
 * Note pix2[16] is read: the reference block must have one extra
 * column available. */
1390 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1391 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1392 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1393 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1394 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1395 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1396 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1397 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1398 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1399 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1400 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1401 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1402 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1403 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1404 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1405 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1412 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* 16x16 SAD against pix2 interpolated a half pel down: pix3 tracks the
 * row below pix2 and each reference sample is avg2() of the two
 * vertical neighbours.  Loop scaffolding is on elided lines. */
1415 UINT8 *pix3 = pix2 + line_size;
1419 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1420 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1421 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1422 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1423 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1424 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1425 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1426 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1427 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1428 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1429 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1430 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1431 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1432 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1433 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1434 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1442 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* 16x16 SAD against pix2 interpolated a half pel right AND down: each
 * reference sample is avg4() of the surrounding 2x2 neighbourhood
 * (avg4 defined on elided lines -- presumably a rounding 4-way
 * average; confirm).  pix2[16]/pix3[16] are read, so one extra column
 * must be available.  Loop scaffolding is elided. */
1445 UINT8 *pix3 = pix2 + line_size;
1449 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1450 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1451 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1452 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1453 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1454 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1455 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1456 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1457 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1458 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1459 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1460 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1461 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1462 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1463 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1464 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1472 int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* 8x8 SAD; one unrolled row of |pix1[i]-pix2[i]| -- the accumulator
 * init, the 8-row loop and the return are on elided lines. */
1478 s += abs(pix1[0] - pix2[0]);
1479 s += abs(pix1[1] - pix2[1]);
1480 s += abs(pix1[2] - pix2[2]);
1481 s += abs(pix1[3] - pix2[3]);
1482 s += abs(pix1[4] - pix2[4]);
1483 s += abs(pix1[5] - pix2[5]);
1484 s += abs(pix1[6] - pix2[6]);
1485 s += abs(pix1[7] - pix2[7]);
1492 int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* 8x8 SAD vs. half-pel-right reference (avg2 of horizontal neighbours;
 * reads pix2[8], so one extra column is needed).  Loop scaffolding is
 * on elided lines. */
1498 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1499 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1500 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1501 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1502 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1503 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1504 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1505 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1512 int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* 8x8 SAD vs. half-pel-down reference: pix3 is the row below pix2;
 * each reference sample is avg2() of the vertical pair.  Loop
 * scaffolding is on elided lines. */
1515 UINT8 *pix3 = pix2 + line_size;
1519 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1520 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1521 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1522 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1523 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1524 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1525 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1526 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1534 int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* 8x8 SAD vs. half-pel right+down reference: avg4() of the 2x2
 * neighbourhood (reads pix2[8]/pix3[8], one extra column needed).
 * Loop scaffolding is on elided lines. */
1537 UINT8 *pix3 = pix2 + line_size;
1541 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1542 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1543 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1544 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1545 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1546 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1547 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1548 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1556 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
/* Re-orders the first last+1 coefficients of block[] (visited in
 * scantable order) according to permutation[].  Declarations of `i`
 * and the temp[] staging array are on elided lines. */
/* Identity-permutation early-out; the permutation[1]==1 probe is a
 * heuristic (see author's FIXME below). */
1562 if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
/* First pass: presumably gathers block[j] into temp[j] (the loop body
 * lines are elided from this excerpt) -- TODO confirm. */
1564 for(i=0; i<=last; i++){
1565 const int j= scantable[i];
/* Second pass: scatter the staged values back through permutation[]. */
1570 for(i=0; i<=last; i++){
1571 const int j= scantable[i];
1572 const int perm_j= permutation[j];
1573 block[perm_j]= temp[j];
1577 void clear_blocks_c(DCTELEM *blocks)
/* Zero all six 64-coefficient DCT blocks of one macroblock in a single
 * memset (opening/closing braces are on elided lines). */
1579 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1582 void dsputil_init(void)
/* One-time DSP init: build lookup tables, install the portable C
 * implementations into the function pointers, then let the
 * platform-specific initializers override them. */
/* cropTbl: identity over the valid 0..255 range... */
1586 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
/* ...saturating to 255 above; the matching low-side (=0) assignment is
 * on an elided line. */
1587 for(i=0;i<MAX_NEG_CROP;i++) {
1589 cropTbl[i + MAX_NEG_CROP + 256] = 255;
/* squareTbl[256+d] = d*d for d in [-256,255]; used e.g. by get_psnr(). */
1592 for(i=0;i<512;i++) {
1593 squareTbl[i] = (i - 256) * (i - 256);
/* Default (portable C) implementations. */
1596 get_pixels = get_pixels_c;
1597 diff_pixels = diff_pixels_c;
1598 put_pixels_clamped = put_pixels_clamped_c;
1599 add_pixels_clamped = add_pixels_clamped_c;
1602 clear_blocks= clear_blocks_c;
1604 pix_norm1= pix_norm1_c;
1606 pix_abs16x16 = pix_abs16x16_c;
1607 pix_abs16x16_x2 = pix_abs16x16_x2_c;
1608 pix_abs16x16_y2 = pix_abs16x16_y2_c;
1609 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
1610 pix_abs8x8 = pix_abs8x8_c;
1611 pix_abs8x8_x2 = pix_abs8x8_x2_c;
1612 pix_abs8x8_y2 = pix_abs8x8_y2_c;
1613 pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
/* Architecture-specific overrides -- NOTE(review): presumably each call
 * is guarded by an #ifdef on elided lines; confirm before relying on
 * unconditional execution. */
1619 dsputil_init_armv4l();
1622 dsputil_init_mlib();
1625 dsputil_init_alpha();
/* Inverse zigzag scan with a +1 bias -- NOTE(review): likely so that 0
 * can mean "absent"; verify against the users of inv_zigzag_direct16. */
1634 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
1637 /* remove any non bit exact operation (testing purpose) */
1638 void avcodec_set_bit_exact(void)
/* NOTE(review): delegates to the x86 DSP code; the call is presumably
 * wrapped in an #ifdef HAVE_MMX guard on elided lines -- confirm. */
1642 dsputil_set_bit_exact_mmx();
1646 void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
1647 int orig_linesize[3], int coded_linesize,
1648 AVCodecContext *avctx)
/* Computes luma PSNR between original and coded images and stores it in
 * avctx->psnr_y (dB).  Only plane 0 (luma) is processed in the visible
 * lines; the function continues past this excerpt. */
1650 int quad, diff, x, y;
1651 UINT8 *orig, *coded;
/* sq[d] == d*d for d in [-256,255]; squareTbl is filled by dsputil_init. */
1652 UINT32 *sq = squareTbl + 256;
1658 orig = orig_image[0];
1659 coded = coded_image[0];
/* Sum of squared luma differences; the accumulation into `quad`
 * (presumably quad += sq[diff]) is on an elided line -- confirm. */
1661 for (y=0;y<avctx->height;y++) {
1662 for (x=0;x<avctx->width;x++) {
1663 diff = *(orig + x) - *(coded + x);
1666 orig += orig_linesize[0];
1667 coded += coded_linesize;
/* psnr_y transiently holds the MSE before conversion to dB below. */
1670 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
1672 if (avctx->psnr_y) {
1673 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
1674 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
/* Identical images (MSE == 0): report the 99.99 dB sentinel; the
 * `} else {` line is elided from this excerpt. */
1676 avctx->psnr_y = 99.99;