/*
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "simple_idct.h"

void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);

void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);

void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);

void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);

void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);

void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);

uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f, depending on the native width of unsigned long
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
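/* Note (added): ~0UL/255 evaluates to the byte 0x01 repeated across the
 * whole unsigned long (0x01010101 on 32-bit targets, 0x0101010101010101
 * on 64-bit ones), so multiplying it by a byte value replicates that
 * byte into every lane; pb_7f is therefore 0x7f7f...7f at whatever width
 * unsigned long has. */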
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
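/* Illustrative sketch (not part of the original file): a scan table maps
 * coded (scan) order to raster order, so undoing the zigzag of an 8x8
 * block is a single indexed copy. */
static inline void dezigzag_sketch(DCTELEM *raster, const DCTELEM *coded)
{
    int i;
    for (i = 0; i < 64; i++)
        raster[ff_zigzag_direct[i]] = coded[i];
}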
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
/* not permuted inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
const uint32_t ff_inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
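/* Worked example (added note): ff_inverse[3] == 1431655766, and
 * (1000 * (uint64_t)ff_inverse[3]) >> 32 == 333 == 1000/3, matching the
 * identity stated above the table. */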
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i, j, end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
    }

    end=-1;
    for(i=0; i<64; i++){
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}
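/* Usage sketch (assumption: an mpegvideo-style codec context holding a
 * ScanTable and an idct_permutation filled in by dsputil_init):
 *   ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable,
 *                     ff_zigzag_direct);
 * st->permutated[] then holds the scan in the IDCT's input order. */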
static int pix_sum_c(uint8_t * pix, int line_size)
{
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
        pix += line_size - 16;
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if LONG_MAX > 2147483647
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= bswap_32(src[i+0]);
        dst[i+1]= bswap_32(src[i+1]);
        dst[i+2]= bswap_32(src[i+2]);
        dst[i+3]= bswap_32(src[i+3]);
        dst[i+4]= bswap_32(src[i+4]);
        dst[i+5]= bswap_32(src[i+5]);
        dst[i+6]= bswap_32(src[i+6]);
        dst[i+7]= bswap_32(src[i+7]);
    }
    for(;i<w; i++){
        dst[i+0]= bswap_32(src[i+0]);
    }
}
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
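/* Note (added): sq points 256 entries into ff_squareTbl, which is
 * initialized elsewhere so that sq[d] == d*d for -256 <= d < 256. The
 * offset lets the possibly negative difference pix1[i] - pix2[i] index
 * the table directly, giving a branch-free squared-error accumulation. */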
#ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    const int dec_count= w==8 ? 3 : 4;
    static const int scale[2][2][4][4]={
            {268, 239, 239, 213},
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {275, 245, 245, 218},
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 1);
}

static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 0);
}

static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    /* top and bottom */
    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }

    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }

    /* corners */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    for(y=0; y<block_h; y++){
        //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

        //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
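/* Usage sketch (hypothetical values): motion compensation that needs a
 * 17x17 source block whose top-left sample lies 3 samples left of the
 * left picture edge would first build a padded copy and read from it:
 *   ff_emulated_edge_mc(edge_buf, src_ptr, linesize, 17, 17,
 *                       -3, 5, pic_width, pic_height);
 *   src_ptr = edge_buf;
 */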
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = pixels[0];
        block[1] = pixels[1];
        block[2] = pixels[2];
        block[3] = pixels[3];
        block[4] = pixels[4];
        block[5] = pixels[5];
        block[6] = pixels[6];
        block[7] = pixels[7];
        pixels += line_size;
        block += 8;
    }
}
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* write clamped pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];
        pixels += line_size;
        block += 8;
    }
}

static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* write clamped pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels += line_size;
        block += 8;
    }
}

static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* write clamped pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels += line_size;
        block += 8;
    }
}
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* add block values to the pixels and clamp */
    for(i=0;i<8;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels[4] = cm[pixels[4] + block[4]];
        pixels[5] = cm[pixels[5] + block[5]];
        pixels[6] = cm[pixels[6] + block[6]];
        pixels[7] = cm[pixels[7] + block[7]];
        pixels += line_size;
        block += 8;
    }
}

static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* add block values to the pixels and clamp */
    for(i=0;i<4;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels += line_size;
        block += 8;
    }
}

static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* add block values to the pixels and clamp */
    for(i=0;i<2;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<8;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels += line_size;
        block += 8;
    }
}

static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<4;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels += line_size;
        block += 4;
    }
}

static int sum_abs_dctelem_c(DCTELEM *block)
{
    int sum=0, i;
    for(i=0; i<64; i++)
        sum+= FFABS(block[i]);
    return sum;
}
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0= (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL)\
                   + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL)\
              + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0= (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL)\
                   + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL)\
              + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant above; the 32 bit variant follows
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    for(i=0; i<h; i+=2){\
        block[0]= (a1+a0)>>2; /* FIXME non put */\
        block[1]= (b1+b0)>>2;\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint32_t a= AV_RN32(pixels  );\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels  );\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL)\
          + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels  );\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i, j;\
    for(j=0; j<2; j++){\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i, j;\
    for(j=0; j<2; j++){\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels8_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c    , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

#define op_avg(a, b) a = rnd_avg32(a, b)
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
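/* Note (added): the byte-parallel averages used above rest on two SWAR
 * identities, shown here for 32 bits:
 *   rounding up:   avg = (a | b) - (((a ^ b) & 0xFEFEFEFEUL) >> 1)
 *   rounding down: avg = (a & b) + (((a ^ b) & 0xFEFEFEFEUL) >> 1)
 * The 0xFE mask clears each byte's low bit before the shift so that no
 * bit can leak into the neighbouring byte. */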
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)

static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
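/* Note (added): the bilinear weights always satisfy
 * A + B + C + D == 16*16 == 256, so the final >>8 (together with the
 * caller-supplied rounder) renormalizes each output sample. */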
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s= 1<<shift;

        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*(s-frac_y)
                                       + ( src[index+stride  ]*(s-frac_x)
                                         + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_y)
                                         + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index         ];
                }
            }
        }
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
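/* Note (added): the constants are fixed-point reciprocals of the
 * third-pel weight sums: 683 ~ 2^11/3, so (683*(2*a + b + 1)) >> 11
 * approximates (2*a + b)/3, and 2731 ~ 2^15/12 plays the same role for
 * the four-tap cases, whose weights sum to 12. */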
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
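/* Note (added): the chroma weights satisfy A+B+C+D == 8*8 == 64, so
 * op_put's ((b) + 32) >> 6 is a rounded renormalization, and op_avg
 * additionally averages with the existing destination, rounding up. */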
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
        dst+= stride;
        src+= stride;
    }
}
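/* Note (added): the bias here is 32-4 == 28 rather than 32, i.e. this is
 * a rounding-down ("no rounding") variant of the mc8 interpolation, of
 * the kind specified by codecs such as VC-1. */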
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
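/* Note (added): collecting terms per output sample gives the MPEG-4
 * half-sample 8-tap filter (-1, 3, -6, 20, 20, -6, 3, -1)/32 (the /32
 * with rounding is applied by the OP macro); the last rows substitute
 * mirrored edge samples instead of reading past src[8]. */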
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    const int w=16;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
1823 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1824 OPNAME ## pixels8_c(dst, src, stride, 8);\
1827 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1829 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1830 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1833 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1834 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1837 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1839 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1840 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\

#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd

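/* Illustrative sketch (not part of the original file; the example_* helper is
 * hypothetical and unused): the op_* macros above normalize the MPEG-4 qpel
 * filter output. Each 8-tap pass of mpeg4_qpel*_lowpass has a coefficient sum
 * of 32, so "(b + 16) >> 5" is a rounded divide by 32, "(b + 15) >> 5" is the
 * no_rnd variant, and cm (ff_cropTbl offset by MAX_NEG_CROP) clips the result
 * to 0..255. */
static av_unused uint8_t example_qpel_round_put(int filtered_sum)
{
    /* same value op_put() stores for an in-range filter sum */
    return av_clip_uint8((filtered_sum + 16) >> 5);
}
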
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\

#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)

H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
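
/* Illustrative sketch (hypothetical helper, not used by the code): one output
 * sample of the H.264 six-tap (1,-5,20,20,-5,1) half-sample filter defined
 * above. A single pass has gain 32, hence op_put's (b+16)>>5; the hv path
 * filters the 16-bit intermediates a second time without rounding in between,
 * giving a gain of 1024 and op2_put's (b+512)>>10. */
static av_unused uint8_t example_h264_halfpel_tap(const uint8_t *src)
{
    /* src must have 2 valid samples to the left and 3 to the right */
    const int b= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);
    return av_clip_uint8((b + 16) >> 5);
}
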
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
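
/* Illustrative sketch (hypothetical helper, not used elsewhere): the
 * per-sample arithmetic of weight_h264_pixels*_c for a single pixel of
 * explicit weighted prediction. E.g. pix=100, weight=3, offset=1,
 * log2_denom=1 gives (300 + 2 + 1) >> 1 = 151. */
static av_unused uint8_t example_h264_weight_sample(int pix, int log2_denom, int weight, int offset)
{
    offset <<= log2_denom;                       /* scale the offset */
    if(log2_denom) offset += 1<<(log2_denom-1);  /* rounding term */
    return av_clip_uint8((pix*weight + offset) >> log2_denom);
}
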
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}

#ifdef CONFIG_CAVS_DECODER
/* AVS specific */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */

#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
/* VC-1 specific */
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */

void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);

/* H264 specific */
void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);

#if defined(CONFIG_RV30_DECODER)
void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
#endif /* CONFIG_RV30_DECODER */

#if defined(CONFIG_RV40_DECODER)
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}

void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
#endif /* CONFIG_RV40_DECODER */

static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0          ];
        const int src1 = src[  srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        src++;
        dst++;
    }
}

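/* Both wmv2_mspel8 lowpass routines above implement the WMV2 half-sample
 * interpolator: a 4-tap (-1, 9, 9, -1) filter whose coefficients sum to 16,
 * hence the "(... + 8) >> 4" rounded divide before the clip through cm[]. */
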
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}

static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+  stride] = p3 + d2;
    }
    }
}

static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}

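/* In both H.263 loop filters above, d measures the step across the block edge
 * and d1 is the usual deblocking ramp: small steps are smoothed in full,
 * medium ones are folded back (2*strength - d), and large steps, which are
 * likely real image edges, pass through untouched. The "p&256" tests are a
 * branchless clamp of the corrected samples to 0..255. */
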
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}

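/* h261_loop_filter_c is the H.261 in-loop filter: a separable (1,2,1)/4
 * smoothing applied vertically and then horizontally inside the 8x8 block;
 * the outermost rows and columns skip the corresponding filter direction and
 * are only rescaled and rounded back. */
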
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}

static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 16; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                if( FFABS( p2 - p0 ) < beta)
                {
                    const int p3 = pix[-4*xstride];
                    /* p0', p1', p2' */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( FFABS( q2 - q0 ) < beta)
                {
                    const int q3 = pix[3*xstride];
                    /* q0', q1', q2' */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }else{
                /* p0', q0' */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
}

static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}

static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++ ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
        pix += ystride;
    }
}
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}

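/* All eight h264_{v,h}_loop_filter_* wrappers above share two kernels by
 * swapping the stride arguments: xstride is the distance between the two
 * sides of the edge and ystride the step along it. alpha thresholds the edge
 * step |p0-q0| while beta gates the flatness on each side; the *_intra
 * variants implement the stronger bS=4 filter and the tc0-driven ones the
 * bS<4 case. */
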
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

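/* The pix_abs* functions above are the C reference SAD (sum of absolute
 * differences) comparators used by motion estimation; the _x2, _y2 and _xy2
 * variants score half-pel candidates by averaging neighboring samples with
 * avg2/avg4 on the fly instead of interpolating into a temporary buffer. */
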
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

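/* nsse (noise preserving SSE) scores a candidate as plain SSE plus the
 * absolute difference in local 2x2 gradient structure between source and
 * reconstruction, weighted by avctx->nsse_weight (8 when no context is
 * available), so losing texture costs in addition to the raw sample error. */
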
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}

static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}

/**
 * permutes an 8x8 block.
 * @param block the block which will be permuted according to the given permutation vector
 * @param permutation the permutation vector
 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
 * (inverse) permutated to scantable order!
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    int i;
    DCTELEM temp[64];

    if(last<=0) return;
    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations

    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }

    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
}

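/* Usage sketch (hypothetical values, not called from this file): to move the
 * first six scantable-ordered coefficients of a block into idct_permutation
 * order one would call something like
 *
 *     ff_block_permute(block, s->dsp.idct_permutation,
 *                      s->intra_scantable.scantable, 5);
 *
 * coefficients past `last` are assumed to be zero and are not copied back. */
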
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*5);

    for(i=0; i<5; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#ifdef CONFIG_SNOW_ENCODER
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}

static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, sizeof(DCTELEM)*64);
}

/**
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
 */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}

static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}

static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}

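/* Illustrative sketch (hypothetical helper, not used elsewhere): the SWAR
 * expression used above adds sizeof(long) bytes at once with no carries
 * leaking between byte lanes. Masking with pb_7f keeps each byte's sum in
 * 7 bits, and the top bits are recombined via XOR, which is exactly the
 * per-byte (a+b)&0xFF. */
static av_unused long example_swar_add_bytes(long a, long b)
{
    return ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
}
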
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
#ifndef HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}

static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}

#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}

static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}

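/* Both hadamard8 functions above compute a SATD-style metric: an 8x8 Hadamard
 * transform (rows, then columns, with the final butterfly stage folded into
 * BUTTERFLYA's absolute values) of either the src-dst difference or the raw
 * source; the intra variant subtracts |DC| so that a constant offset does not
 * count against the block. */
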
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}

#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

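/* DCT8_1D above is the 1-D butterfly of the 8x8 integer transform from the
 * H.264 High profile (a scaled DCT approximation built from adds and shifts);
 * dct264_sad8x8_c below instantiates it twice via the SRC/DST macros, first
 * over rows in place, then over columns while summing absolute values. */
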
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}

static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}

3759 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3760 MpegEncContext * const s= (MpegEncContext *)c;
3761 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
3762 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3763 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
3769 s->dsp.diff_pixels(temp, src1, src2, stride);
3771 memcpy(bak, temp, 64*sizeof(DCTELEM));
3773 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3774 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3775 ff_simple_idct(temp); //FIXME
3778 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
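/* quant_psnr measures pure quantization error: bak keeps the spatial-domain
 * difference block, while temp makes the full encoder round trip (fdct inside
 * fast_dct_quantize -> dequantize -> idct), so the returned SSE is the
 * distortion that the current qscale alone would introduce. */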
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(bak, stride, temp);

    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
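/* The return value is the usual rate-distortion cost D + lambda*R with
 * lambda = (109/128) * qscale^2 ~= 0.85 * qscale^2; the "+ 64" followed by
 * ">> 7" just rounds the fixed-point product. */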
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}

static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x+=4){
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}

static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

#define SQ(a) ((a)*(a))
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x+=4){
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}

static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
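/* vsad/vsse score vertical (line-to-line) activity only; the encoder uses
 * them e.g. to judge whether field (interlaced) coding would beat frame
 * coding, since interlaced content shows up as strong row-to-row
 * differences. */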
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;

    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);

    return score;
}

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#ifdef CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
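/* WRAPPER8_16_SQ (defined earlier in this file) derives each *16_c function
 * from its 8x8 kernel by summing the metric over the four 8x8 quadrants of a
 * 16x16 block, so every comparison function only has to be written once. */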
static void vector_fmul_c(float *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] *= src[i];
}

static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    src1 += len-1;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[-i];
}

void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int i;
    for(i=0; i<len; i++)
        dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
}

void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    int i,j;
    dst += len;
    win += len;
    src0+= len;
    for(i=-len, j=len-1; i<0; i++, j--) {
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];
        dst[i] = s0*wj - s1*wi + add_bias;
        dst[j] = s0*wi + s1*wj + add_bias;
    }
}
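/* ff_vector_fmul_window_c is the MDCT overlap-add step: src0 is the tail of
 * the previous frame, src1 the head of the current one, and win a symmetric
 * window addressed from its center (win[-len..len-1]). Computing dst[i] and
 * dst[j] together reuses each loaded window pair (wi, wj), which is also the
 * access pattern the SIMD versions rely on. */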
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src[i] * mul;
}

static av_always_inline int float_to_int16_one(const float *src){
    int_fast32_t tmp = *(const int32_t*)src;
    if(tmp & 0xf0000){
        tmp = (0x43c0ffff - tmp)>>31;
        // is this faster on some gcc/cpu combinations?
//      if(tmp > 0x43c0ffff) tmp = 0xFFFF;
//      else                 tmp = 0;
    }
    return tmp - 0x8000;
}
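/* float_to_int16_one reads the IEEE-754 bit pattern directly: callers are
 * expected to have added a magic bias (385.0f, bit pattern 0x43c08000) so an
 * in-range sample lands in [0x43c00000, 0x43c0ffff] and its low 16 bits equal
 * sample + 0x8000. Nonzero bits 16-19 flag an out-of-range value, and
 * (0x43c0ffff - tmp)>>31 turns it into 0 or -1, which after the -0x8000 and
 * the caller's int16_t truncation saturates to -32768 or +32767. */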
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    long i;
    for(i=0; i<len; i++)
        dst[i] = float_to_int16_one(src+i);
}

void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
    int i,j,c;
    if(channels==2){
        for(i=0; i<len; i++){
            dst[2*i]   = float_to_int16_one(src[0]+i);
            dst[2*i+1] = float_to_int16_one(src[1]+i);
        }
    }else{
        for(c=0; c<channels; c++)
            for(i=0, j=c; i<len; i++, j+=channels)
                dst[j] = float_to_int16_one(src[c]+i);
    }
}

static void add_int16_c(int16_t * v1, int16_t * v2, int order)
{
    while (order--)
        *v1++ += *v2++;
}

static void sub_int16_c(int16_t * v1, int16_t * v2, int order)
{
    while (order--)
        *v1++ -= *v2++;
}

static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
    int res = 0;

    while (order--)
        res += (*v1++ * *v2++) >> shift;

    return res;
}
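/* The per-term >>shift in the scalar product keeps each partial sum, not
 * just the final result, within 32 bits; fixed-point codecs choose the shift
 * to match their coefficient precision. */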
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7  565 /* 2048*sqrt (2)*cos (7*pi/16) */
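/* Worked check of the fixed-point scale: W1..W7 are
 * round(2048*sqrt(2)*cos(k*pi/16)), e.g.
 *   W1 = 2048 * 1.414214 * 0.980785 ~= 2840.7 -> 2841
 *   W4 = 2048 * 1.414214 * 0.707107  = 2048.0 -> 2048
 * W0 is simply the 2^11 unit scale applied to the b[0]/b[4] pair, which is
 * why it equals W4. */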
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1*/
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1, with extended precision*/
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
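/* Precision split between the two passes: the row pass only shifts by 8,
 * leaving 3 extra fraction bits in the coefficients; the column pass
 * consumes them again (>>3 on each product, >>14 at the end). The total
 * downshift of 8+3+14 = 25 removes the two 11-bit W scalings plus the
 * transform gain of 8, while keeping intermediate rounding error low. */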
void ff_wmv2_idct_c(short * block){
    int i;

    for(i=0;i<64;i+=8){
        wmv2_idct_row(block+i);
    }
    for(i=0;i<8;i++){
        wmv2_idct_col(block+i);
    }
}
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}

static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
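/* The idct4/2/1 variants back the lowres decoding modes: at lowres n each
 * 8x8 coefficient block is reduced to an (8>>n)x(8>>n) output. The 1x1 case
 * needs no transform at all, just the DC term rescaled with rounding,
 * (block[0] + 4) >> 3, then clamped to 0..255 through the crop table. */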
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }

/* init static data */
void dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
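/* Usage sketch (illustration only, not part of this file): code throughout
 * libavcodec clamps pixel values with
 *     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 *     out = cm[x];   // 0 for x < 0, 255 for x > 255, else x
 * i.e. one table lookup replaces two compares per pixel. */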
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED_16(int, aligned);

    if((long)&aligned & 15){
        if(!did_fail){
#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    ff_check_alignment();

#ifdef CONFIG_ENCODERS
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct    = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct    = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct    = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !ENABLE_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER ) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct    = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(ENABLE_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
            c->idct_put= ff_ea_idct_put_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct    = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }
    if (ENABLE_H264_DECODER) {
        c->h264_idct_add= ff_h264_idct_add_c;
        c->h264_idct8_add= ff_h264_idct8_add_c;
        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
        c->h264_idct_add16     = ff_h264_idct_add16_c;
        c->h264_idct8_add4     = ff_h264_idct8_add4_c;
        c->h264_idct_add8      = ff_h264_idct_add8_c;
        c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
    }
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_block = clear_block_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;
    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;    \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc
    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);
#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;

    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
    c->draw_edges = draw_edges_c;

#ifdef CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);
#endif
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_vc1dsp_init(c,avctx);
#endif
#if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_intrax8dsp_init(c,avctx);
#endif
#if defined(CONFIG_H264_ENCODER)
    ff_h264dspenc_init(c,avctx);
#endif
#if defined(CONFIG_RV30_DECODER)
    ff_rv30dsp_init(c,avctx);
#endif
#if defined(CONFIG_RV40_DECODER)
    ff_rv40dsp_init(c,avctx);
    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
#endif
    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;
    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#ifdef CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#ifdef CONFIG_SNOW_ENCODER
    c->w53[0]= w53_16_c;
    c->w53[1]= w53_8_c;
    c->w97[0]= w97_16_c;
    c->w97[1]= w97_8_c;
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;
#ifdef CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
#endif
    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
    c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
    c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
    c->h264_loop_filter_strength= NULL;
    if (ENABLE_ANY_H263) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    if (ENABLE_VP3_DECODER || ENABLE_THEORA_DECODER) {
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;
#ifdef CONFIG_SNOW_DECODER
    c->vertical_compose97i = ff_snow_vertical_compose97i;
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
    c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif

#ifdef CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#ifdef CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;
#endif
#ifdef CONFIG_FLAC_ENCODER
    c->flac_compute_autocorr = ff_flac_compute_autocorr;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
    c->vector_fmul_window = ff_vector_fmul_window_c;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
    c->float_to_int16 = ff_float_to_int16_c;
    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
    c->add_int16 = add_int16_c;
    c->sub_int16 = sub_int16_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;

    c->shrink[0]= ff_img_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;
    c->prefetch= just_return;

    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

    if (ENABLE_MMX)      dsputil_init_mmx   (c, avctx);
    if (ENABLE_ARM)      dsputil_init_arm   (c, avctx);
    if (ENABLE_MLIB)     dsputil_init_mlib  (c, avctx);
    if (ENABLE_VIS)      dsputil_init_vis   (c, avctx);
    if (ENABLE_ALPHA)    dsputil_init_alpha (c, avctx);
    if (ENABLE_POWERPC)  dsputil_init_ppc   (c, avctx);
    if (ENABLE_MMI)      dsputil_init_mmi   (c, avctx);
    if (ENABLE_SH4)      dsputil_init_sh4   (c, avctx);
    if (ENABLE_BFIN)     dsputil_init_bfin  (c, avctx);

    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
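/* Illustration (a sketch, not code from this file): idct_permutation maps the
 * canonical coefficient order to whatever layout the selected IDCT prefers,
 * and it is applied once when scan tables are built, roughly
 *     for(i=0; i<64; i++)
 *         st->permutated[i] = c->idct_permutation[st->scantable[i]];
 * so the per-block decode loops never pay for the reordering. */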