3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @file libavcodec/dsputil.c
32 #include "simple_idct.h"
40 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
43 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
46 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
49 void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
52 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
55 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
57 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
58 uint32_t ff_squareTbl[512] = {0, };
60 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
61 #define pb_7f (~0UL/255 * 0x7f)
62 #define pb_80 (~0UL/255 * 0x80)
64 const uint8_t ff_zigzag_direct[64] = {
65 0, 1, 8, 16, 9, 2, 3, 10,
66 17, 24, 32, 25, 18, 11, 4, 5,
67 12, 19, 26, 33, 40, 48, 41, 34,
68 27, 20, 13, 6, 7, 14, 21, 28,
69 35, 42, 49, 56, 57, 50, 43, 36,
70 29, 22, 15, 23, 30, 37, 44, 51,
71 58, 59, 52, 45, 38, 31, 39, 46,
72 53, 60, 61, 54, 47, 55, 62, 63
75 /* Specific zigzag scan for 248 idct. NOTE that unlike the
76 specification, we interleave the fields */
77 const uint8_t ff_zigzag248_direct[64] = {
78 0, 8, 1, 9, 16, 24, 2, 10,
79 17, 25, 32, 40, 48, 56, 33, 41,
80 18, 26, 3, 11, 4, 12, 19, 27,
81 34, 42, 49, 57, 50, 58, 35, 43,
82 20, 28, 5, 13, 6, 14, 21, 29,
83 36, 44, 51, 59, 52, 60, 37, 45,
84 22, 30, 7, 15, 23, 31, 38, 46,
85 53, 61, 54, 62, 39, 47, 55, 63,
88 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
89 DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16[64]);
91 const uint8_t ff_alternate_horizontal_scan[64] = {
92 0, 1, 2, 3, 8, 9, 16, 17,
93 10, 11, 4, 5, 6, 7, 15, 14,
94 13, 12, 19, 18, 24, 25, 32, 33,
95 26, 27, 20, 21, 22, 23, 28, 29,
96 30, 31, 34, 35, 40, 41, 48, 49,
97 42, 43, 36, 37, 38, 39, 44, 45,
98 46, 47, 50, 51, 56, 57, 58, 59,
99 52, 53, 54, 55, 60, 61, 62, 63,
102 const uint8_t ff_alternate_vertical_scan[64] = {
103 0, 8, 16, 24, 1, 9, 2, 10,
104 17, 25, 32, 40, 48, 56, 57, 49,
105 41, 33, 26, 18, 3, 11, 4, 12,
106 19, 27, 34, 42, 50, 58, 35, 43,
107 51, 59, 20, 28, 5, 13, 6, 14,
108 21, 29, 36, 44, 52, 60, 37, 45,
109 53, 61, 22, 30, 7, 15, 23, 31,
110 38, 46, 54, 62, 39, 47, 55, 63,
113 /* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
114 * for a>16909558, is an overestimate by less than 1 part in 1<<24 */
115 const uint32_t ff_inverse[257]={
116 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
117 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
118 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
119 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
120 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
121 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
122 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
123 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
124 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
125 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
126 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
127 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
128 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
129 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
130 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
131 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
132 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
133 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
134 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
135 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
136 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
137 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
138 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
139 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
140 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
141 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
142 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
143 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
144 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
145 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
146 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
147 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
151 /* Input permutation for the simple_idct_mmx */
152 static const uint8_t simple_mmx_permutation[64]={
153 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
154 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
155 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
156 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
157 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
158 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
159 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
160 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
163 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
/* Fill the ScanTable *st from a source scan order and a coefficient
 * permutation. Only part of the body is visible in this excerpt. */
165 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
/* keep a reference to the unpermuted scan order */
169 st->scantable= src_scantable;
/* permutated[i] is the i-th scan position remapped through permutation[] */
173 j = src_scantable[i];
174 st->permutated[i] = permutation[j];
/* raster_end[i] is set from a running value `end` derived from the permuted
 * positions; the update of `end` itself is not visible here */
183 j = st->permutated[i];
185 st->raster_end[i]= end;
/* Walks a 16x16 pixel block starting at pix, 8 pixels per inner iteration.
 * The accumulation statements are not visible in this excerpt; presumably
 * the function returns the sum of the 256 pixel values. */
189 static int pix_sum_c(uint8_t * pix, int line_size)
194 for (i = 0; i < 16; i++) {
195 for (j = 0; j < 16; j += 8) {
/* step to the next row: line_size minus the 16 bytes covered in this row
 * (assumes the inner loop advanced pix by 16 -- not visible here) */
206 pix += line_size - 16;
/* Accumulates sq[] lookups of every pixel of a 16x16 block into s
 * (presumably the sum of squared pixel values; the table contents and the
 * return statement are not visible in this excerpt). */
211 static int pix_norm1_c(uint8_t * pix, int line_size)
/* sq points at the middle of the 512-entry ff_squareTbl */
214 uint32_t *sq = ff_squareTbl + 256;
217 for (i = 0; i < 16; i++) {
218 for (j = 0; j < 16; j += 8) {
/* When long is wider than 32 bits, fetch 8 pixels with one 64-bit load and
 * peel the bytes off with shifts; otherwise use two 32-bit loads.
 * NOTE(review): the loads go through a casted pointer, so they rely on pix
 * being suitably aligned for the platform -- confirm with callers. */
229 #if LONG_MAX > 2147483647
230 register uint64_t x=*(uint64_t*)pix;
232 s += sq[(x>>8)&0xff];
233 s += sq[(x>>16)&0xff];
234 s += sq[(x>>24)&0xff];
235 s += sq[(x>>32)&0xff];
236 s += sq[(x>>40)&0xff];
237 s += sq[(x>>48)&0xff];
238 s += sq[(x>>56)&0xff];
/* 32-bit path: first four pixels */
240 register uint32_t x=*(uint32_t*)pix;
242 s += sq[(x>>8)&0xff];
243 s += sq[(x>>16)&0xff];
244 s += sq[(x>>24)&0xff];
/* 32-bit path: next four pixels */
245 x=*(uint32_t*)(pix+4);
247 s += sq[(x>>8)&0xff];
248 s += sq[(x>>16)&0xff];
249 s += sq[(x>>24)&0xff];
/* step to the next row (assumes pix advanced by 16 within the row --
 * the increment is not visible here) */
254 pix += line_size - 16;
/* Byte-swap 32-bit words from src into dst with bswap_32().
 * w is used as the word count bound of the unrolled loop. */
259 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
/* main loop: eight words per iteration while at least eight remain */
262 for(i=0; i+8<=w; i+=8){
263 dst[i+0]= bswap_32(src[i+0]);
264 dst[i+1]= bswap_32(src[i+1]);
265 dst[i+2]= bswap_32(src[i+2]);
266 dst[i+3]= bswap_32(src[i+3]);
267 dst[i+4]= bswap_32(src[i+4]);
268 dst[i+5]= bswap_32(src[i+5]);
269 dst[i+6]= bswap_32(src[i+6]);
270 dst[i+7]= bswap_32(src[i+7]);
/* tail: one word at a time (the loop header is not visible in this excerpt) */
273 dst[i+0]= bswap_32(src[i+0]);
/* Accumulates sq[pix1[k] - pix2[k]] over 4 pixels per row for h rows
 * (presumably a sum of squared errors; table contents are not visible).
 * The row advance of pix1/pix2 is not visible in this excerpt. */
277 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
/* sq is centred in the 512-entry table so that negative differences
 * (down to -255) are valid indices */
280 uint32_t *sq = ff_squareTbl + 256;
283 for (i = 0; i < h; i++) {
284 s += sq[pix1[0] - pix2[0]];
285 s += sq[pix1[1] - pix2[1]];
286 s += sq[pix1[2] - pix2[2]];
287 s += sq[pix1[3] - pix2[3]];
/* Accumulates sq[pix1[k] - pix2[k]] over 8 pixels per row for h rows
 * (presumably a sum of squared errors; table contents are not visible).
 * The row advance of pix1/pix2 is not visible in this excerpt. */
294 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
/* sq is centred in the 512-entry table so that negative differences
 * (down to -255) are valid indices */
297 uint32_t *sq = ff_squareTbl + 256;
300 for (i = 0; i < h; i++) {
301 s += sq[pix1[0] - pix2[0]];
302 s += sq[pix1[1] - pix2[1]];
303 s += sq[pix1[2] - pix2[2]];
304 s += sq[pix1[3] - pix2[3]];
305 s += sq[pix1[4] - pix2[4]];
306 s += sq[pix1[5] - pix2[5]];
307 s += sq[pix1[6] - pix2[6]];
308 s += sq[pix1[7] - pix2[7]];
/* Accumulates sq[pix1[k] - pix2[k]] over 16 pixels per row for h rows
 * (presumably a sum of squared errors; table contents are not visible).
 * The row advance of pix1/pix2 is not visible in this excerpt. */
315 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
/* sq is centred in the 512-entry table so that negative differences
 * (down to -255) are valid indices */
318 uint32_t *sq = ff_squareTbl + 256;
321 for (i = 0; i < h; i++) {
322 s += sq[pix1[ 0] - pix2[ 0]];
323 s += sq[pix1[ 1] - pix2[ 1]];
324 s += sq[pix1[ 2] - pix2[ 2]];
325 s += sq[pix1[ 3] - pix2[ 3]];
326 s += sq[pix1[ 4] - pix2[ 4]];
327 s += sq[pix1[ 5] - pix2[ 5]];
328 s += sq[pix1[ 6] - pix2[ 6]];
329 s += sq[pix1[ 7] - pix2[ 7]];
330 s += sq[pix1[ 8] - pix2[ 8]];
331 s += sq[pix1[ 9] - pix2[ 9]];
332 s += sq[pix1[10] - pix2[10]];
333 s += sq[pix1[11] - pix2[11]];
334 s += sq[pix1[12] - pix2[12]];
335 s += sq[pix1[13] - pix2[13]];
336 s += sq[pix1[14] - pix2[14]];
337 s += sq[pix1[15] - pix2[15]];
346 #if CONFIG_SNOW_ENCODER //dwt is in snow.c
/* Wavelet-domain comparison of two w x h blocks: the pixel differences are
 * written to tmp (row pitch 32), transformed with ff_spatial_dwt(), and each
 * subband coefficient is weighted by scale[type][dec_count-3][level][ori].
 * How the weighted values are accumulated and returned is not visible in
 * this excerpt. */
347 static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
/* 3 decomposition levels for 8-wide blocks, 4 otherwise */
349 const int dec_count= w==8 ? 3 : 4;
/* weights indexed [type][dec_count-3][level][orientation]; going by the
 * section comments below, type 0 holds the 9/7 entries and type 1 the 5/3 */
352 static const int scale[2][2][4][4]={
356 {268, 239, 239, 213},
360 // 9/7 16x16 or 32x32 dec=4
361 {344, 310, 310, 280},
369 {275, 245, 245, 218},
373 // 5/3 16x16 or 32x32 dec=4
374 {352, 317, 317, 286},
/* difference of the two blocks, scaled by 16, four pixels per step.
 * NOTE(review): the difference can be negative and is left-shifted,
 * which ISO C leaves undefined -- confirm this is acceptable here. */
382 for (i = 0; i < h; i++) {
383 for (j = 0; j < w; j+=4) {
384 tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
385 tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
386 tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
387 tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
/* in-place transform of tmp with a stride of 32 ints */
393 ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
/* visit every subband: orientation 0 is only visited at level 0 */
397 for(level=0; level<dec_count; level++){
398 for(ori= level ? 1 : 0; ori<4; ori++){
399 int size= w>>(dec_count-level);
/* bit 0 of ori selects the horizontal offset, bit 1 the vertical one */
400 int sx= (ori&1) ? size : 0;
401 int stride= 32<<(dec_count-level);
402 int sy= (ori&2) ? stride>>1 : 0;
404 for(i=0; i<size; i++){
405 for(j=0; j<size; j++){
/* NOTE(review): this local `v` shadows the `void *v` parameter */
406 int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
/* w_c() for 8-wide blocks with type 1 (the 5/3 entries of its scale table) */
416 static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
417 return w_c(v, pix1, pix2, line_size, 8, h, 1);
/* w_c() for 8-wide blocks with type 0 (the 9/7 entries of its scale table) */
420 static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
421 return w_c(v, pix1, pix2, line_size, 8, h, 0);
/* w_c() for 16-wide blocks with type 1 (the 5/3 entries of its scale table) */
424 static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
425 return w_c(v, pix1, pix2, line_size, 16, h, 1);
/* w_c() for 16-wide blocks with type 0 (the 9/7 entries of its scale table) */
428 static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
429 return w_c(v, pix1, pix2, line_size, 16, h, 0);
/* w_c() for 32-wide blocks with type 1 (the 5/3 entries of its scale table).
 * Unlike the 8/16 variants this one has external linkage. */
432 int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
433 return w_c(v, pix1, pix2, line_size, 32, h, 1);
/* w_c() for 32-wide blocks with type 0 (the 9/7 entries of its scale table).
 * Unlike the 8/16 variants this one has external linkage. */
436 int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
437 return w_c(v, pix1, pix2, line_size, 32, h, 0);
441 /* draw the edges of width 'w' of an image of size width, height:
 * buf points at the top-left image pixel, wrap is the distance in bytes
 * between rows, and the border is written outside the width x height area,
 * so the buffer must have room around the image */
442 //FIXME check that this is ok for mpeg4 interlaced
443 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
445 uint8_t *ptr, *last_line;
448 last_line = buf + (height - 1) * wrap;
/* replicate the first row upwards and the last row downwards */
451 memcpy(buf - (i + 1) * wrap, buf, width);
452 memcpy(last_line + (i + 1) * wrap, last_line, width);
/* extend every row to the left and right with its first/last pixel */
456 for(i=0;i<height;i++) {
457 memset(ptr - w, ptr[0], w);
458 memset(ptr + width, ptr[width-1], w);
/* fill the four corner areas with the nearest corner pixel */
463 memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
464 memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
465 memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
466 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
471 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
472 * @param buf destination buffer
473 * @param src source buffer
474 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
475 * @param block_w width of block
476 * @param block_h height of block
477 * @param src_x x coordinate of the top left sample of the block in the source buffer
478 * @param src_y y coordinate of the top left sample of the block in the source buffer
479 * @param w width of the source buffer
480 * @param h height of the source buffer
482 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
483 int src_x, int src_y, int w, int h){
485 int start_y, start_x, end_y, end_x;
/* Adjust src when the block does not overlap the picture vertically or
 * horizontally. Only some of the conditions and the accompanying
 * coordinate updates are visible in this excerpt. */
488 src+= (h-1-src_y)*linesize;
490 }else if(src_y<=-block_h){
491 src+= (1-block_h-src_y)*linesize;
497 }else if(src_x<=-block_w){
498 src+= (1-block_w-src_x);
/* [start, end) is the part of the block that lies inside the picture */
502 start_y= FFMAX(0, -src_y);
503 start_x= FFMAX(0, -src_x);
504 end_y= FFMIN(block_h, h-src_y);
505 end_x= FFMIN(block_w, w-src_x);
507 // copy existing part
508 for(y=start_y; y<end_y; y++){
509 for(x=start_x; x<end_x; x++){
510 buf[x + y*linesize]= src[x + y*linesize];
/* rows above the valid area: repeat the first valid row */
515 for(y=0; y<start_y; y++){
516 for(x=start_x; x<end_x; x++){
517 buf[x + y*linesize]= buf[x + start_y*linesize];
/* rows below the valid area: repeat the last valid row */
522 for(y=end_y; y<block_h; y++){
523 for(x=start_x; x<end_x; x++){
524 buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
/* for every row of the block, including the rows filled above */
528 for(y=0; y<block_h; y++){
/* columns left of the valid area: repeat the first valid column */
530 for(x=0; x<start_x; x++){
531 buf[x + y*linesize]= buf[start_x + y*linesize];
/* columns right of the valid area: repeat the last valid column */
535 for(x=end_x; x<block_w; x++){
536 buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
/* Copies 8 pixels per row from pixels into the DCTELEM block, widening each
 * uint8_t sample. The row loop and pointer advances are not visible in this
 * excerpt. */
541 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
545 /* read the pixels */
547 block[0] = pixels[0];
548 block[1] = pixels[1];
549 block[2] = pixels[2];
550 block[3] = pixels[3];
551 block[4] = pixels[4];
552 block[5] = pixels[5];
553 block[6] = pixels[6];
554 block[7] = pixels[7];
/* Stores s1[k] - s2[k] for 8 pixels per row into the DCTELEM block.
 * The row loop and pointer advances are not visible in this excerpt. */
560 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
561 const uint8_t *s2, int stride){
564 /* store the per-pixel differences */
566 block[0] = s1[0] - s2[0];
567 block[1] = s1[1] - s2[1];
568 block[2] = s1[2] - s2[2];
569 block[3] = s1[3] - s2[3];
570 block[4] = s1[4] - s2[4];
571 block[5] = s1[5] - s2[5];
572 block[6] = s1[6] - s2[6];
573 block[7] = s1[7] - s2[7];
/* Writes 8 coefficients per row from block to pixels, each mapped through
 * the cm[] lookup (presumably a clamp to the 0..255 range, going by the
 * table name; its contents are not visible here). The rest of the parameter
 * list and the row loop are not visible in this excerpt. */
581 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
/* cm is offset by MAX_NEG_CROP into ff_cropTbl so negative indices are valid */
585 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
587 /* store the coefficients mapped through cm */
589 pixels[0] = cm[block[0]];
590 pixels[1] = cm[block[1]];
591 pixels[2] = cm[block[2]];
592 pixels[3] = cm[block[3]];
593 pixels[4] = cm[block[4]];
594 pixels[5] = cm[block[5]];
595 pixels[6] = cm[block[6]];
596 pixels[7] = cm[block[7]];
/* 4-pixel-wide variant: writes 4 coefficients per row from block to pixels,
 * each mapped through the cm[] lookup. The rest of the parameter list and
 * the row loop are not visible in this excerpt. */
603 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
/* cm is offset by MAX_NEG_CROP into ff_cropTbl so negative indices are valid */
607 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
609 /* store the coefficients mapped through cm */
611 pixels[0] = cm[block[0]];
612 pixels[1] = cm[block[1]];
613 pixels[2] = cm[block[2]];
614 pixels[3] = cm[block[3]];
/* 2-pixel-wide variant: writes 2 coefficients per row from block to pixels,
 * each mapped through the cm[] lookup. The rest of the parameter list and
 * the row loop are not visible in this excerpt. */
621 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
/* cm is offset by MAX_NEG_CROP into ff_cropTbl so negative indices are valid */
625 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
627 /* store the coefficients mapped through cm */
629 pixels[0] = cm[block[0]];
630 pixels[1] = cm[block[1]];
/* Converts an 8x8 block of signed coefficients to unsigned pixels by adding
 * a bias of 128. Out-of-range values are handled by separate branches, of
 * which only the `> 127` test is visible in this excerpt. */
637 static void put_signed_pixels_clamped_c(const DCTELEM *block,
638 uint8_t *restrict pixels,
643 for (i = 0; i < 8; i++) {
644 for (j = 0; j < 8; j++) {
/* upper saturation case (the stored value is not visible here) */
647 else if (*block > 127)
/* in-range value: shift from the signed to the unsigned sample range */
650 *pixels = (uint8_t)(*block + 128);
/* next row: line_size minus the 8 pixels written in this row */
654 pixels += (line_size - 8);
/* Adds 8 coefficients per row from block to the existing pixels and stores
 * the sum mapped through the cm[] lookup (presumably a clamp to 0..255,
 * going by the table name). The rest of the parameter list and the row loop
 * are not visible in this excerpt. */
658 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
/* cm is offset by MAX_NEG_CROP into ff_cropTbl so negative sums are valid indices */
662 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
664 /* add the coefficients to the pixels and map the sums through cm */
666 pixels[0] = cm[pixels[0] + block[0]];
667 pixels[1] = cm[pixels[1] + block[1]];
668 pixels[2] = cm[pixels[2] + block[2]];
669 pixels[3] = cm[pixels[3] + block[3]];
670 pixels[4] = cm[pixels[4] + block[4]];
671 pixels[5] = cm[pixels[5] + block[5]];
672 pixels[6] = cm[pixels[6] + block[6]];
673 pixels[7] = cm[pixels[7] + block[7]];
/* 4-pixel-wide variant: adds 4 coefficients per row from block to the
 * existing pixels and stores the sum mapped through the cm[] lookup.
 * The rest of the parameter list and the row loop are not visible here. */
679 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
/* cm is offset by MAX_NEG_CROP into ff_cropTbl so negative sums are valid indices */
683 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
685 /* add the coefficients to the pixels and map the sums through cm */
687 pixels[0] = cm[pixels[0] + block[0]];
688 pixels[1] = cm[pixels[1] + block[1]];
689 pixels[2] = cm[pixels[2] + block[2]];
690 pixels[3] = cm[pixels[3] + block[3]];
/* 2-pixel-wide variant: adds 2 coefficients per row from block to the
 * existing pixels and stores the sum mapped through the cm[] lookup.
 * The rest of the parameter list and the row loop are not visible here. */
696 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
/* cm is offset by MAX_NEG_CROP into ff_cropTbl so negative sums are valid indices */
700 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
702 /* add the coefficients to the pixels and map the sums through cm */
704 pixels[0] = cm[pixels[0] + block[0]];
705 pixels[1] = cm[pixels[1] + block[1]];
/* Adds 8 coefficients per row from block to pixels without any clamp
 * lookup: the result is stored back into a uint8_t, so it is reduced
 * modulo 256. The row loop is not visible in this excerpt. */
711 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
715 pixels[0] += block[0];
716 pixels[1] += block[1];
717 pixels[2] += block[2];
718 pixels[3] += block[3];
719 pixels[4] += block[4];
720 pixels[5] += block[5];
721 pixels[6] += block[6];
722 pixels[7] += block[7];
/* 4-pixel-wide variant of the unclamped add: the result is stored back
 * into a uint8_t, so it is reduced modulo 256. The row loop is not visible
 * in this excerpt. */
728 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
732 pixels[0] += block[0];
733 pixels[1] += block[1];
734 pixels[2] += block[2];
735 pixels[3] += block[3];
/* Accumulates the absolute values of the coefficients in block into sum.
 * The loop bounds and the return statement are not visible in this excerpt. */
741 static int sum_abs_dctelem_c(DCTELEM *block)
745 sum+= FFABS(block[i]);
751 #define PIXOP2(OPNAME, OP) \
752 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
756 OP(*((uint64_t*)block), AV_RN64(pixels));\
762 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
766 const uint64_t a= AV_RN64(pixels );\
767 const uint64_t b= AV_RN64(pixels+1);\
768 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
774 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
778 const uint64_t a= AV_RN64(pixels );\
779 const uint64_t b= AV_RN64(pixels+1);\
780 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
786 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
790 const uint64_t a= AV_RN64(pixels );\
791 const uint64_t b= AV_RN64(pixels+line_size);\
792 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
798 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
802 const uint64_t a= AV_RN64(pixels );\
803 const uint64_t b= AV_RN64(pixels+line_size);\
804 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
810 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
813 const uint64_t a= AV_RN64(pixels );\
814 const uint64_t b= AV_RN64(pixels+1);\
815 uint64_t l0= (a&0x0303030303030303ULL)\
816 + (b&0x0303030303030303ULL)\
817 + 0x0202020202020202ULL;\
818 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
819 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
823 for(i=0; i<h; i+=2){\
824 uint64_t a= AV_RN64(pixels );\
825 uint64_t b= AV_RN64(pixels+1);\
826 l1= (a&0x0303030303030303ULL)\
827 + (b&0x0303030303030303ULL);\
828 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
829 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
830 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
833 a= AV_RN64(pixels );\
834 b= AV_RN64(pixels+1);\
835 l0= (a&0x0303030303030303ULL)\
836 + (b&0x0303030303030303ULL)\
837 + 0x0202020202020202ULL;\
838 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
839 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
840 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
846 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
849 const uint64_t a= AV_RN64(pixels );\
850 const uint64_t b= AV_RN64(pixels+1);\
851 uint64_t l0= (a&0x0303030303030303ULL)\
852 + (b&0x0303030303030303ULL)\
853 + 0x0101010101010101ULL;\
854 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
855 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
859 for(i=0; i<h; i+=2){\
860 uint64_t a= AV_RN64(pixels );\
861 uint64_t b= AV_RN64(pixels+1);\
862 l1= (a&0x0303030303030303ULL)\
863 + (b&0x0303030303030303ULL);\
864 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
865 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
866 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
869 a= AV_RN64(pixels );\
870 b= AV_RN64(pixels+1);\
871 l0= (a&0x0303030303030303ULL)\
872 + (b&0x0303030303030303ULL)\
873 + 0x0101010101010101ULL;\
874 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
875 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
876 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
882 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
883 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
884 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
885 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
886 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
887 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
888 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
890 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
891 #else // end of the 64 bit variant; the 16/32 bit variant follows
893 #define PIXOP2(OPNAME, OP) \
894 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
897 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
902 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
905 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
910 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
913 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
914 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
919 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
920 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
923 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
924 int src_stride1, int src_stride2, int h){\
928 a= AV_RN32(&src1[i*src_stride1 ]);\
929 b= AV_RN32(&src2[i*src_stride2 ]);\
930 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
931 a= AV_RN32(&src1[i*src_stride1+4]);\
932 b= AV_RN32(&src2[i*src_stride2+4]);\
933 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
937 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
938 int src_stride1, int src_stride2, int h){\
942 a= AV_RN32(&src1[i*src_stride1 ]);\
943 b= AV_RN32(&src2[i*src_stride2 ]);\
944 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
945 a= AV_RN32(&src1[i*src_stride1+4]);\
946 b= AV_RN32(&src2[i*src_stride2+4]);\
947 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
951 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
952 int src_stride1, int src_stride2, int h){\
956 a= AV_RN32(&src1[i*src_stride1 ]);\
957 b= AV_RN32(&src2[i*src_stride2 ]);\
958 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
962 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
963 int src_stride1, int src_stride2, int h){\
967 a= AV_RN16(&src1[i*src_stride1 ]);\
968 b= AV_RN16(&src2[i*src_stride2 ]);\
969 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
973 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
974 int src_stride1, int src_stride2, int h){\
975 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
976 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
979 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
980 int src_stride1, int src_stride2, int h){\
981 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
982 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
985 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
986 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
989 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
990 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
993 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
994 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
997 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
998 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1001 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1002 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1004 for(i=0; i<h; i++){\
1005 uint32_t a, b, c, d, l0, l1, h0, h1;\
1006 a= AV_RN32(&src1[i*src_stride1]);\
1007 b= AV_RN32(&src2[i*src_stride2]);\
1008 c= AV_RN32(&src3[i*src_stride3]);\
1009 d= AV_RN32(&src4[i*src_stride4]);\
1010 l0= (a&0x03030303UL)\
1013 h0= ((a&0xFCFCFCFCUL)>>2)\
1014 + ((b&0xFCFCFCFCUL)>>2);\
1015 l1= (c&0x03030303UL)\
1016 + (d&0x03030303UL);\
1017 h1= ((c&0xFCFCFCFCUL)>>2)\
1018 + ((d&0xFCFCFCFCUL)>>2);\
1019 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1020 a= AV_RN32(&src1[i*src_stride1+4]);\
1021 b= AV_RN32(&src2[i*src_stride2+4]);\
1022 c= AV_RN32(&src3[i*src_stride3+4]);\
1023 d= AV_RN32(&src4[i*src_stride4+4]);\
1024 l0= (a&0x03030303UL)\
1027 h0= ((a&0xFCFCFCFCUL)>>2)\
1028 + ((b&0xFCFCFCFCUL)>>2);\
1029 l1= (c&0x03030303UL)\
1030 + (d&0x03030303UL);\
1031 h1= ((c&0xFCFCFCFCUL)>>2)\
1032 + ((d&0xFCFCFCFCUL)>>2);\
1033 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1037 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1038 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1041 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1042 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1045 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1046 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1049 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1050 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1053 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1054 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1056 for(i=0; i<h; i++){\
1057 uint32_t a, b, c, d, l0, l1, h0, h1;\
1058 a= AV_RN32(&src1[i*src_stride1]);\
1059 b= AV_RN32(&src2[i*src_stride2]);\
1060 c= AV_RN32(&src3[i*src_stride3]);\
1061 d= AV_RN32(&src4[i*src_stride4]);\
1062 l0= (a&0x03030303UL)\
1065 h0= ((a&0xFCFCFCFCUL)>>2)\
1066 + ((b&0xFCFCFCFCUL)>>2);\
1067 l1= (c&0x03030303UL)\
1068 + (d&0x03030303UL);\
1069 h1= ((c&0xFCFCFCFCUL)>>2)\
1070 + ((d&0xFCFCFCFCUL)>>2);\
1071 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1072 a= AV_RN32(&src1[i*src_stride1+4]);\
1073 b= AV_RN32(&src2[i*src_stride2+4]);\
1074 c= AV_RN32(&src3[i*src_stride3+4]);\
1075 d= AV_RN32(&src4[i*src_stride4+4]);\
1076 l0= (a&0x03030303UL)\
1079 h0= ((a&0xFCFCFCFCUL)>>2)\
1080 + ((b&0xFCFCFCFCUL)>>2);\
1081 l1= (c&0x03030303UL)\
1082 + (d&0x03030303UL);\
1083 h1= ((c&0xFCFCFCFCUL)>>2)\
1084 + ((d&0xFCFCFCFCUL)>>2);\
1085 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1088 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1089 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1090 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1091 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1093 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1094 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1095 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1096 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1099 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1101 int i, a0, b0, a1, b1;\
1108 for(i=0; i<h; i+=2){\
1114 block[0]= (a1+a0)>>2; /* FIXME non put */\
1115 block[1]= (b1+b0)>>2;\
1125 block[0]= (a1+a0)>>2;\
1126 block[1]= (b1+b0)>>2;\
1132 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1135 const uint32_t a= AV_RN32(pixels );\
1136 const uint32_t b= AV_RN32(pixels+1);\
1137 uint32_t l0= (a&0x03030303UL)\
1140 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1141 + ((b&0xFCFCFCFCUL)>>2);\
1145 for(i=0; i<h; i+=2){\
1146 uint32_t a= AV_RN32(pixels );\
1147 uint32_t b= AV_RN32(pixels+1);\
1148 l1= (a&0x03030303UL)\
1149 + (b&0x03030303UL);\
1150 h1= ((a&0xFCFCFCFCUL)>>2)\
1151 + ((b&0xFCFCFCFCUL)>>2);\
1152 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1155 a= AV_RN32(pixels );\
1156 b= AV_RN32(pixels+1);\
1157 l0= (a&0x03030303UL)\
1160 h0= ((a&0xFCFCFCFCUL)>>2)\
1161 + ((b&0xFCFCFCFCUL)>>2);\
1162 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1168 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1171 for(j=0; j<2; j++){\
1173 const uint32_t a= AV_RN32(pixels );\
1174 const uint32_t b= AV_RN32(pixels+1);\
1175 uint32_t l0= (a&0x03030303UL)\
1178 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1179 + ((b&0xFCFCFCFCUL)>>2);\
1183 for(i=0; i<h; i+=2){\
1184 uint32_t a= AV_RN32(pixels );\
1185 uint32_t b= AV_RN32(pixels+1);\
1186 l1= (a&0x03030303UL)\
1187 + (b&0x03030303UL);\
1188 h1= ((a&0xFCFCFCFCUL)>>2)\
1189 + ((b&0xFCFCFCFCUL)>>2);\
1190 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1193 a= AV_RN32(pixels );\
1194 b= AV_RN32(pixels+1);\
1195 l0= (a&0x03030303UL)\
1198 h0= ((a&0xFCFCFCFCUL)>>2)\
1199 + ((b&0xFCFCFCFCUL)>>2);\
1200 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1204 pixels+=4-line_size*(h+1);\
1205 block +=4-line_size*h;\
1209 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1212 for(j=0; j<2; j++){\
1214 const uint32_t a= AV_RN32(pixels );\
1215 const uint32_t b= AV_RN32(pixels+1);\
1216 uint32_t l0= (a&0x03030303UL)\
1219 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1220 + ((b&0xFCFCFCFCUL)>>2);\
1224 for(i=0; i<h; i+=2){\
1225 uint32_t a= AV_RN32(pixels );\
1226 uint32_t b= AV_RN32(pixels+1);\
1227 l1= (a&0x03030303UL)\
1228 + (b&0x03030303UL);\
1229 h1= ((a&0xFCFCFCFCUL)>>2)\
1230 + ((b&0xFCFCFCFCUL)>>2);\
1231 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1234 a= AV_RN32(pixels );\
1235 b= AV_RN32(pixels+1);\
1236 l0= (a&0x03030303UL)\
1239 h0= ((a&0xFCFCFCFCUL)>>2)\
1240 + ((b&0xFCFCFCFCUL)>>2);\
1241 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1245 pixels+=4-line_size*(h+1);\
1246 block +=4-line_size*h;\
1250 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
1251 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1252 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1253 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1254 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
1255 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1256 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1257 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
/* Store operators for packed-pixel operations.
 * op_avg replaces a with rnd_avg32(a, b); a appears twice in the expansion, so
 * it must be a side-effect-free lvalue. op_put simply assigns b to a.
 * NOTE(review): presumably these are handed as the OP argument to the
 * pixel-operation generator macros -- the instantiation is not visible here. */
1259 #define op_avg(a, b) a = rnd_avg32(a, b)
1261 #define op_put(a, b) a = b
/* Rounding averages of two and of four values: (sum + count/2) >> log2(count).
 * Every argument is parenthesized so that an argument containing an operator
 * of lower precedence than '+' (for example '&' or '?:') still expands to the
 * intended sum. Each argument is evaluated exactly once. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
/**
 * Single-stride wrapper around put_no_rnd_pixels16_l2().
 *
 * dst, a, b and h are forwarded unchanged; the one stride given here is
 * passed for all three stride arguments of the callee.
 */
1271 static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1272 put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
/**
 * Single-stride wrapper around put_no_rnd_pixels8_l2().
 *
 * dst, a, b and h are forwarded unchanged; the one stride given here is
 * passed for all three stride arguments of the callee.
 */
1275 static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1276 put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
/**
 * Bilinear interpolation of 8 pixels per row with 1/16-pel fractional offsets.
 *
 * Each output pixel is the weighted sum of its 2x2 source neighbourhood
 * (src[k], src[k+1], src[stride+k], src[stride+k+1]). The four weights always
 * add up to 16*16 = 256, which is why the sum is shifted right by 8.
 *
 * @param dst     destination; dst[0..7] are written
 * @param src     source; src[0..8] and src[stride..stride+8] are read
 * @param stride  offset from a source pixel to the one below it
 * @param h       NOTE(review): presumably the number of rows -- the loop over
 *                rows is not visible in this excerpt, confirm
 * @param x16     horizontal fraction in 1/16 units (weights are x16 and 16-x16)
 * @param y16     vertical fraction in 1/16 units (weights are y16 and 16-y16)
 * @param rounder value added to the weighted sum before the >>8
 */
1279 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
/* Weights of the top-left, top-right, bottom-left and bottom-right samples. */
1281 const int A=(16-x16)*(16-y16);
1282 const int B=( x16)*(16-y16);
1283 const int C=(16-x16)*( y16);
1284 const int D=( x16)*( y16);
/* One output row, fully unrolled. */
1289 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
1290 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
1291 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
1292 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
1293 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
1294 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
1295 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
1296 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
/**
 * Per-pixel bilinear sampling of src into an 8-pixel-wide destination, with
 * source coordinates clamped at the picture edges.
 *
 * For every destination pixel a source position (src_x, src_y) and its
 * fractional part (frac_x, frac_y, each in 0..s-1 with s = 1<<shift) are used
 * to pick one of four cases, depending on whether each coordinate lies inside
 * [0, width) / [0, height).
 *
 * @param dst     destination, indexed as dst[y*stride + x]
 * @param src     source picture, indexed as src[src_x + src_y*stride]
 * @param stride  row stride used for both dst and src
 * @param shift   number of fractional bits of the sampling position
 * @param width   horizontal bound and clamp limit for src_x
 * @param height  vertical bound and clamp limit for src_y
 *
 * NOTE(review): h, ox, oy, dxx, dxy, dyx, dyy and r are only used on lines that
 * are not visible in this excerpt (as are the computation of src_x/src_y and
 * the final rounding/normalisation of each sum) -- presumably they describe
 * the row count, the affine position step and the rounding term; confirm.
 */
1302 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1303 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
/* s is the fixed-point value of 1.0 for the fractional weights. */
1306 const int s= 1<<shift;
1316 for(x=0; x<8; x++){ //XXX FIXME optimize
1317 int src_x, src_y, frac_x, frac_y, index;
/* Keep only the low 'shift' bits: the sub-pixel part of the position. */
1321 frac_x= src_x&(s-1);
1322 frac_y= src_y&(s-1);
/* The unsigned casts make negative coordinates fail the range test as well. */
1326 if((unsigned)src_x < width){
1327 if((unsigned)src_y < height){
/* Both coordinates in range: full 2x2 bilinear blend. */
1328 index= src_x + src_y*stride;
1329 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
1330 + src[index +1]* frac_x )*(s-frac_y)
1331 + ( src[index+stride ]*(s-frac_x)
1332 + src[index+stride+1]* frac_x )* frac_y
/* Only src_y out of range: clamp the row and blend horizontally. */
1335 index= src_x + av_clip(src_y, 0, height)*stride;
1336 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
1337 + src[index +1]* frac_x )*s
1341 if((unsigned)src_y < height){
/* Only src_x out of range: clamp the column and blend vertically. */
1342 index= av_clip(src_x, 0, width) + src_y*stride;
1343 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
1344 + src[index+stride ]* frac_y )*s
/* Both out of range: copy the single clamped sample. */
1347 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
1348 dst[y*stride + x]= src[index ];
/**
 * Zero-offset case: dispatch to the plain put_pixelsN_c() copy whose N
 * matches the case label (2, 4, 8 or 16).
 * NOTE(review): the switch header is not visible in this excerpt; presumably
 * the selector is width -- confirm.
 */
1360 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1362 case 2: put_pixels2_c (dst, src, stride, height); break;
1363 case 4: put_pixels4_c (dst, src, stride, height); break;
1364 case 8: put_pixels8_c (dst, src, stride, height); break;
1365 case 16:put_pixels16_c(dst, src, stride, height); break;
/**
 * Horizontal interpolation weighting each pixel 2:1 against its right
 * neighbour, over height rows of width pixels.
 *
 * 683/2048 approximates 1/3 (683*3 = 2049), so each result is roughly
 * (2*src[j] + src[j+1] + 1) / 3. One pixel beyond each row's last output
 * position is read.
 */
1369 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1371 for (i=0; i < height; i++) {
1372 for (j=0; j < width; j++) {
1373 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
/**
 * Horizontal interpolation weighting each pixel 1:2 against its right
 * neighbour, over height rows of width pixels.
 *
 * 683/2048 approximates 1/3 (683*3 = 2049), so each result is roughly
 * (src[j] + 2*src[j+1] + 1) / 3. One pixel beyond each row's last output
 * position is read.
 */
1380 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1382 for (i=0; i < height; i++) {
1383 for (j=0; j < width; j++) {
1384 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
/**
 * Vertical interpolation weighting each pixel 2:1 against the pixel one
 * stride below it, over height rows of width pixels.
 *
 * 683/2048 approximates 1/3 (683*3 = 2049), so each result is roughly
 * (2*src[j] + src[j+stride] + 1) / 3.
 */
1391 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1393 for (i=0; i < height; i++) {
1394 for (j=0; j < width; j++) {
1395 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
/**
 * 2x2 interpolation with weights 4 (current), 3 (right), 3 (below) and
 * 2 (below-right), over height rows of width pixels.
 *
 * The weights sum to 12 and 2731/32768 approximates 1/12 (2731*12 = 32772);
 * the +6 is half of 12, i.e. rounding to nearest.
 */
1402 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1404 for (i=0; i < height; i++) {
1405 for (j=0; j < width; j++) {
1406 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
/**
 * 2x2 interpolation with weights 3 (current), 2 (right), 4 (below) and
 * 3 (below-right), over height rows of width pixels.
 *
 * The weights sum to 12 and 2731/32768 approximates 1/12 (2731*12 = 32772);
 * the +6 is half of 12, i.e. rounding to nearest.
 */
1413 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1415 for (i=0; i < height; i++) {
1416 for (j=0; j < width; j++) {
1417 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/**
 * Vertical interpolation weighting each pixel 1:2 against the pixel one
 * stride below it, over height rows of width pixels.
 *
 * 683/2048 approximates 1/3 (683*3 = 2049), so each result is roughly
 * (src[j] + 2*src[j+stride] + 1) / 3.
 */
1424 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1426 for (i=0; i < height; i++) {
1427 for (j=0; j < width; j++) {
1428 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
/**
 * 2x2 interpolation with weights 3 (current), 4 (right), 2 (below) and
 * 3 (below-right), over height rows of width pixels.
 *
 * The weights sum to 12 and 2731/32768 approximates 1/12 (2731*12 = 32772);
 * the +6 is half of 12, i.e. rounding to nearest.
 */
1435 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1437 for (i=0; i < height; i++) {
1438 for (j=0; j < width; j++) {
1439 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/**
 * 2x2 interpolation with weights 2 (current), 3 (right), 3 (below) and
 * 4 (below-right), over height rows of width pixels.
 *
 * The weights sum to 12 and 2731/32768 approximates 1/12 (2731*12 = 32772);
 * the +6 is half of 12, i.e. rounding to nearest.
 */
1446 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1448 for (i=0; i < height; i++) {
1449 for (j=0; j < width; j++) {
1450 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
/**
 * Zero-offset averaging case: dispatch to the avg_pixelsN_c() routine whose N
 * matches the case label (2, 4, 8 or 16).
 * NOTE(review): the switch header is not visible in this excerpt; presumably
 * the selector is width -- confirm.
 */
1457 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1459 case 2: avg_pixels2_c (dst, src, stride, height); break;
1460 case 4: avg_pixels4_c (dst, src, stride, height); break;
1461 case 8: avg_pixels8_c (dst, src, stride, height); break;
1462 case 16:avg_pixels16_c(dst, src, stride, height); break;
/**
 * Averaging variant of the 2:1 horizontal interpolation.
 *
 * The interpolated value, roughly (2*src[j] + src[j+1] + 1) / 3 via the
 * 683/2048 approximation of 1/3, is averaged with the value already in
 * dst[j], rounding up: (dst + value + 1) >> 1.
 */
1466 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1468 for (i=0; i < height; i++) {
1469 for (j=0; j < width; j++) {
1470 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
/**
 * Averaging variant of the 1:2 horizontal interpolation.
 *
 * The interpolated value, roughly (src[j] + 2*src[j+1] + 1) / 3 via the
 * 683/2048 approximation of 1/3, is averaged with the value already in
 * dst[j], rounding up: (dst + value + 1) >> 1.
 */
1477 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1479 for (i=0; i < height; i++) {
1480 for (j=0; j < width; j++) {
1481 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
/**
 * Averaging variant of the 2:1 vertical interpolation.
 *
 * The interpolated value, roughly (2*src[j] + src[j+stride] + 1) / 3 via the
 * 683/2048 approximation of 1/3, is averaged with the value already in
 * dst[j], rounding up: (dst + value + 1) >> 1.
 */
1488 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1490 for (i=0; i < height; i++) {
1491 for (j=0; j < width; j++) {
1492 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
/**
 * Averaging variant of the 2x2 interpolation with weights 4 (current),
 * 3 (right), 3 (below) and 2 (below-right).
 *
 * The weighted sum (+6 for rounding) is scaled by 2731/32768, an
 * approximation of 1/12, and then averaged with the value already in dst[j],
 * rounding up: (dst + value + 1) >> 1.
 */
1499 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1501 for (i=0; i < height; i++) {
1502 for (j=0; j < width; j++) {
1503 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/**
 * Averaging variant of the 2x2 interpolation with weights 3 (current),
 * 2 (right), 4 (below) and 3 (below-right).
 *
 * The weighted sum (+6 for rounding) is scaled by 2731/32768, an
 * approximation of 1/12, and then averaged with the value already in dst[j],
 * rounding up: (dst + value + 1) >> 1.
 */
1510 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1512 for (i=0; i < height; i++) {
1513 for (j=0; j < width; j++) {
1514 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/**
 * Averaging variant of the 1:2 vertical interpolation.
 *
 * The interpolated value, roughly (src[j] + 2*src[j+stride] + 1) / 3 via the
 * 683/2048 approximation of 1/3, is averaged with the value already in
 * dst[j], rounding up: (dst + value + 1) >> 1.
 */
1521 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1523 for (i=0; i < height; i++) {
1524 for (j=0; j < width; j++) {
1525 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
/**
 * Averaging variant of the 2x2 interpolation with weights 3 (current),
 * 4 (right), 2 (below) and 3 (below-right).
 *
 * The weighted sum (+6 for rounding) is scaled by 2731/32768, an
 * approximation of 1/12, and then averaged with the value already in dst[j],
 * rounding up: (dst + value + 1) >> 1.
 */
1532 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1534 for (i=0; i < height; i++) {
1535 for (j=0; j < width; j++) {
1536 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/**
 * Averaging variant of the 2x2 interpolation with weights 2 (current),
 * 3 (right), 3 (below) and 4 (below-right).
 *
 * The weighted sum (+6 for rounding) is scaled by 2731/32768, an
 * approximation of 1/12, and then averaged with the value already in dst[j],
 * rounding up: (dst + value + 1) >> 1.
 */
1543 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1545 for (i=0; i < height; i++) {
1546 for (j=0; j < width; j++) {
1547 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/**
 * Generate the nine fixed-width put_tpel_pixels<width>_mcXY_c() entry points.
 *
 * Each generated function takes (dst, src, stride, height) and forwards to the
 * generic put_tpel_pixels_mcXY_c() with the macro argument inserted as the
 * width parameter. The wrapper bodies are plain call statements; a type name
 * in front of the call would turn it into an invalid declaration and make
 * every instantiation of this macro fail to compile.
 *
 * @param width  block width; pasted into the function name and passed on as
 *               the width argument, so it must be an integer literal
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
/**
 * Generate the functions OPNAME##h264_chroma_mc2_c, OPNAME##h264_chroma_mc4_c
 * and OPNAME##h264_chroma_mc8_c, which produce 2, 4 and 8 pixels per row.
 *
 * x and y are fractional offsets, asserted to lie in 0..7. They yield four
 * bilinear weights A, B, C and D whose sum is always 8*8 = 64. The weighted
 * sum is handed to OP(dst, value) without normalisation, so OP is responsible
 * for rounding and scaling it back down.
 *
 * Two loop forms are visible in each function: a four-tap one over the 2x2
 * neighbourhood, and a two-tap one (A*src[k] + E*src[step+k]) in which step is
 * stride when C is non-zero and 1 otherwise.
 * NOTE(review): the definition of E, the condition that selects between the
 * two loops and the per-row pointer advance are on lines not visible in this
 * excerpt -- confirm before relying on this description.
 */
1575 #define H264_CHROMA_MC(OPNAME, OP)\
1576 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1577 const int A=(8-x)*(8-y);\
1578 const int B=( x)*(8-y);\
1579 const int C=(8-x)*( y);\
1580 const int D=( x)*( y);\
1583 assert(x<8 && y<8 && x>=0 && y>=0);\
1586 for(i=0; i<h; i++){\
1587 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1588 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1594 const int step= C ? stride : 1;\
1595 for(i=0; i<h; i++){\
1596 OP(dst[0], (A*src[0] + E*src[step+0]));\
1597 OP(dst[1], (A*src[1] + E*src[step+1]));\
1604 static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1605 const int A=(8-x)*(8-y);\
1606 const int B=( x)*(8-y);\
1607 const int C=(8-x)*( y);\
1608 const int D=( x)*( y);\
1611 assert(x<8 && y<8 && x>=0 && y>=0);\
1614 for(i=0; i<h; i++){\
1615 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1616 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1617 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1618 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1624 const int step= C ? stride : 1;\
1625 for(i=0; i<h; i++){\
1626 OP(dst[0], (A*src[0] + E*src[step+0]));\
1627 OP(dst[1], (A*src[1] + E*src[step+1]));\
1628 OP(dst[2], (A*src[2] + E*src[step+2]));\
1629 OP(dst[3], (A*src[3] + E*src[step+3]));\
1636 static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1637 const int A=(8-x)*(8-y);\
1638 const int B=( x)*(8-y);\
1639 const int C=(8-x)*( y);\
1640 const int D=( x)*( y);\
1643 assert(x<8 && y<8 && x>=0 && y>=0);\
1646 for(i=0; i<h; i++){\
1647 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1648 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1649 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1650 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1651 OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
1652 OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
1653 OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
1654 OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
1660 const int step= C ? stride : 1;\
1661 for(i=0; i<h; i++){\
1662 OP(dst[0], (A*src[0] + E*src[step+0]));\
1663 OP(dst[1], (A*src[1] + E*src[step+1]));\
1664 OP(dst[2], (A*src[2] + E*src[step+2]));\
1665 OP(dst[3], (A*src[3] + E*src[step+3]));\
1666 OP(dst[4], (A*src[4] + E*src[step+4]));\
1667 OP(dst[5], (A*src[5] + E*src[step+5]));\
1668 OP(dst[6], (A*src[6] + E*src[step+6]));\
1669 OP(dst[7], (A*src[7] + E*src[step+7]));\
/* Store operators for H264_CHROMA_MC. b is a weighted sum scaled by 64, so
 * ((b) + 32) >> 6 rounds it to the nearest integer. op_put stores that value;
 * op_avg additionally averages it with the current value of a, rounding up.
 * a appears more than once in op_avg and must be a side-effect-free lvalue. */
1676 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
1677 #define op_put(a, b) a = (((b) + 32)>>6)
/* Instantiate put_h264_chroma_mc{2,4,8}_c and avg_h264_chroma_mc{2,4,8}_c. */
1679 H264_CHROMA_MC(put_ , op_put)
1680 H264_CHROMA_MC(avg_ , op_avg)
/**
 * 8-pixel-wide bilinear chroma interpolation with a reduced rounding bias.
 *
 * x and y are fractional offsets, asserted to lie in 0..7. The four weights
 * A, B, C and D sum to 8*8 = 64, so the weighted sum is shifted right by 6.
 * The bias added before the shift is 32 - 4 = 28 rather than the
 * round-to-nearest value 32.
 *
 * @param dst     destination; dst[0..7] are written
 * @param src     source; src[0..8] and src[stride..stride+8] are read
 * @param stride  offset from a source pixel to the one below it
 * @param h       NOTE(review): presumably the number of rows -- the loop over
 *                rows is not visible in this excerpt, confirm
 * @param x       horizontal fraction in 1/8 units
 * @param y       vertical fraction in 1/8 units
 */
1684 static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
/* Weights of the top-left, top-right, bottom-left and bottom-right samples. */
1685 const int A=(8-x)*(8-y);
1686 const int B=( x)*(8-y);
1687 const int C=(8-x)*( y);
1688 const int D=( x)*( y);
1691 assert(x<8 && y<8 && x>=0 && y>=0);
/* One output row, fully unrolled. */
1695 dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
1696 dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
1697 dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
1698 dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
1699 dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
1700 dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
1701 dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
1702 dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
/**
 * Averaging variant of the 8-pixel-wide bilinear chroma interpolation with a
 * reduced rounding bias.
 *
 * x and y are fractional offsets, asserted to lie in 0..7. The four weights
 * A, B, C and D sum to 8*8 = 64; the weighted sum plus a bias of 32 - 4 = 28
 * is shifted right by 6, and the result is combined with the value already in
 * dst through avg2().
 *
 * @param dst     destination; dst[0..7] are read and rewritten
 * @param src     source; src[0..8] and src[stride..stride+8] are read
 * @param stride  offset from a source pixel to the one below it
 * @param h       NOTE(review): presumably the number of rows -- the loop over
 *                rows is not visible in this excerpt, confirm
 * @param x       horizontal fraction in 1/8 units
 * @param y       vertical fraction in 1/8 units
 */
1708 static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
/* Weights of the top-left, top-right, bottom-left and bottom-right samples. */
1709 const int A=(8-x)*(8-y);
1710 const int B=( x)*(8-y);
1711 const int C=(8-x)*( y);
1712 const int D=( x)*( y);
1715 assert(x<8 && y<8 && x>=0 && y>=0);
/* One output row, fully unrolled. */
1719 dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6));
1720 dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6));
1721 dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6));
1722 dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6));
1723 dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6));
1724 dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6));
1725 dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6));
1726 dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6));
1732 #define QPEL_MC(r, OPNAME, RND, OP) \
1733 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1734 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1738 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1739 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1740 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1741 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1742 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1743 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1744 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1745 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1751 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1753 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1757 const int src0= src[0*srcStride];\
1758 const int src1= src[1*srcStride];\
1759 const int src2= src[2*srcStride];\
1760 const int src3= src[3*srcStride];\
1761 const int src4= src[4*srcStride];\
1762 const int src5= src[5*srcStride];\
1763 const int src6= src[6*srcStride];\
1764 const int src7= src[7*srcStride];\
1765 const int src8= src[8*srcStride];\
1766 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1767 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1768 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1769 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1770 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1771 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1772 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1773 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1779 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1780 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1785 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1786 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1787 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1788 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1789 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1790 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1791 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1792 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1793 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1794 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1795 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1796 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1797 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1798 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1799 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1800 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1806 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1807 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1812 const int src0= src[0*srcStride];\
1813 const int src1= src[1*srcStride];\
1814 const int src2= src[2*srcStride];\
1815 const int src3= src[3*srcStride];\
1816 const int src4= src[4*srcStride];\
1817 const int src5= src[5*srcStride];\
1818 const int src6= src[6*srcStride];\
1819 const int src7= src[7*srcStride];\
1820 const int src8= src[8*srcStride];\
1821 const int src9= src[9*srcStride];\
1822 const int src10= src[10*srcStride];\
1823 const int src11= src[11*srcStride];\
1824 const int src12= src[12*srcStride];\
1825 const int src13= src[13*srcStride];\
1826 const int src14= src[14*srcStride];\
1827 const int src15= src[15*srcStride];\
1828 const int src16= src[16*srcStride];\
1829 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1830 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1831 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1832 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1833 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1834 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1835 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1836 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1837 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1838 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1839 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1840 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1841 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1842 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1843 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1844 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1850 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1851 OPNAME ## pixels8_c(dst, src, stride, 8);\
1854 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1856 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1857 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1860 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1861 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1864 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1866 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1867 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1870 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1871 uint8_t full[16*9];\
1873 copy_block9(full, src, 16, stride, 9);\
1874 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1875 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1878 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1879 uint8_t full[16*9];\
1880 copy_block9(full, src, 16, stride, 9);\
1881 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
1884 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1885 uint8_t full[16*9];\
1887 copy_block9(full, src, 16, stride, 9);\
1888 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1889 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1891 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1892 uint8_t full[16*9];\
1895 uint8_t halfHV[64];\
1896 copy_block9(full, src, 16, stride, 9);\
1897 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1898 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1899 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1900 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1902 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1903 uint8_t full[16*9];\
1905 uint8_t halfHV[64];\
1906 copy_block9(full, src, 16, stride, 9);\
1907 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1908 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1909 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1910 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1912 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1913 uint8_t full[16*9];\
1916 uint8_t halfHV[64];\
1917 copy_block9(full, src, 16, stride, 9);\
1918 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1919 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1920 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1921 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1923 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1924 uint8_t full[16*9];\
1926 uint8_t halfHV[64];\
1927 copy_block9(full, src, 16, stride, 9);\
1928 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1929 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1930 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1931 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1933 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1934 uint8_t full[16*9];\
1937 uint8_t halfHV[64];\
1938 copy_block9(full, src, 16, stride, 9);\
1939 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1940 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1941 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1942 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1944 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1945 uint8_t full[16*9];\
1947 uint8_t halfHV[64];\
1948 copy_block9(full, src, 16, stride, 9);\
1949 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1950 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1951 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1952 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1954 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1955 uint8_t full[16*9];\
1958 uint8_t halfHV[64];\
1959 copy_block9(full, src, 16, stride, 9);\
1960 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1961 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1962 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1963 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1965 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1966 uint8_t full[16*9];\
1968 uint8_t halfHV[64];\
1969 copy_block9(full, src, 16, stride, 9);\
1970 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1971 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1972 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1973 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1975 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1977 uint8_t halfHV[64];\
1978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1980 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1982 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1984 uint8_t halfHV[64];\
1985 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1986 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1987 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1989 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1990 uint8_t full[16*9];\
1993 uint8_t halfHV[64];\
1994 copy_block9(full, src, 16, stride, 9);\
1995 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1996 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1997 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1998 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
2000 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2001 uint8_t full[16*9];\
2003 copy_block9(full, src, 16, stride, 9);\
2004 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2005 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
2006 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2008 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2009 uint8_t full[16*9];\
2012 uint8_t halfHV[64];\
2013 copy_block9(full, src, 16, stride, 9);\
2014 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2015 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2016 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2017 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
2019 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2020 uint8_t full[16*9];\
2022 copy_block9(full, src, 16, stride, 9);\
2023 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2024 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2025 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2027 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2029 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
2030 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2032 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2033 OPNAME ## pixels16_c(dst, src, stride, 16);\
2036 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2038 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2039 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
2042 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2043 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
2046 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2048 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2049 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
2052 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2053 uint8_t full[24*17];\
2055 copy_block17(full, src, 24, stride, 17);\
2056 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2057 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
2060 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2061 uint8_t full[24*17];\
2062 copy_block17(full, src, 24, stride, 17);\
2063 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
2066 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2067 uint8_t full[24*17];\
2069 copy_block17(full, src, 24, stride, 17);\
2070 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2071 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
2073 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
2074 uint8_t full[24*17];\
2075 uint8_t halfH[272];\
2076 uint8_t halfV[256];\
2077 uint8_t halfHV[256];\
2078 copy_block17(full, src, 24, stride, 17);\
2079 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2080 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2081 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2082 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2084 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2085 uint8_t full[24*17];\
2086 uint8_t halfH[272];\
2087 uint8_t halfHV[256];\
2088 copy_block17(full, src, 24, stride, 17);\
2089 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2090 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2091 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2092 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2094 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
2095 uint8_t full[24*17];\
2096 uint8_t halfH[272];\
2097 uint8_t halfV[256];\
2098 uint8_t halfHV[256];\
2099 copy_block17(full, src, 24, stride, 17);\
2100 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2101 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2102 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2103 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2105 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2106 uint8_t full[24*17];\
2107 uint8_t halfH[272];\
2108 uint8_t halfHV[256];\
2109 copy_block17(full, src, 24, stride, 17);\
2110 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2111 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2112 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2113 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2115 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
2116 uint8_t full[24*17];\
2117 uint8_t halfH[272];\
2118 uint8_t halfV[256];\
2119 uint8_t halfHV[256];\
2120 copy_block17(full, src, 24, stride, 17);\
2121 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2122 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2123 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2124 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2126 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2127 uint8_t full[24*17];\
2128 uint8_t halfH[272];\
2129 uint8_t halfHV[256];\
2130 copy_block17(full, src, 24, stride, 17);\
2131 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2132 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2133 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2134 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2136 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2137 uint8_t full[24*17];\
2138 uint8_t halfH[272];\
2139 uint8_t halfV[256];\
2140 uint8_t halfHV[256];\
2141 copy_block17(full, src, 24, stride, 17);\
2142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
2143 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2145 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2147 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2148 uint8_t full[24*17];\
2149 uint8_t halfH[272];\
2150 uint8_t halfHV[256];\
2151 copy_block17(full, src, 24, stride, 17);\
2152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2153 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2154 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2155 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2157 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2158 uint8_t halfH[272];\
2159 uint8_t halfHV[256];\
2160 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2161 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2162 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2164 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2165 uint8_t halfH[272];\
2166 uint8_t halfHV[256];\
2167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2168 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2169 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2171 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2172 uint8_t full[24*17];\
2173 uint8_t halfH[272];\
2174 uint8_t halfV[256];\
2175 uint8_t halfHV[256];\
2176 copy_block17(full, src, 24, stride, 17);\
2177 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2178 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2179 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2180 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2182 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2183 uint8_t full[24*17];\
2184 uint8_t halfH[272];\
2185 copy_block17(full, src, 24, stride, 17);\
2186 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2187 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2188 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2190 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2191 uint8_t full[24*17];\
2192 uint8_t halfH[272];\
2193 uint8_t halfV[256];\
2194 uint8_t halfHV[256];\
2195 copy_block17(full, src, 24, stride, 17);\
2196 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2197 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2198 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2199 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2201 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2202 uint8_t full[24*17];\
2203 uint8_t halfH[272];\
2204 copy_block17(full, src, 24, stride, 17);\
2205 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2206 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2207 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2209 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2210 uint8_t halfH[272];\
2211 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2212 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Store operators handed to QPEL_MC below. Each takes a destination a and a
 * filter sum b: a bias is added to b (16, or 15 in the no_rnd forms), the
 * result is shifted right by 5 and looked up in cm[]. The put forms store that
 * value; the avg forms average it with the value already in a, adding 1 before
 * the halving only in the rounding form.
 * cm is not defined by these macros; it has to be in scope at the place where
 * they are expanded. */
2215 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2216 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2217 #define op_put(a, b) a = cm[((b) + 16)>>5]
2218 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate QPEL_MC for the put_, put_no_rnd_ and avg_ prefixes; the
 * avg_no_rnd instantiation is disabled. */
2220 QPEL_MC(0, put_ , _ , op_put)
2221 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2222 QPEL_MC(0, avg_ , _ , op_avg)
2223 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
2225 #undef op_avg_no_rnd
2227 #undef op_put_no_rnd
/**
 * Generate the H.264 quarter-pel lowpass filter helpers for one store mode.
 *
 * Expanding this with a prefix OPNAME defines
 * OPNAME h264_qpel{2,4,8}_{h,v,hv}_lowpass and 16-wide h/v/hv versions that
 * call the 8-wide ones four times (left and right halves, then the same two
 * calls again 8 rows further down). The 2-wide helpers are marked av_unused.
 *
 * Every filtered value is the 6-tap sum
 *   20*(s[0]+s[1]) - 5*(s[-1]+s[2]) + (s[-2]+s[3])
 * taken along a row (h) or along a column (v). The hv variants first write
 * the unscaled horizontal sums for h+5 rows, starting two rows above src,
 * into the int16_t buffer tmp, then rewind tmp and run the same sum down the
 * columns of tmp.
 *
 * OP(dst, sum) stores a single-pass sum and OP2(dst, sum) stores a two-pass
 * sum; both are supplied by whoever expands the macro. cm points into
 * ff_cropTbl, offset by MAX_NEG_CROP, for use by those operators.
 *
 * NOTE(review): this listing skips some lines of the macro (local
 * declarations such as h and i, loop headers, pointer advances and braces);
 * the description above is based on the visible statements only.
 */
2230 #define H264_LOWPASS(OPNAME, OP, OP2) \
2231 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2233 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2237 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2238 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2244 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2246 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2250 const int srcB= src[-2*srcStride];\
2251 const int srcA= src[-1*srcStride];\
2252 const int src0= src[0 *srcStride];\
2253 const int src1= src[1 *srcStride];\
2254 const int src2= src[2 *srcStride];\
2255 const int src3= src[3 *srcStride];\
2256 const int src4= src[4 *srcStride];\
2257 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2258 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2264 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2267 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2269 src -= 2*srcStride;\
2270 for(i=0; i<h+5; i++)\
2272 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2273 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2277 tmp -= tmpStride*(h+5-2);\
2280 const int tmpB= tmp[-2*tmpStride];\
2281 const int tmpA= tmp[-1*tmpStride];\
2282 const int tmp0= tmp[0 *tmpStride];\
2283 const int tmp1= tmp[1 *tmpStride];\
2284 const int tmp2= tmp[2 *tmpStride];\
2285 const int tmp3= tmp[3 *tmpStride];\
2286 const int tmp4= tmp[4 *tmpStride];\
2287 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2288 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2293 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2295 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2299 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2300 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2301 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
2302 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
2308 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2310 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2314 const int srcB= src[-2*srcStride];\
2315 const int srcA= src[-1*srcStride];\
2316 const int src0= src[0 *srcStride];\
2317 const int src1= src[1 *srcStride];\
2318 const int src2= src[2 *srcStride];\
2319 const int src3= src[3 *srcStride];\
2320 const int src4= src[4 *srcStride];\
2321 const int src5= src[5 *srcStride];\
2322 const int src6= src[6 *srcStride];\
2323 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2324 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2325 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2326 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2332 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2335 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2337 src -= 2*srcStride;\
2338 for(i=0; i<h+5; i++)\
2340 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2341 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2342 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
2343 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
2347 tmp -= tmpStride*(h+5-2);\
2350 const int tmpB= tmp[-2*tmpStride];\
2351 const int tmpA= tmp[-1*tmpStride];\
2352 const int tmp0= tmp[0 *tmpStride];\
2353 const int tmp1= tmp[1 *tmpStride];\
2354 const int tmp2= tmp[2 *tmpStride];\
2355 const int tmp3= tmp[3 *tmpStride];\
2356 const int tmp4= tmp[4 *tmpStride];\
2357 const int tmp5= tmp[5 *tmpStride];\
2358 const int tmp6= tmp[6 *tmpStride];\
2359 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2360 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2361 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2362 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2368 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2370 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2374 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
2375 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
2376 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
2377 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
2378 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
2379 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
2380 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
2381 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
2387 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2389 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2393 const int srcB= src[-2*srcStride];\
2394 const int srcA= src[-1*srcStride];\
2395 const int src0= src[0 *srcStride];\
2396 const int src1= src[1 *srcStride];\
2397 const int src2= src[2 *srcStride];\
2398 const int src3= src[3 *srcStride];\
2399 const int src4= src[4 *srcStride];\
2400 const int src5= src[5 *srcStride];\
2401 const int src6= src[6 *srcStride];\
2402 const int src7= src[7 *srcStride];\
2403 const int src8= src[8 *srcStride];\
2404 const int src9= src[9 *srcStride];\
2405 const int src10=src[10*srcStride];\
2406 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2407 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2408 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2409 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2410 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
2411 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
2412 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
2413 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
2419 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2422 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2424 src -= 2*srcStride;\
2425 for(i=0; i<h+5; i++)\
2427 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
2428 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
2429 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
2430 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
2431 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
2432 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
2433 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
2434 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
2438 tmp -= tmpStride*(h+5-2);\
2441 const int tmpB= tmp[-2*tmpStride];\
2442 const int tmpA= tmp[-1*tmpStride];\
2443 const int tmp0= tmp[0 *tmpStride];\
2444 const int tmp1= tmp[1 *tmpStride];\
2445 const int tmp2= tmp[2 *tmpStride];\
2446 const int tmp3= tmp[3 *tmpStride];\
2447 const int tmp4= tmp[4 *tmpStride];\
2448 const int tmp5= tmp[5 *tmpStride];\
2449 const int tmp6= tmp[6 *tmpStride];\
2450 const int tmp7= tmp[7 *tmpStride];\
2451 const int tmp8= tmp[8 *tmpStride];\
2452 const int tmp9= tmp[9 *tmpStride];\
2453 const int tmp10=tmp[10*tmpStride];\
2454 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2455 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2456 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2457 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2458 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
2459 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
2460 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
2461 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
2467 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2468 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2469 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2470 src += 8*srcStride;\
2471 dst += 8*dstStride;\
2472 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2473 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2476 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2477 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2478 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2479 src += 8*srcStride;\
2480 dst += 8*dstStride;\
2481 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2482 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2485 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2486 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2487 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2488 src += 8*srcStride;\
2489 dst += 8*dstStride;\
2490 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2491 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
/**
 * Generate the sixteen OPNAME h264_qpel<SIZE>_mcXY_c entry points
 * (X and Y each in 0..3), all taking (dst, src, stride).
 *
 * - mc00 forwards to OPNAME pixels<SIZE>_c.
 * - mc20, mc02 and mc22 run the OPNAME h, v and hv lowpass filter straight
 *   into dst.
 * - mc10 / mc30 combine the h-filtered block with src / src+1, and
 *   mc01 / mc03 combine the v-filtered block with the source row / the row
 *   below it, through OPNAME pixels<SIZE>_l2.
 * - mc11, mc31, mc13 and mc33 combine an h-filtered block (taken at src or at
 *   src + stride) with a v-filtered block (taken at src or at src + 1).
 * - mc21 / mc23 combine the hv block with an h-filtered block, and
 *   mc12 / mc32 combine it with a v-filtered block.
 *
 * For the vertical filter the source is first copied into full with a row
 * stride of SIZE: SIZE+5 rows starting two rows above src, so full_mid
 * (full + 2*SIZE) is the row that corresponds to src. Intermediate blocks
 * are always produced with the put_ filters; OPNAME only selects how the
 * final result is written to dst.
 *
 * NOTE(review): the closing braces of the generated functions are not part
 * of this listing.
 */
2494 #define H264_MC(OPNAME, SIZE) \
2495 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2496 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
2499 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2500 uint8_t half[SIZE*SIZE];\
2501 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2502 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
2505 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2506 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
2509 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2510 uint8_t half[SIZE*SIZE];\
2511 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2512 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
2515 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2516 uint8_t full[SIZE*(SIZE+5)];\
2517 uint8_t * const full_mid= full + SIZE*2;\
2518 uint8_t half[SIZE*SIZE];\
2519 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2520 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2521 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
2524 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2525 uint8_t full[SIZE*(SIZE+5)];\
2526 uint8_t * const full_mid= full + SIZE*2;\
2527 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2528 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
2531 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2532 uint8_t full[SIZE*(SIZE+5)];\
2533 uint8_t * const full_mid= full + SIZE*2;\
2534 uint8_t half[SIZE*SIZE];\
2535 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2536 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2537 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
2540 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2541 uint8_t full[SIZE*(SIZE+5)];\
2542 uint8_t * const full_mid= full + SIZE*2;\
2543 uint8_t halfH[SIZE*SIZE];\
2544 uint8_t halfV[SIZE*SIZE];\
2545 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2546 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2547 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2548 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2551 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2552 uint8_t full[SIZE*(SIZE+5)];\
2553 uint8_t * const full_mid= full + SIZE*2;\
2554 uint8_t halfH[SIZE*SIZE];\
2555 uint8_t halfV[SIZE*SIZE];\
2556 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2557 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
2558 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2559 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2562 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2563 uint8_t full[SIZE*(SIZE+5)];\
2564 uint8_t * const full_mid= full + SIZE*2;\
2565 uint8_t halfH[SIZE*SIZE];\
2566 uint8_t halfV[SIZE*SIZE];\
2567 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2568 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2569 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2570 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2573 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2574 uint8_t full[SIZE*(SIZE+5)];\
2575 uint8_t * const full_mid= full + SIZE*2;\
2576 uint8_t halfH[SIZE*SIZE];\
2577 uint8_t halfV[SIZE*SIZE];\
2578 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2579 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
2580 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2581 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2584 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2585 int16_t tmp[SIZE*(SIZE+5)];\
2586 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
2589 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2590 int16_t tmp[SIZE*(SIZE+5)];\
2591 uint8_t halfH[SIZE*SIZE];\
2592 uint8_t halfHV[SIZE*SIZE];\
2593 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2594 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2595 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
2598 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2599 int16_t tmp[SIZE*(SIZE+5)];\
2600 uint8_t halfH[SIZE*SIZE];\
2601 uint8_t halfHV[SIZE*SIZE];\
2602 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2603 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2604 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
2607 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2608 uint8_t full[SIZE*(SIZE+5)];\
2609 uint8_t * const full_mid= full + SIZE*2;\
2610 int16_t tmp[SIZE*(SIZE+5)];\
2611 uint8_t halfV[SIZE*SIZE];\
2612 uint8_t halfHV[SIZE*SIZE];\
2613 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2614 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2615 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2616 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
2619 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2620 uint8_t full[SIZE*(SIZE+5)];\
2621 uint8_t * const full_mid= full + SIZE*2;\
2622 int16_t tmp[SIZE*(SIZE+5)];\
2623 uint8_t halfV[SIZE*SIZE];\
2624 uint8_t halfHV[SIZE*SIZE];\
2625 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
2626 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2627 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2628 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
/* Store operators for the H264_LOWPASS instantiations below.
 * op_put / op_avg take a single-pass filter sum: bias 16, shift right by 5.
 * op2_put / op2_avg take a two-pass (hv) sum: bias 512, shift right by 10.
 * In both cases the shifted value is looked up in cm[]; the avg forms then
 * average it, rounding up, with the value already stored in a. */
2631 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2632 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2633 #define op_put(a, b) a = cm[((b) + 16)>>5]
2634 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2635 #define op2_put(a, b) a = cm[((b) + 512)>>10]
/* Emit the put_ and avg_ families of the lowpass helpers. */
2637 H264_LOWPASS(put_ , op_put, op2_put)
2638 H264_LOWPASS(avg_ , op_avg, op2_avg)
/* op_scale1(x): rescale block[x] in place as
 *   (block[x]*weight + offset) >> log2_denom, clipped to 8 bits.
 * op_scale2(x): blend src[x] and dst[x] into dst[x] as
 *   (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1),
 *   clipped to 8 bits.
 * Both rely on block/src/dst, the weights, offset and log2_denom being in
 * scope where they are expanded. */
2653 #define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
2654 #define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
/**
 * Generate weight_h264_pixels<W>x<H>_c and biweight_h264_pixels<W>x<H>_c.
 *
 * weight_*: offset is shifted left by log2_denom and, when log2_denom is
 * non-zero, half of the divisor (1 << (log2_denom-1)) is added to it before
 * the H rows of block are walked with a step of stride.
 * biweight_*: offset becomes ((offset + 1) | 1) << log2_denom before the H
 * rows of dst and src are walked together with a step of stride.
 * In both row loops the "if(W==n) continue;" checks end the work on a row
 * once W columns have been handled (checks at 2, 4 and 8).
 *
 * NOTE(review): the per-column statements between those checks, the
 * declaration of y and the closing braces are not part of this listing;
 * presumably they are op_scale1 / op_scale2 invocations - confirm against
 * the full file.
 */
2655 #define H264_WEIGHT(W,H) \
2656 static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
2658 offset <<= log2_denom; \
2659 if(log2_denom) offset += 1<<(log2_denom-1); \
2660 for(y=0; y<H; y++, block += stride){ \
2663 if(W==2) continue; \
2666 if(W==4) continue; \
2671 if(W==8) continue; \
2682 static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
2684 offset = ((offset + 1) | 1) << log2_denom; \
2685 for(y=0; y<H; y++, dst += stride, src += stride){ \
2688 if(W==2) continue; \
2691 if(W==4) continue; \
2696 if(W==8) continue; \
/**
 * Horizontal 4-tap half-sample filter producing 8 outputs per row.
 *
 * Each output k is (9*(src[k]+src[k+1]) - (src[k-1]+src[k+2]) + 8) >> 4,
 * looked up in cm, so src[-1] .. src[9] are read for one row.
 * cm points into ff_cropTbl, offset by MAX_NEG_CROP (presumably so that
 * out-of-range sums are clamped to 0..255 - confirm against the table setup).
 *
 * NOTE(review): the row loop that uses h, dstStride and srcStride is not part
 * of this listing.
 */
2723 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2724 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2728 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2729 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2730 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2731 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2732 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2733 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2734 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2735 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
/* CAVS support, compiled only when CONFIG_CAVS_DECODER is set: the init
 * prototype plus the four exported mc00 entry points. Each mc00 function
 * forwards unchanged to the generic put_/avg_ pixels8/pixels16 routine,
 * passing 8 or 16 as the last argument (presumably the row count). */
2741 #if CONFIG_CAVS_DECODER
2743 void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
2745 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2746 put_pixels8_c(dst, src, stride, 8);
2748 void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2749 avg_pixels8_c(dst, src, stride, 8);
2751 void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2752 put_pixels16_c(dst, src, stride, 16);
2754 void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2755 avg_pixels16_c(dst, src, stride, 16);
2757 #endif /* CONFIG_CAVS_DECODER */
2759 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
/* VC-1 support, compiled only when CONFIG_VC1_DECODER is set: the init
 * prototype plus the two exported mc00 entry points. Both forward to the
 * generic 8-wide put_/avg_ pixel routine; the rnd parameter is accepted but
 * not used in the visible bodies. */
2761 #if CONFIG_VC1_DECODER
2763 void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
2765 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
2766 put_pixels8_c(dst, src, stride, 8);
2768 void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
2769 avg_pixels8_c(dst, src, stride, 8);
2771 #endif /* CONFIG_VC1_DECODER */
2773 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
2776 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
2778 #if CONFIG_RV30_DECODER
2779 void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
2780 #endif /* CONFIG_RV30_DECODER */
/* RV40 support, compiled only when CONFIG_RV40_DECODER is set. The mc33
 * entry points for 16- and 8-wide blocks forward to the generic
 * put_/avg_ pixels*_xy2_c routines instead of using a dedicated filter. */
2782 #if CONFIG_RV40_DECODER
2783 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2784 put_pixels16_xy2_c(dst, src, stride, 16);
2786 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2787 avg_pixels16_xy2_c(dst, src, stride, 16);
2789 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2790 put_pixels8_xy2_c(dst, src, stride, 8);
2792 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2793 avg_pixels8_xy2_c(dst, src, stride, 8);
2796 void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
2797 #endif /* CONFIG_RV40_DECODER */
/**
 * Vertical 4-tap half-sample filter producing 8 outputs per column.
 *
 * Output row k is (9*(s[k]+s[k+1]) - (s[k-1]+s[k+2]) + 8) >> 4, looked up in
 * cm, where s[] are the samples of one column. Rows -1 .. 9 of the source are
 * therefore read, i.e. 11 rows for 8 output rows.
 *
 * NOTE(review): the column loop that uses w is not part of this listing.
 */
2799 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2800 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* Load the 11 source rows of the current column once. */
2804 const int src_1= src[ -srcStride];
2805 const int src0 = src[0 ];
2806 const int src1 = src[ srcStride];
2807 const int src2 = src[2*srcStride];
2808 const int src3 = src[3*srcStride];
2809 const int src4 = src[4*srcStride];
2810 const int src5 = src[5*srcStride];
2811 const int src6 = src[6*srcStride];
2812 const int src7 = src[7*srcStride];
2813 const int src8 = src[8*srcStride];
2814 const int src9 = src[9*srcStride];
2815 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2816 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2817 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2818 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2819 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2820 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2821 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2822 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* mspel position (0,0): no filtering, forward to the generic 8-wide put
 * routine with 8 as the last argument. */
2828 static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
2829 put_pixels8_c(dst, src, stride, 8);
/* mspel position (1,0): filter 8 rows horizontally into half (row stride 8),
 * then combine src with half through put_pixels8_l2.
 * NOTE(review): the declaration of half is not part of this listing. */
2832 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
2834 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
2835 put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
/* mspel position (2,0): the horizontally filtered block is written straight
 * to dst, 8 rows, with dst and src sharing the same stride. */
2838 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
2839 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
/* mspel position (3,0): same as mc10 except that the filtered block is
 * combined with src+1, the sample to the right.
 * NOTE(review): the declaration of half is not part of this listing. */
2842 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
2844 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
2845 put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
/* mspel position (0,2): the vertically filtered block is written straight to
 * dst, 8 columns, with dst and src sharing the same stride. */
2848 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
2849 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
/* mspel position (1,2): combine the vertically filtered block with the block
 * filtered in both directions.
 * NOTE(review): the declarations of halfH, halfV and halfHV are not part of
 * this listing. */
2852 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
/* 11 rows starting one row above src: the vertical pass below reads rows
 * -1 .. 9 relative to its start. */
2856 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
2857 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
/* halfH+8 skips the extra top row (row stride is 8), so it lines up with src. */
2858 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
2859 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
/* mspel position (3,2): same as mc12 except that the vertically filtered
 * block is taken one column to the right (src+1).
 * NOTE(review): the declarations of halfH, halfV and halfHV are not part of
 * this listing. */
2861 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
/* 11 rows starting one row above src, as required by the vertical pass. */
2865 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
2866 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
/* halfH+8 skips the extra top row (row stride is 8). */
2867 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
2868 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
/* mspel position (2,2): horizontal pass over 11 rows (starting one row above
 * src) into halfH, then the vertical pass over halfH writes dst directly.
 * NOTE(review): the declaration of halfH is not part of this listing. */
2870 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
2872 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
/* halfH+8 skips the extra top row; halfH has a row stride of 8. */
2873 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
/**
 * H.263 loop filter working on four vertically adjacent samples.
 *
 * For a column x the samples p0..p3 come from the rows at offsets
 * -2*stride, -1*stride, 0 and +1*stride from src. The two inner samples
 * (p1, p2) and the two outer samples (p0, p3) are rewritten in place.
 * The filter strength is looked up from qscale, and the whole body is only
 * active when CONFIG_ANY_H263 is set.
 *
 * NOTE(review): the loop over x, the declarations of d1/d2/ad1, the final
 * else branch of the d1 chain and the statements that apply d1 to p1/p2 are
 * not part of this listing.
 */
2876 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
2877 if(CONFIG_ANY_H263) {
2879 const int strength= ff_h263_loop_filter_strength[qscale];
2883 int p0= src[x-2*stride];
2884 int p1= src[x-1*stride];
2885 int p2= src[x+0*stride];
2886 int p3= src[x+1*stride];
2887 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
/* d1 follows d while |d| < strength, then ramps back towards 0 and is 0 below
 * -2*strength. */
2889 if (d<-2*strength) d1= 0;
2890 else if(d<- strength) d1=-2*strength - d;
2891 else if(d< strength) d1= d;
2892 else if(d< 2*strength) d1= 2*strength - d;
/* Bit 8 set means the value left 0..255: a negative value becomes 0, a too
 * large one becomes ~0, which is 255 once stored as uint8_t (assumes an
 * arithmetic right shift on a 32-bit int). */
2897 if(p1&256) p1= ~(p1>>31);
2898 if(p2&256) p2= ~(p2>>31);
2900 src[x-1*stride] = p1;
2901 src[x+0*stride] = p2;
/* The correction of the outer samples is limited to +-ad1. */
2905 d2= av_clip((p0-p3)/4, -ad1, ad1);
2907 src[x-2*stride] = p0 - d2;
2908 src[x+ stride] = p3 + d2;
/**
 * H.263 loop filter working on four horizontally adjacent samples.
 *
 * For a row y the samples p0..p3 come from the columns at offsets -2, -1, 0
 * and +1 from src + y*stride. The two inner samples (p1, p2) and the two
 * outer samples (p0, p3) are rewritten in place. The filter strength is
 * looked up from qscale, and the whole body is only active when
 * CONFIG_ANY_H263 is set.
 *
 * NOTE(review): the loop over y, the declarations of d1/d2/ad1, the final
 * else branch of the d1 chain and the statements that apply d1 to p1/p2 are
 * not part of this listing.
 */
2913 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
2914 if(CONFIG_ANY_H263) {
2916 const int strength= ff_h263_loop_filter_strength[qscale];
2920 int p0= src[y*stride-2];
2921 int p1= src[y*stride-1];
2922 int p2= src[y*stride+0];
2923 int p3= src[y*stride+1];
2924 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
/* d1 follows d while |d| < strength, then ramps back towards 0 and is 0 below
 * -2*strength. */
2926 if (d<-2*strength) d1= 0;
2927 else if(d<- strength) d1=-2*strength - d;
2928 else if(d< strength) d1= d;
2929 else if(d< 2*strength) d1= 2*strength - d;
/* Bit 8 set means the value left 0..255: a negative value becomes 0, a too
 * large one becomes ~0, which is 255 once stored as uint8_t (assumes an
 * arithmetic right shift on a 32-bit int). */
2934 if(p1&256) p1= ~(p1>>31);
2935 if(p2&256) p2= ~(p2>>31);
2937 src[y*stride-1] = p1;
2938 src[y*stride+0] = p2;
/* The correction of the outer samples is limited to +-ad1. */
2942 d2= av_clip((p0-p3)/4, -ad1, ad1);
2944 src[y*stride-2] = p0 - d2;
2945 src[y*stride+1] = p3 + d2;
/**
 * H.261 loop filter applied in place to the block at src.
 *
 * Vertical pass into temp (row stride 8): rows 0 and 7 are stored as 4*src,
 * the other rows as the (1,2,1) sum of the sample above, the sample itself
 * and the sample below.
 * Horizontal pass back into src: columns 0 and 7 are (temp + 2) >> 2, the
 * other columns are the (1,2,1) sum of neighbouring temp entries, plus 8,
 * shifted right by 4.
 *
 * NOTE(review): the declarations (temp, x, y, xy, yz), the loop headers and
 * the assignment of yz are not part of this listing; yz presumably indexes
 * temp as y*8 + x - confirm against the full file.
 */
2950 static void h261_loop_filter_c(uint8_t *src, int stride){
2955 temp[x ] = 4*src[x ];
2956 temp[x + 7*8] = 4*src[x + 7*stride];
2960 xy = y * stride + x;
2962 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
2967 src[ y*stride] = (temp[ y*8] + 2)>>2;
2968 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
2970 xy = y * stride + x;
2972 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
/**
 * H.264 luma deblocking filter for one edge.
 *
 * The edge is handled as 4 groups (i) of 4 positions (d); group i uses the
 * clipping value tc0[i]. xstride is the distance between samples across the
 * edge: p0, p1, p2 lie at -1, -2, -3 times xstride and q0, q1, q2 at 0, 1, 2
 * times xstride from pix.
 * A position is filtered only if |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta.
 *
 * NOTE(review): the declarations of i, d, tc and i_delta, the computation of
 * tc, any early exit per group and the pix advance by ystride are not part of
 * this listing.
 */
2977 static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
2980 for( i = 0; i < 4; i++ ) {
2985 for( d = 0; d < 4; d++ ) {
2986 const int p0 = pix[-1*xstride];
2987 const int p1 = pix[-2*xstride];
2988 const int p2 = pix[-3*xstride];
2989 const int q0 = pix[0];
2990 const int q1 = pix[1*xstride];
2991 const int q2 = pix[2*xstride];
2993 if( FFABS( p0 - q0 ) < alpha &&
2994 FFABS( p1 - p0 ) < beta &&
2995 FFABS( q1 - q0 ) < beta ) {
/* p1 is corrected only when p2 is close to p0; the change is limited to
 * +-tc0[i]. */
3000 if( FFABS( p2 - p0 ) < beta ) {
3001 pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
/* Same correction for q1 on the other side of the edge. */
3004 if( FFABS( q2 - q0 ) < beta ) {
3005 pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
/* The delta for the two samples next to the edge is limited to +-tc. */
3009 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
3010 pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */
3011 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
3017 static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
3019 h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
3021 static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
3023 h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
3026 static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
3029 for( d = 0; d < 16; d++ ) {
3030 const int p2 = pix[-3*xstride];
3031 const int p1 = pix[-2*xstride];
3032 const int p0 = pix[-1*xstride];
3034 const int q0 = pix[ 0*xstride];
3035 const int q1 = pix[ 1*xstride];
3036 const int q2 = pix[ 2*xstride];
3038 if( FFABS( p0 - q0 ) < alpha &&
3039 FFABS( p1 - p0 ) < beta &&
3040 FFABS( q1 - q0 ) < beta ) {
3042 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
3043 if( FFABS( p2 - p0 ) < beta)
3045 const int p3 = pix[-4*xstride];
3047 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
3048 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
3049 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
3052 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
3054 if( FFABS( q2 - q0 ) < beta)
3056 const int q3 = pix[3*xstride];
3058 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
3059 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
3060 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
3063 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
3067 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
3068 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
3074 static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
3076 h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
3078 static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
3080 h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
3083 static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
3086 for( i = 0; i < 4; i++ ) {
3087 const int tc = tc0[i];
3092 for( d = 0; d < 2; d++ ) {
3093 const int p0 = pix[-1*xstride];
3094 const int p1 = pix[-2*xstride];
3095 const int q0 = pix[0];
3096 const int q1 = pix[1*xstride];
3098 if( FFABS( p0 - q0 ) < alpha &&
3099 FFABS( p1 - p0 ) < beta &&
3100 FFABS( q1 - q0 ) < beta ) {
3102 int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
3104 pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */
3105 pix[0] = av_clip_uint8( q0 - delta ); /* q0' */
3111 static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
3113 h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
3115 static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
3117 h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
3120 static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
3123 for( d = 0; d < 8; d++ ) {
3124 const int p0 = pix[-1*xstride];
3125 const int p1 = pix[-2*xstride];
3126 const int q0 = pix[0];
3127 const int q1 = pix[1*xstride];
3129 if( FFABS( p0 - q0 ) < alpha &&
3130 FFABS( p1 - p0 ) < beta &&
3131 FFABS( q1 - q0 ) < beta ) {
3133 pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
3134 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
3139 static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
3141 h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
3143 static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
3145 h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
/**
 * Sum of absolute differences between two blocks 16 pixels wide.
 *
 * @param v         unused by this implementation
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between consecutive rows, used for both blocks
 * @param h         number of rows to compare
 * @return the accumulated sum of |pix1 - pix2| over 16 x h pixels
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - pix2[x]);
        /* step both blocks down to the next row */
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
3176 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3182 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
3183 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
3184 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
3185 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
3186 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
3187 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
3188 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
3189 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
3190 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
3191 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
3192 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
3193 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
3194 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
3195 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
3196 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
3197 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
3204 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3207 uint8_t *pix3 = pix2 + line_size;
3211 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
3212 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
3213 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
3214 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
3215 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
3216 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
3217 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
3218 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
3219 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
3220 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
3221 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
3222 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
3223 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
3224 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
3225 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
3226 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
3234 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3237 uint8_t *pix3 = pix2 + line_size;
3241 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
3242 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
3243 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
3244 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
3245 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
3246 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
3247 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
3248 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
3249 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
3250 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
3251 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
3252 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
3253 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
3254 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
3255 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
3256 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/**
 * Sum of absolute differences between two blocks 8 pixels wide.
 *
 * @param v         unused by this implementation
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between consecutive rows, used for both blocks
 * @param h         number of rows to compare
 * @return the accumulated sum of |pix1 - pix2| over 8 x h pixels
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - pix2[x]);
        /* step both blocks down to the next row */
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
3284 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3290 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
3291 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
3292 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
3293 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
3294 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
3295 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
3296 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
3297 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
3304 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3307 uint8_t *pix3 = pix2 + line_size;
3311 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
3312 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
3313 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
3314 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
3315 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
3316 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
3317 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
3318 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
3326 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3329 uint8_t *pix3 = pix2 + line_size;
3333 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
3334 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
3335 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
3336 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
3337 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
3338 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
3339 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
3340 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
3348 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3349 MpegEncContext *c = v;
3355 for(x=0; x<16; x++){
3356 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3359 for(x=0; x<15; x++){
3360 score2+= FFABS( s1[x ] - s1[x +stride]
3361 - s1[x+1] + s1[x+1+stride])
3362 -FFABS( s2[x ] - s2[x +stride]
3363 - s2[x+1] + s2[x+1+stride]);
3370 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3371 else return score1 + FFABS(score2)*8;
3374 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3375 MpegEncContext *c = v;
3382 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3386 score2+= FFABS( s1[x ] - s1[x +stride]
3387 - s1[x+1] + s1[x+1+stride])
3388 -FFABS( s2[x ] - s2[x +stride]
3389 - s2[x+1] + s2[x+1+stride]);
3396 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3397 else return score1 + FFABS(score2)*8;
3400 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
3404 for(i=0; i<8*8; i++){
3405 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
3408 assert(-512<b && b<512);
3410 sum += (w*b)*(w*b)>>4;
3415 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
3418 for(i=0; i<8*8; i++){
3419 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
3424 * permutes an 8x8 block.
3425 * @param block the block which will be permuted according to the given permutation vector
3426 * @param permutation the permutation vector
3427 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
3428 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
3429 * (inverse) permutated to scantable order!
3431 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
3437 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
3439 for(i=0; i<=last; i++){
3440 const int j= scantable[i];
3445 for(i=0; i<=last; i++){
3446 const int j= scantable[i];
3447 const int perm_j= permutation[j];
3448 block[perm_j]= temp[j];
3452 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
3456 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
3459 memset(cmp, 0, sizeof(void*)*6);
3467 cmp[i]= c->hadamard8_diff[i];
3473 cmp[i]= c->dct_sad[i];
3476 cmp[i]= c->dct264_sad[i];
3479 cmp[i]= c->dct_max[i];
3482 cmp[i]= c->quant_psnr[i];
3502 #if CONFIG_SNOW_ENCODER
3511 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
3516 static void clear_block_c(DCTELEM *block)
3518 memset(block, 0, sizeof(DCTELEM)*64);
3522 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
3524 static void clear_blocks_c(DCTELEM *blocks)
3526 memset(blocks, 0, sizeof(DCTELEM)*6*64);
/**
 * Adds src to dst byte by byte, modulo 256, in place.
 *
 * Most of the work is done one machine word at a time: the low 7 bits of
 * every byte are added with the top bits masked off so that no carry can
 * cross a byte boundary, then the top bit of each byte is restored with XOR.
 *
 * @param dst destination bytes, updated in place (dst[i] += src[i])
 * @param src bytes to add
 * @param w   number of bytes to process; values <= 0 do nothing
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    /* 0x7f / 0x80 replicated into every byte of a word (same pattern as pb_7f / pb_80) */
    const unsigned long mask_7f = ~0UL/255 * 0x7f;
    const unsigned long mask_80 = ~0UL/255 * 0x80;
    long i;

    /* The bound is computed in signed arithmetic: with an unsigned sizeof the
     * expression wraps around for w < sizeof(long) and the loop overruns. */
    for(i=0; i<=w-(int)sizeof(long); i+=(long)sizeof(long)){
        unsigned long a, b, sum;
        /* memcpy keeps the word access valid for any alignment and aliasing */
        memcpy(&a, src+i, sizeof(a));
        memcpy(&b, dst+i, sizeof(b));
        sum = ((a&mask_7f) + (b&mask_7f)) ^ ((a^b)&mask_80);
        memcpy(dst+i, &sum, sizeof(sum));
    }
    /* remaining bytes that do not fill a whole word */
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
/**
 * Writes the byte-wise sum (modulo 256) of two buffers into dst.
 *
 * Whole machine words are processed with the low 7 bits of each byte added
 * separately from the top bit, so carries never leak into a neighbouring byte.
 *
 * @param dst  output bytes (dst[i] = src1[i] + src2[i])
 * @param src1 first operand
 * @param src2 second operand
 * @param w    number of bytes to process; values <= 0 do nothing
 */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* 0x7f / 0x80 replicated into every byte of a word (same pattern as pb_7f / pb_80) */
    const unsigned long mask_7f = ~0UL/255 * 0x7f;
    const unsigned long mask_80 = ~0UL/255 * 0x80;
    long i;

    /* signed bound: an unsigned sizeof would wrap for w < sizeof(long) */
    for(i=0; i<=w-(int)sizeof(long); i+=(long)sizeof(long)){
        unsigned long a, b, sum;
        /* memcpy keeps the word access valid for any alignment and aliasing */
        memcpy(&a, src1+i, sizeof(a));
        memcpy(&b, src2+i, sizeof(b));
        sum = ((a&mask_7f) + (b&mask_7f)) ^ ((a^b)&mask_80);
        memcpy(dst+i, &sum, sizeof(sum));
    }
    /* remaining bytes that do not fill a whole word */
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}
/**
 * Writes the byte-wise difference (modulo 256) of two buffers into dst.
 *
 * Whole machine words are processed at once: the top bit of every byte of
 * the minuend is forced to 1 and the top bit of the subtrahend is cleared,
 * so no borrow can cross a byte boundary; the real top bit is then
 * recovered with XOR.
 *
 * Words are loaded and stored through memcpy, which is valid for any
 * pointer alignment, so no separate byte-only path for a misaligned src2
 * is required.
 *
 * @param dst  output bytes (dst[i] = src1[i] - src2[i])
 * @param src1 minuend
 * @param src2 subtrahend
 * @param w    number of bytes to process; values <= 0 do nothing
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* 0x7f / 0x80 replicated into every byte of a word (same pattern as pb_7f / pb_80) */
    const unsigned long mask_7f = ~0UL/255 * 0x7f;
    const unsigned long mask_80 = ~0UL/255 * 0x80;
    long i;

    /* signed bound: an unsigned sizeof would wrap for w < sizeof(long) */
    for(i=0; i<=w-(int)sizeof(long); i+=(long)sizeof(long)){
        unsigned long a, b, diff;
        memcpy(&a, src1+i, sizeof(a));
        memcpy(&b, src2+i, sizeof(b));
        diff = ((a|mask_80) - (b&mask_7f)) ^ ((a^b^mask_80)&mask_80);
        memcpy(dst+i, &diff, sizeof(diff));
    }
    /* remaining bytes that do not fill a whole word */
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
3576 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
3584 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
3593 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
3601 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
3611 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
3614 for(i=0; i<w-1; i++){
3639 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue){
3664 #define BUTTERFLY2(o1,o2,i1,i2) \
3668 #define BUTTERFLY1(x,y) \
3677 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
3679 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
3687 //FIXME try pointer walks
3688 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
3689 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
3690 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
3691 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
3693 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
3694 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
3695 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
3696 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
3698 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
3699 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
3700 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
3701 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
3705 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
3706 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
3707 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
3708 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
3710 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
3711 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
3712 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
3713 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
3716 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
3717 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
3718 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
3719 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
3725 printf("MAX:%d\n", maxi);
3731 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
3739 //FIXME try pointer walks
3740 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
3741 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
3742 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
3743 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
3745 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
3746 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
3747 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
3748 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
3750 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
3751 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
3752 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
3753 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
3757 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
3758 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
3759 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
3760 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
3762 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
3763 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
3764 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
3765 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
3768 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
3769 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
3770 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
3771 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
3774 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
3779 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3780 MpegEncContext * const s= (MpegEncContext *)c;
3781 DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
3782 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3786 s->dsp.diff_pixels(temp, src1, src2, stride);
3788 return s->dsp.sum_abs_dctelem(temp);
3793 const int s07 = SRC(0) + SRC(7);\
3794 const int s16 = SRC(1) + SRC(6);\
3795 const int s25 = SRC(2) + SRC(5);\
3796 const int s34 = SRC(3) + SRC(4);\
3797 const int a0 = s07 + s34;\
3798 const int a1 = s16 + s25;\
3799 const int a2 = s07 - s34;\
3800 const int a3 = s16 - s25;\
3801 const int d07 = SRC(0) - SRC(7);\
3802 const int d16 = SRC(1) - SRC(6);\
3803 const int d25 = SRC(2) - SRC(5);\
3804 const int d34 = SRC(3) - SRC(4);\
3805 const int a4 = d16 + d25 + (d07 + (d07>>1));\
3806 const int a5 = d07 - d34 - (d25 + (d25>>1));\
3807 const int a6 = d07 + d34 - (d16 + (d16>>1));\
3808 const int a7 = d16 - d25 + (d34 + (d34>>1));\
3810 DST(1, a4 + (a7>>2)) ;\
3811 DST(2, a2 + (a3>>1)) ;\
3812 DST(3, a5 + (a6>>2)) ;\
3814 DST(5, a6 - (a5>>2)) ;\
3815 DST(6, (a2>>1) - a3 ) ;\
3816 DST(7, (a4>>2) - a7 ) ;\
3819 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3820 MpegEncContext * const s= (MpegEncContext *)c;
3825 s->dsp.diff_pixels(dct[0], src1, src2, stride);
3827 #define SRC(x) dct[i][x]
3828 #define DST(x,v) dct[i][x]= v
3829 for( i = 0; i < 8; i++ )
3834 #define SRC(x) dct[x][i]
3835 #define DST(x,v) sum += FFABS(v)
3836 for( i = 0; i < 8; i++ )
3844 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3845 MpegEncContext * const s= (MpegEncContext *)c;
3846 DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
3847 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3852 s->dsp.diff_pixels(temp, src1, src2, stride);
3856 sum= FFMAX(sum, FFABS(temp[i]));
3861 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3862 MpegEncContext * const s= (MpegEncContext *)c;
3863 DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
3864 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3865 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
3871 s->dsp.diff_pixels(temp, src1, src2, stride);
3873 memcpy(bak, temp, 64*sizeof(DCTELEM));
3875 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3876 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3877 ff_simple_idct(temp); //FIXME
3880 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
3885 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3886 MpegEncContext * const s= (MpegEncContext *)c;
3887 const uint8_t *scantable= s->intra_scantable.permutated;
3888 DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
3889 DECLARE_ALIGNED_16(uint64_t, aligned_src1[8]);
3890 DECLARE_ALIGNED_16(uint64_t, aligned_src2[8]);
3891 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3892 uint8_t * const lsrc1 = (uint8_t*)aligned_src1;
3893 uint8_t * const lsrc2 = (uint8_t*)aligned_src2;
3894 int i, last, run, bits, level, distortion, start_i;
3895 const int esc_length= s->ac_esc_length;
3897 uint8_t * last_length;
3901 copy_block8(lsrc1, src1, 8, stride, 8);
3902 copy_block8(lsrc2, src2, 8, stride, 8);
3904 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
3906 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3912 length = s->intra_ac_vlc_length;
3913 last_length= s->intra_ac_vlc_last_length;
3914 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3917 length = s->inter_ac_vlc_length;
3918 last_length= s->inter_ac_vlc_last_length;
3923 for(i=start_i; i<last; i++){
3924 int j= scantable[i];
3929 if((level&(~127)) == 0){
3930 bits+= length[UNI_AC_ENC_INDEX(run, level)];
3939 level= temp[i] + 64;
3943 if((level&(~127)) == 0){
3944 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3952 s->dct_unquantize_intra(s, temp, 0, s->qscale);
3954 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3957 s->dsp.idct_add(lsrc2, 8, temp);
3959 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
3961 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3964 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3965 MpegEncContext * const s= (MpegEncContext *)c;
3966 const uint8_t *scantable= s->intra_scantable.permutated;
3967 DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
3968 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3969 int i, last, run, bits, level, start_i;
3970 const int esc_length= s->ac_esc_length;
3972 uint8_t * last_length;
3976 s->dsp.diff_pixels(temp, src1, src2, stride);
3978 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3984 length = s->intra_ac_vlc_length;
3985 last_length= s->intra_ac_vlc_last_length;
3986 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3989 length = s->inter_ac_vlc_length;
3990 last_length= s->inter_ac_vlc_last_length;
3995 for(i=start_i; i<last; i++){
3996 int j= scantable[i];
4001 if((level&(~127)) == 0){
4002 bits+= length[UNI_AC_ENC_INDEX(run, level)];
4011 level= temp[i] + 64;
4015 if((level&(~127)) == 0){
4016 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
4024 #define VSAD_INTRA(size) \
4025 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
4029 for(y=1; y<h; y++){ \
4030 for(x=0; x<size; x+=4){ \
4031 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
4032 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
4042 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
4047 for(x=0; x<16; x++){
4048 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
4057 #define SQ(a) ((a)*(a))
4058 #define VSSE_INTRA(size) \
4059 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
4063 for(y=1; y<h; y++){ \
4064 for(x=0; x<size; x+=4){ \
4065 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
4066 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
4076 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
4081 for(x=0; x<16; x++){
4082 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/**
 * Sum of squared differences between an int8_t vector and an int16_t vector.
 *
 * @param pix1 first vector (8-bit signed samples)
 * @param pix2 second vector (16-bit signed samples)
 * @param size number of elements to compare
 * @return the accumulated sum of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;

    for(i=0; i<size; i++){
        const int diff = pix1[i]-pix2[i];
        score += diff*diff;
    }
    return score;
}
4100 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
4101 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
4102 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
4104 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
4106 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
4107 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
4108 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
4109 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
4111 static void vector_fmul_c(float *dst, const float *src, int len){
4113 for(i=0; i<len; i++)
4117 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
4120 for(i=0; i<len; i++)
4121 dst[i] = src0[i] * src1[-i];
/**
 * Element-wise multiply-add: dst[i] = src0[i] * src1[i] + src2[i].
 *
 * @param dst  output vector
 * @param src0 first factor
 * @param src1 second factor
 * @param src2 addend
 * @param len  number of elements
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;

    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[i] + src2[i];
}
4130 void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
4135 for(i=-len, j=len-1; i<0; i++, j--) {
4140 dst[i] = s0*wj - s1*wi + add_bias;
4141 dst[j] = s0*wi + s1*wj + add_bias;
/**
 * Multiplies every element of a vector by a scalar: dst[i] = src[i] * mul.
 *
 * @param dst output vector
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = src[i] * mul;
}
/**
 * Multiplies src by a sequence of 2-element sub-vectors and by a scalar.
 *
 * For every pair of output elements one pointer is consumed from sv:
 * dst[i + k] = src[i + k] * sv[i / 2][k] * mul, for k = 0..1.
 *
 * @param dst output vector
 * @param src input vector
 * @param sv  array of pointers to 2-element vectors, one per output pair
 * @param mul scalar factor
 * @param len number of elements; elements are processed two at a time
 */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;

    for (i = 0; i < len; i += 2, sv++) {
        const float *cur = sv[0];
        dst[i  ] = src[i  ] * cur[0] * mul;
        dst[i+1] = src[i+1] * cur[1] * mul;
    }
}
/**
 * Multiplies src by a sequence of 4-element sub-vectors and by a scalar.
 *
 * For every group of four output elements one pointer is consumed from sv:
 * dst[i + k] = src[i + k] * sv[i / 4][k] * mul, for k = 0..3.
 *
 * @param dst output vector
 * @param src input vector
 * @param sv  array of pointers to 4-element vectors, one per output group
 * @param mul scalar factor
 * @param len number of elements; elements are processed four at a time
 */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;

    for (i = 0; i < len; i += 4, sv++) {
        const float *cur = sv[0];
        dst[i  ] = src[i  ] * cur[0] * mul;
        dst[i+1] = src[i+1] * cur[1] * mul;
        dst[i+2] = src[i+2] * cur[2] * mul;
        dst[i+3] = src[i+3] * cur[3] * mul;
    }
}
/**
 * Expands a sequence of 2-element sub-vectors into dst, scaled by a scalar.
 *
 * For every pair of output elements one pointer is consumed from sv:
 * dst[i + k] = sv[i / 2][k] * mul, for k = 0..1.
 *
 * @param dst output vector
 * @param sv  array of pointers to 2-element vectors, one per output pair
 * @param mul scalar factor
 * @param len number of elements; elements are produced two at a time
 */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;

    for (i = 0; i < len; i += 2, sv++) {
        const float *cur = sv[0];
        dst[i  ] = cur[0] * mul;
        dst[i+1] = cur[1] * mul;
    }
}
/**
 * Expands a sequence of 4-element sub-vectors into dst, scaled by a scalar.
 *
 * For every group of four output elements one pointer is consumed from sv:
 * dst[i + k] = sv[i / 4][k] * mul, for k = 0..3.
 *
 * @param dst output vector
 * @param sv  array of pointers to 4-element vectors, one per output group
 * @param mul scalar factor
 * @param len number of elements; elements are produced four at a time
 */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;

    for (i = 0; i < len; i += 4, sv++) {
        const float *cur = sv[0];
        dst[i  ] = cur[0] * mul;
        dst[i+1] = cur[1] * mul;
        dst[i+2] = cur[2] * mul;
        dst[i+3] = cur[3] * mul;
    }
}
4197 static void butterflies_float_c(float *restrict v1, float *restrict v2,
4201 for (i = 0; i < len; i++) {
4202 float t = v1[i] - v2[i];
4208 static float scalarproduct_float_c(const float *v1, const float *v2, int len)
4213 for (i = 0; i < len; i++)
/**
 * Converts integers to float and scales them: dst[i] = src[i] * mul.
 *
 * @param dst output vector
 * @param src input integers
 * @param mul scalar factor applied after the int-to-float conversion
 * @param len number of elements
 */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    int i;

    for(i=0; i<len; i++)
        dst[i] = src[i] * mul;
}
/**
 * Clips one float, given as its raw 32-bit pattern, using integer compares.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the lower bound (sign bit expected to be set)
 * @param maxi     bit pattern of the upper bound (sign bit expected to be clear)
 * @param maxisign maxi with its sign bit flipped
 * @return the bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* As unsigned integers, patterns with the sign bit set grow with the
     * magnitude, so anything above mini lies below the lower bound. */
    if(a > mini) return mini;
    /* Flipping the sign bit moves the non-negative patterns above the
     * negative ones; 1U keeps the shift in unsigned arithmetic. */
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}

/**
 * Clips a float vector to [*min, *max] using integer compares on the raw
 * bit patterns.
 *
 * Relies on *min being negative and *max being positive, and on float
 * being a 32-bit type whose patterns order as described in clipf_c_one().
 * Elements are processed in groups of 8, so len is effectively rounded up
 * to a multiple of 8.
 *
 * @param dst output vector (may be the same buffer as src)
 * @param src input vector
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements, expected to be a multiple of 8
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i, k;
    uint32_t mini, maxi, maxisign;

    /* memcpy reads the bit patterns without breaking strict aliasing */
    memcpy(&mini, min, sizeof(mini));
    memcpy(&maxi, max, sizeof(maxi));
    maxisign = maxi ^ (1U<<31);

    for(i=0; i<len; i+=8) {
        for(k=0; k<8; k++) {
            uint32_t bits;
            memcpy(&bits, &src[i + k], sizeof(bits));
            bits = clipf_c_one(bits, mini, maxi, maxisign);
            memcpy(&dst[i + k], &bits, sizeof(bits));
        }
    }
}
/**
 * Clips every element of a float vector to the range [min, max].
 *
 * When the bounds have opposite signs the work is delegated to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied to each
 * element. Elements are processed in groups of 8, so len is effectively
 * rounded up to a multiple of 8.
 *
 * @param dst output vector
 * @param src input vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements, expected to be a multiple of 8
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i, k;

    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for(i=0; i < len; i+=8) {
        for(k=0; k<8; k++)
            dst[i + k] = av_clipf(src[i + k], min, max);
    }
}
4270 static av_always_inline int float_to_int16_one(const float *src){
4271 int_fast32_t tmp = *(const int32_t*)src;
4273 tmp = (0x43c0ffff - tmp)>>31;
4274 // is this faster on some gcc/cpu combinations?
4275 // if(tmp > 0x43c0ffff) tmp = 0xFFFF;
4278 return tmp - 0x8000;
4281 void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
4283 for(i=0; i<len; i++)
4284 dst[i] = float_to_int16_one(src+i);
4287 void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
4290 for(i=0; i<len; i++){
4291 dst[2*i] = float_to_int16_one(src[0]+i);
4292 dst[2*i+1] = float_to_int16_one(src[1]+i);
4295 for(c=0; c<channels; c++)
4296 for(i=0, j=c; i<len; i++, j+=channels)
4297 dst[j] = float_to_int16_one(src[c]+i);
4301 static void add_int16_c(int16_t * v1, int16_t * v2, int order)
4307 static void sub_int16_c(int16_t * v1, int16_t * v2, int order)
/**
 * Scalar product of two int16_t vectors, with every individual product
 * shifted right before it is accumulated.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements in each vector
 * @param shift right shift applied to each product before summation
 * @return the sum of (v1[i] * v2[i]) >> shift
 */
static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
    int res = 0;

    while (order--)
        res += (*v1++ * *v2++) >> shift;

    return res;
}
4324 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
4325 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
4326 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
4327 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
4328 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
4329 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
4330 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
4332 static void wmv2_idct_row(short * b)
4335 int a0,a1,a2,a3,a4,a5,a6,a7;
4337 a1 = W1*b[1]+W7*b[7];
4338 a7 = W7*b[1]-W1*b[7];
4339 a5 = W5*b[5]+W3*b[3];
4340 a3 = W3*b[5]-W5*b[3];
4341 a2 = W2*b[2]+W6*b[6];
4342 a6 = W6*b[2]-W2*b[6];
4343 a0 = W0*b[0]+W0*b[4];
4344 a4 = W0*b[0]-W0*b[4];
4346 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
4347 s2 = (181*(a1-a5-a7+a3)+128)>>8;
4349 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
4350 b[1] = (a4+a6 +s1 + (1<<7))>>8;
4351 b[2] = (a4-a6 +s2 + (1<<7))>>8;
4352 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
4353 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
4354 b[5] = (a4-a6 -s2 + (1<<7))>>8;
4355 b[6] = (a4+a6 -s1 + (1<<7))>>8;
4356 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
4358 static void wmv2_idct_col(short * b)
4361 int a0,a1,a2,a3,a4,a5,a6,a7;
4362 /*step 1, with extended precision*/
4363 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
4364 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
4365 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
4366 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
4367 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
4368 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
4369 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
4370 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
4372 s1 = (181*(a1-a5+a7-a3)+128)>>8;
4373 s2 = (181*(a1-a5-a7+a3)+128)>>8;
4375 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
4376 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
4377 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
4378 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
4380 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
4381 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
4382 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
4383 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
4385 void ff_wmv2_idct_c(short * block){
4389 wmv2_idct_row(block+i);
4392 wmv2_idct_col(block+i);
4395 /* XXX: those functions should be suppressed ASAP when all IDCTs are
4397 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
4399 ff_wmv2_idct_c(block);
4400 put_pixels_clamped_c(block, dest, line_size);
4402 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
4404 ff_wmv2_idct_c(block);
4405 add_pixels_clamped_c(block, dest, line_size);
4407 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
4410 put_pixels_clamped_c(block, dest, line_size);
4412 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
4415 add_pixels_clamped_c(block, dest, line_size);
4418 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
4421 put_pixels_clamped4_c(block, dest, line_size);
/*
 * "add" wrapper used by dsputil_init() for avctx->lowres==1: hands `block`,
 * `dest` and `line_size` to add_pixels_clamped4_c().
 * NOTE(review): only the final call is visible in this listing; dsputil_init()
 * pairs this wrapper with j_rev_dct4, so a transform call on `block`
 * presumably precedes it -- confirm against the full file.
 */
4423 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
4426 add_pixels_clamped4_c(block, dest, line_size);
/*
 * "put" wrapper used by dsputil_init() for avctx->lowres==2: hands `block`,
 * `dest` and `line_size` to put_pixels_clamped2_c().
 * NOTE(review): only the final call is visible in this listing; dsputil_init()
 * pairs this wrapper with j_rev_dct2, so a transform call on `block`
 * presumably precedes it -- confirm against the full file.
 */
4429 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
4432 put_pixels_clamped2_c(block, dest, line_size);
/*
 * "add" wrapper used by dsputil_init() for avctx->lowres==2: hands `block`,
 * `dest` and `line_size` to add_pixels_clamped2_c().
 * NOTE(review): only the final call is visible in this listing; dsputil_init()
 * pairs this wrapper with j_rev_dct2, so a transform call on `block`
 * presumably precedes it -- confirm against the full file.
 */
4434 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
4437 add_pixels_clamped2_c(block, dest, line_size);
/*
 * "put" wrapper used by dsputil_init() for avctx->lowres==3: writes a single
 * output value, dest[0], computed from block[0] only. `line_size` is not used
 * in the visible lines.
 */
4440 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
/* cm points at the entry of ff_cropTbl that corresponds to value 0. */
4442 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* Divide block[0] by 8 with rounding, then map it through the crop table. */
4444 dest[0] = cm[(block[0] + 4)>>3];
/*
 * "add" wrapper used by dsputil_init() for avctx->lowres==3: updates a single
 * value, dest[0], from block[0] only. `line_size` is not used in the visible
 * lines.
 */
4446 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
/* cm points at the entry of ff_cropTbl that corresponds to value 0. */
4448 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* Add block[0]/8 (rounded) to the current dest[0] and map the sum through the crop table. */
4450 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
/* No-op taking (mem, stride, h); dsputil_init() installs it as the default c->prefetch. */
4453 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
4455 /* init static data */
/*
 * Fills the file-level lookup tables ff_cropTbl, ff_squareTbl and
 * inv_zigzag_direct16.
 * NOTE(review): the declaration of i and at least one statement inside the
 * MAX_NEG_CROP loop are not present in this listing -- confirm against the
 * full file.
 */
4456 void dsputil_static_init(void)
/* Identity mapping for the 256 in-range values, stored at offset MAX_NEG_CROP. */
4460 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
4461 for(i=0;i<MAX_NEG_CROP;i++) {
/* Entries above the in-range part saturate to 255. */
4463 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
/* ff_squareTbl[i] holds (i-256)^2, i.e. squares of the values -256 .. 255. */
4466 for(i=0;i<512;i++) {
4467 ff_squareTbl[i] = (i - 256) * (i - 256);
/* Inverse of ff_zigzag_direct, stored 1-based (scan index + 1). */
4470 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
/*
 * Checks whether a local variable declared with DECLARE_ALIGNED_16 really
 * sits on a 16-byte boundary and, on MMX/AltiVec builds, logs an error
 * message if it does not.
 * NOTE(review): the use of did_fail, the closing #endif and the return
 * statements are not present in this listing -- confirm the return values and
 * the log-once behaviour against the full file.
 */
4473 int ff_check_alignment(void){
4474 static int did_fail=0;
4475 DECLARE_ALIGNED_16(int, aligned);
/* Non-zero low four address bits mean the variable is not 16-byte aligned. */
4477 if((intptr_t)&aligned & 15){
4479 #if HAVE_MMX || HAVE_ALTIVEC
4480 av_log(NULL, AV_LOG_ERROR,
4481 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
4482 "and may be very slow or crash. This is not a bug in libavcodec,\n"
4483 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
4484 "Do not report crashes to FFmpeg developers.\n");
/*
 * Fills the DSPContext `c` with the C implementations chosen from the
 * settings in `avctx` (dct_algo, lowres, idct_algo), then calls the
 * per-architecture init functions, and finally builds c->idct_permutation
 * from c->idct_permutation_type.
 * NOTE(review): this listing omits some lines of the function (for example
 * the declaration of i, several else / #if / #endif lines and the loops
 * around the permutation assignments) -- check the full file before relying
 * on the exact control flow.
 */
4493 void dsputil_init(DSPContext* c, AVCodecContext *avctx)
4497 ff_check_alignment();
/* Forward DCT selection by avctx->dct_algo. */
4500 if(avctx->dct_algo==FF_DCT_FASTINT) {
4501 c->fdct = fdct_ifast;
4502 c->fdct248 = fdct_ifast248;
4504 else if(avctx->dct_algo==FF_DCT_FAAN) {
4505 c->fdct = ff_faandct;
4506 c->fdct248 = ff_faandct248;
4509 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
4510 c->fdct248 = ff_fdct248_islow;
4512 #endif //CONFIG_ENCODERS
/*
 * Inverse DCT selection: reduced-size IDCTs when avctx->lowres is 1, 2 or 3,
 * otherwise by avctx->idct_algo. Every branch also records the coefficient
 * permutation that the chosen IDCT expects.
 */
4514 if(avctx->lowres==1){
4515 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
4516 c->idct_put= ff_jref_idct4_put;
4517 c->idct_add= ff_jref_idct4_add;
4519 c->idct_put= ff_h264_lowres_idct_put_c;
4520 c->idct_add= ff_h264_lowres_idct_add_c;
4522 c->idct = j_rev_dct4;
4523 c->idct_permutation_type= FF_NO_IDCT_PERM;
4524 }else if(avctx->lowres==2){
4525 c->idct_put= ff_jref_idct2_put;
4526 c->idct_add= ff_jref_idct2_add;
4527 c->idct = j_rev_dct2;
4528 c->idct_permutation_type= FF_NO_IDCT_PERM;
4529 }else if(avctx->lowres==3){
4530 c->idct_put= ff_jref_idct1_put;
4531 c->idct_add= ff_jref_idct1_add;
4532 c->idct = j_rev_dct1;
4533 c->idct_permutation_type= FF_NO_IDCT_PERM;
4535 if(avctx->idct_algo==FF_IDCT_INT){
4536 c->idct_put= ff_jref_idct_put;
4537 c->idct_add= ff_jref_idct_add;
4538 c->idct = j_rev_dct;
4539 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
4540 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
4541 avctx->idct_algo==FF_IDCT_VP3){
4542 c->idct_put= ff_vp3_idct_put_c;
4543 c->idct_add= ff_vp3_idct_add_c;
4544 c->idct = ff_vp3_idct_c;
4545 c->idct_permutation_type= FF_NO_IDCT_PERM;
4546 }else if(avctx->idct_algo==FF_IDCT_WMV2){
4547 c->idct_put= ff_wmv2_idct_put_c;
4548 c->idct_add= ff_wmv2_idct_add_c;
4549 c->idct = ff_wmv2_idct_c;
4550 c->idct_permutation_type= FF_NO_IDCT_PERM;
4551 }else if(avctx->idct_algo==FF_IDCT_FAAN){
4552 c->idct_put= ff_faanidct_put;
4553 c->idct_add= ff_faanidct_add;
4554 c->idct = ff_faanidct;
4555 c->idct_permutation_type= FF_NO_IDCT_PERM;
4556 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
/* NOTE(review): only idct_put is assigned for FF_IDCT_EA; idct_add and idct are not set in this branch. */
4557 c->idct_put= ff_ea_idct_put_c;
4558 c->idct_permutation_type= FF_NO_IDCT_PERM;
4559 }else{ //accurate/default
4560 c->idct_put= ff_simple_idct_put;
4561 c->idct_add= ff_simple_idct_add;
4562 c->idct = ff_simple_idct;
4563 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* H.264 IDCT entry points, set only when the H.264 decoder is configured. */
4567 if (CONFIG_H264_DECODER) {
4568 c->h264_idct_add= ff_h264_idct_add_c;
4569 c->h264_idct8_add= ff_h264_idct8_add_c;
4570 c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
4571 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
4572 c->h264_idct_add16 = ff_h264_idct_add16_c;
4573 c->h264_idct8_add4 = ff_h264_idct8_add4_c;
4574 c->h264_idct_add8 = ff_h264_idct_add8_c;
4575 c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
/* Basic pixel / block helpers. */
4578 c->get_pixels = get_pixels_c;
4579 c->diff_pixels = diff_pixels_c;
4580 c->put_pixels_clamped = put_pixels_clamped_c;
4581 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
4582 c->add_pixels_clamped = add_pixels_clamped_c;
4583 c->add_pixels8 = add_pixels8_c;
4584 c->add_pixels4 = add_pixels4_c;
4585 c->sum_abs_dctelem = sum_abs_dctelem_c;
4588 c->clear_block = clear_block_c;
4589 c->clear_blocks = clear_blocks_c;
4590 c->pix_sum = pix_sum_c;
4591 c->pix_norm1 = pix_norm1_c;
4593 /* TODO [0] 16 [1] 8 */
/* pix_abs[size][variant]: going by the function names, row 0 is the 16 variant and row 1 the 8 variant, each with plain, x2, y2 and xy2 entries. */
4594 c->pix_abs[0][0] = pix_abs16_c;
4595 c->pix_abs[0][1] = pix_abs16_x2_c;
4596 c->pix_abs[0][2] = pix_abs16_y2_c;
4597 c->pix_abs[0][3] = pix_abs16_xy2_c;
4598 c->pix_abs[1][0] = pix_abs8_c;
4599 c->pix_abs[1][1] = pix_abs8_x2_c;
4600 c->pix_abs[1][2] = pix_abs8_y2_c;
4601 c->pix_abs[1][3] = pix_abs8_xy2_c;
/* Helper macro: fills the four entries (plain, _x2, _y2, _xy2) of row IDX of c-><PFX>_pixels_tab. */
4603 #define dspfunc(PFX, IDX, NUM) \
4604 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
4605 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
4606 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
4607 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
4609 dspfunc(put, 0, 16);
4610 dspfunc(put_no_rnd, 0, 16);
4612 dspfunc(put_no_rnd, 1, 8);
4616 dspfunc(avg, 0, 16);
4617 dspfunc(avg_no_rnd, 0, 16);
4619 dspfunc(avg_no_rnd, 1, 8);
4624 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
4625 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
/* tpel tables: only indices 0-2, 4-6 and 8-10 are assigned here; 3 and 7 are left untouched. */
4627 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
4628 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
4629 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
4630 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
4631 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
4632 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
4633 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
4634 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
4635 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
4637 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
4638 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
4639 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
4640 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
4641 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
4642 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
4643 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
4644 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
4645 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
/*
 * Second definition of the helper macro: fills the 16 mcXY entries of row IDX
 * of a qpel table, with index = X + 4*Y.
 * NOTE(review): an #undef of the earlier definition presumably precedes this
 * in the full file -- it is not present in this listing.
 */
4647 #define dspfunc(PFX, IDX, NUM) \
4648 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
4649 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
4650 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
4651 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
4652 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
4653 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
4654 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
4655 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
4656 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
4657 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
4658 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
4659 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
4660 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
4661 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
4662 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
4663 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
4665 dspfunc(put_qpel, 0, 16);
4666 dspfunc(put_no_rnd_qpel, 0, 16);
4668 dspfunc(avg_qpel, 0, 16);
4669 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
4671 dspfunc(put_qpel, 1, 8);
4672 dspfunc(put_no_rnd_qpel, 1, 8);
4674 dspfunc(avg_qpel, 1, 8);
4675 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
/* H.264 qpel tables: put has rows for 16, 8, 4 and 2; avg has rows for 16, 8 and 4 only. */
4677 dspfunc(put_h264_qpel, 0, 16);
4678 dspfunc(put_h264_qpel, 1, 8);
4679 dspfunc(put_h264_qpel, 2, 4);
4680 dspfunc(put_h264_qpel, 3, 2);
4681 dspfunc(avg_h264_qpel, 0, 16);
4682 dspfunc(avg_h264_qpel, 1, 8);
4683 dspfunc(avg_h264_qpel, 2, 4);
/* H.264 and VC-1 chroma motion compensation. */
4686 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
4687 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
4688 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
4689 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
4690 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
4691 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
4692 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
4693 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
/* H.264 weighted and bi-weighted prediction, one table entry per block size from 16x16 down to 2x2. */
4695 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
4696 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
4697 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
4698 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
4699 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
4700 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
4701 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
4702 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
4703 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
4704 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
4705 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
4706 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
4707 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
4708 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
4709 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
4710 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
4711 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
4712 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
4713 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
4714 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
4716 c->draw_edges = draw_edges_c;
/* Codec-specific DSP init, compiled in per configured decoder (the matching #endif lines are not present in this listing). */
4718 #if CONFIG_CAVS_DECODER
4719 ff_cavsdsp_init(c,avctx);
4722 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
4723 ff_mlp_init(c, avctx);
4725 #if CONFIG_VC1_DECODER
4726 ff_vc1dsp_init(c,avctx);
4728 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
4729 ff_intrax8dsp_init(c,avctx);
4731 #if CONFIG_RV30_DECODER
4732 ff_rv30dsp_init(c,avctx);
4734 #if CONFIG_RV40_DECODER
4735 ff_rv40dsp_init(c,avctx);
/* The mc33 entries of the RV40 qpel tables are assigned here, after ff_rv40dsp_init() has run. */
4736 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
4737 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
4738 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
4739 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
/* mspel table: eight entries, mc00..mc30 followed by mc02..mc32. */
4742 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
4743 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
4744 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
4745 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
4746 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
4747 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
4748 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
4749 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
/* Helper macro: installs the <name>16_c and <name>8x8_c variants as entries 0 and 1 of c-><name>. */
4751 #define SET_CMP_FUNC(name) \
4752 c->name[0]= name ## 16_c;\
4753 c->name[1]= name ## 8x8_c;
4755 SET_CMP_FUNC(hadamard8_diff)
4756 c->hadamard8_diff[4]= hadamard8_intra16_c;
4757 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
4758 SET_CMP_FUNC(dct_sad)
4759 SET_CMP_FUNC(dct_max)
4761 SET_CMP_FUNC(dct264_sad)
/* sad[] reuses the same functions as pix_abs[0][0] and pix_abs[1][0]. */
4763 c->sad[0]= pix_abs16_c;
4764 c->sad[1]= pix_abs8_c;
4768 SET_CMP_FUNC(quant_psnr)
4771 c->vsad[0]= vsad16_c;
4772 c->vsad[4]= vsad_intra16_c;
4773 c->vsad[5]= vsad_intra8_c;
4774 c->vsse[0]= vsse16_c;
4775 c->vsse[4]= vsse_intra16_c;
4776 c->vsse[5]= vsse_intra8_c;
4777 c->nsse[0]= nsse16_c;
4778 c->nsse[1]= nsse8_c;
4779 #if CONFIG_SNOW_ENCODER
4780 c->w53[0]= w53_16_c;
4782 c->w97[0]= w97_16_c;
4786 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
/* Byte-wise add/diff helpers plus the hfyu and PNG prediction functions. */
4788 c->add_bytes= add_bytes_c;
4789 c->add_bytes_l2= add_bytes_l2_c;
4790 c->diff_bytes= diff_bytes_c;
4791 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
4792 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
4793 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
4794 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
4795 c->bswap_buf= bswap_buf;
4796 #if CONFIG_PNG_DECODER
4797 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
/* H.264 loop filters; h264_loop_filter_strength gets no C implementation here and is set to NULL. */
4800 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
4801 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
4802 c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
4803 c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
4804 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
4805 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
4806 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
4807 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
4808 c->h264_loop_filter_strength= NULL;
/* Loop filters for the other codecs, each guarded by its configuration flag. */
4810 if (CONFIG_ANY_H263) {
4811 c->h263_h_loop_filter= h263_h_loop_filter_c;
4812 c->h263_v_loop_filter= h263_v_loop_filter_c;
4815 if (CONFIG_VP3_DECODER) {
4816 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
4817 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
4819 if (CONFIG_VP6_DECODER) {
4820 c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
4823 c->h261_loop_filter= h261_loop_filter_c;
4825 c->try_8x8basis= try_8x8basis_c;
4826 c->add_8x8basis= add_8x8basis_c;
4828 #if CONFIG_SNOW_DECODER
4829 c->vertical_compose97i = ff_snow_vertical_compose97i;
4830 c->horizontal_compose97i = ff_snow_horizontal_compose97i;
4831 c->inner_add_yblock = ff_snow_inner_add_yblock;
4834 #if CONFIG_VORBIS_DECODER
4835 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
4837 #if CONFIG_AC3_DECODER
4838 c->ac3_downmix = ff_ac3_downmix_c;
4841 c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
/* Float and int16 vector helpers. */
4843 c->vector_fmul = vector_fmul_c;
4844 c->vector_fmul_reverse = vector_fmul_reverse_c;
4845 c->vector_fmul_add = vector_fmul_add_c;
4846 c->vector_fmul_window = ff_vector_fmul_window_c;
4847 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
4848 c->vector_clipf = vector_clipf_c;
4849 c->float_to_int16 = ff_float_to_int16_c;
4850 c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
4851 c->add_int16 = add_int16_c;
4852 c->sub_int16 = sub_int16_c;
4853 c->scalarproduct_int16 = scalarproduct_int16_c;
4854 c->scalarproduct_float = scalarproduct_float_c;
4855 c->butterflies_float = butterflies_float_c;
4856 c->vector_fmul_scalar = vector_fmul_scalar_c;
4858 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
4859 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
4861 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
4862 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
/* shrink[0] is a plain plane copy; entries 1..3 use ff_shrink22, ff_shrink44 and ff_shrink88. */
4864 c->shrink[0]= ff_img_copy_plane;
4865 c->shrink[1]= ff_shrink22;
4866 c->shrink[2]= ff_shrink44;
4867 c->shrink[3]= ff_shrink88;
/* Default prefetch is the no-op just_return(). */
4869 c->prefetch= just_return;
/* Clear the 2-tap qpel tables so the fallback loop further down can spot entries that are still NULL after the per-architecture init. */
4871 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
4872 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
/* Per-architecture init; each call is made only when its build-time flag is non-zero. */
4874 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
4875 if (ARCH_ARM) dsputil_init_arm (c, avctx);
4876 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
4877 if (HAVE_VIS) dsputil_init_vis (c, avctx);
4878 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
4879 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
4880 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
4881 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
4882 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
/*
 * Any 2-tap qpel entry that is still NULL falls back to the H.264 qpel
 * function at the same index.
 * NOTE(review): i runs up to 63 while indexing [0][i]; this relies on the
 * declared shape of the tables, which is not visible here -- confirm.
 */
4884 for(i=0; i<64; i++){
4885 if(!c->put_2tap_qpel_pixels_tab[0][i])
4886 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
4887 if(!c->avg_2tap_qpel_pixels_tab[0][i])
4888 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
/*
 * Build c->idct_permutation for the permutation type chosen above.
 * NOTE(review): the loops over i, the break statements and the default label
 * are not present in this listing.
 */
4891 switch(c->idct_permutation_type){
4892 case FF_NO_IDCT_PERM:
/* Identity: coefficients stay where they are. */
4894 c->idct_permutation[i]= i;
4896 case FF_LIBMPEG2_IDCT_PERM:
/* Keeps the row bits (0x38) and rotates the three column bits. */
4898 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
4900 case FF_SIMPLE_IDCT_PERM:
4902 c->idct_permutation[i]= simple_mmx_permutation[i];
4904 case FF_TRANSPOSE_IDCT_PERM:
/* Swaps the row and column parts of the index. */
4906 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
4908 case FF_PARTTRANS_IDCT_PERM:
/* Keeps bits 0x24 and swaps the low two column bits with the low two row bits. */
4910 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
4912 case FF_SSE2_IDCT_PERM:
/* Keeps the row bits and remaps the column through idct_sse2_row_perm. */
4914 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
/* Presumably the default branch: reached when the permutation type matches none of the cases above. */
4917 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");