3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "libavutil/imgutils.h"
33 #include "simple_idct.h"
37 #include "mpegvideo.h"
42 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
43 uint32_t ff_squareTbl[512] = {0, };
/* Template-expansion machinery: dsputil_template.c is included several
 * times to stamp out per-bit-depth variants.  NOTE(review): the
 * #define BIT_DEPTH / #undef lines that normally surround each include
 * are not visible in this excerpt — confirm against the full file. */
45 #define pixeltmp int16_t
47 #include "dsputil_template.c"
51 #include "dsputil_template.c"
/* Wider temporaries, presumably for a higher bit depth; the selecting
 * macro definitions are elided here. */
55 #define pixeltmp int32_t
57 #include "dsputil_template.c"
61 #include "dsputil_template.c"
65 #define pixeltmp int16_t
67 #include "dsputil_template.c"
/*
 * Byte-replicated constants sized to the native unsigned long:
 * ~0UL/255 evaluates to 0x0101...01 for whatever width unsigned long
 * has, so pb_7f is 0x7f7f...7f and pb_80 is 0x8080...80 on both 32-
 * and 64-bit targets.
 */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
/* Classic 8x8 zigzag scan: maps scan position -> raster index within
 * the block (diagonal traversal from DC to the highest frequency). */
74 const uint8_t ff_zigzag_direct[64] = {
75 0, 1, 8, 16, 9, 2, 3, 10,
76 17, 24, 32, 25, 18, 11, 4, 5,
77 12, 19, 26, 33, 40, 48, 41, 34,
78 27, 20, 13, 6, 7, 14, 21, 28,
79 35, 42, 49, 56, 57, 50, 43, 36,
80 29, 22, 15, 23, 30, 37, 44, 51,
81 58, 59, 52, 45, 38, 31, 39, 46,
82 53, 60, 61, 54, 47, 55, 62, 63
/* Scan-position -> raster-index table used by the field (2-4-8) IDCT. */
85 /* Specific zigzag scan for 248 idct. NOTE that unlike the
86 specification, we interleave the fields */
87 const uint8_t ff_zigzag248_direct[64] = {
88 0, 8, 1, 9, 16, 24, 2, 10,
89 17, 25, 32, 40, 48, 56, 33, 41,
90 18, 26, 3, 11, 4, 12, 19, 27,
91 34, 42, 49, 57, 50, 58, 35, 43,
92 20, 28, 5, 13, 6, 14, 21, 29,
93 36, 44, 51, 59, 52, 60, 37, 45,
94 22, 30, 7, 15, 23, 31, 38, 46,
95 53, 61, 54, 62, 39, 47, 55, 63,
98 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
99 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
/* Alternate horizontal scan order (scan position -> raster index),
 * favouring horizontal frequencies. */
101 const uint8_t ff_alternate_horizontal_scan[64] = {
102 0, 1, 2, 3, 8, 9, 16, 17,
103 10, 11, 4, 5, 6, 7, 15, 14,
104 13, 12, 19, 18, 24, 25, 32, 33,
105 26, 27, 20, 21, 22, 23, 28, 29,
106 30, 31, 34, 35, 40, 41, 48, 49,
107 42, 43, 36, 37, 38, 39, 44, 45,
108 46, 47, 50, 51, 56, 57, 58, 59,
109 52, 53, 54, 55, 60, 61, 62, 63,
/* Alternate vertical scan order (scan position -> raster index),
 * favouring vertical frequencies. */
112 const uint8_t ff_alternate_vertical_scan[64] = {
113 0, 8, 16, 24, 1, 9, 2, 10,
114 17, 25, 32, 40, 48, 56, 57, 49,
115 41, 33, 26, 18, 3, 11, 4, 12,
116 19, 27, 34, 42, 50, 58, 35, 43,
117 51, 59, 20, 28, 5, 13, 6, 14,
118 21, 29, 36, 44, 52, 60, 37, 45,
119 53, 61, 22, 30, 7, 15, 23, 31,
120 38, 46, 54, 62, 39, 47, 55, 63,
/* Coefficient permutation consumed by the FF_SIMPLE_IDCT_PERM case of
 * ff_init_scantable_permutation() below. */
123 /* Input permutation for the simple_idct_mmx */
124 static const uint8_t simple_mmx_permutation[64]={
125 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
126 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
127 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
128 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
129 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
130 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
131 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
132 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
/* Within-row column permutation combined with the (i & 0x38) row base
 * by the FF_SSE2_IDCT_PERM case in ff_init_scantable_permutation(). */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
/*
 * Initialize a ScanTable from a raw scan order: stores the source
 * table, builds permutated[] by routing each scan entry through the
 * IDCT permutation[], and fills raster_end[].  NOTE(review): loop
 * headers and the raster_end computation are elided in this excerpt.
 */
137 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
141 st->scantable= src_scantable;
145 j = src_scantable[i];
146 st->permutated[i] = permutation[j];
152 j = st->permutated[i];
154 st->raster_end[i]= end;
/*
 * Fill idct_permutation[64] according to the permutation type required
 * by the selected IDCT implementation.  NOTE(review): the per-case
 * for-loops and break statements are elided in this excerpt.
 */
158 void ff_init_scantable_permutation(uint8_t *idct_permutation,
159 int idct_permutation_type)
163 switch(idct_permutation_type){
164 case FF_NO_IDCT_PERM:
/* identity mapping */
166 idct_permutation[i]= i;
168 case FF_LIBMPEG2_IDCT_PERM:
/* keep the row (bits 3-5), rotate the column bits */
170 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
172 case FF_SIMPLE_IDCT_PERM:
174 idct_permutation[i]= simple_mmx_permutation[i];
176 case FF_TRANSPOSE_IDCT_PERM:
/* swap row and column indices (8x8 transpose) */
178 idct_permutation[i]= ((i&7)<<3) | (i>>3);
180 case FF_PARTTRANS_IDCT_PERM:
/* partial transpose: swap the low 2 bits of row and column */
182 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
184 case FF_SSE2_IDCT_PERM:
/* keep the row, permute columns per idct_sse2_row_perm */
186 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
189 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
/* Sum of all pixel values of a 16x16 block; the inner loop handles 8
 * pixels per iteration.  NOTE(review): the accumulation statements and
 * return are elided in this excerpt. */
193 static int pix_sum_c(uint8_t * pix, int line_size)
198 for (i = 0; i < 16; i++) {
199 for (j = 0; j < 16; j += 8) {
210 pix += line_size - 16;
/*
 * Sum of squared pixel values of a 16x16 block, via the biased square
 * table (sq = ff_squareTbl + 256).  Two word-size paths exist — the
 * selecting #if is elided in this excerpt.
 * NOTE(review): the type-punned loads below rely on the platform
 * tolerating unaligned/aliased access — confirm against full file.
 */
215 static int pix_norm1_c(uint8_t * pix, int line_size)
218 uint32_t *sq = ff_squareTbl + 256;
221 for (i = 0; i < 16; i++) {
222 for (j = 0; j < 16; j += 8) {
/* 64-bit path: one load, eight table lookups */
234 register uint64_t x=*(uint64_t*)pix;
236 s += sq[(x>>8)&0xff];
237 s += sq[(x>>16)&0xff];
238 s += sq[(x>>24)&0xff];
239 s += sq[(x>>32)&0xff];
240 s += sq[(x>>40)&0xff];
241 s += sq[(x>>48)&0xff];
242 s += sq[(x>>56)&0xff];
/* 32-bit path: two loads of four bytes each */
244 register uint32_t x=*(uint32_t*)pix;
246 s += sq[(x>>8)&0xff];
247 s += sq[(x>>16)&0xff];
248 s += sq[(x>>24)&0xff];
249 x=*(uint32_t*)(pix+4);
251 s += sq[(x>>8)&0xff];
252 s += sq[(x>>16)&0xff];
253 s += sq[(x>>24)&0xff];
258 pix += line_size - 16;
/* Byte-swap w 32-bit words from src into dst; main loop is unrolled by
 * 8, with a scalar tail loop (its header is elided in this excerpt). */
263 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
266 for(i=0; i+8<=w; i+=8){
267 dst[i+0]= av_bswap32(src[i+0]);
268 dst[i+1]= av_bswap32(src[i+1]);
269 dst[i+2]= av_bswap32(src[i+2]);
270 dst[i+3]= av_bswap32(src[i+3]);
271 dst[i+4]= av_bswap32(src[i+4]);
272 dst[i+5]= av_bswap32(src[i+5]);
273 dst[i+6]= av_bswap32(src[i+6]);
274 dst[i+7]= av_bswap32(src[i+7]);
/* tail: remaining words, one at a time */
277 dst[i+0]= av_bswap32(src[i+0]);
/* Byte-swap len 16-bit values from src into dst (loop construct elided
 * in this excerpt). */
281 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
284 *dst++ = av_bswap16(*src++);
/* Sum of squared differences of a 4-pixel-wide block over h rows, via
 * the biased square table.  The v context pointer is opaque here
 * (unused in the visible lines).  NOTE(review): pointer advances and
 * return are elided in this excerpt. */
287 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
290 uint32_t *sq = ff_squareTbl + 256;
293 for (i = 0; i < h; i++) {
294 s += sq[pix1[0] - pix2[0]];
295 s += sq[pix1[1] - pix2[1]];
296 s += sq[pix1[2] - pix2[2]];
297 s += sq[pix1[3] - pix2[3]];
/* Sum of squared differences of an 8-pixel-wide block over h rows;
 * same scheme as sse4_c. */
304 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
307 uint32_t *sq = ff_squareTbl + 256;
310 for (i = 0; i < h; i++) {
311 s += sq[pix1[0] - pix2[0]];
312 s += sq[pix1[1] - pix2[1]];
313 s += sq[pix1[2] - pix2[2]];
314 s += sq[pix1[3] - pix2[3]];
315 s += sq[pix1[4] - pix2[4]];
316 s += sq[pix1[5] - pix2[5]];
317 s += sq[pix1[6] - pix2[6]];
318 s += sq[pix1[7] - pix2[7]];
/* Sum of squared differences of a 16-pixel-wide block over h rows;
 * same scheme as sse4_c/sse8_c. */
325 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
328 uint32_t *sq = ff_squareTbl + 256;
331 for (i = 0; i < h; i++) {
332 s += sq[pix1[ 0] - pix2[ 0]];
333 s += sq[pix1[ 1] - pix2[ 1]];
334 s += sq[pix1[ 2] - pix2[ 2]];
335 s += sq[pix1[ 3] - pix2[ 3]];
336 s += sq[pix1[ 4] - pix2[ 4]];
337 s += sq[pix1[ 5] - pix2[ 5]];
338 s += sq[pix1[ 6] - pix2[ 6]];
339 s += sq[pix1[ 7] - pix2[ 7]];
340 s += sq[pix1[ 8] - pix2[ 8]];
341 s += sq[pix1[ 9] - pix2[ 9]];
342 s += sq[pix1[10] - pix2[10]];
343 s += sq[pix1[11] - pix2[11]];
344 s += sq[pix1[12] - pix2[12]];
345 s += sq[pix1[13] - pix2[13]];
346 s += sq[pix1[14] - pix2[14]];
347 s += sq[pix1[15] - pix2[15]];
/* Store the per-pixel difference s1 - s2 of an 8-wide row into the
 * DCTELEM block; row loop and pointer advances elided in this excerpt. */
355 static void diff_pixels_c(DCTELEM *av_restrict block, const uint8_t *s1,
356 const uint8_t *s2, int stride){
359 /* read the pixels */
361 block[0] = s1[0] - s2[0];
362 block[1] = s1[1] - s2[1];
363 block[2] = s1[2] - s2[2];
364 block[3] = s1[3] - s2[3];
365 block[4] = s1[4] - s2[4];
366 block[5] = s1[5] - s2[5];
367 block[6] = s1[6] - s2[6];
368 block[7] = s1[7] - s2[7];
/* Clamp each DCTELEM of an 8-wide row to [0,255] and store as bytes;
 * row loop and pointer advances elided in this excerpt. */
376 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
381 /* read the pixels */
383 pixels[0] = av_clip_uint8(block[0]);
384 pixels[1] = av_clip_uint8(block[1]);
385 pixels[2] = av_clip_uint8(block[2]);
386 pixels[3] = av_clip_uint8(block[3]);
387 pixels[4] = av_clip_uint8(block[4]);
388 pixels[5] = av_clip_uint8(block[5]);
389 pixels[6] = av_clip_uint8(block[6]);
390 pixels[7] = av_clip_uint8(block[7]);
/* 4-wide variant of put_pixels_clamped_c. */
397 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
402 /* read the pixels */
404 pixels[0] = av_clip_uint8(block[0]);
405 pixels[1] = av_clip_uint8(block[1]);
406 pixels[2] = av_clip_uint8(block[2]);
407 pixels[3] = av_clip_uint8(block[3]);
/* 2-wide variant of put_pixels_clamped_c. */
414 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
419 /* read the pixels */
421 pixels[0] = av_clip_uint8(block[0]);
422 pixels[1] = av_clip_uint8(block[1]);
/* Bias each signed DCTELEM by +128 and store with saturation over an
 * 8x8 block.  NOTE(review): the clamp branches for the <-128 case and
 * the per-pixel increments are elided in this excerpt. */
429 static void put_signed_pixels_clamped_c(const DCTELEM *block,
430 uint8_t *av_restrict pixels,
435 for (i = 0; i < 8; i++) {
436 for (j = 0; j < 8; j++) {
439 else if (*block > 127)
/* in-range value: simple +128 bias */
442 *pixels = (uint8_t)(*block + 128);
446 pixels += (line_size - 8);
/* Add each DCTELEM of an 8-wide row to the existing pixel and clamp to
 * [0,255]; row loop and pointer advances elided in this excerpt. */
450 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *av_restrict pixels,
455 /* read the pixels */
457 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
458 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
459 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
460 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
461 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
462 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
463 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
464 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
/* 4-wide variant of add_pixels_clamped_c. */
470 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *av_restrict pixels,
475 /* read the pixels */
477 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
478 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
479 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
480 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
/* 2-wide variant of add_pixels_clamped_c. */
486 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *av_restrict pixels,
491 /* read the pixels */
493 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
494 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
/* Sum of absolute values of the block's coefficients (loop header and
 * return elided in this excerpt). */
500 static int sum_abs_dctelem_c(DCTELEM *block)
504 sum+= FFABS(block[i]);
/* Fill h rows of a 16-wide block with a constant byte value (the
 * per-row line_size advance is elided in this excerpt). */
508 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
512 for (i = 0; i < h; i++) {
513 memset(block, value, 16);
/* 8-wide variant of fill_block16_c. */
518 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
522 for (i = 0; i < h; i++) {
523 memset(block, value, 8);
/* Rounded averages of 2 and 4 values.  Arguments are parenthesized to
 * keep the macros safe for non-trivial expressions (the original forms
 * expanded a+b unparenthesized, which breaks for e.g. ternary args). */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
/*
 * GMC with a single bilinear tap: x16/y16 are 1/16-pel fractions, so
 * the four weights A..D sum to 256 and the result is renormalized by
 * >>8 after adding the rounder.  NOTE(review): the per-row dst/src
 * advances and the h loop are elided in this excerpt.
 */
531 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
533 const int A=(16-x16)*(16-y16);
534 const int B=( x16)*(16-y16);
535 const int C=(16-x16)*( y16);
536 const int D=( x16)*( y16);
541 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
542 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
543 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
544 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
545 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
546 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
547 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
548 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
/*
 * General global motion compensation: per-pixel affine source position
 * (the dxx/dxy/dyx/dyy accumulation lines are elided in this excerpt),
 * with bilinear interpolation when fully inside the picture and
 * clamped 1-D/0-D fallbacks at the edges.
 */
554 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
555 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
558 const int s= 1<<shift;
568 for(x=0; x<8; x++){ //XXX FIXME optimize
569 int src_x, src_y, frac_x, frac_y, index;
/* unsigned compare doubles as a >=0 && < bound check */
578 if((unsigned)src_x < width){
579 if((unsigned)src_y < height){
/* fully inside: 2-D bilinear */
580 index= src_x + src_y*stride;
581 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
582 + src[index +1]* frac_x )*(s-frac_y)
583 + ( src[index+stride ]*(s-frac_x)
584 + src[index+stride+1]* frac_x )* frac_y
/* y out of range: clamp y, interpolate in x only */
587 index= src_x + av_clip(src_y, 0, height)*stride;
588 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
589 + src[index +1]* frac_x )*s
593 if((unsigned)src_y < height){
/* x out of range: clamp x, interpolate in y only */
594 index= av_clip(src_x, 0, width) + src_y*stride;
595 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
596 + src[index+stride ]* frac_y )*s
/* both out of range: clamp both, copy nearest sample */
599 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
600 dst[y*stride + x]= src[index ];
/*
 * Third-pel (tpel) interpolation, put variants.  The mcXY suffix is the
 * (x,y) third-pel offset; weights use 683 ~= 2^11/3 and 2731 ~= 2^15/12
 * so the >>11 / >>15 shifts renormalize.  NOTE(review): loop closers
 * and the per-row src/dst advances are elided in this excerpt.
 */
612 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
614 case 2: put_pixels2_8_c (dst, src, stride, height); break;
615 case 4: put_pixels4_8_c (dst, src, stride, height); break;
616 case 8: put_pixels8_8_c (dst, src, stride, height); break;
617 case 16:put_pixels16_8_c(dst, src, stride, height); break;
621 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
623 for (i=0; i < height; i++) {
624 for (j=0; j < width; j++) {
625 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
632 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
634 for (i=0; i < height; i++) {
635 for (j=0; j < width; j++) {
636 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
643 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
645 for (i=0; i < height; i++) {
646 for (j=0; j < width; j++) {
647 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
654 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
656 for (i=0; i < height; i++) {
657 for (j=0; j < width; j++) {
658 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
665 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
667 for (i=0; i < height; i++) {
668 for (j=0; j < width; j++) {
669 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
676 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
678 for (i=0; i < height; i++) {
679 for (j=0; j < width; j++) {
680 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
687 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
689 for (i=0; i < height; i++) {
690 for (j=0; j < width; j++) {
691 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
698 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
700 for (i=0; i < height; i++) {
701 for (j=0; j < width; j++) {
702 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
/*
 * Third-pel interpolation, avg variants: each result is the rounded
 * average of the existing dst pixel and the interpolated value
 * (same weights as the put_tpel_* functions above).  NOTE(review):
 * loop closers and per-row advances are elided in this excerpt.
 */
709 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
711 case 2: avg_pixels2_8_c (dst, src, stride, height); break;
712 case 4: avg_pixels4_8_c (dst, src, stride, height); break;
713 case 8: avg_pixels8_8_c (dst, src, stride, height); break;
714 case 16:avg_pixels16_8_c(dst, src, stride, height); break;
718 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
720 for (i=0; i < height; i++) {
721 for (j=0; j < width; j++) {
722 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
729 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
731 for (i=0; i < height; i++) {
732 for (j=0; j < width; j++) {
733 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
740 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
742 for (i=0; i < height; i++) {
743 for (j=0; j < width; j++) {
744 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
751 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
753 for (i=0; i < height; i++) {
754 for (j=0; j < width; j++) {
755 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
762 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
764 for (i=0; i < height; i++) {
765 for (j=0; j < width; j++) {
766 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
773 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
775 for (i=0; i < height; i++) {
776 for (j=0; j < width; j++) {
777 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
784 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
786 for (i=0; i < height; i++) {
787 for (j=0; j < width; j++) {
788 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
795 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
797 for (i=0; i < height; i++) {
798 for (j=0; j < width; j++) {
799 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
806 #define QPEL_MC(r, OPNAME, RND, OP) \
807 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
808 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
812 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
813 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
814 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
815 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
816 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
817 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
818 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
819 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
825 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
827 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
831 const int src0= src[0*srcStride];\
832 const int src1= src[1*srcStride];\
833 const int src2= src[2*srcStride];\
834 const int src3= src[3*srcStride];\
835 const int src4= src[4*srcStride];\
836 const int src5= src[5*srcStride];\
837 const int src6= src[6*srcStride];\
838 const int src7= src[7*srcStride];\
839 const int src8= src[8*srcStride];\
840 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
841 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
842 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
843 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
844 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
845 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
846 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
847 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
853 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
854 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
859 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
860 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
861 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
862 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
863 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
864 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
865 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
866 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
867 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
868 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
869 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
870 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
871 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
872 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
873 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
874 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
880 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
881 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
886 const int src0= src[0*srcStride];\
887 const int src1= src[1*srcStride];\
888 const int src2= src[2*srcStride];\
889 const int src3= src[3*srcStride];\
890 const int src4= src[4*srcStride];\
891 const int src5= src[5*srcStride];\
892 const int src6= src[6*srcStride];\
893 const int src7= src[7*srcStride];\
894 const int src8= src[8*srcStride];\
895 const int src9= src[9*srcStride];\
896 const int src10= src[10*srcStride];\
897 const int src11= src[11*srcStride];\
898 const int src12= src[12*srcStride];\
899 const int src13= src[13*srcStride];\
900 const int src14= src[14*srcStride];\
901 const int src15= src[15*srcStride];\
902 const int src16= src[16*srcStride];\
903 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
904 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
905 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
906 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
907 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
908 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
909 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
910 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
911 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
912 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
913 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
914 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
915 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
916 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
917 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
918 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
924 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
927 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
930 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
931 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
934 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
936 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
937 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
940 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
943 copy_block9(full, src, 16, stride, 9);\
944 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
945 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
948 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
950 copy_block9(full, src, 16, stride, 9);\
951 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
954 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
957 copy_block9(full, src, 16, stride, 9);\
958 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
959 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
961 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
966 copy_block9(full, src, 16, stride, 9);\
967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
968 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
970 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
972 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
976 copy_block9(full, src, 16, stride, 9);\
977 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
978 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
982 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
987 copy_block9(full, src, 16, stride, 9);\
988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
993 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
997 copy_block9(full, src, 16, stride, 9);\
998 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
999 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1000 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1001 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1003 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1004 uint8_t full[16*9];\
1007 uint8_t halfHV[64];\
1008 copy_block9(full, src, 16, stride, 9);\
1009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1010 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1012 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1014 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1015 uint8_t full[16*9];\
1017 uint8_t halfHV[64];\
1018 copy_block9(full, src, 16, stride, 9);\
1019 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1020 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1021 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1022 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1024 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1025 uint8_t full[16*9];\
1028 uint8_t halfHV[64];\
1029 copy_block9(full, src, 16, stride, 9);\
1030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1031 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1033 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1035 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1036 uint8_t full[16*9];\
1038 uint8_t halfHV[64];\
1039 copy_block9(full, src, 16, stride, 9);\
1040 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1041 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1042 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1043 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1045 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1047 uint8_t halfHV[64];\
1048 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1049 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1050 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1052 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1054 uint8_t halfHV[64];\
1055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1056 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1057 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1059 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1060 uint8_t full[16*9];\
1063 uint8_t halfHV[64];\
1064 copy_block9(full, src, 16, stride, 9);\
1065 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1066 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1067 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1068 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1070 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1071 uint8_t full[16*9];\
1073 copy_block9(full, src, 16, stride, 9);\
1074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1075 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1076 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1078 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1079 uint8_t full[16*9];\
1082 uint8_t halfHV[64];\
1083 copy_block9(full, src, 16, stride, 9);\
1084 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1085 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1086 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1087 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1089 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1090 uint8_t full[16*9];\
1092 copy_block9(full, src, 16, stride, 9);\
1093 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1094 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1095 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1097 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1099 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1100 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1103 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1106 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1109 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1110 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1113 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1115 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1116 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1119 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1120 uint8_t full[24*17];\
1122 copy_block17(full, src, 24, stride, 17);\
1123 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1124 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1127 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1128 uint8_t full[24*17];\
1129 copy_block17(full, src, 24, stride, 17);\
1130 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1133 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1134 uint8_t full[24*17];\
1136 copy_block17(full, src, 24, stride, 17);\
1137 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1138 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1140 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1141 uint8_t full[24*17];\
1142 uint8_t halfH[272];\
1143 uint8_t halfV[256];\
1144 uint8_t halfHV[256];\
1145 copy_block17(full, src, 24, stride, 17);\
1146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1147 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1149 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1151 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1152 uint8_t full[24*17];\
1153 uint8_t halfH[272];\
1154 uint8_t halfHV[256];\
1155 copy_block17(full, src, 24, stride, 17);\
1156 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1157 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1158 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1159 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1161 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1162 uint8_t full[24*17];\
1163 uint8_t halfH[272];\
1164 uint8_t halfV[256];\
1165 uint8_t halfHV[256];\
1166 copy_block17(full, src, 24, stride, 17);\
1167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1172 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1173 uint8_t full[24*17];\
1174 uint8_t halfH[272];\
1175 uint8_t halfHV[256];\
1176 copy_block17(full, src, 24, stride, 17);\
1177 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1178 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1179 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1180 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1182 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1183 uint8_t full[24*17];\
1184 uint8_t halfH[272];\
1185 uint8_t halfV[256];\
1186 uint8_t halfHV[256];\
1187 copy_block17(full, src, 24, stride, 17);\
1188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1189 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1191 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1193 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1194 uint8_t full[24*17];\
1195 uint8_t halfH[272];\
1196 uint8_t halfHV[256];\
1197 copy_block17(full, src, 24, stride, 17);\
1198 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1199 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1200 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1201 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1203 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1204 uint8_t full[24*17];\
1205 uint8_t halfH[272];\
1206 uint8_t halfV[256];\
1207 uint8_t halfHV[256];\
1208 copy_block17(full, src, 24, stride, 17);\
1209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1210 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1212 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1214 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1215 uint8_t full[24*17];\
1216 uint8_t halfH[272];\
1217 uint8_t halfHV[256];\
1218 copy_block17(full, src, 24, stride, 17);\
1219 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1220 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1221 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1222 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1224 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1225 uint8_t halfH[272];\
1226 uint8_t halfHV[256];\
1227 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1228 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1229 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1231 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1232 uint8_t halfH[272];\
1233 uint8_t halfHV[256];\
1234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1235 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1236 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1238 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1239 uint8_t full[24*17];\
1240 uint8_t halfH[272];\
1241 uint8_t halfV[256];\
1242 uint8_t halfHV[256];\
1243 copy_block17(full, src, 24, stride, 17);\
1244 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1245 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1246 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1247 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1249 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1250 uint8_t full[24*17];\
1251 uint8_t halfH[272];\
1252 copy_block17(full, src, 24, stride, 17);\
1253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1254 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1255 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1257 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1258 uint8_t full[24*17];\
1259 uint8_t halfH[272];\
1260 uint8_t halfV[256];\
1261 uint8_t halfHV[256];\
1262 copy_block17(full, src, 24, stride, 17);\
1263 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1264 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1265 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1266 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1268 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1269 uint8_t full[24*17];\
1270 uint8_t halfH[272];\
1271 copy_block17(full, src, 24, stride, 17);\
1272 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1273 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1274 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1276 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1277 uint8_t halfH[272];\
1278 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1279 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1282 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1283 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1284 #define op_put(a, b) a = cm[((b) + 16)>>5]
1285 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1287 QPEL_MC(0, put_ , _ , op_put)
1288 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1289 QPEL_MC(0, avg_ , _ , op_avg)
1290 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1292 #undef op_avg_no_rnd
1294 #undef op_put_no_rnd
1296 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1297 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1298 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1299 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1300 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1301 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1303 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1304 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1308 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1309 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1310 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1311 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1312 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1313 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1314 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1315 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
#if CONFIG_RV40_DECODER
/* RV40 treats the (3,3) quarter-pel position as a plain xy2 half-pel
 * average; forward to the generic pixel averagers. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
#if CONFIG_DIRAC_DECODER
/* Generate the Dirac pixel copy/average entry points from the generic
 * 8-bit pixel helpers; 32-wide variants are built from two 16-wide calls. */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
DIRAC_MC(put)
DIRAC_MC(avg)
#endif /* CONFIG_DIRAC_DECODER */
1381 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1382 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1386 const int src_1= src[ -srcStride];
1387 const int src0 = src[0 ];
1388 const int src1 = src[ srcStride];
1389 const int src2 = src[2*srcStride];
1390 const int src3 = src[3*srcStride];
1391 const int src4 = src[4*srcStride];
1392 const int src5 = src[5*srcStride];
1393 const int src6 = src[6*srcStride];
1394 const int src7 = src[7*srcStride];
1395 const int src8 = src[8*srcStride];
1396 const int src9 = src[9*srcStride];
1397 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1398 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1399 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1400 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1401 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1402 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1403 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1404 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* WMV2 mspel motion-compensation wrappers.  mcXY selects the sub-pel
 * position: X = horizontal phase, Y = vertical phase.  Diagonal positions
 * filter horizontally with 3 rows of extra context (h = 11, starting one
 * row above) and feed the result (offset by one filtered row, halfH+8)
 * into the vertical filter. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}

static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}

static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1454 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1455 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1457 const int strength= ff_h263_loop_filter_strength[qscale];
1461 int p0= src[x-2*stride];
1462 int p1= src[x-1*stride];
1463 int p2= src[x+0*stride];
1464 int p3= src[x+1*stride];
1465 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1467 if (d<-2*strength) d1= 0;
1468 else if(d<- strength) d1=-2*strength - d;
1469 else if(d< strength) d1= d;
1470 else if(d< 2*strength) d1= 2*strength - d;
1475 if(p1&256) p1= ~(p1>>31);
1476 if(p2&256) p2= ~(p2>>31);
1478 src[x-1*stride] = p1;
1479 src[x+0*stride] = p2;
1483 d2= av_clip((p0-p3)/4, -ad1, ad1);
1485 src[x-2*stride] = p0 - d2;
1486 src[x+ stride] = p3 + d2;
1491 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1492 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1494 const int strength= ff_h263_loop_filter_strength[qscale];
1498 int p0= src[y*stride-2];
1499 int p1= src[y*stride-1];
1500 int p2= src[y*stride+0];
1501 int p3= src[y*stride+1];
1502 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1504 if (d<-2*strength) d1= 0;
1505 else if(d<- strength) d1=-2*strength - d;
1506 else if(d< strength) d1= d;
1507 else if(d< 2*strength) d1= 2*strength - d;
1512 if(p1&256) p1= ~(p1>>31);
1513 if(p2&256) p2= ~(p2>>31);
1515 src[y*stride-1] = p1;
1516 src[y*stride+0] = p2;
1520 d2= av_clip((p0-p3)/4, -ad1, ad1);
1522 src[y*stride-2] = p0 - d2;
1523 src[y*stride+1] = p3 + d2;
/**
 * H.261 in-loop filter: separable 1-2-1 smoothing of an 8x8 block.
 * A vertically filtered copy is built in temp[] (scaled by 4 so that
 * edge rows, copied unfiltered, carry the same weight), then the
 * horizontal pass writes back with rounding; edge columns only get
 * the vertical pass.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x, y, xy, yz;
    int temp[64];

    /* vertical pass: rows 0 and 7 are copied (x4), rows 1..6 get 1-2-1 */
    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    /* horizontal pass: columns 0 and 7 just renormalize the vertical
     * result, columns 1..6 get the 1-2-1 kernel with full rounding */
    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
/**
 * Sum of absolute differences over a 16-wide block of h rows (SAD).
 * @param v unused context pointer (me_cmp_func signature)
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;

    for (i = 0; i < h; i++) {
        /* compact loop instead of manual 16x unrolling; compilers unroll */
        for (j = 0; j < 16; j++)
            s += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/**
 * SAD against the horizontal half-pel interpolation of pix2
 * (rounded average of each pixel and its right neighbour).
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++)
            s += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/**
 * SAD against the vertical half-pel interpolation of pix2
 * (rounded average of each pixel and the one below it).
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++)
            s += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
/**
 * SAD against the diagonal half-pel interpolation of pix2
 * (rounded 4-tap average of the 2x2 neighbourhood).
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++)
            s += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], pix3[j], pix3[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
/** Sum of absolute differences over an 8-wide block of h rows (SAD). */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            s += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/** 8-wide SAD against the horizontal half-pel interpolation of pix2. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            s += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/** 8-wide SAD against the vertical half-pel interpolation of pix2. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            s += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
/** 8-wide SAD against the diagonal half-pel interpolation of pix2. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            s += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], pix3[j], pix3[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
1755 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1756 MpegEncContext *c = v;
1762 for(x=0; x<16; x++){
1763 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1766 for(x=0; x<15; x++){
1767 score2+= FFABS( s1[x ] - s1[x +stride]
1768 - s1[x+1] + s1[x+1+stride])
1769 -FFABS( s2[x ] - s2[x +stride]
1770 - s2[x+1] + s2[x+1+stride]);
1777 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1778 else return score1 + FFABS(score2)*8;
1781 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1782 MpegEncContext *c = v;
1789 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1793 score2+= FFABS( s1[x ] - s1[x +stride]
1794 - s1[x+1] + s1[x+1+stride])
1795 -FFABS( s2[x ] - s2[x +stride]
1796 - s2[x+1] + s2[x+1+stride]);
1803 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1804 else return score1 + FFABS(score2)*8;
1807 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1811 for(i=0; i<8*8; i++){
1812 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1815 av_assert2(-512<b && b<512);
1817 sum += (w*b)*(w*b)>>4;
1822 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1825 for(i=0; i<8*8; i++){
1826 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1831 * Permute an 8x8 block.
1832 * @param block the block which will be permuted according to the given permutation vector
1833 * @param permutation the permutation vector
1834 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1835 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1836 * (inverse) permutated to scantable order!
1838 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1844 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
1846 for(i=0; i<=last; i++){
1847 const int j= scantable[i];
1852 for(i=0; i<=last; i++){
1853 const int j= scantable[i];
1854 const int perm_j= permutation[j];
1855 block[perm_j]= temp[j];
1859 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
/* Fill cmp[0..5] with the compare functions matching `type` from the
 * DSPContext function tables (SAD, SSE, hadamard, DCT-based metrics, ...).
 * NOTE(review): this listing is heavily elided — the switch over `type`
 * and most of its cases are missing from this view. */
1863 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1866 memset(cmp, 0, sizeof(void*)*6);
/* per-type table selections (surrounding case labels elided): */
1874 cmp[i]= c->hadamard8_diff[i];
1880 cmp[i]= c->dct_sad[i];
1883 cmp[i]= c->dct264_sad[i];
1886 cmp[i]= c->dct_max[i];
1889 cmp[i]= c->quant_psnr[i];
/* unknown `type` falls through to an error log: */
1918 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1923 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1925 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1926 long a = *(long*)(src+i);
1927 long b = *(long*)(dst+i);
1928 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1931 dst[i+0] += src[i+0];
1934 static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
1936 #if !HAVE_FAST_UNALIGNED
1937 if((long)src2 & (sizeof(long)-1)){
1938 for(i=0; i+7<w; i+=8){
1939 dst[i+0] = src1[i+0]-src2[i+0];
1940 dst[i+1] = src1[i+1]-src2[i+1];
1941 dst[i+2] = src1[i+2]-src2[i+2];
1942 dst[i+3] = src1[i+3]-src2[i+3];
1943 dst[i+4] = src1[i+4]-src2[i+4];
1944 dst[i+5] = src1[i+5]-src2[i+5];
1945 dst[i+6] = src1[i+6]-src2[i+6];
1946 dst[i+7] = src1[i+7]-src2[i+7];
1950 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1951 long a = *(long*)(src1+i);
1952 long b = *(long*)(src2+i);
1953 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1956 dst[i+0] = src1[i+0]-src2[i+0];
/**
 * HuffYUV median prediction decode: reconstruct a row by adding diff[]
 * to the median of (left, top, left+top-topleft); left/left_top carry
 * the running state across calls.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l  = *left;
    lt = *left_top;

    for(i=0; i<w; i++){
        l  = mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt = src1[i];
        dst[i] = l;
    }

    *left     = l;
    *left_top = lt;
}
/**
 * HuffYUV median prediction encode: emit src2 minus the median predictor
 * built from src1 (the previous row); inverse of add_hfyu_median_prediction_c.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l  = *left;
    lt = *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt = src1[i];
        l  = src2[i];
        dst[i] = l - pred;
    }

    *left     = l;
    *left_top = lt;
}
/**
 * HuffYUV left prediction: dst[i] = running sum of src[] starting from acc
 * (mod 256 through the uint8_t store).  Returns the final accumulator.
 * The main loop handles two bytes per iteration; the tail loop finishes
 * an odd width.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for(i=0; i<w-1; i++){
        acc+= src[i];
        dst[i]= acc;
        i++;
        acc+= src[i];
        dst[i]= acc;
    }

    for(; i<w; i++){
        acc+= src[i];
        dst[i]= acc;
    }

    return acc;
}
2024 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Hadamard butterfly helpers: BUTTERFLY2 writes sum/difference of two
 * inputs to two outputs, BUTTERFLY1 does the butterfly in place, and
 * BUTTERFLYA returns |x+y| + |x-y|.
 * NOTE(review): the bodies of BUTTERFLY2/BUTTERFLY1 are elided here. */
2054 #define BUTTERFLY2(o1,o2,i1,i2) \
2058 #define BUTTERFLY1(x,y) \
2067 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/**
 * 8x8 SATD: sum of absolute values of the 2-D Hadamard transform of the
 * difference block src - dst.  Row transforms first, then column
 * transforms with the final stage folded into the absolute-value sum.
 * h is expected to be 8 (only 8 rows are processed).
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
/**
 * Intra 8x8 SATD: Hadamard transform of src itself (no reference block);
 * the DC term (mean) is subtracted at the end so flat blocks score 0.
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
2162 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2163 MpegEncContext * const s= (MpegEncContext *)c;
2164 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2168 s->dsp.diff_pixels(temp, src1, src2, stride);
2170 return s->dsp.sum_abs_dctelem(temp);
/* NOTE(review): interior of the DCT8_1D macro (H.264-style integer 8-point
 * transform in butterfly form); the macro's #define header and the DST(0/4)
 * lines are elided in this listing. */\
2175 const int s07 = SRC(0) + SRC(7);\
2176 const int s16 = SRC(1) + SRC(6);\
2177 const int s25 = SRC(2) + SRC(5);\
2178 const int s34 = SRC(3) + SRC(4);\
2179 const int a0 = s07 + s34;\
2180 const int a1 = s16 + s25;\
2181 const int a2 = s07 - s34;\
2182 const int a3 = s16 - s25;\
2183 const int d07 = SRC(0) - SRC(7);\
2184 const int d16 = SRC(1) - SRC(6);\
2185 const int d25 = SRC(2) - SRC(5);\
2186 const int d34 = SRC(3) - SRC(4);\
2187 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2188 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2189 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2190 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2192 DST(1, a4 + (a7>>2)) ;\
2193 DST(2, a2 + (a3>>1)) ;\
2194 DST(3, a5 + (a6>>2)) ;\
2196 DST(5, a6 - (a5>>2)) ;\
2197 DST(6, (a2>>1) - a3 ) ;\
2198 DST(7, (a4>>2) - a7 ) ;\
/* H.264-transform SAD: applies DCT8_1D to the rows of the difference
 * block in place, then to the columns while summing |coefficient| via
 * the redefined DST macro.  NOTE(review): declarations, loop bodies and
 * the #undef/return lines are elided in this listing. */
2201 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2202 MpegEncContext * const s= (MpegEncContext *)c;
2207 s->dsp.diff_pixels(dct[0], src1, src2, stride);
/* row pass: read and write dct[i][x] */
2209 #define SRC(x) dct[i][x]
2210 #define DST(x,v) dct[i][x]= v
2211 for( i = 0; i < 8; i++ )
/* column pass: read dct[x][i], accumulate |v| instead of storing */
2216 #define SRC(x) dct[x][i]
2217 #define DST(x,v) sum += FFABS(v)
2218 for( i = 0; i < 8; i++ )
2226 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2227 MpegEncContext * const s= (MpegEncContext *)c;
2228 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2233 s->dsp.diff_pixels(temp, src1, src2, stride);
2237 sum= FFMAX(sum, FFABS(temp[i]));
2242 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2243 MpegEncContext * const s= (MpegEncContext *)c;
2244 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2245 DCTELEM * const bak = temp+64;
2251 s->dsp.diff_pixels(temp, src1, src2, stride);
2253 memcpy(bak, temp, 64*sizeof(DCTELEM));
2255 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2256 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2257 ff_simple_idct_8(temp); //FIXME
2260 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
/* Rate-distortion metric: quantize the difference block, estimate the VLC
 * bit cost of the coefficients (run/level coding, escapes via esc_length),
 * reconstruct via dequantize + idct_add and measure SSE against the
 * original, then combine distortion + lambda*bits.
 * NOTE(review): this listing is heavily elided (branches, loop bodies and
 * braces are missing); left byte-identical. */
2265 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2266 MpegEncContext * const s= (MpegEncContext *)c;
2267 const uint8_t *scantable= s->intra_scantable.permutated;
2268 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2269 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2270 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2271 int i, last, run, bits, level, distortion, start_i;
2272 const int esc_length= s->ac_esc_length;
2274 uint8_t * last_length;
2278 copy_block8(lsrc1, src1, 8, stride, 8);
2279 copy_block8(lsrc2, src2, 8, stride, 8);
2281 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2283 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
/* intra path: luma DC costed from its own table */
2289 length = s->intra_ac_vlc_length;
2290 last_length= s->intra_ac_vlc_last_length;
2291 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2294 length = s->inter_ac_vlc_length;
2295 last_length= s->inter_ac_vlc_last_length;
/* cost all non-last coefficients in scan order */
2300 for(i=start_i; i<last; i++){
2301 int j= scantable[i];
2306 if((level&(~127)) == 0){
2307 bits+= length[UNI_AC_ENC_INDEX(run, level)];
/* cost the last coefficient with the "last" tables */
2316 level= temp[i] + 64;
2318 av_assert2(level - 64);
2320 if((level&(~127)) == 0){
2321 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* reconstruct and measure the distortion actually introduced */
2329 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2331 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2334 s->dsp.idct_add(lsrc2, 8, temp);
2336 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2338 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* NOTE(review): fragmentary listing — same structure as rd8x8_c but returns
 * only the estimated VLC bit count (rate), without the distortion term.
 * Loop bodies and the bits/run initialization are missing from this copy. */
2341 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2342 MpegEncContext * const s= (MpegEncContext *)c;
2343 const uint8_t *scantable= s->intra_scantable.permutated;
2344 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2345 int i, last, run, bits, level, start_i;
2346 const int esc_length= s->ac_esc_length;
2348 uint8_t * last_length;
2352 s->dsp.diff_pixels(temp, src1, src2, stride);
2354 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
/* intra: DC coefficient costed via the luma DC VLC table */
2360 length = s->intra_ac_vlc_length;
2361 last_length= s->intra_ac_vlc_last_length;
2362 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2365 length = s->inter_ac_vlc_length;
2366 last_length= s->inter_ac_vlc_last_length;
/* accumulate per-(run, level) VLC lengths over the scanned coefficients */
2371 for(i=start_i; i<last; i++){
2372 int j= scantable[i];
2377 if((level&(~127)) == 0){
2378 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2387 level= temp[i] + 64;
2389 av_assert2(level - 64);
2391 if((level&(~127)) == 0){
2392 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* NOTE(review): macro generating vsad_intra8_c / vsad_intra16_c — vertical
 * SAD within a single block (sum of |s[x] - s[x+stride]| over adjacent rows).
 * This copy is missing the local declarations, row-advance and return lines
 * of the macro body; do not re-expand without a complete source. */
2400 #define VSAD_INTRA(size) \
2401 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2405 for(y=1; y<h; y++){ \
2406 for(x=0; x<size; x+=4){ \
2407 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2408 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
/**
 * Vertical SAD of the difference between two 16-pixel-wide blocks:
 * sums |(s1[x] - s2[x]) - (s1[x+stride] - s2[x+stride])| over adjacent rows.
 * (Reconstructed from a mangled listing: restored locals, row advance and
 * return that the fragment had lost; logic of the visible lines unchanged.)
 *
 * @param c      unused comparison context (MpegEncContext *, by convention)
 * @param s1,s2  source blocks
 * @param stride line size of both blocks in bytes
 * @param h      number of rows to process
 * @return accumulated vertical SAD score
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
/* NOTE(review): SQ squares its argument (argument is parenthesized, but it is
 * evaluated twice — do not pass expressions with side effects). VSSE_INTRA
 * generates vsse_intra8_c / vsse_intra16_c: vertical sum of squared errors
 * between adjacent rows of one block. This copy of the macro body is missing
 * its local declarations, row advance and return lines. */
2433 #define SQ(a) ((a)*(a))
2434 #define VSSE_INTRA(size) \
2435 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2439 for(y=1; y<h; y++){ \
2440 for(x=0; x<size; x+=4){ \
2441 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2442 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
/**
 * Vertical SSE of the difference between two 16-pixel-wide blocks:
 * sums ((s1[x]-s2[x]) - (s1[x+stride]-s2[x+stride]))^2 over adjacent rows.
 * (Reconstructed from a mangled listing: restored locals, row advance and
 * return; the visible per-pixel expression is unchanged.)
 *
 * @return accumulated vertical squared-error score
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
/**
 * Sum of squared differences between an int8 vector and an int16 vector.
 * Used by the snow/DWT code to compare quantized bases against residuals.
 * (Reconstructed from a mangled listing: restored locals, braces and return.)
 *
 * @param pix1 signed 8-bit input
 * @param pix2 signed 16-bit input
 * @param size number of elements to compare (0 yields 0)
 * @return sum over i of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int score = 0, i;

    for (i = 0; i < size; i++)
        score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);

    return score;
}
/* NOTE(review): WRAPPER8_16_SQ (defined earlier in this file, outside this
 * fragment) builds a 16x16 comparator from an 8x8 one by summing the four
 * 8x8 quadrant scores. Each line instantiates one such 16x16 wrapper. */
2476 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2477 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2478 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2480 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2482 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2483 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2484 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2485 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/**
 * Elementwise multiply src0 by src1 read back-to-front:
 * dst[i] = src0[i] * src1[len-1-i].
 * (Reconstructed from a mangled listing: restored the local declaration and
 * the `src1 += len-1` setup that the negative indexing below relies on.)
 *
 * @param dst  output, len floats (may alias src0 per original usage)
 * @param src0 first operand, len floats
 * @param src1 second operand, len floats, consumed in reverse order
 * @param len  element count
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    src1 += len - 1;                 /* point at the last element; src1[-i] walks backwards */
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[-i];
}
/**
 * Fused multiply-add over float vectors: dst[i] = src0[i] * src1[i] + src2[i].
 * (Reconstructed from a mangled listing: restored the loop-index declaration
 * and closing brace; the per-element expression is unchanged.)
 *
 * @param dst  output, len floats
 * @param src0,src1 multiplicand vectors
 * @param src2 addend vector
 * @param len  element count
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[i] + src2[i];
}
/**
 * Overlap-add windowing as used by MDCT-based audio codecs:
 * with pointers re-based to the centre of the 2*len output,
 *   dst[i] = src0[i]*win[j] - src1[j]*win[i]
 *   dst[j] = src0[i]*win[i] + src1[j]*win[j]
 * for i = -len..-1, j = len-1..0 (win holds 2*len coefficients).
 * (Reconstructed from a mangled listing: restored the pointer re-basing,
 * local loads and closing braces; the two output expressions are unchanged.)
 *
 * @param dst  output, 2*len floats
 * @param src0 first half input, len floats (read via dst-relative offsets)
 * @param src1 second half input, len floats
 * @param win  window, 2*len floats
 * @param len  half-window length
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int i, j;

    /* re-base so that negative indices address the first half */
    dst  += len;
    win  += len;
    src0 += len;

    for (i = -len, j = len - 1; i < 0; i++, j--) {
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];
        dst[i] = s0 * wj - s1 * wi;
        dst[j] = s0 * wi + s1 * wj;
    }
}
2517 static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
2521 for (i = 0; i < len; i++) {
2522 float t = v1[i] - v2[i];
/**
 * Butterfly with interleaved output: for each i,
 * dst[2i] = src0[i] + src1[i], dst[2i+1] = src0[i] - src1[i].
 * (Reconstructed from a mangled listing: restored the local loads and
 * loop-index declaration; the two stores are unchanged.)
 *
 * @param dst  output, 2*len floats, sum/difference interleaved
 * @param src0,src1 inputs, len floats each
 * @param len  element count
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float f1 = src0[i];
        float f2 = src1[i];
        dst[2 * i    ] = f1 + f2;
        dst[2 * i + 1] = f1 - f2;
    }
}
/**
 * Plain float dot product: sum over i of v1[i] * v2[i].
 * (Reconstructed from a mangled listing: restored the accumulator,
 * loop-index declaration, accumulation statement and return.)
 *
 * @param v1,v2 input vectors, len floats each
 * @param len   element count (0 yields 0.0)
 * @return the scalar product
 */
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float p = 0.0;
    int i;

    for (i = 0; i < len; i++)
        p += v1[i] * v2[i];

    return p;
}
/**
 * Clip one float, operating on its IEEE-754 bit pattern.
 * Precondition (guaranteed by the caller): min < 0 < max, so `mini` (bits of
 * min) has the sign bit set and compares above every non-negative pattern.
 *  - a > mini (unsigned)  => a is more negative than min  -> return min bits
 *  - (a^sign) > maxisign  => a is positive and above max  -> return max bits
 *  - otherwise a is already inside [min, max].
 * (Reconstructed from a mangled listing: restored braces and the final
 * `return a;` that the fragment had lost.)
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}

/**
 * Clip a float vector to [*min, *max] where *min < 0 < *max, using integer
 * bit-pattern comparisons (clipf_c_one) instead of float compares.
 * len must be a multiple of 8 (the loop is unrolled by 8).
 * NOTE(review): the pointer casts type-pun float<->uint32_t, which formally
 * violates strict aliasing — kept as in the original; FFmpeg builds with
 * -fno-strict-aliasing.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Clip a float vector to [min, max]; len must be a multiple of 8.
 * When min and max straddle zero, dispatch to the bit-pattern fast path
 * (vector_clipf_c_opposite_sign); otherwise clip with av_clipf.
 * (Reconstructed from a mangled listing: restored the loop-index declaration,
 * the else branch around the unrolled loop and the closing braces.)
 *
 * @param dst output, len floats
 * @param src input, len floats
 * @param min,max clipping bounds
 * @param len element count, multiple of 8
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for (i = 0; i < len; i += 8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
/**
 * Integer dot product of two int16 vectors, accumulated in 32 bits.
 * (Reconstructed from a mangled listing: restored the accumulator, the
 * `while (order--)` loop and the return statement.)
 *
 * @param v1,v2 input vectors, order elements each
 * @param order element count (0 yields 0)
 * @return sum over i of v1[i] * v2[i]
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Combined dot product and multiply-accumulate (used by the ATRAC3+/ra144
 * style LPC code paths): returns sum(v1[i]*v2[i]) computed on the ORIGINAL
 * v1 values, while updating v1[i] += mul * v3[i] in the same pass.
 * (Reconstructed from a mangled listing: restored the accumulator, the
 * dot-product statement that precedes the visible madd line, the loop and
 * the return.)
 *
 * @param v1    in/out vector: read for the product, then updated
 * @param v2    dot-product operand
 * @param v3    madd operand
 * @param order element count
 * @param mul   scalar multiplier for the madd
 * @return dot product of the pre-update v1 with v2
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;

    while (order--) {
        res += *v1 * *v2++;     /* read v1 before it is modified below */
        *v1++ += mul * *v3++;
    }
    return res;
}
/**
 * Apply a symmetric int16 window to an int16 signal with Q15 rounding:
 * output[i] = round(input[i] * window[i] >> 15), processing the vector from
 * both ends so only the first len/2 window coefficients are needed.
 * (Reconstructed from a mangled listing: restored the loop braces and the
 * `int i` declaration; the arithmetic lines are unchanged.)
 *
 * @param output result, len samples (may equal input per original usage)
 * @param input  source, len samples
 * @param window first len/2 window coefficients, applied symmetrically
 * @param len    total sample count (even)
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        /* Q15 multiply with round-to-nearest: +2^14 before the >>15 */
        output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
        output[len - i - 1] = (MUL16(input[len - i - 1], w) + (1 << 14)) >> 15;
    }
}
/**
 * Clip an int32 vector to [min, max]; len must be a non-zero multiple of 8
 * (the loop body is unrolled by 8 and always runs at least once).
 * (Reconstructed from a mangled listing: restored the do/while frame around
 * the eight visible clip statements.)
 *
 * @param dst output, len values
 * @param src input, len values
 * @param min,max inclusive clipping bounds
 * @param len element count, multiple of 8, > 0
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/* IDCT cosine constants, 2048 * sqrt(2) * cos(k*pi/16), rounded.
 * W0 restored: it was dropped from this mangled listing but is used by
 * wmv2_idct_row/wmv2_idct_col below (2048 * sqrt(2) * cos(4*pi/16)
 * == 2048 exactly, since sqrt(2)*cos(pi/4) == 1). */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * 1-D 8-point row IDCT used by the WMV2 transform, Wang-style butterfly
 * with 8-bit rounding shift on output. A DC-only row of value d produces
 * eight outputs of 8*d.
 * (Reconstructed from a mangled listing: restored the braces and the
 * s1/s2/a* local declarations; all arithmetic lines are unchanged.)
 */
static void wmv2_idct_row(short * b)
{
    int s1, s2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* step 1: odd/even butterflies */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    /* step 2: 181/256 approximates 1/sqrt(2) */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    /* step 3: recombine with rounding (+2^7) and 8-bit shift */
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
2680 static void wmv2_idct_col(short * b)
2683 int a0,a1,a2,a3,a4,a5,a6,a7;
2684 /*step 1, with extended precision*/
2685 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2686 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2687 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2688 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2689 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2690 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2691 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2692 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2694 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2695 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2697 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2698 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2699 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2700 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2702 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2703 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2704 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2705 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
/**
 * Full 2-D 8x8 WMV2 IDCT: row pass over the eight rows (stride 8),
 * then column pass over the eight columns.
 * (Reconstructed from a mangled listing: restored the loop headers and
 * braces around the two visible per-row/per-column calls.)
 *
 * @param block 64 coefficients in row-major order, transformed in place
 */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 64; i += 8) {
        wmv2_idct_row(block + i);
    }
    for (i = 0; i < 8; i++) {
        wmv2_idct_col(block + i);
    }
}
2717 /* XXX: those functions should be suppressed ASAP when all IDCTs are
2719 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2721 ff_wmv2_idct_c(block);
2722 put_pixels_clamped_c(block, dest, line_size);
2724 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2726 ff_wmv2_idct_c(block);
2727 add_pixels_clamped_c(block, dest, line_size);
2729 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2731 ff_j_rev_dct (block);
2732 put_pixels_clamped_c(block, dest, line_size);
2734 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2736 ff_j_rev_dct (block);
2737 add_pixels_clamped_c(block, dest, line_size);
2740 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
2742 ff_j_rev_dct4 (block);
2743 put_pixels_clamped4_c(block, dest, line_size);
2745 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
2747 ff_j_rev_dct4 (block);
2748 add_pixels_clamped4_c(block, dest, line_size);
2751 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
2753 ff_j_rev_dct2 (block);
2754 put_pixels_clamped2_c(block, dest, line_size);
2756 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
2758 ff_j_rev_dct2 (block);
2759 add_pixels_clamped2_c(block, dest, line_size);
2762 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
2764 dest[0] = av_clip_uint8((block[0] + 4)>>3);
2766 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
2768 dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
2771 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
2773 /* init static data */
2774 av_cold void ff_dsputil_static_init(void)
2778 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2779 for(i=0;i<MAX_NEG_CROP;i++) {
2781 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2784 for(i=0;i<512;i++) {
2785 ff_squareTbl[i] = (i - 256) * (i - 256);
2788 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2791 int ff_check_alignment(void){
2792 static int did_fail=0;
2793 LOCAL_ALIGNED_16(int, aligned, [4]);
2795 if((intptr_t)aligned & 15){
2797 #if HAVE_MMX || HAVE_ALTIVEC
2798 av_log(NULL, AV_LOG_ERROR,
2799 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2800 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2801 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2802 "Do not report crashes to FFmpeg developers.\n");
/* NOTE(review): fragmentary listing of the main DSPContext initializer —
 * many lines (braces, #if CONFIG_ENCODERS guards, else branches, several
 * table assignments) are missing from this copy. It wires every function
 * pointer in DSPContext to the C reference implementations, then lets
 * per-architecture init functions override them. Do not edit the wiring
 * without a complete copy of dsputil.c. */
2811 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2815 ff_check_alignment();
/* forward DCT selection (encoder-only region; its #if guard is missing here) */
2818 if (avctx->bits_per_raw_sample == 10) {
2819 c->fdct = ff_jpeg_fdct_islow_10;
2820 c->fdct248 = ff_fdct248_islow_10;
2822 if(avctx->dct_algo==FF_DCT_FASTINT) {
2823 c->fdct = ff_fdct_ifast;
2824 c->fdct248 = ff_fdct_ifast248;
2826 else if(avctx->dct_algo==FF_DCT_FAAN) {
2827 c->fdct = ff_faandct;
2828 c->fdct248 = ff_faandct248;
2831 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2832 c->fdct248 = ff_fdct248_islow_8;
2835 #endif //CONFIG_ENCODERS
/* inverse DCT selection: lowres uses reduced-size jref IDCTs */
2837 if(avctx->lowres==1){
2838 c->idct_put= ff_jref_idct4_put;
2839 c->idct_add= ff_jref_idct4_add;
2840 c->idct = ff_j_rev_dct4;
2841 c->idct_permutation_type= FF_NO_IDCT_PERM;
2842 }else if(avctx->lowres==2){
2843 c->idct_put= ff_jref_idct2_put;
2844 c->idct_add= ff_jref_idct2_add;
2845 c->idct = ff_j_rev_dct2;
2846 c->idct_permutation_type= FF_NO_IDCT_PERM;
2847 }else if(avctx->lowres==3){
2848 c->idct_put= ff_jref_idct1_put;
2849 c->idct_add= ff_jref_idct1_add;
2850 c->idct = ff_j_rev_dct1;
2851 c->idct_permutation_type= FF_NO_IDCT_PERM;
2853 if (avctx->bits_per_raw_sample == 10) {
2854 c->idct_put = ff_simple_idct_put_10;
2855 c->idct_add = ff_simple_idct_add_10;
2856 c->idct = ff_simple_idct_10;
2857 c->idct_permutation_type = FF_NO_IDCT_PERM;
2859 if(avctx->idct_algo==FF_IDCT_INT){
2860 c->idct_put= ff_jref_idct_put;
2861 c->idct_add= ff_jref_idct_add;
2862 c->idct = ff_j_rev_dct;
2863 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2864 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2865 c->idct_put= ff_wmv2_idct_put_c;
2866 c->idct_add= ff_wmv2_idct_add_c;
2867 c->idct = ff_wmv2_idct_c;
2868 c->idct_permutation_type= FF_NO_IDCT_PERM;
2869 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2870 c->idct_put= ff_faanidct_put;
2871 c->idct_add= ff_faanidct_add;
2872 c->idct = ff_faanidct;
2873 c->idct_permutation_type= FF_NO_IDCT_PERM;
2874 }else{ //accurate/default
2875 c->idct_put = ff_simple_idct_put_8;
2876 c->idct_add = ff_simple_idct_add_8;
2877 c->idct = ff_simple_idct_8;
2878 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* basic pixel block helpers */
2883 c->diff_pixels = diff_pixels_c;
2884 c->put_pixels_clamped = put_pixels_clamped_c;
2885 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2886 c->add_pixels_clamped = add_pixels_clamped_c;
2887 c->sum_abs_dctelem = sum_abs_dctelem_c;
2890 c->pix_sum = pix_sum_c;
2891 c->pix_norm1 = pix_norm1_c;
2893 c->fill_block_tab[0] = fill_block16_c;
2894 c->fill_block_tab[1] = fill_block8_c;
2896 /* TODO [0] 16 [1] 8 */
2897 c->pix_abs[0][0] = pix_abs16_c;
2898 c->pix_abs[0][1] = pix_abs16_x2_c;
2899 c->pix_abs[0][2] = pix_abs16_y2_c;
2900 c->pix_abs[0][3] = pix_abs16_xy2_c;
2901 c->pix_abs[1][0] = pix_abs8_c;
2902 c->pix_abs[1][1] = pix_abs8_x2_c;
2903 c->pix_abs[1][2] = pix_abs8_y2_c;
2904 c->pix_abs[1][3] = pix_abs8_xy2_c;
/* third-pel motion compensation (SVQ3); indices 3/7/11+ unused */
2906 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2907 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2908 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2909 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2910 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2911 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2912 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2913 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2914 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2916 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2917 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2918 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2919 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2920 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2921 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2922 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2923 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2924 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
/* quarter-pel MC table filler: 16 subpixel positions per (prefix, size) */
2926 #define dspfunc(PFX, IDX, NUM) \
2927 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2928 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2929 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2930 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2931 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2932 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2933 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2934 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2935 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2936 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2937 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2938 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2939 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2940 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2941 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2942 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2944 dspfunc(put_qpel, 0, 16);
2945 dspfunc(put_no_rnd_qpel, 0, 16);
2947 dspfunc(avg_qpel, 0, 16);
2948 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2950 dspfunc(put_qpel, 1, 8);
2951 dspfunc(put_no_rnd_qpel, 1, 8);
2953 dspfunc(avg_qpel, 1, 8);
2954 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
/* WMV2 mspel (half-pel with special filters) */
2958 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2959 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2960 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2961 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2962 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2963 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2964 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2965 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
/* comparison functions: slot [0]=16x16, [1]=8x8, [4]/[5]=intra variants */
2967 #define SET_CMP_FUNC(name) \
2968 c->name[0]= name ## 16_c;\
2969 c->name[1]= name ## 8x8_c;
2971 SET_CMP_FUNC(hadamard8_diff)
2972 c->hadamard8_diff[4]= hadamard8_intra16_c;
2973 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2974 SET_CMP_FUNC(dct_sad)
2975 SET_CMP_FUNC(dct_max)
2977 SET_CMP_FUNC(dct264_sad)
2979 c->sad[0]= pix_abs16_c;
2980 c->sad[1]= pix_abs8_c;
2984 SET_CMP_FUNC(quant_psnr)
2987 c->vsad[0]= vsad16_c;
2988 c->vsad[4]= vsad_intra16_c;
2989 c->vsad[5]= vsad_intra8_c;
2990 c->vsse[0]= vsse16_c;
2991 c->vsse[4]= vsse_intra16_c;
2992 c->vsse[5]= vsse_intra8_c;
2993 c->nsse[0]= nsse16_c;
2994 c->nsse[1]= nsse8_c;
2996 ff_dsputil_init_dwt(c);
2999 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
/* HuffYUV / lossless prediction helpers and byte-order utilities */
3001 c->add_bytes= add_bytes_c;
3002 c->diff_bytes= diff_bytes_c;
3003 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
3004 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
3005 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
3006 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
3007 c->bswap_buf= bswap_buf;
3008 c->bswap16_buf = bswap16_buf;
3010 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
3011 c->h263_h_loop_filter= h263_h_loop_filter_c;
3012 c->h263_v_loop_filter= h263_v_loop_filter_c;
3015 c->h261_loop_filter= h261_loop_filter_c;
3017 c->try_8x8basis= try_8x8basis_c;
3018 c->add_8x8basis= add_8x8basis_c;
3020 #if CONFIG_VORBIS_DECODER
3021 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
/* float/int vector primitives implemented earlier in this file */
3023 c->vector_fmul_reverse = vector_fmul_reverse_c;
3024 c->vector_fmul_add = vector_fmul_add_c;
3025 c->vector_fmul_window = vector_fmul_window_c;
3026 c->vector_clipf = vector_clipf_c;
3027 c->scalarproduct_int16 = scalarproduct_int16_c;
3028 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
3029 c->apply_window_int16 = apply_window_int16_c;
3030 c->vector_clip_int32 = vector_clip_int32_c;
3031 c->scalarproduct_float = ff_scalarproduct_float_c;
3032 c->butterflies_float = butterflies_float_c;
3033 c->butterflies_float_interleave = butterflies_float_interleave_c;
3035 c->shrink[0]= av_image_copy_plane;
3036 c->shrink[1]= ff_shrink22;
3037 c->shrink[2]= ff_shrink44;
3038 c->shrink[3]= ff_shrink88;
3040 c->prefetch= just_return;
/* cleared here so arch inits may fill them; unset slots fall back to
 * the h264 qpel functions in the loop near the end of this function */
3042 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3043 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
/* bit-depth-templated function selection (suffix _8/_9/_10/... per depth) */
3047 #define FUNC(f, depth) f ## _ ## depth
3048 #define FUNCC(f, depth) f ## _ ## depth ## _c
3050 #define dspfunc1(PFX, IDX, NUM, depth)\
3051 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
3052 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3053 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3054 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3056 #define dspfunc2(PFX, IDX, NUM, depth)\
3057 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3058 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3059 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3060 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3061 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3062 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3063 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3064 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3065 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3066 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3067 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3068 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3069 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3070 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3071 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3072 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3075 #define BIT_DEPTH_FUNCS(depth, dct)\
3076 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
3077 c->draw_edges = FUNCC(draw_edges , depth);\
3078 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
3079 c->clear_block = FUNCC(clear_block ## dct , depth);\
3080 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3081 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3082 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
3083 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3084 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3086 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3087 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3088 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3089 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3090 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3091 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3093 dspfunc1(put , 0, 16, depth);\
3094 dspfunc1(put , 1, 8, depth);\
3095 dspfunc1(put , 2, 4, depth);\
3096 dspfunc1(put , 3, 2, depth);\
3097 dspfunc1(put_no_rnd, 0, 16, depth);\
3098 dspfunc1(put_no_rnd, 1, 8, depth);\
3099 dspfunc1(avg , 0, 16, depth);\
3100 dspfunc1(avg , 1, 8, depth);\
3101 dspfunc1(avg , 2, 4, depth);\
3102 dspfunc1(avg , 3, 2, depth);\
3103 dspfunc1(avg_no_rnd, 0, 16, depth);\
3104 dspfunc1(avg_no_rnd, 1, 8, depth);\
3106 dspfunc2(put_h264_qpel, 0, 16, depth);\
3107 dspfunc2(put_h264_qpel, 1, 8, depth);\
3108 dspfunc2(put_h264_qpel, 2, 4, depth);\
3109 dspfunc2(put_h264_qpel, 3, 2, depth);\
3110 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3111 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3112 dspfunc2(avg_h264_qpel, 2, 4, depth);
/* pick the template instantiation matching the stream bit depth;
 * dct_bits selects 16- vs 32-bit DCTELEM variants */
3114 switch (avctx->bits_per_raw_sample) {
3116 if (c->dct_bits == 32) {
3117 BIT_DEPTH_FUNCS(9, _32);
3119 BIT_DEPTH_FUNCS(9, _16);
3123 if (c->dct_bits == 32) {
3124 BIT_DEPTH_FUNCS(10, _32);
3126 BIT_DEPTH_FUNCS(10, _16);
3130 if (c->dct_bits == 32) {
3131 BIT_DEPTH_FUNCS(12, _32);
3133 BIT_DEPTH_FUNCS(12, _16);
3137 if (c->dct_bits == 32) {
3138 BIT_DEPTH_FUNCS(14, _32);
3140 BIT_DEPTH_FUNCS(14, _16);
3144 if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
3145 BIT_DEPTH_FUNCS(8, _16);
/* per-architecture overrides replace C pointers where SIMD exists */
3151 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
3152 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
3153 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
3154 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
3155 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
3156 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
3157 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
3158 if (HAVE_MIPSFPU) ff_dsputil_init_mips (c, avctx);
/* fill any 2tap slots the arch inits left empty with the h264 qpel funcs */
3160 for (i = 0; i < 4; i++) {
3161 for (j = 0; j < 16; j++) {
3162 if(!c->put_2tap_qpel_pixels_tab[i][j])
3163 c->put_2tap_qpel_pixels_tab[i][j] =
3164 c->put_h264_qpel_pixels_tab[i][j];
3165 if(!c->avg_2tap_qpel_pixels_tab[i][j])
3166 c->avg_2tap_qpel_pixels_tab[i][j] =
3167 c->avg_h264_qpel_pixels_tab[i][j];
3171 ff_init_scantable_permutation(c->idct_permutation,
3172 c->idct_permutation_type);
3175 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3177 ff_dsputil_init(c, avctx);