3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "libavutil/imgutils.h"
33 #include "simple_idct.h"
37 #include "mpegvideo.h"
/* Saturation LUT: indexed as (ff_cropTbl + MAX_NEG_CROP)[v] to clamp v to
 * 0..255.  NOTE(review): only zero-initialized here; presumably filled at
 * init time elsewhere — confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares LUT, used as sq = ff_squareTbl + 256 so sq[d] is valid for signed
 * differences d in [-255, 255].  NOTE(review): also filled at init — confirm. */
uint32_t ff_squareTbl[512] = {0, };
47 #include "dsputil_template.c"
51 #include "dsputil_template.c"
55 #include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f) /* 0x7f replicated into every byte of an unsigned long */
#define pb_80 (~0UL/255 * 0x80) /* 0x80 replicated into every byte of an unsigned long */
/* Classic 8x8 zigzag scan: entry i is the raster-order index of the i-th
 * coefficient in scan order (low frequencies first). */
const uint8_t ff_zigzag_direct[64] = {
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields (rows of the two fields
   alternate in the scan order). */
const uint8_t ff_zigzag248_direct[64] = {
0, 8, 1, 9, 16, 24, 2, 10,
17, 25, 32, 40, 48, 56, 33, 41,
18, 26, 3, 11, 4, 12, 19, 27,
34, 42, 49, 57, 50, 58, 35, 43,
20, 28, 5, 13, 6, 14, 21, 29,
36, 44, 51, 59, 52, 60, 37, 45,
22, 30, 7, 15, 23, 31, 38, 46,
53, 61, 54, 62, 39, 47, 55, 63,
/* Non-permuted inverse of ff_zigzag_direct with every value offset by +1,
 * for the MMX quantizer.  NOTE(review): declared here, filled at init
 * elsewhere — confirm. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
/* Alternate (horizontally biased) scan order: maps scan position to raster
 * index within an 8x8 block. */
const uint8_t ff_alternate_horizontal_scan[64] = {
0, 1, 2, 3, 8, 9, 16, 17,
10, 11, 4, 5, 6, 7, 15, 14,
13, 12, 19, 18, 24, 25, 32, 33,
26, 27, 20, 21, 22, 23, 28, 29,
30, 31, 34, 35, 40, 41, 48, 49,
42, 43, 36, 37, 38, 39, 44, 45,
46, 47, 50, 51, 56, 57, 58, 59,
52, 53, 54, 55, 60, 61, 62, 63,
/* Alternate (vertically biased) scan order: maps scan position to raster
 * index within an 8x8 block. */
const uint8_t ff_alternate_vertical_scan[64] = {
0, 8, 16, 24, 1, 9, 2, 10,
17, 25, 32, 40, 48, 56, 57, 49,
41, 33, 26, 18, 3, 11, 4, 12,
19, 27, 34, 42, 50, 58, 35, 43,
51, 59, 20, 28, 5, 13, 6, 14,
21, 29, 36, 44, 52, 60, 37, 45,
53, 61, 22, 30, 7, 15, 23, 31,
38, 46, 54, 62, 39, 47, 55, 63,
/* Input permutation for the simple_idct_mmx: entry i gives the coefficient
 * index (written in hex as row<<3 | column) that scan position i maps to. */
static const uint8_t simple_mmx_permutation[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
122 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
/* Build a ScanTable from a raster-order scan table and an IDCT coefficient
 * permutation: st->permutated[] is src_scantable[] remapped through
 * permutation[], and st->raster_end[] records per-position end markers.
 * NOTE(review): the loop headers are not visible in this excerpt —
 * presumably both loops iterate i over 0..63; confirm against full source. */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
st->scantable= src_scantable;
/* remap each scan position through the IDCT permutation */
j = src_scantable[i];
st->permutated[i] = permutation[j];
/* second pass: compute raster_end from the permuted scan */
j = st->permutated[i];
st->raster_end[i]= end;
/* Fill idct_permutation[0..63] for the requested permutation type, mapping
 * a coefficient index to the index the IDCT implementation expects.
 * NOTE(review): per-case loop headers and break statements are not visible
 * in this excerpt; each case presumably iterates i over 0..63. */
void ff_init_scantable_permutation(uint8_t *idct_permutation,
int idct_permutation_type)
switch(idct_permutation_type){
case FF_NO_IDCT_PERM:
/* identity mapping */
idct_permutation[i]= i;
case FF_LIBMPEG2_IDCT_PERM:
/* keep the row bits, rotate the three column bits */
idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
case FF_SIMPLE_IDCT_PERM:
idct_permutation[i]= simple_mmx_permutation[i];
case FF_TRANSPOSE_IDCT_PERM:
/* swap row and column (transpose the 8x8 index) */
idct_permutation[i]= ((i&7)<<3) | (i>>3);
case FF_PARTTRANS_IDCT_PERM:
/* partial transpose: swap only the low two row/column bits */
idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
case FF_SSE2_IDCT_PERM:
/* keep the row, permute elements within the row for SSE2 */
idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
/* Sum of the 256 pixel values of a 16x16 block, processed 8 pixels per
 * inner iteration; line_size is the row stride in bytes.
 * NOTE(review): the accumulation statements are not visible in this
 * excerpt — only the loop structure and the row advance. */
static int pix_sum_c(uint8_t * pix, int line_size)
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
/* step back over the 16 pixels consumed, then down one row */
pix += line_size - 16;
/* Sum of squared pixel values of a 16x16 block, using the squares LUT
 * (ff_squareTbl + 256).  Reads 8 pixels per iteration: one 64-bit load on
 * 64-bit builds, two 32-bit loads otherwise.
 * NOTE(review): the casted loads type-pun uint8_t* as uint32_t*/uint64_t* —
 * a strict-aliasing/alignment liberty kept for speed; preserved as-is. */
static int pix_norm1_c(uint8_t * pix, int line_size)
uint32_t *sq = ff_squareTbl + 256;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
/* 64-bit path: unpack 8 bytes from one load */
register uint64_t x=*(uint64_t*)pix;
s += sq[(x>>8)&0xff];
s += sq[(x>>16)&0xff];
s += sq[(x>>24)&0xff];
s += sq[(x>>32)&0xff];
s += sq[(x>>40)&0xff];
s += sq[(x>>48)&0xff];
s += sq[(x>>56)&0xff];
/* 32-bit path: two 4-byte loads */
register uint32_t x=*(uint32_t*)pix;
s += sq[(x>>8)&0xff];
s += sq[(x>>16)&0xff];
s += sq[(x>>24)&0xff];
x=*(uint32_t*)(pix+4);
s += sq[(x>>8)&0xff];
s += sq[(x>>16)&0xff];
s += sq[(x>>24)&0xff];
/* step back over the 16 pixels consumed, then down one row */
pix += line_size - 16;
/* Byteswap w 32-bit words from src into dst, unrolled 8 at a time;
 * the final statement handles the remaining tail words.
 * NOTE(review): the tail-loop header is not visible in this excerpt. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
for(i=0; i+8<=w; i+=8){
dst[i+0]= av_bswap32(src[i+0]);
dst[i+1]= av_bswap32(src[i+1]);
dst[i+2]= av_bswap32(src[i+2]);
dst[i+3]= av_bswap32(src[i+3]);
dst[i+4]= av_bswap32(src[i+4]);
dst[i+5]= av_bswap32(src[i+5]);
dst[i+6]= av_bswap32(src[i+6]);
dst[i+7]= av_bswap32(src[i+7]);
/* tail: one word per iteration */
dst[i+0]= av_bswap32(src[i+0]);
/* Byteswap len 16-bit values from src into dst.
 * NOTE(review): the loop header is not visible in this excerpt. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
*dst++ = av_bswap16(*src++);
277 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
280 uint32_t *sq = ff_squareTbl + 256;
283 for (i = 0; i < h; i++) {
284 s += sq[pix1[0] - pix2[0]];
285 s += sq[pix1[1] - pix2[1]];
286 s += sq[pix1[2] - pix2[2]];
287 s += sq[pix1[3] - pix2[3]];
/* Sum of squared errors between two 8-wide pixel blocks over h rows;
 * same scheme as sse4_c. */
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
uint32_t *sq = ff_squareTbl + 256;
for (i = 0; i < h; i++) {
s += sq[pix1[0] - pix2[0]];
s += sq[pix1[1] - pix2[1]];
s += sq[pix1[2] - pix2[2]];
s += sq[pix1[3] - pix2[3]];
s += sq[pix1[4] - pix2[4]];
s += sq[pix1[5] - pix2[5]];
s += sq[pix1[6] - pix2[6]];
s += sq[pix1[7] - pix2[7]];
/* Sum of squared errors between two 16-wide pixel blocks over h rows;
 * same scheme as sse4_c/sse8_c, fully unrolled per row. */
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
uint32_t *sq = ff_squareTbl + 256;
for (i = 0; i < h; i++) {
s += sq[pix1[ 0] - pix2[ 0]];
s += sq[pix1[ 1] - pix2[ 1]];
s += sq[pix1[ 2] - pix2[ 2]];
s += sq[pix1[ 3] - pix2[ 3]];
s += sq[pix1[ 4] - pix2[ 4]];
s += sq[pix1[ 5] - pix2[ 5]];
s += sq[pix1[ 6] - pix2[ 6]];
s += sq[pix1[ 7] - pix2[ 7]];
s += sq[pix1[ 8] - pix2[ 8]];
s += sq[pix1[ 9] - pix2[ 9]];
s += sq[pix1[10] - pix2[10]];
s += sq[pix1[11] - pix2[11]];
s += sq[pix1[12] - pix2[12]];
s += sq[pix1[13] - pix2[13]];
s += sq[pix1[14] - pix2[14]];
s += sq[pix1[15] - pix2[15]];
/* Compute block[] = s1[] - s2[] for an 8x8 block, one row of 8 per
 * iteration.  NOTE(review): the loop header and the block/s1/s2 stride
 * advances are not visible in this excerpt. */
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
const uint8_t *s2, int stride){
/* read the pixels */
block[0] = s1[0] - s2[0];
block[1] = s1[1] - s2[1];
block[2] = s1[2] - s2[2];
block[3] = s1[3] - s2[3];
block[4] = s1[4] - s2[4];
block[5] = s1[5] - s2[5];
block[6] = s1[6] - s2[6];
block[7] = s1[7] - s2[7];
/* Store an 8x8 block of DCT coefficients as pixels, clamped to 0..255 via
 * the crop LUT (cm absorbs negative indices thanks to the MAX_NEG_CROP
 * offset).  One row of 8 per iteration; loop header not visible here. */
void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
pixels[0] = cm[block[0]];
pixels[1] = cm[block[1]];
pixels[2] = cm[block[2]];
pixels[3] = cm[block[3]];
pixels[4] = cm[block[4]];
pixels[5] = cm[block[5]];
pixels[6] = cm[block[6]];
pixels[7] = cm[block[7]];
/* 4-wide variant of ff_put_pixels_clamped_c: clamp and store 4 coefficients
 * per row via the crop LUT. */
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
pixels[0] = cm[block[0]];
pixels[1] = cm[block[1]];
pixels[2] = cm[block[2]];
pixels[3] = cm[block[3]];
/* 2-wide variant of ff_put_pixels_clamped_c: clamp and store 2 coefficients
 * per row via the crop LUT. */
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
pixels[0] = cm[block[0]];
pixels[1] = cm[block[1]];
/* Store an 8x8 block of signed DCT coefficients biased by +128 and clamped
 * to 0..255 (values < -128 and > 127 are saturated in branches whose bodies
 * are partly elided in this excerpt). */
void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
uint8_t *restrict pixels,
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
else if (*block > 127)
/* in-range value: shift from signed to unsigned representation */
*pixels = (uint8_t)(*block + 128);
/* advance to the start of the next output row */
pixels += (line_size - 8);
/* Add an 8x8 block of DCT coefficients to existing pixels, clamping the
 * result to 0..255 via the crop LUT; one row of 8 per iteration (loop
 * header not visible here). */
void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
pixels[0] = cm[pixels[0] + block[0]];
pixels[1] = cm[pixels[1] + block[1]];
pixels[2] = cm[pixels[2] + block[2]];
pixels[3] = cm[pixels[3] + block[3]];
pixels[4] = cm[pixels[4] + block[4]];
pixels[5] = cm[pixels[5] + block[5]];
pixels[6] = cm[pixels[6] + block[6]];
pixels[7] = cm[pixels[7] + block[7]];
/* 4-wide variant of ff_add_pixels_clamped_c. */
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
pixels[0] = cm[pixels[0] + block[0]];
pixels[1] = cm[pixels[1] + block[1]];
pixels[2] = cm[pixels[2] + block[2]];
pixels[3] = cm[pixels[3] + block[3]];
/* 2-wide variant of ff_add_pixels_clamped_c. */
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
pixels[0] = cm[pixels[0] + block[0]];
pixels[1] = cm[pixels[1] + block[1]];
/* Sum of absolute values of the DCT coefficients in block.
 * NOTE(review): loop header not visible — presumably iterates 64 elements. */
static int sum_abs_dctelem_c(DCTELEM *block)
sum+= FFABS(block[i]);
/* Fill a 16-wide block of h rows with a constant byte value;
 * line_size is the row stride. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
for (i = 0; i < h; i++) {
memset(block, value, 16);
/* Fill an 8-wide block of h rows with a constant byte value;
 * line_size is the row stride. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
for (i = 0; i < h; i++) {
memset(block, value, 8);
/* Rounding pixel averages used by the motion-compensation code.
 * avg2: average of two values, rounding half away from zero (+1 bias).
 * avg4: average of four values with a +2 rounding bias.
 * Fix: the original macros left the arguments unparenthesized, so an
 * expression argument of lower precedence than '+' (e.g. avg2(x>>1, y))
 * expanded incorrectly.  Fully parenthesizing the arguments preserves the
 * result for all existing simple-variable uses while making the macros
 * safe for arbitrary expressions. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
/* 1/16-pel bilinear interpolation for GMC: A..D are the bilinear weights
 * (16-x16, x16) x (16-y16, y16), which sum to 256; each output pixel is the
 * weighted average of a 2x2 source neighbourhood, with 'rounder' added
 * before the >>8 normalization.  Row loop and pointer advances are not
 * visible in this excerpt. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
const int A=(16-x16)*(16-y16);
const int B=( x16)*(16-y16);
const int C=(16-x16)*( y16);
const int D=( x16)*( y16);
dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
/* General GMC: for each destination pixel, an affine transform (ox,oy with
 * per-axis deltas dxx/dxy/dyx/dyy, fractional precision s = 1<<shift) gives
 * a source position; the four unsigned-compare branches select full
 * bilinear interpolation inside the picture, 1-D interpolation along one
 * clipped edge, or nearest clipped pixel outside.  The per-pixel src_x/src_y
 * and frac_x/frac_y derivation lines are not visible in this excerpt. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
const int s= 1<<shift;
for(x=0; x<8; x++){ //XXX FIXME optimize
int src_x, src_y, frac_x, frac_y, index;
/* (unsigned) compare folds the x<0 and x>=width tests into one */
if((unsigned)src_x < width){
if((unsigned)src_y < height){
/* fully inside: bilinear in both directions */
index= src_x + src_y*stride;
dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
+ src[index +1]* frac_x )*(s-frac_y)
+ ( src[index+stride ]*(s-frac_x)
+ src[index+stride+1]* frac_x )* frac_y
/* vertically outside: clip y, interpolate horizontally only */
index= src_x + av_clip(src_y, 0, height)*stride;
dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
+ src[index +1]* frac_x )*s
if((unsigned)src_y < height){
/* horizontally outside: clip x, interpolate vertically only */
index= av_clip(src_x, 0, width) + src_y*stride;
dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
+ src[index+stride ]* frac_y )*s
/* outside in both directions: nearest clipped pixel */
index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
dst[y*stride + x]= src[index ];
/* Third-pel MC, no sub-pel offset: dispatch a plain block copy by width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
case 2: put_pixels2_8_c (dst, src, stride, height); break;
case 4: put_pixels4_8_c (dst, src, stride, height); break;
case 8: put_pixels8_8_c (dst, src, stride, height); break;
case 16:put_pixels16_8_c(dst, src, stride, height); break;
/* Third-pel MC (x=1/3, y=0): dst = round((2*a + b)/3); 683 = round(2^11/3). */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
/* Third-pel MC (x=2/3, y=0): dst = round((a + 2*b)/3); 683 = round(2^11/3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
/* Third-pel MC (x=0, y=1/3): vertical blend of current and next row. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
/* Third-pel MC (x=1/3, y=1/3): 2x2 blend with weights 4,3,3,2 (sum 12);
 * 2731 = round(2^15/12). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
/* Third-pel MC (x=1/3, y=2/3): 2x2 blend with weights 3,2,4,3 (sum 12). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* Third-pel MC (x=0, y=2/3): vertical blend weighted toward the next row. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
/* Third-pel MC (x=2/3, y=1/3): 2x2 blend with weights 3,4,2,3 (sum 12). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* Third-pel MC (x=2/3, y=2/3): 2x2 blend with weights 2,3,3,4 (sum 12). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
/* Averaging third-pel MC, no sub-pel offset: dispatch an averaging copy
 * (dst = avg(dst, src)) by width. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
case 2: avg_pixels2_8_c (dst, src, stride, height); break;
case 4: avg_pixels4_8_c (dst, src, stride, height); break;
case 8: avg_pixels8_8_c (dst, src, stride, height); break;
case 16:avg_pixels16_8_c(dst, src, stride, height); break;
/* Averaging third-pel MC (x=1/3, y=0): interpolate as in the put_ variant,
 * then average with the existing dst, rounding up. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
/* Averaging third-pel MC (x=2/3, y=0). */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
/* Averaging third-pel MC (x=0, y=1/3). */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
/* Averaging third-pel MC (x=1/3, y=1/3): 2x2 blend (weights 4,3,3,2),
 * then average with dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Averaging third-pel MC (x=1/3, y=2/3): 2x2 blend (weights 3,2,4,3). */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Averaging third-pel MC (x=0, y=2/3). */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
/* Averaging third-pel MC (x=2/3, y=1/3): 2x2 blend (weights 3,4,2,3). */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Averaging third-pel MC (x=2/3, y=2/3): 2x2 blend (weights 2,3,3,4). */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
802 #define QPEL_MC(r, OPNAME, RND, OP) \
803 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
808 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
809 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
810 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
811 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
812 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
813 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
814 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
815 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
821 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
823 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
827 const int src0= src[0*srcStride];\
828 const int src1= src[1*srcStride];\
829 const int src2= src[2*srcStride];\
830 const int src3= src[3*srcStride];\
831 const int src4= src[4*srcStride];\
832 const int src5= src[5*srcStride];\
833 const int src6= src[6*srcStride];\
834 const int src7= src[7*srcStride];\
835 const int src8= src[8*srcStride];\
836 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
837 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
838 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
839 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
840 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
841 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
842 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
843 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
849 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
850 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
855 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
856 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
857 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
858 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
859 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
860 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
861 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
862 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
863 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
864 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
865 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
866 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
867 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
868 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
869 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
870 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
876 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
877 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
882 const int src0= src[0*srcStride];\
883 const int src1= src[1*srcStride];\
884 const int src2= src[2*srcStride];\
885 const int src3= src[3*srcStride];\
886 const int src4= src[4*srcStride];\
887 const int src5= src[5*srcStride];\
888 const int src6= src[6*srcStride];\
889 const int src7= src[7*srcStride];\
890 const int src8= src[8*srcStride];\
891 const int src9= src[9*srcStride];\
892 const int src10= src[10*srcStride];\
893 const int src11= src[11*srcStride];\
894 const int src12= src[12*srcStride];\
895 const int src13= src[13*srcStride];\
896 const int src14= src[14*srcStride];\
897 const int src15= src[15*srcStride];\
898 const int src16= src[16*srcStride];\
899 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
900 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
901 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
902 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
903 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
904 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
905 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
906 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
907 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
908 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
909 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
910 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
911 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
912 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
913 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
914 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
920 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
922 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
923 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
926 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
927 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
930 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
932 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
933 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
936 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
939 copy_block9(full, src, 16, stride, 9);\
940 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
941 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
944 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
946 copy_block9(full, src, 16, stride, 9);\
947 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
950 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
953 copy_block9(full, src, 16, stride, 9);\
954 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
955 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
957 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
962 copy_block9(full, src, 16, stride, 9);\
963 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
964 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
965 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
966 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
968 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
972 copy_block9(full, src, 16, stride, 9);\
973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
974 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
975 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
976 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
978 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
983 copy_block9(full, src, 16, stride, 9);\
984 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
985 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
986 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
987 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
989 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
993 copy_block9(full, src, 16, stride, 9);\
994 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
995 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
996 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
997 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
999 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1000 uint8_t full[16*9];\
1003 uint8_t halfHV[64];\
1004 copy_block9(full, src, 16, stride, 9);\
1005 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1006 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1007 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1008 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1010 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1011 uint8_t full[16*9];\
1013 uint8_t halfHV[64];\
1014 copy_block9(full, src, 16, stride, 9);\
1015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1016 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1018 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1020 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1021 uint8_t full[16*9];\
1024 uint8_t halfHV[64];\
1025 copy_block9(full, src, 16, stride, 9);\
1026 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1027 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1028 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1029 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1031 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1032 uint8_t full[16*9];\
1034 uint8_t halfHV[64];\
1035 copy_block9(full, src, 16, stride, 9);\
1036 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1037 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1038 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1039 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1041 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1043 uint8_t halfHV[64];\
1044 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1045 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1046 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1048 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1050 uint8_t halfHV[64];\
1051 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1052 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1053 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1055 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1056 uint8_t full[16*9];\
1059 uint8_t halfHV[64];\
1060 copy_block9(full, src, 16, stride, 9);\
1061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1062 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1063 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1064 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1066 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1067 uint8_t full[16*9];\
1069 copy_block9(full, src, 16, stride, 9);\
1070 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1071 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1072 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1074 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1075 uint8_t full[16*9];\
1078 uint8_t halfHV[64];\
1079 copy_block9(full, src, 16, stride, 9);\
1080 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1081 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1082 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1083 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1085 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1086 uint8_t full[16*9];\
1088 copy_block9(full, src, 16, stride, 9);\
1089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1090 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1091 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1093 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1095 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1096 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1099 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1101 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1102 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1105 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1106 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1109 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1111 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1112 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1115 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1116 uint8_t full[24*17];\
1118 copy_block17(full, src, 24, stride, 17);\
1119 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1120 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1123 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1124 uint8_t full[24*17];\
1125 copy_block17(full, src, 24, stride, 17);\
1126 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1129 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1130 uint8_t full[24*17];\
1132 copy_block17(full, src, 24, stride, 17);\
1133 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1134 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1136 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1137 uint8_t full[24*17];\
1138 uint8_t halfH[272];\
1139 uint8_t halfV[256];\
1140 uint8_t halfHV[256];\
1141 copy_block17(full, src, 24, stride, 17);\
1142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1143 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1145 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1147 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1148 uint8_t full[24*17];\
1149 uint8_t halfH[272];\
1150 uint8_t halfHV[256];\
1151 copy_block17(full, src, 24, stride, 17);\
1152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1153 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1154 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1155 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1157 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1158 uint8_t full[24*17];\
1159 uint8_t halfH[272];\
1160 uint8_t halfV[256];\
1161 uint8_t halfHV[256];\
1162 copy_block17(full, src, 24, stride, 17);\
1163 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1164 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1165 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1166 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1168 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1169 uint8_t full[24*17];\
1170 uint8_t halfH[272];\
1171 uint8_t halfHV[256];\
1172 copy_block17(full, src, 24, stride, 17);\
1173 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1174 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1175 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1176 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1178 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1179 uint8_t full[24*17];\
1180 uint8_t halfH[272];\
1181 uint8_t halfV[256];\
1182 uint8_t halfHV[256];\
1183 copy_block17(full, src, 24, stride, 17);\
1184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1185 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1186 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1187 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1189 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1190 uint8_t full[24*17];\
1191 uint8_t halfH[272];\
1192 uint8_t halfHV[256];\
1193 copy_block17(full, src, 24, stride, 17);\
1194 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1195 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1196 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1197 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1199 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1200 uint8_t full[24*17];\
1201 uint8_t halfH[272];\
1202 uint8_t halfV[256];\
1203 uint8_t halfHV[256];\
1204 copy_block17(full, src, 24, stride, 17);\
1205 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1206 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1207 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1208 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1210 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1211 uint8_t full[24*17];\
1212 uint8_t halfH[272];\
1213 uint8_t halfHV[256];\
1214 copy_block17(full, src, 24, stride, 17);\
1215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1216 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1217 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1218 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1220 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1221 uint8_t halfH[272];\
1222 uint8_t halfHV[256];\
1223 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1224 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1225 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1227 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1228 uint8_t halfH[272];\
1229 uint8_t halfHV[256];\
1230 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1231 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1232 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1234 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1235 uint8_t full[24*17];\
1236 uint8_t halfH[272];\
1237 uint8_t halfV[256];\
1238 uint8_t halfHV[256];\
1239 copy_block17(full, src, 24, stride, 17);\
1240 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1241 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1242 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1243 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1245 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1246 uint8_t full[24*17];\
1247 uint8_t halfH[272];\
1248 copy_block17(full, src, 24, stride, 17);\
1249 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1250 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1251 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1253 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1254 uint8_t full[24*17];\
1255 uint8_t halfH[272];\
1256 uint8_t halfV[256];\
1257 uint8_t halfHV[256];\
1258 copy_block17(full, src, 24, stride, 17);\
1259 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1260 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1261 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1262 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1264 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1265 uint8_t full[24*17];\
1266 uint8_t halfH[272];\
1267 copy_block17(full, src, 24, stride, 17);\
1268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1269 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1270 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1272 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1273 uint8_t halfH[272];\
1274 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1275 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Store ops plugged into QPEL_MC above. 'b' is a lowpass accumulator that is
   scaled down by 32 with rounding (+16) or truncating (+15) and clipped via
   the crop table cm[]; the avg variants then average with the old dst. */
1278 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1279 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1280 #define op_put(a, b) a = cm[((b) + 16)>>5]
1281 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the qpel MC function families for put / put_no_rnd / avg. */
1283 QPEL_MC(0, put_ , _ , op_put)
1284 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1285 QPEL_MC(0, avg_ , _ , op_avg)
1286 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* The store-op macros are only needed by the instantiations above. */
1288 #undef op_avg_no_rnd
1290 #undef op_put_no_rnd
/* Full-pel (mc00) qpel positions need no filtering, so they alias the plain
   pixel copy/average helpers. The no-rnd variants are the same functions:
   no rounding is involved at full-pel positions.
   Fix: the 16x16 no-rnd alias pointed at ff_put_pixels16x16_8_c, which is
   inconsistent with put_qpel16_mc00_c (ff_put_pixels16x16_c) and with the
   8x8 no-rnd alias; all six aliases now use the same helper family. */
1292 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1293 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1294 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1295 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1296 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1297 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
/* WMV2 mspel horizontal half-pel filter for an 8-wide block:
   (-1,9,9,-1)/16 tap filter per output pixel, rounded (+8) and clipped
   through the crop table. Reads src[-1]..src[9] on each row.
   NOTE(review): the per-row loop header/advance and closing braces are
   elided in this listing. */
1299 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1300 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1304 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1305 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1306 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1307 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1308 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1309 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1310 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1311 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1317 #if CONFIG_RV40_DECODER
/* RV40 (3,3) luma qpel positions are mapped to the plain xy2 half-pel
   averaging helpers (put/avg, 16x16 and 8x8 variants). */
1318 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1319 put_pixels16_xy2_8_c(dst, src, stride, 16);
1321 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1322 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1324 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1325 put_pixels8_xy2_8_c(dst, src, stride, 8);
1327 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1328 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1330 #endif /* CONFIG_RV40_DECODER */
/* WMV2 mspel vertical half-pel filter: same (-1,9,9,-1)/16 kernel as the
   horizontal version, applied down a column; reads src[-srcStride] through
   src[9*srcStride]. NOTE(review): per-column loop and closing braces are
   elided in this listing. */
1332 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1333 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1337 const int src_1= src[ -srcStride];
1338 const int src0 = src[0 ];
1339 const int src1 = src[ srcStride];
1340 const int src2 = src[2*srcStride];
1341 const int src3 = src[3*srcStride];
1342 const int src4 = src[4*srcStride];
1343 const int src5 = src[5*srcStride];
1344 const int src6 = src[6*srcStride];
1345 const int src7 = src[7*srcStride];
1346 const int src8 = src[8*srcStride];
1347 const int src9 = src[9*srcStride];
1348 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1349 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1350 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1351 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1352 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1353 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1354 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1355 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* WMV2 mspel motion-compensation wrappers for the supported sub-pel
   positions (mcXY = X horizontal, Y vertical). They combine the h/v
   lowpass passes with 2-tap averaging (pixels8_l2).
   NOTE(review): local halfpel buffer declarations and closing braces are
   elided in this listing. */
1361 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1363 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1364 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1367 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1368 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1371 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1373 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1374 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1377 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1378 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1381 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
/* h filter over 11 rows (one above, two below) so the v pass has context */
1385 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1386 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1387 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1388 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1390 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1394 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1395 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1396 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1397 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1399 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1401 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1402 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
/* H.263 in-loop deblocking across a horizontal edge: filters the four
   pixels p0..p3 straddling the edge at each column x, with filter strength
   looked up from the quantizer. */
1405 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1406 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1408 const int strength= ff_h263_loop_filter_strength[qscale];
1412 int p0= src[x-2*stride];
1413 int p1= src[x-1*stride];
1414 int p2= src[x+0*stride];
1415 int p3= src[x+1*stride];
/* edge gradient, dominated by the inner pair (p2-p1) */
1416 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
/* ramp d1: full correction for small |d|, tapering to 0 beyond 2*strength */
1418 if (d<-2*strength) d1= 0;
1419 else if(d<- strength) d1=-2*strength - d;
1420 else if(d< strength) d1= d;
1421 else if(d< 2*strength) d1= 2*strength - d;
/* clamp to 0..255: if p out of 8-bit range, ~(p>>31) yields 0 (negative)
   or 0xFF.. (overflow), which truncates to 255 on store */
1426 if(p1&256) p1= ~(p1>>31);
1427 if(p2&256) p2= ~(p2>>31);
1429 src[x-1*stride] = p1;
1430 src[x+0*stride] = p2;
/* weaker secondary correction on the outer pixels */
1434 d2= av_clip((p0-p3)/4, -ad1, ad1);
1436 src[x-2*stride] = p0 - d2;
1437 src[x+ stride] = p3 + d2;
/* H.263 in-loop deblocking across a vertical edge; same filter as
   h263_v_loop_filter_c but applied to p0..p3 along each row y. */
1442 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1443 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1445 const int strength= ff_h263_loop_filter_strength[qscale];
1449 int p0= src[y*stride-2];
1450 int p1= src[y*stride-1];
1451 int p2= src[y*stride+0];
1452 int p3= src[y*stride+1];
1453 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
/* ramp d1 as in the vertical filter */
1455 if (d<-2*strength) d1= 0;
1456 else if(d<- strength) d1=-2*strength - d;
1457 else if(d< strength) d1= d;
1458 else if(d< 2*strength) d1= 2*strength - d;
/* clamp out-of-range values to 0 or 255 (see h263_v_loop_filter_c) */
1463 if(p1&256) p1= ~(p1>>31);
1464 if(p2&256) p2= ~(p2>>31);
1466 src[y*stride-1] = p1;
1467 src[y*stride+0] = p2;
1471 d2= av_clip((p0-p3)/4, -ad1, ad1);
1473 src[y*stride-2] = p0 - d2;
1474 src[y*stride+1] = p3 + d2;
/* H.261 loop filter: separable 1-2-1 smoothing of an 8x8 block done in a
   16-bit temp[] (scaled by 4), with the top/bottom border rows copied
   unfiltered (hence the 4* scale) and a final (…+8)>>4 normalization. */
1479 static void h261_loop_filter_c(uint8_t *src, int stride){
/* border rows: copy with the same x4 scale the filtered rows carry */
1484 temp[x ] = 4*src[x ];
1485 temp[x + 7*8] = 4*src[x + 7*stride];
1489 xy = y * stride + x;
/* vertical 1-2-1 pass into temp */
1491 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
/* border columns: only the vertical pass applies, normalize by >>2 */
1496 src[ y*stride] = (temp[ y*8] + 2)>>2;
1497 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1499 xy = y * stride + x;
/* horizontal 1-2-1 pass and final rounding */
1501 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
/* 16-wide SAD (sum of absolute differences) between pix1 and pix2 over h
   rows; used as a motion-estimation comparison function.
   NOTE(review): loop headers, pointer advances and returns are elided in
   this listing. */
1506 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1512 s += abs(pix1[0] - pix2[0]);
1513 s += abs(pix1[1] - pix2[1]);
1514 s += abs(pix1[2] - pix2[2]);
1515 s += abs(pix1[3] - pix2[3]);
1516 s += abs(pix1[4] - pix2[4]);
1517 s += abs(pix1[5] - pix2[5]);
1518 s += abs(pix1[6] - pix2[6]);
1519 s += abs(pix1[7] - pix2[7]);
1520 s += abs(pix1[8] - pix2[8]);
1521 s += abs(pix1[9] - pix2[9]);
1522 s += abs(pix1[10] - pix2[10]);
1523 s += abs(pix1[11] - pix2[11]);
1524 s += abs(pix1[12] - pix2[12]);
1525 s += abs(pix1[13] - pix2[13]);
1526 s += abs(pix1[14] - pix2[14]);
1527 s += abs(pix1[15] - pix2[15]);
/* SAD against the horizontal half-pel interpolation of pix2 (avg2 of
   neighboring pixels; reads pix2[16] for the last column). */
1534 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1540 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1541 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1542 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1543 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1544 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1545 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1546 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1547 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1548 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1549 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1550 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1551 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1552 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1553 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1554 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1555 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
/* SAD against the vertical half-pel interpolation of pix2
   (avg2 of the current row and the next row, pix3). */
1562 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1565 uint8_t *pix3 = pix2 + line_size;
1569 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1570 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1571 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1572 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1573 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1574 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1575 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1576 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1577 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1578 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1579 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1580 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1581 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1582 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1583 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1584 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
/* SAD against the diagonal half-pel interpolation of pix2
   (avg4 of a 2x2 neighborhood spanning two rows). */
1592 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1595 uint8_t *pix3 = pix2 + line_size;
1599 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1600 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1601 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1602 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1603 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1604 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1605 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1606 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1607 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1608 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1609 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1610 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1611 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1612 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1613 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1614 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/* 8-wide SAD variants, mirroring the pix_abs16_* family above:
   plain, horizontal half-pel (x2), vertical half-pel (y2) and diagonal
   half-pel (xy2). NOTE(review): loop headers, pointer advances and returns
   are elided in this listing. */
1622 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1628 s += abs(pix1[0] - pix2[0]);
1629 s += abs(pix1[1] - pix2[1]);
1630 s += abs(pix1[2] - pix2[2]);
1631 s += abs(pix1[3] - pix2[3]);
1632 s += abs(pix1[4] - pix2[4]);
1633 s += abs(pix1[5] - pix2[5]);
1634 s += abs(pix1[6] - pix2[6]);
1635 s += abs(pix1[7] - pix2[7]);
1642 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1648 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1649 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1650 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1651 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1652 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1653 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1654 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1655 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1662 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1665 uint8_t *pix3 = pix2 + line_size;
1669 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1670 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1671 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1672 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1673 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1674 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1675 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1676 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1684 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1687 uint8_t *pix3 = pix2 + line_size;
1691 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1692 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1693 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1694 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1695 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1696 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1697 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1698 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/* Noise-preserving SSE, 16-wide: score1 is the plain squared error,
   score2 compares the local 2x2 gradients of the two blocks, so blocks
   that keep similar texture are penalized less. The gradient term is
   weighted by avctx->nsse_weight (8 if no context is given). */
1706 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1707 MpegEncContext *c = v;
1713 for(x=0; x<16; x++){
1714 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1717 for(x=0; x<15; x++){
1718 score2+= FFABS( s1[x ] - s1[x +stride]
1719 - s1[x+1] + s1[x+1+stride])
1720 -FFABS( s2[x ] - s2[x +stride]
1721 - s2[x+1] + s2[x+1+stride]);
1728 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1729 else return score1 + FFABS(score2)*8;
/* 8-wide variant of nsse16_c. */
1732 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1733 MpegEncContext *c = v;
1740 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1744 score2+= FFABS( s1[x ] - s1[x +stride]
1745 - s1[x+1] + s1[x+1+stride])
1746 -FFABS( s2[x ] - s2[x +stride]
1747 - s2[x+1] + s2[x+1+stride]);
1754 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1755 else return score1 + FFABS(score2)*8;
/* Evaluate the weighted squared error of adding 'basis' scaled by 'scale'
   to the residual 'rem' (rounding shift from BASIS to RECON precision). */
1758 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1762 for(i=0; i<8*8; i++){
1763 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1766 assert(-512<b && b<512);
1768 sum += (w*b)*(w*b)>>4;
/* Actually apply the scaled basis vector to the residual. */
1773 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1776 for(i=0; i<8*8; i++){
1777 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1782 * permutes an 8x8 block.
1783 * @param block the block which will be permuted according to the given permutation vector
1784 * @param permutation the permutation vector
1785 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1786 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1787 * (inverse) permutated to scantable order!
1789 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1795 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
/* first pass: gather the nonzero coefficients (in scan order) into temp */
1797 for(i=0; i<=last; i++){
1798 const int j= scantable[i];
/* second pass: scatter them back through the permutation vector */
1803 for(i=0; i<=last; i++){
1804 const int j= scantable[i];
1805 const int perm_j= permutation[j];
1806 block[perm_j]= temp[j];
/* Dummy comparison function (always the same score regardless of input). */
1810 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
/* Select the comparison functions (cmp[0..5]) for the requested metric
   'type'; dispatch cases are elided in this listing. */
1814 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1817 memset(cmp, 0, sizeof(void*)*6);
1825 cmp[i]= c->hadamard8_diff[i];
1831 cmp[i]= c->dct_sad[i];
1834 cmp[i]= c->dct264_sad[i];
1837 cmp[i]= c->dct_max[i];
1840 cmp[i]= c->quant_psnr[i];
1869 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
/* SWAR byte-wise add: processes sizeof(long) bytes at a time. The pb_7f
   mask adds the low 7 bits carry-free and the pb_80 XOR restores the top
   bit of each byte, so carries never cross byte lanes. Tail bytes are
   handled one at a time. */
1874 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1876 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1877 long a = *(long*)(src+i);
1878 long b = *(long*)(dst+i);
1879 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1882 dst[i+0] += src[i+0];
/* Same SWAR trick, summing two sources into dst. */
1885 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1887 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1888 long a = *(long*)(src1+i);
1889 long b = *(long*)(src2+i);
1890 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1893 dst[i] = src1[i]+src2[i];
/* SWAR byte-wise subtract (dst = src1 - src2), with a scalar fallback on
   targets without fast unaligned loads when src2 is misaligned. */
1896 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1898 #if !HAVE_FAST_UNALIGNED
1899 if((long)src2 & (sizeof(long)-1)){
1900 for(i=0; i+7<w; i+=8){
1901 dst[i+0] = src1[i+0]-src2[i+0];
1902 dst[i+1] = src1[i+1]-src2[i+1];
1903 dst[i+2] = src1[i+2]-src2[i+2];
1904 dst[i+3] = src1[i+3]-src2[i+3];
1905 dst[i+4] = src1[i+4]-src2[i+4];
1906 dst[i+5] = src1[i+5]-src2[i+5];
1907 dst[i+6] = src1[i+6]-src2[i+6];
1908 dst[i+7] = src1[i+7]-src2[i+7];
1912 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1913 long a = *(long*)(src1+i);
1914 long b = *(long*)(src2+i);
/* borrow-free per-byte subtraction, mirror of the addition trick above */
1915 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1918 dst[i+0] = src1[i+0]-src2[i+0];
/* HuffYUV median prediction: reconstruct samples from the median of
   left (l), top (src1[i]) and left+top-topleft gradients, plus the
   transmitted difference. Loop bodies are partially elided in this
   listing. */
1921 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1929 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
/* Inverse of the above: compute the difference against the median
   predictor for encoding. */
1938 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1946 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
/* Left (previous-sample) prediction, returning the running accumulator. */
1956 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1959 for(i=0; i<w-1; i++){
/* BGR32 variant keeping separate running predictors per channel. */
1986 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Hadamard butterfly helpers: BUTTERFLY2/BUTTERFLY1 compute sum/difference
   pairs (bodies elided in this listing); BUTTERFLYA returns |x+y|+|x-y|. */
2016 #define BUTTERFLY2(o1,o2,i1,i2) \
2020 #define BUTTERFLY1(x,y) \
2029 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/* SATD of the 8x8 difference src-dst: horizontal 8-point Hadamard per row,
   then vertical butterflies per column, summing absolute values. */
2031 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
2039 //FIXME try pointer walks
/* row transform on the pixel differences */
2040 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2041 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2042 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2043 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2045 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2046 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2047 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2048 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2050 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2051 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2052 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2053 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
/* column transform and absolute-value accumulation */
2057 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2058 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2059 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2060 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2062 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2063 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2064 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2065 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2068 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2069 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2070 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2071 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
/* SATD of the 8x8 source block itself (intra): same transform as
   hadamard8_diff8x8_c but on raw pixels, with the DC term (mean)
   subtracted from the total at the end. */
2076 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
2084 //FIXME try pointer walks
2085 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2086 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2087 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2088 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2090 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2091 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2092 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2093 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2095 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2096 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2097 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2098 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2102 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2103 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2104 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2105 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2107 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2108 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2109 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2110 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2113 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2114 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2115 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2116 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2119 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
/* DCT-domain SAD: transform the 8x8 pixel difference and sum the absolute
   values of the DCT coefficients. */
2124 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2125 MpegEncContext * const s= (MpegEncContext *)c;
2126 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2130 s->dsp.diff_pixels(temp, src1, src2, stride);
2132 return s->dsp.sum_abs_dctelem(temp);
/* One 8-point integer transform pass (sum/diff stage plus the shifted
   odd-part combinations); appears to be the H.264-style 8x8 forward
   transform used by dct264_sad8x8_c below. */\
2137 const int s07 = SRC(0) + SRC(7);\
2138 const int s16 = SRC(1) + SRC(6);\
2139 const int s25 = SRC(2) + SRC(5);\
2140 const int s34 = SRC(3) + SRC(4);\
2141 const int a0 = s07 + s34;\
2142 const int a1 = s16 + s25;\
2143 const int a2 = s07 - s34;\
2144 const int a3 = s16 - s25;\
2145 const int d07 = SRC(0) - SRC(7);\
2146 const int d16 = SRC(1) - SRC(6);\
2147 const int d25 = SRC(2) - SRC(5);\
2148 const int d34 = SRC(3) - SRC(4);\
2149 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2150 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2151 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2152 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2154 DST(1, a4 + (a7>>2)) ;\
2155 DST(2, a2 + (a3>>1)) ;\
2156 DST(3, a5 + (a6>>2)) ;\
2158 DST(5, a6 - (a5>>2)) ;\
2159 DST(6, (a2>>1) - a3 ) ;\
2160 DST(7, (a4>>2) - a7 ) ;\
/* H.264-transform SAD: apply the 8-point transform row-wise in place,
   then column-wise accumulating |coefficient| via the DST macro. */
2163 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2164 MpegEncContext * const s= (MpegEncContext *)c;
2169 s->dsp.diff_pixels(dct[0], src1, src2, stride);
2171 #define SRC(x) dct[i][x]
2172 #define DST(x,v) dct[i][x]= v
2173 for( i = 0; i < 8; i++ )
/* second pass redefines SRC/DST to read columns and sum magnitudes */
2178 #define SRC(x) dct[x][i]
2179 #define DST(x,v) sum += FFABS(v)
2180 for( i = 0; i < 8; i++ )
/* DCT-domain max metric: transform the 8x8 difference and return the
   largest absolute coefficient. */
2188 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2189 MpegEncContext * const s= (MpegEncContext *)c;
2190 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2195 s->dsp.diff_pixels(temp, src1, src2, stride);
2199 sum= FFMAX(sum, FFABS(temp[i]));
/* Quantization-noise metric: DCT the difference, quantize + dequantize +
   IDCT it, and return the squared error against the unquantized copy. */
2204 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2205 MpegEncContext * const s= (MpegEncContext *)c;
2206 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2207 DCTELEM * const bak = temp+64;
2213 s->dsp.diff_pixels(temp, src1, src2, stride);
/* keep an unquantized reference copy */
2215 memcpy(bak, temp, 64*sizeof(DCTELEM));
2217 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2218 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2219 ff_simple_idct_8(temp); //FIXME
2222 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
/* Rate-distortion metric: quantize the 8x8 difference, estimate the VLC
   bit cost of the coefficients, reconstruct, and combine distortion (SSE)
   with a lambda-weighted bit cost. */
2227 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2228 MpegEncContext * const s= (MpegEncContext *)c;
2229 const uint8_t *scantable= s->intra_scantable.permutated;
2230 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2231 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2232 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2233 int i, last, run, bits, level, distortion, start_i;
2234 const int esc_length= s->ac_esc_length;
2236 uint8_t * last_length;
/* work on local aligned copies so reconstruction can be added in place */
2240 copy_block8(lsrc1, src1, 8, stride, 8);
2241 copy_block8(lsrc2, src2, 8, stride, 8);
2243 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2245 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
/* pick the intra or inter VLC length tables */
2251 length = s->intra_ac_vlc_length;
2252 last_length= s->intra_ac_vlc_last_length;
2253 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2256 length = s->inter_ac_vlc_length;
2257 last_length= s->inter_ac_vlc_last_length;
/* accumulate the bit cost of each (run, level) pair in scan order */
2262 for(i=start_i; i<last; i++){
2263 int j= scantable[i];
2268 if((level&(~127)) == 0){
2269 bits+= length[UNI_AC_ENC_INDEX(run, level)];
/* the final coefficient uses the 'last' VLC table */
2278 level= temp[i] + 64;
2282 if((level&(~127)) == 0){
2283 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* reconstruct and measure distortion against the original difference */
2291 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2293 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2296 s->dsp.idct_add(lsrc2, 8, temp);
2298 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2300 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* Pure bit-cost metric: like rd8x8_c but returns only the estimated VLC
   bit count of the quantized 8x8 difference (no reconstruction). */
2303 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2304 MpegEncContext * const s= (MpegEncContext *)c;
2305 const uint8_t *scantable= s->intra_scantable.permutated;
2306 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2307 int i, last, run, bits, level, start_i;
2308 const int esc_length= s->ac_esc_length;
2310 uint8_t * last_length;
2314 s->dsp.diff_pixels(temp, src1, src2, stride);
2316 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2322 length = s->intra_ac_vlc_length;
2323 last_length= s->intra_ac_vlc_last_length;
2324 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2327 length = s->inter_ac_vlc_length;
2328 last_length= s->inter_ac_vlc_last_length;
2333 for(i=start_i; i<last; i++){
2334 int j= scantable[i];
2339 if((level&(~127)) == 0){
2340 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2349 level= temp[i] + 64;
2353 if((level&(~127)) == 0){
2354 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2362 #define VSAD_INTRA(size) \
2363 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2367 for(y=1; y<h; y++){ \
2368 for(x=0; x<size; x+=4){ \
2369 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2370 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2380 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2385 for(x=0; x<16; x++){
2386 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* SQ(a): squared value helper (argument fully parenthesized). */
2395 #define SQ(a) ((a)*(a))
/* VSSE_INTRA(size): generates vsse_intra<size>_c -- like VSAD_INTRA but
 * accumulating squared row-to-row differences (vertical SSE).
 * NOTE(review): macro continuation lines are missing from this view;
 * comments kept outside the macro body. */
2396 #define VSSE_INTRA(size) \
2397 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2401 for(y=1; y<h; y++){ \
2402 for(x=0; x<size; x+=4){ \
2403 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2404 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
/* Vertical SSE of the difference of two 16-wide blocks (squared analogue
 * of vsad16_c). NOTE(review): setup/return lines missing from this view. */
2414 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2419 for(x=0; x<16; x++){
2420 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Sum of squared differences between an int8 vector and an int16 vector
 * of the same length. NOTE(review): signature continuation and return
 * lines are missing from this sampled view. */
2429 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2433 for(i=0; i<size; i++)
2434 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
/* Instantiate 16x16 comparison functions from their 8x8 kernels: each
 * WRAPPER8_16_SQ(f8x8, f16) emits f16, which applies f8x8 to the four
 * 8x8 quadrants and sums the results (macro defined earlier in the file). */
2438 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2439 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2440 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2442 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2444 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2445 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2446 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2447 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/* Element-wise float multiply: dst[i] = src0[i] * src1[i] for i in [0,len). */
2449 static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
2451 for(i=0; i<len; i++)
2452 dst[i] = src0[i] * src1[i];
/* Multiply src0 by src1 traversed backwards: dst[i] = src0[i] * src1[-i].
 * NOTE(review): the (missing) setup line presumably advances src1 to its
 * last element first -- confirm against the full source. */
2455 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
2458 for(i=0; i<len; i++)
2459 dst[i] = src0[i] * src1[-i];
/* Fused multiply-add over vectors: dst[i] = src0[i] * src1[i] + src2[i]. */
2462 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
2464 for(i=0; i<len; i++)
2465 dst[i] = src0[i] * src1[i] + src2[i];
/* Overlap-add windowing (used by MDCT-based audio codecs): processes a
 * symmetric window from both ends toward the middle, writing the
 * cross-faded pair dst[i]/dst[j].
 * NOTE(review): the pointer-recentering and s0/s1/wi/wj load lines are
 * missing from this sampled view; code left byte-identical. */
2468 static void vector_fmul_window_c(float *dst, const float *src0,
2469 const float *src1, const float *win, int len)
2475 for(i=-len, j=len-1; i<0; i++, j--) {
2480 dst[i] = s0*wj - s1*wi;
2481 dst[j] = s0*wi + s1*wj;
/* Scale a float vector by a scalar: dst[i] = src[i] * mul. */
2485 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
2489 for (i = 0; i < len; i++)
2490 dst[i] = src[i] * mul;
/* Multiply-accumulate a scaled vector into dst: dst[i] += src[i] * mul. */
2493 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
2497 for (i = 0; i < len; i++)
2498 dst[i] += src[i] * mul;
/* In-place butterfly: for each i, (v1[i], v2[i]) <- (v1[i]+v2[i], v1[i]-v2[i]).
 * restrict qualifiers promise the two arrays do not alias.
 * NOTE(review): the lines storing t/the sum back are missing from this view. */
2501 static void butterflies_float_c(float *restrict v1, float *restrict v2,
2505 for (i = 0; i < len; i++) {
2506 float t = v1[i] - v2[i];
/* Dot product of two float vectors; the accumulation and return lines are
 * missing from this sampled view. */
2512 static float scalarproduct_float_c(const float *v1, const float *v2, int len)
2517 for (i = 0; i < len; i++)
/* Clamp one float (carried as its uint32_t bit pattern) into [min, max] for
 * the special case where min < 0 < max: positive values compare directly
 * against mini; negative values have their sign bit flipped so they can be
 * compared against maxisign (max with sign flipped). The fall-through
 * return (value in range) is on a line missing from this view. */
2523 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
2524 uint32_t maxi, uint32_t maxisign)
2527 if(a > mini) return mini;
2528 else if((a^(1U<<31)) > maxisign) return maxi;
/* Clip a float vector into [*min, *max] when the bounds have opposite
 * signs, operating on raw bit patterns via clipf_c_one. Unrolled 8x, so
 * len is assumed to be a multiple of 8 (as in vector_clipf_c below).
 * NOTE(review): reading floats through uint32_t* violates strict aliasing;
 * left untouched here since this matches the file's existing technique. */
2532 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2534 uint32_t mini = *(uint32_t*)min;
2535 uint32_t maxi = *(uint32_t*)max;
2536 uint32_t maxisign = maxi ^ (1U<<31);
2537 uint32_t *dsti = (uint32_t*)dst;
2538 const uint32_t *srci = (const uint32_t*)src;
2539 for(i=0; i<len; i+=8) {
2540 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2541 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2542 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2543 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2544 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2545 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2546 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2547 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
/* Clip a float vector into [min, max], 8x unrolled (len assumed to be a
 * multiple of 8). When min < 0 < max it dispatches to the bit-pattern
 * fast path; otherwise (the else branch, whose brace lines are missing
 * from this view) it uses plain av_clipf. */
2550 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
2552 if(min < 0 && max > 0) {
2553 vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
2555 for(i=0; i < len; i+=8) {
2556 dst[i ] = av_clipf(src[i ], min, max);
2557 dst[i + 1] = av_clipf(src[i + 1], min, max);
2558 dst[i + 2] = av_clipf(src[i + 2], min, max);
2559 dst[i + 3] = av_clipf(src[i + 3], min, max);
2560 dst[i + 4] = av_clipf(src[i + 4], min, max);
2561 dst[i + 5] = av_clipf(src[i + 5], min, max);
2562 dst[i + 6] = av_clipf(src[i + 6], min, max);
2563 dst[i + 7] = av_clipf(src[i + 7], min, max);
/* Dot product of two int16 vectors with each product right-shifted by
 * `shift` before accumulation; loop header and return are on lines missing
 * from this sampled view. */
2568 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
2573 res += (*v1++ * *v2++) >> shift;
/* Combined op: accumulates the v1.v2 dot product while updating
 * v1[i] += mul * v3[i] in the same pass; the accumulation line and return
 * are on lines missing from this sampled view. */
2578 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
2583 *v1++ += mul * *v3++;
/* Apply a symmetric int16 window to int16 samples with Q15 rounding:
 * each half-window coefficient w scales the matching sample from both
 * ends (output[i] and output[len-i-1]), with +2^14 rounding before the
 * >>15 normalization. */
2588 static void apply_window_int16_c(int16_t *output, const int16_t *input,
2589 const int16_t *window, unsigned int len)
2592 int len2 = len >> 1;
2594 for (i = 0; i < len2; i++) {
2595 int16_t w = window[i];
2596 output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
2597 output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
/* Clip an int32 vector into [min, max], manually unrolled 8x; len is
 * assumed to be a multiple of 8 (the enclosing loop line is missing from
 * this sampled view). */
2601 static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2602 int32_t max, unsigned int len)
2605 *dst++ = av_clip(*src++, min, max);
2606 *dst++ = av_clip(*src++, min, max);
2607 *dst++ = av_clip(*src++, min, max);
2608 *dst++ = av_clip(*src++, min, max);
2609 *dst++ = av_clip(*src++, min, max);
2610 *dst++ = av_clip(*src++, min, max);
2611 *dst++ = av_clip(*src++, min, max);
2612 *dst++ = av_clip(*src++, min, max);
/* Fixed-point DCT basis constants for the WMV2 IDCT below: round(2048 *
 * sqrt(2) * cos(k*pi/16)) for k = 1..7. W0/W4 (k=0 and k=4 share the same
 * value, 2048) is referenced by the IDCT code but its #define is on a line
 * not visible in this view. */
2618 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
2619 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
2620 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
2621 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
2622 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
2623 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
2624 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
/* One row of the 8-point WMV2 inverse DCT, in-place on b[0..7].
 * Stage 1 forms the odd (a1,a7,a5,a3) and even (a2,a6,a0,a4) butterflies;
 * s1/s2 are the rotated odd terms (181/256 ~= 1/sqrt(2)); stage 2 combines
 * them with +128 (1<<7) rounding and a >>8 normalization.
 * NOTE(review): the s1/s2 declarations and opening brace are on lines
 * missing from this sampled view. */
2626 static void wmv2_idct_row(short * b)
2629 int a0,a1,a2,a3,a4,a5,a6,a7;
2631 a1 = W1*b[1]+W7*b[7];
2632 a7 = W7*b[1]-W1*b[7];
2633 a5 = W5*b[5]+W3*b[3];
2634 a3 = W3*b[5]-W5*b[3];
2635 a2 = W2*b[2]+W6*b[6];
2636 a6 = W6*b[2]-W2*b[6];
2637 a0 = W0*b[0]+W0*b[4];
2638 a4 = W0*b[0]-W0*b[4];
2640 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2641 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2643 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2644 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2645 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2646 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2647 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2648 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2649 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2650 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
/* One column of the WMV2 inverse DCT, in-place on b[0], b[8], ... b[56].
 * Same structure as wmv2_idct_row but with extended precision: stage-1
 * terms keep 3 extra bits (+4 rounding, >>3) and the final normalization
 * is >>14 with +2^13 rounding. */
2652 static void wmv2_idct_col(short * b)
2655 int a0,a1,a2,a3,a4,a5,a6,a7;
2656 /*step 1, with extended precision*/
2657 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2658 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2659 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2660 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2661 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2662 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2663 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2664 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2666 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2667 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2669 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2670 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2671 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2672 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2674 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2675 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2676 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2677 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
/* Full 8x8 WMV2 IDCT: all 8 rows, then all 8 columns, in-place on block.
 * The two loop headers are on lines missing from this sampled view. */
2679 void ff_wmv2_idct_c(short * block){
2683 wmv2_idct_row(block+i);
2686 wmv2_idct_col(block+i);
2689 /* XXX: these wrapper functions should be removed once every IDCT has been
 * converted to the common function-pointer interface. */
/* WMV2 IDCT + store: transform block, then write clamped pixels to dest. */
2691 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2693 ff_wmv2_idct_c(block);
2694 ff_put_pixels_clamped_c(block, dest, line_size);
/* WMV2 IDCT + accumulate: transform block, then add clamped result to dest. */
2696 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2698 ff_wmv2_idct_c(block);
2699 ff_add_pixels_clamped_c(block, dest, line_size);
/* JREF (IJG reference) IDCT wrappers at full and reduced (lowres)
 * resolutions: each performs the j_rev_dct* transform (the call lines are
 * missing from this sampled view) and then stores (put) or accumulates
 * (add) the clamped pixels. The idct4/2 variants back lowres decoding;
 * idct1 is DC-only: (block[0]+4)>>3 clamped through ff_cropTbl. */
2701 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2704 ff_put_pixels_clamped_c(block, dest, line_size);
2706 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2709 ff_add_pixels_clamped_c(block, dest, line_size);
2712 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
2715 put_pixels_clamped4_c(block, dest, line_size);
2717 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
2720 add_pixels_clamped4_c(block, dest, line_size);
2723 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
2726 put_pixels_clamped2_c(block, dest, line_size);
2728 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
2731 add_pixels_clamped2_c(block, dest, line_size);
2734 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
2736 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* DC-only store: single pixel from block[0] with rounding and clamping */
2738 dest[0] = cm[(block[0] + 4)>>3];
2740 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
2742 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* DC-only accumulate into the existing pixel, clamped */
2744 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
/* No-op stand-in used as the default prefetch hook (see c->prefetch in
 * dsputil_init): matches the prefetch signature but does nothing. */
2747 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
2749 /* init static data */
/* One-time setup of global lookup tables:
 *  - ff_cropTbl: clamp-to-[0,255] table with MAX_NEG_CROP guard bands on
 *    both sides (identity in the middle, saturating above; the low-side
 *    fill line is missing from this sampled view),
 *  - ff_squareTbl: (i-256)^2 for squared-difference metrics,
 *  - inv_zigzag_direct16: inverse zigzag permutation, stored 1-based. */
2750 av_cold void dsputil_static_init(void)
2754 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2755 for(i=0;i<MAX_NEG_CROP;i++) {
2757 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2760 for(i=0;i<512;i++) {
2761 ff_squareTbl[i] = (i - 256) * (i - 256);
2764 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
/* Runtime sanity check that the compiler honors 16-byte stack alignment
 * (required by SSE/AltiVec code paths). If a LOCAL_ALIGNED_16 variable
 * turns out misaligned, logs a one-time miscompilation warning on
 * MMX/AltiVec builds. The did_fail bookkeeping and return statements are
 * on lines missing from this sampled view. */
2767 int ff_check_alignment(void){
2768 static int did_fail=0;
2769 LOCAL_ALIGNED_16(int, aligned, [4]);
2771 if((intptr_t)aligned & 15){
2773 #if HAVE_MMX || HAVE_ALTIVEC
2774 av_log(NULL, AV_LOG_ERROR,
2775 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2776 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2777 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2778 "Do not report crashes to Libav developers.\n");
2787 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2791 ff_check_alignment();
2794 if (avctx->bits_per_raw_sample == 10) {
2795 c->fdct = ff_jpeg_fdct_islow_10;
2796 c->fdct248 = ff_fdct248_islow_10;
2798 if(avctx->dct_algo==FF_DCT_FASTINT) {
2799 c->fdct = fdct_ifast;
2800 c->fdct248 = fdct_ifast248;
2802 else if(avctx->dct_algo==FF_DCT_FAAN) {
2803 c->fdct = ff_faandct;
2804 c->fdct248 = ff_faandct248;
2807 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2808 c->fdct248 = ff_fdct248_islow_8;
2811 #endif //CONFIG_ENCODERS
2813 if(avctx->lowres==1){
2814 c->idct_put= ff_jref_idct4_put;
2815 c->idct_add= ff_jref_idct4_add;
2816 c->idct = j_rev_dct4;
2817 c->idct_permutation_type= FF_NO_IDCT_PERM;
2818 }else if(avctx->lowres==2){
2819 c->idct_put= ff_jref_idct2_put;
2820 c->idct_add= ff_jref_idct2_add;
2821 c->idct = j_rev_dct2;
2822 c->idct_permutation_type= FF_NO_IDCT_PERM;
2823 }else if(avctx->lowres==3){
2824 c->idct_put= ff_jref_idct1_put;
2825 c->idct_add= ff_jref_idct1_add;
2826 c->idct = j_rev_dct1;
2827 c->idct_permutation_type= FF_NO_IDCT_PERM;
2829 if (avctx->bits_per_raw_sample == 10) {
2830 c->idct_put = ff_simple_idct_put_10;
2831 c->idct_add = ff_simple_idct_add_10;
2832 c->idct = ff_simple_idct_10;
2833 c->idct_permutation_type = FF_NO_IDCT_PERM;
2835 if(avctx->idct_algo==FF_IDCT_INT){
2836 c->idct_put= ff_jref_idct_put;
2837 c->idct_add= ff_jref_idct_add;
2838 c->idct = j_rev_dct;
2839 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2840 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
2841 avctx->idct_algo==FF_IDCT_VP3){
2842 c->idct_put= ff_vp3_idct_put_c;
2843 c->idct_add= ff_vp3_idct_add_c;
2844 c->idct = ff_vp3_idct_c;
2845 c->idct_permutation_type= FF_NO_IDCT_PERM;
2846 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2847 c->idct_put= ff_wmv2_idct_put_c;
2848 c->idct_add= ff_wmv2_idct_add_c;
2849 c->idct = ff_wmv2_idct_c;
2850 c->idct_permutation_type= FF_NO_IDCT_PERM;
2851 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2852 c->idct_put= ff_faanidct_put;
2853 c->idct_add= ff_faanidct_add;
2854 c->idct = ff_faanidct;
2855 c->idct_permutation_type= FF_NO_IDCT_PERM;
2856 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
2857 c->idct_put= ff_ea_idct_put_c;
2858 c->idct_permutation_type= FF_NO_IDCT_PERM;
2859 }else{ //accurate/default
2860 c->idct_put = ff_simple_idct_put_8;
2861 c->idct_add = ff_simple_idct_add_8;
2862 c->idct = ff_simple_idct_8;
2863 c->idct_permutation_type= FF_NO_IDCT_PERM;
2868 c->diff_pixels = diff_pixels_c;
2869 c->put_pixels_clamped = ff_put_pixels_clamped_c;
2870 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
2871 c->add_pixels_clamped = ff_add_pixels_clamped_c;
2872 c->sum_abs_dctelem = sum_abs_dctelem_c;
2875 c->pix_sum = pix_sum_c;
2876 c->pix_norm1 = pix_norm1_c;
2878 c->fill_block_tab[0] = fill_block16_c;
2879 c->fill_block_tab[1] = fill_block8_c;
2881 /* TODO [0] 16 [1] 8 */
2882 c->pix_abs[0][0] = pix_abs16_c;
2883 c->pix_abs[0][1] = pix_abs16_x2_c;
2884 c->pix_abs[0][2] = pix_abs16_y2_c;
2885 c->pix_abs[0][3] = pix_abs16_xy2_c;
2886 c->pix_abs[1][0] = pix_abs8_c;
2887 c->pix_abs[1][1] = pix_abs8_x2_c;
2888 c->pix_abs[1][2] = pix_abs8_y2_c;
2889 c->pix_abs[1][3] = pix_abs8_xy2_c;
2891 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2892 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2893 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2894 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2895 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2896 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2897 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2898 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2899 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2901 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2902 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2903 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2904 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2905 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2906 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2907 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2908 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2909 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2911 #define dspfunc(PFX, IDX, NUM) \
2912 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2913 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2914 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2915 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2916 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2917 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2918 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2919 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2920 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2921 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2922 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2923 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2924 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2925 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2926 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2927 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2929 dspfunc(put_qpel, 0, 16);
2930 dspfunc(put_no_rnd_qpel, 0, 16);
2932 dspfunc(avg_qpel, 0, 16);
2933 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2935 dspfunc(put_qpel, 1, 8);
2936 dspfunc(put_no_rnd_qpel, 1, 8);
2938 dspfunc(avg_qpel, 1, 8);
2939 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2943 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
2944 ff_mlp_init(c, avctx);
2946 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
2947 ff_intrax8dsp_init(c,avctx);
2950 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2951 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2952 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2953 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2954 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2955 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2956 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2957 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2959 #define SET_CMP_FUNC(name) \
2960 c->name[0]= name ## 16_c;\
2961 c->name[1]= name ## 8x8_c;
2963 SET_CMP_FUNC(hadamard8_diff)
2964 c->hadamard8_diff[4]= hadamard8_intra16_c;
2965 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2966 SET_CMP_FUNC(dct_sad)
2967 SET_CMP_FUNC(dct_max)
2969 SET_CMP_FUNC(dct264_sad)
2971 c->sad[0]= pix_abs16_c;
2972 c->sad[1]= pix_abs8_c;
2976 SET_CMP_FUNC(quant_psnr)
2979 c->vsad[0]= vsad16_c;
2980 c->vsad[4]= vsad_intra16_c;
2981 c->vsad[5]= vsad_intra8_c;
2982 c->vsse[0]= vsse16_c;
2983 c->vsse[4]= vsse_intra16_c;
2984 c->vsse[5]= vsse_intra8_c;
2985 c->nsse[0]= nsse16_c;
2986 c->nsse[1]= nsse8_c;
2988 ff_dsputil_init_dwt(c);
2991 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2993 c->add_bytes= add_bytes_c;
2994 c->add_bytes_l2= add_bytes_l2_c;
2995 c->diff_bytes= diff_bytes_c;
2996 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2997 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2998 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2999 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
3000 c->bswap_buf= bswap_buf;
3001 c->bswap16_buf = bswap16_buf;
3002 #if CONFIG_PNG_DECODER
3003 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
3006 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
3007 c->h263_h_loop_filter= h263_h_loop_filter_c;
3008 c->h263_v_loop_filter= h263_v_loop_filter_c;
3011 if (CONFIG_VP3_DECODER) {
3012 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
3013 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
3014 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
3017 c->h261_loop_filter= h261_loop_filter_c;
3019 c->try_8x8basis= try_8x8basis_c;
3020 c->add_8x8basis= add_8x8basis_c;
3022 #if CONFIG_VORBIS_DECODER
3023 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
3025 #if CONFIG_AC3_DECODER
3026 c->ac3_downmix = ff_ac3_downmix_c;
3028 c->vector_fmul = vector_fmul_c;
3029 c->vector_fmul_reverse = vector_fmul_reverse_c;
3030 c->vector_fmul_add = vector_fmul_add_c;
3031 c->vector_fmul_window = vector_fmul_window_c;
3032 c->vector_clipf = vector_clipf_c;
3033 c->scalarproduct_int16 = scalarproduct_int16_c;
3034 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
3035 c->apply_window_int16 = apply_window_int16_c;
3036 c->vector_clip_int32 = vector_clip_int32_c;
3037 c->scalarproduct_float = scalarproduct_float_c;
3038 c->butterflies_float = butterflies_float_c;
3039 c->vector_fmul_scalar = vector_fmul_scalar_c;
3040 c->vector_fmac_scalar = vector_fmac_scalar_c;
3042 c->shrink[0]= av_image_copy_plane;
3043 c->shrink[1]= ff_shrink22;
3044 c->shrink[2]= ff_shrink44;
3045 c->shrink[3]= ff_shrink88;
3047 c->prefetch= just_return;
3049 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3050 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3054 #define FUNC(f, depth) f ## _ ## depth
3055 #define FUNCC(f, depth) f ## _ ## depth ## _c
3057 #define dspfunc1(PFX, IDX, NUM, depth)\
3058 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
3059 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3060 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3061 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3063 #define dspfunc2(PFX, IDX, NUM, depth)\
3064 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3065 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3066 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3067 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3068 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3069 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3070 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3071 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3072 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3073 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3074 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3075 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3076 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3077 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3078 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3079 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3082 #define BIT_DEPTH_FUNCS(depth, dct)\
3083 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
3084 c->draw_edges = FUNCC(draw_edges , depth);\
3085 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
3086 c->clear_block = FUNCC(clear_block ## dct , depth);\
3087 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3088 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3089 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
3090 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3091 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3093 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3094 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3095 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3096 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3097 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3098 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3100 dspfunc1(put , 0, 16, depth);\
3101 dspfunc1(put , 1, 8, depth);\
3102 dspfunc1(put , 2, 4, depth);\
3103 dspfunc1(put , 3, 2, depth);\
3104 dspfunc1(put_no_rnd, 0, 16, depth);\
3105 dspfunc1(put_no_rnd, 1, 8, depth);\
3106 dspfunc1(avg , 0, 16, depth);\
3107 dspfunc1(avg , 1, 8, depth);\
3108 dspfunc1(avg , 2, 4, depth);\
3109 dspfunc1(avg , 3, 2, depth);\
3110 dspfunc1(avg_no_rnd, 0, 16, depth);\
3111 dspfunc1(avg_no_rnd, 1, 8, depth);\
3113 dspfunc2(put_h264_qpel, 0, 16, depth);\
3114 dspfunc2(put_h264_qpel, 1, 8, depth);\
3115 dspfunc2(put_h264_qpel, 2, 4, depth);\
3116 dspfunc2(put_h264_qpel, 3, 2, depth);\
3117 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3118 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3119 dspfunc2(avg_h264_qpel, 2, 4, depth);
3121 switch (avctx->bits_per_raw_sample) {
3123 if (c->dct_bits == 32) {
3124 BIT_DEPTH_FUNCS(9, _32);
3126 BIT_DEPTH_FUNCS(9, _16);
3130 if (c->dct_bits == 32) {
3131 BIT_DEPTH_FUNCS(10, _32);
3133 BIT_DEPTH_FUNCS(10, _16);
3137 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
3139 BIT_DEPTH_FUNCS(8, _16);
3144 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
3145 if (ARCH_ARM) dsputil_init_arm (c, avctx);
3146 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
3147 if (HAVE_VIS) dsputil_init_vis (c, avctx);
3148 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
3149 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
3150 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
3151 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
3152 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
3154 for(i=0; i<64; i++){
3155 if(!c->put_2tap_qpel_pixels_tab[0][i])
3156 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
3157 if(!c->avg_2tap_qpel_pixels_tab[0][i])
3158 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3161 ff_init_scantable_permutation(c->idct_permutation,
3162 c->idct_permutation_type);