3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "libavutil/imgutils.h"
33 #include "simple_idct.h"
37 #include "mpegvideo.h"
42 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
43 uint32_t ff_squareTbl[512] = {0, };
46 #include "dsputil_template.c"
50 #include "dsputil_template.c"
54 #include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
// (~0UL/255 yields 0x0101...01, i.e. the byte replicated into every lane of an unsigned long)
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
60 const uint8_t ff_zigzag_direct[64] = {
61 0, 1, 8, 16, 9, 2, 3, 10,
62 17, 24, 32, 25, 18, 11, 4, 5,
63 12, 19, 26, 33, 40, 48, 41, 34,
64 27, 20, 13, 6, 7, 14, 21, 28,
65 35, 42, 49, 56, 57, 50, 43, 36,
66 29, 22, 15, 23, 30, 37, 44, 51,
67 58, 59, 52, 45, 38, 31, 39, 46,
68 53, 60, 61, 54, 47, 55, 62, 63
71 /* Specific zigzag scan for 248 idct. NOTE that unlike the
72 specification, we interleave the fields */
73 const uint8_t ff_zigzag248_direct[64] = {
74 0, 8, 1, 9, 16, 24, 2, 10,
75 17, 25, 32, 40, 48, 56, 33, 41,
76 18, 26, 3, 11, 4, 12, 19, 27,
77 34, 42, 49, 57, 50, 58, 35, 43,
78 20, 28, 5, 13, 6, 14, 21, 29,
79 36, 44, 51, 59, 52, 60, 37, 45,
80 22, 30, 7, 15, 23, 31, 38, 46,
81 53, 61, 54, 62, 39, 47, 55, 63,
84 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
85 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
87 const uint8_t ff_alternate_horizontal_scan[64] = {
88 0, 1, 2, 3, 8, 9, 16, 17,
89 10, 11, 4, 5, 6, 7, 15, 14,
90 13, 12, 19, 18, 24, 25, 32, 33,
91 26, 27, 20, 21, 22, 23, 28, 29,
92 30, 31, 34, 35, 40, 41, 48, 49,
93 42, 43, 36, 37, 38, 39, 44, 45,
94 46, 47, 50, 51, 56, 57, 58, 59,
95 52, 53, 54, 55, 60, 61, 62, 63,
98 const uint8_t ff_alternate_vertical_scan[64] = {
99 0, 8, 16, 24, 1, 9, 2, 10,
100 17, 25, 32, 40, 48, 56, 57, 49,
101 41, 33, 26, 18, 3, 11, 4, 12,
102 19, 27, 34, 42, 50, 58, 35, 43,
103 51, 59, 20, 28, 5, 13, 6, 14,
104 21, 29, 36, 44, 52, 60, 37, 45,
105 53, 61, 22, 30, 7, 15, 23, 31,
106 38, 46, 54, 62, 39, 47, 55, 63,
109 /* Input permutation for the simple_idct_mmx */
110 static const uint8_t simple_mmx_permutation[64]={
111 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
112 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
113 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
114 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
115 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
116 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
117 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
118 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
121 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
123 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
127 st->scantable= src_scantable;
131 j = src_scantable[i];
132 st->permutated[i] = permutation[j];
141 j = st->permutated[i];
143 st->raster_end[i]= end;
147 void ff_init_scantable_permutation(uint8_t *idct_permutation,
148 int idct_permutation_type)
152 switch(idct_permutation_type){
153 case FF_NO_IDCT_PERM:
155 idct_permutation[i]= i;
157 case FF_LIBMPEG2_IDCT_PERM:
159 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
161 case FF_SIMPLE_IDCT_PERM:
163 idct_permutation[i]= simple_mmx_permutation[i];
165 case FF_TRANSPOSE_IDCT_PERM:
167 idct_permutation[i]= ((i&7)<<3) | (i>>3);
169 case FF_PARTTRANS_IDCT_PERM:
171 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
173 case FF_SSE2_IDCT_PERM:
175 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
178 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
182 static int pix_sum_c(uint8_t * pix, int line_size)
187 for (i = 0; i < 16; i++) {
188 for (j = 0; j < 16; j += 8) {
199 pix += line_size - 16;
204 static int pix_norm1_c(uint8_t * pix, int line_size)
207 uint32_t *sq = ff_squareTbl + 256;
210 for (i = 0; i < 16; i++) {
211 for (j = 0; j < 16; j += 8) {
223 register uint64_t x=*(uint64_t*)pix;
225 s += sq[(x>>8)&0xff];
226 s += sq[(x>>16)&0xff];
227 s += sq[(x>>24)&0xff];
228 s += sq[(x>>32)&0xff];
229 s += sq[(x>>40)&0xff];
230 s += sq[(x>>48)&0xff];
231 s += sq[(x>>56)&0xff];
233 register uint32_t x=*(uint32_t*)pix;
235 s += sq[(x>>8)&0xff];
236 s += sq[(x>>16)&0xff];
237 s += sq[(x>>24)&0xff];
238 x=*(uint32_t*)(pix+4);
240 s += sq[(x>>8)&0xff];
241 s += sq[(x>>16)&0xff];
242 s += sq[(x>>24)&0xff];
247 pix += line_size - 16;
252 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
255 for(i=0; i+8<=w; i+=8){
256 dst[i+0]= av_bswap32(src[i+0]);
257 dst[i+1]= av_bswap32(src[i+1]);
258 dst[i+2]= av_bswap32(src[i+2]);
259 dst[i+3]= av_bswap32(src[i+3]);
260 dst[i+4]= av_bswap32(src[i+4]);
261 dst[i+5]= av_bswap32(src[i+5]);
262 dst[i+6]= av_bswap32(src[i+6]);
263 dst[i+7]= av_bswap32(src[i+7]);
266 dst[i+0]= av_bswap32(src[i+0]);
270 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
273 *dst++ = av_bswap16(*src++);
276 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
279 uint32_t *sq = ff_squareTbl + 256;
282 for (i = 0; i < h; i++) {
283 s += sq[pix1[0] - pix2[0]];
284 s += sq[pix1[1] - pix2[1]];
285 s += sq[pix1[2] - pix2[2]];
286 s += sq[pix1[3] - pix2[3]];
293 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
296 uint32_t *sq = ff_squareTbl + 256;
299 for (i = 0; i < h; i++) {
300 s += sq[pix1[0] - pix2[0]];
301 s += sq[pix1[1] - pix2[1]];
302 s += sq[pix1[2] - pix2[2]];
303 s += sq[pix1[3] - pix2[3]];
304 s += sq[pix1[4] - pix2[4]];
305 s += sq[pix1[5] - pix2[5]];
306 s += sq[pix1[6] - pix2[6]];
307 s += sq[pix1[7] - pix2[7]];
314 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
317 uint32_t *sq = ff_squareTbl + 256;
320 for (i = 0; i < h; i++) {
321 s += sq[pix1[ 0] - pix2[ 0]];
322 s += sq[pix1[ 1] - pix2[ 1]];
323 s += sq[pix1[ 2] - pix2[ 2]];
324 s += sq[pix1[ 3] - pix2[ 3]];
325 s += sq[pix1[ 4] - pix2[ 4]];
326 s += sq[pix1[ 5] - pix2[ 5]];
327 s += sq[pix1[ 6] - pix2[ 6]];
328 s += sq[pix1[ 7] - pix2[ 7]];
329 s += sq[pix1[ 8] - pix2[ 8]];
330 s += sq[pix1[ 9] - pix2[ 9]];
331 s += sq[pix1[10] - pix2[10]];
332 s += sq[pix1[11] - pix2[11]];
333 s += sq[pix1[12] - pix2[12]];
334 s += sq[pix1[13] - pix2[13]];
335 s += sq[pix1[14] - pix2[14]];
336 s += sq[pix1[15] - pix2[15]];
344 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
345 const uint8_t *s2, int stride){
348 /* read the pixels */
350 block[0] = s1[0] - s2[0];
351 block[1] = s1[1] - s2[1];
352 block[2] = s1[2] - s2[2];
353 block[3] = s1[3] - s2[3];
354 block[4] = s1[4] - s2[4];
355 block[5] = s1[5] - s2[5];
356 block[6] = s1[6] - s2[6];
357 block[7] = s1[7] - s2[7];
365 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
369 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
371 /* read the pixels */
373 pixels[0] = cm[block[0]];
374 pixels[1] = cm[block[1]];
375 pixels[2] = cm[block[2]];
376 pixels[3] = cm[block[3]];
377 pixels[4] = cm[block[4]];
378 pixels[5] = cm[block[5]];
379 pixels[6] = cm[block[6]];
380 pixels[7] = cm[block[7]];
387 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
391 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
393 /* read the pixels */
395 pixels[0] = cm[block[0]];
396 pixels[1] = cm[block[1]];
397 pixels[2] = cm[block[2]];
398 pixels[3] = cm[block[3]];
405 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
409 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
411 /* read the pixels */
413 pixels[0] = cm[block[0]];
414 pixels[1] = cm[block[1]];
421 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
422 uint8_t *restrict pixels,
427 for (i = 0; i < 8; i++) {
428 for (j = 0; j < 8; j++) {
431 else if (*block > 127)
434 *pixels = (uint8_t)(*block + 128);
438 pixels += (line_size - 8);
442 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
446 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
448 /* read the pixels */
450 pixels[0] = cm[pixels[0] + block[0]];
451 pixels[1] = cm[pixels[1] + block[1]];
452 pixels[2] = cm[pixels[2] + block[2]];
453 pixels[3] = cm[pixels[3] + block[3]];
454 pixels[4] = cm[pixels[4] + block[4]];
455 pixels[5] = cm[pixels[5] + block[5]];
456 pixels[6] = cm[pixels[6] + block[6]];
457 pixels[7] = cm[pixels[7] + block[7]];
463 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
467 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
469 /* read the pixels */
471 pixels[0] = cm[pixels[0] + block[0]];
472 pixels[1] = cm[pixels[1] + block[1]];
473 pixels[2] = cm[pixels[2] + block[2]];
474 pixels[3] = cm[pixels[3] + block[3]];
480 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
484 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
486 /* read the pixels */
488 pixels[0] = cm[pixels[0] + block[0]];
489 pixels[1] = cm[pixels[1] + block[1]];
495 static int sum_abs_dctelem_c(DCTELEM *block)
499 sum+= FFABS(block[i]);
503 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
507 for (i = 0; i < h; i++) {
508 memset(block, value, 16);
513 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
517 for (i = 0; i < h; i++) {
518 memset(block, value, 8);
/* Rounding averages of 2 and 4 values. Arguments are fully parenthesized:
 * the original expansions (a+b+1, a+b+c+d+2) could mis-parse when invoked
 * with compound expressions of lower precedence. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
526 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
528 const int A=(16-x16)*(16-y16);
529 const int B=( x16)*(16-y16);
530 const int C=(16-x16)*( y16);
531 const int D=( x16)*( y16);
536 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
537 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
538 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
539 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
540 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
541 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
542 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
543 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
549 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
550 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
553 const int s= 1<<shift;
563 for(x=0; x<8; x++){ //XXX FIXME optimize
564 int src_x, src_y, frac_x, frac_y, index;
573 if((unsigned)src_x < width){
574 if((unsigned)src_y < height){
575 index= src_x + src_y*stride;
576 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
577 + src[index +1]* frac_x )*(s-frac_y)
578 + ( src[index+stride ]*(s-frac_x)
579 + src[index+stride+1]* frac_x )* frac_y
582 index= src_x + av_clip(src_y, 0, height)*stride;
583 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
584 + src[index +1]* frac_x )*s
588 if((unsigned)src_y < height){
589 index= av_clip(src_x, 0, width) + src_y*stride;
590 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
591 + src[index+stride ]* frac_y )*s
594 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
595 dst[y*stride + x]= src[index ];
607 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
609 case 2: put_pixels2_8_c (dst, src, stride, height); break;
610 case 4: put_pixels4_8_c (dst, src, stride, height); break;
611 case 8: put_pixels8_8_c (dst, src, stride, height); break;
612 case 16:put_pixels16_8_c(dst, src, stride, height); break;
616 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
618 for (i=0; i < height; i++) {
619 for (j=0; j < width; j++) {
620 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
627 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
629 for (i=0; i < height; i++) {
630 for (j=0; j < width; j++) {
631 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
638 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
640 for (i=0; i < height; i++) {
641 for (j=0; j < width; j++) {
642 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
649 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
651 for (i=0; i < height; i++) {
652 for (j=0; j < width; j++) {
653 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
660 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
662 for (i=0; i < height; i++) {
663 for (j=0; j < width; j++) {
664 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
671 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
673 for (i=0; i < height; i++) {
674 for (j=0; j < width; j++) {
675 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
682 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
684 for (i=0; i < height; i++) {
685 for (j=0; j < width; j++) {
686 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
693 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
695 for (i=0; i < height; i++) {
696 for (j=0; j < width; j++) {
697 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
704 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
706 case 2: avg_pixels2_8_c (dst, src, stride, height); break;
707 case 4: avg_pixels4_8_c (dst, src, stride, height); break;
708 case 8: avg_pixels8_8_c (dst, src, stride, height); break;
709 case 16:avg_pixels16_8_c(dst, src, stride, height); break;
713 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
715 for (i=0; i < height; i++) {
716 for (j=0; j < width; j++) {
717 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
724 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
726 for (i=0; i < height; i++) {
727 for (j=0; j < width; j++) {
728 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
735 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
737 for (i=0; i < height; i++) {
738 for (j=0; j < width; j++) {
739 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
746 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
748 for (i=0; i < height; i++) {
749 for (j=0; j < width; j++) {
750 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
757 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
759 for (i=0; i < height; i++) {
760 for (j=0; j < width; j++) {
761 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
768 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
770 for (i=0; i < height; i++) {
771 for (j=0; j < width; j++) {
772 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
779 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
781 for (i=0; i < height; i++) {
782 for (j=0; j < width; j++) {
783 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
790 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
792 for (i=0; i < height; i++) {
793 for (j=0; j < width; j++) {
794 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
801 #define QPEL_MC(r, OPNAME, RND, OP) \
802 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
803 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
807 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
808 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
809 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
810 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
811 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
812 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
813 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
814 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
820 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
822 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
826 const int src0= src[0*srcStride];\
827 const int src1= src[1*srcStride];\
828 const int src2= src[2*srcStride];\
829 const int src3= src[3*srcStride];\
830 const int src4= src[4*srcStride];\
831 const int src5= src[5*srcStride];\
832 const int src6= src[6*srcStride];\
833 const int src7= src[7*srcStride];\
834 const int src8= src[8*srcStride];\
835 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
836 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
837 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
838 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
839 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
840 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
841 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
842 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
848 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
849 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
854 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
855 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
856 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
857 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
858 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
859 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
860 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
861 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
862 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
863 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
864 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
865 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
866 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
867 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
868 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
869 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
875 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
876 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
881 const int src0= src[0*srcStride];\
882 const int src1= src[1*srcStride];\
883 const int src2= src[2*srcStride];\
884 const int src3= src[3*srcStride];\
885 const int src4= src[4*srcStride];\
886 const int src5= src[5*srcStride];\
887 const int src6= src[6*srcStride];\
888 const int src7= src[7*srcStride];\
889 const int src8= src[8*srcStride];\
890 const int src9= src[9*srcStride];\
891 const int src10= src[10*srcStride];\
892 const int src11= src[11*srcStride];\
893 const int src12= src[12*srcStride];\
894 const int src13= src[13*srcStride];\
895 const int src14= src[14*srcStride];\
896 const int src15= src[15*srcStride];\
897 const int src16= src[16*srcStride];\
898 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
899 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
900 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
901 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
902 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
903 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
904 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
905 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
906 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
907 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
908 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
909 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
910 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
911 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
912 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
913 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
919 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
921 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
922 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
925 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
926 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
929 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
931 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
932 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
935 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
938 copy_block9(full, src, 16, stride, 9);\
939 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
940 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
943 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
945 copy_block9(full, src, 16, stride, 9);\
946 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
949 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
952 copy_block9(full, src, 16, stride, 9);\
953 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
954 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
956 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
961 copy_block9(full, src, 16, stride, 9);\
962 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
963 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
964 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
965 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
967 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
971 copy_block9(full, src, 16, stride, 9);\
972 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
973 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
974 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
975 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
977 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
982 copy_block9(full, src, 16, stride, 9);\
983 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
984 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
985 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
986 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
988 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
992 copy_block9(full, src, 16, stride, 9);\
993 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
994 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
995 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
996 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
998 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1002 uint8_t halfHV[64];\
1003 copy_block9(full, src, 16, stride, 9);\
1004 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1005 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1006 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1007 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1009 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1010 uint8_t full[16*9];\
1012 uint8_t halfHV[64];\
1013 copy_block9(full, src, 16, stride, 9);\
1014 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1015 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1016 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1017 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1019 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1020 uint8_t full[16*9];\
1023 uint8_t halfHV[64];\
1024 copy_block9(full, src, 16, stride, 9);\
1025 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1026 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1027 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1028 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1030 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1031 uint8_t full[16*9];\
1033 uint8_t halfHV[64];\
1034 copy_block9(full, src, 16, stride, 9);\
1035 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1036 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1037 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1038 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1040 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1042 uint8_t halfHV[64];\
1043 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1044 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1045 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1047 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1049 uint8_t halfHV[64];\
1050 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1051 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1052 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1054 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1055 uint8_t full[16*9];\
1058 uint8_t halfHV[64];\
1059 copy_block9(full, src, 16, stride, 9);\
1060 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1061 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1062 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1063 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1065 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1066 uint8_t full[16*9];\
1068 copy_block9(full, src, 16, stride, 9);\
1069 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1070 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1071 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1073 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1074 uint8_t full[16*9];\
1077 uint8_t halfHV[64];\
1078 copy_block9(full, src, 16, stride, 9);\
1079 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1080 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1081 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1082 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1084 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1085 uint8_t full[16*9];\
1087 copy_block9(full, src, 16, stride, 9);\
1088 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1089 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1090 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1092 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1094 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1095 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1098 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1100 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1101 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1104 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1105 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1108 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1110 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1111 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1114 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1115 uint8_t full[24*17];\
1117 copy_block17(full, src, 24, stride, 17);\
1118 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1119 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1122 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1123 uint8_t full[24*17];\
1124 copy_block17(full, src, 24, stride, 17);\
1125 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1128 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1129 uint8_t full[24*17];\
1131 copy_block17(full, src, 24, stride, 17);\
1132 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1133 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1135 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1136 uint8_t full[24*17];\
1137 uint8_t halfH[272];\
1138 uint8_t halfV[256];\
1139 uint8_t halfHV[256];\
1140 copy_block17(full, src, 24, stride, 17);\
1141 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1142 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1143 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1144 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1146 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1147 uint8_t full[24*17];\
1148 uint8_t halfH[272];\
1149 uint8_t halfHV[256];\
1150 copy_block17(full, src, 24, stride, 17);\
1151 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1152 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1153 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1154 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1156 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1157 uint8_t full[24*17];\
1158 uint8_t halfH[272];\
1159 uint8_t halfV[256];\
1160 uint8_t halfHV[256];\
1161 copy_block17(full, src, 24, stride, 17);\
1162 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1163 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1164 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1165 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1167 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1168 uint8_t full[24*17];\
1169 uint8_t halfH[272];\
1170 uint8_t halfHV[256];\
1171 copy_block17(full, src, 24, stride, 17);\
1172 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1173 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1174 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1175 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1177 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1178 uint8_t full[24*17];\
1179 uint8_t halfH[272];\
1180 uint8_t halfV[256];\
1181 uint8_t halfHV[256];\
1182 copy_block17(full, src, 24, stride, 17);\
1183 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1184 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1185 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1186 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1188 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1189 uint8_t full[24*17];\
1190 uint8_t halfH[272];\
1191 uint8_t halfHV[256];\
1192 copy_block17(full, src, 24, stride, 17);\
1193 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1194 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1195 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1196 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1198 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1199 uint8_t full[24*17];\
1200 uint8_t halfH[272];\
1201 uint8_t halfV[256];\
1202 uint8_t halfHV[256];\
1203 copy_block17(full, src, 24, stride, 17);\
1204 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1205 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1206 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1207 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1209 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1210 uint8_t full[24*17];\
1211 uint8_t halfH[272];\
1212 uint8_t halfHV[256];\
1213 copy_block17(full, src, 24, stride, 17);\
1214 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1215 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1216 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1217 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1219 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1220 uint8_t halfH[272];\
1221 uint8_t halfHV[256];\
1222 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1223 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1224 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1226 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1227 uint8_t halfH[272];\
1228 uint8_t halfHV[256];\
1229 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1230 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1231 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1233 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1234 uint8_t full[24*17];\
1235 uint8_t halfH[272];\
1236 uint8_t halfV[256];\
1237 uint8_t halfHV[256];\
1238 copy_block17(full, src, 24, stride, 17);\
1239 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1240 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1241 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1242 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1244 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1245 uint8_t full[24*17];\
1246 uint8_t halfH[272];\
1247 copy_block17(full, src, 24, stride, 17);\
1248 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1249 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1250 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1252 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1253 uint8_t full[24*17];\
1254 uint8_t halfH[272];\
1255 uint8_t halfV[256];\
1256 uint8_t halfHV[256];\
1257 copy_block17(full, src, 24, stride, 17);\
1258 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1259 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1260 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1261 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1263 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1264 uint8_t full[24*17];\
1265 uint8_t halfH[272];\
1266 copy_block17(full, src, 24, stride, 17);\
1267 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1268 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1269 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1271 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1272 uint8_t halfH[272];\
1273 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1274 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Per-pixel store operators used by QPEL_MC. 'cm' is the clip table
 * (ff_cropTbl + MAX_NEG_CROP); (b)+16>>5 rounds, (b)+15>>5 is the
 * no-rounding variant; op_avg additionally averages with the old dst. */
1277 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1278 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1279 #define op_put(a, b) a = cm[((b) + 16)>>5]
1280 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the full qpel MC function families: put, put_no_rnd, avg. */
1282 QPEL_MC(0, put_ , _ , op_put)
1283 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1284 QPEL_MC(0, avg_ , _ , op_avg)
1285 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1287 #undef op_avg_no_rnd
1289 #undef op_put_no_rnd
/* The *_mc00 (zero motion-vector) cases are plain block copies, so alias
 * them to the shared pixel-copy helpers instead of generating dedicated
 * functions. With no interpolation there is nothing to round, hence the
 * no_rnd variants map to the same helpers as the rounding ones. */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
/* Fixed: was ff_put_pixels16x16_8_c, which does not match the helper named
 * by the sibling aliases above (ff_put_pixels16x16_c, see put_qpel16_mc00_c). */
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
/* WMV2 horizontal half-pel lowpass: 4-tap (-1, 9, 9, -1)/16 filter over an
 * 8-pixel row, clipped through the crop table.
 * NOTE(review): the per-row loop header and the dst/src stride-advance and
 * closing braces appear to be missing from this excerpt. */
1298 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1299 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1303 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1304 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1305 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1306 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1307 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1308 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1309 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1310 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
/* RV40 (3,3) quarter-pel cases are defined as the xy2 (center half-pel)
 * average, so forward them to the generic xy2 copy/average helpers. */
1316 #if CONFIG_RV40_DECODER
1317 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1318 put_pixels16_xy2_8_c(dst, src, stride, 16);
1320 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1321 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1323 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1324 put_pixels8_xy2_8_c(dst, src, stride, 8);
1326 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1327 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1329 #endif /* CONFIG_RV40_DECODER */
/* WMV2 vertical half-pel lowpass: same (-1, 9, 9, -1)/16 filter applied
 * down a column; processes an 8-tall column per iteration using one extra
 * sample above and two below (src_1 .. src9).
 * NOTE(review): the per-column loop header, src/dst advance, and closing
 * braces appear to be missing from this excerpt. */
1331 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1332 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1336 const int src_1= src[ -srcStride];
1337 const int src0 = src[0 ];
1338 const int src1 = src[ srcStride];
1339 const int src2 = src[2*srcStride];
1340 const int src3 = src[3*srcStride];
1341 const int src4 = src[4*srcStride];
1342 const int src5 = src[5*srcStride];
1343 const int src6 = src[6*srcStride];
1344 const int src7 = src[7*srcStride];
1345 const int src8 = src[8*srcStride];
1346 const int src9 = src[9*srcStride];
1347 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1348 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1349 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1350 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1351 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1352 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1353 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1354 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* WMV2 mspel motion-compensation entry points for the 8x8 sub-pel
 * positions (mcXY = X horizontal, Y vertical, in half-pel units).
 * mc10/mc30: horizontal lowpass averaged with the left/right source;
 * mc20/mc02: pure horizontal/vertical lowpass; mc12/mc32/mc22: combined
 * H+V filtering through intermediate halfH/halfV/halfHV buffers.
 * NOTE(review): the local buffer declarations (half, halfH, halfV, halfHV)
 * and closing braces appear to be missing from this excerpt. */
1360 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1362 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1363 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1366 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1367 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1370 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1372 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1373 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1376 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1377 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1380 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1384 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1385 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1386 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1387 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1389 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1393 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1394 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1395 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1396 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1398 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1400 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1401 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
/* H.263 Annex J style deblocking across a horizontal edge: for each column x,
 * reads the two pixels above (p0,p1) and below (p2,p3) the edge, computes the
 * gradient d, maps it through the strength-dependent ramp into d1, adjusts
 * p1/p2, clamps them, and applies a smaller correction d2 to p0/p3.
 * Compiled out unless an H.263 codec is configured.
 * NOTE(review): the per-column loop header, the d1/ad1/d2 declarations and
 * the p1/p2 +-d1 updates appear to be missing from this excerpt. */
1404 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1405 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1407 const int strength= ff_h263_loop_filter_strength[qscale];
1411 int p0= src[x-2*stride];
1412 int p1= src[x-1*stride];
1413 int p2= src[x+0*stride];
1414 int p3= src[x+1*stride];
1415 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
/* piecewise-linear response: full correction for small |d|, ramping back
   to zero for |d| >= 2*strength */
1417 if (d<-2*strength) d1= 0;
1418 else if(d<- strength) d1=-2*strength - d;
1419 else if(d< strength) d1= d;
1420 else if(d< 2*strength) d1= 2*strength - d;
/* branchless clamp of p1/p2 to 0..255 after the +-d1 adjustment */
1425 if(p1&256) p1= ~(p1>>31);
1426 if(p2&256) p2= ~(p2>>31);
1428 src[x-1*stride] = p1;
1429 src[x+0*stride] = p2;
1433 d2= av_clip((p0-p3)/4, -ad1, ad1);
1435 src[x-2*stride] = p0 - d2;
1436 src[x+ stride] = p3 + d2;
/* H.263 deblocking across a vertical edge: identical filter to
 * h263_v_loop_filter_c, but p0..p3 are the two pixels left/right of the
 * edge in each row y. Compiled out unless an H.263 codec is configured.
 * NOTE(review): the per-row loop header and the d1/ad1/d2 declarations and
 * p1/p2 +-d1 updates appear to be missing from this excerpt. */
1441 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1442 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1444 const int strength= ff_h263_loop_filter_strength[qscale];
1448 int p0= src[y*stride-2];
1449 int p1= src[y*stride-1];
1450 int p2= src[y*stride+0];
1451 int p3= src[y*stride+1];
1452 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1454 if (d<-2*strength) d1= 0;
1455 else if(d<- strength) d1=-2*strength - d;
1456 else if(d< strength) d1= d;
1457 else if(d< 2*strength) d1= 2*strength - d;
/* branchless clamp of p1/p2 to 0..255 after the +-d1 adjustment */
1462 if(p1&256) p1= ~(p1>>31);
1463 if(p2&256) p2= ~(p2>>31);
1465 src[y*stride-1] = p1;
1466 src[y*stride+0] = p2;
1470 d2= av_clip((p0-p3)/4, -ad1, ad1);
1472 src[y*stride-2] = p0 - d2;
1473 src[y*stride+1] = p3 + d2;
/* H.261 in-loop filter on an 8x8 block: separable (1,2,1)/4 smoothing,
 * first vertically into a temp[64] buffer (edge rows copied scaled by 4),
 * then horizontally back into src with final rounding >>4.
 * NOTE(review): the temp declaration, loop headers and closing braces
 * appear to be missing from this excerpt. */
1478 static void h261_loop_filter_c(uint8_t *src, int stride){
1483 temp[x ] = 4*src[x ];
1484 temp[x + 7*8] = 4*src[x + 7*stride];
1488 xy = y * stride + x;
1490 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
1495 src[ y*stride] = (temp[ y*8] + 2)>>2;
1496 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1498 xy = y * stride + x;
1500 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
/* SAD of a 16-wide block over h rows: sum of |pix1[i]-pix2[i]|, row by row.
 * NOTE(review): the accumulator declaration, row loop and pointer advances
 * appear to be missing from this excerpt. */
1505 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1511 s += abs(pix1[0] - pix2[0]);
1512 s += abs(pix1[1] - pix2[1]);
1513 s += abs(pix1[2] - pix2[2]);
1514 s += abs(pix1[3] - pix2[3]);
1515 s += abs(pix1[4] - pix2[4]);
1516 s += abs(pix1[5] - pix2[5]);
1517 s += abs(pix1[6] - pix2[6]);
1518 s += abs(pix1[7] - pix2[7]);
1519 s += abs(pix1[8] - pix2[8]);
1520 s += abs(pix1[9] - pix2[9]);
1521 s += abs(pix1[10] - pix2[10]);
1522 s += abs(pix1[11] - pix2[11]);
1523 s += abs(pix1[12] - pix2[12]);
1524 s += abs(pix1[13] - pix2[13]);
1525 s += abs(pix1[14] - pix2[14]);
1526 s += abs(pix1[15] - pix2[15]);
/* SAD of pix1 against the horizontal half-pel interpolation of pix2
 * (avg2 of each pixel and its right neighbour), 16 wide, h rows. */
1533 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1539 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1540 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1541 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1542 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1543 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1544 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1545 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1546 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1547 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1548 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1549 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1550 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1551 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1552 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1553 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1554 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
/* SAD of pix1 against the vertical half-pel interpolation of pix2
 * (avg2 of each pixel and the one a line below, via pix3), 16 wide. */
1561 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1564 uint8_t *pix3 = pix2 + line_size;
1568 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1569 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1570 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1571 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1572 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1573 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1574 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1575 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1576 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1577 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1578 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1579 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1580 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1581 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1582 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1583 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
/* SAD of pix1 against the 2x2 (half-pel in both directions) interpolation
 * of pix2, using avg4 over the current and next line (pix3), 16 wide. */
1591 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1594 uint8_t *pix3 = pix2 + line_size;
1598 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1599 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1600 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1601 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1602 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1603 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1604 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1605 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1606 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1607 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1608 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1609 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1610 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1611 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1612 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1613 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/* SAD of an 8-wide block over h rows (8-pixel variant of pix_abs16_c). */
1621 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1627 s += abs(pix1[0] - pix2[0]);
1628 s += abs(pix1[1] - pix2[1]);
1629 s += abs(pix1[2] - pix2[2]);
1630 s += abs(pix1[3] - pix2[3]);
1631 s += abs(pix1[4] - pix2[4]);
1632 s += abs(pix1[5] - pix2[5]);
1633 s += abs(pix1[6] - pix2[6]);
1634 s += abs(pix1[7] - pix2[7]);
/* 8-wide SAD against the horizontal half-pel interpolation of pix2. */
1641 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1647 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1648 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1649 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1650 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1651 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1652 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1653 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1654 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
/* 8-wide SAD against the vertical half-pel interpolation of pix2. */
1661 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1664 uint8_t *pix3 = pix2 + line_size;
1668 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1669 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1670 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1671 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1672 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1673 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1674 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1675 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
/* 8-wide SAD against the 2x2 half-pel interpolation of pix2 (avg4). */
1683 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1686 uint8_t *pix3 = pix2 + line_size;
1690 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1691 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1692 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1693 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1694 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1695 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1696 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1697 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/* Noise-shaped SSE, 16 wide: score1 is plain SSE; score2 compares the
 * 2x2 gradient structure of the two blocks, weighted by nsse_weight
 * (falls back to 8 when no context is given). */
1705 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1706 MpegEncContext *c = v;
1712 for(x=0; x<16; x++){
1713 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1716 for(x=0; x<15; x++){
1717 score2+= FFABS( s1[x ] - s1[x +stride]
1718 - s1[x+1] + s1[x+1+stride])
1719 -FFABS( s2[x ] - s2[x +stride]
1720 - s2[x+1] + s2[x+1+stride]);
1727 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1728 else return score1 + FFABS(score2)*8;
/* 8-wide variant of nsse16_c; same SSE + gradient-difference scoring.
 * NOTE(review): the loop headers over x/y appear to be missing here. */
1731 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1732 MpegEncContext *c = v;
1739 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1743 score2+= FFABS( s1[x ] - s1[x +stride]
1744 - s1[x+1] + s1[x+1+stride])
1745 -FFABS( s2[x ] - s2[x +stride]
1746 - s2[x+1] + s2[x+1+stride]);
1753 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1754 else return score1 + FFABS(score2)*8;
/* Trellis helper: evaluate the weighted squared error that results from
 * adding 'basis' scaled by 'scale' onto the residual 'rem' (per-coefficient
 * weight w), without modifying rem. Rounding uses BASIS/RECON shift. */
1757 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1761 for(i=0; i<8*8; i++){
1762 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1765 assert(-512<b && b<512);
1767 sum += (w*b)*(w*b)>>4;
/* Commit the scaled basis function onto the residual block in place
 * (same rounding as try_8x8basis_c). */
1772 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1775 for(i=0; i<8*8; i++){
1776 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1781 * permutes an 8x8 block.
1782 * @param block the block which will be permuted according to the given permutation vector
1783 * @param permutation the permutation vector
1784 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1785 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1786 * (inverse) permutated to scantable order!
1788 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1794 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
/* first pass: copy the non-zero coefficients (in scan order) into temp */
1796 for(i=0; i<=last; i++){
1797 const int j= scantable[i];
/* second pass: scatter them back through the permutation table */
1802 for(i=0; i<=last; i++){
1803 const int j= scantable[i];
1804 const int perm_j= permutation[j];
1805 block[perm_j]= temp[j];
/* zero_cmp: dummy compare function that always returns 0 (used to disable
 * a comparison stage). */
1809 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
/* ff_set_cmp: fill the cmp[0..5] function-pointer slots from the DSPContext
 * according to the requested FF_CMP_* type; logs an error for unknown types.
 * NOTE(review): the surrounding switch/case labels appear to be missing
 * from this excerpt. */
1813 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1816 memset(cmp, 0, sizeof(void*)*6);
1824 cmp[i]= c->hadamard8_diff[i];
1830 cmp[i]= c->dct_sad[i];
1833 cmp[i]= c->dct264_sad[i];
1836 cmp[i]= c->dct_max[i];
1839 cmp[i]= c->quant_psnr[i];
1868 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
/* Byte-wise dst[i] += src[i] for w bytes: the main loop adds sizeof(long)
 * bytes at once using the pb_7f/pb_80 SWAR carry trick; a scalar tail
 * loop (partially missing from this excerpt) handles the remainder. */
1873 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1875 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1876 long a = *(long*)(src+i);
1877 long b = *(long*)(dst+i);
1878 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1881 dst[i+0] += src[i+0];
/* Byte-wise dst[i] = src1[i] - src2[i] for w bytes. On targets without fast
 * unaligned access a plain unrolled byte loop is used when src2 is
 * misaligned; otherwise the SWAR wordwise subtraction (pb_7f/pb_80 borrow
 * trick) runs, followed by a scalar tail. */
1884 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1886 #if !HAVE_FAST_UNALIGNED
1887 if((long)src2 & (sizeof(long)-1)){
1888 for(i=0; i+7<w; i+=8){
1889 dst[i+0] = src1[i+0]-src2[i+0];
1890 dst[i+1] = src1[i+1]-src2[i+1];
1891 dst[i+2] = src1[i+2]-src2[i+2];
1892 dst[i+3] = src1[i+3]-src2[i+3];
1893 dst[i+4] = src1[i+4]-src2[i+4];
1894 dst[i+5] = src1[i+5]-src2[i+5];
1895 dst[i+6] = src1[i+6]-src2[i+6];
1896 dst[i+7] = src1[i+7]-src2[i+7];
1900 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1901 long a = *(long*)(src1+i);
1902 long b = *(long*)(src2+i);
1903 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1906 dst[i+0] = src1[i+0]-src2[i+0];
/* HuffYUV median predictors: add_* reconstructs pixels from diffs using the
 * median of (left, above, left+above-aboveleft); sub_* produces the diffs.
 * add_hfyu_left_prediction(_bgr32)_c apply running left prediction.
 * NOTE(review): these four functions are heavily truncated in this excerpt
 * (loop bodies and l/lt bookkeeping are missing). */
1909 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1917 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1926 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1934 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1944 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1947 for(i=0; i<w-1; i++){
1974 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Hadamard butterfly helpers: BUTTERFLY2 writes sum/difference of two
 * inputs, BUTTERFLY1 does it in place, BUTTERFLYA yields |x+y| + |x-y|.
 * NOTE(review): the continuation lines of the first two macro bodies are
 * missing from this excerpt; no comments are inserted between the
 * backslash-continued lines to avoid changing preprocessing. */
2004 #define BUTTERFLY2(o1,o2,i1,i2) \
2008 #define BUTTERFLY1(x,y) \
2017 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/* SATD: 8x8 Hadamard transform of the src-dst residual; horizontal
 * butterflies per row, then vertical butterflies per column, summing the
 * final absolute values via BUTTERFLYA. */
2019 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
2027 //FIXME try pointer walks
2028 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2029 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2030 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2031 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2033 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2034 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2035 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2036 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2038 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2039 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2040 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2041 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
/* vertical pass over columns i, accumulating |coef| into sum */
2045 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2046 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2047 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2048 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2050 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2051 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2052 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2053 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2056 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2057 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2058 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2059 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
/* Intra SATD: same 8x8 Hadamard as hadamard8_diff8x8_c but on the source
 * pixels themselves; the DC term (mean) is subtracted at the end. */
2064 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
2072 //FIXME try pointer walks
2073 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2074 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2075 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2076 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2078 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2079 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2080 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2081 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2083 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2084 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2085 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2086 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2090 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2091 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2092 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2093 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2095 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2096 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2097 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2098 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2101 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2102 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2103 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2104 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2107 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
/* DCT-domain SAD: takes the pixel difference, forward-DCTs it, and sums
 * the absolute DCT coefficients via the DSP helpers. */
2112 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2113 MpegEncContext * const s= (MpegEncContext *)c;
2114 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2118 s->dsp.diff_pixels(temp, src1, src2, stride);
2120 return s->dsp.sum_abs_dctelem(temp);
/* Continuation lines of the DCT8_1D macro (H.264-style 1-D 8-point integer
 * transform): s/d pairs are the even/odd symmetric sums and differences,
 * a0..a7 the intermediate butterfly values written out via DST().
 * NOTE(review): the '#define DCT8_1D' opening line is missing from this
 * excerpt; no comments are inserted between the backslash-continued lines
 * to avoid altering preprocessing. */
2125 const int s07 = SRC(0) + SRC(7);\
2126 const int s16 = SRC(1) + SRC(6);\
2127 const int s25 = SRC(2) + SRC(5);\
2128 const int s34 = SRC(3) + SRC(4);\
2129 const int a0 = s07 + s34;\
2130 const int a1 = s16 + s25;\
2131 const int a2 = s07 - s34;\
2132 const int a3 = s16 - s25;\
2133 const int d07 = SRC(0) - SRC(7);\
2134 const int d16 = SRC(1) - SRC(6);\
2135 const int d25 = SRC(2) - SRC(5);\
2136 const int d34 = SRC(3) - SRC(4);\
2137 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2138 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2139 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2140 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2142 DST(1, a4 + (a7>>2)) ;\
2143 DST(2, a2 + (a3>>1)) ;\
2144 DST(3, a5 + (a6>>2)) ;\
2146 DST(5, a6 - (a5>>2)) ;\
2147 DST(6, (a2>>1) - a3 ) ;\
2148 DST(7, (a4>>2) - a7 ) ;\
/* H.264-transform SAD: diffs the blocks, then runs DCT8_1D first over rows
 * (SRC/DST redefined to dct[i][x]) and then over columns, where DST
 * accumulates absolute values into sum. */
2151 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2152 MpegEncContext * const s= (MpegEncContext *)c;
2157 s->dsp.diff_pixels(dct[0], src1, src2, stride);
2159 #define SRC(x) dct[i][x]
2160 #define DST(x,v) dct[i][x]= v
2161 for( i = 0; i < 8; i++ )
2166 #define SRC(x) dct[x][i]
2167 #define DST(x,v) sum += FFABS(v)
2168 for( i = 0; i < 8; i++ )
/* DCT-max metric: forward-DCT of the pixel difference, returning the
 * largest absolute coefficient instead of a sum. */
2176 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2177 MpegEncContext * const s= (MpegEncContext *)c;
2178 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2183 s->dsp.diff_pixels(temp, src1, src2, stride);
2187 sum= FFMAX(sum, FFABS(temp[i]));
/* Quantization-PSNR metric: DCT the difference, keep a copy (bak),
 * quantize + dequantize + IDCT it, and return the squared error the
 * quantization round-trip introduced in the transform domain. */
2192 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2193 MpegEncContext * const s= (MpegEncContext *)c;
2194 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2195 DCTELEM * const bak = temp+64;
2201 s->dsp.diff_pixels(temp, src1, src2, stride);
2203 memcpy(bak, temp, 64*sizeof(DCTELEM));
2205 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2206 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2207 ff_simple_idct_8(temp); //FIXME
2210 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
/* Rate-distortion metric: quantizes the residual, counts the VLC bits of
 * the resulting run/level pairs (intra vs inter tables), reconstructs the
 * block (dequantize + idct_add onto a local copy) and returns
 * distortion + lambda-scaled bit cost.
 * NOTE(review): intra/inter branch conditions, run/level bookkeeping and
 * escape handling lines appear to be missing from this excerpt. */
2215 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2216 MpegEncContext * const s= (MpegEncContext *)c;
2217 const uint8_t *scantable= s->intra_scantable.permutated;
2218 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2219 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2220 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2221 int i, last, run, bits, level, distortion, start_i;
2222 const int esc_length= s->ac_esc_length;
2224 uint8_t * last_length;
2228 copy_block8(lsrc1, src1, 8, stride, 8);
2229 copy_block8(lsrc2, src2, 8, stride, 8);
2231 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2233 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2239 length = s->intra_ac_vlc_length;
2240 last_length= s->intra_ac_vlc_last_length;
2241 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2244 length = s->inter_ac_vlc_length;
2245 last_length= s->inter_ac_vlc_last_length;
2250 for(i=start_i; i<last; i++){
2251 int j= scantable[i];
2256 if((level&(~127)) == 0){
2257 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2266 level= temp[i] + 64;
2270 if((level&(~127)) == 0){
2271 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2279 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2281 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2284 s->dsp.idct_add(lsrc2, 8, temp);
2286 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2288 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* Bit-cost metric: same quantize + VLC bit-counting as rd8x8_c, but
 * without the reconstruction/distortion part — returns only the bits.
 * NOTE(review): intra/inter branch conditions and run/level bookkeeping
 * lines appear to be missing from this excerpt. */
2291 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2292 MpegEncContext * const s= (MpegEncContext *)c;
2293 const uint8_t *scantable= s->intra_scantable.permutated;
2294 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2295 int i, last, run, bits, level, start_i;
2296 const int esc_length= s->ac_esc_length;
2298 uint8_t * last_length;
2302 s->dsp.diff_pixels(temp, src1, src2, stride);
2304 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2310 length = s->intra_ac_vlc_length;
2311 last_length= s->intra_ac_vlc_last_length;
2312 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2315 length = s->inter_ac_vlc_length;
2316 last_length= s->inter_ac_vlc_last_length;
2321 for(i=start_i; i<last; i++){
2322 int j= scantable[i];
2327 if((level&(~127)) == 0){
2328 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2337 level= temp[i] + 64;
2341 if((level&(~127)) == 0){
2342 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* VSAD_INTRA(size): generates vsad_intra{size}_c — sum of absolute
 * vertical gradients within one block (intra "noisiness" measure).
 * vsad16_c below is the inter variant on the s1-s2 difference signal.
 * NOTE(review): some continuation lines of the macro and the loop headers
 * of vsad16_c are missing from this excerpt; no comments are inserted
 * between backslash-continued lines. */
2350 #define VSAD_INTRA(size) \
2351 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2355 for(y=1; y<h; y++){ \
2356 for(x=0; x<size; x+=4){ \
2357 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2358 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2368 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2373 for(x=0; x<16; x++){
2374 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* SQ: square helper for the VSSE metrics below. */
2383 #define SQ(a) ((a)*(a))
/* VSSE_INTRA(size): like VSAD_INTRA but with squared vertical gradients;
 * vsse16_c is the inter variant on the s1-s2 difference signal.
 * NOTE(review): macro continuation lines and vsse16_c loop headers are
 * partially missing from this excerpt. */
2384 #define VSSE_INTRA(size) \
2385 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2389 for(y=1; y<h; y++){ \
2390 for(x=0; x<size; x+=4){ \
2391 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2392 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2402 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2407 for(x=0; x<16; x++){
2408 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Sum of squared differences between an int8 and an int16 array of the
 * same length (the size parameter and return are on elided lines). */
2417 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2421 for(i=0; i<size; i++)
2422 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
/* Instantiate 16x16 compare functions from their 8x8 kernels: each wrapper
 * applies the 8x8 function to the four quadrants and sums the scores. */
2426 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2427 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2428 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2430 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2432 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2433 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2434 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2435 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/* Element-wise float multiply: dst[i] = src0[i] * src1[i]. */
2437 static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
2439 for(i=0; i<len; i++)
2440 dst[i] = src0[i] * src1[i];
/* Multiply src0 by src1 traversed backwards (dst[i] = src0[i]*src1[-i]).
 * NOTE(review): an elided line (orig. 2445) apparently advances src1 to
 * its last element before the loop — confirm against the full source. */
2443 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
2446 for(i=0; i<len; i++)
2447 dst[i] = src0[i] * src1[-i];
/* Fused multiply-add: dst[i] = src0[i] * src1[i] + src2[i]. */
2450 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
2452 for(i=0; i<len; i++)
2453 dst[i] = src0[i] * src1[i] + src2[i];
/* Symmetric windowing with overlap (MDCT-style): walks i from -len up and
 * j from len-1 down, combining a pair of samples with a pair of window
 * coefficients per iteration.  The loads of s0/s1/wi/wj (orig. 2464-2467)
 * are elided in this sampled listing, so the exact operand sources should
 * be confirmed against the full file. */
2456 static void vector_fmul_window_c(float *dst, const float *src0,
2457 const float *src1, const float *win, int len)
2463 for(i=-len, j=len-1; i<0; i++, j--) {
2468 dst[i] = s0*wj - s1*wi;
2469 dst[j] = s0*wi + s1*wj;
/* Scale a float vector by a scalar: dst[i] = src[i] * mul. */
2473 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
2477 for (i = 0; i < len; i++)
2478 dst[i] = src[i] * mul;
/* Multiply-accumulate a scaled vector into dst: dst[i] += src[i] * mul. */
2481 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
2485 for (i = 0; i < len; i++)
2486 dst[i] += src[i] * mul;
/* In-place butterfly on two non-aliasing vectors: computes the difference
 * t = v1[i] - v2[i]; the stores of sum/difference back into v1/v2 are on
 * elided lines of this sampled listing. */
2489 static void butterflies_float_c(float *restrict v1, float *restrict v2,
2493 for (i = 0; i < len; i++) {
2494 float t = v1[i] - v2[i];
/* Dot product of two float vectors; the accumulation statement and return
 * are on elided lines of this sampled listing. */
2500 static float scalarproduct_float_c(const float *v1, const float *v2, int len)
2505 for (i = 0; i < len; i++)
/* Clip one float, handled as its raw uint32 bit pattern, against the
 * bit patterns of min/max.  Used only by the opposite-sign path below
 * (min < 0 < max): for that case negative floats compare as large
 * unsigned values, so 'a > mini' detects values below min, and the
 * sign-flipped comparison against maxisign detects values above max.
 * The pass-through return of 'a' is on an elided line. */
2511 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
2512 uint32_t maxi, uint32_t maxisign)
2515 if(a > mini) return mini;
2516 else if((a^(1U<<31)) > maxisign) return maxi;
/* Clip a float vector to [*min, *max] for the min<0<max case, operating on
 * raw IEEE-754 bit patterns via clipf_c_one, unrolled 8-wide.  len is
 * expected to be a multiple of 8.  NOTE(review): the uint32_t* casts of
 * float pointers are a strict-aliasing violation by modern rules; kept
 * as-is since this mirrors the project's established code. */
2520 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2522 uint32_t mini = *(uint32_t*)min;
2523 uint32_t maxi = *(uint32_t*)max;
2524 uint32_t maxisign = maxi ^ (1U<<31);
2525 uint32_t *dsti = (uint32_t*)dst;
2526 const uint32_t *srci = (const uint32_t*)src;
2527 for(i=0; i<len; i+=8) {
2528 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2529 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2530 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2531 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2532 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2533 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2534 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2535 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
/* Clip a float vector to [min, max], unrolled 8-wide (len expected to be a
 * multiple of 8).  When min and max straddle zero, dispatch to the faster
 * bit-pattern variant above; the else joining the two paths is on an
 * elided line of this sampled listing. */
2538 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
2540 if(min < 0 && max > 0) {
2541 vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
2543 for(i=0; i < len; i+=8) {
2544 dst[i ] = av_clipf(src[i ], min, max);
2545 dst[i + 1] = av_clipf(src[i + 1], min, max);
2546 dst[i + 2] = av_clipf(src[i + 2], min, max);
2547 dst[i + 3] = av_clipf(src[i + 3], min, max);
2548 dst[i + 4] = av_clipf(src[i + 4], min, max);
2549 dst[i + 5] = av_clipf(src[i + 5], min, max);
2550 dst[i + 6] = av_clipf(src[i + 6], min, max);
2551 dst[i + 7] = av_clipf(src[i + 7], min, max);
/* Dot product of two int16 vectors with each product right-shifted by
 * 'shift' before accumulation; the loop header, accumulator init and
 * return are on elided lines of this sampled listing. */
2556 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
2561 res += (*v1++ * *v2++) >> shift;
/* Combined op: accumulates v1.v2 (on an elided line) while updating v1 in
 * place with v1[i] += mul * v3[i]; loop header and return also elided. */
2566 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
2571 *v1++ += mul * *v3++;
/* Apply a symmetric Q15 window to int16 samples: the same coefficient
 * window[i] scales sample i from the front and sample len-1-i from the
 * back, with round-to-nearest ((1<<14) bias, >>15). */
2576 static void apply_window_int16_c(int16_t *output, const int16_t *input,
2577 const int16_t *window, unsigned int len)
2580 int len2 = len >> 1;
2582 for (i = 0; i < len2; i++) {
2583 int16_t w = window[i];
2584 output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
2585 output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
/* Clip an int32 vector to [min, max], unrolled 8 elements per iteration
 * (len expected to be a multiple of 8).  The loop construct around the
 * unrolled body (orig. 2591-2592 and the tail) is on elided lines of this
 * sampled listing. */
2589 static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2590 int32_t max, unsigned int len)
2593 *dst++ = av_clip(*src++, min, max);
2594 *dst++ = av_clip(*src++, min, max);
2595 *dst++ = av_clip(*src++, min, max);
2596 *dst++ = av_clip(*src++, min, max);
2597 *dst++ = av_clip(*src++, min, max);
2598 *dst++ = av_clip(*src++, min, max);
2599 *dst++ = av_clip(*src++, min, max);
2600 *dst++ = av_clip(*src++, min, max);
/* Fixed-point IDCT cosine constants, 2048*sqrt(2)*cos(i*pi/16) rounded to
 * integers, used by wmv2_idct_row/col below.  NOTE(review): W0, which the
 * idct code also uses, is defined on a line elided from this sampled
 * listing (orig. ~2605) — confirm its value against the full file. */
2606 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
2607 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
2608 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
2609 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
2610 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
2611 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
2612 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
/* One row of the WMV2 8-point inverse DCT: odd-coefficient butterflies
 * (a1,a7,a5,a3), even pair (a2,a6), DC pair (a0,a4), the two 181/256
 * (~1/sqrt(2)) rotations, then the output butterflies with round-to-
 * nearest (+128, >>8).  Declarations of s1/s2 are on elided lines of this
 * sampled listing. */
2614 static void wmv2_idct_row(short * b)
2617 int a0,a1,a2,a3,a4,a5,a6,a7;
2619 a1 = W1*b[1]+W7*b[7];
2620 a7 = W7*b[1]-W1*b[7];
2621 a5 = W5*b[5]+W3*b[3];
2622 a3 = W3*b[5]-W5*b[3];
2623 a2 = W2*b[2]+W6*b[6];
2624 a6 = W6*b[2]-W2*b[6];
2625 a0 = W0*b[0]+W0*b[4];
2626 a4 = W0*b[0]-W0*b[4];
2628 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2629 s2 = (181*(a1-a5-a7+a3)+128)>>8;
/* output butterflies: symmetric pairs (0,7),(1,6),(2,5),(3,4) */
2631 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2632 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2633 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2634 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2635 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2636 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2637 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2638 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
/* One column (stride 8) of the WMV2 inverse DCT.  Same butterfly network
 * as wmv2_idct_row, but each stage-1 term keeps 3 extra precision bits
 * (+4, >>3) and the final outputs are normalized with +(1<<13), >>14.
 * Declarations of s1/s2 are on elided lines of this sampled listing. */
2640 static void wmv2_idct_col(short * b)
2643 int a0,a1,a2,a3,a4,a5,a6,a7;
2644 /*step 1, with extended precision*/
2645 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2646 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2647 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2648 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2649 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2650 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2651 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2652 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
/* 181/256 approximates 1/sqrt(2) for the odd-part rotation */
2654 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2655 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2657 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2658 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2659 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2660 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2662 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2663 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2664 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2665 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
/* Full 8x8 WMV2 IDCT: row passes then column passes over the block.
 * The two loop headers stepping i are on elided lines of this sampled
 * listing. */
2667 void ff_wmv2_idct_c(short * block){
2671 wmv2_idct_row(block+i);
2674 wmv2_idct_col(block+i);
2677 /* XXX: those functions should be suppressed ASAP when all IDCTs are
/* (comment truncated in this listing — the intent is that these
 * idct+store wrappers are transitional) */
/* WMV2 IDCT then clamped store of the result into dest. */
2679 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2681 ff_wmv2_idct_c(block);
2682 ff_put_pixels_clamped_c(block, dest, line_size);
/* WMV2 IDCT then clamped addition of the result onto dest. */
2684 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2686 ff_wmv2_idct_c(block);
2687 ff_add_pixels_clamped_c(block, dest, line_size);
/* jref (integer reference) IDCT + clamped store; the j_rev_dct call is on
 * an elided line of this sampled listing. */
2689 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2692 ff_put_pixels_clamped_c(block, dest, line_size);
/* jref IDCT + clamped add; the j_rev_dct call is on an elided line. */
2694 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2697 ff_add_pixels_clamped_c(block, dest, line_size);
/* 4x4 (lowres 1) jref IDCT + clamped store; idct call on an elided line. */
2700 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
2703 put_pixels_clamped4_c(block, dest, line_size);
/* 4x4 (lowres 1) jref IDCT + clamped add; idct call on an elided line. */
2705 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
2708 add_pixels_clamped4_c(block, dest, line_size);
/* 2x2 (lowres 2) jref IDCT + clamped store; idct call on an elided line. */
2711 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
2714 put_pixels_clamped2_c(block, dest, line_size);
/* 2x2 (lowres 2) jref IDCT + clamped add; idct call on an elided line. */
2716 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
2719 add_pixels_clamped2_c(block, dest, line_size);
/* 1x1 (lowres 3, DC-only) "IDCT": scale DC by 1/8 with rounding and store
 * the single clamped pixel via the crop table. */
2722 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
2724 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2726 dest[0] = cm[(block[0] + 4)>>3];
/* 1x1 DC-only IDCT, add variant: adds the rounded, scaled DC onto the
 * existing pixel and clamps via the crop table. */
2728 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
2730 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2732 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
2735 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
2737 /* init static data */
/* Builds the global lookup tables used throughout dsputil:
 *  - ff_cropTbl: identity over [0,255] with MAX_NEG_CROP clamped padding
 *    on both sides (the low-side 0 fill, orig. 2744, is elided here)
 *  - ff_squareTbl: (i-256)^2 for signed-difference squaring
 *  - inv_zigzag_direct16: inverse zigzag permutation, 1-based */
2738 av_cold void dsputil_static_init(void)
2742 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2743 for(i=0;i<MAX_NEG_CROP;i++) {
2745 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2748 for(i=0;i<512;i++) {
2749 ff_squareTbl[i] = (i - 256) * (i - 256);
2752 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
/* Sanity check that the compiler 16-byte-aligns stack variables (required
 * by the SIMD code paths).  Logs a one-time warning on MMX/AltiVec builds
 * when misaligned; the did_fail latch update and return are on elided
 * lines of this sampled listing. */
2755 int ff_check_alignment(void){
2756 static int did_fail=0;
2757 LOCAL_ALIGNED_16(int, aligned, [4]);
2759 if((intptr_t)aligned & 15){
2761 #if HAVE_MMX || HAVE_ALTIVEC
2762 av_log(NULL, AV_LOG_ERROR,
2763 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2764 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2765 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2766 "Do not report crashes to FFmpeg developers.\n");
2775 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2779 ff_check_alignment();
2782 if (avctx->bits_per_raw_sample == 10) {
2783 c->fdct = ff_jpeg_fdct_islow_10;
2784 c->fdct248 = ff_fdct248_islow_10;
2786 if(avctx->dct_algo==FF_DCT_FASTINT) {
2787 c->fdct = fdct_ifast;
2788 c->fdct248 = fdct_ifast248;
2790 else if(avctx->dct_algo==FF_DCT_FAAN) {
2791 c->fdct = ff_faandct;
2792 c->fdct248 = ff_faandct248;
2795 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2796 c->fdct248 = ff_fdct248_islow_8;
2799 #endif //CONFIG_ENCODERS
2801 if(avctx->lowres==1){
2802 c->idct_put= ff_jref_idct4_put;
2803 c->idct_add= ff_jref_idct4_add;
2804 c->idct = j_rev_dct4;
2805 c->idct_permutation_type= FF_NO_IDCT_PERM;
2806 }else if(avctx->lowres==2){
2807 c->idct_put= ff_jref_idct2_put;
2808 c->idct_add= ff_jref_idct2_add;
2809 c->idct = j_rev_dct2;
2810 c->idct_permutation_type= FF_NO_IDCT_PERM;
2811 }else if(avctx->lowres==3){
2812 c->idct_put= ff_jref_idct1_put;
2813 c->idct_add= ff_jref_idct1_add;
2814 c->idct = j_rev_dct1;
2815 c->idct_permutation_type= FF_NO_IDCT_PERM;
2817 if (avctx->bits_per_raw_sample == 10) {
2818 c->idct_put = ff_simple_idct_put_10;
2819 c->idct_add = ff_simple_idct_add_10;
2820 c->idct = ff_simple_idct_10;
2821 c->idct_permutation_type = FF_NO_IDCT_PERM;
2823 if(avctx->idct_algo==FF_IDCT_INT){
2824 c->idct_put= ff_jref_idct_put;
2825 c->idct_add= ff_jref_idct_add;
2826 c->idct = j_rev_dct;
2827 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2828 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
2829 avctx->idct_algo==FF_IDCT_VP3){
2830 c->idct_put= ff_vp3_idct_put_c;
2831 c->idct_add= ff_vp3_idct_add_c;
2832 c->idct = ff_vp3_idct_c;
2833 c->idct_permutation_type= FF_NO_IDCT_PERM;
2834 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2835 c->idct_put= ff_wmv2_idct_put_c;
2836 c->idct_add= ff_wmv2_idct_add_c;
2837 c->idct = ff_wmv2_idct_c;
2838 c->idct_permutation_type= FF_NO_IDCT_PERM;
2839 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2840 c->idct_put= ff_faanidct_put;
2841 c->idct_add= ff_faanidct_add;
2842 c->idct = ff_faanidct;
2843 c->idct_permutation_type= FF_NO_IDCT_PERM;
2844 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
2845 c->idct_put= ff_ea_idct_put_c;
2846 c->idct_permutation_type= FF_NO_IDCT_PERM;
2847 }else{ //accurate/default
2848 c->idct_put = ff_simple_idct_put_8;
2849 c->idct_add = ff_simple_idct_add_8;
2850 c->idct = ff_simple_idct_8;
2851 c->idct_permutation_type= FF_NO_IDCT_PERM;
2856 c->diff_pixels = diff_pixels_c;
2857 c->put_pixels_clamped = ff_put_pixels_clamped_c;
2858 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
2859 c->add_pixels_clamped = ff_add_pixels_clamped_c;
2860 c->sum_abs_dctelem = sum_abs_dctelem_c;
2863 c->pix_sum = pix_sum_c;
2864 c->pix_norm1 = pix_norm1_c;
2866 c->fill_block_tab[0] = fill_block16_c;
2867 c->fill_block_tab[1] = fill_block8_c;
2869 /* TODO [0] 16 [1] 8 */
2870 c->pix_abs[0][0] = pix_abs16_c;
2871 c->pix_abs[0][1] = pix_abs16_x2_c;
2872 c->pix_abs[0][2] = pix_abs16_y2_c;
2873 c->pix_abs[0][3] = pix_abs16_xy2_c;
2874 c->pix_abs[1][0] = pix_abs8_c;
2875 c->pix_abs[1][1] = pix_abs8_x2_c;
2876 c->pix_abs[1][2] = pix_abs8_y2_c;
2877 c->pix_abs[1][3] = pix_abs8_xy2_c;
2879 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2880 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2881 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2882 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2883 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2884 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2885 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2886 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2887 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2889 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2890 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2891 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2892 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2893 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2894 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2895 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2896 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2897 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2899 #define dspfunc(PFX, IDX, NUM) \
2900 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2901 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2902 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2903 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2904 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2905 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2906 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2907 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2908 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2909 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2910 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2911 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2912 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2913 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2914 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2915 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2917 dspfunc(put_qpel, 0, 16);
2918 dspfunc(put_no_rnd_qpel, 0, 16);
2920 dspfunc(avg_qpel, 0, 16);
2921 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2923 dspfunc(put_qpel, 1, 8);
2924 dspfunc(put_no_rnd_qpel, 1, 8);
2926 dspfunc(avg_qpel, 1, 8);
2927 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2931 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
2932 ff_mlp_init(c, avctx);
2934 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
2935 ff_intrax8dsp_init(c,avctx);
2938 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2939 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2940 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2941 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2942 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2943 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2944 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2945 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2947 #define SET_CMP_FUNC(name) \
2948 c->name[0]= name ## 16_c;\
2949 c->name[1]= name ## 8x8_c;
2951 SET_CMP_FUNC(hadamard8_diff)
2952 c->hadamard8_diff[4]= hadamard8_intra16_c;
2953 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2954 SET_CMP_FUNC(dct_sad)
2955 SET_CMP_FUNC(dct_max)
2957 SET_CMP_FUNC(dct264_sad)
2959 c->sad[0]= pix_abs16_c;
2960 c->sad[1]= pix_abs8_c;
2964 SET_CMP_FUNC(quant_psnr)
2967 c->vsad[0]= vsad16_c;
2968 c->vsad[4]= vsad_intra16_c;
2969 c->vsad[5]= vsad_intra8_c;
2970 c->vsse[0]= vsse16_c;
2971 c->vsse[4]= vsse_intra16_c;
2972 c->vsse[5]= vsse_intra8_c;
2973 c->nsse[0]= nsse16_c;
2974 c->nsse[1]= nsse8_c;
2976 ff_dsputil_init_dwt(c);
2979 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2981 c->add_bytes= add_bytes_c;
2982 c->diff_bytes= diff_bytes_c;
2983 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2984 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2985 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2986 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2987 c->bswap_buf= bswap_buf;
2988 c->bswap16_buf = bswap16_buf;
2990 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2991 c->h263_h_loop_filter= h263_h_loop_filter_c;
2992 c->h263_v_loop_filter= h263_v_loop_filter_c;
2995 if (CONFIG_VP3_DECODER) {
2996 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
2997 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
2998 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
3001 c->h261_loop_filter= h261_loop_filter_c;
3003 c->try_8x8basis= try_8x8basis_c;
3004 c->add_8x8basis= add_8x8basis_c;
3006 #if CONFIG_VORBIS_DECODER
3007 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
3009 #if CONFIG_AC3_DECODER
3010 c->ac3_downmix = ff_ac3_downmix_c;
3012 c->vector_fmul = vector_fmul_c;
3013 c->vector_fmul_reverse = vector_fmul_reverse_c;
3014 c->vector_fmul_add = vector_fmul_add_c;
3015 c->vector_fmul_window = vector_fmul_window_c;
3016 c->vector_clipf = vector_clipf_c;
3017 c->scalarproduct_int16 = scalarproduct_int16_c;
3018 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
3019 c->apply_window_int16 = apply_window_int16_c;
3020 c->vector_clip_int32 = vector_clip_int32_c;
3021 c->scalarproduct_float = scalarproduct_float_c;
3022 c->butterflies_float = butterflies_float_c;
3023 c->vector_fmul_scalar = vector_fmul_scalar_c;
3024 c->vector_fmac_scalar = vector_fmac_scalar_c;
3026 c->shrink[0]= av_image_copy_plane;
3027 c->shrink[1]= ff_shrink22;
3028 c->shrink[2]= ff_shrink44;
3029 c->shrink[3]= ff_shrink88;
3031 c->prefetch= just_return;
3033 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3034 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3038 #define FUNC(f, depth) f ## _ ## depth
3039 #define FUNCC(f, depth) f ## _ ## depth ## _c
3041 #define dspfunc1(PFX, IDX, NUM, depth)\
3042 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
3043 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3044 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3045 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3047 #define dspfunc2(PFX, IDX, NUM, depth)\
3048 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3049 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3050 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3051 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3052 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3053 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3054 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3055 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3056 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3057 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3058 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3059 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3060 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3061 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3062 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3063 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3066 #define BIT_DEPTH_FUNCS(depth, dct)\
3067 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
3068 c->draw_edges = FUNCC(draw_edges , depth);\
3069 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
3070 c->clear_block = FUNCC(clear_block ## dct , depth);\
3071 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3072 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3073 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
3074 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3075 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3077 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3078 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3079 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3080 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3081 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3082 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3084 dspfunc1(put , 0, 16, depth);\
3085 dspfunc1(put , 1, 8, depth);\
3086 dspfunc1(put , 2, 4, depth);\
3087 dspfunc1(put , 3, 2, depth);\
3088 dspfunc1(put_no_rnd, 0, 16, depth);\
3089 dspfunc1(put_no_rnd, 1, 8, depth);\
3090 dspfunc1(avg , 0, 16, depth);\
3091 dspfunc1(avg , 1, 8, depth);\
3092 dspfunc1(avg , 2, 4, depth);\
3093 dspfunc1(avg , 3, 2, depth);\
3094 dspfunc1(avg_no_rnd, 0, 16, depth);\
3095 dspfunc1(avg_no_rnd, 1, 8, depth);\
3097 dspfunc2(put_h264_qpel, 0, 16, depth);\
3098 dspfunc2(put_h264_qpel, 1, 8, depth);\
3099 dspfunc2(put_h264_qpel, 2, 4, depth);\
3100 dspfunc2(put_h264_qpel, 3, 2, depth);\
3101 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3102 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3103 dspfunc2(avg_h264_qpel, 2, 4, depth);
3105 switch (avctx->bits_per_raw_sample) {
3107 if (c->dct_bits == 32) {
3108 BIT_DEPTH_FUNCS(9, _32);
3110 BIT_DEPTH_FUNCS(9, _16);
3114 if (c->dct_bits == 32) {
3115 BIT_DEPTH_FUNCS(10, _32);
3117 BIT_DEPTH_FUNCS(10, _16);
3121 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
3123 BIT_DEPTH_FUNCS(8, _16);
3128 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
3129 if (ARCH_ARM) dsputil_init_arm (c, avctx);
3130 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
3131 if (HAVE_VIS) dsputil_init_vis (c, avctx);
3132 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
3133 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
3134 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
3135 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
3136 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
3138 for(i=0; i<64; i++){
3139 if(!c->put_2tap_qpel_pixels_tab[0][i])
3140 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
3141 if(!c->avg_2tap_qpel_pixels_tab[0][i])
3142 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3145 ff_init_scantable_permutation(c->idct_permutation,
3146 c->idct_permutation_type);