/*
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "mpegvideo.h"
41 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
42 uint32_t ff_squareTbl[512] = {0, };
45 #include "dsputil_template.c"
49 #include "dsputil_template.c"
53 #include "dsputil_template.c"
/* Byte-replicated constants at the CPU's native word width:
   0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f (etc.), depending on sizeof(long).
   ~0UL/255 is 0x0101... (one per byte), so multiplying replicates the byte. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
59 const uint8_t ff_zigzag_direct[64] = {
60 0, 1, 8, 16, 9, 2, 3, 10,
61 17, 24, 32, 25, 18, 11, 4, 5,
62 12, 19, 26, 33, 40, 48, 41, 34,
63 27, 20, 13, 6, 7, 14, 21, 28,
64 35, 42, 49, 56, 57, 50, 43, 36,
65 29, 22, 15, 23, 30, 37, 44, 51,
66 58, 59, 52, 45, 38, 31, 39, 46,
67 53, 60, 61, 54, 47, 55, 62, 63
70 /* Specific zigzag scan for 248 idct. NOTE that unlike the
71 specification, we interleave the fields */
72 const uint8_t ff_zigzag248_direct[64] = {
73 0, 8, 1, 9, 16, 24, 2, 10,
74 17, 25, 32, 40, 48, 56, 33, 41,
75 18, 26, 3, 11, 4, 12, 19, 27,
76 34, 42, 49, 57, 50, 58, 35, 43,
77 20, 28, 5, 13, 6, 14, 21, 29,
78 36, 44, 51, 59, 52, 60, 37, 45,
79 22, 30, 7, 15, 23, 31, 38, 46,
80 53, 61, 54, 62, 39, 47, 55, 63,
83 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
86 const uint8_t ff_alternate_horizontal_scan[64] = {
87 0, 1, 2, 3, 8, 9, 16, 17,
88 10, 11, 4, 5, 6, 7, 15, 14,
89 13, 12, 19, 18, 24, 25, 32, 33,
90 26, 27, 20, 21, 22, 23, 28, 29,
91 30, 31, 34, 35, 40, 41, 48, 49,
92 42, 43, 36, 37, 38, 39, 44, 45,
93 46, 47, 50, 51, 56, 57, 58, 59,
94 52, 53, 54, 55, 60, 61, 62, 63,
97 const uint8_t ff_alternate_vertical_scan[64] = {
98 0, 8, 16, 24, 1, 9, 2, 10,
99 17, 25, 32, 40, 48, 56, 57, 49,
100 41, 33, 26, 18, 3, 11, 4, 12,
101 19, 27, 34, 42, 50, 58, 35, 43,
102 51, 59, 20, 28, 5, 13, 6, 14,
103 21, 29, 36, 44, 52, 60, 37, 45,
104 53, 61, 22, 30, 7, 15, 23, 31,
105 38, 46, 54, 62, 39, 47, 55, 63,
108 /* Input permutation for the simple_idct_mmx */
109 static const uint8_t simple_mmx_permutation[64]={
110 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
111 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
112 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
113 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
114 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
115 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
116 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
117 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
120 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
122 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
126 st->scantable= src_scantable;
130 j = src_scantable[i];
131 st->permutated[i] = permutation[j];
137 j = st->permutated[i];
139 st->raster_end[i]= end;
143 void ff_init_scantable_permutation(uint8_t *idct_permutation,
144 int idct_permutation_type)
148 switch(idct_permutation_type){
149 case FF_NO_IDCT_PERM:
151 idct_permutation[i]= i;
153 case FF_LIBMPEG2_IDCT_PERM:
155 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
157 case FF_SIMPLE_IDCT_PERM:
159 idct_permutation[i]= simple_mmx_permutation[i];
161 case FF_TRANSPOSE_IDCT_PERM:
163 idct_permutation[i]= ((i&7)<<3) | (i>>3);
165 case FF_PARTTRANS_IDCT_PERM:
167 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
169 case FF_SSE2_IDCT_PERM:
171 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
174 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
178 static int pix_sum_c(uint8_t * pix, int line_size)
183 for (i = 0; i < 16; i++) {
184 for (j = 0; j < 16; j += 8) {
195 pix += line_size - 16;
200 static int pix_norm1_c(uint8_t * pix, int line_size)
203 uint32_t *sq = ff_squareTbl + 256;
206 for (i = 0; i < 16; i++) {
207 for (j = 0; j < 16; j += 8) {
219 register uint64_t x=*(uint64_t*)pix;
221 s += sq[(x>>8)&0xff];
222 s += sq[(x>>16)&0xff];
223 s += sq[(x>>24)&0xff];
224 s += sq[(x>>32)&0xff];
225 s += sq[(x>>40)&0xff];
226 s += sq[(x>>48)&0xff];
227 s += sq[(x>>56)&0xff];
229 register uint32_t x=*(uint32_t*)pix;
231 s += sq[(x>>8)&0xff];
232 s += sq[(x>>16)&0xff];
233 s += sq[(x>>24)&0xff];
234 x=*(uint32_t*)(pix+4);
236 s += sq[(x>>8)&0xff];
237 s += sq[(x>>16)&0xff];
238 s += sq[(x>>24)&0xff];
243 pix += line_size - 16;
248 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
251 for(i=0; i+8<=w; i+=8){
252 dst[i+0]= av_bswap32(src[i+0]);
253 dst[i+1]= av_bswap32(src[i+1]);
254 dst[i+2]= av_bswap32(src[i+2]);
255 dst[i+3]= av_bswap32(src[i+3]);
256 dst[i+4]= av_bswap32(src[i+4]);
257 dst[i+5]= av_bswap32(src[i+5]);
258 dst[i+6]= av_bswap32(src[i+6]);
259 dst[i+7]= av_bswap32(src[i+7]);
262 dst[i+0]= av_bswap32(src[i+0]);
266 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
269 *dst++ = av_bswap16(*src++);
272 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
275 uint32_t *sq = ff_squareTbl + 256;
278 for (i = 0; i < h; i++) {
279 s += sq[pix1[0] - pix2[0]];
280 s += sq[pix1[1] - pix2[1]];
281 s += sq[pix1[2] - pix2[2]];
282 s += sq[pix1[3] - pix2[3]];
289 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
292 uint32_t *sq = ff_squareTbl + 256;
295 for (i = 0; i < h; i++) {
296 s += sq[pix1[0] - pix2[0]];
297 s += sq[pix1[1] - pix2[1]];
298 s += sq[pix1[2] - pix2[2]];
299 s += sq[pix1[3] - pix2[3]];
300 s += sq[pix1[4] - pix2[4]];
301 s += sq[pix1[5] - pix2[5]];
302 s += sq[pix1[6] - pix2[6]];
303 s += sq[pix1[7] - pix2[7]];
310 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
313 uint32_t *sq = ff_squareTbl + 256;
316 for (i = 0; i < h; i++) {
317 s += sq[pix1[ 0] - pix2[ 0]];
318 s += sq[pix1[ 1] - pix2[ 1]];
319 s += sq[pix1[ 2] - pix2[ 2]];
320 s += sq[pix1[ 3] - pix2[ 3]];
321 s += sq[pix1[ 4] - pix2[ 4]];
322 s += sq[pix1[ 5] - pix2[ 5]];
323 s += sq[pix1[ 6] - pix2[ 6]];
324 s += sq[pix1[ 7] - pix2[ 7]];
325 s += sq[pix1[ 8] - pix2[ 8]];
326 s += sq[pix1[ 9] - pix2[ 9]];
327 s += sq[pix1[10] - pix2[10]];
328 s += sq[pix1[11] - pix2[11]];
329 s += sq[pix1[12] - pix2[12]];
330 s += sq[pix1[13] - pix2[13]];
331 s += sq[pix1[14] - pix2[14]];
332 s += sq[pix1[15] - pix2[15]];
340 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
341 const uint8_t *s2, int stride){
344 /* read the pixels */
346 block[0] = s1[0] - s2[0];
347 block[1] = s1[1] - s2[1];
348 block[2] = s1[2] - s2[2];
349 block[3] = s1[3] - s2[3];
350 block[4] = s1[4] - s2[4];
351 block[5] = s1[5] - s2[5];
352 block[6] = s1[6] - s2[6];
353 block[7] = s1[7] - s2[7];
361 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
366 /* read the pixels */
368 pixels[0] = av_clip_uint8(block[0]);
369 pixels[1] = av_clip_uint8(block[1]);
370 pixels[2] = av_clip_uint8(block[2]);
371 pixels[3] = av_clip_uint8(block[3]);
372 pixels[4] = av_clip_uint8(block[4]);
373 pixels[5] = av_clip_uint8(block[5]);
374 pixels[6] = av_clip_uint8(block[6]);
375 pixels[7] = av_clip_uint8(block[7]);
382 static void put_signed_pixels_clamped_c(const DCTELEM *block,
383 uint8_t *restrict pixels,
388 for (i = 0; i < 8; i++) {
389 for (j = 0; j < 8; j++) {
392 else if (*block > 127)
395 *pixels = (uint8_t)(*block + 128);
399 pixels += (line_size - 8);
403 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
408 /* read the pixels */
410 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
411 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
412 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
413 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
414 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
415 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
416 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
417 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
423 static int sum_abs_dctelem_c(DCTELEM *block)
427 sum+= FFABS(block[i]);
431 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
435 for (i = 0; i < h; i++) {
436 memset(block, value, 16);
441 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
445 for (i = 0; i < h; i++) {
446 memset(block, value, 8);
/* Rounding averages of 2 and 4 values (round half up). */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
454 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
456 const int A=(16-x16)*(16-y16);
457 const int B=( x16)*(16-y16);
458 const int C=(16-x16)*( y16);
459 const int D=( x16)*( y16);
464 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
465 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
466 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
467 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
468 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
469 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
470 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
471 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
477 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
478 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
481 const int s= 1<<shift;
491 for(x=0; x<8; x++){ //XXX FIXME optimize
492 int src_x, src_y, frac_x, frac_y, index;
501 if((unsigned)src_x < width){
502 if((unsigned)src_y < height){
503 index= src_x + src_y*stride;
504 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
505 + src[index +1]* frac_x )*(s-frac_y)
506 + ( src[index+stride ]*(s-frac_x)
507 + src[index+stride+1]* frac_x )* frac_y
510 index= src_x + av_clip(src_y, 0, height)*stride;
511 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
512 + src[index +1]* frac_x )*s
516 if((unsigned)src_y < height){
517 index= av_clip(src_x, 0, width) + src_y*stride;
518 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
519 + src[index+stride ]* frac_y )*s
522 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
523 dst[y*stride + x]= src[index ];
535 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
537 case 2: put_pixels2_8_c (dst, src, stride, height); break;
538 case 4: put_pixels4_8_c (dst, src, stride, height); break;
539 case 8: put_pixels8_8_c (dst, src, stride, height); break;
540 case 16:put_pixels16_8_c(dst, src, stride, height); break;
544 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
546 for (i=0; i < height; i++) {
547 for (j=0; j < width; j++) {
548 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
555 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
557 for (i=0; i < height; i++) {
558 for (j=0; j < width; j++) {
559 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
566 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
568 for (i=0; i < height; i++) {
569 for (j=0; j < width; j++) {
570 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
577 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
579 for (i=0; i < height; i++) {
580 for (j=0; j < width; j++) {
581 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
588 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
590 for (i=0; i < height; i++) {
591 for (j=0; j < width; j++) {
592 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
599 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
601 for (i=0; i < height; i++) {
602 for (j=0; j < width; j++) {
603 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
610 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
612 for (i=0; i < height; i++) {
613 for (j=0; j < width; j++) {
614 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
621 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
623 for (i=0; i < height; i++) {
624 for (j=0; j < width; j++) {
625 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
632 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
634 case 2: avg_pixels2_8_c (dst, src, stride, height); break;
635 case 4: avg_pixels4_8_c (dst, src, stride, height); break;
636 case 8: avg_pixels8_8_c (dst, src, stride, height); break;
637 case 16:avg_pixels16_8_c(dst, src, stride, height); break;
641 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
643 for (i=0; i < height; i++) {
644 for (j=0; j < width; j++) {
645 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
652 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
654 for (i=0; i < height; i++) {
655 for (j=0; j < width; j++) {
656 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
663 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
665 for (i=0; i < height; i++) {
666 for (j=0; j < width; j++) {
667 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
674 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
676 for (i=0; i < height; i++) {
677 for (j=0; j < width; j++) {
678 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
685 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
687 for (i=0; i < height; i++) {
688 for (j=0; j < width; j++) {
689 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
696 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
698 for (i=0; i < height; i++) {
699 for (j=0; j < width; j++) {
700 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
707 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
709 for (i=0; i < height; i++) {
710 for (j=0; j < width; j++) {
711 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
718 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
720 for (i=0; i < height; i++) {
721 for (j=0; j < width; j++) {
722 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
729 #define QPEL_MC(r, OPNAME, RND, OP) \
730 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
731 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
735 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
736 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
737 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
738 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
739 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
740 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
741 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
742 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
748 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
750 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
754 const int src0= src[0*srcStride];\
755 const int src1= src[1*srcStride];\
756 const int src2= src[2*srcStride];\
757 const int src3= src[3*srcStride];\
758 const int src4= src[4*srcStride];\
759 const int src5= src[5*srcStride];\
760 const int src6= src[6*srcStride];\
761 const int src7= src[7*srcStride];\
762 const int src8= src[8*srcStride];\
763 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
764 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
765 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
766 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
767 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
768 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
769 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
770 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
776 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
777 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
782 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
783 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
784 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
785 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
786 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
787 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
788 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
789 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
790 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
791 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
792 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
793 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
794 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
795 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
796 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
797 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
803 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
809 const int src0= src[0*srcStride];\
810 const int src1= src[1*srcStride];\
811 const int src2= src[2*srcStride];\
812 const int src3= src[3*srcStride];\
813 const int src4= src[4*srcStride];\
814 const int src5= src[5*srcStride];\
815 const int src6= src[6*srcStride];\
816 const int src7= src[7*srcStride];\
817 const int src8= src[8*srcStride];\
818 const int src9= src[9*srcStride];\
819 const int src10= src[10*srcStride];\
820 const int src11= src[11*srcStride];\
821 const int src12= src[12*srcStride];\
822 const int src13= src[13*srcStride];\
823 const int src14= src[14*srcStride];\
824 const int src15= src[15*srcStride];\
825 const int src16= src[16*srcStride];\
826 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
827 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
828 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
829 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
830 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
831 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
832 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
833 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
834 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
835 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
836 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
837 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
838 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
839 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
840 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
841 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
847 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
849 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
850 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
853 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
854 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
857 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
859 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
860 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
863 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
866 copy_block9(full, src, 16, stride, 9);\
867 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
868 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
871 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
873 copy_block9(full, src, 16, stride, 9);\
874 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
877 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
880 copy_block9(full, src, 16, stride, 9);\
881 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
882 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
884 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
889 copy_block9(full, src, 16, stride, 9);\
890 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
891 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
892 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
893 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
895 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
899 copy_block9(full, src, 16, stride, 9);\
900 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
901 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
902 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
903 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
905 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
910 copy_block9(full, src, 16, stride, 9);\
911 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
912 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
913 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
914 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
916 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
920 copy_block9(full, src, 16, stride, 9);\
921 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
922 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
923 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
924 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
926 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
931 copy_block9(full, src, 16, stride, 9);\
932 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
933 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
934 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
935 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
937 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
941 copy_block9(full, src, 16, stride, 9);\
942 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
943 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
944 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
945 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
947 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
952 copy_block9(full, src, 16, stride, 9);\
953 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
954 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
955 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
956 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
958 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
962 copy_block9(full, src, 16, stride, 9);\
963 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
964 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
965 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
966 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
968 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
971 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
972 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
973 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
975 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
982 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
987 copy_block9(full, src, 16, stride, 9);\
988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
993 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
996 copy_block9(full, src, 16, stride, 9);\
997 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
998 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
999 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1001 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1002 uint8_t full[16*9];\
1005 uint8_t halfHV[64];\
1006 copy_block9(full, src, 16, stride, 9);\
1007 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1008 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1009 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1010 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1012 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1013 uint8_t full[16*9];\
1015 copy_block9(full, src, 16, stride, 9);\
1016 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1017 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1018 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1020 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1022 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1023 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1026 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1028 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1029 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1032 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1033 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1036 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1038 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1039 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1042 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1043 uint8_t full[24*17];\
1045 copy_block17(full, src, 24, stride, 17);\
1046 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1047 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1050 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1051 uint8_t full[24*17];\
1052 copy_block17(full, src, 24, stride, 17);\
1053 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1056 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1057 uint8_t full[24*17];\
1059 copy_block17(full, src, 24, stride, 17);\
1060 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1061 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1063 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1064 uint8_t full[24*17];\
1065 uint8_t halfH[272];\
1066 uint8_t halfV[256];\
1067 uint8_t halfHV[256];\
1068 copy_block17(full, src, 24, stride, 17);\
1069 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1070 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1071 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1072 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1074 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1075 uint8_t full[24*17];\
1076 uint8_t halfH[272];\
1077 uint8_t halfHV[256];\
1078 copy_block17(full, src, 24, stride, 17);\
1079 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1080 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1081 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1082 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1084 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1085 uint8_t full[24*17];\
1086 uint8_t halfH[272];\
1087 uint8_t halfV[256];\
1088 uint8_t halfHV[256];\
1089 copy_block17(full, src, 24, stride, 17);\
1090 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1091 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1092 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1093 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1095 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1096 uint8_t full[24*17];\
1097 uint8_t halfH[272];\
1098 uint8_t halfHV[256];\
1099 copy_block17(full, src, 24, stride, 17);\
1100 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1101 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1102 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1103 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1105 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1106 uint8_t full[24*17];\
1107 uint8_t halfH[272];\
1108 uint8_t halfV[256];\
1109 uint8_t halfHV[256];\
1110 copy_block17(full, src, 24, stride, 17);\
1111 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1112 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1113 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1114 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1116 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1117 uint8_t full[24*17];\
1118 uint8_t halfH[272];\
1119 uint8_t halfHV[256];\
1120 copy_block17(full, src, 24, stride, 17);\
1121 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1122 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1123 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1124 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1126 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1127 uint8_t full[24*17];\
1128 uint8_t halfH[272];\
1129 uint8_t halfV[256];\
1130 uint8_t halfHV[256];\
1131 copy_block17(full, src, 24, stride, 17);\
1132 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1133 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1134 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1135 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1137 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1138 uint8_t full[24*17];\
1139 uint8_t halfH[272];\
1140 uint8_t halfHV[256];\
1141 copy_block17(full, src, 24, stride, 17);\
1142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1143 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1145 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1147 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1148 uint8_t halfH[272];\
1149 uint8_t halfHV[256];\
1150 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1151 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1152 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1154 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1155 uint8_t halfH[272];\
1156 uint8_t halfHV[256];\
1157 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1158 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1159 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1161 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1162 uint8_t full[24*17];\
1163 uint8_t halfH[272];\
1164 uint8_t halfV[256];\
1165 uint8_t halfHV[256];\
1166 copy_block17(full, src, 24, stride, 17);\
1167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1172 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1173 uint8_t full[24*17];\
1174 uint8_t halfH[272];\
1175 copy_block17(full, src, 24, stride, 17);\
1176 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1177 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1178 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1180 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1181 uint8_t full[24*17];\
1182 uint8_t halfH[272];\
1183 uint8_t halfV[256];\
1184 uint8_t halfHV[256];\
1185 copy_block17(full, src, 24, stride, 17);\
1186 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1187 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1188 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1189 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1191 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1192 uint8_t full[24*17];\
1193 uint8_t halfH[272];\
1194 copy_block17(full, src, 24, stride, 17);\
1195 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1196 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1197 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1199 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1200 uint8_t halfH[272];\
1201 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1202 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1205 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1206 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1207 #define op_put(a, b) a = cm[((b) + 16)>>5]
1208 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1210 QPEL_MC(0, put_ , _ , op_put)
1211 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1212 QPEL_MC(0, avg_ , _ , op_avg)
1213 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1215 #undef op_avg_no_rnd
1217 #undef op_put_no_rnd
1219 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1220 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1221 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1222 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1223 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1224 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1226 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1227 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1231 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1232 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1233 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1234 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1235 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1236 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1237 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1238 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1244 #if CONFIG_RV40_DECODER
1245 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1246 put_pixels16_xy2_8_c(dst, src, stride, 16);
1248 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1249 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1251 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1252 put_pixels8_xy2_8_c(dst, src, stride, 8);
1254 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1255 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1257 #endif /* CONFIG_RV40_DECODER */
1259 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1260 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1264 const int src_1= src[ -srcStride];
1265 const int src0 = src[0 ];
1266 const int src1 = src[ srcStride];
1267 const int src2 = src[2*srcStride];
1268 const int src3 = src[3*srcStride];
1269 const int src4 = src[4*srcStride];
1270 const int src5 = src[5*srcStride];
1271 const int src6 = src[6*srcStride];
1272 const int src7 = src[7*srcStride];
1273 const int src8 = src[8*srcStride];
1274 const int src9 = src[9*srcStride];
1275 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1276 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1277 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1278 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1279 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1280 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1281 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1282 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1288 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1290 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1291 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1294 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1295 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1298 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1300 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1301 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1304 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1305 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1308 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1312 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1313 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1314 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1315 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1317 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1321 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1322 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1323 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1324 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1326 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1328 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1329 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
1332 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1333 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1335 const int strength= ff_h263_loop_filter_strength[qscale];
1339 int p0= src[x-2*stride];
1340 int p1= src[x-1*stride];
1341 int p2= src[x+0*stride];
1342 int p3= src[x+1*stride];
1343 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1345 if (d<-2*strength) d1= 0;
1346 else if(d<- strength) d1=-2*strength - d;
1347 else if(d< strength) d1= d;
1348 else if(d< 2*strength) d1= 2*strength - d;
1353 if(p1&256) p1= ~(p1>>31);
1354 if(p2&256) p2= ~(p2>>31);
1356 src[x-1*stride] = p1;
1357 src[x+0*stride] = p2;
1361 d2= av_clip((p0-p3)/4, -ad1, ad1);
1363 src[x-2*stride] = p0 - d2;
1364 src[x+ stride] = p3 + d2;
1369 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1370 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1372 const int strength= ff_h263_loop_filter_strength[qscale];
1376 int p0= src[y*stride-2];
1377 int p1= src[y*stride-1];
1378 int p2= src[y*stride+0];
1379 int p3= src[y*stride+1];
1380 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1382 if (d<-2*strength) d1= 0;
1383 else if(d<- strength) d1=-2*strength - d;
1384 else if(d< strength) d1= d;
1385 else if(d< 2*strength) d1= 2*strength - d;
1390 if(p1&256) p1= ~(p1>>31);
1391 if(p2&256) p2= ~(p2>>31);
1393 src[y*stride-1] = p1;
1394 src[y*stride+0] = p2;
1398 d2= av_clip((p0-p3)/4, -ad1, ad1);
1400 src[y*stride-2] = p0 - d2;
1401 src[y*stride+1] = p3 + d2;
1406 static void h261_loop_filter_c(uint8_t *src, int stride){
1411 temp[x ] = 4*src[x ];
1412 temp[x + 7*8] = 4*src[x + 7*stride];
1416 xy = y * stride + x;
1418 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
1423 src[ y*stride] = (temp[ y*8] + 2)>>2;
1424 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1426 xy = y * stride + x;
1428 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
1433 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1439 s += abs(pix1[0] - pix2[0]);
1440 s += abs(pix1[1] - pix2[1]);
1441 s += abs(pix1[2] - pix2[2]);
1442 s += abs(pix1[3] - pix2[3]);
1443 s += abs(pix1[4] - pix2[4]);
1444 s += abs(pix1[5] - pix2[5]);
1445 s += abs(pix1[6] - pix2[6]);
1446 s += abs(pix1[7] - pix2[7]);
1447 s += abs(pix1[8] - pix2[8]);
1448 s += abs(pix1[9] - pix2[9]);
1449 s += abs(pix1[10] - pix2[10]);
1450 s += abs(pix1[11] - pix2[11]);
1451 s += abs(pix1[12] - pix2[12]);
1452 s += abs(pix1[13] - pix2[13]);
1453 s += abs(pix1[14] - pix2[14]);
1454 s += abs(pix1[15] - pix2[15]);
1461 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1467 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1468 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1469 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1470 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1471 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1472 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1473 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1474 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1475 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1476 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1477 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1478 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1479 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1480 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1481 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1482 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1489 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1492 uint8_t *pix3 = pix2 + line_size;
1496 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1497 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1498 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1499 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1500 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1501 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1502 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1503 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1504 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1505 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1506 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1507 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1508 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1509 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1510 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1511 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1519 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1522 uint8_t *pix3 = pix2 + line_size;
1526 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1527 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1528 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1529 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1530 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1531 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1532 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1533 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1534 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1535 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1536 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1537 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1538 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1539 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1540 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1541 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1549 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1555 s += abs(pix1[0] - pix2[0]);
1556 s += abs(pix1[1] - pix2[1]);
1557 s += abs(pix1[2] - pix2[2]);
1558 s += abs(pix1[3] - pix2[3]);
1559 s += abs(pix1[4] - pix2[4]);
1560 s += abs(pix1[5] - pix2[5]);
1561 s += abs(pix1[6] - pix2[6]);
1562 s += abs(pix1[7] - pix2[7]);
1569 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1575 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1576 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1577 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1578 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1579 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1580 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1581 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1582 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1589 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1592 uint8_t *pix3 = pix2 + line_size;
1596 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1597 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1598 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1599 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1600 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1601 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1602 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1603 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1611 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1614 uint8_t *pix3 = pix2 + line_size;
1618 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1619 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1620 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1621 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1622 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1623 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1624 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1625 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1633 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1634 MpegEncContext *c = v;
1640 for(x=0; x<16; x++){
1641 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1644 for(x=0; x<15; x++){
1645 score2+= FFABS( s1[x ] - s1[x +stride]
1646 - s1[x+1] + s1[x+1+stride])
1647 -FFABS( s2[x ] - s2[x +stride]
1648 - s2[x+1] + s2[x+1+stride]);
1655 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1656 else return score1 + FFABS(score2)*8;
1659 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1660 MpegEncContext *c = v;
1667 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1671 score2+= FFABS( s1[x ] - s1[x +stride]
1672 - s1[x+1] + s1[x+1+stride])
1673 -FFABS( s2[x ] - s2[x +stride]
1674 - s2[x+1] + s2[x+1+stride]);
1681 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1682 else return score1 + FFABS(score2)*8;
1685 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1689 for(i=0; i<8*8; i++){
1690 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1693 assert(-512<b && b<512);
1695 sum += (w*b)*(w*b)>>4;
1700 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1703 for(i=0; i<8*8; i++){
1704 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1709 * Permute an 8x8 block.
1710 * @param block the block which will be permuted according to the given permutation vector
1711 * @param permutation the permutation vector
1712 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1713 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1714 * (inverse) permutated to scantable order!
1716 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1722 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
1724 for(i=0; i<=last; i++){
1725 const int j= scantable[i];
1730 for(i=0; i<=last; i++){
1731 const int j= scantable[i];
1732 const int perm_j= permutation[j];
1733 block[perm_j]= temp[j];
1737 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
1741 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1744 memset(cmp, 0, sizeof(void*)*6);
1752 cmp[i]= c->hadamard8_diff[i];
1758 cmp[i]= c->dct_sad[i];
1761 cmp[i]= c->dct264_sad[i];
1764 cmp[i]= c->dct_max[i];
1767 cmp[i]= c->quant_psnr[i];
1788 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1793 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1795 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1796 long a = *(long*)(src+i);
1797 long b = *(long*)(dst+i);
1798 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1801 dst[i+0] += src[i+0];
1804 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1806 #if !HAVE_FAST_UNALIGNED
1807 if((long)src2 & (sizeof(long)-1)){
1808 for(i=0; i+7<w; i+=8){
1809 dst[i+0] = src1[i+0]-src2[i+0];
1810 dst[i+1] = src1[i+1]-src2[i+1];
1811 dst[i+2] = src1[i+2]-src2[i+2];
1812 dst[i+3] = src1[i+3]-src2[i+3];
1813 dst[i+4] = src1[i+4]-src2[i+4];
1814 dst[i+5] = src1[i+5]-src2[i+5];
1815 dst[i+6] = src1[i+6]-src2[i+6];
1816 dst[i+7] = src1[i+7]-src2[i+7];
1820 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1821 long a = *(long*)(src1+i);
1822 long b = *(long*)(src2+i);
1823 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1826 dst[i+0] = src1[i+0]-src2[i+0];
1829 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1837 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1846 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1854 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1864 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1867 for(i=0; i<w-1; i++){
1894 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
1924 #define BUTTERFLY2(o1,o2,i1,i2) \
1928 #define BUTTERFLY1(x,y) \
1937 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1939 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1947 //FIXME try pointer walks
1948 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1949 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1950 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1951 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1953 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1954 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1955 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1956 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1958 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1959 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1960 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1961 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1965 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1966 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1967 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1968 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1970 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1971 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1972 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1973 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1976 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1977 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1978 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1979 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1984 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
1992 //FIXME try pointer walks
1993 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
1994 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
1995 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
1996 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
1998 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1999 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2000 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2001 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2003 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2004 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2005 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2006 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2010 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2011 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2012 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2013 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2015 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2016 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2017 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2018 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2021 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2022 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2023 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2024 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2027 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
2032 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2033 MpegEncContext * const s= (MpegEncContext *)c;
2034 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2038 s->dsp.diff_pixels(temp, src1, src2, stride);
2040 return s->dsp.sum_abs_dctelem(temp);
2045 const int s07 = SRC(0) + SRC(7);\
2046 const int s16 = SRC(1) + SRC(6);\
2047 const int s25 = SRC(2) + SRC(5);\
2048 const int s34 = SRC(3) + SRC(4);\
2049 const int a0 = s07 + s34;\
2050 const int a1 = s16 + s25;\
2051 const int a2 = s07 - s34;\
2052 const int a3 = s16 - s25;\
2053 const int d07 = SRC(0) - SRC(7);\
2054 const int d16 = SRC(1) - SRC(6);\
2055 const int d25 = SRC(2) - SRC(5);\
2056 const int d34 = SRC(3) - SRC(4);\
2057 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2058 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2059 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2060 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2062 DST(1, a4 + (a7>>2)) ;\
2063 DST(2, a2 + (a3>>1)) ;\
2064 DST(3, a5 + (a6>>2)) ;\
2066 DST(5, a6 - (a5>>2)) ;\
2067 DST(6, (a2>>1) - a3 ) ;\
2068 DST(7, (a4>>2) - a7 ) ;\
2071 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2072 MpegEncContext * const s= (MpegEncContext *)c;
2077 s->dsp.diff_pixels(dct[0], src1, src2, stride);
2079 #define SRC(x) dct[i][x]
2080 #define DST(x,v) dct[i][x]= v
2081 for( i = 0; i < 8; i++ )
2086 #define SRC(x) dct[x][i]
2087 #define DST(x,v) sum += FFABS(v)
2088 for( i = 0; i < 8; i++ )
2096 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2097 MpegEncContext * const s= (MpegEncContext *)c;
2098 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2103 s->dsp.diff_pixels(temp, src1, src2, stride);
2107 sum= FFMAX(sum, FFABS(temp[i]));
2112 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2113 MpegEncContext * const s= (MpegEncContext *)c;
2114 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2115 DCTELEM * const bak = temp+64;
2121 s->dsp.diff_pixels(temp, src1, src2, stride);
2123 memcpy(bak, temp, 64*sizeof(DCTELEM));
2125 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2126 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2127 ff_simple_idct_8(temp); //FIXME
2130 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2135 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2136 MpegEncContext * const s= (MpegEncContext *)c;
2137 const uint8_t *scantable= s->intra_scantable.permutated;
2138 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2139 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2140 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2141 int i, last, run, bits, level, distortion, start_i;
2142 const int esc_length= s->ac_esc_length;
2144 uint8_t * last_length;
2148 copy_block8(lsrc1, src1, 8, stride, 8);
2149 copy_block8(lsrc2, src2, 8, stride, 8);
2151 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2153 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2159 length = s->intra_ac_vlc_length;
2160 last_length= s->intra_ac_vlc_last_length;
2161 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2164 length = s->inter_ac_vlc_length;
2165 last_length= s->inter_ac_vlc_last_length;
2170 for(i=start_i; i<last; i++){
2171 int j= scantable[i];
2176 if((level&(~127)) == 0){
2177 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2186 level= temp[i] + 64;
2190 if((level&(~127)) == 0){
2191 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2199 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2201 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2204 s->dsp.idct_add(lsrc2, 8, temp);
2206 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2208 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2211 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2212 MpegEncContext * const s= (MpegEncContext *)c;
2213 const uint8_t *scantable= s->intra_scantable.permutated;
2214 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2215 int i, last, run, bits, level, start_i;
2216 const int esc_length= s->ac_esc_length;
2218 uint8_t * last_length;
2222 s->dsp.diff_pixels(temp, src1, src2, stride);
2224 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2230 length = s->intra_ac_vlc_length;
2231 last_length= s->intra_ac_vlc_last_length;
2232 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2235 length = s->inter_ac_vlc_length;
2236 last_length= s->inter_ac_vlc_last_length;
2241 for(i=start_i; i<last; i++){
2242 int j= scantable[i];
2247 if((level&(~127)) == 0){
2248 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2257 level= temp[i] + 64;
2261 if((level&(~127)) == 0){
2262 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2270 #define VSAD_INTRA(size) \
2271 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2275 for(y=1; y<h; y++){ \
2276 for(x=0; x<size; x+=4){ \
2277 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2278 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2288 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2293 for(x=0; x<16; x++){
2294 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2303 #define SQ(a) ((a)*(a))
2304 #define VSSE_INTRA(size) \
2305 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2309 for(y=1; y<h; y++){ \
2310 for(x=0; x<size; x+=4){ \
2311 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2312 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2322 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2327 for(x=0; x<16; x++){
2328 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2337 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2341 for(i=0; i<size; i++)
2342 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2346 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2347 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2348 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2350 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2352 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2353 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2354 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2355 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2357 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
2360 for(i=0; i<len; i++)
2361 dst[i] = src0[i] * src1[-i];
2364 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
2366 for(i=0; i<len; i++)
2367 dst[i] = src0[i] * src1[i] + src2[i];
2370 static void vector_fmul_window_c(float *dst, const float *src0,
2371 const float *src1, const float *win, int len)
2377 for(i=-len, j=len-1; i<0; i++, j--) {
2382 dst[i] = s0*wj - s1*wi;
2383 dst[j] = s0*wi + s1*wj;
/* In-place butterfly over two non-aliasing vectors: per element computes the
 * difference t = v1[i]-v2[i]; the elided lines (2393-2394) then presumably
 * store the sum into v1 and t into v2 — confirm against the original. */
2387 static void butterflies_float_c(float *restrict v1, float *restrict v2,
2391     for (i = 0; i < len; i++) {
2392         float t = v1[i] - v2[i];
/* Butterfly with interleaved output: dst[2i] = f1+f2, dst[2i+1] = f1-f2,
 * where f1/f2 are loaded from src0/src1 on the elided lines 2403-2404. */
2398 static void butterflies_float_interleave_c(float *dst, const float *src0,
2399                                            const float *src1, int len)
2402     for (i = 0; i < len; i++) {
2405         dst[2*i    ] = f1 + f2;
2406         dst[2*i + 1] = f1 - f2;
/* Dot product of two float vectors. NOTE(review): the accumulator
 * declaration, the per-element accumulation and the return statement all fall
 * on elided lines (2411-2414, 2416+). */
2410 float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
2415     for (i = 0; i < len; i++)
/* Clip one float, operating on its IEEE-754 bit pattern as uint32_t, for the
 * special case where min < 0 < max: comparing raw bit patterns is monotonic
 * within one sign, and the a^(1U<<31) flip handles the negative half.
 * NOTE(review): the fall-through "return a" path is on an elided line. */
2421 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
2422                                    uint32_t maxi, uint32_t maxisign)
2425     if(a > mini) return mini;
2426     else if((a^(1U<<31)) > maxisign) return maxi;
/* Fast float clipping for the min<0<max case, 8 elements per iteration.
 * Reinterprets the float buffers as uint32_t and delegates per element to
 * clipf_c_one. NOTE(review): the uint32_t* casts of float* technically
 * violate strict aliasing; the original code base accepts this. len is
 * assumed to be a multiple of 8 by the unrolling (as in vector_clipf_c). */
2430 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2432     uint32_t mini = *(uint32_t*)min;
2433     uint32_t maxi = *(uint32_t*)max;
2434     uint32_t maxisign = maxi ^ (1U<<31);
2435     uint32_t *dsti = (uint32_t*)dst;
2436     const uint32_t *srci = (const uint32_t*)src;
2437     for(i=0; i<len; i+=8) {
2438         dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2439         dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2440         dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2441         dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2442         dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2443         dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2444         dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2445         dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
/* Clip each float in src to [min, max], 8 elements per iteration (len is
 * assumed to be a multiple of 8). When min and max straddle zero, the
 * bit-pattern fast path vector_clipf_c_opposite_sign is taken instead;
 * otherwise plain av_clipf is used. NOTE(review): the else/brace framing
 * between the two paths sits on elided lines (2449, 2452). */
2448 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
2450     if(min < 0 && max > 0) {
2451         vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
2453         for(i=0; i < len; i+=8) {
2454             dst[i    ] = av_clipf(src[i    ], min, max);
2455             dst[i + 1] = av_clipf(src[i + 1], min, max);
2456             dst[i + 2] = av_clipf(src[i + 2], min, max);
2457             dst[i + 3] = av_clipf(src[i + 3], min, max);
2458             dst[i + 4] = av_clipf(src[i + 4], min, max);
2459             dst[i + 5] = av_clipf(src[i + 5], min, max);
2460             dst[i + 6] = av_clipf(src[i + 6], min, max);
2461             dst[i + 7] = av_clipf(src[i + 7], min, max);
/* Dot product of two int16 vectors, accumulated in int32. NOTE(review): the
 * accumulator declaration, loop header and return are on elided lines. */
2466 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
2471         res += *v1++ * *v2++;
/* Combined dot product and multiply-accumulate: while (per the elided loop
 * body) v1·v2 is being accumulated, each v1 element is also updated with
 * mul * v3 in place. NOTE(review): declarations, the accumulation of the dot
 * product, and the return are on elided lines (2477-2480, 2482+). */
2476 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
2481         *v1++ += mul * *v3++;
/* Apply a symmetric int16 window to an int16 signal with Q15 fixed-point
 * arithmetic: each product is rounded (+1<<14) and shifted down by 15.
 * The window is stored as its first half only; sample i and its mirror
 * len-i-1 share the same tap w = window[i]. */
2486 static void apply_window_int16_c(int16_t *output, const int16_t *input,
2487                                  const int16_t *window, unsigned int len)
2490     int len2 = len >> 1;
2492     for (i = 0; i < len2; i++) {
2493         int16_t w = window[i];
2494         output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
2495         output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
/* Clip each int32 in src to [min, max], manually unrolled 8 elements per
 * round; the loop framing (do/while on len, elided lines 2501-2502 and after
 * 2510) implies len is treated as a multiple of 8. */
2499 static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2500                                 int32_t max, unsigned int len)
2503         *dst++ = av_clip(*src++, min, max);
2504         *dst++ = av_clip(*src++, min, max);
2505         *dst++ = av_clip(*src++, min, max);
2506         *dst++ = av_clip(*src++, min, max);
2507         *dst++ = av_clip(*src++, min, max);
2508         *dst++ = av_clip(*src++, min, max);
2509         *dst++ = av_clip(*src++, min, max);
2510         *dst++ = av_clip(*src++, min, max);
/* Fixed-point IDCT cosine constants for the WMV2 IDCT below, scaled by
 * 2048*sqrt(2): Wk = round(2048*sqrt(2)*cos(k*pi/16)).
 * NOTE(review): the IDCT code below also uses W0, which is not defined in the
 * visible lines — its #define (before line 2516 or on elided line 2523)
 * was dropped by the extraction; restore it before compiling. */
2516 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
2517 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
2518 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
2519 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
2520 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
2521 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
2522 #define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
/* One row of the WMV2 8-point IDCT (fixed point). Stage 1 forms the odd
 * (a1,a7,a5,a3), even (a2,a6) and DC (a0,a4) butterflies from the W-scaled
 * inputs; s1/s2 fold the odd terms with the 181/256 ~= 1/sqrt(2) rounding
 * multiply; stage 2 writes the eight outputs with +128 rounding and >>8
 * descale. NOTE(review): declarations of s1,s2 (elided lines 2525-2526,
 * 2528) and the closing brace were dropped by the extraction. */
2524 static void wmv2_idct_row(short * b)
2527     int a0,a1,a2,a3,a4,a5,a6,a7;
2529     a1 = W1*b[1]+W7*b[7];
2530     a7 = W7*b[1]-W1*b[7];
2531     a5 = W5*b[5]+W3*b[3];
2532     a3 = W3*b[5]-W5*b[3];
2533     a2 = W2*b[2]+W6*b[6];
2534     a6 = W6*b[2]-W2*b[6];
2535     a0 = W0*b[0]+W0*b[4];
2536     a4 = W0*b[0]-W0*b[4];
2538     s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2539     s2 = (181*(a1-a5-a7+a3)+128)>>8;
2541     b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2542     b[1] = (a4+a6 +s1   + (1<<7))>>8;
2543     b[2] = (a4-a6 +s2   + (1<<7))>>8;
2544     b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2545     b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2546     b[5] = (a4-a6 -s2   + (1<<7))>>8;
2547     b[6] = (a4+a6 -s1   + (1<<7))>>8;
2548     b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
/* One column of the WMV2 8-point IDCT: same butterfly structure as the row
 * pass but with stride-8 accesses, extended intermediate precision in stage 1
 * (+4, >>3 rounding), and a deeper final descale (+1<<13, >>14) to undo both
 * passes' scaling. NOTE(review): s1/s2 declarations and the closing brace
 * fall on elided lines (2551-2552, 2563, 2566, after 2575). */
2550 static void wmv2_idct_col(short * b)
2553     int a0,a1,a2,a3,a4,a5,a6,a7;
2554     /*step 1, with extended precision*/
2555     a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2556     a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2557     a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2558     a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2559     a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2560     a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2561     a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
2562     a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
2564     s1 = (181*(a1-a5+a7-a3)+128)>>8;
2565     s2 = (181*(a1-a5-a7+a3)+128)>>8;
2567     b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2568     b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
2569     b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
2570     b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2572     b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2573     b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
2574     b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
2575     b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
/* Full 8x8 WMV2 IDCT: row pass over all 8 rows, then column pass over all 8
 * columns, in place on the 64-coefficient block. NOTE(review): the two loop
 * headers driving i are on elided lines (2578-2580, 2582-2583). */
2577 void ff_wmv2_idct_c(short * block){
2581         wmv2_idct_row(block+i);
2584         wmv2_idct_col(block+i);
2587 /* XXX: those functions should be suppressed ASAP when all IDCTs are converted to the same API */
/* IDCT-then-store wrapper: run the WMV2 IDCT in place, then clamp the result
 * to 8-bit and write it to dest (overwriting, not adding). */
2589 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2591     ff_wmv2_idct_c(block);
2592     put_pixels_clamped_c(block, dest, line_size);
/* IDCT-then-add wrapper: run the WMV2 IDCT in place, then add the clamped
 * result onto the existing pixels at dest (for inter/residual blocks). */
2594 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2596     ff_wmv2_idct_c(block);
2597     add_pixels_clamped_c(block, dest, line_size);
/* put wrapper around the IJG reference IDCT (ff_j_rev_dct): transform in
 * place, then clamp and store to dest. */
2599 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2601     ff_j_rev_dct (block);
2602     put_pixels_clamped_c(block, dest, line_size);
/* add wrapper around the IJG reference IDCT: transform in place, then add the
 * clamped result onto the existing pixels at dest. */
2604 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2606     ff_j_rev_dct (block);
2607     add_pixels_clamped_c(block, dest, line_size);
2610 /* init static data */
/* One-time initialization of process-wide lookup tables (no per-context
 * state): ff_cropTbl clamps int values to [0,255] with MAX_NEG_CROP headroom
 * on each side; ff_squareTbl maps i to (i-256)^2; the inverse zigzag table
 * stores position+1 so 0 can mean "absent". NOTE(review): the line writing
 * ff_cropTbl[i] = 0 for the low guard band (2617) is elided. */
2611 av_cold void ff_dsputil_static_init(void)
2615     for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2616     for(i=0;i<MAX_NEG_CROP;i++) {
2618         ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2621     for(i=0;i<512;i++) {
2622         ff_squareTbl[i] = (i - 256) * (i - 256);
2625     for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
/* Runtime sanity check that the compiler honors 16-byte stack alignment
 * (required by SSE/AltiVec code paths): allocate an aligned local and test
 * its address. On failure, warn once that libavcodec was miscompiled.
 * NOTE(review): the did_fail guard test, return statements and #endif are on
 * elided lines (2631, 2633, 2640+). */
2628 int ff_check_alignment(void){
2629     static int did_fail=0;
2630     LOCAL_ALIGNED_16(int, aligned, [4]);
2632     if((intptr_t)aligned & 15){
2634 #if HAVE_MMX || HAVE_ALTIVEC
2635             av_log(NULL, AV_LOG_ERROR,
2636                 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2637                 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2638                 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2639                 "Do not report crashes to Libav developers.\n");
/* Populate a DSPContext with the C reference implementations, then (further
 * below) let architecture-specific init functions override entries. This
 * first section selects the forward DCT for encoding: 10-bit input forces
 * the accurate islow fdct; otherwise dct_algo picks fast-int, FAAN, or the
 * accurate 8-bit default. NOTE(review): the enclosing braces/else lines and
 * the #if CONFIG_ENCODERS guard are elided; the function continues past the
 * end of this view. */
2648 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2652     ff_check_alignment();
2655     if (avctx->bits_per_raw_sample == 10) {
2656         c->fdct = ff_jpeg_fdct_islow_10;
2657         c->fdct248 = ff_fdct248_islow_10;
2659         if(avctx->dct_algo==FF_DCT_FASTINT) {
2660             c->fdct = ff_fdct_ifast;
2661             c->fdct248 = ff_fdct_ifast248;
2663         else if(avctx->dct_algo==FF_DCT_FAAN) {
2664             c->fdct = ff_faandct;
2665             c->fdct248 = ff_faandct248;
2668             c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2669             c->fdct248 = ff_fdct248_islow_8;
2672 #endif //CONFIG_ENCODERS
/* Inverse DCT selection: 10-bit content always uses the simple_idct_10
 * family; otherwise idct_algo chooses the IJG reference (with the libmpeg2
 * coefficient permutation), the WMV2 IDCT, FAAN, or the accurate 8-bit
 * simple IDCT as default. idct_permutation_type records which coefficient
 * reordering the chosen IDCT expects. */
2674     if (avctx->bits_per_raw_sample == 10) {
2675         c->idct_put              = ff_simple_idct_put_10;
2676         c->idct_add              = ff_simple_idct_add_10;
2677         c->idct                  = ff_simple_idct_10;
2678         c->idct_permutation_type = FF_NO_IDCT_PERM;
2680         if(avctx->idct_algo==FF_IDCT_INT){
2681             c->idct_put= ff_jref_idct_put;
2682             c->idct_add= ff_jref_idct_add;
2683             c->idct    = ff_j_rev_dct;
2684             c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2685         }else if(avctx->idct_algo==FF_IDCT_WMV2){
2686             c->idct_put= ff_wmv2_idct_put_c;
2687             c->idct_add= ff_wmv2_idct_add_c;
2688             c->idct    = ff_wmv2_idct_c;
2689             c->idct_permutation_type= FF_NO_IDCT_PERM;
2690         }else if(avctx->idct_algo==FF_IDCT_FAAN){
2691             c->idct_put= ff_faanidct_put;
2692             c->idct_add= ff_faanidct_add;
2693             c->idct    = ff_faanidct;
2694             c->idct_permutation_type= FF_NO_IDCT_PERM;
2695         }else{ //accurate/default
2696             c->idct_put = ff_simple_idct_put_8;
2697             c->idct_add = ff_simple_idct_add_8;
2698             c->idct     = ff_simple_idct_8;
2699             c->idct_permutation_type= FF_NO_IDCT_PERM;
/* Basic block/pixel primitives: residual computation, clamped block stores
 * and adds, DCT-coefficient absolute sum, block statistics, and the
 * fill_block helpers (index 0 = 16-wide, index 1 = 8-wide). */
2703     c->diff_pixels = diff_pixels_c;
2704     c->put_pixels_clamped = put_pixels_clamped_c;
2705     c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2706     c->add_pixels_clamped = add_pixels_clamped_c;
2707     c->sum_abs_dctelem = sum_abs_dctelem_c;
2710     c->pix_sum = pix_sum_c;
2711     c->pix_norm1 = pix_norm1_c;
2713     c->fill_block_tab[0] = fill_block16_c;
2714     c->fill_block_tab[1] = fill_block8_c;
/* SAD (sum of absolute differences) table: first index selects block width
 * ([0] = 16, [1] = 8), second index the half-pel interpolation applied to
 * the second operand (0 = none, 1 = x half-pel, 2 = y half-pel, 3 = xy). */
2716 /* TODO [0] 16  [1] 8 */
2717     c->pix_abs[0][0] = pix_abs16_c;
2718     c->pix_abs[0][1] = pix_abs16_x2_c;
2719     c->pix_abs[0][2] = pix_abs16_y2_c;
2720     c->pix_abs[0][3] = pix_abs16_xy2_c;
2721     c->pix_abs[1][0] = pix_abs8_c;
2722     c->pix_abs[1][1] = pix_abs8_x2_c;
2723     c->pix_abs[1][2] = pix_abs8_y2_c;
2724     c->pix_abs[1][3] = pix_abs8_xy2_c;
/* Third-pel motion compensation tables (SVQ3): index encodes the fractional
 * position as 4*dy+dx with dx,dy in {0,1,2} thirds — hence indices 3, 7 and
 * 11-15 are unused and stay NULL. put_ overwrites, avg_ averages with the
 * destination. */
2726     c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2727     c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2728     c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2729     c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2730     c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2731     c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2732     c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2733     c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2734     c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2736     c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2737     c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2738     c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2739     c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2740     c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2741     c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2742     c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2743     c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2744     c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
/* dspfunc(PFX, IDX, NUM): fill all 16 quarter-pel motion-compensation slots
 * of c->PFX_pixels_tab[IDX] with the PFX<NUM>_mc<dx><dy>_c functions; the
 * table index is 4*dy+dx with dx,dy in {0..3} quarter-pels. */
2746 #define dspfunc(PFX, IDX, NUM) \
2747     c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2748     c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2749     c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2750     c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2751     c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2752     c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2753     c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2754     c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2755     c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2756     c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2757     c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2758     c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2759     c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2760     c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2761     c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2762     c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
/* Instantiate the quarter-pel MC tables: index 0 = 16x16 blocks, index 1 =
 * 8x8; put / put_no_rnd (no-rounding) / avg variants. avg_no_rnd is left
 * commented out, matching the struct slots that are unused. */
2764     dspfunc(put_qpel, 0, 16);
2765     dspfunc(put_no_rnd_qpel, 0, 16);
2767     dspfunc(avg_qpel, 0, 16);
2768     /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2770     dspfunc(put_qpel, 1, 8);
2771     dspfunc(put_no_rnd_qpel, 1, 8);
2773     dspfunc(avg_qpel, 1, 8);
2774     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
/* WMV2 "mspel" (modified half-pel) 8x8 MC table; slot 0 is the plain copy,
 * the rest are the filtered fractional positions. */
2778     c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2779     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2780     c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2781     c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2782     c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2783     c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2784     c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2785     c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
/* Comparison-function tables used by motion estimation / RD decisions.
 * Convention: slot [0] = 16x16 variant, [1] = 8x8; slots [4]/[5] hold the
 * intra (single-block) variants where they exist. NOTE(review): gaps in the
 * embedded numbering (2796, 2798, 2801-2803, 2805-2806, 2815) indicate
 * elided assignments (e.g. sse/w53/w97) between these lines. */
2787 #define SET_CMP_FUNC(name) \
2788     c->name[0]= name ## 16_c;\
2789     c->name[1]= name ## 8x8_c;
2791     SET_CMP_FUNC(hadamard8_diff)
2792     c->hadamard8_diff[4]= hadamard8_intra16_c;
2793     c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2794     SET_CMP_FUNC(dct_sad)
2795     SET_CMP_FUNC(dct_max)
2797     SET_CMP_FUNC(dct264_sad)
2799     c->sad[0]= pix_abs16_c;
2800     c->sad[1]= pix_abs8_c;
2804     SET_CMP_FUNC(quant_psnr)
2807     c->vsad[0]= vsad16_c;
2808     c->vsad[4]= vsad_intra16_c;
2809     c->vsad[5]= vsad_intra8_c;
2810     c->vsse[0]= vsse16_c;
2811     c->vsse[4]= vsse_intra16_c;
2812     c->vsse[5]= vsse_intra8_c;
2813     c->nsse[0]= nsse16_c;
2814     c->nsse[1]= nsse8_c;
2816     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
/* Byte-wise helpers and HuffYUV median/left predictors, byte-swap helpers,
 * the H.263/H.261 loop filters (the H.263 pair conditionally, when either
 * codec is compiled in), and the 8x8 basis functions used by the MPEG-4
 * encoder's trellis quantization. */
2818     c->add_bytes= add_bytes_c;
2819     c->diff_bytes= diff_bytes_c;
2820     c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2821     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2822     c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
2823     c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2824     c->bswap_buf= bswap_buf;
2825     c->bswap16_buf = bswap16_buf;
2827     if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2828         c->h263_h_loop_filter= h263_h_loop_filter_c;
2829         c->h263_v_loop_filter= h263_v_loop_filter_c;
2832     c->h261_loop_filter= h261_loop_filter_c;
2834     c->try_8x8basis= try_8x8basis_c;
2835     c->add_8x8basis= add_8x8basis_c;
/* Float/int vector primitives used by the audio codecs, the image shrink
 * table (shrink[n] halves each dimension n times; [0] is a plain plane
 * copy), and zero-initialization of the 2-tap qpel tables so the fallback
 * loop at the end of init can detect unset entries. */
2837 #if CONFIG_VORBIS_DECODER
2838     c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
2840     c->vector_fmul_reverse = vector_fmul_reverse_c;
2841     c->vector_fmul_add = vector_fmul_add_c;
2842     c->vector_fmul_window = vector_fmul_window_c;
2843     c->vector_clipf = vector_clipf_c;
2844     c->scalarproduct_int16 = scalarproduct_int16_c;
2845     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2846     c->apply_window_int16 = apply_window_int16_c;
2847     c->vector_clip_int32 = vector_clip_int32_c;
2848     c->scalarproduct_float = ff_scalarproduct_float_c;
2849     c->butterflies_float = butterflies_float_c;
2850     c->butterflies_float_interleave = butterflies_float_interleave_c;
2852     c->shrink[0]= av_image_copy_plane;
2853     c->shrink[1]= ff_shrink22;
2854     c->shrink[2]= ff_shrink44;
2855     c->shrink[3]= ff_shrink88;
2857     memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
2858     memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
/* Bit-depth name mangling: FUNC/FUNCC append _<depth> (and _c) to a base
 * name. dspfunc1 fills the 4-entry half-pel table (plain/x2/y2/xy2) for one
 * block size; dspfunc2 fills all 16 quarter-pel slots as 4*dy+dx, like the
 * 8-bit dspfunc macro above but depth-parameterized. */
2862 #define FUNC(f, depth) f ## _ ## depth
2863 #define FUNCC(f, depth) f ## _ ## depth ## _c
2865 #define dspfunc1(PFX, IDX, NUM, depth)\
2866     c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
2867     c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
2868     c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
2869     c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
2871 #define dspfunc2(PFX, IDX, NUM, depth)\
2872     c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
2873     c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
2874     c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
2875     c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
2876     c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
2877     c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
2878     c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
2879     c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
2880     c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
2881     c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
2882     c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
2883     c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
2884     c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
2885     c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
2886     c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
2887     c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
/* BIT_DEPTH_FUNCS(depth, dct): install every depth-dependent function for
 * one pixel bit depth; the dct argument (_16 or _32) selects the DCTELEM
 * width suffix for the functions that touch coefficient blocks. Covers the
 * basic pixel ops, H.264 chroma MC, half-pel tables for block sizes
 * 16/8/4/2, and the H.264 qpel tables. */
2890 #define BIT_DEPTH_FUNCS(depth, dct)\
2891     c->get_pixels                    = FUNCC(get_pixels   ## dct   , depth);\
2892     c->draw_edges                    = FUNCC(draw_edges            , depth);\
2893     c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
2894     c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
2895     c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
2896     c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
2897     c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
2898     c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
2900     c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
2901     c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
2902     c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
2903     c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
2904     c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
2905     c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
2907     dspfunc1(put       , 0, 16, depth);\
2908     dspfunc1(put       , 1,  8, depth);\
2909     dspfunc1(put       , 2,  4, depth);\
2910     dspfunc1(put       , 3,  2, depth);\
2911     dspfunc1(put_no_rnd, 0, 16, depth);\
2912     dspfunc1(put_no_rnd, 1,  8, depth);\
2913     dspfunc1(avg       , 0, 16, depth);\
2914     dspfunc1(avg       , 1,  8, depth);\
2915     dspfunc1(avg       , 2,  4, depth);\
2916     dspfunc1(avg       , 3,  2, depth);\
2917     dspfunc1(avg_no_rnd, 0, 16, depth);\
2918     dspfunc1(avg_no_rnd, 1,  8, depth);\
2920     dspfunc2(put_h264_qpel, 0, 16, depth);\
2921     dspfunc2(put_h264_qpel, 1,  8, depth);\
2922     dspfunc2(put_h264_qpel, 2,  4, depth);\
2923     dspfunc2(put_h264_qpel, 3,  2, depth);\
2924     dspfunc2(avg_h264_qpel, 0, 16, depth);\
2925     dspfunc2(avg_h264_qpel, 1,  8, depth);\
2926     dspfunc2(avg_h264_qpel, 2,  4, depth);
/* Dispatch on the stream's bit depth (9/10/8-or-default) and on whether the
 * context uses 32-bit or 16-bit DCT coefficients. NOTE(review): the case
 * labels, else branches and closing braces sit on elided lines (2929, 2932,
 * 2934-2936, 2939, 2941-2943, 2945+). */
2928     switch (avctx->bits_per_raw_sample) {
2930         if (c->dct_bits == 32) {
2931             BIT_DEPTH_FUNCS(9, _32);
2933             BIT_DEPTH_FUNCS(9, _16);
2937         if (c->dct_bits == 32) {
2938             BIT_DEPTH_FUNCS(10, _32);
2940             BIT_DEPTH_FUNCS(10, _16);
2944         BIT_DEPTH_FUNCS(8, _16);
/* Architecture-specific overrides: each compiled-in backend may replace any
 * of the C pointers installed above with optimized versions. The HAVE_*/
/* ARCH_* macros are compile-time 0/1, so dead branches are eliminated. */
2949     if (HAVE_MMX)        ff_dsputil_init_mmx   (c, avctx);
2950     if (ARCH_ARM)        ff_dsputil_init_arm   (c, avctx);
2951     if (HAVE_VIS)        ff_dsputil_init_vis   (c, avctx);
2952     if (ARCH_ALPHA)      ff_dsputil_init_alpha (c, avctx);
2953     if (ARCH_PPC)        ff_dsputil_init_ppc   (c, avctx);
2954     if (ARCH_SH4)        ff_dsputil_init_sh4   (c, avctx);
2955     if (ARCH_BFIN)       ff_dsputil_init_bfin  (c, avctx);
/* Any 2-tap qpel slot an arch backend did not fill (still NULL from the
 * memsets above) falls back to the matching H.264 qpel function. Finally,
 * build the coefficient permutation table matching the IDCT chosen earlier.
 * NOTE(review): the function's closing brace lies past the end of this
 * view. */
2957     for (i = 0; i < 4; i++) {
2958         for (j = 0; j < 16; j++) {
2959             if(!c->put_2tap_qpel_pixels_tab[i][j])
2960                 c->put_2tap_qpel_pixels_tab[i][j] =
2961                     c->put_h264_qpel_pixels_tab[i][j];
2962             if(!c->avg_2tap_qpel_pixels_tab[i][j])
2963                 c->avg_2tap_qpel_pixels_tab[i][j] =
2964                     c->avg_h264_qpel_pixels_tab[i][j];
2968     ff_init_scantable_permutation(c->idct_permutation,
2969                                   c->idct_permutation_type);