/*
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
30 #include "libavutil/imgutils.h"
33 #include "simple_idct.h"
37 #include "mpegvideo.h"
41 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
42 uint32_t ff_squareTbl[512] = {0, };
45 #include "dsputil_template.c"
49 #include "dsputil_template.c"
53 #include "dsputil_template.c"
/* Per-byte replicated constants sized to the machine word:
 * 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f, depending on sizeof(unsigned long).
 * ~0UL/255 yields 0x0101...01, so multiplying replicates the byte. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
/* Standard JPEG/MPEG zigzag scan order: maps scan position -> raster index. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
83 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
/* MPEG-2 alternate (horizontal-biased) scan order. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
/* MPEG-2 alternate (vertical-biased) scan order. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
120 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
122 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
126 st->scantable= src_scantable;
130 j = src_scantable[i];
131 st->permutated[i] = permutation[j];
137 j = st->permutated[i];
139 st->raster_end[i]= end;
143 void ff_init_scantable_permutation(uint8_t *idct_permutation,
144 int idct_permutation_type)
148 switch(idct_permutation_type){
149 case FF_NO_IDCT_PERM:
151 idct_permutation[i]= i;
153 case FF_LIBMPEG2_IDCT_PERM:
155 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
157 case FF_SIMPLE_IDCT_PERM:
159 idct_permutation[i]= simple_mmx_permutation[i];
161 case FF_TRANSPOSE_IDCT_PERM:
163 idct_permutation[i]= ((i&7)<<3) | (i>>3);
165 case FF_PARTTRANS_IDCT_PERM:
167 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
169 case FF_SSE2_IDCT_PERM:
171 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
174 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
/**
 * Sum all pixels of a 16x16 block.
 * @param pix       top-left of the block
 * @param line_size stride in bytes between rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16; /* advance to the next row */
    }
    return s;
}
200 static int pix_norm1_c(uint8_t * pix, int line_size)
203 uint32_t *sq = ff_squareTbl + 256;
206 for (i = 0; i < 16; i++) {
207 for (j = 0; j < 16; j += 8) {
219 register uint64_t x=*(uint64_t*)pix;
221 s += sq[(x>>8)&0xff];
222 s += sq[(x>>16)&0xff];
223 s += sq[(x>>24)&0xff];
224 s += sq[(x>>32)&0xff];
225 s += sq[(x>>40)&0xff];
226 s += sq[(x>>48)&0xff];
227 s += sq[(x>>56)&0xff];
229 register uint32_t x=*(uint32_t*)pix;
231 s += sq[(x>>8)&0xff];
232 s += sq[(x>>16)&0xff];
233 s += sq[(x>>24)&0xff];
234 x=*(uint32_t*)(pix+4);
236 s += sq[(x>>8)&0xff];
237 s += sq[(x>>16)&0xff];
238 s += sq[(x>>24)&0xff];
243 pix += line_size - 16;
/**
 * Byte-swap w 32-bit words from src into dst (may alias).
 * Main loop is unrolled by 8; the tail loop handles the remainder.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= av_bswap32(src[i+0]);
        dst[i+1]= av_bswap32(src[i+1]);
        dst[i+2]= av_bswap32(src[i+2]);
        dst[i+3]= av_bswap32(src[i+3]);
        dst[i+4]= av_bswap32(src[i+4]);
        dst[i+5]= av_bswap32(src[i+5]);
        dst[i+6]= av_bswap32(src[i+6]);
        dst[i+7]= av_bswap32(src[i+7]);
    }
    for(; i<w; i++){
        dst[i+0]= av_bswap32(src[i+0]);
    }
}
/** Byte-swap len 16-bit words from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    while (len--)
        *dst++ = av_bswap16(*src++);
}
272 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
275 uint32_t *sq = ff_squareTbl + 256;
278 for (i = 0; i < h; i++) {
279 s += sq[pix1[0] - pix2[0]];
280 s += sq[pix1[1] - pix2[1]];
281 s += sq[pix1[2] - pix2[2]];
282 s += sq[pix1[3] - pix2[3]];
289 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
292 uint32_t *sq = ff_squareTbl + 256;
295 for (i = 0; i < h; i++) {
296 s += sq[pix1[0] - pix2[0]];
297 s += sq[pix1[1] - pix2[1]];
298 s += sq[pix1[2] - pix2[2]];
299 s += sq[pix1[3] - pix2[3]];
300 s += sq[pix1[4] - pix2[4]];
301 s += sq[pix1[5] - pix2[5]];
302 s += sq[pix1[6] - pix2[6]];
303 s += sq[pix1[7] - pix2[7]];
310 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
313 uint32_t *sq = ff_squareTbl + 256;
316 for (i = 0; i < h; i++) {
317 s += sq[pix1[ 0] - pix2[ 0]];
318 s += sq[pix1[ 1] - pix2[ 1]];
319 s += sq[pix1[ 2] - pix2[ 2]];
320 s += sq[pix1[ 3] - pix2[ 3]];
321 s += sq[pix1[ 4] - pix2[ 4]];
322 s += sq[pix1[ 5] - pix2[ 5]];
323 s += sq[pix1[ 6] - pix2[ 6]];
324 s += sq[pix1[ 7] - pix2[ 7]];
325 s += sq[pix1[ 8] - pix2[ 8]];
326 s += sq[pix1[ 9] - pix2[ 9]];
327 s += sq[pix1[10] - pix2[10]];
328 s += sq[pix1[11] - pix2[11]];
329 s += sq[pix1[12] - pix2[12]];
330 s += sq[pix1[13] - pix2[13]];
331 s += sq[pix1[14] - pix2[14]];
332 s += sq[pix1[15] - pix2[15]];
340 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
341 const uint8_t *s2, int stride){
344 /* read the pixels */
346 block[0] = s1[0] - s2[0];
347 block[1] = s1[1] - s2[1];
348 block[2] = s1[2] - s2[2];
349 block[3] = s1[3] - s2[3];
350 block[4] = s1[4] - s2[4];
351 block[5] = s1[5] - s2[5];
352 block[6] = s1[6] - s2[6];
353 block[7] = s1[7] - s2[7];
361 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
366 /* read the pixels */
368 pixels[0] = av_clip_uint8(block[0]);
369 pixels[1] = av_clip_uint8(block[1]);
370 pixels[2] = av_clip_uint8(block[2]);
371 pixels[3] = av_clip_uint8(block[3]);
372 pixels[4] = av_clip_uint8(block[4]);
373 pixels[5] = av_clip_uint8(block[5]);
374 pixels[6] = av_clip_uint8(block[6]);
375 pixels[7] = av_clip_uint8(block[7]);
382 static void put_signed_pixels_clamped_c(const DCTELEM *block,
383 uint8_t *restrict pixels,
388 for (i = 0; i < 8; i++) {
389 for (j = 0; j < 8; j++) {
392 else if (*block > 127)
395 *pixels = (uint8_t)(*block + 128);
399 pixels += (line_size - 8);
403 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
408 /* read the pixels */
410 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
411 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
412 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
413 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
414 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
415 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
416 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
417 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
423 static int sum_abs_dctelem_c(DCTELEM *block)
427 sum+= FFABS(block[i]);
/** Fill a 16-wide, h-tall strided block with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}
/** Fill an 8-wide, h-tall strided block with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}
/* Rounded averages of 2 and 4 values (round-half-up). */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
/**
 * Bilinear interpolation with 1/16-pel precision over an 8-wide block
 * of height h (MPEG-4 GMC with one motion point).
 * A/B/C/D are the bilinear weights; they always sum to 256, so the
 * result is renormalized by >>8 after adding the rounder.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
/**
 * Global motion compensation: for each destination pixel of an 8-wide,
 * h-tall block, compute a source position from the affine motion
 * parameters (dxx/dxy/dyx/dyy, origin ox/oy in 1/(1<<16) units before the
 * additional >>shift), then bilinearly interpolate with s = 1<<shift
 * sub-pel steps. Positions outside [0,width]x[0,height] are clamped to
 * the edge (edge samples are replicated, skipping the fractional blend in
 * the clamped direction).
 * NOTE(review): reconstructed from a truncated dump — verify against the
 * reference implementation.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: full bilinear blend */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* clamp vertically: blend horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* clamp horizontally: blend vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* clamp both ways: nearest edge sample */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
/** Third-pel MC, no sub-pel offset: plain copy, dispatched on block width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
    }
}
/* Third-pel MC, 1/3 horizontal offset: (2*a + b + 1) * 683 >> 11 ~= (2a+b)/3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Third-pel MC, 2/3 horizontal offset: (a + 2*b + 1) * 683 >> 11 ~= (a+2b)/3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Third-pel MC, 1/3 vertical offset: (2*top + bottom + 1) * 683 >> 11. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Third-pel MC, (1/3, 1/3) offset: bilinear weights 4/3/3/2 of 12, 2731*12 ~= 2^15. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/* Third-pel MC, (1/3, 2/3) offset: bilinear weights 3/2/4/3 of 12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/* Third-pel MC, 2/3 vertical offset: (top + 2*bottom + 1) * 683 >> 11. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Third-pel MC, (2/3, 1/3) offset: bilinear weights 3/4/2/3 of 12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/* Third-pel MC, (2/3, 2/3) offset: bilinear weights 2/3/3/4 of 12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/** Third-pel MC, no offset, averaging with dst: dispatched on block width. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
    }
}
/* Like put_tpel_pixels_mc10_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Like put_tpel_pixels_mc20_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Like put_tpel_pixels_mc01_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Like put_tpel_pixels_mc11_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Like put_tpel_pixels_mc12_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Like put_tpel_pixels_mc02_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Like put_tpel_pixels_mc21_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Like put_tpel_pixels_mc22_c, but rounded-averaged with the existing dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i, j;

    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
729 #define QPEL_MC(r, OPNAME, RND, OP) \
730 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
731 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
735 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
736 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
737 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
738 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
739 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
740 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
741 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
742 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
748 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
750 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
754 const int src0= src[0*srcStride];\
755 const int src1= src[1*srcStride];\
756 const int src2= src[2*srcStride];\
757 const int src3= src[3*srcStride];\
758 const int src4= src[4*srcStride];\
759 const int src5= src[5*srcStride];\
760 const int src6= src[6*srcStride];\
761 const int src7= src[7*srcStride];\
762 const int src8= src[8*srcStride];\
763 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
764 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
765 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
766 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
767 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
768 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
769 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
770 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
776 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
777 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
782 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
783 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
784 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
785 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
786 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
787 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
788 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
789 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
790 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
791 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
792 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
793 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
794 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
795 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
796 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
797 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
803 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
809 const int src0= src[0*srcStride];\
810 const int src1= src[1*srcStride];\
811 const int src2= src[2*srcStride];\
812 const int src3= src[3*srcStride];\
813 const int src4= src[4*srcStride];\
814 const int src5= src[5*srcStride];\
815 const int src6= src[6*srcStride];\
816 const int src7= src[7*srcStride];\
817 const int src8= src[8*srcStride];\
818 const int src9= src[9*srcStride];\
819 const int src10= src[10*srcStride];\
820 const int src11= src[11*srcStride];\
821 const int src12= src[12*srcStride];\
822 const int src13= src[13*srcStride];\
823 const int src14= src[14*srcStride];\
824 const int src15= src[15*srcStride];\
825 const int src16= src[16*srcStride];\
826 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
827 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
828 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
829 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
830 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
831 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
832 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
833 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
834 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
835 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
836 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
837 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
838 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
839 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
840 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
841 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
847 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
849 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
850 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
853 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
854 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
857 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
859 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
860 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
863 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
866 copy_block9(full, src, 16, stride, 9);\
867 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
868 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
871 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
873 copy_block9(full, src, 16, stride, 9);\
874 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
877 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
880 copy_block9(full, src, 16, stride, 9);\
881 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
882 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
884 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
889 copy_block9(full, src, 16, stride, 9);\
890 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
891 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
892 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
893 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
895 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
899 copy_block9(full, src, 16, stride, 9);\
900 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
901 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
902 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
903 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
905 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
910 copy_block9(full, src, 16, stride, 9);\
911 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
912 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
913 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
914 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
916 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
920 copy_block9(full, src, 16, stride, 9);\
921 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
922 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
923 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
924 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
926 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
931 copy_block9(full, src, 16, stride, 9);\
932 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
933 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
934 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
935 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
937 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
941 copy_block9(full, src, 16, stride, 9);\
942 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
943 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
944 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
945 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
947 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
952 copy_block9(full, src, 16, stride, 9);\
953 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
954 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
955 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
956 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
958 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
962 copy_block9(full, src, 16, stride, 9);\
963 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
964 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
965 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
966 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
968 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
971 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
972 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
973 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
975 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
982 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
987 copy_block9(full, src, 16, stride, 9);\
988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
993 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
996 copy_block9(full, src, 16, stride, 9);\
997 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
998 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
999 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1001 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1002 uint8_t full[16*9];\
1005 uint8_t halfHV[64];\
1006 copy_block9(full, src, 16, stride, 9);\
1007 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1008 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1009 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1010 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1012 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1013 uint8_t full[16*9];\
1015 copy_block9(full, src, 16, stride, 9);\
1016 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1017 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1018 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1020 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1022 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1023 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1026 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1028 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1029 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1032 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1033 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1036 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1038 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1039 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1042 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1043 uint8_t full[24*17];\
1045 copy_block17(full, src, 24, stride, 17);\
1046 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1047 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1050 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1051 uint8_t full[24*17];\
1052 copy_block17(full, src, 24, stride, 17);\
1053 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1056 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1057 uint8_t full[24*17];\
1059 copy_block17(full, src, 24, stride, 17);\
1060 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1061 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1063 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1064 uint8_t full[24*17];\
1065 uint8_t halfH[272];\
1066 uint8_t halfV[256];\
1067 uint8_t halfHV[256];\
1068 copy_block17(full, src, 24, stride, 17);\
1069 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1070 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1071 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1072 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1074 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1075 uint8_t full[24*17];\
1076 uint8_t halfH[272];\
1077 uint8_t halfHV[256];\
1078 copy_block17(full, src, 24, stride, 17);\
1079 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1080 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1081 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1082 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1084 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1085 uint8_t full[24*17];\
1086 uint8_t halfH[272];\
1087 uint8_t halfV[256];\
1088 uint8_t halfHV[256];\
1089 copy_block17(full, src, 24, stride, 17);\
1090 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1091 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1092 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1093 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1095 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1096 uint8_t full[24*17];\
1097 uint8_t halfH[272];\
1098 uint8_t halfHV[256];\
1099 copy_block17(full, src, 24, stride, 17);\
1100 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1101 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1102 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1103 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1105 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1106 uint8_t full[24*17];\
1107 uint8_t halfH[272];\
1108 uint8_t halfV[256];\
1109 uint8_t halfHV[256];\
1110 copy_block17(full, src, 24, stride, 17);\
1111 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1112 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1113 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1114 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1116 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1117 uint8_t full[24*17];\
1118 uint8_t halfH[272];\
1119 uint8_t halfHV[256];\
1120 copy_block17(full, src, 24, stride, 17);\
1121 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1122 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1123 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1124 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1126 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1127 uint8_t full[24*17];\
1128 uint8_t halfH[272];\
1129 uint8_t halfV[256];\
1130 uint8_t halfHV[256];\
1131 copy_block17(full, src, 24, stride, 17);\
1132 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1133 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1134 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1135 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1137 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1138 uint8_t full[24*17];\
1139 uint8_t halfH[272];\
1140 uint8_t halfHV[256];\
1141 copy_block17(full, src, 24, stride, 17);\
1142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1143 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1145 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1147 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1148 uint8_t halfH[272];\
1149 uint8_t halfHV[256];\
1150 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1151 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1152 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1154 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1155 uint8_t halfH[272];\
1156 uint8_t halfHV[256];\
1157 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1158 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1159 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1161 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1162 uint8_t full[24*17];\
1163 uint8_t halfH[272];\
1164 uint8_t halfV[256];\
1165 uint8_t halfHV[256];\
1166 copy_block17(full, src, 24, stride, 17);\
1167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1172 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1173 uint8_t full[24*17];\
1174 uint8_t halfH[272];\
1175 copy_block17(full, src, 24, stride, 17);\
1176 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1177 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1178 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1180 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1181 uint8_t full[24*17];\
1182 uint8_t halfH[272];\
1183 uint8_t halfV[256];\
1184 uint8_t halfHV[256];\
1185 copy_block17(full, src, 24, stride, 17);\
1186 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1187 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1188 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1189 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1191 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1192 uint8_t full[24*17];\
1193 uint8_t halfH[272];\
1194 copy_block17(full, src, 24, stride, 17);\
1195 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1196 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1197 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1199 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1200 uint8_t halfH[272];\
1201 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1202 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Read-out ops for the QPEL_MC() instantiations below: 'b' is a scaled
 * filter sum, reduced by >>5 with a clip-table lookup (cm[] presumably is
 * the 0..255 crop table in the expansion context — confirm in QPEL_MC).
 * The no_rnd variants bias by 15 instead of 16, i.e. round down. */
1205 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1206 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1207 #define op_put(a, b) a = cm[((b) + 16)>>5]
1208 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the quarter-pel MC function families (put, put_no_rnd, avg)
 * from the QPEL_MC template, then drop the helper op macros.
 * NOTE(review): this listing is elided — the matching
 * "#undef op_avg" / "#undef op_put" lines are missing here. */
1210 QPEL_MC(0, put_ , _ , op_put)
1211 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1212 QPEL_MC(0, avg_ , _ , op_avg)
1213 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1215 #undef op_avg_no_rnd
1217 #undef op_put_no_rnd
/* The (0,0) quarter-pel position is a plain copy/average, so alias the
 * mc00 cases directly to the pixel copy/average primitives. */
1219 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1220 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1221 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1222 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1223 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1224 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1226 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1227 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1231 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1232 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1233 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1234 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1235 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1236 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1237 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1238 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
#if CONFIG_RV40_DECODER
/* RV40's (3,3) quarter-pel position is defined as the plain 2x2 average,
 * so these simply forward to the generic xy2 half-pel primitives.
 * (Closing braces were lost in the damaged listing and are restored.) */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1259 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1260 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1264 const int src_1= src[ -srcStride];
1265 const int src0 = src[0 ];
1266 const int src1 = src[ srcStride];
1267 const int src2 = src[2*srcStride];
1268 const int src3 = src[3*srcStride];
1269 const int src4 = src[4*srcStride];
1270 const int src5 = src[5*srcStride];
1271 const int src6 = src[6*srcStride];
1272 const int src7 = src[7*srcStride];
1273 const int src8 = src[8*srcStride];
1274 const int src9 = src[9*srcStride];
1275 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1276 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1277 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1278 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1279 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1280 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1281 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1282 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/** mspel position (1,0): average of the source and the h-filtered block. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* restored: declaration lost in damaged listing */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
/** mspel position (2,0): horizontal lowpass written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
/** mspel position (3,0): average of the right neighbour and the h-filtered block. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* restored: declaration lost in damaged listing */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
/** mspel position (0,2): vertical lowpass written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
/** mspel position (1,2): average of v-filtered source and hv-filtered block. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    /* restored: buffer declarations lost in damaged listing; halfH holds
     * 11 filtered rows (one above, two below) for the vertical pass */
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/** mspel position (3,2): like mc12 but the vertical pass uses src+1. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    /* restored: buffer declarations lost in damaged listing */
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/** mspel position (2,2): horizontal then vertical lowpass. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88]; /* restored: declaration lost in damaged listing */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1332 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1333 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1335 const int strength= ff_h263_loop_filter_strength[qscale];
1339 int p0= src[x-2*stride];
1340 int p1= src[x-1*stride];
1341 int p2= src[x+0*stride];
1342 int p3= src[x+1*stride];
1343 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1345 if (d<-2*strength) d1= 0;
1346 else if(d<- strength) d1=-2*strength - d;
1347 else if(d< strength) d1= d;
1348 else if(d< 2*strength) d1= 2*strength - d;
1353 if(p1&256) p1= ~(p1>>31);
1354 if(p2&256) p2= ~(p2>>31);
1356 src[x-1*stride] = p1;
1357 src[x+0*stride] = p2;
1361 d2= av_clip((p0-p3)/4, -ad1, ad1);
1363 src[x-2*stride] = p0 - d2;
1364 src[x+ stride] = p3 + d2;
1369 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1370 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1372 const int strength= ff_h263_loop_filter_strength[qscale];
1376 int p0= src[y*stride-2];
1377 int p1= src[y*stride-1];
1378 int p2= src[y*stride+0];
1379 int p3= src[y*stride+1];
1380 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1382 if (d<-2*strength) d1= 0;
1383 else if(d<- strength) d1=-2*strength - d;
1384 else if(d< strength) d1= d;
1385 else if(d< 2*strength) d1= 2*strength - d;
1390 if(p1&256) p1= ~(p1>>31);
1391 if(p2&256) p2= ~(p2>>31);
1393 src[y*stride-1] = p1;
1394 src[y*stride+0] = p2;
1398 d2= av_clip((p0-p3)/4, -ad1, ad1);
1400 src[y*stride-2] = p0 - d2;
1401 src[y*stride+1] = p3 + d2;
/**
 * H.261 in-loop filter: separable 1-2-1 smoothing of one 8x8 block.
 * First pass filters vertically into temp[] at 4x scale (edge rows are
 * just scaled), second pass filters horizontally and renormalizes with
 * rounding. A constant block passes through unchanged.
 * NOTE: loop scaffolding lost in the damaged listing is restored here.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    /* vertical 1-2-1 pass (rows 0 and 7 are only scaled) */
    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    /* horizontal 1-2-1 pass + renormalization (columns 0 and 7 scaled only) */
    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
/**
 * SAD of two 16-wide pixel blocks over h rows.
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/**
 * SAD of a 16-wide block against a half-pel horizontally interpolated
 * reference (avg2 of each pixel and its right neighbour).
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/**
 * SAD of a 16-wide block against a half-pel vertically interpolated
 * reference (avg2 of each pixel and the one below).
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
/**
 * SAD of a 16-wide block against a half-pel diagonally interpolated
 * reference (avg4 of the 2x2 neighbourhood).
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
/**
 * SAD of two 8-wide pixel blocks over h rows.
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/**
 * 8-wide SAD against a half-pel horizontally interpolated reference.
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/**
 * 8-wide SAD against a half-pel vertically interpolated reference.
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
/**
 * 8-wide SAD against a half-pel diagonally interpolated reference.
 * NOTE: accumulator/loop/return lost in the damaged listing are restored.
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++){
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
1633 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1634 MpegEncContext *c = v;
1640 for(x=0; x<16; x++){
1641 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1644 for(x=0; x<15; x++){
1645 score2+= FFABS( s1[x ] - s1[x +stride]
1646 - s1[x+1] + s1[x+1+stride])
1647 -FFABS( s2[x ] - s2[x +stride]
1648 - s2[x+1] + s2[x+1+stride]);
1655 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1656 else return score1 + FFABS(score2)*8;
1659 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1660 MpegEncContext *c = v;
1667 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1671 score2+= FFABS( s1[x ] - s1[x +stride]
1672 - s1[x+1] + s1[x+1+stride])
1673 -FFABS( s2[x ] - s2[x +stride]
1674 - s2[x+1] + s2[x+1+stride]);
1681 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1682 else return score1 + FFABS(score2)*8;
/* Estimate the weighted squared error after adding 'basis' scaled by
 * 'scale' to the residual 'rem' (trellis/basis-search helper).
 * NOTE(review): this listing is elided — the declarations of the sum
 * accumulator and 'w', the >>RECON_SHIFT step and the return are missing. */
1685 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1689 for(i=0; i<8*8; i++){
     /* reconstruct one coefficient at RECON_SHIFT precision, with rounding */
1690 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1693 assert(-512<b && b<512);
     /* accumulate perceptually weighted squared error */
1695 sum += (w*b)*(w*b)>>4;
1700 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1703 for(i=0; i<8*8; i++){
1704 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1709 * Permute an 8x8 block.
1710 * @param block the block which will be permuted according to the given permutation vector
1711 * @param permutation the permutation vector
1712 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1713 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1714 * (inverse) permutated to scantable order!
1716 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1722 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
1724 for(i=0; i<=last; i++){
1725 const int j= scantable[i];
1730 for(i=0; i<=last; i++){
1731 const int j= scantable[i];
1732 const int perm_j= permutation[j];
1733 block[perm_j]= temp[j];
/** Comparison function for FF_CMP_ZERO: every candidate costs zero. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
/* Fill cmp[0..5] with the comparison functions selected by 'type'
 * (FF_CMP_*).  NOTE(review): this listing is heavily elided — the loop
 * over the six sizes, the switch dispatch and most case assignments are
 * missing; only a few representative assignments remain below. */
1741 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1744 memset(cmp, 0, sizeof(void*)*6);
1752 cmp[i]= c->hadamard8_diff[i];
1758 cmp[i]= c->dct_sad[i];
1761 cmp[i]= c->dct264_sad[i];
1764 cmp[i]= c->dct_max[i];
1767 cmp[i]= c->quant_psnr[i];
     /* unknown 'type' values end up reporting an internal error */
1796 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1801 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1803 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1804 long a = *(long*)(src+i);
1805 long b = *(long*)(dst+i);
1806 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1809 dst[i+0] += src[i+0];
1812 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1814 #if !HAVE_FAST_UNALIGNED
1815 if((long)src2 & (sizeof(long)-1)){
1816 for(i=0; i+7<w; i+=8){
1817 dst[i+0] = src1[i+0]-src2[i+0];
1818 dst[i+1] = src1[i+1]-src2[i+1];
1819 dst[i+2] = src1[i+2]-src2[i+2];
1820 dst[i+3] = src1[i+3]-src2[i+3];
1821 dst[i+4] = src1[i+4]-src2[i+4];
1822 dst[i+5] = src1[i+5]-src2[i+5];
1823 dst[i+6] = src1[i+6]-src2[i+6];
1824 dst[i+7] = src1[i+7]-src2[i+7];
1828 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1829 long a = *(long*)(src1+i);
1830 long b = *(long*)(src2+i);
1831 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1834 dst[i+0] = src1[i+0]-src2[i+0];
/* HuffYUV median prediction, decode side: each output byte is the median
 * predictor of (left, above, left+above-aboveleft) plus the coded diff,
 * with the running left/left_top carried between calls via *left/*left_top.
 * NOTE(review): this listing is elided — the loop scaffolding and the
 * write-back of *left / *left_top are missing here. */
1837 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1845 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
/* HuffYUV median prediction, encode side: computes the same median
 * predictor as the decode path and presumably stores src2 minus the
 * prediction into dst. NOTE(review): this listing is elided — the loop
 * scaffolding, the dst store and the *left / *left_top write-back are
 * missing here. */
1854 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1862 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
/* HuffYUV left prediction: presumably accumulates src into the running
 * left value 'acc', stores it to dst, and returns the final accumulator.
 * NOTE(review): this listing is elided — the loop body, the final element
 * handling and the return are missing here; verify against the caller. */
1872 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1875 for(i=0; i<w-1; i++){
/* Per-channel left prediction for 32-bit BGRA HuffYUV rows; the four
 * running channel values are carried via the red/green/blue/alpha
 * pointers. NOTE(review): the entire body of this function is missing
 * from this listing. */
1902 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Butterfly helpers for the 8x8 Hadamard transforms below:
 * BUTTERFLY2 writes sum/difference of two inputs into two outputs,
 * BUTTERFLY1 does it in place, BUTTERFLYA returns |x+y| + |x-y|.
 * NOTE(review): the multi-line bodies of BUTTERFLY2/BUTTERFLY1 were lost
 * in this listing (their trailing backslashes continue onto elided
 * lines); only BUTTERFLYA survives intact. */
1932 #define BUTTERFLY2(o1,o2,i1,i2) \
1936 #define BUTTERFLY1(x,y) \
1945 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/**
 * 8x8 SATD: Hadamard-transform the src-dst difference block and sum the
 * absolute transform coefficients.
 * NOTE: declarations, loop scaffolding and the return were lost in the
 * damaged listing and are restored here.
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal 8-point Hadamard on each row of the difference */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical 8-point Hadamard, folding |coefficients| into the sum */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
/**
 * 8x8 intra SATD: Hadamard-transform the source block itself and sum the
 * absolute coefficients, then subtract the DC term so the score measures
 * AC energy only.
 * NOTE: declarations, loop scaffolding and the return were lost in the
 * damaged listing and are restored here.
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal 8-point Hadamard on each source row */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical 8-point Hadamard, folding |coefficients| into the sum */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
    return sum;
}
2040 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2041 MpegEncContext * const s= (MpegEncContext *)c;
2042 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2046 s->dsp.diff_pixels(temp, src1, src2, stride);
2048 return s->dsp.sum_abs_dctelem(temp);
2053 const int s07 = SRC(0) + SRC(7);\
2054 const int s16 = SRC(1) + SRC(6);\
2055 const int s25 = SRC(2) + SRC(5);\
2056 const int s34 = SRC(3) + SRC(4);\
2057 const int a0 = s07 + s34;\
2058 const int a1 = s16 + s25;\
2059 const int a2 = s07 - s34;\
2060 const int a3 = s16 - s25;\
2061 const int d07 = SRC(0) - SRC(7);\
2062 const int d16 = SRC(1) - SRC(6);\
2063 const int d25 = SRC(2) - SRC(5);\
2064 const int d34 = SRC(3) - SRC(4);\
2065 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2066 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2067 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2068 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2070 DST(1, a4 + (a7>>2)) ;\
2071 DST(2, a2 + (a3>>1)) ;\
2072 DST(3, a5 + (a6>>2)) ;\
2074 DST(5, a6 - (a5>>2)) ;\
2075 DST(6, (a2>>1) - a3 ) ;\
2076 DST(7, (a4>>2) - a7 ) ;\
/* SATD using the H.264-style integer 8x8 transform: DCT8_1D is applied
 * first to rows (DST writes back into dct[][]), then to columns with DST
 * redefined to accumulate absolute coefficient values into the sum.
 * NOTE(review): this listing is elided — the dct[][]/sum declarations,
 * the DCT8_1D invocations, the #undef lines and the return are missing. */
2079 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2080 MpegEncContext * const s= (MpegEncContext *)c;
2085 s->dsp.diff_pixels(dct[0], src1, src2, stride);
     /* pass 1: transform each row in place */
2087 #define SRC(x) dct[i][x]
2088 #define DST(x,v) dct[i][x]= v
2089 for( i = 0; i < 8; i++ )
     /* pass 2: transform columns, folding |coeff| into the score */
2094 #define SRC(x) dct[x][i]
2095 #define DST(x,v) sum += FFABS(v)
2096 for( i = 0; i < 8; i++ )
2104 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2105 MpegEncContext * const s= (MpegEncContext *)c;
2106 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2111 s->dsp.diff_pixels(temp, src1, src2, stride);
2115 sum= FFMAX(sum, FFABS(temp[i]));
/* Quantization-noise metric: difference block is transformed, quantized,
 * dequantized, and the squared error against the saved unquantized
 * coefficients (bak) is summed.
 * NOTE(review): this listing is elided — the fdct call, the intra/inter
 * branch structure, the summation loop and the return are missing here. */
2120 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2121 MpegEncContext * const s= (MpegEncContext *)c;
2122 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2123 DCTELEM * const bak = temp+64;
2129 s->dsp.diff_pixels(temp, src1, src2, stride);
     /* keep a pristine copy of the coefficients for the error comparison */
2131 memcpy(bak, temp, 64*sizeof(DCTELEM));
2133 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2134 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2135 ff_simple_idct_8(temp); //FIXME
     /* accumulate squared quantization error per coefficient */
2138 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
/* Rate-distortion score for coding one 8x8 block: estimates the VLC bit
 * cost of the quantized coefficients (run/level tables, escape codes),
 * reconstructs the block, and returns SSE distortion plus the bit cost
 * weighted by qscale^2 * 109/128 (a lambda approximation).
 * NOTE(review): this listing is heavily elided — bit-count initialisation,
 * run/level bookkeeping, escape-code branches and several control-flow
 * lines are missing below. */
2143 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2144 MpegEncContext * const s= (MpegEncContext *)c;
2145 const uint8_t *scantable= s->intra_scantable.permutated;
2146 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2147 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2148 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2149 int i, last, run, bits, level, distortion, start_i;
2150 const int esc_length= s->ac_esc_length;
2152 uint8_t * last_length;
     /* work on local copies so the reconstruction can be done in place */
2156 copy_block8(lsrc1, src1, 8, stride, 8);
2157 copy_block8(lsrc2, src2, 8, stride, 8);
2159 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2161 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
     /* intra blocks use the intra AC tables plus a luma DC cost... */
2167 length = s->intra_ac_vlc_length;
2168 last_length= s->intra_ac_vlc_last_length;
2169 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
     /* ...inter blocks use the inter AC tables */
2172 length = s->inter_ac_vlc_length;
2173 last_length= s->inter_ac_vlc_last_length;
2178 for(i=start_i; i<last; i++){
2179 int j= scantable[i];
     /* levels within +-127 use the unified run/level VLC table */
2184 if((level&(~127)) == 0){
2185 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2194 level= temp[i] + 64;
2198 if((level&(~127)) == 0){
2199 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2207 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2209 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2212 s->dsp.idct_add(lsrc2, 8, temp);
2214 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2216 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* Bit-cost metric for one 8x8 block: like rd8x8_c but returns only the
 * estimated VLC bit count, with no reconstruction/distortion term.
 * NOTE(review): this listing is heavily elided — bit-count
 * initialisation, run/level bookkeeping, escape branches and the return
 * are missing below. */
2219 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2220 MpegEncContext * const s= (MpegEncContext *)c;
2221 const uint8_t *scantable= s->intra_scantable.permutated;
2222 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2223 int i, last, run, bits, level, start_i;
2224 const int esc_length= s->ac_esc_length;
2226 uint8_t * last_length;
2230 s->dsp.diff_pixels(temp, src1, src2, stride);
2232 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
     /* intra: intra AC tables plus luma DC cost; inter: inter AC tables */
2238 length = s->intra_ac_vlc_length;
2239 last_length= s->intra_ac_vlc_last_length;
2240 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2243 length = s->inter_ac_vlc_length;
2244 last_length= s->inter_ac_vlc_last_length;
2249 for(i=start_i; i<last; i++){
2250 int j= scantable[i];
     /* levels within +-127 use the unified run/level VLC table */
2255 if((level&(~127)) == 0){
2256 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2265 level= temp[i] + 64;
2269 if((level&(~127)) == 0){
2270 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* Template for the intra vertical-SAD metrics: sums |s[x] - s[x+stride]|
 * over the block, i.e. the vertical gradient energy of a single block.
 * NOTE(review): the tail of this macro (loop close, stride advance,
 * return) continues on lines elided from this listing. */
2278 #define VSAD_INTRA(size) \
2279 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2283 for(y=1; y<h; y++){ \
2284 for(x=0; x<size; x+=4){ \
2285 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2286 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
/* Inter vertical-SAD, 16 wide: sums the absolute vertical gradient of
 * the difference signal s1-s2. NOTE(review): this listing is elided —
 * declarations, the y loop, stride advance and return are missing. */
2296 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2301 for(x=0; x<16; x++){
2302 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Squared-difference counterparts of the VSAD metrics: SQ squares a
 * value, and VSSE_INTRA sums squared vertical gradients of one block.
 * NOTE(review): the tail of the macro continues on elided lines. */
2311 #define SQ(a) ((a)*(a))
2312 #define VSSE_INTRA(size) \
2313 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2317 for(y=1; y<h; y++){ \
2318 for(x=0; x<size; x+=4){ \
2319 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2320 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
/* Inter vertical-SSE, 16 wide: sums squared vertical gradients of the
 * difference signal s1-s2. NOTE(review): this listing is elided —
 * declarations, the y loop, stride advance and return are missing. */
2330 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2335 for(x=0; x<16; x++){
2336 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/**
 * Sum of squared differences between an int8 and an int16 vector.
 * Returns 0 for size <= 0.
 * NOTE: accumulator/return lost in the damaged listing are restored here.
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;

    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);

    return score;
}
/* Build the 16x16 comparison functions from the 8x8 kernels above
 * (WRAPPER8_16_SQ presumably applies the kernel to the four 8x8
 * quadrants and combines the results — its definition is not visible
 * in this listing). */
2354 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2355 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2356 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2358 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2360 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2361 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2362 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2363 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/**
 * dst[i] = src0[i] * src1[len-1-i]: multiply src0 by src1 read backwards.
 * NOTE: the src1 += len-1 setup and declarations lost in the damaged
 * listing are restored here.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    src1 += len-1; /* point at the last element; src1[-i] then walks back */
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[-i];
}
/** dst[i] = src0[i] * src1[i] + src2[i] (fused multiply-add per element). */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[i] + src2[i];
}
/**
 * Overlap-add windowing (MDCT-style): src0 is read forward, src1 backward,
 * and the 2*len window 'win' is applied symmetrically around its centre;
 * dst receives 2*len output samples.
 * NOTE: pointer setup and local loads lost in the damaged listing are
 * restored here.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int i,j;

    /* index from the centre: i runs over the first half, j over the second */
    dst += len;
    win += len;
    src0+= len;

    for(i=-len, j=len-1; i<0; i++, j--) {
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];
        dst[i] = s0*wj - s1*wi;
        dst[j] = s0*wi + s1*wj;
    }
}
/** dst[i] = src[i] * mul: scale a float vector by a scalar. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src[i] * mul;
}
/**
 * In-place butterfly: v1[i] becomes the sum, v2[i] the difference
 * (v1[i] - v2[i]) of the original pair.
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float t = v1[i] - v2[i];
        v1[i] += v2[i];
        v2[i] = t;
    }
}
/**
 * Butterfly with interleaved output: dst gets (src0[i]+src1[i],
 * src0[i]-src1[i]) pairs, so dst must hold 2*len floats.
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float f1 = src0[i];
        float f2 = src1[i];
        dst[2*i    ] = f1 + f2;
        dst[2*i + 1] = f1 - f2;
    }
}
/** Scalar (dot) product of two float vectors of length len. */
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float p = 0.0;
    int i;

    for (i = 0; i < len; i++)
        p += v1[i] * v2[i];

    return p;
}
/**
 * Clip one float, operating on its raw IEEE-754 bit pattern.
 * Precondition (ensured by the caller): min < 0 < max, so mini has the sign
 * bit set. a > mini means a is a negative float below min; flipping the sign
 * bit of a non-negative float makes it comparable against maxisign to detect
 * values above max.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if(a > mini) return mini;                      /* negative and < min */
    else if((a^(1U<<31)) > maxisign) return maxi;  /* positive and > max */
    else return a;
}
/**
 * Clip a float vector to [*min, *max] when *min < 0 < *max, comparing raw
 * IEEE-754 bit patterns instead of floats (see clipf_c_one).
 * len must be a multiple of 8 (the loop is unrolled by 8).
 * NOTE(review): the float<->uint32_t pointer casts violate strict aliasing;
 * presumably the project builds with aliasing-safe flags — confirm before
 * raising optimization settings.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Clip every element of src into [min, max] and store into dst.
 * len must be a multiple of 8. When min and max straddle zero the faster
 * integer bit-pattern path is taken; otherwise plain float clipping is used.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
/** Scalar (dot) product of two int16 vectors of length order. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Combined dot product and multiply-accumulate:
 * returns sum(v1[i]*v2[i]) computed on the ORIGINAL v1 values, while
 * updating v1[i] += mul * v3[i] in place.
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;

    while (order--) {
        res   += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }

    return res;
}
/**
 * Apply a symmetric Q15 window to an int16 signal: the first half of the
 * window is used for both input[i] and its mirror input[len-1-i], with
 * round-to-nearest on the 15-bit shift-down. len is assumed even (only
 * len/2 window coefficients are read).
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
/**
 * Clip every element of src into [min, max] and store into dst.
 * len is assumed to be a positive multiple of 8 (unrolled do/while).
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/* Fixed-point IDCT constants: round(2048 * sqrt(2) * cos(k*pi/16)).
 * W0 (== W4 == 2048) is the DC/mid coefficient used by the butterfly below. */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
2540 static void wmv2_idct_row(short * b)
2543 int a0,a1,a2,a3,a4,a5,a6,a7;
2545 a1 = W1*b[1]+W7*b[7];
2546 a7 = W7*b[1]-W1*b[7];
2547 a5 = W5*b[5]+W3*b[3];
2548 a3 = W3*b[5]-W5*b[3];
2549 a2 = W2*b[2]+W6*b[6];
2550 a6 = W6*b[2]-W2*b[6];
2551 a0 = W0*b[0]+W0*b[4];
2552 a4 = W0*b[0]-W0*b[4];
2554 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2555 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2557 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2558 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2559 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2560 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2561 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2562 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2563 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2564 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2566 static void wmv2_idct_col(short * b)
2569 int a0,a1,a2,a3,a4,a5,a6,a7;
2570 /*step 1, with extended precision*/
2571 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2572 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2573 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2574 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2575 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2576 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2577 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2578 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2580 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2581 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2583 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2584 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2585 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2586 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2588 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2589 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2590 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2591 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
/** Full 8x8 WMV2 IDCT: row pass over each of the 8 rows, then column pass. */
void ff_wmv2_idct_c(short * block){
    int i;

    for(i=0;i<64;i+=8){
        wmv2_idct_row(block+i);
    }
    for(i=0;i<8;i++){
        wmv2_idct_col(block+i);
    }
}
2603 /* XXX: those functions should be suppressed ASAP when all IDCTs are
2605 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2607 ff_wmv2_idct_c(block);
2608 put_pixels_clamped_c(block, dest, line_size);
2610 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2612 ff_wmv2_idct_c(block);
2613 add_pixels_clamped_c(block, dest, line_size);
2615 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2617 ff_j_rev_dct (block);
2618 put_pixels_clamped_c(block, dest, line_size);
2620 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2622 ff_j_rev_dct (block);
2623 add_pixels_clamped_c(block, dest, line_size);
2626 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
2628 /* init static data */
2629 av_cold void ff_dsputil_static_init(void)
2633 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2634 for(i=0;i<MAX_NEG_CROP;i++) {
2636 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2639 for(i=0;i<512;i++) {
2640 ff_squareTbl[i] = (i - 256) * (i - 256);
2643 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2646 int ff_check_alignment(void){
2647 static int did_fail=0;
2648 LOCAL_ALIGNED_16(int, aligned, [4]);
2650 if((intptr_t)aligned & 15){
2652 #if HAVE_MMX || HAVE_ALTIVEC
2653 av_log(NULL, AV_LOG_ERROR,
2654 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2655 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2656 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2657 "Do not report crashes to Libav developers.\n");
2666 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2670 ff_check_alignment();
2673 if (avctx->bits_per_raw_sample == 10) {
2674 c->fdct = ff_jpeg_fdct_islow_10;
2675 c->fdct248 = ff_fdct248_islow_10;
2677 if(avctx->dct_algo==FF_DCT_FASTINT) {
2678 c->fdct = ff_fdct_ifast;
2679 c->fdct248 = ff_fdct_ifast248;
2681 else if(avctx->dct_algo==FF_DCT_FAAN) {
2682 c->fdct = ff_faandct;
2683 c->fdct248 = ff_faandct248;
2686 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2687 c->fdct248 = ff_fdct248_islow_8;
2690 #endif //CONFIG_ENCODERS
2692 if (avctx->bits_per_raw_sample == 10) {
2693 c->idct_put = ff_simple_idct_put_10;
2694 c->idct_add = ff_simple_idct_add_10;
2695 c->idct = ff_simple_idct_10;
2696 c->idct_permutation_type = FF_NO_IDCT_PERM;
2698 if(avctx->idct_algo==FF_IDCT_INT){
2699 c->idct_put= ff_jref_idct_put;
2700 c->idct_add= ff_jref_idct_add;
2701 c->idct = ff_j_rev_dct;
2702 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2703 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2704 c->idct_put= ff_wmv2_idct_put_c;
2705 c->idct_add= ff_wmv2_idct_add_c;
2706 c->idct = ff_wmv2_idct_c;
2707 c->idct_permutation_type= FF_NO_IDCT_PERM;
2708 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2709 c->idct_put= ff_faanidct_put;
2710 c->idct_add= ff_faanidct_add;
2711 c->idct = ff_faanidct;
2712 c->idct_permutation_type= FF_NO_IDCT_PERM;
2713 }else{ //accurate/default
2714 c->idct_put = ff_simple_idct_put_8;
2715 c->idct_add = ff_simple_idct_add_8;
2716 c->idct = ff_simple_idct_8;
2717 c->idct_permutation_type= FF_NO_IDCT_PERM;
2721 c->diff_pixels = diff_pixels_c;
2722 c->put_pixels_clamped = put_pixels_clamped_c;
2723 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2724 c->add_pixels_clamped = add_pixels_clamped_c;
2725 c->sum_abs_dctelem = sum_abs_dctelem_c;
2728 c->pix_sum = pix_sum_c;
2729 c->pix_norm1 = pix_norm1_c;
2731 c->fill_block_tab[0] = fill_block16_c;
2732 c->fill_block_tab[1] = fill_block8_c;
2734 /* TODO [0] 16 [1] 8 */
2735 c->pix_abs[0][0] = pix_abs16_c;
2736 c->pix_abs[0][1] = pix_abs16_x2_c;
2737 c->pix_abs[0][2] = pix_abs16_y2_c;
2738 c->pix_abs[0][3] = pix_abs16_xy2_c;
2739 c->pix_abs[1][0] = pix_abs8_c;
2740 c->pix_abs[1][1] = pix_abs8_x2_c;
2741 c->pix_abs[1][2] = pix_abs8_y2_c;
2742 c->pix_abs[1][3] = pix_abs8_xy2_c;
2744 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2745 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2746 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2747 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2748 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2749 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2750 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2751 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2752 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2754 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2755 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2756 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2757 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2758 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2759 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2760 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2761 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2762 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2764 #define dspfunc(PFX, IDX, NUM) \
2765 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2766 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2767 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2768 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2769 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2770 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2771 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2772 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2773 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2774 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2775 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2776 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2777 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2778 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2779 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2780 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2782 dspfunc(put_qpel, 0, 16);
2783 dspfunc(put_no_rnd_qpel, 0, 16);
2785 dspfunc(avg_qpel, 0, 16);
2786 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2788 dspfunc(put_qpel, 1, 8);
2789 dspfunc(put_no_rnd_qpel, 1, 8);
2791 dspfunc(avg_qpel, 1, 8);
2792 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2796 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2797 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2798 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2799 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2800 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2801 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2802 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2803 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2805 #define SET_CMP_FUNC(name) \
2806 c->name[0]= name ## 16_c;\
2807 c->name[1]= name ## 8x8_c;
2809 SET_CMP_FUNC(hadamard8_diff)
2810 c->hadamard8_diff[4]= hadamard8_intra16_c;
2811 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2812 SET_CMP_FUNC(dct_sad)
2813 SET_CMP_FUNC(dct_max)
2815 SET_CMP_FUNC(dct264_sad)
2817 c->sad[0]= pix_abs16_c;
2818 c->sad[1]= pix_abs8_c;
2822 SET_CMP_FUNC(quant_psnr)
2825 c->vsad[0]= vsad16_c;
2826 c->vsad[4]= vsad_intra16_c;
2827 c->vsad[5]= vsad_intra8_c;
2828 c->vsse[0]= vsse16_c;
2829 c->vsse[4]= vsse_intra16_c;
2830 c->vsse[5]= vsse_intra8_c;
2831 c->nsse[0]= nsse16_c;
2832 c->nsse[1]= nsse8_c;
2834 ff_dsputil_init_dwt(c);
2837 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2839 c->add_bytes= add_bytes_c;
2840 c->diff_bytes= diff_bytes_c;
2841 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2842 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2843 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2844 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2845 c->bswap_buf= bswap_buf;
2846 c->bswap16_buf = bswap16_buf;
2848 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2849 c->h263_h_loop_filter= h263_h_loop_filter_c;
2850 c->h263_v_loop_filter= h263_v_loop_filter_c;
2853 c->h261_loop_filter= h261_loop_filter_c;
2855 c->try_8x8basis= try_8x8basis_c;
2856 c->add_8x8basis= add_8x8basis_c;
2858 #if CONFIG_VORBIS_DECODER
2859 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
2861 c->vector_fmul_reverse = vector_fmul_reverse_c;
2862 c->vector_fmul_add = vector_fmul_add_c;
2863 c->vector_fmul_window = vector_fmul_window_c;
2864 c->vector_clipf = vector_clipf_c;
2865 c->scalarproduct_int16 = scalarproduct_int16_c;
2866 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2867 c->apply_window_int16 = apply_window_int16_c;
2868 c->vector_clip_int32 = vector_clip_int32_c;
2869 c->scalarproduct_float = ff_scalarproduct_float_c;
2870 c->butterflies_float = butterflies_float_c;
2871 c->butterflies_float_interleave = butterflies_float_interleave_c;
2872 c->vector_fmul_scalar = vector_fmul_scalar_c;
2874 c->shrink[0]= av_image_copy_plane;
2875 c->shrink[1]= ff_shrink22;
2876 c->shrink[2]= ff_shrink44;
2877 c->shrink[3]= ff_shrink88;
2879 c->prefetch= just_return;
2881 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
2882 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
2886 #define FUNC(f, depth) f ## _ ## depth
2887 #define FUNCC(f, depth) f ## _ ## depth ## _c
2889 #define dspfunc1(PFX, IDX, NUM, depth)\
2890 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
2891 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
2892 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
2893 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
2895 #define dspfunc2(PFX, IDX, NUM, depth)\
2896 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
2897 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
2898 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
2899 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
2900 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
2901 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
2902 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
2903 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
2904 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
2905 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
2906 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
2907 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
2908 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
2909 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
2910 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
2911 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
2914 #define BIT_DEPTH_FUNCS(depth, dct)\
2915 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
2916 c->draw_edges = FUNCC(draw_edges , depth);\
2917 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
2918 c->clear_block = FUNCC(clear_block ## dct , depth);\
2919 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
2920 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
2921 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
2922 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
2923 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
2925 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
2926 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
2927 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
2928 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
2929 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
2930 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
2932 dspfunc1(put , 0, 16, depth);\
2933 dspfunc1(put , 1, 8, depth);\
2934 dspfunc1(put , 2, 4, depth);\
2935 dspfunc1(put , 3, 2, depth);\
2936 dspfunc1(put_no_rnd, 0, 16, depth);\
2937 dspfunc1(put_no_rnd, 1, 8, depth);\
2938 dspfunc1(avg , 0, 16, depth);\
2939 dspfunc1(avg , 1, 8, depth);\
2940 dspfunc1(avg , 2, 4, depth);\
2941 dspfunc1(avg , 3, 2, depth);\
2942 dspfunc1(avg_no_rnd, 0, 16, depth);\
2943 dspfunc1(avg_no_rnd, 1, 8, depth);\
2945 dspfunc2(put_h264_qpel, 0, 16, depth);\
2946 dspfunc2(put_h264_qpel, 1, 8, depth);\
2947 dspfunc2(put_h264_qpel, 2, 4, depth);\
2948 dspfunc2(put_h264_qpel, 3, 2, depth);\
2949 dspfunc2(avg_h264_qpel, 0, 16, depth);\
2950 dspfunc2(avg_h264_qpel, 1, 8, depth);\
2951 dspfunc2(avg_h264_qpel, 2, 4, depth);
2953 switch (avctx->bits_per_raw_sample) {
2955 if (c->dct_bits == 32) {
2956 BIT_DEPTH_FUNCS(9, _32);
2958 BIT_DEPTH_FUNCS(9, _16);
2962 if (c->dct_bits == 32) {
2963 BIT_DEPTH_FUNCS(10, _32);
2965 BIT_DEPTH_FUNCS(10, _16);
2969 BIT_DEPTH_FUNCS(8, _16);
2974 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
2975 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
2976 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
2977 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
2978 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
2979 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
2980 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
2982 for (i = 0; i < 4; i++) {
2983 for (j = 0; j < 16; j++) {
2984 if(!c->put_2tap_qpel_pixels_tab[i][j])
2985 c->put_2tap_qpel_pixels_tab[i][j] =
2986 c->put_h264_qpel_pixels_tab[i][j];
2987 if(!c->avg_2tap_qpel_pixels_tab[i][j])
2988 c->avg_2tap_qpel_pixels_tab[i][j] =
2989 c->avg_h264_qpel_pixels_tab[i][j];
2993 ff_init_scantable_permutation(c->idct_permutation,
2994 c->idct_permutation_type);