3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "libavutil/imgutils.h"
33 #include "simple_idct.h"
37 #include "mpegvideo.h"
/* Clamping LUT: indexed as ff_cropTbl[MAX_NEG_CROP + x]; zero-initialized here,
 * presumably filled at runtime by an init routine not visible in this listing —
 * TODO confirm against ff_dsputil_static_init/dsputil_init. */
43 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square LUT used via `sq = ff_squareTbl + 256` below, so sq[x] == x*x for
 * x in [-256, 255]; also zero-initialized here and filled elsewhere. */
44 uint32_t ff_squareTbl[512] = {0, };
47 #include "dsputil_template.c"
51 #include "dsputil_template.c"
55 #include "dsputil_template.c"
57 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
/* ~0UL/255 replicates 0x01 into every byte of an unsigned long, so these
 * broadcast 0x7f / 0x80 into each byte lane for word-at-a-time byte tricks. */
58 #define pb_7f (~0UL/255 * 0x7f)
59 #define pb_80 (~0UL/255 * 0x80)
/* Classic JPEG/MPEG zigzag scan order for an 8x8 block: entry i gives the
 * raster index of the i-th coefficient in scan order.
 * (Closing brace of the initializer is not visible in this listing.) */
61 const uint8_t ff_zigzag_direct[64] = {
62 0, 1, 8, 16, 9, 2, 3, 10,
63 17, 24, 32, 25, 18, 11, 4, 5,
64 12, 19, 26, 33, 40, 48, 41, 34,
65 27, 20, 13, 6, 7, 14, 21, 28,
66 35, 42, 49, 56, 57, 50, 43, 36,
67 29, 22, 15, 23, 30, 37, 44, 51,
68 58, 59, 52, 45, 38, 31, 39, 46,
69 53, 60, 61, 54, 47, 55, 62, 63
72 /* Specific zigzag scan for 248 idct. NOTE that unlike the
73 specification, we interleave the fields */
/* Scan order paired with the 2-4-8 (field-split) IDCT; rows of the two
 * fields are interleaved rather than kept separate as in the spec. */
74 const uint8_t ff_zigzag248_direct[64] = {
75 0, 8, 1, 9, 16, 24, 2, 10,
76 17, 25, 32, 40, 48, 56, 33, 41,
77 18, 26, 3, 11, 4, 12, 19, 27,
78 34, 42, 49, 57, 50, 58, 35, 43,
79 20, 28, 5, 13, 6, 14, 21, 29,
80 36, 44, 51, 59, 52, 60, 37, 45,
81 22, 30, 7, 15, 23, 31, 38, 46,
82 53, 61, 54, 62, 39, 47, 55, 63,
85 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
/* 16-byte aligned table; contents presumably computed at init time
 * (initializer not visible in this listing) — TODO confirm. */
86 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
/* Alternate horizontal scan pattern (MPEG-2/MPEG-4 alternate scan family). */
88 const uint8_t ff_alternate_horizontal_scan[64] = {
89 0, 1, 2, 3, 8, 9, 16, 17,
90 10, 11, 4, 5, 6, 7, 15, 14,
91 13, 12, 19, 18, 24, 25, 32, 33,
92 26, 27, 20, 21, 22, 23, 28, 29,
93 30, 31, 34, 35, 40, 41, 48, 49,
94 42, 43, 36, 37, 38, 39, 44, 45,
95 46, 47, 50, 51, 56, 57, 58, 59,
96 52, 53, 54, 55, 60, 61, 62, 63,
/* Alternate vertical scan pattern, typically used for interlaced content. */
99 const uint8_t ff_alternate_vertical_scan[64] = {
100 0, 8, 16, 24, 1, 9, 2, 10,
101 17, 25, 32, 40, 48, 56, 57, 49,
102 41, 33, 26, 18, 3, 11, 4, 12,
103 19, 27, 34, 42, 50, 58, 35, 43,
104 51, 59, 20, 28, 5, 13, 6, 14,
105 21, 29, 36, 44, 52, 60, 37, 45,
106 53, 61, 22, 30, 7, 15, 23, 31,
107 38, 46, 54, 62, 39, 47, 55, 63,
110 /* Input permutation for the simple_idct_mmx */
/* Maps coefficient index -> position expected by the MMX simple IDCT;
 * values are hex raster indices into the 8x8 block. */
111 static const uint8_t simple_mmx_permutation[64]={
112 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
113 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
114 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
115 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
116 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
117 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
118 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
119 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
/* Row-internal column permutation for the SSE2 IDCT; combined with the
 * row bits in the FF_SSE2_IDCT_PERM case below. */
122 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
/**
 * Initialize a ScanTable: store the raw scan order, build the permutated
 * scan (scan composed with the IDCT coefficient permutation), and fill
 * raster_end.
 * NOTE(review): the loop headers and several body lines are missing from
 * this listing; the visible statements are fragments of two loops.
 */
124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
128 st->scantable= src_scantable;
/* first loop body: permute each scan position through the IDCT permutation */
132 j = src_scantable[i];
133 st->permutated[i] = permutation[j];
/* second loop body: track the furthest raster position reached so far —
 * presumably `end` is updated between these lines; TODO confirm in full source */
139 j = st->permutated[i];
141 st->raster_end[i]= end;
/**
 * Fill idct_permutation[0..63] according to the selected IDCT's coefficient
 * layout. Each case computes the permuted index for coefficient i.
 * NOTE(review): the per-case `for` loops and `break`s are missing from this
 * listing; each assignment shown is the body of a 64-iteration loop.
 */
145 void ff_init_scantable_permutation(uint8_t *idct_permutation,
146 int idct_permutation_type)
150 switch(idct_permutation_type){
151 case FF_NO_IDCT_PERM:
/* identity: coefficients stay in natural order */
153 idct_permutation[i]= i;
155 case FF_LIBMPEG2_IDCT_PERM:
/* keep the row (bits 3-5), rotate the three column bits */
157 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
159 case FF_SIMPLE_IDCT_PERM:
161 idct_permutation[i]= simple_mmx_permutation[i];
163 case FF_TRANSPOSE_IDCT_PERM:
/* swap row and column (transpose the 8x8 index) */
165 idct_permutation[i]= ((i&7)<<3) | (i>>3);
167 case FF_PARTTRANS_IDCT_PERM:
/* partial transpose: swap the low two bits of row and column */
169 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
171 case FF_SSE2_IDCT_PERM:
/* keep the row, permute columns per the SSE2 row layout */
173 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
/* default: unknown permutation type is an internal error */
176 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
/**
 * Sum all pixel values of a 16x16 block.
 * NOTE(review): the accumulation statements inside the 8-wide inner loop are
 * missing from this listing; only the loop headers and the row advance remain.
 */
180 static int pix_sum_c(uint8_t * pix, int line_size)
185 for (i = 0; i < 16; i++) {
186 for (j = 0; j < 16; j += 8) {
/* step back to the start of the next row (inner loop advanced pix by 16) */
197 pix += line_size - 16;
/**
 * Sum of squared pixel values of a 16x16 block, via the square LUT.
 * Two variants are visible: a 64-bit-load path and a pair-of-32-bit-loads
 * path — presumably selected by a HAVE_FAST_64BIT-style #if that is missing
 * from this listing; TODO confirm.
 */
202 static int pix_norm1_c(uint8_t * pix, int line_size)
/* bias so sq[x] is valid for signed differences too; here only sq[byte] is used */
205 uint32_t *sq = ff_squareTbl + 256;
208 for (i = 0; i < 16; i++) {
209 for (j = 0; j < 16; j += 8) {
/* 64-bit path: load 8 pixels at once and square each byte lane */
221 register uint64_t x=*(uint64_t*)pix;
223 s += sq[(x>>8)&0xff];
224 s += sq[(x>>16)&0xff];
225 s += sq[(x>>24)&0xff];
226 s += sq[(x>>32)&0xff];
227 s += sq[(x>>40)&0xff];
228 s += sq[(x>>48)&0xff];
229 s += sq[(x>>56)&0xff];
/* 32-bit path: two 4-byte loads cover the same 8 pixels */
231 register uint32_t x=*(uint32_t*)pix;
233 s += sq[(x>>8)&0xff];
234 s += sq[(x>>16)&0xff];
235 s += sq[(x>>24)&0xff];
236 x=*(uint32_t*)(pix+4);
238 s += sq[(x>>8)&0xff];
239 s += sq[(x>>16)&0xff];
240 s += sq[(x>>24)&0xff];
/* advance to the next row (inner loop moved pix forward by 16) */
245 pix += line_size - 16;
/**
 * Byte-swap w 32-bit words from src into dst.
 * Main loop is unrolled by 8; the trailing line handles the remainder —
 * its enclosing tail loop header is missing from this listing.
 */
250 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
253 for(i=0; i+8<=w; i+=8){
254 dst[i+0]= av_bswap32(src[i+0]);
255 dst[i+1]= av_bswap32(src[i+1]);
256 dst[i+2]= av_bswap32(src[i+2]);
257 dst[i+3]= av_bswap32(src[i+3]);
258 dst[i+4]= av_bswap32(src[i+4]);
259 dst[i+5]= av_bswap32(src[i+5]);
260 dst[i+6]= av_bswap32(src[i+6]);
261 dst[i+7]= av_bswap32(src[i+7]);
/* remainder: one word per iteration (tail loop not visible here) */
264 dst[i+0]= av_bswap32(src[i+0]);
/**
 * Byte-swap len 16-bit values from src into dst.
 * NOTE(review): the `while`/`for` header driving this statement is missing
 * from the listing; the visible line is the loop body.
 */
268 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
271 *dst++ = av_bswap16(*src++);
/**
 * Sum of squared errors between two 4-pixel-wide blocks over h rows.
 * The LUT is biased by +256 so negative differences index correctly.
 * NOTE(review): the per-row pointer advances and the return are missing
 * from this listing.
 */
274 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
277 uint32_t *sq = ff_squareTbl + 256;
280 for (i = 0; i < h; i++) {
281 s += sq[pix1[0] - pix2[0]];
282 s += sq[pix1[1] - pix2[1]];
283 s += sq[pix1[2] - pix2[2]];
284 s += sq[pix1[3] - pix2[3]];
/**
 * Sum of squared errors between two 8-pixel-wide blocks over h rows.
 * Same LUT-bias scheme as sse4_c; pointer advances/return not visible here.
 */
291 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
294 uint32_t *sq = ff_squareTbl + 256;
297 for (i = 0; i < h; i++) {
298 s += sq[pix1[0] - pix2[0]];
299 s += sq[pix1[1] - pix2[1]];
300 s += sq[pix1[2] - pix2[2]];
301 s += sq[pix1[3] - pix2[3]];
302 s += sq[pix1[4] - pix2[4]];
303 s += sq[pix1[5] - pix2[5]];
304 s += sq[pix1[6] - pix2[6]];
305 s += sq[pix1[7] - pix2[7]];
/**
 * Sum of squared errors between two 16-pixel-wide blocks over h rows.
 * Fully unrolled per row; pointer advances/return not visible in this listing.
 */
312 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
315 uint32_t *sq = ff_squareTbl + 256;
318 for (i = 0; i < h; i++) {
319 s += sq[pix1[ 0] - pix2[ 0]];
320 s += sq[pix1[ 1] - pix2[ 1]];
321 s += sq[pix1[ 2] - pix2[ 2]];
322 s += sq[pix1[ 3] - pix2[ 3]];
323 s += sq[pix1[ 4] - pix2[ 4]];
324 s += sq[pix1[ 5] - pix2[ 5]];
325 s += sq[pix1[ 6] - pix2[ 6]];
326 s += sq[pix1[ 7] - pix2[ 7]];
327 s += sq[pix1[ 8] - pix2[ 8]];
328 s += sq[pix1[ 9] - pix2[ 9]];
329 s += sq[pix1[10] - pix2[10]];
330 s += sq[pix1[11] - pix2[11]];
331 s += sq[pix1[12] - pix2[12]];
332 s += sq[pix1[13] - pix2[13]];
333 s += sq[pix1[14] - pix2[14]];
334 s += sq[pix1[15] - pix2[15]];
/**
 * Store the per-pixel difference s1 - s2 of an 8-wide row into a DCT block.
 * NOTE(review): the enclosing 8-row loop and the stride advances for
 * block/s1/s2 are missing from this listing.
 */
342 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
343 const uint8_t *s2, int stride){
346 /* read the pixels */
348 block[0] = s1[0] - s2[0];
349 block[1] = s1[1] - s2[1];
350 block[2] = s1[2] - s2[2];
351 block[3] = s1[3] - s2[3];
352 block[4] = s1[4] - s2[4];
353 block[5] = s1[5] - s2[5];
354 block[6] = s1[6] - s2[6];
355 block[7] = s1[7] - s2[7];
/**
 * Clamp DCT coefficients to [0,255] and store them as pixels, one 8-wide
 * row per iteration of an enclosing loop that is not visible in this listing
 * (nor are the block/pixels advances).
 */
363 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
368 /* read the pixels */
370 pixels[0] = av_clip_uint8(block[0]);
371 pixels[1] = av_clip_uint8(block[1]);
372 pixels[2] = av_clip_uint8(block[2]);
373 pixels[3] = av_clip_uint8(block[3]);
374 pixels[4] = av_clip_uint8(block[4]);
375 pixels[5] = av_clip_uint8(block[5]);
376 pixels[6] = av_clip_uint8(block[6]);
377 pixels[7] = av_clip_uint8(block[7]);
/**
 * Store signed DCT coefficients as unsigned pixels: clamp to [-128,127],
 * then bias by +128. Visible branches show the >127 clamp and the biased
 * store; the <-128 clamp branch is missing from this listing.
 */
384 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
385 uint8_t *restrict pixels,
390 for (i = 0; i < 8; i++) {
391 for (j = 0; j < 8; j++) {
394 else if (*block > 127)
/* in-range value: shift signed range up into [0,255] */
397 *pixels = (uint8_t)(*block + 128);
/* advance to the next output row (inner loop moved pixels forward by 8) */
401 pixels += (line_size - 8);
/**
 * Add DCT coefficients to existing pixels with saturation to [0,255]
 * (IDCT add step). The enclosing row loop and pointer advances are missing
 * from this listing.
 */
405 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
410 /* read the pixels */
412 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
413 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
414 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
415 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
416 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
417 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
418 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
419 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
/**
 * Sum of absolute values of DCT coefficients (loop header and return are
 * missing from this listing; presumably 64 iterations — TODO confirm).
 */
425 static int sum_abs_dctelem_c(DCTELEM *block)
429 sum+= FFABS(block[i]);
/**
 * Fill a 16-wide block of h rows with a constant byte value
 * (per-row `block += line_size` advance not visible in this listing).
 */
433 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
437 for (i = 0; i < h; i++) {
438 memset(block, value, 16);
/**
 * Fill an 8-wide block of h rows with a constant byte value
 * (per-row advance not visible in this listing).
 */
443 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
447 for (i = 0; i < h; i++) {
448 memset(block, value, 8);
/* Rounded 2-tap and 4-tap averages; arguments are expected to be already
 * parenthesized expressions (classic dsputil style — args are not wrapped). */
453 #define avg2(a,b) ((a+b+1)>>1)
454 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
/**
 * 1/16-pel bilinear global motion compensation for an 8-wide block.
 * x16/y16 are the fractional positions in 1/16 units; A..D are the four
 * bilinear weights (they sum to 256, hence the >>8 after adding `rounder`).
 * The enclosing h-row loop and dst/src stride advances are missing from
 * this listing.
 */
456 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
458 const int A=(16-x16)*(16-y16);
459 const int B=( x16)*(16-y16);
460 const int C=(16-x16)*( y16);
461 const int D=( x16)*( y16);
466 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
467 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
468 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
469 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
470 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
471 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
472 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
473 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
/**
 * General (affine) global motion compensation with sub-pel precision given
 * by `shift` (s = 1<<shift is one full pel). For each destination pixel the
 * four cases handle: fully inside the source, off one edge vertically,
 * off one edge horizontally, and off both edges (nearest clipped sample).
 * NOTE(review): the outer y loop, the src_x/src_y/frac computations, the
 * rounding `+ r) >> (shift*2)` tails and several closing braces are missing
 * from this listing.
 */
479 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
480 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
483 const int s= 1<<shift;
493 for(x=0; x<8; x++){ //XXX FIXME optimize
494 int src_x, src_y, frac_x, frac_y, index;
/* unsigned compare doubles as a 0 <= v < limit range check */
503 if((unsigned)src_x < width){
504 if((unsigned)src_y < height){
/* fully inside: bilinear interpolation of the 2x2 neighborhood */
505 index= src_x + src_y*stride;
506 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
507 + src[index +1]* frac_x )*(s-frac_y)
508 + ( src[index+stride ]*(s-frac_x)
509 + src[index+stride+1]* frac_x )* frac_y
/* off the top/bottom: clamp y, interpolate horizontally only */
512 index= src_x + av_clip(src_y, 0, height)*stride;
513 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
514 + src[index +1]* frac_x )*s
518 if((unsigned)src_y < height){
/* off the left/right: clamp x, interpolate vertically only */
519 index= av_clip(src_x, 0, width) + src_y*stride;
520 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
521 + src[index+stride ]* frac_y )*s
/* off both edges: take the nearest clipped sample directly */
524 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
525 dst[y*stride + x]= src[index ];
/**
 * Thirdpel ("tpel") put-pixel interpolators. The mcXY suffix encodes the
 * horizontal (X) and vertical (Y) third-pel offset. The magic multipliers
 * implement rounded divisions: 683 ~= 2^11/3 (with >>11) and
 * 2731 ~= 2^15/12 (with >>15), matching the tap sums 3 and 12.
 * NOTE(review): the width `switch` braces, per-row src/dst advances and
 * closing braces of each function are missing from this listing.
 */
537 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
539 case 2: put_pixels2_8_c (dst, src, stride, height); break;
540 case 4: put_pixels4_8_c (dst, src, stride, height); break;
541 case 8: put_pixels8_8_c (dst, src, stride, height); break;
542 case 16:put_pixels16_8_c(dst, src, stride, height); break;
/* mc10: 1/3 pel right — weights (2,1)/3 on the horizontal pair */
546 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
548 for (i=0; i < height; i++) {
549 for (j=0; j < width; j++) {
550 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
/* mc20: 2/3 pel right — weights (1,2)/3 */
557 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
559 for (i=0; i < height; i++) {
560 for (j=0; j < width; j++) {
561 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
/* mc01: 1/3 pel down — weights (2,1)/3 on the vertical pair */
568 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
570 for (i=0; i < height; i++) {
571 for (j=0; j < width; j++) {
572 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
/* mc11: diagonal 1/3,1/3 — 2x2 weights (4,3;3,2)/12 */
579 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
581 for (i=0; i < height; i++) {
582 for (j=0; j < width; j++) {
583 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
/* mc12: 1/3 right, 2/3 down — 2x2 weights (3,2;4,3)/12 */
590 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
592 for (i=0; i < height; i++) {
593 for (j=0; j < width; j++) {
594 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* mc02: 2/3 pel down — weights (1,2)/3 vertically */
601 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
603 for (i=0; i < height; i++) {
604 for (j=0; j < width; j++) {
605 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
/* mc21: 2/3 right, 1/3 down — 2x2 weights (3,4;2,3)/12 */
612 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
614 for (i=0; i < height; i++) {
615 for (j=0; j < width; j++) {
616 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* mc22: diagonal 2/3,2/3 — 2x2 weights (2,3;3,4)/12 */
623 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
625 for (i=0; i < height; i++) {
626 for (j=0; j < width; j++) {
627 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
/**
 * Thirdpel averaging interpolators: identical filters to the put_tpel_*
 * family above, but the interpolated value is rounded-averaged with the
 * existing dst pixel ((dst + interp + 1) >> 1). Same missing-line caveats
 * as the put family apply to this listing.
 */
634 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
636 case 2: avg_pixels2_8_c (dst, src, stride, height); break;
637 case 4: avg_pixels4_8_c (dst, src, stride, height); break;
638 case 8: avg_pixels8_8_c (dst, src, stride, height); break;
639 case 16:avg_pixels16_8_c(dst, src, stride, height); break;
643 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
645 for (i=0; i < height; i++) {
646 for (j=0; j < width; j++) {
647 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
654 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
656 for (i=0; i < height; i++) {
657 for (j=0; j < width; j++) {
658 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
665 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
667 for (i=0; i < height; i++) {
668 for (j=0; j < width; j++) {
669 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
676 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
678 for (i=0; i < height; i++) {
679 for (j=0; j < width; j++) {
680 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
687 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
689 for (i=0; i < height; i++) {
690 for (j=0; j < width; j++) {
691 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
698 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
700 for (i=0; i < height; i++) {
701 for (j=0; j < width; j++) {
702 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
709 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
711 for (i=0; i < height; i++) {
712 for (j=0; j < width; j++) {
713 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
720 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
722 for (i=0; i < height; i++) {
723 for (j=0; j < width; j++) {
724 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
731 #define QPEL_MC(r, OPNAME, RND, OP) \
732 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
733 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
737 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
738 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
739 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
740 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
741 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
742 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
743 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
744 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
750 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
752 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
756 const int src0= src[0*srcStride];\
757 const int src1= src[1*srcStride];\
758 const int src2= src[2*srcStride];\
759 const int src3= src[3*srcStride];\
760 const int src4= src[4*srcStride];\
761 const int src5= src[5*srcStride];\
762 const int src6= src[6*srcStride];\
763 const int src7= src[7*srcStride];\
764 const int src8= src[8*srcStride];\
765 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
766 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
767 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
768 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
769 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
770 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
771 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
772 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
778 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
779 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
784 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
785 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
786 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
787 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
788 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
789 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
790 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
791 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
792 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
793 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
794 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
795 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
796 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
797 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
798 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
799 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
805 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
806 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
811 const int src0= src[0*srcStride];\
812 const int src1= src[1*srcStride];\
813 const int src2= src[2*srcStride];\
814 const int src3= src[3*srcStride];\
815 const int src4= src[4*srcStride];\
816 const int src5= src[5*srcStride];\
817 const int src6= src[6*srcStride];\
818 const int src7= src[7*srcStride];\
819 const int src8= src[8*srcStride];\
820 const int src9= src[9*srcStride];\
821 const int src10= src[10*srcStride];\
822 const int src11= src[11*srcStride];\
823 const int src12= src[12*srcStride];\
824 const int src13= src[13*srcStride];\
825 const int src14= src[14*srcStride];\
826 const int src15= src[15*srcStride];\
827 const int src16= src[16*srcStride];\
828 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
829 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
830 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
831 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
832 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
833 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
834 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
835 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
836 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
837 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
838 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
839 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
840 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
841 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
842 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
843 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
849 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
851 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
852 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
855 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
856 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
859 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
861 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
862 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
865 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
868 copy_block9(full, src, 16, stride, 9);\
869 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
870 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
873 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
875 copy_block9(full, src, 16, stride, 9);\
876 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
879 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
882 copy_block9(full, src, 16, stride, 9);\
883 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
884 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
886 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
891 copy_block9(full, src, 16, stride, 9);\
892 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
893 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
894 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
895 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
897 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
901 copy_block9(full, src, 16, stride, 9);\
902 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
903 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
904 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
905 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
907 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
912 copy_block9(full, src, 16, stride, 9);\
913 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
914 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
915 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
916 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
918 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
922 copy_block9(full, src, 16, stride, 9);\
923 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
924 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
925 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
926 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
928 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
933 copy_block9(full, src, 16, stride, 9);\
934 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
935 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
936 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
937 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
939 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
943 copy_block9(full, src, 16, stride, 9);\
944 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
945 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
946 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
947 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
949 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
954 copy_block9(full, src, 16, stride, 9);\
955 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
956 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
957 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
958 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
960 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
964 copy_block9(full, src, 16, stride, 9);\
965 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
966 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
967 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
968 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
970 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
974 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
975 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
977 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
980 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
981 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
982 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
984 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
989 copy_block9(full, src, 16, stride, 9);\
990 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
991 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
992 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
993 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
995 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
998 copy_block9(full, src, 16, stride, 9);\
999 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1000 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1001 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1003 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1004 uint8_t full[16*9];\
1007 uint8_t halfHV[64];\
1008 copy_block9(full, src, 16, stride, 9);\
1009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1010 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1012 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1014 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1015 uint8_t full[16*9];\
1017 copy_block9(full, src, 16, stride, 9);\
1018 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1019 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1020 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1022 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1024 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1025 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1028 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1030 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1031 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1034 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1035 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1038 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1040 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1041 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1044 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1045 uint8_t full[24*17];\
1047 copy_block17(full, src, 24, stride, 17);\
1048 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1049 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1052 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1053 uint8_t full[24*17];\
1054 copy_block17(full, src, 24, stride, 17);\
1055 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1058 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1059 uint8_t full[24*17];\
1061 copy_block17(full, src, 24, stride, 17);\
1062 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1063 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1065 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1066 uint8_t full[24*17];\
1067 uint8_t halfH[272];\
1068 uint8_t halfV[256];\
1069 uint8_t halfHV[256];\
1070 copy_block17(full, src, 24, stride, 17);\
1071 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1072 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1073 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1074 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1076 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1077 uint8_t full[24*17];\
1078 uint8_t halfH[272];\
1079 uint8_t halfHV[256];\
1080 copy_block17(full, src, 24, stride, 17);\
1081 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1082 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1083 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1084 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1086 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1087 uint8_t full[24*17];\
1088 uint8_t halfH[272];\
1089 uint8_t halfV[256];\
1090 uint8_t halfHV[256];\
1091 copy_block17(full, src, 24, stride, 17);\
1092 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1093 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1094 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1095 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1097 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1098 uint8_t full[24*17];\
1099 uint8_t halfH[272];\
1100 uint8_t halfHV[256];\
1101 copy_block17(full, src, 24, stride, 17);\
1102 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1103 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1104 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1105 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1107 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1108 uint8_t full[24*17];\
1109 uint8_t halfH[272];\
1110 uint8_t halfV[256];\
1111 uint8_t halfHV[256];\
1112 copy_block17(full, src, 24, stride, 17);\
1113 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1114 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1115 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1116 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1118 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1119 uint8_t full[24*17];\
1120 uint8_t halfH[272];\
1121 uint8_t halfHV[256];\
1122 copy_block17(full, src, 24, stride, 17);\
1123 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1124 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1125 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1126 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1128 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1129 uint8_t full[24*17];\
1130 uint8_t halfH[272];\
1131 uint8_t halfV[256];\
1132 uint8_t halfHV[256];\
1133 copy_block17(full, src, 24, stride, 17);\
1134 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1135 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1136 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1137 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1139 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1140 uint8_t full[24*17];\
1141 uint8_t halfH[272];\
1142 uint8_t halfHV[256];\
1143 copy_block17(full, src, 24, stride, 17);\
1144 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1145 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1146 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1147 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1149 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1150 uint8_t halfH[272];\
1151 uint8_t halfHV[256];\
1152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1153 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1154 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1156 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1157 uint8_t halfH[272];\
1158 uint8_t halfHV[256];\
1159 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1160 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1161 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1163 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1164 uint8_t full[24*17];\
1165 uint8_t halfH[272];\
1166 uint8_t halfV[256];\
1167 uint8_t halfHV[256];\
1168 copy_block17(full, src, 24, stride, 17);\
1169 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1170 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1171 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1172 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1174 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1175 uint8_t full[24*17];\
1176 uint8_t halfH[272];\
1177 copy_block17(full, src, 24, stride, 17);\
1178 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1179 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1180 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1182 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1183 uint8_t full[24*17];\
1184 uint8_t halfH[272];\
1185 uint8_t halfV[256];\
1186 uint8_t halfHV[256];\
1187 copy_block17(full, src, 24, stride, 17);\
1188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1189 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1191 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1193 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1194 uint8_t full[24*17];\
1195 uint8_t halfH[272];\
1196 copy_block17(full, src, 24, stride, 17);\
1197 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1198 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1199 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1201 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1202 uint8_t halfH[272];\
1203 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1204 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Output operators plugged into the QPEL_MC template above.
 * 'b' is an unscaled filter sum; cm[] (ff_cropTbl-based) clamps it to 0..255
 * after the +16>>5 (rounding) or +15>>5 (no-round) normalization.
 * The _avg variants additionally average with the existing dst pixel. */
1207 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1208 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1209 #define op_put(a, b) a = cm[((b) + 16)>>5]
1210 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the quarter-pel MC function families: put, put_no_rnd, avg. */
1212 QPEL_MC(0, put_ , _ , op_put)
1213 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1214 QPEL_MC(0, avg_ , _ , op_avg)
1215 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1217 #undef op_avg_no_rnd
1219 #undef op_put_no_rnd
/* mc00 (integer-pel, no filtering) cases are plain pixel copies/averages,
 * so alias them to the generic pixel move helpers instead of generating code. */
1221 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1222 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1223 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1224 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1225 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1226 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
/* WMV2 mspel horizontal lowpass: for each output pixel applies the 4-tap
 * half-pel filter (-1, 9, 9, -1)/16 with rounding (+8 >> 4) across one row,
 * clamped to 0..255 through cm[].  h rows are processed (per-row loop elided
 * in this listing — NOTE(review): src/dst advance by their strides per row). */
1228 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1229 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1233 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1234 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1235 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1236 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1237 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1238 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1239 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1240 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1246 #if CONFIG_RV40_DECODER
/* RV40 (3,3) quarter-pel positions degenerate to the plain xy2 (half-pel
 * diagonal) average, so these exported entry points just forward to the
 * generic xy2 pixel helpers for 16x16 and 8x8 blocks. */
1247 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1248 put_pixels16_xy2_8_c(dst, src, stride, 16);
1250 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1251 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1253 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1254 put_pixels8_xy2_8_c(dst, src, stride, 8);
1256 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1257 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1259 #endif /* CONFIG_RV40_DECODER */
1261 #if CONFIG_DIRAC_DECODER
/* Dirac MC wrappers: forward to the generic 8-bit pixel helpers.
 * src[] carries up to 4 reference planes (src[0]..src[3]); the plain variants
 * use src[0], _l2 averages two planes, _l4 averages four.  32-wide blocks are
 * handled as two adjacent 16-wide calls (dst and dst+16).
 * (Comments are kept outside the macro body so the \-continuations stay intact.) */
1262 #define DIRAC_MC(OPNAME)\
1263 void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1265 OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
1267 void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1269 OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
1271 void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1273 OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);\
1274 OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
1276 void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1278 OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1280 void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1282 OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1284 void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1286 OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);\
1287 OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
1289 void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1291 OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1293 void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1295 OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1297 void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1299 OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);\
1300 OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
/* WMV2 mspel vertical lowpass: same (-1, 9, 9, -1)/16 rounding filter as the
 * horizontal variant, applied down one column of 8 output pixels.  The 10
 * source samples of the column (one above, eight in range, one below) are
 * loaded once into locals, then the 8 filtered outputs are written.  w columns
 * are processed (per-column loop elided in this listing). */
1306 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1307 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1311 const int src_1= src[ -srcStride];
1312 const int src0 = src[0 ];
1313 const int src1 = src[ srcStride];
1314 const int src2 = src[2*srcStride];
1315 const int src3 = src[3*srcStride];
1316 const int src4 = src[4*srcStride];
1317 const int src5 = src[5*srcStride];
1318 const int src6 = src[6*srcStride];
1319 const int src7 = src[7*srcStride];
1320 const int src8 = src[8*srcStride];
1321 const int src9 = src[9*srcStride];
1322 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1323 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1324 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1325 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1326 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1327 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1328 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1329 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* mspel (1,0): horizontal half-pel — lowpass into a temp, average with src. */
1335 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1337 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1338 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
/* mspel (2,0): pure horizontal lowpass, written straight to dst. */
1341 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1342 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
/* mspel (3,0): like (1,0) but averaged with src shifted one pixel right. */
1345 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1347 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1348 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
/* mspel (0,2): pure vertical lowpass, written straight to dst. */
1351 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1352 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
/* mspel (1,2): average of vertical lowpass and H-then-V lowpass (halfHV). */
1355 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1359 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1360 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1361 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1362 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
/* mspel (3,2): as (1,2) but the vertical pass reads from src+1. */
1364 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1368 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1369 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1370 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1371 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
/* mspel (2,2): horizontal lowpass then vertical lowpass straight to dst. */
1373 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1375 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1376 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
/* H.263 Annex-J style deblocking across a horizontal edge: for each column x,
 * reads the 4 pixels straddling the edge (p0..p3), derives the qscale-dependent
 * filter delta d1 via the piecewise-linear strength function, clamps the inner
 * pixels p1/p2 (the p&256 trick saturates to 0/255 in one branch), and applies
 * a secondary correction d2 to the outer pixels p0/p3. */
1379 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1380 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1382 const int strength= ff_h263_loop_filter_strength[qscale];
1386 int p0= src[x-2*stride];
1387 int p1= src[x-1*stride];
1388 int p2= src[x+0*stride];
1389 int p3= src[x+1*stride];
1390 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1392 if (d<-2*strength) d1= 0;
1393 else if(d<- strength) d1=-2*strength - d;
1394 else if(d< strength) d1= d;
1395 else if(d< 2*strength) d1= 2*strength - d;
/* saturate p1/p2 to 0..255: if out of range, ~(p>>31) yields 0 or 255 */
1400 if(p1&256) p1= ~(p1>>31);
1401 if(p2&256) p2= ~(p2>>31);
1403 src[x-1*stride] = p1;
1404 src[x+0*stride] = p2;
1408 d2= av_clip((p0-p3)/4, -ad1, ad1);
1410 src[x-2*stride] = p0 - d2;
1411 src[x+ stride] = p3 + d2;
/* Same filter as above, applied across a vertical edge (per row y). */
1416 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1417 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1419 const int strength= ff_h263_loop_filter_strength[qscale];
1423 int p0= src[y*stride-2];
1424 int p1= src[y*stride-1];
1425 int p2= src[y*stride+0];
1426 int p3= src[y*stride+1];
1427 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1429 if (d<-2*strength) d1= 0;
1430 else if(d<- strength) d1=-2*strength - d;
1431 else if(d< strength) d1= d;
1432 else if(d< 2*strength) d1= 2*strength - d;
1437 if(p1&256) p1= ~(p1>>31);
1438 if(p2&256) p2= ~(p2>>31);
1440 src[y*stride-1] = p1;
1441 src[y*stride+0] = p2;
1445 d2= av_clip((p0-p3)/4, -ad1, ad1);
1447 src[y*stride-2] = p0 - d2;
1448 src[y*stride+1] = p3 + d2;
/* H.261 in-loop filter on an 8x8 block: separable (1,2,1)/4 smoothing using a
 * temp[] accumulator; border rows/columns are copied through (x4 then >>2). */
1453 static void h261_loop_filter_c(uint8_t *src, int stride){
1458 temp[x ] = 4*src[x ];
1459 temp[x + 7*8] = 4*src[x + 7*stride];
1463 xy = y * stride + x;
1465 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
1470 src[ y*stride] = (temp[ y*8] + 2)>>2;
1471 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1473 xy = y * stride + x;
1475 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
/* SAD of a 16-wide block: sum |pix1 - pix2| over one row, unrolled; the
 * enclosing per-row loop over h (elided in this listing) accumulates s. */
1480 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1486 s += abs(pix1[0] - pix2[0]);
1487 s += abs(pix1[1] - pix2[1]);
1488 s += abs(pix1[2] - pix2[2]);
1489 s += abs(pix1[3] - pix2[3]);
1490 s += abs(pix1[4] - pix2[4]);
1491 s += abs(pix1[5] - pix2[5]);
1492 s += abs(pix1[6] - pix2[6]);
1493 s += abs(pix1[7] - pix2[7]);
1494 s += abs(pix1[8] - pix2[8]);
1495 s += abs(pix1[9] - pix2[9]);
1496 s += abs(pix1[10] - pix2[10]);
1497 s += abs(pix1[11] - pix2[11]);
1498 s += abs(pix1[12] - pix2[12]);
1499 s += abs(pix1[13] - pix2[13]);
1500 s += abs(pix1[14] - pix2[14]);
1501 s += abs(pix1[15] - pix2[15]);
/* SAD vs. horizontal half-pel reference: pix2 averaged with its right neighbor. */
1508 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1514 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1515 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1516 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1517 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1518 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1519 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1520 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1521 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1522 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1523 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1524 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1525 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1526 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1527 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1528 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1529 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
/* SAD vs. vertical half-pel reference: pix2 averaged with the row below (pix3). */
1536 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1539 uint8_t *pix3 = pix2 + line_size;
1543 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1544 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1545 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1546 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1547 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1548 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1549 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1550 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1551 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1552 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1553 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1554 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1555 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1556 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1557 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1558 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
/* SAD vs. diagonal half-pel reference: 4-sample average of the 2x2 neighborhood. */
1566 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1569 uint8_t *pix3 = pix2 + line_size;
1573 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1574 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1575 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1576 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1577 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1578 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1579 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1580 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1581 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1582 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1583 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1584 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1585 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1586 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1587 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1588 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/* 8-wide variants of the four SAD functions above. */
1596 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1602 s += abs(pix1[0] - pix2[0]);
1603 s += abs(pix1[1] - pix2[1]);
1604 s += abs(pix1[2] - pix2[2]);
1605 s += abs(pix1[3] - pix2[3]);
1606 s += abs(pix1[4] - pix2[4]);
1607 s += abs(pix1[5] - pix2[5]);
1608 s += abs(pix1[6] - pix2[6]);
1609 s += abs(pix1[7] - pix2[7]);
/* 8-wide, horizontal half-pel reference. */
1616 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1622 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1623 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1624 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1625 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1626 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1627 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1628 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1629 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
/* 8-wide, vertical half-pel reference. */
1636 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1639 uint8_t *pix3 = pix2 + line_size;
1643 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1644 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1645 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1646 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1647 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1648 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1649 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1650 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
/* 8-wide, diagonal half-pel reference. */
1658 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1661 uint8_t *pix3 = pix2 + line_size;
1665 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1666 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1667 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1668 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1669 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1670 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1671 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1672 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/* Noise-shaped SSE, 16-wide: score1 is plain SSE; score2 compares the 2x2
 * gradient texture of the two blocks.  The gradient-difference term is
 * weighted by avctx->nsse_weight (or 8 when no context is supplied). */
1680 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1681 MpegEncContext *c = v;
1687 for(x=0; x<16; x++){
1688 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1691 for(x=0; x<15; x++){
1692 score2+= FFABS( s1[x ] - s1[x +stride]
1693 - s1[x+1] + s1[x+1+stride])
1694 -FFABS( s2[x ] - s2[x +stride]
1695 - s2[x+1] + s2[x+1+stride]);
1702 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1703 else return score1 + FFABS(score2)*8;
/* 8-wide variant of nsse16_c. */
1706 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1707 MpegEncContext *c = v;
1714 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1718 score2+= FFABS( s1[x ] - s1[x +stride]
1719 - s1[x+1] + s1[x+1+stride])
1720 -FFABS( s2[x ] - s2[x +stride]
1721 - s2[x+1] + s2[x+1+stride]);
1728 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1729 else return score1 + FFABS(score2)*8;
/* Trellis helper: weighted squared error of rem[] after adding scale*basis[]
 * (rescaled from BASIS_SHIFT to RECON_SHIFT with rounding). */
1732 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1736 for(i=0; i<8*8; i++){
1737 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1740 assert(-512<b && b<512);
1742 sum += (w*b)*(w*b)>>4;
/* Commit the same scaled basis vector into rem[] in place. */
1747 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1750 for(i=0; i<8*8; i++){
1751 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1756 * Permute an 8x8 block.
1757 * @param block the block which will be permuted according to the given permutation vector
1758 * @param permutation the permutation vector
1759 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1760 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1761 * (inverse) permutated to scantable order!
1763 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1769 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
/* first pass: stash the coefficients touched up to 'last' (into temp[],
 * declaration elided in this listing), so the in-place write below is safe */
1771 for(i=0; i<=last; i++){
1772 const int j= scantable[i];
/* second pass: scatter each saved coefficient to its permuted position */
1777 for(i=0; i<=last; i++){
1778 const int j= scantable[i];
1779 const int perm_j= permutation[j];
1780 block[perm_j]= temp[j];
/**
 * Dummy compare function used when no real metric is wanted:
 * matches the me_cmp_func signature and always reports a score of 0.
 * (The body was truncated in this listing; restored here.)
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
/* Fill the 6-entry cmp[] function-pointer array with the DSPContext
 * implementations selected by 'type'; unknown types hit the error log below.
 * (The switch scaffolding between the assignments is elided in this listing.) */
1788 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1791 memset(cmp, 0, sizeof(void*)*6);
1799 cmp[i]= c->hadamard8_diff[i];
1805 cmp[i]= c->dct_sad[i];
1808 cmp[i]= c->dct264_sad[i];
1811 cmp[i]= c->dct_max[i];
1814 cmp[i]= c->quant_psnr[i];
1843 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
/* dst[i] += src[i] bytewise, one machine word at a time: pb_7f/pb_80 masks
 * implement SWAR addition without cross-byte carries; the scalar loop below
 * handles the tail that does not fill a whole long. */
1848 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1850 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1851 long a = *(long*)(src+i);
1852 long b = *(long*)(dst+i);
1853 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1856 dst[i+0] += src[i+0];
/* dst[i] = src1[i] - src2[i] bytewise; word-at-a-time SWAR subtraction, with
 * a byte-unrolled fallback when src2 is misaligned on slow-unaligned targets. */
1859 static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
1861 #if !HAVE_FAST_UNALIGNED
1862 if((long)src2 & (sizeof(long)-1)){
1863 for(i=0; i+7<w; i+=8){
1864 dst[i+0] = src1[i+0]-src2[i+0];
1865 dst[i+1] = src1[i+1]-src2[i+1];
1866 dst[i+2] = src1[i+2]-src2[i+2];
1867 dst[i+3] = src1[i+3]-src2[i+3];
1868 dst[i+4] = src1[i+4]-src2[i+4];
1869 dst[i+5] = src1[i+5]-src2[i+5];
1870 dst[i+6] = src1[i+6]-src2[i+6];
1871 dst[i+7] = src1[i+7]-src2[i+7];
1875 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1876 long a = *(long*)(src1+i);
1877 long b = *(long*)(src2+i);
1878 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1881 dst[i+0] = src1[i+0]-src2[i+0];
/* HuffYUV median predictor, decode side: reconstruct dst from the residual
 * diff[] using the median of left, top and (left+top-topleft). */
1884 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1892 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
/* HuffYUV median predictor, encode side: emit residuals vs. the same median. */
1901 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1909 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
/* HuffYUV left predictor (decode): running accumulator 'acc' added along the row. */
1919 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1922 for(i=0; i<w-1; i++){
/* BGR32 variant: one independent left-prediction accumulator per channel. */
1949 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Butterfly helpers for the 8x8 Hadamard transform below:
 * BUTTERFLY2 writes sum/difference of two inputs to two outputs,
 * BUTTERFLY1 does it in place, BUTTERFLYA returns |x+y| + |x-y|. */
1979 #define BUTTERFLY2(o1,o2,i1,i2) \
1983 #define BUTTERFLY1(x,y) \
1992 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/* SATD of (src - dst): 8x8 Hadamard transform of the difference block via
 * row then column butterflies, summing absolute transformed coefficients. */
1994 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
2002 //FIXME try pointer walks
2003 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2004 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2005 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2006 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2008 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2009 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2010 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2011 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2013 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2014 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2015 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2016 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2020 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2021 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2022 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2023 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2025 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2026 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2027 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2028 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2031 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2032 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2033 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2034 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
/* Intra SATD: same Hadamard, applied to src directly; the DC term (mean)
 * is subtracted from the sum at the end. */
2039 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
2047 //FIXME try pointer walks
2048 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2049 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2050 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2051 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2053 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2054 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2055 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2056 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2058 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2059 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2060 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2061 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2065 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2066 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2067 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2068 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2070 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2071 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2072 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2073 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2076 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2077 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2078 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2079 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2082 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
/* DCT-based SAD: forward-DCT the pixel difference and sum |coefficients|. */
2087 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2088 MpegEncContext * const s= (MpegEncContext *)c;
2089 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2093 s->dsp.diff_pixels(temp, src1, src2, stride);
2095 return s->dsp.sum_abs_dctelem(temp);
/* Body of the DCT8_1D macro (its #define line is elided in this listing):
 * one 8-point H.264-style integer DCT pass expressed through the SRC()/DST()
 * macros that the callers redefine per direction. */
2100 const int s07 = SRC(0) + SRC(7);\
2101 const int s16 = SRC(1) + SRC(6);\
2102 const int s25 = SRC(2) + SRC(5);\
2103 const int s34 = SRC(3) + SRC(4);\
2104 const int a0 = s07 + s34;\
2105 const int a1 = s16 + s25;\
2106 const int a2 = s07 - s34;\
2107 const int a3 = s16 - s25;\
2108 const int d07 = SRC(0) - SRC(7);\
2109 const int d16 = SRC(1) - SRC(6);\
2110 const int d25 = SRC(2) - SRC(5);\
2111 const int d34 = SRC(3) - SRC(4);\
2112 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2113 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2114 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2115 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2117 DST(1, a4 + (a7>>2)) ;\
2118 DST(2, a2 + (a3>>1)) ;\
2119 DST(3, a5 + (a6>>2)) ;\
2121 DST(5, a6 - (a5>>2)) ;\
2122 DST(6, (a2>>1) - a3 ) ;\
2123 DST(7, (a4>>2) - a7 ) ;\
2126 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2127 MpegEncContext * const s= (MpegEncContext *)c;
/* H.264-DCT SAD: transform the difference row-wise then column-wise via
 * DCT8_1D, accumulating |coefficients| through the second DST() definition. */
2132 s->dsp.diff_pixels(dct[0], src1, src2, stride);
2134 #define SRC(x) dct[i][x]
2135 #define DST(x,v) dct[i][x]= v
2136 for( i = 0; i < 8; i++ )
2141 #define SRC(x) dct[x][i]
2142 #define DST(x,v) sum += FFABS(v)
2143 for( i = 0; i < 8; i++ )
/* DCT-max metric: largest |coefficient| of the DCT of the difference block. */
2151 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2152 MpegEncContext * const s= (MpegEncContext *)c;
2153 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2158 s->dsp.diff_pixels(temp, src1, src2, stride);
2162 sum= FFMAX(sum, FFABS(temp[i]));
/* Quantization-PSNR metric: quantize + dequantize + IDCT the difference and
 * measure the squared error against the unquantized copy in bak[]. */
2167 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2168 MpegEncContext * const s= (MpegEncContext *)c;
2169 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2170 DCTELEM * const bak = temp+64;
2176 s->dsp.diff_pixels(temp, src1, src2, stride);
2178 memcpy(bak, temp, 64*sizeof(DCTELEM));
2180 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2181 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2182 ff_simple_idct_8(temp); //FIXME
2185 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
/* Rate-distortion metric: quantize the DCT of the difference, count the VLC
 * bits of the coefficients (run/level via UNI_AC_ENC_INDEX, escapes cost
 * esc_length), reconstruct via dequantize + idct_add, and return
 * distortion + lambda-weighted bits (the 109/128 * qscale^2 factor). */
2190 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2191 MpegEncContext * const s= (MpegEncContext *)c;
2192 const uint8_t *scantable= s->intra_scantable.permutated;
2193 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2194 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2195 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2196 int i, last, run, bits, level, distortion, start_i;
2197 const int esc_length= s->ac_esc_length;
2199 uint8_t * last_length;
2203 copy_block8(lsrc1, src1, 8, stride, 8);
2204 copy_block8(lsrc2, src2, 8, stride, 8);
2206 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2208 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
/* intra blocks: separate DC VLC table; inter blocks: AC tables only */
2214 length = s->intra_ac_vlc_length;
2215 last_length= s->intra_ac_vlc_last_length;
2216 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2219 length = s->inter_ac_vlc_length;
2220 last_length= s->inter_ac_vlc_last_length;
2225 for(i=start_i; i<last; i++){
2226 int j= scantable[i];
2231 if((level&(~127)) == 0){
2232 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2241 level= temp[i] + 64;
2245 if((level&(~127)) == 0){
2246 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2254 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2256 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2259 s->dsp.idct_add(lsrc2, 8, temp);
2261 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2263 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* Pure bit-count metric: same VLC accounting as rd8x8_c but without the
 * reconstruction/distortion step — returns the coded-bit estimate only. */
2266 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2267 MpegEncContext * const s= (MpegEncContext *)c;
2268 const uint8_t *scantable= s->intra_scantable.permutated;
2269 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2270 int i, last, run, bits, level, start_i;
2271 const int esc_length= s->ac_esc_length;
2273 uint8_t * last_length;
2277 s->dsp.diff_pixels(temp, src1, src2, stride);
2279 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2285 length = s->intra_ac_vlc_length;
2286 last_length= s->intra_ac_vlc_last_length;
2287 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2290 length = s->inter_ac_vlc_length;
2291 last_length= s->inter_ac_vlc_last_length;
2296 for(i=start_i; i<last; i++){
2297 int j= scantable[i];
2302 if((level&(~127)) == 0){
2303 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2312 level= temp[i] + 64;
2316 if((level&(~127)) == 0){
2317 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* Vertical SAD of one block against itself shifted down by one row:
 * sum of |s[x] - s[x+stride]| — a cheap measure of vertical activity.
 * Restored missing row advance, return and instantiations. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
/**
 * Vertical SAD of the residual s1-s2: sums |(s1-s2)[x] - (s1-s2)[x+stride]|
 * over a 16-wide block. Restored missing declarations, row advance, return.
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
/* squared value helper, used by the VSSE metrics below */
#define SQ(a) ((a)*(a))
/* Vertical SSE of one block against itself shifted down by one row:
 * sum of (s[x] - s[x+stride])^2. Restored missing row advance, return
 * and instantiations. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
/**
 * Vertical SSE of the residual s1-s2 over a 16-wide block.
 * Restored missing declarations, row advance and return.
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
/**
 * Sum of squared differences between an int8 and an int16 vector.
 * @param size number of elements
 * @return sum over i of (pix1[i] - pix2[i])^2
 * Restored missing declarations and return (truncated source).
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int score = 0;
    int i;

    for (i = 0; i < size; i++)
        score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);

    return score;
}
2401 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2402 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2403 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2405 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2407 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2408 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2409 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2410 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/**
 * Element-wise product of two float vectors: dst[i] = src0[i] * src1[i].
 * Restored missing declaration and closing brace (truncated source).
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[i];
}
/**
 * Multiply src0 by src1 read backwards: dst[i] = src0[i] * src1[len-1-i].
 * The visible dst[i] = src0[i] * src1[-i] line requires src1 to be advanced
 * to its last element first; that dropped line is restored here.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    src1 += len-1;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[-i];
}
/**
 * Fused multiply-add on float vectors: dst[i] = src0[i] * src1[i] + src2[i].
 * Restored missing declaration and closing brace (truncated source).
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[i] + src2[i];
}
/**
 * Overlap-add windowing as used by MDCT-based audio codecs:
 * walks inward from both ends of a 2*len window and writes the
 * symmetric cross-faded pair dst[i]/dst[j].
 * The dropped pointer re-basing and local loads are restored.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int i,j;

    /* re-base so the loop can index with negative i and positive j */
    dst += len;
    win += len;
    src0+= len;

    for(i=-len, j=len-1; i<0; i++, j--) {
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];
        dst[i] = s0*wj - s1*wi;
        dst[j] = s0*wi + s1*wj;
    }
}
/**
 * Scale a float vector by a scalar: dst[i] = src[i] * mul.
 * Restored missing brace/declaration (truncated source).
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src[i] * mul;
}
/**
 * Multiply-accumulate a scaled vector into dst: dst[i] += src[i] * mul.
 * Restored missing brace/declaration (truncated source).
 */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] += src[i] * mul;
}
/**
 * In-place butterfly: v1[i] <- v1[i]+v2[i], v2[i] <- v1[i]-v2[i]
 * (original values). Restored the dropped sum/store lines.
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float t = v1[i] - v2[i];
        v1[i] += v2[i];
        v2[i] = t;
    }
}
/**
 * Butterfly with interleaved output: dst[2i] = src0[i]+src1[i],
 * dst[2i+1] = src0[i]-src1[i]. Restored the dropped local loads
 * and closing braces.
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float f1 = src0[i];
        float f2 = src1[i];
        dst[2*i    ] = f1 + f2;
        dst[2*i + 1] = f1 - f2;
    }
}
/**
 * Dot product of two float vectors.
 * @return sum over i of v1[i] * v2[i]
 * Restored missing accumulator declaration and return (truncated source).
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float p = 0.0;
    int i;

    for (i = 0; i < len; i++)
        p += v1[i] * v2[i];

    return p;
}
/**
 * Clip one float, handled as its IEEE-754 bit pattern, against a
 * positive upper bound (mini) and a negative lower bound (maxi whose
 * sign-flipped image is maxisign). Helper for
 * vector_clipf_c_opposite_sign(). Restored the dropped pass-through
 * branch and closing brace.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if(a > mini) return mini;
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}
/* Clip floats to [*min, *max] where *min < 0 < *max, operating on the raw
 * IEEE-754 bit patterns via clipf_c_one(). len must be a multiple of 8
 * (loop is 8x unrolled). Restored missing declaration and closing braces.
 * NOTE(review): the uint32_t* casts type-pun float storage — preserved
 * from the original, but formally a strict-aliasing violation. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Clip a float vector to [min, max]; len must be a multiple of 8.
 * When the range straddles zero the bit-trick fast path is taken,
 * otherwise a plain 8x-unrolled av_clipf loop runs. Restored the
 * dropped else branch and closing braces.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
/**
 * Dot product of two int16 vectors, accumulated in 32 bits.
 * @param order number of elements
 * Restored missing accumulator declaration and return (truncated source).
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Combined dot product and multiply-accumulate:
 * returns sum(v1[i]*v2[i]) computed on the ORIGINAL v1 values, while
 * updating v1[i] += mul * v3[i] in place. Restored the dropped dot-product
 * line, declarations and return.
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;
    while (order--) {
        res   += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }
    return res;
}
/**
 * Apply a symmetric half window to an int16 signal with Q15 rounding:
 * the same coefficient window[i] scales both input[i] and its mirror
 * input[len-1-i]. Restored missing braces/declarations (truncated source).
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w       = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
/**
 * Clip an int32 vector to [min, max]; len must be a non-zero multiple
 * of 8 (8x-unrolled do/while). Restored the dropped loop framing.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/* Fixed-point butterfly coefficients for the WMV2 IDCT below.
 * W0 is referenced by wmv2_idct_row()/wmv2_idct_col() but its #define was
 * missing from this file; restored here. */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
2601 static void wmv2_idct_row(short * b)
2604 int a0,a1,a2,a3,a4,a5,a6,a7;
2606 a1 = W1*b[1]+W7*b[7];
2607 a7 = W7*b[1]-W1*b[7];
2608 a5 = W5*b[5]+W3*b[3];
2609 a3 = W3*b[5]-W5*b[3];
2610 a2 = W2*b[2]+W6*b[6];
2611 a6 = W6*b[2]-W2*b[6];
2612 a0 = W0*b[0]+W0*b[4];
2613 a4 = W0*b[0]-W0*b[4];
2615 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2616 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2618 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2619 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2620 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2621 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2622 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2623 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2624 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2625 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2627 static void wmv2_idct_col(short * b)
2630 int a0,a1,a2,a3,a4,a5,a6,a7;
2631 /*step 1, with extended precision*/
2632 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2633 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2634 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2635 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2636 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2637 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2638 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2639 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2641 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2642 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2644 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2645 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2646 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2647 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2649 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2650 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2651 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2652 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
/**
 * Full 8x8 WMV2 inverse DCT: a row pass over each of the 8 rows,
 * then a column pass over each of the 8 columns.
 * Restored missing loop framing (truncated source).
 */
void ff_wmv2_idct_c(short * block){
    int i;

    for(i=0;i<64;i+=8){
        wmv2_idct_row(block+i);
    }
    for(i=0;i<8;i++){
        wmv2_idct_col(block+i);
    }
}
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
2666 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2668 ff_wmv2_idct_c(block);
2669 ff_put_pixels_clamped_c(block, dest, line_size);
2671 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2673 ff_wmv2_idct_c(block);
2674 ff_add_pixels_clamped_c(block, dest, line_size);
2676 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2678 ff_j_rev_dct (block);
2679 ff_put_pixels_clamped_c(block, dest, line_size);
2681 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2683 ff_j_rev_dct (block);
2684 ff_add_pixels_clamped_c(block, dest, line_size);
/* no-op with the prefetch function-pointer signature; default when no arch-specific prefetch exists */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
2689 /* init static data */
2690 av_cold void ff_dsputil_static_init(void)
2694 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2695 for(i=0;i<MAX_NEG_CROP;i++) {
2697 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2700 for(i=0;i<512;i++) {
2701 ff_squareTbl[i] = (i - 256) * (i - 256);
2704 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2707 int ff_check_alignment(void){
2708 static int did_fail=0;
2709 LOCAL_ALIGNED_16(int, aligned, [4]);
2711 if((intptr_t)aligned & 15){
2713 #if HAVE_MMX || HAVE_ALTIVEC
2714 av_log(NULL, AV_LOG_ERROR,
2715 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2716 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2717 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2718 "Do not report crashes to FFmpeg developers.\n");
2727 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2731 ff_check_alignment();
2734 if (avctx->bits_per_raw_sample == 10) {
2735 c->fdct = ff_jpeg_fdct_islow_10;
2736 c->fdct248 = ff_fdct248_islow_10;
2738 if(avctx->dct_algo==FF_DCT_FASTINT) {
2739 c->fdct = ff_fdct_ifast;
2740 c->fdct248 = ff_fdct_ifast248;
2742 else if(avctx->dct_algo==FF_DCT_FAAN) {
2743 c->fdct = ff_faandct;
2744 c->fdct248 = ff_faandct248;
2747 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2748 c->fdct248 = ff_fdct248_islow_8;
2751 #endif //CONFIG_ENCODERS
2753 if (avctx->bits_per_raw_sample == 10) {
2754 c->idct_put = ff_simple_idct_put_10;
2755 c->idct_add = ff_simple_idct_add_10;
2756 c->idct = ff_simple_idct_10;
2757 c->idct_permutation_type = FF_NO_IDCT_PERM;
2759 if(avctx->idct_algo==FF_IDCT_INT){
2760 c->idct_put= ff_jref_idct_put;
2761 c->idct_add= ff_jref_idct_add;
2762 c->idct = ff_j_rev_dct;
2763 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2764 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
2765 avctx->idct_algo==FF_IDCT_VP3){
2766 c->idct_put= ff_vp3_idct_put_c;
2767 c->idct_add= ff_vp3_idct_add_c;
2768 c->idct = ff_vp3_idct_c;
2769 c->idct_permutation_type= FF_NO_IDCT_PERM;
2770 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2771 c->idct_put= ff_wmv2_idct_put_c;
2772 c->idct_add= ff_wmv2_idct_add_c;
2773 c->idct = ff_wmv2_idct_c;
2774 c->idct_permutation_type= FF_NO_IDCT_PERM;
2775 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2776 c->idct_put= ff_faanidct_put;
2777 c->idct_add= ff_faanidct_add;
2778 c->idct = ff_faanidct;
2779 c->idct_permutation_type= FF_NO_IDCT_PERM;
2780 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
2781 c->idct_put= ff_ea_idct_put_c;
2782 c->idct_permutation_type= FF_NO_IDCT_PERM;
2783 }else{ //accurate/default
2784 c->idct_put = ff_simple_idct_put_8;
2785 c->idct_add = ff_simple_idct_add_8;
2786 c->idct = ff_simple_idct_8;
2787 c->idct_permutation_type= FF_NO_IDCT_PERM;
2791 c->diff_pixels = diff_pixels_c;
2792 c->put_pixels_clamped = ff_put_pixels_clamped_c;
2793 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
2794 c->add_pixels_clamped = ff_add_pixels_clamped_c;
2795 c->sum_abs_dctelem = sum_abs_dctelem_c;
2798 c->pix_sum = pix_sum_c;
2799 c->pix_norm1 = pix_norm1_c;
2801 c->fill_block_tab[0] = fill_block16_c;
2802 c->fill_block_tab[1] = fill_block8_c;
2804 /* TODO [0] 16 [1] 8 */
2805 c->pix_abs[0][0] = pix_abs16_c;
2806 c->pix_abs[0][1] = pix_abs16_x2_c;
2807 c->pix_abs[0][2] = pix_abs16_y2_c;
2808 c->pix_abs[0][3] = pix_abs16_xy2_c;
2809 c->pix_abs[1][0] = pix_abs8_c;
2810 c->pix_abs[1][1] = pix_abs8_x2_c;
2811 c->pix_abs[1][2] = pix_abs8_y2_c;
2812 c->pix_abs[1][3] = pix_abs8_xy2_c;
2814 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2815 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2816 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2817 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2818 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2819 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2820 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2821 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2822 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2824 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2825 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2826 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2827 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2828 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2829 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2830 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2831 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2832 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2834 #define dspfunc(PFX, IDX, NUM) \
2835 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2836 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2837 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2838 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2839 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2840 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2841 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2842 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2843 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2844 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2845 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2846 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2847 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2848 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2849 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2850 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2852 dspfunc(put_qpel, 0, 16);
2853 dspfunc(put_no_rnd_qpel, 0, 16);
2855 dspfunc(avg_qpel, 0, 16);
2856 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2858 dspfunc(put_qpel, 1, 8);
2859 dspfunc(put_no_rnd_qpel, 1, 8);
2861 dspfunc(avg_qpel, 1, 8);
2862 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2866 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
2867 ff_mlp_init(c, avctx);
2869 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
2870 ff_intrax8dsp_init(c,avctx);
2873 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2874 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2875 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2876 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2877 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2878 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2879 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2880 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2882 #define SET_CMP_FUNC(name) \
2883 c->name[0]= name ## 16_c;\
2884 c->name[1]= name ## 8x8_c;
2886 SET_CMP_FUNC(hadamard8_diff)
2887 c->hadamard8_diff[4]= hadamard8_intra16_c;
2888 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2889 SET_CMP_FUNC(dct_sad)
2890 SET_CMP_FUNC(dct_max)
2892 SET_CMP_FUNC(dct264_sad)
2894 c->sad[0]= pix_abs16_c;
2895 c->sad[1]= pix_abs8_c;
2899 SET_CMP_FUNC(quant_psnr)
2902 c->vsad[0]= vsad16_c;
2903 c->vsad[4]= vsad_intra16_c;
2904 c->vsad[5]= vsad_intra8_c;
2905 c->vsse[0]= vsse16_c;
2906 c->vsse[4]= vsse_intra16_c;
2907 c->vsse[5]= vsse_intra8_c;
2908 c->nsse[0]= nsse16_c;
2909 c->nsse[1]= nsse8_c;
2911 ff_dsputil_init_dwt(c);
2914 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2916 c->add_bytes= add_bytes_c;
2917 c->diff_bytes= diff_bytes_c;
2918 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2919 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2920 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2921 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2922 c->bswap_buf= bswap_buf;
2923 c->bswap16_buf = bswap16_buf;
2925 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2926 c->h263_h_loop_filter= h263_h_loop_filter_c;
2927 c->h263_v_loop_filter= h263_v_loop_filter_c;
2930 if (CONFIG_VP3_DECODER) {
2931 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
2932 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
2933 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
2936 c->h261_loop_filter= h261_loop_filter_c;
2938 c->try_8x8basis= try_8x8basis_c;
2939 c->add_8x8basis= add_8x8basis_c;
2941 #if CONFIG_VORBIS_DECODER
2942 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
2944 #if CONFIG_AC3_DECODER
2945 c->ac3_downmix = ff_ac3_downmix_c;
2947 c->vector_fmul = vector_fmul_c;
2948 c->vector_fmul_reverse = vector_fmul_reverse_c;
2949 c->vector_fmul_add = vector_fmul_add_c;
2950 c->vector_fmul_window = vector_fmul_window_c;
2951 c->vector_clipf = vector_clipf_c;
2952 c->scalarproduct_int16 = scalarproduct_int16_c;
2953 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2954 c->apply_window_int16 = apply_window_int16_c;
2955 c->vector_clip_int32 = vector_clip_int32_c;
2956 c->scalarproduct_float = scalarproduct_float_c;
2957 c->butterflies_float = butterflies_float_c;
2958 c->butterflies_float_interleave = butterflies_float_interleave_c;
2959 c->vector_fmul_scalar = vector_fmul_scalar_c;
2960 c->vector_fmac_scalar = vector_fmac_scalar_c;
2962 c->shrink[0]= av_image_copy_plane;
2963 c->shrink[1]= ff_shrink22;
2964 c->shrink[2]= ff_shrink44;
2965 c->shrink[3]= ff_shrink88;
2967 c->prefetch= just_return;
2969 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
2970 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
2974 #define FUNC(f, depth) f ## _ ## depth
2975 #define FUNCC(f, depth) f ## _ ## depth ## _c
2977 #define dspfunc1(PFX, IDX, NUM, depth)\
2978 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
2979 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
2980 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
2981 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
2983 #define dspfunc2(PFX, IDX, NUM, depth)\
2984 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
2985 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
2986 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
2987 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
2988 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
2989 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
2990 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
2991 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
2992 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
2993 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
2994 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
2995 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
2996 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
2997 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
2998 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
2999 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3002 #define BIT_DEPTH_FUNCS(depth, dct)\
3003 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
3004 c->draw_edges = FUNCC(draw_edges , depth);\
3005 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
3006 c->clear_block = FUNCC(clear_block ## dct , depth);\
3007 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3008 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3009 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
3010 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3011 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3013 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3014 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3015 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3016 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3017 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3018 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3020 dspfunc1(put , 0, 16, depth);\
3021 dspfunc1(put , 1, 8, depth);\
3022 dspfunc1(put , 2, 4, depth);\
3023 dspfunc1(put , 3, 2, depth);\
3024 dspfunc1(put_no_rnd, 0, 16, depth);\
3025 dspfunc1(put_no_rnd, 1, 8, depth);\
3026 dspfunc1(avg , 0, 16, depth);\
3027 dspfunc1(avg , 1, 8, depth);\
3028 dspfunc1(avg , 2, 4, depth);\
3029 dspfunc1(avg , 3, 2, depth);\
3030 dspfunc1(avg_no_rnd, 0, 16, depth);\
3031 dspfunc1(avg_no_rnd, 1, 8, depth);\
3033 dspfunc2(put_h264_qpel, 0, 16, depth);\
3034 dspfunc2(put_h264_qpel, 1, 8, depth);\
3035 dspfunc2(put_h264_qpel, 2, 4, depth);\
3036 dspfunc2(put_h264_qpel, 3, 2, depth);\
3037 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3038 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3039 dspfunc2(avg_h264_qpel, 2, 4, depth);
3041 switch (avctx->bits_per_raw_sample) {
3043 if (c->dct_bits == 32) {
3044 BIT_DEPTH_FUNCS(9, _32);
3046 BIT_DEPTH_FUNCS(9, _16);
3050 if (c->dct_bits == 32) {
3051 BIT_DEPTH_FUNCS(10, _32);
3053 BIT_DEPTH_FUNCS(10, _16);
3057 BIT_DEPTH_FUNCS(8, _16);
3062 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
3063 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
3064 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
3065 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
3066 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
3067 if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx);
3068 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
3069 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
3071 for(i=0; i<64; i++){
3072 if(!c->put_2tap_qpel_pixels_tab[0][i])
3073 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
3074 if(!c->avg_2tap_qpel_pixels_tab[0][i])
3075 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3078 ff_init_scantable_permutation(c->idct_permutation,
3079 c->idct_permutation_type);
3082 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3084 ff_dsputil_init(c, avctx);