3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "libavutil/attributes.h"
31 #include "libavutil/imgutils.h"
32 #include "libavutil/internal.h"
34 #include "copy_block.h"
37 #include "simple_idct.h"
40 #include "imgconvert.h"
42 #include "mpegvideo.h"
/* Square table for |x| <= 255, indexed with a +256 bias; zero-initialized
 * here and presumably populated during DSP init — confirm against caller. */
uint32_t ff_squareTbl[512] = { 0 };
48 #include "dsputil_template.c"
52 #include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
// (~0UL/255 is 0x0101...01, so the multiplication broadcasts the byte across all lanes)
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
/* Specific zigzag scan for 248 idct. NOTE that unlike the
 * specification, we interleave the fields.
 * (Fixed: the closing brace of the initializer was missing.) */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
/* Alternate horizontal coefficient scan order.
 * (Fixed: the closing brace of the initializer was missing.) */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
/* Alternate vertical coefficient scan order.
 * (Fixed: the closing brace of the initializer was missing.) */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* Input permutation for the simple_idct_mmx
 * (Fixed: the closing brace of the initializer was missing.) */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
/* Row interleave order used by the SSE2 IDCT permutation below. */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5, 2, 6, 3, 7
};
/* Initialise a ScanTable: record the source scan order and remap every
 * scan position through the IDCT coefficient permutation.
 * NOTE(review): the opening brace, loop headers, declarations and the
 * logic that computes `end` are missing from this excerpt; the visible
 * statements are kept byte-identical. */
av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                               const uint8_t *src_scantable)
    /* keep a pointer to the source scan order */
    st->scantable= src_scantable;
        /* remap scan position i through the IDCT permutation */
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
        j = st->permutated[i];
    /* presumably the raster end position for prefix i — confirm */
    st->raster_end[i]= end;
/* Fill idct_permutation[0..63] according to the requested permutation type.
 * NOTE(review): the per-case `for` loops, `break` statements and braces are
 * missing from this excerpt; visible statements kept byte-identical. */
av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                           int idct_permutation_type)
    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
        /* identity mapping */
        idct_permutation[i]= i;
    case FF_LIBMPEG2_IDCT_PERM:
        /* keep the row (bits 3-5), swap bit layout of the column */
        idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
    case FF_SIMPLE_IDCT_PERM:
        /* table-driven permutation for simple_idct_mmx */
        idct_permutation[i]= simple_mmx_permutation[i];
    case FF_TRANSPOSE_IDCT_PERM:
        /* transpose the 8x8 block: swap row and column fields */
        idct_permutation[i]= ((i&7)<<3) | (i>>3);
    case FF_PARTTRANS_IDCT_PERM:
        /* partial transpose: swap only the low 2 bits of row/column */
        idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
    case FF_SSE2_IDCT_PERM:
        /* keep the row, interleave columns per idct_sse2_row_perm */
        idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        /* default: unknown permutation type */
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
/**
 * Sum all 256 pixel values of a 16x16 block.
 * (Reconstructed: declarations, the unrolled adds for pix[1..7], the
 * inner-loop pointer advance and the return were missing.)
 * @param pix       top-left pixel of the block
 * @param line_size byte distance between vertically adjacent lines
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s = 0, i, j;

    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            /* 8 pixels per inner iteration, manually unrolled */
            s   += pix[0];
            s   += pix[1];
            s   += pix[2];
            s   += pix[3];
            s   += pix[4];
            s   += pix[5];
            s   += pix[6];
            s   += pix[7];
            pix += 8;
        }
        /* step to the next line (16 pixels already consumed) */
        pix += line_size - 16;
    }
    return s;
}
/* Sum of squares of all pixels of a 16x16 block, via the ff_squareTbl LUT
 * (sq is biased by +256 so signed differences elsewhere index safely).
 * NOTE(review): this excerpt is missing declarations, the preprocessor
 * guards (presumably #if HAVE_FAST_64BIT / #else / #endif) that select
 * between the 64-bit and 32-bit word loads below, the `s += sq[x&0xff]`
 * first-byte adds, the inner pointer advance and the return; the visible
 * statements are kept byte-identical.
 * NOTE(review): the *(uint64_t*)/(uint32_t*) casts read uint8_t data
 * through wider types — a strict-aliasing/alignment concern; confirm
 * upstream guarantees. */
static int pix_norm1_c(uint8_t * pix, int line_size)
    uint32_t *sq = ff_squareTbl + 256;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            /* 64-bit variant: extract and square each byte of one load */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
            /* 32-bit variant: two 4-byte loads */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
    pix += line_size - 16;
/**
 * Byte-swap each 32-bit word of src[] into dst[].
 * (Reconstructed: the declaration of i, the remainder-loop header and the
 * closing braces were missing.)
 * The main loop is unrolled eight words at a time; the trailing w % 8
 * words are handled by a scalar loop.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= av_bswap32(src[i+0]);
        dst[i+1]= av_bswap32(src[i+1]);
        dst[i+2]= av_bswap32(src[i+2]);
        dst[i+3]= av_bswap32(src[i+3]);
        dst[i+4]= av_bswap32(src[i+4]);
        dst[i+5]= av_bswap32(src[i+5]);
        dst[i+6]= av_bswap32(src[i+6]);
        dst[i+7]= av_bswap32(src[i+7]);
    }
    for(;i<w; i++){
        dst[i+0]= av_bswap32(src[i+0]);
    }
}
/**
 * Byte-swap each 16-bit word of src[] into dst[].
 * (Reconstructed: the braces and `while (len--)` wrapper were missing.)
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    while (len--)
        *dst++ = av_bswap16(*src++);
}
259 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
262 uint32_t *sq = ff_squareTbl + 256;
265 for (i = 0; i < h; i++) {
266 s += sq[pix1[0] - pix2[0]];
267 s += sq[pix1[1] - pix2[1]];
268 s += sq[pix1[2] - pix2[2]];
269 s += sq[pix1[3] - pix2[3]];
276 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
279 uint32_t *sq = ff_squareTbl + 256;
282 for (i = 0; i < h; i++) {
283 s += sq[pix1[0] - pix2[0]];
284 s += sq[pix1[1] - pix2[1]];
285 s += sq[pix1[2] - pix2[2]];
286 s += sq[pix1[3] - pix2[3]];
287 s += sq[pix1[4] - pix2[4]];
288 s += sq[pix1[5] - pix2[5]];
289 s += sq[pix1[6] - pix2[6]];
290 s += sq[pix1[7] - pix2[7]];
297 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
300 uint32_t *sq = ff_squareTbl + 256;
303 for (i = 0; i < h; i++) {
304 s += sq[pix1[ 0] - pix2[ 0]];
305 s += sq[pix1[ 1] - pix2[ 1]];
306 s += sq[pix1[ 2] - pix2[ 2]];
307 s += sq[pix1[ 3] - pix2[ 3]];
308 s += sq[pix1[ 4] - pix2[ 4]];
309 s += sq[pix1[ 5] - pix2[ 5]];
310 s += sq[pix1[ 6] - pix2[ 6]];
311 s += sq[pix1[ 7] - pix2[ 7]];
312 s += sq[pix1[ 8] - pix2[ 8]];
313 s += sq[pix1[ 9] - pix2[ 9]];
314 s += sq[pix1[10] - pix2[10]];
315 s += sq[pix1[11] - pix2[11]];
316 s += sq[pix1[12] - pix2[12]];
317 s += sq[pix1[13] - pix2[13]];
318 s += sq[pix1[14] - pix2[14]];
319 s += sq[pix1[15] - pix2[15]];
/**
 * Compute the per-pixel difference of two 8x8 blocks into a DCT block.
 * (Reconstructed: the loop header, pointer advances and closing braces
 * were missing.)
 * @param block  output 8x8 int16 block, written contiguously (64 entries)
 * @param s1     first source block
 * @param s2     second source block
 * @param stride byte distance between lines of s1 and s2
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        block += 8;
        s1    += stride;
        s2    += stride;
    }
}
/**
 * Store an 8x8 DCT block as pixels, clamping each value to [0,255].
 * (Reconstructed: the line_size parameter, loop header, pointer advances
 * and closing braces were missing.)
 * @param block     input 8x8 int16 block, read contiguously
 * @param pixels    output block
 * @param line_size byte distance between output lines
 */
static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);
        pixels[4] = av_clip_uint8(block[4]);
        pixels[5] = av_clip_uint8(block[5]);
        pixels[6] = av_clip_uint8(block[6]);
        pixels[7] = av_clip_uint8(block[7]);

        pixels += line_size;
        block  += 8;
    }
}
/**
 * Store an 8x8 DCT block of signed values as pixels: each value is
 * clamped to [-128,127] and biased by +128 into [0,255].
 * (Reconstructed: the line_size parameter, declarations, the `< -128`
 * branch, pointer increments and closing braces were missing.)
 * @param block     input 8x8 int16 block, read contiguously
 * @param pixels    output block
 * @param line_size byte distance between output lines
 */
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}
/**
 * Add an 8x8 int16 block onto an 8x8 pixel block (no clamping; the
 * uint8_t stores wrap modulo 256).
 * (Reconstructed: the block/line_size parameters, loop header, pointer
 * advances and closing braces were missing.)
 * @param pixels    pixel block updated in place
 * @param block     values to add, read contiguously
 * @param line_size byte distance between pixel lines
 */
static void add_pixels8_c(uint8_t *restrict pixels,
                          int16_t *block,
                          int line_size)
{
    int i;

    for (i = 0; i < 8; i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels    += line_size;
        block     += 8;
    }
}
/**
 * Add an 8x8 int16 block onto an 8x8 pixel block, clamping each result
 * to [0,255].
 * (Reconstructed: the line_size parameter, loop header, pointer advances
 * and closing braces were missing.)
 * @param block     values to add, read contiguously
 * @param pixels    pixel block updated in place
 * @param line_size byte distance between pixel lines
 */
static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
        pixels   += line_size;
        block    += 8;
    }
}
/**
 * Sum of absolute values of all 64 coefficients of a DCT block.
 * (Reconstructed: declarations, the loop header and the return were
 * missing.)
 * @param block 8x8 int16 coefficient block, read contiguously
 * @return sum of |block[i]| for i in [0,64)
 */
static int sum_abs_dctelem_c(int16_t *block)
{
    int sum = 0, i;

    for (i = 0; i < 64; i++)
        sum+= FFABS(block[i]);
    return sum;
}
/**
 * Fill a 16-pixel-wide, h-line-high block with a constant byte value.
 * (Reconstructed: the declaration of i, the per-line pointer advance and
 * the closing braces were missing.)
 * @param block     top-left of the destination block
 * @param value     byte to store
 * @param line_size byte distance between lines
 * @param h         number of lines
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}
/**
 * Fill an 8-pixel-wide, h-line-high block with a constant byte value.
 * (Reconstructed: the declaration of i, the per-line pointer advance and
 * the closing braces were missing.)
 * @param block     top-left of the destination block
 * @param value     byte to store
 * @param line_size byte distance between lines
 * @param h         number of lines
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}
/* Rounded averages used by the half/quarter-pel interpolation code.
 * Fixed: the macro arguments were not parenthesized, so passing a
 * compound expression (e.g. a ternary or shift) could misparse due to
 * operator precedence. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
/**
 * 8-pixel-wide global motion compensation with 1/16-pel bilinear
 * interpolation: each output pixel is a weighted average of the 2x2
 * source neighbourhood, weights summing to 256.
 * (Reconstructed: the declaration of i, loop header, per-line pointer
 * advances and closing braces were missing.)
 * @param x16,y16 fractional position in 1/16 pel units, in [0,16)
 * @param rounder rounding constant added before the >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
/* Global motion compensation with an affine transform (dxx/dxy/dyx/dyy),
 * bilinear interpolation at 1/(1<<shift) pel accuracy, and edge clamping.
 * NOTE(review): this excerpt is missing the outer y loop, the computation
 * of src_x/src_y/frac_x/frac_y from ox/oy and the matrix coefficients,
 * the rounding/shift tails of the dst expressions, and the closing
 * braces; visible statements kept byte-identical. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
    const int s= 1<<shift;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;
            /* (unsigned) compare handles both <0 and >=width in one test */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside the picture: 2x2 bilinear */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                         + src[index+stride+1]*   frac_x )*   frac_y
                    /* y outside: clamp the row, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*s
                if((unsigned)src_y < height){
                    /* x outside: clamp the column, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_y)
                                         + src[index+stride  ]*   frac_y )*s
                    /* both outside: nearest clamped pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index         ];
/* Thirdpel MC, integer position: plain block copy for width in {2,4,8,16}.
 * NOTE(review): the switch(width) header and closing braces are missing
 * from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
/* Thirdpel MC, horizontal 1/3 phase: dst = round((2*a + b)/3), computed
 * fixed-point as (683*(2a+b+1))>>11 (683/2048 ~= 1/3).
 * NOTE(review): declarations, closing braces and the src/dst stride
 * advances are missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
/* Thirdpel MC, horizontal 2/3 phase: dst = round((a + 2*b)/3) via the
 * 683/2048 fixed-point reciprocal.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
/* Thirdpel MC, vertical 1/3 phase: dst = round((2*a + below)/3).
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
/* Thirdpel MC, 2D (1/3,1/3) phase: bilinear weights (4,3,3,2)/12 via the
 * 2731/32768 ~= 1/12 fixed-point reciprocal.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
/* Thirdpel MC, 2D (1/3,2/3) phase: weights (3,2,4,3)/12.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* Thirdpel MC, vertical 2/3 phase: dst = round((a + 2*below)/3).
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
/* Thirdpel MC, 2D (2/3,1/3) phase: weights (3,4,2,3)/12.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* Thirdpel MC, 2D (2/3,2/3) phase: weights (2,3,3,4)/12.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
/* Thirdpel MC with averaging, integer position: rounded average of dst
 * and a plain copy, for width in {2,4,8,16}.
 * NOTE(review): the switch(width) header and closing braces are missing
 * from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
/* Thirdpel MC with averaging, horizontal 1/3 phase: interpolate as in
 * put_tpel_pixels_mc10_c, then rounded-average with the existing dst.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
/* Thirdpel MC with averaging, horizontal 2/3 phase.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
/* Thirdpel MC with averaging, vertical 1/3 phase.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
/* Thirdpel MC with averaging, 2D (1/3,1/3) phase: weights (4,3,3,2)/12,
 * then rounded-average with the existing dst.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Thirdpel MC with averaging, 2D (1/3,2/3) phase: weights (3,2,4,3)/12.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Thirdpel MC with averaging, vertical 2/3 phase.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
/* Thirdpel MC with averaging, 2D (2/3,1/3) phase: weights (3,4,2,3)/12.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Thirdpel MC with averaging, 2D (2/3,2/3) phase: weights (2,3,3,4)/12.
 * NOTE(review): declarations, closing braces and the stride advances are
 * missing from this excerpt; code kept byte-identical. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
736 #define QPEL_MC(r, OPNAME, RND, OP) \
737 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
738 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
742 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
743 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
744 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
745 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
746 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
747 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
748 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
749 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
755 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
757 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
761 const int src0= src[0*srcStride];\
762 const int src1= src[1*srcStride];\
763 const int src2= src[2*srcStride];\
764 const int src3= src[3*srcStride];\
765 const int src4= src[4*srcStride];\
766 const int src5= src[5*srcStride];\
767 const int src6= src[6*srcStride];\
768 const int src7= src[7*srcStride];\
769 const int src8= src[8*srcStride];\
770 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
771 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
772 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
773 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
774 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
775 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
776 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
777 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
783 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
784 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
789 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
790 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
791 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
792 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
793 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
794 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
795 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
796 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
797 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
798 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
799 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
800 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
801 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
802 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
803 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
804 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
810 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
811 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
816 const int src0= src[0*srcStride];\
817 const int src1= src[1*srcStride];\
818 const int src2= src[2*srcStride];\
819 const int src3= src[3*srcStride];\
820 const int src4= src[4*srcStride];\
821 const int src5= src[5*srcStride];\
822 const int src6= src[6*srcStride];\
823 const int src7= src[7*srcStride];\
824 const int src8= src[8*srcStride];\
825 const int src9= src[9*srcStride];\
826 const int src10= src[10*srcStride];\
827 const int src11= src[11*srcStride];\
828 const int src12= src[12*srcStride];\
829 const int src13= src[13*srcStride];\
830 const int src14= src[14*srcStride];\
831 const int src15= src[15*srcStride];\
832 const int src16= src[16*srcStride];\
833 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
834 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
835 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
836 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
837 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
838 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
839 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
840 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
841 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
842 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
843 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
844 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
845 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
846 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
847 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
848 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
854 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
857 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
858 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
861 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
863 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
866 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
869 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
870 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
873 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
877 copy_block9(full, src, 16, stride, 9);\
878 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
879 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
882 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
885 copy_block9(full, src, 16, stride, 9);\
886 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
889 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
893 copy_block9(full, src, 16, stride, 9);\
894 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
895 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
897 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
903 copy_block9(full, src, 16, stride, 9);\
904 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
905 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
906 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
907 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
909 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
914 copy_block9(full, src, 16, stride, 9);\
915 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
916 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
917 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
918 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
920 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
926 copy_block9(full, src, 16, stride, 9);\
927 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
928 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
929 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
930 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
932 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
937 copy_block9(full, src, 16, stride, 9);\
938 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
939 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
940 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
941 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
943 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
949 copy_block9(full, src, 16, stride, 9);\
950 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
951 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
952 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
953 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
955 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
960 copy_block9(full, src, 16, stride, 9);\
961 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
962 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
963 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
964 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
966 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
972 copy_block9(full, src, 16, stride, 9);\
973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
974 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
975 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
976 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
978 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
983 copy_block9(full, src, 16, stride, 9);\
984 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
985 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
986 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
987 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
989 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
993 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
994 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
995 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
997 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1000 uint8_t halfHV[64];\
1001 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1002 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1003 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1005 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1007 uint8_t full[16*9];\
1010 uint8_t halfHV[64];\
1011 copy_block9(full, src, 16, stride, 9);\
1012 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1013 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1014 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1015 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1017 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1019 uint8_t full[16*9];\
1021 copy_block9(full, src, 16, stride, 9);\
1022 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1023 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1024 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1026 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1028 uint8_t full[16*9];\
1031 uint8_t halfHV[64];\
1032 copy_block9(full, src, 16, stride, 9);\
1033 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1034 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1035 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1036 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1038 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1040 uint8_t full[16*9];\
1042 copy_block9(full, src, 16, stride, 9);\
1043 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1044 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1045 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1047 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1050 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1051 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1054 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1057 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1058 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1061 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1063 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1066 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1069 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1070 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1073 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1075 uint8_t full[24*17];\
1077 copy_block17(full, src, 24, stride, 17);\
1078 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1079 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1082 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1084 uint8_t full[24*17];\
1085 copy_block17(full, src, 24, stride, 17);\
1086 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1089 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1091 uint8_t full[24*17];\
1093 copy_block17(full, src, 24, stride, 17);\
1094 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1095 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1097 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1099 uint8_t full[24*17];\
1100 uint8_t halfH[272];\
1101 uint8_t halfV[256];\
1102 uint8_t halfHV[256];\
1103 copy_block17(full, src, 24, stride, 17);\
1104 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1105 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1106 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1107 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1109 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1111 uint8_t full[24*17];\
1112 uint8_t halfH[272];\
1113 uint8_t halfHV[256];\
1114 copy_block17(full, src, 24, stride, 17);\
1115 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1116 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1117 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1118 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1120 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1122 uint8_t full[24*17];\
1123 uint8_t halfH[272];\
1124 uint8_t halfV[256];\
1125 uint8_t halfHV[256];\
1126 copy_block17(full, src, 24, stride, 17);\
1127 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1128 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1129 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1130 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1132 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1134 uint8_t full[24*17];\
1135 uint8_t halfH[272];\
1136 uint8_t halfHV[256];\
1137 copy_block17(full, src, 24, stride, 17);\
1138 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1139 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1140 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1141 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1143 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1145 uint8_t full[24*17];\
1146 uint8_t halfH[272];\
1147 uint8_t halfV[256];\
1148 uint8_t halfHV[256];\
1149 copy_block17(full, src, 24, stride, 17);\
1150 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1151 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1152 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1153 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1155 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1157 uint8_t full[24*17];\
1158 uint8_t halfH[272];\
1159 uint8_t halfHV[256];\
1160 copy_block17(full, src, 24, stride, 17);\
1161 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1162 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1163 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1164 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1166 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1168 uint8_t full[24*17];\
1169 uint8_t halfH[272];\
1170 uint8_t halfV[256];\
1171 uint8_t halfHV[256];\
1172 copy_block17(full, src, 24, stride, 17);\
1173 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1174 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1175 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1176 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1178 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1180 uint8_t full[24*17];\
1181 uint8_t halfH[272];\
1182 uint8_t halfHV[256];\
1183 copy_block17(full, src, 24, stride, 17);\
1184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1185 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1186 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1187 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1189 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1191 uint8_t halfH[272];\
1192 uint8_t halfHV[256];\
1193 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1194 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1195 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1197 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1199 uint8_t halfH[272];\
1200 uint8_t halfHV[256];\
1201 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1202 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1203 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1205 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1207 uint8_t full[24*17];\
1208 uint8_t halfH[272];\
1209 uint8_t halfV[256];\
1210 uint8_t halfHV[256];\
1211 copy_block17(full, src, 24, stride, 17);\
1212 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1213 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1214 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1215 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1217 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1219 uint8_t full[24*17];\
1220 uint8_t halfH[272];\
1221 copy_block17(full, src, 24, stride, 17);\
1222 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1223 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1224 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1226 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1228 uint8_t full[24*17];\
1229 uint8_t halfH[272];\
1230 uint8_t halfV[256];\
1231 uint8_t halfHV[256];\
1232 copy_block17(full, src, 24, stride, 17);\
1233 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1234 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1235 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1236 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1238 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1240 uint8_t full[24*17];\
1241 uint8_t halfH[272];\
1242 copy_block17(full, src, 24, stride, 17);\
1243 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1244 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1245 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1247 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1249 uint8_t halfH[272];\
1250 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1251 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Rounding primitives plugged into QPEL_MC: `b` is the raw filter sum
 * (scaled by 32); cm[] clips it back to pixel range.  The *_no_rnd
 * variants bias by 15 instead of 16 (round-down), and op_avg averages
 * the result with the existing destination pixel. */
1254 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1255 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1256 #define op_put(a, b) a = cm[((b) + 16)>>5]
1257 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the full qpel MC function families for each rounding mode
 * (put, put_no_rnd, avg); the avg_no_rnd family is intentionally unused. */
1259 QPEL_MC(0, put_ , _ , op_put)
1260 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1261 QPEL_MC(0, avg_ , _ , op_avg)
1262 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1264 #undef op_avg_no_rnd
1266 #undef op_put_no_rnd
/**
 * Fixed-size block copy / average wrappers.
 *
 * Thin adapters around the width-parameterized 8-bit template functions
 * generated from dsputil_template.c, with the block height bound to the
 * block width (8 or 16).  put = plain copy, avg = average with dst.
 */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_8_c(dst, src, stride, 8);
}

void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_8_c(dst, src, stride, 8);
}

void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_8_c(dst, src, stride, 16);
}

void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_8_c(dst, src, stride, 16);
}
/* The mc00 (integer-pel) qpel positions are plain block copies, so alias
 * them to the pixel-copy wrappers instead of generating extra functions. */
1285 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1286 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1287 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1288 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1289 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1290 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1292 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1293 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1297 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1298 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1299 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1300 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1301 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1302 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1303 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1304 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
#if CONFIG_RV40_DECODER
/* RV40 (3,3) qpel positions: forward to the diagonal (xy2) half-pel
 * copy/average templates from dsputil_template.c. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}

void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}

void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}

void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1329 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1330 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1334 const int src_1= src[ -srcStride];
1335 const int src0 = src[0 ];
1336 const int src1 = src[ srcStride];
1337 const int src2 = src[2*srcStride];
1338 const int src3 = src[3*srcStride];
1339 const int src4 = src[4*srcStride];
1340 const int src5 = src[5*srcStride];
1341 const int src6 = src[6*srcStride];
1342 const int src7 = src[7*srcStride];
1343 const int src8 = src[8*srcStride];
1344 const int src9 = src[9*srcStride];
1345 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1346 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1347 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1348 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1349 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1350 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1351 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1352 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* WMV2 8x8 sub-pel motion compensation (mspel).  mcXY encodes the (x,y)
 * sub-pel position; the h/v lowpass helpers above apply the
 * (-1,9,9,-1)/16 filter and put_pixels8_l2_8 averages two predictions.
 * NOTE(review): the extraction dropped braces and the local temp-buffer
 * declarations (half, halfH, halfV, halfHV) from these bodies; only
 * comments were added here. */
/* mc10: average of src with the horizontally filtered block. */
1358 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1361 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1362 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
/* mc20: purely horizontally filtered block, written straight to dst. */
1365 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1367 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
/* mc30: average of src+1 with the horizontally filtered block. */
1370 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1373 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1374 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
/* mc02: purely vertically filtered block. */
1377 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1379 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
/* mc12: average of the v-filtered block with the h-then-v filtered one.
 * The h pass starts one row above (src-stride) and filters 11 rows so
 * the subsequent v pass has the context rows it reads. */
1382 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1387 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1388 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1389 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1390 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
/* mc32: like mc12 but the pure-v pass starts from src+1. */
1392 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1397 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1398 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1399 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1400 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
/* mc22: h filter then v filter, no averaging. */
1402 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1405 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1406 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
/**
 * Sum of absolute differences (SAD) between two 16-pixel-wide blocks.
 *
 * @param v         unused context pointer (me_cmp_func signature)
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size byte stride of both blocks
 * @param h         number of rows to compare
 * @return sum over h rows of |pix1[x] - pix2[x]| for x in [0,16)
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0]  - pix2[0]);
        s += abs(pix1[1]  - pix2[1]);
        s += abs(pix1[2]  - pix2[2]);
        s += abs(pix1[3]  - pix2[3]);
        s += abs(pix1[4]  - pix2[4]);
        s += abs(pix1[5]  - pix2[5]);
        s += abs(pix1[6]  - pix2[6]);
        s += abs(pix1[7]  - pix2[7]);
        s += abs(pix1[8]  - pix2[8]);
        s += abs(pix1[9]  - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/* SAD of pix1 against a half-pel interpolated version of pix2.
 * avg2/avg4 are rounding 2- and 4-tap averaging macros (their #define
 * lines were lost in extraction, as were the loop headers, the
 * per-row pointer advances and the final `return s;`).
 * x2: horizontal half-pel — reference is avg of pix2[x] and pix2[x+1]. */
1437 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1443 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1444 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1445 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1446 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1447 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1448 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1449 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1450 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1451 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1452 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1453 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1454 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1455 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1456 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1457 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1458 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
/* y2: vertical half-pel — reference is avg of the row and the next row
 * (pix3 = pix2 + line_size). */
1465 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1468 uint8_t *pix3 = pix2 + line_size;
1472 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1473 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1474 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1475 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1476 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1477 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1478 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1479 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1480 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1481 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1482 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1483 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1484 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1485 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1486 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1487 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
/* xy2: diagonal half-pel — reference is the 4-pixel average of the
 * 2x2 neighbourhood. */
1495 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1498 uint8_t *pix3 = pix2 + line_size;
1502 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1503 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1504 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1505 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1506 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1507 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1508 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1509 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1510 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1511 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1512 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1513 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1514 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1515 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1516 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1517 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/**
 * Sum of absolute differences (SAD) between two 8-pixel-wide blocks.
 *
 * @param v         unused context pointer (me_cmp_func signature)
 * @param line_size byte stride of both blocks
 * @param h         number of rows to compare
 * @return sum over h rows of |pix1[x] - pix2[x]| for x in [0,8)
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/* 8-wide counterparts of the half-pel SAD metrics above (avg2/avg4
 * macros, loop headers, pointer advances and returns lost in
 * extraction; comments only added).
 * x2: horizontal half-pel reference. */
1545 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1551 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1552 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1553 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1554 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1555 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1556 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1557 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1558 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
/* y2: vertical half-pel reference (pix3 = next row). */
1565 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1568 uint8_t *pix3 = pix2 + line_size;
1572 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1573 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1574 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1575 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1576 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1577 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1578 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1579 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
/* xy2: diagonal half-pel reference (2x2 average). */
1587 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1590 uint8_t *pix3 = pix2 + line_size;
1594 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1595 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1596 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1597 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1598 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1599 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1600 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1601 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/* Noise-preserving SSE (NSSE), 16x16: score1 accumulates the plain SSE,
 * score2 the difference in local gradient energy between the two blocks
 * (so blocks that differ only by noise texture are penalized less).
 * The gradient term is weighted by avctx->nsse_weight, or 8 when no
 * context is supplied.
 * NOTE(review): extraction dropped braces, loop headers and the local
 * declarations from every block below; only comments were added. */
1609 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1610 MpegEncContext *c = v;
1616 for(x=0; x<16; x++){
1617 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1620 for(x=0; x<15; x++){
1621 score2+= FFABS( s1[x ] - s1[x +stride]
1622 - s1[x+1] + s1[x+1+stride])
1623 -FFABS( s2[x ] - s2[x +stride]
1624 - s2[x+1] + s2[x+1+stride]);
1631 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1632 else return score1 + FFABS(score2)*8;
/* 8x8 variant of the NSSE metric above. */
1635 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1636 MpegEncContext *c = v;
1643 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1647 score2+= FFABS( s1[x ] - s1[x +stride]
1648 - s1[x+1] + s1[x+1+stride])
1649 -FFABS( s2[x ] - s2[x +stride]
1650 - s2[x+1] + s2[x+1+stride]);
1657 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1658 else return score1 + FFABS(score2)*8;
/* Trellis/RD helper: weighted squared error of the residual `rem` after
 * adding basis[i]*scale (rounded back from BASIS_SHIFT to RECON_SHIFT).
 * The assert documents the 10-bit range the caller guarantees. */
1661 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1665 for(i=0; i<8*8; i++){
1666 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1669 assert(-512<b && b<512);
1671 sum += (w*b)*(w*b)>>4;
/* Accumulate basis[i]*scale (rounded, rescaled) into the residual. */
1676 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1679 for(i=0; i<8*8; i++){
1680 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
/* Null comparison function for unused cmp slots; body lost in
 * extraction — presumably returns a constant, TODO confirm. */
1684 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
/* Fill cmp[0..5] with the comparison functions selected by `type`
 * (FF_CMP_*), falling through to an error log for unknown types.
 * NOTE(review): most of the switch body was lost in extraction. */
1688 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1691 memset(cmp, 0, sizeof(void*)*6);
1699 cmp[i]= c->hadamard8_diff[i];
1705 cmp[i]= c->dct_sad[i];
1708 cmp[i]= c->dct264_sad[i];
1711 cmp[i]= c->dct_max[i];
1714 cmp[i]= c->quant_psnr[i];
1735 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
/* SWAR byte-wise add: processes sizeof(long) bytes at a time using the
 * pb_7f/pb_80 masks to stop carries from crossing byte lanes; the line
 * after the main loop handles the per-byte tail.
 * NOTE(review): extraction dropped braces/loop headers throughout this
 * region; only comments were added. */
1740 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1742 for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
1743 long a = *(long*)(src+i);
1744 long b = *(long*)(dst+i);
1745 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1748 dst[i+0] += src[i+0];
/* SWAR byte-wise subtract (dst = src1 - src2 per byte), with a plain
 * byte loop fallback when src2 is misaligned and the platform lacks
 * fast unaligned loads. */
1751 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1753 #if !HAVE_FAST_UNALIGNED
1754 if((long)src2 & (sizeof(long)-1)){
1755 for(i=0; i+7<w; i+=8){
1756 dst[i+0] = src1[i+0]-src2[i+0];
1757 dst[i+1] = src1[i+1]-src2[i+1];
1758 dst[i+2] = src1[i+2]-src2[i+2];
1759 dst[i+3] = src1[i+3]-src2[i+3];
1760 dst[i+4] = src1[i+4]-src2[i+4];
1761 dst[i+5] = src1[i+5]-src2[i+5];
1762 dst[i+6] = src1[i+6]-src2[i+6];
1763 dst[i+7] = src1[i+7]-src2[i+7];
1767 for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
1768 long a = *(long*)(src1+i);
1769 long b = *(long*)(src2+i);
1770 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1773 dst[i+0] = src1[i+0]-src2[i+0];
/* HuffYUV median prediction: reconstruct pixels from diffs using the
 * median of left, above, and left+above-aboveleft (mid_pred). */
1776 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1784 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
/* Inverse of the above: emit the residual against the median predictor. */
1793 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1801 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
/* HuffYUV left prediction (running sum across the row); returns the
 * updated accumulator.  Body mostly lost in extraction. */
1811 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1814 for(i=0; i<w-1; i++){
/* Per-channel left prediction for BGR32 data; body lost in extraction. */
1841 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Butterfly primitives for the 8x8 Hadamard transform below; the macro
 * bodies (lines 1872-1883) were lost in extraction.  BUTTERFLYA folds
 * the final butterfly stage directly into an absolute-value sum. */
1871 #define BUTTERFLY2(o1,o2,i1,i2) \
1875 #define BUTTERFLY1(x,y) \
1884 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/* SATD: 8x8 Hadamard transform of the src-dst difference, summing
 * absolute transform coefficients.  Rows first (three butterfly
 * stages), then columns, with the last column stage folded into the
 * BUTTERFLYA accumulation. */
1886 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1894 //FIXME try pointer walks
1895 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1896 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1897 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1898 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1900 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1901 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1902 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1903 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1905 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1906 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1907 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1908 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1912 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1913 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1914 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1915 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1917 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1918 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1919 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1920 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1923 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1924 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1925 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1926 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
/* Intra SATD: same Hadamard transform applied to src alone; the DC
 * term is subtracted at the end ("-mean") so flat blocks score 0. */
1931 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
1939 //FIXME try pointer walks
1940 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
1941 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
1942 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
1943 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
1945 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1946 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1947 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1948 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1950 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1951 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1952 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1953 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1957 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1958 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1959 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1960 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1962 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1963 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1964 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1965 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1968 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1969 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1970 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1971 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1974 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
/* DCT-SAD: forward-DCT the src1-src2 difference and sum the absolute
 * coefficients (via the DSP sum_abs_dctelem hook).
 * NOTE(review): braces, the fdct call and some declarations were lost
 * in extraction throughout this region; only comments were added. */
1979 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1980 MpegEncContext * const s= (MpegEncContext *)c;
1981 LOCAL_ALIGNED_16(int16_t, temp, [64]);
1985 s->dsp.diff_pixels(temp, src1, src2, stride);
1987 return s->dsp.sum_abs_dctelem(temp);
/* Interior of the DCT8_1D macro (its #define line 1991 was lost):
 * one 8-point H.264-style integer DCT pass expressed through the
 * caller-supplied SRC/DST macros. */
1992 const int s07 = SRC(0) + SRC(7);\
1993 const int s16 = SRC(1) + SRC(6);\
1994 const int s25 = SRC(2) + SRC(5);\
1995 const int s34 = SRC(3) + SRC(4);\
1996 const int a0 = s07 + s34;\
1997 const int a1 = s16 + s25;\
1998 const int a2 = s07 - s34;\
1999 const int a3 = s16 - s25;\
2000 const int d07 = SRC(0) - SRC(7);\
2001 const int d16 = SRC(1) - SRC(6);\
2002 const int d25 = SRC(2) - SRC(5);\
2003 const int d34 = SRC(3) - SRC(4);\
2004 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2005 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2006 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2007 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2009 DST(1, a4 + (a7>>2)) ;\
2010 DST(2, a2 + (a3>>1)) ;\
2011 DST(3, a5 + (a6>>2)) ;\
2013 DST(5, a6 - (a5>>2)) ;\
2014 DST(6, (a2>>1) - a3 ) ;\
2015 DST(7, (a4>>2) - a7 ) ;\
/* H.264 DCT-SAD: apply DCT8_1D over rows (SRC/DST write back into dct)
 * then over columns with DST redefined to accumulate |v| directly. */
2018 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2019 MpegEncContext * const s= (MpegEncContext *)c;
2024 s->dsp.diff_pixels(dct[0], src1, src2, stride);
2026 #define SRC(x) dct[i][x]
2027 #define DST(x,v) dct[i][x]= v
2028 for( i = 0; i < 8; i++ )
2033 #define SRC(x) dct[x][i]
2034 #define DST(x,v) sum += FFABS(v)
2035 for( i = 0; i < 8; i++ )
/* DCT-MAX: like dct_sad but returns the largest |coefficient|. */
2043 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2044 MpegEncContext * const s= (MpegEncContext *)c;
2045 LOCAL_ALIGNED_16(int16_t, temp, [64]);
2050 s->dsp.diff_pixels(temp, src1, src2, stride);
2054 sum= FFMAX(sum, FFABS(temp[i]));
/* QUANT_PSNR: quantize+dequantize the difference block and return the
 * squared error the quantizer introduced (bak holds the pre-quant
 * coefficients). */
2059 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2060 MpegEncContext * const s= (MpegEncContext *)c;
2061 LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
2062 int16_t * const bak = temp+64;
2068 s->dsp.diff_pixels(temp, src1, src2, stride);
2070 memcpy(bak, temp, 64*sizeof(int16_t));
2072 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2073 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2074 ff_simple_idct_8(temp); //FIXME
2077 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
/* RD metric: quantize the difference, estimate the VLC bit cost of the
 * coefficients with the real AC tables, reconstruct (dequant + idct
 * onto a copy of src2), and return distortion + lambda-weighted rate. */
2082 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2083 MpegEncContext * const s= (MpegEncContext *)c;
2084 const uint8_t *scantable= s->intra_scantable.permutated;
2085 LOCAL_ALIGNED_16(int16_t, temp, [64]);
2086 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2087 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2088 int i, last, run, bits, level, distortion, start_i;
2089 const int esc_length= s->ac_esc_length;
2091 uint8_t * last_length;
2095 copy_block8(lsrc1, src1, 8, stride, 8);
2096 copy_block8(lsrc2, src2, 8, stride, 8);
2098 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2100 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2106 length = s->intra_ac_vlc_length;
2107 last_length= s->intra_ac_vlc_last_length;
2108 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2111 length = s->inter_ac_vlc_length;
2112 last_length= s->inter_ac_vlc_last_length;
2117 for(i=start_i; i<last; i++){
2118 int j= scantable[i];
2123 if((level&(~127)) == 0){
2124 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2133 level= temp[i] + 64;
2137 if((level&(~127)) == 0){
2138 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2146 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2148 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2151 s->dsp.idct_add(lsrc2, 8, temp);
2153 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2155 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* BIT metric: same coefficient/VLC walk as rd8x8_c but without the
 * reconstruction step — rate only. */
2158 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2159 MpegEncContext * const s= (MpegEncContext *)c;
2160 const uint8_t *scantable= s->intra_scantable.permutated;
2161 LOCAL_ALIGNED_16(int16_t, temp, [64]);
2162 int i, last, run, bits, level, start_i;
2163 const int esc_length= s->ac_esc_length;
2165 uint8_t * last_length;
2169 s->dsp.diff_pixels(temp, src1, src2, stride);
2171 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2177 length = s->intra_ac_vlc_length;
2178 last_length= s->intra_ac_vlc_last_length;
2179 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2182 length = s->inter_ac_vlc_length;
2183 last_length= s->inter_ac_vlc_last_length;
2188 for(i=start_i; i<last; i++){
2189 int j= scantable[i];
2194 if((level&(~127)) == 0){
2195 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2204 level= temp[i] + 64;
2208 if((level&(~127)) == 0){
2209 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* VSAD_INTRA(size): generator for vertical-SAD intra metrics — sum of
 * absolute vertical gradients of a single block (macro body continues
 * on extraction-truncated lines). */
2217 #define VSAD_INTRA(size) \
2218 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2222 for(y=1; y<h; y++){ \
2223 for(x=0; x<size; x+=4){ \
2224 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2225 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
/* Inter VSAD: absolute difference of the vertical gradients of the two
 * blocks (loop headers/return lost in extraction). */
2235 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2240 for(x=0; x<16; x++){
2241 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Squared-difference helper and the squared (VSSE) variants of the
 * same two metrics. */
2250 #define SQ(a) ((a)*(a))
2251 #define VSSE_INTRA(size) \
2252 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2256 for(y=1; y<h; y++){ \
2257 for(x=0; x<size; x+=4){ \
2258 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2259 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2269 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2274 for(x=0; x<16; x++){
2275 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/**
 * Sum of squared differences between an int8 and an int16 vector.
 *
 * @param pix1 first vector (8-bit signed)
 * @param pix2 second vector (16-bit signed)
 * @param size number of elements to compare
 * @return sum of (pix1[i]-pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int score = 0, i;

    for (i = 0; i < size; i++)
        score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);

    return score;
}
/* WRAPPER8_16_SQ(name8, name16): build a 16x16 metric from an 8x8 one
 * by summing the four 8x8 quadrants (the stride advance between the
 * two quadrant rows sits on extraction-truncated lines). */
2293 #define WRAPPER8_16_SQ(name8, name16)\
2294 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
2296 score +=name8(s, dst , src , stride, 8);\
2297 score +=name8(s, dst+8 , src+8 , stride, 8);\
2301 score +=name8(s, dst , src , stride, 8);\
2302 score +=name8(s, dst+8 , src+8 , stride, 8);\
/* Instantiate the 16x16 versions of every 8x8 metric above. */
2307 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2308 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2309 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2311 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2313 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2314 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2315 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2316 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/**
 * Clip one float (passed as its IEEE-754 bit pattern) to [min, max]
 * when min < 0 < max, using only integer compares.
 *
 * mini/maxi are the bit patterns of min/max; maxisign is maxi with the
 * sign bit flipped.  A value above mini (as unsigned) is a negative
 * float below min; otherwise flipping the sign bit turns the float
 * ordering into the unsigned ordering for the upper bound.
 *
 * @return the clipped bit pattern
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;                 /* negative and below min */
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;                 /* positive and above max */
    else
        return a;                    /* already inside [min, max] */
}
/* Bit-trick float clipping for the min < 0 < max case: reinterpret the
 * floats as uint32 and let clipf_c_one do integer compares on the bit
 * patterns.  len is assumed to be a multiple of 8 (unrolled by 8).
 * NOTE(review): the loop-closing braces were lost in extraction. */
2327 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2329 uint32_t mini = *(uint32_t*)min;
2330 uint32_t maxi = *(uint32_t*)max;
2331 uint32_t maxisign = maxi ^ (1U<<31);
2332 uint32_t *dsti = (uint32_t*)dst;
2333 const uint32_t *srci = (const uint32_t*)src;
2334 for(i=0; i<len; i+=8) {
2335 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2336 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2337 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2338 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2339 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2340 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2341 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2342 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
/* Clip a float vector to [min, max]: dispatch to the bit-trick path
 * when the bounds straddle zero, otherwise a plain av_clipf loop
 * unrolled by 8. */
2345 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
2347 if(min < 0 && max > 0) {
2348 vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
2350 for(i=0; i < len; i+=8) {
2351 dst[i ] = av_clipf(src[i ], min, max);
2352 dst[i + 1] = av_clipf(src[i + 1], min, max);
2353 dst[i + 2] = av_clipf(src[i + 2], min, max);
2354 dst[i + 3] = av_clipf(src[i + 3], min, max);
2355 dst[i + 4] = av_clipf(src[i + 4], min, max);
2356 dst[i + 5] = av_clipf(src[i + 5], min, max);
2357 dst[i + 6] = av_clipf(src[i + 6], min, max);
2358 dst[i + 7] = av_clipf(src[i + 7], min, max);
/**
 * Dot product of two int16 vectors.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements
 * @return sum of v1[i] * v2[i]
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Fused dot product and multiply-accumulate.
 *
 * Computes the dot product of v1 and v2 while simultaneously updating
 * v1 in place: v1[i] += mul * v3[i].  The dot product uses the value of
 * v1[i] from BEFORE the update.
 *
 * @return sum of (pre-update) v1[i] * v2[i]
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;

    while (order--) {
        res   += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }

    return res;
}
/**
 * Clip each element of an int32 vector to [min, max].
 *
 * Unrolled by 8; len must be a non-zero multiple of 8 (the do/while
 * always processes at least one group).
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/* Reference (jrevdct) IDCT wrapper: inverse-transform the 8x8 block in
 * place, then clamp the samples to [0,255] and store them into dest. */
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* Reference (jrevdct) IDCT wrapper: inverse-transform the 8x8 block in
 * place, then add the clamped samples onto the existing pixels in dest. */
static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
2410 /* init static data */
2411 av_cold void ff_dsputil_static_init(void)
2415 for(i=0;i<512;i++) {
2416 ff_squareTbl[i] = (i - 256) * (i - 256);
2420 int ff_check_alignment(void){
2421 static int did_fail=0;
2422 LOCAL_ALIGNED_16(int, aligned, [4]);
2424 if((intptr_t)aligned & 15){
2426 #if HAVE_MMX || HAVE_ALTIVEC
2427 av_log(NULL, AV_LOG_ERROR,
2428 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2429 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2430 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2431 "Do not report crashes to Libav developers.\n");
2440 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2442 ff_check_alignment();
2445 if (avctx->bits_per_raw_sample == 10) {
2446 c->fdct = ff_jpeg_fdct_islow_10;
2447 c->fdct248 = ff_fdct248_islow_10;
2449 if(avctx->dct_algo==FF_DCT_FASTINT) {
2450 c->fdct = ff_fdct_ifast;
2451 c->fdct248 = ff_fdct_ifast248;
2453 else if(avctx->dct_algo==FF_DCT_FAAN) {
2454 c->fdct = ff_faandct;
2455 c->fdct248 = ff_faandct248;
2458 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2459 c->fdct248 = ff_fdct248_islow_8;
2462 #endif //CONFIG_ENCODERS
2464 if (avctx->bits_per_raw_sample == 10) {
2465 c->idct_put = ff_simple_idct_put_10;
2466 c->idct_add = ff_simple_idct_add_10;
2467 c->idct = ff_simple_idct_10;
2468 c->idct_permutation_type = FF_NO_IDCT_PERM;
2470 if(avctx->idct_algo==FF_IDCT_INT){
2471 c->idct_put= jref_idct_put;
2472 c->idct_add= jref_idct_add;
2473 c->idct = ff_j_rev_dct;
2474 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2475 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2476 c->idct_put= ff_faanidct_put;
2477 c->idct_add= ff_faanidct_add;
2478 c->idct = ff_faanidct;
2479 c->idct_permutation_type= FF_NO_IDCT_PERM;
2480 }else{ //accurate/default
2481 c->idct_put = ff_simple_idct_put_8;
2482 c->idct_add = ff_simple_idct_add_8;
2483 c->idct = ff_simple_idct_8;
2484 c->idct_permutation_type= FF_NO_IDCT_PERM;
2488 c->diff_pixels = diff_pixels_c;
2489 c->put_pixels_clamped = put_pixels_clamped_c;
2490 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2491 c->add_pixels_clamped = add_pixels_clamped_c;
2492 c->sum_abs_dctelem = sum_abs_dctelem_c;
2495 c->pix_sum = pix_sum_c;
2496 c->pix_norm1 = pix_norm1_c;
2498 c->fill_block_tab[0] = fill_block16_c;
2499 c->fill_block_tab[1] = fill_block8_c;
2501 /* TODO [0] 16 [1] 8 */
2502 c->pix_abs[0][0] = pix_abs16_c;
2503 c->pix_abs[0][1] = pix_abs16_x2_c;
2504 c->pix_abs[0][2] = pix_abs16_y2_c;
2505 c->pix_abs[0][3] = pix_abs16_xy2_c;
2506 c->pix_abs[1][0] = pix_abs8_c;
2507 c->pix_abs[1][1] = pix_abs8_x2_c;
2508 c->pix_abs[1][2] = pix_abs8_y2_c;
2509 c->pix_abs[1][3] = pix_abs8_xy2_c;
2511 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2512 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2513 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2514 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2515 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2516 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2517 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2518 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2519 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2521 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2522 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2523 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2524 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2525 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2526 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2527 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2528 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2529 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2531 #define dspfunc(PFX, IDX, NUM) \
2532 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2533 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2534 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2535 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2536 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2537 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2538 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2539 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2540 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2541 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2542 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2543 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2544 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2545 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2546 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2547 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2549 dspfunc(put_qpel, 0, 16);
2550 dspfunc(put_no_rnd_qpel, 0, 16);
2552 dspfunc(avg_qpel, 0, 16);
2553 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2555 dspfunc(put_qpel, 1, 8);
2556 dspfunc(put_no_rnd_qpel, 1, 8);
2558 dspfunc(avg_qpel, 1, 8);
2559 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2563 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2564 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2565 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2566 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2567 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2568 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2569 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2570 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2572 #define SET_CMP_FUNC(name) \
2573 c->name[0]= name ## 16_c;\
2574 c->name[1]= name ## 8x8_c;
2576 SET_CMP_FUNC(hadamard8_diff)
2577 c->hadamard8_diff[4]= hadamard8_intra16_c;
2578 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2579 SET_CMP_FUNC(dct_sad)
2580 SET_CMP_FUNC(dct_max)
2582 SET_CMP_FUNC(dct264_sad)
2584 c->sad[0]= pix_abs16_c;
2585 c->sad[1]= pix_abs8_c;
2589 SET_CMP_FUNC(quant_psnr)
2592 c->vsad[0]= vsad16_c;
2593 c->vsad[4]= vsad_intra16_c;
2594 c->vsad[5]= vsad_intra8_c;
2595 c->vsse[0]= vsse16_c;
2596 c->vsse[4]= vsse_intra16_c;
2597 c->vsse[5]= vsse_intra8_c;
2598 c->nsse[0]= nsse16_c;
2599 c->nsse[1]= nsse8_c;
2601 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2603 c->add_bytes= add_bytes_c;
2604 c->diff_bytes= diff_bytes_c;
2605 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2606 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2607 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2608 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2609 c->bswap_buf= bswap_buf;
2610 c->bswap16_buf = bswap16_buf;
2612 c->try_8x8basis= try_8x8basis_c;
2613 c->add_8x8basis= add_8x8basis_c;
2615 c->vector_clipf = vector_clipf_c;
2616 c->scalarproduct_int16 = scalarproduct_int16_c;
2617 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2618 c->vector_clip_int32 = vector_clip_int32_c;
2620 c->shrink[0]= av_image_copy_plane;
2621 c->shrink[1]= ff_shrink22;
2622 c->shrink[2]= ff_shrink44;
2623 c->shrink[3]= ff_shrink88;
2625 c->add_pixels8 = add_pixels8_c;
2629 #define FUNC(f, depth) f ## _ ## depth
2630 #define FUNCC(f, depth) f ## _ ## depth ## _c
2632 c->draw_edges = FUNCC(draw_edges, 8);
2633 c->clear_block = FUNCC(clear_block, 8);
2634 c->clear_blocks = FUNCC(clear_blocks, 8);
2636 #define BIT_DEPTH_FUNCS(depth) \
2637 c->get_pixels = FUNCC(get_pixels, depth);
2639 switch (avctx->bits_per_raw_sample) {
2642 BIT_DEPTH_FUNCS(16);
2651 ff_dsputil_init_arm(c, avctx);
2653 ff_dsputil_init_bfin(c, avctx);
2655 ff_dsputil_init_ppc(c, avctx);
2657 ff_dsputil_init_sh4(c, avctx);
2659 ff_dsputil_init_vis(c, avctx);
2661 ff_dsputil_init_x86(c, avctx);
2663 ff_init_scantable_permutation(c->idct_permutation,
2664 c->idct_permutation_type);