3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "libavutil/imgutils.h"
31 #include "libavutil/internal.h"
33 #include "copy_block.h"
35 #include "simple_idct.h"
38 #include "imgconvert.h"
40 #include "mpegvideo.h"
/* Clipping table: indexing ff_cropTbl + MAX_NEG_CROP with a possibly
 * out-of-range value yields the value clamped to [0,255].
 * Zero-initialized here; presumably filled at init time — values are
 * consumed below via `cm = ff_cropTbl + MAX_NEG_CROP`. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares table: ff_squareTbl + 256 gives x*x for x in [-255,255]
 * (used as `sq[pix1[k] - pix2[k]]` in the SSE functions below).
 * Zero-initialized here; presumably filled at init time. */
uint32_t ff_squareTbl[512] = {0, };
47 #include "dsputil_template.c"
51 #include "dsputil_template.c"
55 #include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
// (~0UL/255 is 0x0101...01, so the multiply replicates the byte into every lane)
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
/* Classic JPEG/MPEG zigzag scan: entry k is the raster (row-major) index of
 * the k-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
/* Filled at runtime elsewhere; 16-byte alignment for SIMD access. */
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
/* Alternate (horizontally biased) scan order; entries are raster indices,
 * same convention as ff_zigzag_direct above. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
/* Alternate (vertically biased) scan order; entries are raster indices,
 * same convention as ff_zigzag_direct above. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
/* Input permutation for the simple_idct_mmx */
/* Consumed by FF_SIMPLE_IDCT_PERM in ff_init_scantable_permutation(). */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
122 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
/**
 * Initialize a ScanTable from a scan order: permutated[] is the scan order
 * with the IDCT permutation applied; raster_end[] is derived from the
 * permutated entries.
 * NOTE(review): loop headers, locals and closing brace are missing from this
 * excerpt — the statements below are the surviving body fragments.
 */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    st->scantable= src_scantable;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
        j = st->permutated[i];
        st->raster_end[i]= end;
/**
 * Fill idct_permutation[64] according to idct_permutation_type: each case
 * maps the natural coefficient index i to the layout expected by the
 * corresponding IDCT implementation.
 * NOTE(review): the for-loops, break statements and braces are missing from
 * this excerpt; only the per-case assignments survive.
 */
void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type)
    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
            idct_permutation[i]= i;  /* identity */
    case FF_LIBMPEG2_IDCT_PERM:
            /* keep row (bits 3-5), swap bit layout inside the row */
            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
    case FF_SIMPLE_IDCT_PERM:
            idct_permutation[i]= simple_mmx_permutation[i];
    case FF_TRANSPOSE_IDCT_PERM:
            idct_permutation[i]= ((i&7)<<3) | (i>>3);  /* row/column transpose */
    case FF_PARTTRANS_IDCT_PERM:
            /* partial transpose: swap low index bits between row and column */
            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
    case FF_SSE2_IDCT_PERM:
            /* keep the row, permute columns per idct_sse2_row_perm */
            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
/**
 * Sum of the pixels of a 16x16 block (8 pixels per inner iteration).
 * NOTE(review): accumulation statements and return are missing from this
 * excerpt; only the loop skeleton and the stride advance survive.
 */
static int pix_sum_c(uint8_t * pix, int line_size)
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
        pix += line_size - 16;  /* advance to the next row of the block */
/**
 * Sum of squared pixels of a 16x16 block, via the ff_squareTbl lookup
 * (sq points at the zero offset of the +/-255 squares table).
 * Two paths read 8 pixels at a time as one 64-bit or two 32-bit words and
 * accumulate sq[] of each byte; the #if/#else selecting between them and the
 * `s += sq[x&0xff]` lines are missing from this excerpt.
 * NOTE(review): the word-sized loads through casted pointers rely on the
 * build permitting such aliasing — confirm against project build flags.
 */
static int pix_norm1_c(uint8_t * pix, int line_size)
    uint32_t *sq = ff_squareTbl + 256;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            /* 64-bit path: one load, square each byte via table lookup */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
            /* 32-bit path: two loads of 4 bytes each */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
        pix += line_size - 16;  /* advance to the next row of the block */
/**
 * Byte-swap w 32-bit words from src into dst (may alias).
 * Main loop is unrolled by 8; the tail-loop header handling the remaining
 * w%8 words is missing from this excerpt.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    for(i=0; i+8<=w; i+=8){
        dst[i+0]= av_bswap32(src[i+0]);
        dst[i+1]= av_bswap32(src[i+1]);
        dst[i+2]= av_bswap32(src[i+2]);
        dst[i+3]= av_bswap32(src[i+3]);
        dst[i+4]= av_bswap32(src[i+4]);
        dst[i+5]= av_bswap32(src[i+5]);
        dst[i+6]= av_bswap32(src[i+6]);
        dst[i+7]= av_bswap32(src[i+7]);
        dst[i+0]= av_bswap32(src[i+0]);  /* tail: one word per iteration */
/**
 * Byte-swap len 16-bit values from src into dst.
 * NOTE(review): the loop header (presumably over len) is missing from this
 * excerpt; only the per-element statement survives.
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
        *dst++ = av_bswap16(*src++);
/**
 * Sum of squared errors over a 4-pixel-wide block, h rows.
 * Differences are in [-255,255], looked up in the centered squares table.
 * NOTE(review): the per-row pointer advance, return and closing brace are
 * missing from this excerpt.
 */
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
    uint32_t *sq = ff_squareTbl + 256;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
/**
 * Sum of squared errors over an 8-pixel-wide block, h rows.
 * Same table-lookup scheme as sse4_c; row advance/return missing from this
 * excerpt.
 */
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
    uint32_t *sq = ff_squareTbl + 256;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
/**
 * Sum of squared errors over a 16-pixel-wide block, h rows.
 * Same table-lookup scheme as sse4_c/sse8_c; row advance/return missing from
 * this excerpt.
 */
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    uint32_t *sq = ff_squareTbl + 256;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];
/**
 * Write the per-pixel difference s1 - s2 of an 8-wide block into block[]
 * as signed 16-bit values (one unrolled row shown; the surrounding row loop
 * and pointer advances are missing from this excerpt).
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    /* read the pixels */
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
/**
 * Store an 8-wide row of 16-bit coefficients as pixels, clamped to [0,255]
 * (the surrounding row loop and pointer advances are missing from this
 * excerpt).
 */
static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
    /* read the pixels */
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);
        pixels[4] = av_clip_uint8(block[4]);
        pixels[5] = av_clip_uint8(block[5]);
        pixels[6] = av_clip_uint8(block[6]);
        pixels[7] = av_clip_uint8(block[7]);
/**
 * Store signed 16-bit coefficients as pixels biased by +128, clamping the
 * input to [-128,127] first (the clamp branches for the low bound and the
 * clamped stores are missing from this excerpt).
 */
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            else if (*block > 127)
                *pixels = (uint8_t)(*block + 128);  /* in-range: bias to unsigned */
        pixels += (line_size - 8);  /* advance to the next row */
/**
 * Add an 8-wide row of 16-bit residuals onto existing pixels, clamping the
 * result to [0,255] (the surrounding row loop and pointer advances are
 * missing from this excerpt).
 */
static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
    /* read the pixels */
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
/**
 * Sum of absolute values of DCT coefficients (loop header, presumably over
 * 64 elements, and the return are missing from this excerpt).
 */
static int sum_abs_dctelem_c(int16_t *block)
        sum+= FFABS(block[i]);
/**
 * Fill h rows of a 16-pixel-wide block with a constant byte value
 * (the per-row `block += line_size` advance is missing from this excerpt).
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
    for (i = 0; i < h; i++) {
        memset(block, value, 16);
/**
 * Fill h rows of an 8-pixel-wide block with a constant byte value
 * (the per-row `block += line_size` advance is missing from this excerpt).
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
    for (i = 0; i < h; i++) {
        memset(block, value, 8);
/* Rounded averages of 2 and 4 values, used by the motion-compensation code.
 * Fix: fully parenthesize the macro arguments and the expansion so that
 * operand expressions with lower-precedence operators (shifts, bitwise ops)
 * expand with the intended meaning, e.g. avg2(a << 1, b). The numeric result
 * is unchanged for all existing simple-argument call sites. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
/**
 * Bilinear interpolation for GMC with 1/16-pel fractional offsets
 * (x16, y16): the four weights A..D sum to 256, so each output is the
 * weighted sum of the 2x2 neighborhood, rounded by 'rounder' and >>8.
 * One 8-pixel row is shown; the loop over h and pointer advances are
 * missing from this excerpt.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
/**
 * Global motion compensation: for each destination pixel, compute a source
 * position from the affine parameters (dxx/dxy/dyx/dyy, offsets ox/oy, scale
 * 1<<shift) and bilinearly interpolate. The four branches handle the cases
 * where src_x and/or src_y fall outside the picture: out-of-range
 * coordinates are clamped with av_clip and the corresponding interpolation
 * axis degenerates to a 1-D (or direct) fetch.
 * NOTE(review): the y loop, the src_x/src_y/frac computations, the rounding
 * terms and closing braces are missing from this excerpt.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
    const int s= 1<<shift;  /* fixed-point scale of the fractional parts */
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;
            /* unsigned compare doubles as a >=0 && <limit range check */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                    /* y out of range: clamp y, interpolate in x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                if((unsigned)src_y < height){
                    /* x out of range: clamp x, interpolate in y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                    /* both out of range: nearest clamped pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
/* No interpolation (full-pel): dispatch on block width to the plain copy
 * helpers (switch braces missing from this excerpt). */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
/* Horizontal 1/3-pel: ~ (2*a + b)/3, rounded — 683/2048 approximates 1/3.
 * (per-row pointer advances missing from this excerpt) */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
/* Horizontal 2/3-pel: ~ (a + 2*b)/3, rounded. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
/* Vertical 1/3-pel: ~ (2*top + bottom)/3, rounded. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
/* Diagonal (1/3,1/3): weighted 2x2 mean, weights 4/3/3/2 sum to 12;
 * 2731/32768 approximates 1/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
/* Diagonal (1/3,2/3): weighted 2x2 mean, weights 3/2/4/3 sum to 12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* Vertical 2/3-pel: ~ (top + 2*bottom)/3, rounded. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
/* Diagonal (2/3,1/3): weighted 2x2 mean, weights 3/4/2/3 sum to 12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
/* Diagonal (2/3,2/3): weighted 2x2 mean, weights 2/3/3/4 sum to 12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
/* Averaging full-pel variant: dispatch on block width to the averaging copy
 * helpers (switch braces missing from this excerpt). */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
/* Averaging variant of mc10: rounded average of dst with the interpolated
 * value. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
/* Averaging variant of mc20. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
/* Averaging variant of mc01. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
/* Averaging variant of mc11. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Averaging variant of mc12. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Averaging variant of mc02. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
/* Averaging variant of mc21. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* Averaging variant of mc22. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
731 #define QPEL_MC(r, OPNAME, RND, OP) \
732 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
733 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
737 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
738 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
739 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
740 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
741 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
742 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
743 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
744 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
750 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
752 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
756 const int src0= src[0*srcStride];\
757 const int src1= src[1*srcStride];\
758 const int src2= src[2*srcStride];\
759 const int src3= src[3*srcStride];\
760 const int src4= src[4*srcStride];\
761 const int src5= src[5*srcStride];\
762 const int src6= src[6*srcStride];\
763 const int src7= src[7*srcStride];\
764 const int src8= src[8*srcStride];\
765 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
766 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
767 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
768 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
769 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
770 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
771 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
772 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
778 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
779 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
784 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
785 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
786 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
787 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
788 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
789 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
790 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
791 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
792 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
793 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
794 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
795 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
796 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
797 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
798 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
799 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
805 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
806 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
811 const int src0= src[0*srcStride];\
812 const int src1= src[1*srcStride];\
813 const int src2= src[2*srcStride];\
814 const int src3= src[3*srcStride];\
815 const int src4= src[4*srcStride];\
816 const int src5= src[5*srcStride];\
817 const int src6= src[6*srcStride];\
818 const int src7= src[7*srcStride];\
819 const int src8= src[8*srcStride];\
820 const int src9= src[9*srcStride];\
821 const int src10= src[10*srcStride];\
822 const int src11= src[11*srcStride];\
823 const int src12= src[12*srcStride];\
824 const int src13= src[13*srcStride];\
825 const int src14= src[14*srcStride];\
826 const int src15= src[15*srcStride];\
827 const int src16= src[16*srcStride];\
828 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
829 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
830 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
831 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
832 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
833 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
834 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
835 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
836 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
837 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
838 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
839 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
840 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
841 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
842 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
843 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
849 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
851 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
852 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
855 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
856 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
859 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
861 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
862 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
865 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
868 copy_block9(full, src, 16, stride, 9);\
869 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
870 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
873 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
875 copy_block9(full, src, 16, stride, 9);\
876 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
879 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
882 copy_block9(full, src, 16, stride, 9);\
883 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
884 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
886 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
891 copy_block9(full, src, 16, stride, 9);\
892 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
893 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
894 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
895 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
897 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
901 copy_block9(full, src, 16, stride, 9);\
902 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
903 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
904 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
905 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
907 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
912 copy_block9(full, src, 16, stride, 9);\
913 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
914 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
915 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
916 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
918 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
922 copy_block9(full, src, 16, stride, 9);\
923 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
924 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
925 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
926 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
928 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
933 copy_block9(full, src, 16, stride, 9);\
934 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
935 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
936 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
937 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
939 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
943 copy_block9(full, src, 16, stride, 9);\
944 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
945 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
946 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
947 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
949 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
954 copy_block9(full, src, 16, stride, 9);\
955 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
956 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
957 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
958 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
960 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
964 copy_block9(full, src, 16, stride, 9);\
965 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
966 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
967 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
968 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
970 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
974 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
975 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
977 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
980 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
981 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
982 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
984 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
989 copy_block9(full, src, 16, stride, 9);\
990 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
991 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
992 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
993 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
995 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
998 copy_block9(full, src, 16, stride, 9);\
999 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1000 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1001 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1003 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1004 uint8_t full[16*9];\
1007 uint8_t halfHV[64];\
1008 copy_block9(full, src, 16, stride, 9);\
1009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1010 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1012 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1014 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1015 uint8_t full[16*9];\
1017 copy_block9(full, src, 16, stride, 9);\
1018 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1019 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1020 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1022 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1024 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1025 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1028 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1030 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1031 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1034 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1035 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1038 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1040 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1041 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1044 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1045 uint8_t full[24*17];\
1047 copy_block17(full, src, 24, stride, 17);\
1048 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1049 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1052 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1053 uint8_t full[24*17];\
1054 copy_block17(full, src, 24, stride, 17);\
1055 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1058 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1059 uint8_t full[24*17];\
1061 copy_block17(full, src, 24, stride, 17);\
1062 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1063 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1065 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1066 uint8_t full[24*17];\
1067 uint8_t halfH[272];\
1068 uint8_t halfV[256];\
1069 uint8_t halfHV[256];\
1070 copy_block17(full, src, 24, stride, 17);\
1071 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1072 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1073 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1074 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1076 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1077 uint8_t full[24*17];\
1078 uint8_t halfH[272];\
1079 uint8_t halfHV[256];\
1080 copy_block17(full, src, 24, stride, 17);\
1081 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1082 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1083 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1084 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1086 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1087 uint8_t full[24*17];\
1088 uint8_t halfH[272];\
1089 uint8_t halfV[256];\
1090 uint8_t halfHV[256];\
1091 copy_block17(full, src, 24, stride, 17);\
1092 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1093 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1094 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1095 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1097 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1098 uint8_t full[24*17];\
1099 uint8_t halfH[272];\
1100 uint8_t halfHV[256];\
1101 copy_block17(full, src, 24, stride, 17);\
1102 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1103 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1104 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1105 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1107 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1108 uint8_t full[24*17];\
1109 uint8_t halfH[272];\
1110 uint8_t halfV[256];\
1111 uint8_t halfHV[256];\
1112 copy_block17(full, src, 24, stride, 17);\
1113 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1114 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1115 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1116 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1118 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1119 uint8_t full[24*17];\
1120 uint8_t halfH[272];\
1121 uint8_t halfHV[256];\
1122 copy_block17(full, src, 24, stride, 17);\
1123 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1124 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1125 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1126 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1128 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1129 uint8_t full[24*17];\
1130 uint8_t halfH[272];\
1131 uint8_t halfV[256];\
1132 uint8_t halfHV[256];\
1133 copy_block17(full, src, 24, stride, 17);\
1134 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1135 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1136 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1137 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1139 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1140 uint8_t full[24*17];\
1141 uint8_t halfH[272];\
1142 uint8_t halfHV[256];\
1143 copy_block17(full, src, 24, stride, 17);\
1144 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1145 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1146 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1147 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1149 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1150 uint8_t halfH[272];\
1151 uint8_t halfHV[256];\
1152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1153 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1154 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1156 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1157 uint8_t halfH[272];\
1158 uint8_t halfHV[256];\
1159 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1160 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1161 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1163 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1164 uint8_t full[24*17];\
1165 uint8_t halfH[272];\
1166 uint8_t halfV[256];\
1167 uint8_t halfHV[256];\
1168 copy_block17(full, src, 24, stride, 17);\
1169 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1170 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1171 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1172 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1174 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1175 uint8_t full[24*17];\
1176 uint8_t halfH[272];\
1177 copy_block17(full, src, 24, stride, 17);\
1178 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1179 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1180 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1182 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1183 uint8_t full[24*17];\
1184 uint8_t halfH[272];\
1185 uint8_t halfV[256];\
1186 uint8_t halfHV[256];\
1187 copy_block17(full, src, 24, stride, 17);\
1188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1189 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1191 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1193 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1194 uint8_t full[24*17];\
1195 uint8_t halfH[272];\
1196 copy_block17(full, src, 24, stride, 17);\
1197 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1198 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1199 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1201 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1202 uint8_t halfH[272];\
1203 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1204 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1207 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1208 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1209 #define op_put(a, b) a = cm[((b) + 16)>>5]
1210 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1212 QPEL_MC(0, put_ , _ , op_put)
1213 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1214 QPEL_MC(0, avg_ , _ , op_avg)
1215 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1217 #undef op_avg_no_rnd
1219 #undef op_put_no_rnd
1221 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1222 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1223 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1224 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1225 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1226 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
/* WMV2 horizontal half-pel interpolation for an 8-pixel-wide block:
 * 4-tap (-1, 9, 9, -1)/16 filter with rounding, clipped to 0..255 via the
 * crop table. Reads src[-1] .. src[9] per row; the h parameter gives the
 * number of rows (row-advance code follows the unrolled row below). */
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clip LUT; offset permits negative indices */
        /* one fully unrolled row: dst[x] = clip((9*(s[x]+s[x+1]) - (s[x-1]+s[x+2]) + 8) >> 4) */
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
#if CONFIG_RV40_DECODER
/* RV40 (3,3) quarter-pel positions: forward directly to the plain
 * half-pel xy2 averagers (RV40 uses simple 2x2 averaging here). */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
#endif /* CONFIG_RV40_DECODER */
/* WMV2 vertical half-pel interpolation: same 4-tap (-1, 9, 9, -1)/16
 * filter as the horizontal variant, applied down a column. Reads
 * src[-srcStride] .. src[9*srcStride]; w gives the number of columns. */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clip LUT; offset permits negative indices */
        /* load one full column (10 taps' worth of samples) */
        const int src_1= src[ -srcStride];
        const int src0 = src[0 ];
        const int src1 = src[ srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        /* eight filtered outputs per column, fully unrolled */
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* WMV2 mspel 8x8 motion compensation, one function per subpel position
 * (mcXY: X = horizontal phase, Y = vertical phase). Quarter-pel phases
 * are built by averaging the source with a half-pel lowpass result. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    /* (1,0): average source with the horizontal half-pel result */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    /* (2,0): pure horizontal half-pel */
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    /* (3,0): average src+1 with the horizontal half-pel result */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    /* (0,2): pure vertical half-pel */
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    /* (1,2): average vertical half-pel with the H-then-V result.
     * halfH is filtered over 11 rows starting one row above so the
     * vertical pass has the extra context it needs. */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);

static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    /* (3,2): as mc12 but the vertical half-pel is taken at src+1 */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);

static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    /* (2,2): horizontal half-pel followed by vertical half-pel */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
/* H.263 deblocking filter across a horizontal block edge (filters
 * vertically). For each column x, p0..p3 are the two pixels on either
 * side of the edge; a correction derived from the local gradient is
 * applied, limited by the qscale-dependent strength table. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    const int strength= ff_h263_loop_filter_strength[qscale];
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8; /* edge-gradient measure */
        /* ramp limiter: d1 follows d for small |d|, ramps back to 0
         * beyond 2*strength (strong gradients = real edges, keep them) */
        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        /* p1/p2 are in -256..511 after correction; bit 8 set means out of
         * range, and ~(p>>31) yields 0 for negatives, 255 for overflow */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);
        src[x-1*stride] = p1;
        src[x+0*stride] = p2;
        /* secondary, smaller correction on the outer pixels */
        d2= av_clip((p0-p3)/4, -ad1, ad1);
        src[x-2*stride] = p0 - d2;
        src[x+ stride] = p3 + d2;
/* H.263 deblocking filter across a vertical block edge (filters
 * horizontally). Mirror of h263_v_loop_filter_c with rows and columns
 * exchanged; see that function for the limiter description. */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    const int strength= ff_h263_loop_filter_strength[qscale];
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8; /* edge-gradient measure */
        /* ramp limiter identical to the vertical-edge filter */
        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        /* clamp corrected p1/p2 (range -256..511) to 0..255 via sign trick */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);
        src[y*stride-1] = p1;
        src[y*stride+0] = p2;
        /* secondary, smaller correction on the outer pixels */
        d2= av_clip((p0-p3)/4, -ad1, ad1);
        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
/* H.261 in-loop filter: separable (1,2,1)/4 smoothing of an 8x8 block.
 * The vertical pass writes into temp[] with results scaled by 4; border
 * rows 0 and 7 are passed through (pre-scaled by 4 so the final
 * normalization is uniform). The horizontal pass then normalizes. */
static void h261_loop_filter_c(uint8_t *src, int stride){
        temp[x ] = 4*src[x ]; /* top border row: pass-through, x4 scale */
        temp[x + 7*8] = 4*src[x + 7*stride]; /* bottom border row likewise */
            xy = y * stride + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; /* vertical (1,2,1) */
        src[ y*stride] = (temp[ y*8] + 2)>>2; /* left border col: normalize only */
        src[7+y*stride] = (temp[7+y*8] + 2)>>2; /* right border col likewise */
            xy = y * stride + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4; /* horizontal (1,2,1) + final >>4 */
/* Sum of absolute differences (SAD) of a 16-wide block over h rows,
 * pix1 vs pix2, both advancing by line_size per row. Row fully unrolled. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    s += abs(pix1[0] - pix2[0]);
    s += abs(pix1[1] - pix2[1]);
    s += abs(pix1[2] - pix2[2]);
    s += abs(pix1[3] - pix2[3]);
    s += abs(pix1[4] - pix2[4]);
    s += abs(pix1[5] - pix2[5]);
    s += abs(pix1[6] - pix2[6]);
    s += abs(pix1[7] - pix2[7]);
    s += abs(pix1[8] - pix2[8]);
    s += abs(pix1[9] - pix2[9]);
    s += abs(pix1[10] - pix2[10]);
    s += abs(pix1[11] - pix2[11]);
    s += abs(pix1[12] - pix2[12]);
    s += abs(pix1[13] - pix2[13]);
    s += abs(pix1[14] - pix2[14]);
    s += abs(pix1[15] - pix2[15]);
/* SAD of a 16-wide block vs. a horizontally half-pel interpolated
 * reference: pix2 is averaged with its right neighbour (avg2). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
    s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
    s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
    s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
    s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
    s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
    s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
    s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
    s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
    s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
    s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
    s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
    s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
    s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
    s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
    s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
/* SAD of a 16-wide block vs. a vertically half-pel interpolated
 * reference: pix2 is averaged with the row below it (pix3). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
    s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
    s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
    s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
    s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
    s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
    s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
    s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
    s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
    s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
    s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
    s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
    s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
    s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
    s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
    s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
/* SAD of a 16-wide block vs. a diagonally (x+y) half-pel interpolated
 * reference: each reference sample is the 2x2 average (avg4) of pix2,
 * its right neighbour, and the same pair one row down (pix3). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
    s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
    s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
    s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
    s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
    s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
    s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
    s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
    s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
    s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
    s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
    s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
    s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
    s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
    s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
    s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/* SAD of an 8-wide block over h rows; 8-pixel variant of pix_abs16_c. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    s += abs(pix1[0] - pix2[0]);
    s += abs(pix1[1] - pix2[1]);
    s += abs(pix1[2] - pix2[2]);
    s += abs(pix1[3] - pix2[3]);
    s += abs(pix1[4] - pix2[4]);
    s += abs(pix1[5] - pix2[5]);
    s += abs(pix1[6] - pix2[6]);
    s += abs(pix1[7] - pix2[7]);
/* SAD of an 8-wide block vs. a horizontally half-pel reference (avg2
 * of pix2 with its right neighbour); 8-pixel variant of pix_abs16_x2_c. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
    s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
    s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
    s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
    s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
    s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
    s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
    s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
/* SAD of an 8-wide block vs. a vertically half-pel reference (avg2 of
 * pix2 with the row below); 8-pixel variant of pix_abs16_y2_c. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
    s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
    s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
    s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
    s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
    s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
    s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
    s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
/* SAD of an 8-wide block vs. a diagonal half-pel reference (2x2 avg4);
 * 8-pixel variant of pix_abs16_xy2_c. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
    s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
    s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
    s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
    s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
    s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
    s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
    s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/* Noise-preserving SSE, 16-wide: plain SSE (score1) plus a penalty on the
 * difference of local 2x2 gradient texture between s1 and s2 (score2),
 * so that quantizers which flatten texture are punished. The weight is
 * avctx->nsse_weight, or 8 when called without a context. */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    for(x=0; x<16; x++){
        score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); /* squared error */
    for(x=0; x<15; x++){
        /* 2x2 cross-gradient of s1 minus the same measure on s2 */
        score2+= FFABS( s1[x ] - s1[x +stride]
                      - s1[x+1] + s1[x+1+stride])
                -FFABS( s2[x ] - s2[x +stride]
                      - s2[x+1] + s2[x+1+stride]);
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else return score1 + FFABS(score2)*8;
/* Noise-preserving SSE, 8-wide variant of nsse16_c; see that function. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
        score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); /* squared error */
        /* 2x2 cross-gradient difference, as in nsse16_c */
        score2+= FFABS( s1[x ] - s1[x +stride]
                      - s1[x+1] + s1[x+1+stride])
                -FFABS( s2[x ] - s2[x +stride]
                      - s2[x+1] + s2[x+1+stride]);
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else return score1 + FFABS(score2)*8;
/* Trellis helper: evaluate the weighted squared error that would remain
 * if 'scale' times the given basis function were added to the residual
 * 'rem'. Used by the rate-distortion quantizer to try coefficients. */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    for(i=0; i<8*8; i++){
        /* rescale basis from BASIS_SHIFT to RECON_SHIFT precision, rounded */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        assert(-512<b && b<512);
        sum += (w*b)*(w*b)>>4; /* perceptually weighted squared error */
/* Commit what try_8x8basis_c evaluated: add 'scale' times the basis
 * function into the residual, with the same rounding and rescaling. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
/* Dummy compare function that scores everything equally (returns a
 * constant); used when a comparison slot must be filled but unused. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
/* Fill the 6-entry cmp[] array (one slot per block size/usage) with the
 * comparison functions selected by 'type' (FF_CMP_* value) from the
 * DSPContext. Unknown types fall through to the error log below. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    memset(cmp, 0, sizeof(void*)*6); /* clear all slots first */
            cmp[i]= c->hadamard8_diff[i];
            cmp[i]= c->dct_sad[i];
            cmp[i]= c->dct264_sad[i];
            cmp[i]= c->dct_max[i];
            cmp[i]= c->quant_psnr[i];
        av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
/* Byte-wise dst[i] += src[i] over w bytes, vectorized SWAR-style one
 * machine word at a time: low 7 bits of each byte are added directly
 * (pb_7f mask blocks carries between lanes) and the top bit is fixed up
 * with XOR (pb_80 mask). The tail is handled one byte at a time. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
        dst[i+0] += src[i+0]; /* scalar tail */
/* Byte-wise dst[i] = src1[i] - src2[i] over w bytes. The word-at-a-time
 * path uses the SWAR borrow trick (pb_7f/pb_80 masks keep borrows from
 * crossing byte lanes); on targets without fast unaligned access a plain
 * unrolled byte loop is used when src2 is misaligned. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        /* misaligned source: safe unrolled byte loop */
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        /* per-byte subtract without inter-lane borrow */
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
        dst[i+0] = src1[i+0]-src2[i+0]; /* scalar tail */
/* HuffYUV median prediction decode: reconstruct each byte as the median
 * of left (l), above (src1[i]) and left+above-aboveleft (gradient),
 * plus the transmitted difference. *left and *left_top carry the
 * predictor state across calls. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
/* HuffYUV median prediction encode: inverse of the add_ variant above;
 * emits src2 minus the median predictor for each byte. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
/* HuffYUV left prediction decode: running left-neighbour sum starting
 * from 'acc'; returns the final accumulator for the next call. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    for(i=0; i<w-1; i++){

/* Same left prediction for packed BGR32 pixels: four interleaved
 * channel accumulators carried in *red/*green/*blue/*alpha. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Hadamard transform helpers: BUTTERFLY2 writes sum/difference of two
 * inputs to two outputs, BUTTERFLY1 does the same in place, and
 * BUTTERFLYA returns |x+y| + |x-y| (final absolute-sum stage). */
#define BUTTERFLY2(o1,o2,i1,i2) \
#define BUTTERFLY1(x,y) \
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/* SATD: 8x8 Hadamard transform of the difference src - dst, summing the
 * absolute transform coefficients. Rows are transformed first (three
 * butterfly stages), then columns, with the last column stage folded
 * into the absolute-value accumulation (BUTTERFLYA). */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    //FIXME try pointer walks
    /* horizontal pass, stage 1: pairwise butterflies on the diff row */
    BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
    BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
    BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
    BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
    /* horizontal pass, stage 2 */
    BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
    BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
    BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
    BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
    /* horizontal pass, stage 3 */
    BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
    BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
    BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
    BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    /* vertical pass, stage 1 */
    BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
    BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
    BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
    BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
    /* vertical pass, stage 2 */
    BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
    BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
    BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
    BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
    /* vertical stage 3 merged with |.| accumulation */
    BUTTERFLYA(temp[8*0+i], temp[8*4+i])
    +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
    +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
    +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
/* Intra SATD: same 8x8 Hadamard as hadamard8_diff8x8_c but applied to
 * the source itself (no reference); the DC coefficient (block mean) is
 * subtracted at the end so the metric measures AC energy only. */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    //FIXME try pointer walks
    /* horizontal pass, stage 1 */
    BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
    BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
    BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
    BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
    /* horizontal pass, stage 2 */
    BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
    BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
    BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
    BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
    /* horizontal pass, stage 3 */
    BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
    BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
    BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
    BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    /* vertical pass, stage 1 */
    BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
    BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
    BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
    BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
    /* vertical pass, stage 2 */
    BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
    BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
    BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
    BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
    /* vertical stage 3 merged with |.| accumulation */
    BUTTERFLYA(temp[8*0+i], temp[8*4+i])
    +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
    +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
    +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
/* DCT-domain SAD: forward-DCT the 8x8 difference src1 - src2 and sum the
 * absolute transform coefficients via the DSP helper. */
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(int16_t, temp, [64]); /* alignment required by fdct */
    s->dsp.diff_pixels(temp, src1, src2, stride);
    return s->dsp.sum_abs_dctelem(temp);
    /* H.264-style integer 8-point DCT, one dimension.                  */\
    /* Even part: sums of mirrored samples.                             */\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    /* Odd part: differences of mirrored samples; >>1 terms implement   */\
    /* the irrational cosine ratios with integer arithmetic.            */\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(1, a4 + (a7>>2)) ;\
    DST(2, a2 + (a3>>1)) ;\
    DST(3, a5 + (a6>>2)) ;\
    DST(5, a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
/* H.264-transform SAD: apply the integer 8x8 DCT (DCT8_1D above) to the
 * difference block, rows then columns, summing |coefficient| during the
 * column pass via the redefined DST macro. */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    s->dsp.diff_pixels(dct[0], src1, src2, stride);
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ ) /* transform each row in place */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ ) /* transform columns, accumulating |v| */
/* DCT max metric: forward-DCT the 8x8 difference and return the largest
 * absolute coefficient (peak rather than sum). */
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(int16_t, temp, [64]); /* alignment required by fdct */
    s->dsp.diff_pixels(temp, src1, src2, stride);
        sum= FFMAX(sum, FFABS(temp[i]));
/* Quantization-PSNR metric: run the difference block through the actual
 * quantize / dequantize / IDCT round trip and return the squared error
 * introduced by that round trip (a direct measure of coding loss). */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
    int16_t * const bak = temp+64; /* pristine copy for comparison */
    s->dsp.diff_pixels(temp, src1, src2, stride);
    memcpy(bak, temp, 64*sizeof(int16_t));
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); //FIXME
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); /* round-trip error */
/* Rate-distortion metric for an 8x8 block: quantize the difference,
 * estimate the bit cost from the VLC length tables (run/level pairs in
 * scan order, with escape cost for out-of-range levels), reconstruct,
 * and return distortion + lambda-weighted bits. */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); /* local copies: reconstruction */
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); /* must not touch caller memory */
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * last_length;
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);
    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
        /* intra: DC coded separately with its own VLC table */
        length = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
        length = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    /* walk coefficients in scan order, counting run/level code lengths */
    for(i=start_i; i<last; i++){
        int j= scantable[i];
        if((level&(~127)) == 0){ /* level fits the combined table */
            bits+= length[UNI_AC_ENC_INDEX(run, level)];
    level= temp[i] + 64; /* final (last) coefficient */
    if((level&(~127)) == 0){
        bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
    /* reconstruct and measure the actual distortion */
    s->dct_unquantize_intra(s, temp, 0, s->qscale);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    s->dsp.idct_add(lsrc2, 8, temp);
    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
    /* 109/128 approximates the qscale^2-to-lambda conversion factor */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* Bit-cost metric for an 8x8 block: like rd8x8_c but returns only the
 * estimated VLC bit count of the quantized difference, without
 * reconstruction or distortion. */
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * last_length;
    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
        /* intra: DC coded separately with its own VLC table */
        length = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
        length = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    /* walk coefficients in scan order, counting run/level code lengths */
    for(i=start_i; i<last; i++){
        int j= scantable[i];
        if((level&(~127)) == 0){ /* level fits the combined table */
            bits+= length[UNI_AC_ENC_INDEX(run, level)];
    level= temp[i] + 64; /* final (last) coefficient */
    if((level&(~127)) == 0){
        bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* Vertical SAD, intra: sum of |s[x] - s[x+stride]| over the block, i.e.
 * total vertical activity of the source itself (no reference frame). */\
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ /* unrolled by 4 columns */ \
            score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
/* Vertical SAD of the difference s1 - s2: measures how much the residual
 * varies between adjacent rows (cheap blockiness/texture estimate). */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    for(x=0; x<16; x++){
        score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
#define SQ(a) ((a)*(a)) /* square helper for the VSSE metrics below */
/* Vertical SSE, intra: like VSAD_INTRA but with squared differences. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ /* unrolled by 4 columns */ \
            score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
/* Vertical SSE of the difference s1 - s2 (squared variant of vsad16_c). */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    for(x=0; x<16; x++){
        score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Sum of squared differences between an int8 vector and an int16 vector
 * of the same length (used by SVQ1-style codebook search). */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
/* Build a 16x16 compare function from an 8x8 one by scoring the four
 * 8x8 quadrants (top pair, then bottom pair after advancing by 8 rows)
 * and summing. Instantiated for every 8x8 metric defined above. */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    score +=name8(s, dst , src , stride, 8);\
    score +=name8(s, dst+8 , src+8 , stride, 8);\
    score +=name8(s, dst , src , stride, 8);\
    score +=name8(s, dst+8 , src+8 , stride, 8);\

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/* Clip one float, passed as its raw IEEE-754 bit pattern, to [min, max]
 * for the case min < 0 < max: negative floats have the sign bit set, so
 * unsigned comparison against the min bits catches the lower bound, and
 * flipping the sign bit (a^(1U<<31)) orders positives for the upper
 * bound check against maxisign (= maxi with sign bit flipped). */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
    if(a > mini) return mini;
    else if((a^(1U<<31)) > maxisign) return maxi;
/* Clip a float vector to [*min, *max] when the bounds have opposite
 * signs, operating on raw bit patterns (see clipf_c_one). len is assumed
 * to be a multiple of 8; the loop is unrolled accordingly. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    uint32_t mini = *(uint32_t*)min;  /* bit patterns of the bounds */
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31); /* precomputed for clipf_c_one */
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
/**
 * Clip each element of a float vector into [min, max].
 *
 * When min and max straddle zero the work is delegated to the
 * integer-compare fast path; otherwise a plain av_clipf() loop,
 * unrolled by 8, is used. Assumes len is a multiple of 8.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for (i = 0; i < len; i += 8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
/**
 * Scalar (dot) product of two int16 vectors.
 *
 * @param v1    first vector, order elements
 * @param v2    second vector, order elements
 * @param order number of elements (0 yields 0)
 * @return sum over i of v1[i] * v2[i]
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Dot product of v1 and v2, with v1 updated in place as a side effect:
 * each element accumulates the product before v1[i] += mul * v3[i].
 * The dot product uses the PRE-update v1 values.
 *
 * @param v1    first vector; modified: v1[i] += mul * v3[i]
 * @param v2    second vector (read-only)
 * @param v3    multiply-add vector (read-only)
 * @param order number of elements
 * @param mul   multiplier applied to v3 before adding into v1
 * @return sum over i of (original v1[i]) * v2[i]
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;

    while (order--) {
        res   += *v1 * *v2++;   /* read v1 before it is updated */
        *v1++ += mul * *v3++;
    }

    return res;
}
/**
 * Apply a symmetric Q15 window to an int16 signal:
 *   output[i]       = round(input[i]       * window[i] / 2^15)
 *   output[len-1-i] = round(input[len-1-i] * window[i] / 2^15)
 * i.e. the same window coefficient is applied to the mirrored pair of
 * samples, so only len/2 coefficients are needed.
 *
 * Assumes len is even; output may alias input.
 * MUL16() is the project's 16x16->32 multiply macro.
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        /* + (1 << 14) implements round-to-nearest before the >> 15 */
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
/**
 * Clip each element of an int32 vector into [min, max].
 *
 * The loop is unrolled by 8 with a do/while frame: len is assumed to be
 * a nonzero multiple of 8 (a len of 0 would wrap the unsigned counter).
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/**
 * JPEG-reference IDCT, "put" flavor: inverse-transform the block in
 * place and store the clamped (0..255) result into dest.
 *
 * @param dest      destination pixel plane
 * @param line_size byte stride of dest
 * @param block     8x8 coefficient block, transformed in place
 */
static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/**
 * JPEG-reference IDCT, "add" flavor: inverse-transform the block in
 * place and add the result onto dest with clamping to 0..255.
 *
 * @param dest      destination pixel plane (read-modify-write)
 * @param line_size byte stride of dest
 * @param block     8x8 coefficient block, transformed in place
 */
static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
2449 /* init static data */
2450 av_cold void ff_dsputil_static_init(void)
2454 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2455 for(i=0;i<MAX_NEG_CROP;i++) {
2457 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2460 for(i=0;i<512;i++) {
2461 ff_squareTbl[i] = (i - 256) * (i - 256);
2464 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2467 int ff_check_alignment(void){
2468 static int did_fail=0;
2469 LOCAL_ALIGNED_16(int, aligned, [4]);
2471 if((intptr_t)aligned & 15){
2473 #if HAVE_MMX || HAVE_ALTIVEC
2474 av_log(NULL, AV_LOG_ERROR,
2475 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2476 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2477 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2478 "Do not report crashes to Libav developers.\n");
2487 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2489 ff_check_alignment();
2492 if (avctx->bits_per_raw_sample == 10) {
2493 c->fdct = ff_jpeg_fdct_islow_10;
2494 c->fdct248 = ff_fdct248_islow_10;
2496 if(avctx->dct_algo==FF_DCT_FASTINT) {
2497 c->fdct = ff_fdct_ifast;
2498 c->fdct248 = ff_fdct_ifast248;
2500 else if(avctx->dct_algo==FF_DCT_FAAN) {
2501 c->fdct = ff_faandct;
2502 c->fdct248 = ff_faandct248;
2505 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2506 c->fdct248 = ff_fdct248_islow_8;
2509 #endif //CONFIG_ENCODERS
2511 if (avctx->bits_per_raw_sample == 10) {
2512 c->idct_put = ff_simple_idct_put_10;
2513 c->idct_add = ff_simple_idct_add_10;
2514 c->idct = ff_simple_idct_10;
2515 c->idct_permutation_type = FF_NO_IDCT_PERM;
2517 if(avctx->idct_algo==FF_IDCT_INT){
2518 c->idct_put= ff_jref_idct_put;
2519 c->idct_add= ff_jref_idct_add;
2520 c->idct = ff_j_rev_dct;
2521 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2522 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2523 c->idct_put= ff_faanidct_put;
2524 c->idct_add= ff_faanidct_add;
2525 c->idct = ff_faanidct;
2526 c->idct_permutation_type= FF_NO_IDCT_PERM;
2527 }else{ //accurate/default
2528 c->idct_put = ff_simple_idct_put_8;
2529 c->idct_add = ff_simple_idct_add_8;
2530 c->idct = ff_simple_idct_8;
2531 c->idct_permutation_type= FF_NO_IDCT_PERM;
2535 c->diff_pixels = diff_pixels_c;
2536 c->put_pixels_clamped = put_pixels_clamped_c;
2537 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2538 c->add_pixels_clamped = add_pixels_clamped_c;
2539 c->sum_abs_dctelem = sum_abs_dctelem_c;
2542 c->pix_sum = pix_sum_c;
2543 c->pix_norm1 = pix_norm1_c;
2545 c->fill_block_tab[0] = fill_block16_c;
2546 c->fill_block_tab[1] = fill_block8_c;
2548 /* TODO [0] 16 [1] 8 */
2549 c->pix_abs[0][0] = pix_abs16_c;
2550 c->pix_abs[0][1] = pix_abs16_x2_c;
2551 c->pix_abs[0][2] = pix_abs16_y2_c;
2552 c->pix_abs[0][3] = pix_abs16_xy2_c;
2553 c->pix_abs[1][0] = pix_abs8_c;
2554 c->pix_abs[1][1] = pix_abs8_x2_c;
2555 c->pix_abs[1][2] = pix_abs8_y2_c;
2556 c->pix_abs[1][3] = pix_abs8_xy2_c;
2558 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2559 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2560 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2561 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2562 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2563 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2564 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2565 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2566 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2568 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2569 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2570 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2571 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2572 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2573 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2574 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2575 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2576 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2578 #define dspfunc(PFX, IDX, NUM) \
2579 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2580 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2581 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2582 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2583 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2584 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2585 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2586 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2587 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2588 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2589 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2590 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2591 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2592 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2593 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2594 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2596 dspfunc(put_qpel, 0, 16);
2597 dspfunc(put_no_rnd_qpel, 0, 16);
2599 dspfunc(avg_qpel, 0, 16);
2600 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2602 dspfunc(put_qpel, 1, 8);
2603 dspfunc(put_no_rnd_qpel, 1, 8);
2605 dspfunc(avg_qpel, 1, 8);
2606 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2610 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2611 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2612 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2613 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2614 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2615 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2616 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2617 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2619 #define SET_CMP_FUNC(name) \
2620 c->name[0]= name ## 16_c;\
2621 c->name[1]= name ## 8x8_c;
2623 SET_CMP_FUNC(hadamard8_diff)
2624 c->hadamard8_diff[4]= hadamard8_intra16_c;
2625 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2626 SET_CMP_FUNC(dct_sad)
2627 SET_CMP_FUNC(dct_max)
2629 SET_CMP_FUNC(dct264_sad)
2631 c->sad[0]= pix_abs16_c;
2632 c->sad[1]= pix_abs8_c;
2636 SET_CMP_FUNC(quant_psnr)
2639 c->vsad[0]= vsad16_c;
2640 c->vsad[4]= vsad_intra16_c;
2641 c->vsad[5]= vsad_intra8_c;
2642 c->vsse[0]= vsse16_c;
2643 c->vsse[4]= vsse_intra16_c;
2644 c->vsse[5]= vsse_intra8_c;
2645 c->nsse[0]= nsse16_c;
2646 c->nsse[1]= nsse8_c;
2648 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2650 c->add_bytes= add_bytes_c;
2651 c->diff_bytes= diff_bytes_c;
2652 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2653 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2654 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2655 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2656 c->bswap_buf= bswap_buf;
2657 c->bswap16_buf = bswap16_buf;
2659 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2660 c->h263_h_loop_filter= h263_h_loop_filter_c;
2661 c->h263_v_loop_filter= h263_v_loop_filter_c;
2664 c->h261_loop_filter= h261_loop_filter_c;
2666 c->try_8x8basis= try_8x8basis_c;
2667 c->add_8x8basis= add_8x8basis_c;
2669 c->vector_clipf = vector_clipf_c;
2670 c->scalarproduct_int16 = scalarproduct_int16_c;
2671 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2672 c->apply_window_int16 = apply_window_int16_c;
2673 c->vector_clip_int32 = vector_clip_int32_c;
2675 c->shrink[0]= av_image_copy_plane;
2676 c->shrink[1]= ff_shrink22;
2677 c->shrink[2]= ff_shrink44;
2678 c->shrink[3]= ff_shrink88;
2680 #define hpel_funcs(prefix, idx, num) \
2681 c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
2682 c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
2683 c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \
2684 c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c
2686 hpel_funcs(put, [0], 16);
2687 hpel_funcs(put, [1], 8);
2688 hpel_funcs(put, [2], 4);
2689 hpel_funcs(put, [3], 2);
2690 hpel_funcs(put_no_rnd, [0], 16);
2691 hpel_funcs(put_no_rnd, [1], 8);
2692 hpel_funcs(avg, [0], 16);
2693 hpel_funcs(avg, [1], 8);
2694 hpel_funcs(avg, [2], 4);
2695 hpel_funcs(avg, [3], 2);
2696 hpel_funcs(avg_no_rnd,, 16);
2700 #define FUNC(f, depth) f ## _ ## depth
2701 #define FUNCC(f, depth) f ## _ ## depth ## _c
2703 #define BIT_DEPTH_FUNCS(depth, dct)\
2704 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
2705 c->draw_edges = FUNCC(draw_edges , depth);\
2706 c->clear_block = FUNCC(clear_block ## dct , depth);\
2707 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
2708 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
2709 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
2711 switch (avctx->bits_per_raw_sample) {
2713 if (c->dct_bits == 32) {
2714 BIT_DEPTH_FUNCS(9, _32);
2716 BIT_DEPTH_FUNCS(9, _16);
2720 if (c->dct_bits == 32) {
2721 BIT_DEPTH_FUNCS(10, _32);
2723 BIT_DEPTH_FUNCS(10, _16);
2727 BIT_DEPTH_FUNCS(8, _16);
2732 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
2733 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
2734 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
2735 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
2736 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
2737 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
2738 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
2740 ff_init_scantable_permutation(c->idct_permutation,
2741 c->idct_permutation_type);