3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "libavutil/imgutils.h"
33 #include "simple_idct.h"
37 #include "mpegvideo.h"
42 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
43 uint32_t ff_squareTbl[512] = {0, };
46 #include "dsputil_template.c"
50 #include "dsputil_template.c"
54 #include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
/* ~0UL/255 yields 0x0101...01 for the native unsigned long width, so each
 * byte of the constant is 0x7f / 0x80 respectively. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
60 const uint8_t ff_zigzag_direct[64] = {
61 0, 1, 8, 16, 9, 2, 3, 10,
62 17, 24, 32, 25, 18, 11, 4, 5,
63 12, 19, 26, 33, 40, 48, 41, 34,
64 27, 20, 13, 6, 7, 14, 21, 28,
65 35, 42, 49, 56, 57, 50, 43, 36,
66 29, 22, 15, 23, 30, 37, 44, 51,
67 58, 59, 52, 45, 38, 31, 39, 46,
68 53, 60, 61, 54, 47, 55, 62, 63
71 /* Specific zigzag scan for 248 idct. NOTE that unlike the
72 specification, we interleave the fields */
73 const uint8_t ff_zigzag248_direct[64] = {
74 0, 8, 1, 9, 16, 24, 2, 10,
75 17, 25, 32, 40, 48, 56, 33, 41,
76 18, 26, 3, 11, 4, 12, 19, 27,
77 34, 42, 49, 57, 50, 58, 35, 43,
78 20, 28, 5, 13, 6, 14, 21, 29,
79 36, 44, 51, 59, 52, 60, 37, 45,
80 22, 30, 7, 15, 23, 31, 38, 46,
81 53, 61, 54, 62, 39, 47, 55, 63,
84 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
85 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
87 const uint8_t ff_alternate_horizontal_scan[64] = {
88 0, 1, 2, 3, 8, 9, 16, 17,
89 10, 11, 4, 5, 6, 7, 15, 14,
90 13, 12, 19, 18, 24, 25, 32, 33,
91 26, 27, 20, 21, 22, 23, 28, 29,
92 30, 31, 34, 35, 40, 41, 48, 49,
93 42, 43, 36, 37, 38, 39, 44, 45,
94 46, 47, 50, 51, 56, 57, 58, 59,
95 52, 53, 54, 55, 60, 61, 62, 63,
98 const uint8_t ff_alternate_vertical_scan[64] = {
99 0, 8, 16, 24, 1, 9, 2, 10,
100 17, 25, 32, 40, 48, 56, 57, 49,
101 41, 33, 26, 18, 3, 11, 4, 12,
102 19, 27, 34, 42, 50, 58, 35, 43,
103 51, 59, 20, 28, 5, 13, 6, 14,
104 21, 29, 36, 44, 52, 60, 37, 45,
105 53, 61, 22, 30, 7, 15, 23, 31,
106 38, 46, 54, 62, 39, 47, 55, 63,
109 /* Input permutation for the simple_idct_mmx */
110 static const uint8_t simple_mmx_permutation[64]={
111 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
112 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
113 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
114 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
115 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
116 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
117 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
118 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
121 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
123 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
127 st->scantable= src_scantable;
131 j = src_scantable[i];
132 st->permutated[i] = permutation[j];
138 j = st->permutated[i];
140 st->raster_end[i]= end;
144 void ff_init_scantable_permutation(uint8_t *idct_permutation,
145 int idct_permutation_type)
149 switch(idct_permutation_type){
150 case FF_NO_IDCT_PERM:
152 idct_permutation[i]= i;
154 case FF_LIBMPEG2_IDCT_PERM:
156 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
158 case FF_SIMPLE_IDCT_PERM:
160 idct_permutation[i]= simple_mmx_permutation[i];
162 case FF_TRANSPOSE_IDCT_PERM:
164 idct_permutation[i]= ((i&7)<<3) | (i>>3);
166 case FF_PARTTRANS_IDCT_PERM:
168 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
170 case FF_SSE2_IDCT_PERM:
172 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
175 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
179 static int pix_sum_c(uint8_t * pix, int line_size)
184 for (i = 0; i < 16; i++) {
185 for (j = 0; j < 16; j += 8) {
196 pix += line_size - 16;
201 static int pix_norm1_c(uint8_t * pix, int line_size)
204 uint32_t *sq = ff_squareTbl + 256;
207 for (i = 0; i < 16; i++) {
208 for (j = 0; j < 16; j += 8) {
220 register uint64_t x=*(uint64_t*)pix;
222 s += sq[(x>>8)&0xff];
223 s += sq[(x>>16)&0xff];
224 s += sq[(x>>24)&0xff];
225 s += sq[(x>>32)&0xff];
226 s += sq[(x>>40)&0xff];
227 s += sq[(x>>48)&0xff];
228 s += sq[(x>>56)&0xff];
230 register uint32_t x=*(uint32_t*)pix;
232 s += sq[(x>>8)&0xff];
233 s += sq[(x>>16)&0xff];
234 s += sq[(x>>24)&0xff];
235 x=*(uint32_t*)(pix+4);
237 s += sq[(x>>8)&0xff];
238 s += sq[(x>>16)&0xff];
239 s += sq[(x>>24)&0xff];
244 pix += line_size - 16;
249 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
252 for(i=0; i+8<=w; i+=8){
253 dst[i+0]= av_bswap32(src[i+0]);
254 dst[i+1]= av_bswap32(src[i+1]);
255 dst[i+2]= av_bswap32(src[i+2]);
256 dst[i+3]= av_bswap32(src[i+3]);
257 dst[i+4]= av_bswap32(src[i+4]);
258 dst[i+5]= av_bswap32(src[i+5]);
259 dst[i+6]= av_bswap32(src[i+6]);
260 dst[i+7]= av_bswap32(src[i+7]);
263 dst[i+0]= av_bswap32(src[i+0]);
267 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
270 *dst++ = av_bswap16(*src++);
273 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
276 uint32_t *sq = ff_squareTbl + 256;
279 for (i = 0; i < h; i++) {
280 s += sq[pix1[0] - pix2[0]];
281 s += sq[pix1[1] - pix2[1]];
282 s += sq[pix1[2] - pix2[2]];
283 s += sq[pix1[3] - pix2[3]];
290 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
293 uint32_t *sq = ff_squareTbl + 256;
296 for (i = 0; i < h; i++) {
297 s += sq[pix1[0] - pix2[0]];
298 s += sq[pix1[1] - pix2[1]];
299 s += sq[pix1[2] - pix2[2]];
300 s += sq[pix1[3] - pix2[3]];
301 s += sq[pix1[4] - pix2[4]];
302 s += sq[pix1[5] - pix2[5]];
303 s += sq[pix1[6] - pix2[6]];
304 s += sq[pix1[7] - pix2[7]];
311 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
314 uint32_t *sq = ff_squareTbl + 256;
317 for (i = 0; i < h; i++) {
318 s += sq[pix1[ 0] - pix2[ 0]];
319 s += sq[pix1[ 1] - pix2[ 1]];
320 s += sq[pix1[ 2] - pix2[ 2]];
321 s += sq[pix1[ 3] - pix2[ 3]];
322 s += sq[pix1[ 4] - pix2[ 4]];
323 s += sq[pix1[ 5] - pix2[ 5]];
324 s += sq[pix1[ 6] - pix2[ 6]];
325 s += sq[pix1[ 7] - pix2[ 7]];
326 s += sq[pix1[ 8] - pix2[ 8]];
327 s += sq[pix1[ 9] - pix2[ 9]];
328 s += sq[pix1[10] - pix2[10]];
329 s += sq[pix1[11] - pix2[11]];
330 s += sq[pix1[12] - pix2[12]];
331 s += sq[pix1[13] - pix2[13]];
332 s += sq[pix1[14] - pix2[14]];
333 s += sq[pix1[15] - pix2[15]];
341 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
342 const uint8_t *s2, int stride){
345 /* read the pixels */
347 block[0] = s1[0] - s2[0];
348 block[1] = s1[1] - s2[1];
349 block[2] = s1[2] - s2[2];
350 block[3] = s1[3] - s2[3];
351 block[4] = s1[4] - s2[4];
352 block[5] = s1[5] - s2[5];
353 block[6] = s1[6] - s2[6];
354 block[7] = s1[7] - s2[7];
362 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
367 /* read the pixels */
369 pixels[0] = av_clip_uint8(block[0]);
370 pixels[1] = av_clip_uint8(block[1]);
371 pixels[2] = av_clip_uint8(block[2]);
372 pixels[3] = av_clip_uint8(block[3]);
373 pixels[4] = av_clip_uint8(block[4]);
374 pixels[5] = av_clip_uint8(block[5]);
375 pixels[6] = av_clip_uint8(block[6]);
376 pixels[7] = av_clip_uint8(block[7]);
383 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
384 uint8_t *restrict pixels,
389 for (i = 0; i < 8; i++) {
390 for (j = 0; j < 8; j++) {
393 else if (*block > 127)
396 *pixels = (uint8_t)(*block + 128);
400 pixels += (line_size - 8);
404 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
409 /* read the pixels */
411 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
412 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
413 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
414 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
415 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
416 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
417 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
418 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
424 static int sum_abs_dctelem_c(DCTELEM *block)
428 sum+= FFABS(block[i]);
432 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
436 for (i = 0; i < h; i++) {
437 memset(block, value, 16);
442 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
446 for (i = 0; i < h; i++) {
447 memset(block, value, 8);
/* Rounded averages of 2 resp. 4 values. Arguments are now fully
 * parenthesized so expressions with lower-precedence operators
 * (e.g. ternaries) expand correctly. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
455 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
457 const int A=(16-x16)*(16-y16);
458 const int B=( x16)*(16-y16);
459 const int C=(16-x16)*( y16);
460 const int D=( x16)*( y16);
465 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
466 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
467 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
468 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
469 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
470 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
471 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
472 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
478 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
479 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
482 const int s= 1<<shift;
492 for(x=0; x<8; x++){ //XXX FIXME optimize
493 int src_x, src_y, frac_x, frac_y, index;
502 if((unsigned)src_x < width){
503 if((unsigned)src_y < height){
504 index= src_x + src_y*stride;
505 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
506 + src[index +1]* frac_x )*(s-frac_y)
507 + ( src[index+stride ]*(s-frac_x)
508 + src[index+stride+1]* frac_x )* frac_y
511 index= src_x + av_clip(src_y, 0, height)*stride;
512 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
513 + src[index +1]* frac_x )*s
517 if((unsigned)src_y < height){
518 index= av_clip(src_x, 0, width) + src_y*stride;
519 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
520 + src[index+stride ]* frac_y )*s
523 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
524 dst[y*stride + x]= src[index ];
536 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
538 case 2: put_pixels2_8_c (dst, src, stride, height); break;
539 case 4: put_pixels4_8_c (dst, src, stride, height); break;
540 case 8: put_pixels8_8_c (dst, src, stride, height); break;
541 case 16:put_pixels16_8_c(dst, src, stride, height); break;
545 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
547 for (i=0; i < height; i++) {
548 for (j=0; j < width; j++) {
549 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
556 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
558 for (i=0; i < height; i++) {
559 for (j=0; j < width; j++) {
560 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
567 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
569 for (i=0; i < height; i++) {
570 for (j=0; j < width; j++) {
571 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
578 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
580 for (i=0; i < height; i++) {
581 for (j=0; j < width; j++) {
582 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
589 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
591 for (i=0; i < height; i++) {
592 for (j=0; j < width; j++) {
593 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
600 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
602 for (i=0; i < height; i++) {
603 for (j=0; j < width; j++) {
604 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
611 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
613 for (i=0; i < height; i++) {
614 for (j=0; j < width; j++) {
615 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
622 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
624 for (i=0; i < height; i++) {
625 for (j=0; j < width; j++) {
626 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
633 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
635 case 2: avg_pixels2_8_c (dst, src, stride, height); break;
636 case 4: avg_pixels4_8_c (dst, src, stride, height); break;
637 case 8: avg_pixels8_8_c (dst, src, stride, height); break;
638 case 16:avg_pixels16_8_c(dst, src, stride, height); break;
642 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
644 for (i=0; i < height; i++) {
645 for (j=0; j < width; j++) {
646 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
653 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
655 for (i=0; i < height; i++) {
656 for (j=0; j < width; j++) {
657 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
664 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
666 for (i=0; i < height; i++) {
667 for (j=0; j < width; j++) {
668 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
675 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
677 for (i=0; i < height; i++) {
678 for (j=0; j < width; j++) {
679 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
686 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
688 for (i=0; i < height; i++) {
689 for (j=0; j < width; j++) {
690 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
697 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
699 for (i=0; i < height; i++) {
700 for (j=0; j < width; j++) {
701 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
708 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
710 for (i=0; i < height; i++) {
711 for (j=0; j < width; j++) {
712 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
719 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
721 for (i=0; i < height; i++) {
722 for (j=0; j < width; j++) {
723 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
730 #define QPEL_MC(r, OPNAME, RND, OP) \
731 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
732 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
736 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
737 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
738 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
739 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
740 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
741 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
742 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
743 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
749 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
751 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
755 const int src0= src[0*srcStride];\
756 const int src1= src[1*srcStride];\
757 const int src2= src[2*srcStride];\
758 const int src3= src[3*srcStride];\
759 const int src4= src[4*srcStride];\
760 const int src5= src[5*srcStride];\
761 const int src6= src[6*srcStride];\
762 const int src7= src[7*srcStride];\
763 const int src8= src[8*srcStride];\
764 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
765 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
766 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
767 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
768 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
769 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
770 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
771 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
777 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
778 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
783 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
784 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
785 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
786 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
787 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
788 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
789 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
790 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
791 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
792 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
793 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
794 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
795 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
796 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
797 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
798 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
804 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
805 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
810 const int src0= src[0*srcStride];\
811 const int src1= src[1*srcStride];\
812 const int src2= src[2*srcStride];\
813 const int src3= src[3*srcStride];\
814 const int src4= src[4*srcStride];\
815 const int src5= src[5*srcStride];\
816 const int src6= src[6*srcStride];\
817 const int src7= src[7*srcStride];\
818 const int src8= src[8*srcStride];\
819 const int src9= src[9*srcStride];\
820 const int src10= src[10*srcStride];\
821 const int src11= src[11*srcStride];\
822 const int src12= src[12*srcStride];\
823 const int src13= src[13*srcStride];\
824 const int src14= src[14*srcStride];\
825 const int src15= src[15*srcStride];\
826 const int src16= src[16*srcStride];\
827 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
828 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
829 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
830 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
831 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
832 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
833 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
834 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
835 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
836 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
837 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
838 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
839 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
840 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
841 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
842 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
848 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
850 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
851 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
854 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
855 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
858 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
860 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
861 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
864 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
867 copy_block9(full, src, 16, stride, 9);\
868 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
869 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
872 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
874 copy_block9(full, src, 16, stride, 9);\
875 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
878 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
881 copy_block9(full, src, 16, stride, 9);\
882 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
883 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
885 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
890 copy_block9(full, src, 16, stride, 9);\
891 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
892 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
893 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
894 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
896 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
900 copy_block9(full, src, 16, stride, 9);\
901 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
902 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
903 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
904 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
906 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
911 copy_block9(full, src, 16, stride, 9);\
912 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
913 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
914 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
915 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
917 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
921 copy_block9(full, src, 16, stride, 9);\
922 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
923 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
924 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
925 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
927 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
932 copy_block9(full, src, 16, stride, 9);\
933 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
934 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
935 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
936 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
938 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
942 copy_block9(full, src, 16, stride, 9);\
943 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
944 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
945 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
946 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
948 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
953 copy_block9(full, src, 16, stride, 9);\
954 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
955 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
956 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
957 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
959 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
963 copy_block9(full, src, 16, stride, 9);\
964 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
965 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
966 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
967 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
969 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
972 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
973 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
974 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
976 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
979 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
980 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
981 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
983 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
988 copy_block9(full, src, 16, stride, 9);\
989 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
991 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
992 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
994 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
997 copy_block9(full, src, 16, stride, 9);\
998 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
999 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1000 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1002 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1003 uint8_t full[16*9];\
1006 uint8_t halfHV[64];\
1007 copy_block9(full, src, 16, stride, 9);\
1008 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1009 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1010 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1011 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1013 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1014 uint8_t full[16*9];\
1016 copy_block9(full, src, 16, stride, 9);\
1017 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1018 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1019 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1021 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1023 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1024 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1027 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1029 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1030 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1033 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1034 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1037 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1039 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1040 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1043 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1044 uint8_t full[24*17];\
1046 copy_block17(full, src, 24, stride, 17);\
1047 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1048 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1051 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1052 uint8_t full[24*17];\
1053 copy_block17(full, src, 24, stride, 17);\
1054 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1057 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1058 uint8_t full[24*17];\
1060 copy_block17(full, src, 24, stride, 17);\
1061 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1062 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1064 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1065 uint8_t full[24*17];\
1066 uint8_t halfH[272];\
1067 uint8_t halfV[256];\
1068 uint8_t halfHV[256];\
1069 copy_block17(full, src, 24, stride, 17);\
1070 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1071 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1072 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1073 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1075 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1076 uint8_t full[24*17];\
1077 uint8_t halfH[272];\
1078 uint8_t halfHV[256];\
1079 copy_block17(full, src, 24, stride, 17);\
1080 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1081 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1082 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1083 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1085 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1086 uint8_t full[24*17];\
1087 uint8_t halfH[272];\
1088 uint8_t halfV[256];\
1089 uint8_t halfHV[256];\
1090 copy_block17(full, src, 24, stride, 17);\
1091 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1092 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1093 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1094 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1096 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1097 uint8_t full[24*17];\
1098 uint8_t halfH[272];\
1099 uint8_t halfHV[256];\
1100 copy_block17(full, src, 24, stride, 17);\
1101 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1102 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1103 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1104 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1106 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1107 uint8_t full[24*17];\
1108 uint8_t halfH[272];\
1109 uint8_t halfV[256];\
1110 uint8_t halfHV[256];\
1111 copy_block17(full, src, 24, stride, 17);\
1112 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1113 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1114 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1115 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1117 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1118 uint8_t full[24*17];\
1119 uint8_t halfH[272];\
1120 uint8_t halfHV[256];\
1121 copy_block17(full, src, 24, stride, 17);\
1122 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1123 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1124 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1125 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1127 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1128 uint8_t full[24*17];\
1129 uint8_t halfH[272];\
1130 uint8_t halfV[256];\
1131 uint8_t halfHV[256];\
1132 copy_block17(full, src, 24, stride, 17);\
1133 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1134 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1136 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1138 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1139 uint8_t full[24*17];\
1140 uint8_t halfH[272];\
1141 uint8_t halfHV[256];\
1142 copy_block17(full, src, 24, stride, 17);\
1143 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1144 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1145 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1146 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1148 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1149 uint8_t halfH[272];\
1150 uint8_t halfHV[256];\
1151 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1152 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1153 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1155 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1156 uint8_t halfH[272];\
1157 uint8_t halfHV[256];\
1158 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1159 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1160 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1162 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1163 uint8_t full[24*17];\
1164 uint8_t halfH[272];\
1165 uint8_t halfV[256];\
1166 uint8_t halfHV[256];\
1167 copy_block17(full, src, 24, stride, 17);\
1168 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1170 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1171 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1173 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1174 uint8_t full[24*17];\
1175 uint8_t halfH[272];\
1176 copy_block17(full, src, 24, stride, 17);\
1177 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1178 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1179 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1181 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1182 uint8_t full[24*17];\
1183 uint8_t halfH[272];\
1184 uint8_t halfV[256];\
1185 uint8_t halfHV[256];\
1186 copy_block17(full, src, 24, stride, 17);\
1187 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1188 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1189 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1190 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1192 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1193 uint8_t full[24*17];\
1194 uint8_t halfH[272];\
1195 copy_block17(full, src, 24, stride, 17);\
1196 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1197 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1198 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1200 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1201 uint8_t halfH[272];\
1202 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1203 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Final per-pixel store ops used to instantiate the qpel MC set below:
 * "+16>>5" rounds to nearest, "+15>>5" is the no-rounding variant; cm[]
 * clips to 0..255 (ff_cropTbl). */
1206 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1207 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1208 #define op_put(a, b) a = cm[((b) + 16)>>5]
1209 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate put, put_no_rnd and avg flavours of the qpel functions. */
1211 QPEL_MC(0, put_ , _ , op_put)
1212 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1213 QPEL_MC(0, avg_ , _ , op_avg)
1214 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1216 #undef op_avg_no_rnd
1218 #undef op_put_no_rnd
/* mc00 (integer-pel) cases degenerate to plain copy/average wrappers.
 * NOTE(review): fixed the no_rnd 16x16 alias to ff_put_pixels16x16_c so it
 * matches the rounded alias above — the "_8"-suffixed 16x16 wrapper name
 * does not exist in this naming scheme (compare lines 1222/1224). */
1220 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1221 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1222 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1223 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1224 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1225 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
/* WMV2 horizontal half-pel lowpass: for each of 8 output pixels,
 * out = clip((9*(b+c) - (a+d) + 8) >> 4), clipping done via the cm[] table.
 * NOTE(review): the listing's numbering jumps (1228 -> 1232, 1239 -> 1245);
 * the per-row loop over h and the dst/src stride advances appear to be
 * missing here — verify against upstream before editing. */
1227 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1228 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1232 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1233 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1234 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1235 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1236 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1237 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1238 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1239 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1245 #if CONFIG_RV40_DECODER
/* RV40's (3/4, 3/4) luma case is served by the plain half-pel xy2
 * (diagonal) averager rather than a true quarter-pel filter.
 * NOTE(review): the closing braces of these four wrappers are absent from
 * this listing (numbering jumps after each call). */
1246 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1247 put_pixels16_xy2_8_c(dst, src, stride, 16);
1249 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1250 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1252 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1253 put_pixels8_xy2_8_c(dst, src, stride, 8);
1255 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1256 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1258 #endif /* CONFIG_RV40_DECODER */
/* WMV2 vertical half-pel lowpass: the same (9*(b+c) - (a+d) + 8) >> 4
 * filter as the horizontal version, applied down one column; the column
 * is loaded into src_1..src9 first so each sample is read once.
 * NOTE(review): numbering jumps (1261 -> 1265, 1283 -> 1289) indicate the
 * per-column loop over w and the dst/src advances were dropped here. */
1260 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1261 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1265 const int src_1= src[ -srcStride];
1266 const int src0 = src[0 ];
1267 const int src1 = src[ srcStride];
1268 const int src2 = src[2*srcStride];
1269 const int src3 = src[3*srcStride];
1270 const int src4 = src[4*srcStride];
1271 const int src5 = src[5*srcStride];
1272 const int src6 = src[6*srcStride];
1273 const int src7 = src[7*srcStride];
1274 const int src8 = src[8*srcStride];
1275 const int src9 = src[9*srcStride];
1276 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1277 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1278 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1279 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1280 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1281 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1282 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1283 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* WMV2 (mspel) 8x8 motion-compensation cases built from the two lowpass
 * filters above plus pixel averaging (put_pixels8_l2_8). mcXY: X is the
 * horizontal, Y the vertical sub-pel position (0 = full pel, 2 = half).
 * NOTE(review): local buffer declarations (half/halfH/halfV/halfHV) and
 * the functions' closing braces are missing from this listing. */
1289 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1291 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1292 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
/* mc20: pure horizontal half-pel, filter straight into dst. */
1295 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1296 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
/* mc30: like mc10 but averaged with the pixel one to the right. */
1299 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1301 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1302 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
/* mc02: pure vertical half-pel. */
1305 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1306 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
/* mc12/mc32: horizontal filter over 11 rows (one above, two below the
 * block), then vertical filters, then average of the V and HV planes. */
1309 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1313 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1314 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1315 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1316 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1318 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1322 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1323 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1324 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1325 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
/* mc22: centre position — horizontal pass then vertical pass only. */
1327 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1329 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1330 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
/* H.263 deblocking, vertical-edge filter (filters across a horizontal
 * block boundary). p0..p3 straddle the edge; d is the edge gradient; d1
 * is the strength-dependent ramp correction applied to p1/p2 and d2 a
 * smaller correction (clipped to |d1|) applied to p0/p3. The
 * "if(p&256) p = ~(p>>31);" lines clamp to 0..255 without a table:
 * negative -> 0, >255 -> 255.
 * NOTE(review): the x loop header, d1/ad1 setup, p1/p2 updates with d1 and
 * closing braces are missing from this listing (numbering gaps). */
1333 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1334 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1336 const int strength= ff_h263_loop_filter_strength[qscale];
1340 int p0= src[x-2*stride];
1341 int p1= src[x-1*stride];
1342 int p2= src[x+0*stride];
1343 int p3= src[x+1*stride];
1344 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1346 if (d<-2*strength) d1= 0;
1347 else if(d<- strength) d1=-2*strength - d;
1348 else if(d< strength) d1= d;
1349 else if(d< 2*strength) d1= 2*strength - d;
1354 if(p1&256) p1= ~(p1>>31);
1355 if(p2&256) p2= ~(p2>>31);
1357 src[x-1*stride] = p1;
1358 src[x+0*stride] = p2;
1362 d2= av_clip((p0-p3)/4, -ad1, ad1);
1364 src[x-2*stride] = p0 - d2;
1365 src[x+ stride] = p3 + d2;
/* H.263 deblocking, horizontal-edge filter: identical math to
 * h263_v_loop_filter_c but operating across a vertical block boundary
 * (pixels addressed along a row, y iterates over rows).
 * NOTE(review): as above, the loop header, d1/ad1 setup, p1/p2 updates
 * and closing braces are missing from this listing. */
1370 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1371 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1373 const int strength= ff_h263_loop_filter_strength[qscale];
1377 int p0= src[y*stride-2];
1378 int p1= src[y*stride-1];
1379 int p2= src[y*stride+0];
1380 int p3= src[y*stride+1];
1381 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1383 if (d<-2*strength) d1= 0;
1384 else if(d<- strength) d1=-2*strength - d;
1385 else if(d< strength) d1= d;
1386 else if(d< 2*strength) d1= 2*strength - d;
1391 if(p1&256) p1= ~(p1>>31);
1392 if(p2&256) p2= ~(p2>>31);
1394 src[y*stride-1] = p1;
1395 src[y*stride+0] = p2;
1399 d2= av_clip((p0-p3)/4, -ad1, ad1);
1401 src[y*stride-2] = p0 - d2;
1402 src[y*stride+1] = p3 + d2;
/* H.261 in-loop filter: separable [1 2 1] smoothing of an 8x8 block.
 * A temp[] working array holds values scaled by 4 (edge rows copied with
 * a plain *4), the vertical 1-2-1 pass fills the interior, and the final
 * pass applies the horizontal 1-2-1 with rounding (>>2 on edges, >>4 in
 * the interior).
 * NOTE(review): temp[] declaration, the x/y loop headers, yz bookkeeping
 * and closing braces are missing from this listing (numbering gaps). */
1407 static void h261_loop_filter_c(uint8_t *src, int stride){
1412 temp[x ] = 4*src[x ];
1413 temp[x + 7*8] = 4*src[x + 7*stride];
1417 xy = y * stride + x;
1419 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
1424 src[ y*stride] = (temp[ y*8] + 2)>>2;
1425 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1427 xy = y * stride + x;
1429 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
/* 16-wide SAD (sum of absolute differences) motion-estimation metrics.
 * The _x2 / _y2 / _xy2 variants compare pix1 against the horizontally,
 * vertically, and diagonally half-pel interpolated reference (the avg2 /
 * avg4 rounding helpers).
 * NOTE(review): in all four functions the accumulator initialisation,
 * the row loop over h, per-row pointer advances and the return statement
 * are missing from this listing (numbering gaps). */
1434 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1440 s += abs(pix1[0] - pix2[0]);
1441 s += abs(pix1[1] - pix2[1]);
1442 s += abs(pix1[2] - pix2[2]);
1443 s += abs(pix1[3] - pix2[3]);
1444 s += abs(pix1[4] - pix2[4]);
1445 s += abs(pix1[5] - pix2[5]);
1446 s += abs(pix1[6] - pix2[6]);
1447 s += abs(pix1[7] - pix2[7]);
1448 s += abs(pix1[8] - pix2[8]);
1449 s += abs(pix1[9] - pix2[9]);
1450 s += abs(pix1[10] - pix2[10]);
1451 s += abs(pix1[11] - pix2[11]);
1452 s += abs(pix1[12] - pix2[12]);
1453 s += abs(pix1[13] - pix2[13]);
1454 s += abs(pix1[14] - pix2[14]);
1455 s += abs(pix1[15] - pix2[15]);
/* SAD against the horizontal half-pel reference. */
1462 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1468 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1469 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1470 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1471 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1472 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1473 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1474 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1475 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1476 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1477 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1478 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1479 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1480 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1481 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1482 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1483 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
/* SAD against the vertical half-pel reference (pix3 = row below pix2). */
1490 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1493 uint8_t *pix3 = pix2 + line_size;
1497 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1498 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1499 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1500 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1501 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1502 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1503 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1504 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1505 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1506 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1507 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1508 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1509 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1510 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1511 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1512 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
/* SAD against the diagonal half-pel reference (4-point average). */
1520 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1523 uint8_t *pix3 = pix2 + line_size;
1527 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1528 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1529 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1530 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1531 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1532 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1533 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1534 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1535 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1536 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1537 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1538 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1539 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1540 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1541 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1542 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/* 8-wide SAD metrics; same structure as the 16-wide family above.
 * NOTE(review): accumulator init, row loop over h, pointer advances and
 * returns are missing from this listing (numbering gaps). */
1550 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1556 s += abs(pix1[0] - pix2[0]);
1557 s += abs(pix1[1] - pix2[1]);
1558 s += abs(pix1[2] - pix2[2]);
1559 s += abs(pix1[3] - pix2[3]);
1560 s += abs(pix1[4] - pix2[4]);
1561 s += abs(pix1[5] - pix2[5]);
1562 s += abs(pix1[6] - pix2[6]);
1563 s += abs(pix1[7] - pix2[7]);
/* SAD against the horizontal half-pel reference. */
1570 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1576 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1577 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1578 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1579 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1580 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1581 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1582 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1583 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
/* SAD against the vertical half-pel reference. */
1590 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1593 uint8_t *pix3 = pix2 + line_size;
1597 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1598 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1599 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1600 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1601 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1602 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1603 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1604 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
/* SAD against the diagonal half-pel reference (4-point average). */
1612 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1615 uint8_t *pix3 = pix2 + line_size;
1619 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1620 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1621 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1622 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1623 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1624 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1625 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1626 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/* Noise-preserving SSE ("nsse"): score1 is the plain squared error,
 * score2 the difference of local 2x2 gradient magnitudes between the two
 * images (so equally-noisy blocks are not penalised); score2 is weighted
 * by avctx->nsse_weight, or by 8 when no context is supplied.
 * NOTE(review): the y loop headers, s1/s2 stride advances and braces are
 * missing from this listing (numbering gaps). */
1634 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1635 MpegEncContext *c = v;
1641 for(x=0; x<16; x++){
1642 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1645 for(x=0; x<15; x++){
1646 score2+= FFABS( s1[x ] - s1[x +stride]
1647 - s1[x+1] + s1[x+1+stride])
1648 -FFABS( s2[x ] - s2[x +stride]
1649 - s2[x+1] + s2[x+1+stride]);
1656 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1657 else return score1 + FFABS(score2)*8;
/* 8-wide variant of the above. */
1660 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1661 MpegEncContext *c = v;
1668 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1672 score2+= FFABS( s1[x ] - s1[x +stride]
1673 - s1[x+1] + s1[x+1+stride])
1674 -FFABS( s2[x ] - s2[x +stride]
1675 - s2[x+1] + s2[x+1+stride]);
1682 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1683 else return score1 + FFABS(score2)*8;
/* try_8x8basis: weighted squared error of rem[] after adding the scaled
 * basis function (scale applied with rounding at BASIS_SHIFT-RECON_SHIFT
 * precision); used by the trellis/basis search in the encoder.
 * NOTE(review): sum/w declarations, the return and closing braces are
 * missing from this listing (numbering gaps). */
1686 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1690 for(i=0; i<8*8; i++){
1691 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1694 assert(-512<b && b<512);
1696 sum += (w*b)*(w*b)>>4;
/* add_8x8basis: commit the same scaled basis into rem[] in place. */
1701 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1704 for(i=0; i<8*8; i++){
1705 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
/* NOTE(review): the doxygen comment opener of this block, the temp[64]
 * declaration, the early-out for last<0 and loop bodies' braces are
 * absent from this listing (numbering gaps) — compare with upstream.
 * The two loops copy the first `last+1` scantable-order coefficients out
 * to temp[] and write them back through permutation[]. */
1710 * Permute an 8x8 block.
1711 * @param block the block which will be permuted according to the given permutation vector
1712 * @param permutation the permutation vector
1713 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
1714 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1715 * (inverse) permutated to scantable order!
1717 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1723 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
1725 for(i=0; i<=last; i++){
1726 const int j= scantable[i];
1731 for(i=0; i<=last; i++){
1732 const int j= scantable[i];
1733 const int perm_j= permutation[j];
1734 block[perm_j]= temp[j];
/* zero_cmp: dummy compare function (constant score), selectable when no
 * real metric is wanted.
 * NOTE(review): its body, and most of ff_set_cmp's switch over `type`
 * (case labels, the loop over the 6 block-size slots, remaining FF_CMP_*
 * cases and braces) are missing from this listing (numbering gaps). */
1738 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
/* ff_set_cmp: fill cmp[0..5] with the DSPContext compare functions that
 * implement the requested FF_CMP_* metric. */
1742 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1745 memset(cmp, 0, sizeof(void*)*6);
1753 cmp[i]= c->hadamard8_diff[i];
1759 cmp[i]= c->dct_sad[i];
1762 cmp[i]= c->dct264_sad[i];
1765 cmp[i]= c->dct_max[i];
1768 cmp[i]= c->quant_psnr[i];
1797 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
/* add_bytes / diff_bytes: byte-wise a+b and a-b over a buffer, one
 * machine word at a time, using the pb_7f/pb_80 SWAR masks (defined near
 * the top of the file) to keep carries/borrows from crossing byte lanes;
 * a scalar loop handles the tail. diff_bytes additionally falls back to
 * a pure byte loop when src2 is misaligned and unaligned word access is
 * slow (#if !HAVE_FAST_UNALIGNED).
 * NOTE(review): tail-loop headers, #else/#endif and closing braces are
 * missing from this listing (numbering gaps). */
1802 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1804 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1805 long a = *(long*)(src+i);
1806 long b = *(long*)(dst+i);
1807 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1810 dst[i+0] += src[i+0];
1813 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1815 #if !HAVE_FAST_UNALIGNED
1816 if((long)src2 & (sizeof(long)-1)){
1817 for(i=0; i+7<w; i+=8){
1818 dst[i+0] = src1[i+0]-src2[i+0];
1819 dst[i+1] = src1[i+1]-src2[i+1];
1820 dst[i+2] = src1[i+2]-src2[i+2];
1821 dst[i+3] = src1[i+3]-src2[i+3];
1822 dst[i+4] = src1[i+4]-src2[i+4];
1823 dst[i+5] = src1[i+5]-src2[i+5];
1824 dst[i+6] = src1[i+6]-src2[i+6];
1825 dst[i+7] = src1[i+7]-src2[i+7];
1829 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1830 long a = *(long*)(src1+i);
1831 long b = *(long*)(src2+i);
1832 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1835 dst[i+0] = src1[i+0]-src2[i+0];
/* HuffYUV prediction helpers. The median predictor is
 * mid_pred(left, top, left + top - topleft) on 8-bit samples; add_*
 * reconstructs pixels from residuals, sub_* produces residuals, and the
 * left-prediction variants carry the running accumulator across the row.
 * NOTE(review): loop headers, l/lt initialisation/write-back, and the
 * bodies of the left-prediction functions are almost entirely missing
 * from this listing (numbering gaps) — compare with upstream. */
1838 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1846 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1855 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1863 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1873 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1876 for(i=0; i<w-1; i++){
1903 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
/* Butterfly helpers for the 8x8 Hadamard transforms below: BUTTERFLY2
 * produces sum/difference into two outputs, BUTTERFLY1 does it in place,
 * BUTTERFLYA accumulates |x+y| + |x-y|.
 * NOTE(review): the continuation lines (bodies) of BUTTERFLY2 and
 * BUTTERFLY1 are missing from this listing; only BUTTERFLYA is intact. */
1933 #define BUTTERFLY2(o1,o2,i1,i2) \
1937 #define BUTTERFLY1(x,y) \
1946 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/* SATD: 8x8 Hadamard transform of (src - dst), summing the absolute
 * transform coefficients. Row pass first (BUTTERFLY2 then two BUTTERFLY1
 * stages per row), then the column pass, with BUTTERFLYA accumulating
 * |x+y| + |x-y| for the final stage.
 * NOTE(review): temp[64] declaration, the i loop headers, the assert on
 * h and the return are missing from this listing (numbering gaps). */
1948 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1956 //FIXME try pointer walks
1957 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1958 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1959 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1960 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1962 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1963 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1964 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1965 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1967 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1968 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1969 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1970 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1974 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1975 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1976 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1977 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1979 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1980 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1981 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1982 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1985 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1986 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1987 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1988 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
/* Intra variant: transforms src alone (no reference) and subtracts the
 * DC magnitude at the end so the block mean does not dominate the score
 * (see the "-mean" line below). */
1993 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
2001 //FIXME try pointer walks
2002 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2003 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2004 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2005 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2007 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2008 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2009 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2010 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2012 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2013 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2014 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2015 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2019 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2020 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2021 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2022 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2024 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2025 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2026 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2027 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2030 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2031 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2032 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2033 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2036 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
/* dct_sad: sum of absolute DCT coefficients of the 8x8 block difference.
 * NOTE(review): the forward-DCT call between diff_pixels and
 * sum_abs_dctelem (and the assert/declarations) appears to have been
 * dropped from this listing (numbering jumps 2047 -> 2049). */
2041 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2042 MpegEncContext * const s= (MpegEncContext *)c;
2043 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2047 s->dsp.diff_pixels(temp, src1, src2, stride);
2049 return s->dsp.sum_abs_dctelem(temp);
2054 const int s07 = SRC(0) + SRC(7);\
2055 const int s16 = SRC(1) + SRC(6);\
2056 const int s25 = SRC(2) + SRC(5);\
2057 const int s34 = SRC(3) + SRC(4);\
2058 const int a0 = s07 + s34;\
2059 const int a1 = s16 + s25;\
2060 const int a2 = s07 - s34;\
2061 const int a3 = s16 - s25;\
2062 const int d07 = SRC(0) - SRC(7);\
2063 const int d16 = SRC(1) - SRC(6);\
2064 const int d25 = SRC(2) - SRC(5);\
2065 const int d34 = SRC(3) - SRC(4);\
2066 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2067 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2068 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2069 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2071 DST(1, a4 + (a7>>2)) ;\
2072 DST(2, a2 + (a3>>1)) ;\
2073 DST(3, a5 + (a6>>2)) ;\
2075 DST(5, a6 - (a5>>2)) ;\
2076 DST(6, (a2>>1) - a3 ) ;\
2077 DST(7, (a4>>2) - a7 ) ;\
2080 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
/* H.264-style 8x8 integer transform SAD: DCT8_1D (defined above) is run
 * over rows with SRC/DST reading and writing dct[][], then redefined so
 * the column pass accumulates FFABS of each output into sum.
 * NOTE(review): the dct[][] declaration, intermediate #undef SRC/DST
 * lines and the return are missing from this listing (numbering gaps). */
2081 MpegEncContext * const s= (MpegEncContext *)c;
2086 s->dsp.diff_pixels(dct[0], src1, src2, stride);
2088 #define SRC(x) dct[i][x]
2089 #define DST(x,v) dct[i][x]= v
2090 for( i = 0; i < 8; i++ )
2095 #define SRC(x) dct[x][i]
2096 #define DST(x,v) sum += FFABS(v)
2097 for( i = 0; i < 8; i++ )
/* dct_max: largest absolute DCT coefficient of the block difference. */
2105 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2106 MpegEncContext * const s= (MpegEncContext *)c;
2107 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2112 s->dsp.diff_pixels(temp, src1, src2, stride);
2116 sum= FFMAX(sum, FFABS(temp[i]));
/* quant_psnr: squared error introduced by the codec's own
 * quantize -> dequantize -> idct round trip on the block difference
 * (temp is quantized in place, bak keeps the pre-quantization copy).
 * NOTE(review): the forward DCT call, loop header, sum declaration and
 * return are missing from this listing (numbering gaps). */
2121 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2122 MpegEncContext * const s= (MpegEncContext *)c;
2123 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2124 DCTELEM * const bak = temp+64;
2130 s->dsp.diff_pixels(temp, src1, src2, stride);
2132 memcpy(bak, temp, 64*sizeof(DCTELEM));
2134 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2135 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2136 ff_simple_idct_8(temp); //FIXME
2139 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
/* rd8x8: rate-distortion metric. Quantizes the 8x8 difference block,
 * counts the VLC bits needed to code it (intra or inter AC tables, with
 * esc_length for out-of-range levels), then dequantizes + idct_adds it
 * back onto a copy of src2 and measures the SSE against src1. Returns
 * distortion plus lambda-weighted bits (the *109 >> 7 scaling).
 * NOTE(review): the fdct call, intra/inter branch headers, the run/level
 * bookkeeping inside the coefficient loop, and several braces are missing
 * from this listing (numbering gaps) — verify against upstream. */
2144 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2145 MpegEncContext * const s= (MpegEncContext *)c;
2146 const uint8_t *scantable= s->intra_scantable.permutated;
2147 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2148 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2149 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2150 int i, last, run, bits, level, distortion, start_i;
2151 const int esc_length= s->ac_esc_length;
2153 uint8_t * last_length;
2157 copy_block8(lsrc1, src1, 8, stride, 8);
2158 copy_block8(lsrc2, src2, 8, stride, 8);
2160 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2162 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2168 length = s->intra_ac_vlc_length;
2169 last_length= s->intra_ac_vlc_last_length;
2170 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2173 length = s->inter_ac_vlc_length;
2174 last_length= s->inter_ac_vlc_last_length;
2179 for(i=start_i; i<last; i++){
2180 int j= scantable[i];
2185 if((level&(~127)) == 0){
2186 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2195 level= temp[i] + 64;
2199 if((level&(~127)) == 0){
2200 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2208 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2210 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2213 s->dsp.idct_add(lsrc2, 8, temp);
2215 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2217 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
/* bit8x8: rate-only metric — same quantize-and-count-VLC-bits logic as
 * rd8x8_c above, but without reconstructing and measuring distortion.
 * NOTE(review): the fdct call, intra/inter branch headers, run/level
 * bookkeeping, the return and several braces are missing from this
 * listing (numbering gaps). */
2220 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2221 MpegEncContext * const s= (MpegEncContext *)c;
2222 const uint8_t *scantable= s->intra_scantable.permutated;
2223 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2224 int i, last, run, bits, level, start_i;
2225 const int esc_length= s->ac_esc_length;
2227 uint8_t * last_length;
2231 s->dsp.diff_pixels(temp, src1, src2, stride);
2233 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2239 length = s->intra_ac_vlc_length;
2240 last_length= s->intra_ac_vlc_last_length;
2241 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2244 length = s->inter_ac_vlc_length;
2245 last_length= s->inter_ac_vlc_last_length;
2250 for(i=start_i; i<last; i++){
2251 int j= scantable[i];
2256 if((level&(~127)) == 0){
2257 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2266 level= temp[i] + 64;
2270 if((level&(~127)) == 0){
2271 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
/* VSAD_INTRA(size): sum of absolute vertical gradients within a single
 * image — a cheap activity/interlace measure, 4 pixels per inner step.
 * NOTE(review): the macro's tail (loop closes, stride advance, return)
 * and its VSAD_INTRA(8)/VSAD_INTRA(16) instantiations are missing from
 * this listing (numbering gaps). */
2279 #define VSAD_INTRA(size) \
2280 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2284 for(y=1; y<h; y++){ \
2285 for(x=0; x<size; x+=4){ \
2286 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2287 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2297 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
/* vsad16: SAD of the vertical gradients of the DIFFERENCE of two images.
 * NOTE(review): loop headers, stride advances and the return are missing
 * from this listing (numbering gaps). */
2302 for(x=0; x<16; x++){
2303 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* SQ: squared value. VSSE_INTRA mirrors VSAD_INTRA above with squared
 * instead of absolute differences.
 * NOTE(review): macro tail and the size-8/16 instantiations are missing
 * from this listing (numbering gaps). */
2312 #define SQ(a) ((a)*(a))
2313 #define VSSE_INTRA(size) \
2314 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2318 for(y=1; y<h; y++){ \
2319 for(x=0; x<size; x+=4){ \
2320 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2321 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2331 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
/* vsse16: squared-error counterpart of vsad16_c above.
 * NOTE(review): loop headers, stride advances and the return are missing
 * from this listing (numbering gaps). */
2336 for(x=0; x<16; x++){
2337 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* ssd_int8_vs_int16: squared distance between an int8 vector and an
 * int16 vector (codebook error measure).
 * NOTE(review): the signature's second line, score declaration and the
 * return are missing from this listing (numbering gaps). */
2346 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2350 for(i=0; i<size; i++)
2351 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
/* WRAPPER8_16_SQ (defined elsewhere in this file) builds the 16x16
 * version of each 8x8 compare function by summing the four quadrants. */
2355 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2356 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2357 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2359 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2361 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2362 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2363 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2364 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2366 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
2369 for(i=0; i<len; i++)
2370 dst[i] = src0[i] * src1[-i];
2373 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
2375 for(i=0; i<len; i++)
2376 dst[i] = src0[i] * src1[i] + src2[i];
2379 static void vector_fmul_window_c(float *dst, const float *src0,
2380 const float *src1, const float *win, int len)
2386 for(i=-len, j=len-1; i<0; i++, j--) {
2391 dst[i] = s0*wj - s1*wi;
2392 dst[j] = s0*wi + s1*wj;
/** Scale a float vector: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = src[i] * mul;
}
/**
 * In-place butterfly: v1[i] <- v1[i] + v2[i], v2[i] <- v1[i] - v2[i]
 * (using the original v1).  The restrict qualifiers promise no aliasing.
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int i;

    for (i = 0; i < len; i++) {
        float t = v1[i] - v2[i];
        v1[i]  += v2[i];
        v2[i]   = t;
    }
}
/**
 * Butterfly with interleaved output:
 * dst[2i] = src0[i] + src1[i], dst[2i+1] = src0[i] - src1[i].
 * dst must hold 2*len floats.
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        float f1 = src0[i];
        float f2 = src1[i];
        dst[2*i    ] = f1 + f2;
        dst[2*i + 1] = f1 - f2;
    }
}
/** Dot product of two float vectors of length @len. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float p = 0.0;
    int i;

    for (i = 0; i < len; i++)
        p += v1[i] * v2[i];

    return p;
}
/**
 * Clip one float (passed as its IEEE-754 bit pattern) to [min, max], where
 * min < 0 < max.  Because min is negative, its bit pattern (sign bit set)
 * is larger than that of any positive value, so `a > mini` means
 * "a is negative with |a| > |min|".  XORing the sign bit turns the unsigned
 * comparison into a signed float ordering for the max test.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                        uint32_t maxi, uint32_t maxisign)
{
    if      (a > mini)                 return mini;  /* below min */
    else if ((a ^ (1U<<31)) > maxisign) return maxi; /* above max */
    else                                return a;    /* in range  */
}
/**
 * Clip a float vector to [*min, *max] where *min < 0 < *max, operating on
 * raw IEEE-754 bit patterns via clipf_c_one().  len must be a multiple of 8
 * (the loop is unrolled by 8 with no tail handling).
 * NOTE(review): the float<->uint32_t pointer casts violate strict aliasing;
 * kept as-is to match the surrounding code, but memcpy would be the
 * conforming way to type-pun.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Clip each element of src to [min, max], writing to dst.  When min and max
 * straddle zero the branch-free bit-pattern path is used; otherwise a plain
 * av_clipf() loop, unrolled by 8 (len is expected to be a multiple of 8).
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for (i = 0; i < len; i += 8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
/** Dot product of two int16 vectors of length @order, accumulated in int32. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Returns the dot product of v1 and v2 (using v1's values before update),
 * while simultaneously doing v1[i] += mul * v3[i] in place.
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;

    while (order--) {
        res   += *v1 * *v2++;   /* accumulate before v1 is modified */
        *v1++ += mul * *v3++;
    }
    return res;
}
/**
 * Apply a symmetric window in Q15 to int16 samples with rounding:
 * sample i and its mirror (len-1-i) are both scaled by window[i].
 * MUL16 is a 16x16->32 multiply macro from mathops.h (not visible here).
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i]     = (MUL16(input[i],     w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
/**
 * Clip each int32 element of src to [min, max] into dst, unrolled by 8.
 * NOTE(review): the do/while processes at least one group, so len is
 * assumed to be a non-zero multiple of 8 — confirm against callers.
 * av_clip() comes from libavutil.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/* Fixed-point IDCT coefficients, round(2048*sqrt(2)*cos(k*pi/16)), used by
 * the WMV2 IDCT below.  The transforms also reference W0, which is not
 * visible in this extract and is expected to be defined nearby. */
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
2541 static void wmv2_idct_row(short * b)
2544 int a0,a1,a2,a3,a4,a5,a6,a7;
2546 a1 = W1*b[1]+W7*b[7];
2547 a7 = W7*b[1]-W1*b[7];
2548 a5 = W5*b[5]+W3*b[3];
2549 a3 = W3*b[5]-W5*b[3];
2550 a2 = W2*b[2]+W6*b[6];
2551 a6 = W6*b[2]-W2*b[6];
2552 a0 = W0*b[0]+W0*b[4];
2553 a4 = W0*b[0]-W0*b[4];
2555 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2556 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2558 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2559 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2560 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2561 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2562 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2563 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2564 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2565 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2567 static void wmv2_idct_col(short * b)
2570 int a0,a1,a2,a3,a4,a5,a6,a7;
2571 /*step 1, with extended precision*/
2572 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2573 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2574 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2575 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2576 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2577 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2578 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2579 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2581 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2582 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2584 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2585 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2586 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2587 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2589 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2590 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2591 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2592 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
/** Full 8x8 WMV2 IDCT: rows first, then columns, in place on block[64]. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 64; i += 8)
        wmv2_idct_row(block + i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
2604 /* XXX: those functions should be suppressed ASAP when all IDCTs are
2606 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2608 ff_wmv2_idct_c(block);
2609 ff_put_pixels_clamped_c(block, dest, line_size);
2611 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2613 ff_wmv2_idct_c(block);
2614 ff_add_pixels_clamped_c(block, dest, line_size);
2616 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2618 ff_j_rev_dct (block);
2619 ff_put_pixels_clamped_c(block, dest, line_size);
2621 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2623 ff_j_rev_dct (block);
2624 ff_add_pixels_clamped_c(block, dest, line_size);
2627 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
2629 /* init static data */
2630 av_cold void ff_dsputil_static_init(void)
2634 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2635 for(i=0;i<MAX_NEG_CROP;i++) {
2637 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2640 for(i=0;i<512;i++) {
2641 ff_squareTbl[i] = (i - 256) * (i - 256);
2644 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2647 int ff_check_alignment(void){
2648 static int did_fail=0;
2649 LOCAL_ALIGNED_16(int, aligned, [4]);
2651 if((intptr_t)aligned & 15){
2653 #if HAVE_MMX || HAVE_ALTIVEC
2654 av_log(NULL, AV_LOG_ERROR,
2655 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2656 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2657 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2658 "Do not report crashes to Libav developers.\n");
2667 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2671 ff_check_alignment();
2674 if (avctx->bits_per_raw_sample == 10) {
2675 c->fdct = ff_jpeg_fdct_islow_10;
2676 c->fdct248 = ff_fdct248_islow_10;
2678 if(avctx->dct_algo==FF_DCT_FASTINT) {
2679 c->fdct = ff_fdct_ifast;
2680 c->fdct248 = ff_fdct_ifast248;
2682 else if(avctx->dct_algo==FF_DCT_FAAN) {
2683 c->fdct = ff_faandct;
2684 c->fdct248 = ff_faandct248;
2687 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2688 c->fdct248 = ff_fdct248_islow_8;
2691 #endif //CONFIG_ENCODERS
2693 if (avctx->bits_per_raw_sample == 10) {
2694 c->idct_put = ff_simple_idct_put_10;
2695 c->idct_add = ff_simple_idct_add_10;
2696 c->idct = ff_simple_idct_10;
2697 c->idct_permutation_type = FF_NO_IDCT_PERM;
2699 if(avctx->idct_algo==FF_IDCT_INT){
2700 c->idct_put= ff_jref_idct_put;
2701 c->idct_add= ff_jref_idct_add;
2702 c->idct = ff_j_rev_dct;
2703 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2704 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
2705 avctx->idct_algo==FF_IDCT_VP3){
2706 c->idct_put= ff_vp3_idct_put_c;
2707 c->idct_add= ff_vp3_idct_add_c;
2708 c->idct = ff_vp3_idct_c;
2709 c->idct_permutation_type= FF_NO_IDCT_PERM;
2710 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2711 c->idct_put= ff_wmv2_idct_put_c;
2712 c->idct_add= ff_wmv2_idct_add_c;
2713 c->idct = ff_wmv2_idct_c;
2714 c->idct_permutation_type= FF_NO_IDCT_PERM;
2715 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2716 c->idct_put= ff_faanidct_put;
2717 c->idct_add= ff_faanidct_add;
2718 c->idct = ff_faanidct;
2719 c->idct_permutation_type= FF_NO_IDCT_PERM;
2720 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
2721 c->idct_put= ff_ea_idct_put_c;
2722 c->idct_permutation_type= FF_NO_IDCT_PERM;
2723 }else{ //accurate/default
2724 c->idct_put = ff_simple_idct_put_8;
2725 c->idct_add = ff_simple_idct_add_8;
2726 c->idct = ff_simple_idct_8;
2727 c->idct_permutation_type= FF_NO_IDCT_PERM;
2731 c->diff_pixels = diff_pixels_c;
2732 c->put_pixels_clamped = ff_put_pixels_clamped_c;
2733 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
2734 c->add_pixels_clamped = ff_add_pixels_clamped_c;
2735 c->sum_abs_dctelem = sum_abs_dctelem_c;
2738 c->pix_sum = pix_sum_c;
2739 c->pix_norm1 = pix_norm1_c;
2741 c->fill_block_tab[0] = fill_block16_c;
2742 c->fill_block_tab[1] = fill_block8_c;
2744 /* TODO [0] 16 [1] 8 */
2745 c->pix_abs[0][0] = pix_abs16_c;
2746 c->pix_abs[0][1] = pix_abs16_x2_c;
2747 c->pix_abs[0][2] = pix_abs16_y2_c;
2748 c->pix_abs[0][3] = pix_abs16_xy2_c;
2749 c->pix_abs[1][0] = pix_abs8_c;
2750 c->pix_abs[1][1] = pix_abs8_x2_c;
2751 c->pix_abs[1][2] = pix_abs8_y2_c;
2752 c->pix_abs[1][3] = pix_abs8_xy2_c;
2754 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2755 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2756 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2757 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2758 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2759 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2760 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2761 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2762 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2764 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2765 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2766 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2767 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2768 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2769 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2770 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2771 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2772 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2774 #define dspfunc(PFX, IDX, NUM) \
2775 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2776 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2777 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2778 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2779 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2780 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2781 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2782 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2783 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2784 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2785 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2786 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2787 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2788 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2789 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2790 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2792 dspfunc(put_qpel, 0, 16);
2793 dspfunc(put_no_rnd_qpel, 0, 16);
2795 dspfunc(avg_qpel, 0, 16);
2796 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2798 dspfunc(put_qpel, 1, 8);
2799 dspfunc(put_no_rnd_qpel, 1, 8);
2801 dspfunc(avg_qpel, 1, 8);
2802 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2806 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
2807 ff_mlp_init(c, avctx);
2809 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
2810 ff_intrax8dsp_init(c,avctx);
2813 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2814 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2815 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2816 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2817 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2818 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2819 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2820 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2822 #define SET_CMP_FUNC(name) \
2823 c->name[0]= name ## 16_c;\
2824 c->name[1]= name ## 8x8_c;
2826 SET_CMP_FUNC(hadamard8_diff)
2827 c->hadamard8_diff[4]= hadamard8_intra16_c;
2828 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2829 SET_CMP_FUNC(dct_sad)
2830 SET_CMP_FUNC(dct_max)
2832 SET_CMP_FUNC(dct264_sad)
2834 c->sad[0]= pix_abs16_c;
2835 c->sad[1]= pix_abs8_c;
2839 SET_CMP_FUNC(quant_psnr)
2842 c->vsad[0]= vsad16_c;
2843 c->vsad[4]= vsad_intra16_c;
2844 c->vsad[5]= vsad_intra8_c;
2845 c->vsse[0]= vsse16_c;
2846 c->vsse[4]= vsse_intra16_c;
2847 c->vsse[5]= vsse_intra8_c;
2848 c->nsse[0]= nsse16_c;
2849 c->nsse[1]= nsse8_c;
2851 ff_dsputil_init_dwt(c);
2854 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2856 c->add_bytes= add_bytes_c;
2857 c->diff_bytes= diff_bytes_c;
2858 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2859 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2860 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2861 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2862 c->bswap_buf= bswap_buf;
2863 c->bswap16_buf = bswap16_buf;
2865 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2866 c->h263_h_loop_filter= h263_h_loop_filter_c;
2867 c->h263_v_loop_filter= h263_v_loop_filter_c;
2870 if (CONFIG_VP3_DECODER) {
2871 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
2872 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
2873 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
2876 c->h261_loop_filter= h261_loop_filter_c;
2878 c->try_8x8basis= try_8x8basis_c;
2879 c->add_8x8basis= add_8x8basis_c;
2881 #if CONFIG_VORBIS_DECODER
2882 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
2884 #if CONFIG_AC3_DECODER
2885 c->ac3_downmix = ff_ac3_downmix_c;
2887 c->vector_fmul_reverse = vector_fmul_reverse_c;
2888 c->vector_fmul_add = vector_fmul_add_c;
2889 c->vector_fmul_window = vector_fmul_window_c;
2890 c->vector_clipf = vector_clipf_c;
2891 c->scalarproduct_int16 = scalarproduct_int16_c;
2892 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2893 c->apply_window_int16 = apply_window_int16_c;
2894 c->vector_clip_int32 = vector_clip_int32_c;
2895 c->scalarproduct_float = scalarproduct_float_c;
2896 c->butterflies_float = butterflies_float_c;
2897 c->butterflies_float_interleave = butterflies_float_interleave_c;
2898 c->vector_fmul_scalar = vector_fmul_scalar_c;
2900 c->shrink[0]= av_image_copy_plane;
2901 c->shrink[1]= ff_shrink22;
2902 c->shrink[2]= ff_shrink44;
2903 c->shrink[3]= ff_shrink88;
2905 c->prefetch= just_return;
2907 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
2908 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
2912 #define FUNC(f, depth) f ## _ ## depth
2913 #define FUNCC(f, depth) f ## _ ## depth ## _c
2915 #define dspfunc1(PFX, IDX, NUM, depth)\
2916 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
2917 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
2918 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
2919 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
2921 #define dspfunc2(PFX, IDX, NUM, depth)\
2922 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
2923 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
2924 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
2925 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
2926 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
2927 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
2928 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
2929 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
2930 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
2931 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
2932 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
2933 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
2934 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
2935 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
2936 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
2937 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
2940 #define BIT_DEPTH_FUNCS(depth, dct)\
2941 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
2942 c->draw_edges = FUNCC(draw_edges , depth);\
2943 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
2944 c->clear_block = FUNCC(clear_block ## dct , depth);\
2945 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
2946 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
2947 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
2948 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
2949 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
2951 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
2952 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
2953 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
2954 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
2955 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
2956 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
2958 dspfunc1(put , 0, 16, depth);\
2959 dspfunc1(put , 1, 8, depth);\
2960 dspfunc1(put , 2, 4, depth);\
2961 dspfunc1(put , 3, 2, depth);\
2962 dspfunc1(put_no_rnd, 0, 16, depth);\
2963 dspfunc1(put_no_rnd, 1, 8, depth);\
2964 dspfunc1(avg , 0, 16, depth);\
2965 dspfunc1(avg , 1, 8, depth);\
2966 dspfunc1(avg , 2, 4, depth);\
2967 dspfunc1(avg , 3, 2, depth);\
2968 dspfunc1(avg_no_rnd, 0, 16, depth);\
2969 dspfunc1(avg_no_rnd, 1, 8, depth);\
2971 dspfunc2(put_h264_qpel, 0, 16, depth);\
2972 dspfunc2(put_h264_qpel, 1, 8, depth);\
2973 dspfunc2(put_h264_qpel, 2, 4, depth);\
2974 dspfunc2(put_h264_qpel, 3, 2, depth);\
2975 dspfunc2(avg_h264_qpel, 0, 16, depth);\
2976 dspfunc2(avg_h264_qpel, 1, 8, depth);\
2977 dspfunc2(avg_h264_qpel, 2, 4, depth);
2979 switch (avctx->bits_per_raw_sample) {
2981 if (c->dct_bits == 32) {
2982 BIT_DEPTH_FUNCS(9, _32);
2984 BIT_DEPTH_FUNCS(9, _16);
2988 if (c->dct_bits == 32) {
2989 BIT_DEPTH_FUNCS(10, _32);
2991 BIT_DEPTH_FUNCS(10, _16);
2995 BIT_DEPTH_FUNCS(8, _16);
3000 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
3001 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
3002 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
3003 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
3004 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
3005 if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx);
3006 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
3007 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
3009 for (i = 0; i < 4; i++) {
3010 for (j = 0; j < 16; j++) {
3011 if(!c->put_2tap_qpel_pixels_tab[i][j])
3012 c->put_2tap_qpel_pixels_tab[i][j] =
3013 c->put_h264_qpel_pixels_tab[i][j];
3014 if(!c->avg_2tap_qpel_pixels_tab[i][j])
3015 c->avg_2tap_qpel_pixels_tab[i][j] =
3016 c->avg_h264_qpel_pixels_tab[i][j];
3020 ff_init_scantable_permutation(c->idct_permutation,
3021 c->idct_permutation_type);