2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
36 #include "simple_idct.h"
40 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
45 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
47 /* reference fdct/idct */
48 extern void fdct(DCTELEM *block);
49 extern void idct(DCTELEM *block);
50 extern void ff_idct_xvid_mmx(DCTELEM *block);
51 extern void ff_idct_xvid_mmx2(DCTELEM *block);
52 extern void init_fdct();
54 extern void j_rev_dct(DCTELEM *data);
55 extern void ff_mmx_idct(DCTELEM *data);
56 extern void ff_mmxext_idct(DCTELEM *data);
58 extern void odivx_idct_c (short *block);
61 extern void ff_bfin_idct (DCTELEM *block) ;
62 extern void ff_bfin_fdct (DCTELEM *block) ;
65 extern void fdct_altivec (DCTELEM *block);
66 //extern void idct_altivec (DCTELEM *block);?? no routine
71 enum { FDCT, IDCT } is_idct;
72 void (* func) (DCTELEM *block);
73 void (* ref) (DCTELEM *block);
74 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
77 #ifndef FAAN_POSTSCALE
78 #define FAAN_SCALE SCALE_PERM
80 #define FAAN_SCALE NO_PERM
83 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
86 struct algo algos[] = {
87 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
88 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
89 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
90 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
91 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
92 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM),
95 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
96 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
97 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
99 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
100 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
101 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
102 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
103 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
107 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
111 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
112 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
118 #define AANSCALE_BITS 12
119 static const unsigned short aanscales[64] = {
120 /* precomputed values scaled up by 14 bits */
121 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
122 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
123 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
124 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
125 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
126 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
127 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
128 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
131 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
133 int64_t gettime(void)
136 gettimeofday(&tv,NULL);
137 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
141 #define NB_ITS_SPEED 50000
143 static short idct_mmx_perm[64];
145 static short idct_simple_mmx_perm[64]={
146 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
147 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
148 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
149 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
150 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
151 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
152 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
153 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
156 void idct_mmx_init(void)
160 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
161 for (i = 0; i < 64; i++) {
162 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
163 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
167 static DCTELEM block[64] __attribute__ ((aligned (8)));
168 static DCTELEM block1[64] __attribute__ ((aligned (8)));
169 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
171 void dct_error(const char *name, int is_idct,
172 void (*fdct_func)(DCTELEM *block),
173 void (*fdct_ref)(DCTELEM *block), int form, int test)
177 int64_t err2, ti, ti1, it1;
178 int64_t sysErr[64], sysErrMax=0;
180 int blockSumErrMax=0, blockSumErr;
186 for(i=0; i<64; i++) sysErr[i]=0;
187 for(it=0;it<NB_ITS;it++) {
193 block1[i] = (random() % 512) -256;
202 int num= (random()%10)+1;
204 block1[random()%64] = (random() % 512) -256;
207 block1[0]= (random()%4096)-2048;
208 block1[63]= (block1[0]&1)^1;
212 #if 0 // simulate mismatch control
217 if((sum&1)==0) block1[63]^=1;
222 block_org[i]= block1[i];
224 if (form == MMX_PERM) {
226 block[idct_mmx_perm[i]] = block1[i];
227 } else if (form == MMX_SIMPLE_PERM) {
229 block[idct_simple_mmx_perm[i]] = block1[i];
235 #if 0 // simulate mismatch control for tested IDCT but not the ref
240 if((sum&1)==0) block[63]^=1;
245 emms_c(); /* for ff_mmx_idct */
247 if (form == SCALE_PERM) {
248 for(i=0; i<64; i++) {
249 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
250 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
258 v = abs(block[i] - block1[i]);
262 sysErr[i] += block[i] - block1[i];
264 if( abs(block[i])>maxout) maxout=abs(block[i]);
266 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
267 #if 0 // print different matrix pairs
271 if((i&7)==0) printf("\n");
272 printf("%4d ", block_org[i]);
275 if((i&7)==0) printf("\n");
276 printf("%4d ", block[i] - block1[i]);
281 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
283 #if 1 // dump systematic errors
285 if(i%8==0) printf("\n");
286 printf("%5d ", (int)sysErr[i]);
291 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
292 is_idct ? "IDCT" : "DCT",
293 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
301 block1[i] = (random() % 512) -256;
311 block1[0] = (random() % 512) -256;
312 block1[1] = (random() % 512) -256;
313 block1[2] = (random() % 512) -256;
314 block1[3] = (random() % 512) -256;
318 if (form == MMX_PERM) {
320 block[idct_mmx_perm[i]] = block1[i];
321 } else if(form == MMX_SIMPLE_PERM) {
323 block[idct_simple_mmx_perm[i]] = block1[i];
332 for(it=0;it<NB_ITS_SPEED;it++) {
335 // memcpy(block, block1, sizeof(DCTELEM) * 64);
336 // dont memcpy especially not fastmemcpy because it does movntq !!!
340 ti1 = gettime() - ti;
341 } while (ti1 < 1000000);
344 printf("%s %s: %0.1f kdct/s\n",
345 is_idct ? "IDCT" : "DCT",
346 name, (double)it1 * 1000.0 / (double)ti1);
350 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
351 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
353 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
356 static double c8[8][8];
357 static double c4[4][4];
358 double block1[64], block2[64], block3[64];
368 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
369 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
370 sum += c8[i][j] * c8[i][j];
377 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
378 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
379 sum += c4[i][j] * c4[i][j];
388 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
389 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
398 sum += c8[k][j] * block1[8*i+k];
409 sum += c4[k][j] * block2[8*(2*k)+i];
410 block3[8*(2*j)+i] = sum;
415 sum += c4[k][j] * block2[8*(2*k+1)+i];
416 block3[8*(2*j+1)+i] = sum;
420 /* clamp and store the result */
428 dest[i * linesize + j] = (int)rint(v);
433 void idct248_error(const char *name,
434 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
436 int it, i, it1, ti, ti1, err_max, v;
440 /* just one test to see if code is correct (precision is less
443 for(it=0;it<NB_ITS;it++) {
445 /* XXX: use forward transform to generate values */
447 block1[i] = (random() % 256) - 128;
452 idct248_ref(img_dest1, 8, block);
456 idct248_put(img_dest, 8, block);
459 v = abs((int)img_dest[i] - (int)img_dest1[i]);
461 printf("%d %d\n", img_dest[i], img_dest1[i]);
470 printf(" %3d", img_dest1[i*8+j]);
479 printf(" %3d", img_dest[i*8+j]);
485 printf("%s %s: err_inf=%d\n",
486 1 ? "IDCT248" : "DCT248",
492 for(it=0;it<NB_ITS_SPEED;it++) {
495 // memcpy(block, block1, sizeof(DCTELEM) * 64);
496 // dont memcpy especially not fastmemcpy because it does movntq !!!
497 idct248_put(img_dest, 8, block);
500 ti1 = gettime() - ti;
501 } while (ti1 < 1000000);
504 printf("%s %s: %0.1f kdct/s\n",
505 1 ? "IDCT248" : "DCT248",
506 name, (double)it1 * 1000.0 / (double)ti1);
511 printf("dct-test [-i] [<test-number>]\n"
512 "test-number 0 -> test with random matrixes\n"
513 " 1 -> test with random sparse matrixes\n"
514 " 2 -> do 3. test from mpeg4 std\n"
515 "-i test IDCT implementations\n"
516 "-4 test IDCT248 implementations\n");
519 int main(int argc, char **argv)
521 int test_idct = 0, test_248_dct = 0;
528 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
529 for(i=0;i<MAX_NEG_CROP;i++) {
531 cropTbl[i + MAX_NEG_CROP + 256] = 255;
535 c = getopt(argc, argv, "ih4");
552 if(optind <argc) test= atoi(argv[optind]);
554 printf("ffmpeg DCT/IDCT test\n");
557 idct248_error("SIMPLE-C", simple_idct248_put);
559 for (i=0;algos[i].name;i++)
560 if (algos[i].is_idct == test_idct) {
561 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);