2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
37 #include "simple_idct.h"
41 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
47 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
49 /* reference fdct/idct */
50 extern void fdct(DCTELEM *block);
51 extern void idct(DCTELEM *block);
52 extern void ff_idct_xvid_mmx(DCTELEM *block);
53 extern void ff_idct_xvid_mmx2(DCTELEM *block);
54 extern void init_fdct();
56 extern void ff_mmx_idct(DCTELEM *data);
57 extern void ff_mmxext_idct(DCTELEM *data);
59 extern void odivx_idct_c (short *block);
62 extern void ff_bfin_idct (DCTELEM *block) ;
63 extern void ff_bfin_fdct (DCTELEM *block) ;
66 extern void fdct_altivec (DCTELEM *block);
67 //extern void idct_altivec (DCTELEM *block);?? no routine
72 enum { FDCT, IDCT } is_idct;
73 void (* func) (DCTELEM *block);
74 void (* ref) (DCTELEM *block);
75 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
78 #ifndef FAAN_POSTSCALE
79 #define FAAN_SCALE SCALE_PERM
81 #define FAAN_SCALE NO_PERM
84 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
87 struct algo algos[] = {
88 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
89 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
90 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
91 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
92 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
93 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
94 DCT_ERROR("SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM),
97 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
99 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
103 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
104 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
106 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
107 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
108 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
112 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
116 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
117 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
123 #define AANSCALE_BITS 12
124 static const unsigned short aanscales[64] = {
125 /* precomputed values scaled up by 14 bits */
126 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
127 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
128 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
129 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
130 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
131 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
132 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
133 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
136 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
138 int64_t gettime(void)
141 gettimeofday(&tv,NULL);
142 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
146 #define NB_ITS_SPEED 50000
148 static short idct_mmx_perm[64];
150 static short idct_simple_mmx_perm[64]={
151 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
152 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
153 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
154 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
155 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
156 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
157 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
158 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
161 void idct_mmx_init(void)
165 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
166 for (i = 0; i < 64; i++) {
167 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
168 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
172 static DCTELEM block[64] __attribute__ ((aligned (8)));
173 static DCTELEM block1[64] __attribute__ ((aligned (8)));
174 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
176 void dct_error(const char *name, int is_idct,
177 void (*fdct_func)(DCTELEM *block),
178 void (*fdct_ref)(DCTELEM *block), int form, int test)
182 int64_t err2, ti, ti1, it1;
183 int64_t sysErr[64], sysErrMax=0;
185 int blockSumErrMax=0, blockSumErr;
191 for(i=0; i<64; i++) sysErr[i]=0;
192 for(it=0;it<NB_ITS;it++) {
198 block1[i] = (random() % 512) -256;
207 int num= (random()%10)+1;
209 block1[random()%64] = (random() % 512) -256;
212 block1[0]= (random()%4096)-2048;
213 block1[63]= (block1[0]&1)^1;
217 #if 0 // simulate mismatch control
222 if((sum&1)==0) block1[63]^=1;
227 block_org[i]= block1[i];
229 if (form == MMX_PERM) {
231 block[idct_mmx_perm[i]] = block1[i];
232 } else if (form == MMX_SIMPLE_PERM) {
234 block[idct_simple_mmx_perm[i]] = block1[i];
240 #if 0 // simulate mismatch control for tested IDCT but not the ref
245 if((sum&1)==0) block[63]^=1;
250 emms_c(); /* for ff_mmx_idct */
252 if (form == SCALE_PERM) {
253 for(i=0; i<64; i++) {
254 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
255 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
263 v = abs(block[i] - block1[i]);
267 sysErr[i] += block[i] - block1[i];
269 if( abs(block[i])>maxout) maxout=abs(block[i]);
271 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
272 #if 0 // print different matrix pairs
276 if((i&7)==0) printf("\n");
277 printf("%4d ", block_org[i]);
280 if((i&7)==0) printf("\n");
281 printf("%4d ", block[i] - block1[i]);
286 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
288 #if 1 // dump systematic errors
290 if(i%8==0) printf("\n");
291 printf("%5d ", (int)sysErr[i]);
296 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
297 is_idct ? "IDCT" : "DCT",
298 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
306 block1[i] = (random() % 512) -256;
316 block1[0] = (random() % 512) -256;
317 block1[1] = (random() % 512) -256;
318 block1[2] = (random() % 512) -256;
319 block1[3] = (random() % 512) -256;
323 if (form == MMX_PERM) {
325 block[idct_mmx_perm[i]] = block1[i];
326 } else if(form == MMX_SIMPLE_PERM) {
328 block[idct_simple_mmx_perm[i]] = block1[i];
337 for(it=0;it<NB_ITS_SPEED;it++) {
340 // memcpy(block, block1, sizeof(DCTELEM) * 64);
341 // do not memcpy especially not fastmemcpy because it does movntq !!!
345 ti1 = gettime() - ti;
346 } while (ti1 < 1000000);
349 printf("%s %s: %0.1f kdct/s\n",
350 is_idct ? "IDCT" : "DCT",
351 name, (double)it1 * 1000.0 / (double)ti1);
355 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
356 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
358 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
361 static double c8[8][8];
362 static double c4[4][4];
363 double block1[64], block2[64], block3[64];
373 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
374 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
375 sum += c8[i][j] * c8[i][j];
382 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
383 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
384 sum += c4[i][j] * c4[i][j];
393 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
394 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
403 sum += c8[k][j] * block1[8*i+k];
414 sum += c4[k][j] * block2[8*(2*k)+i];
415 block3[8*(2*j)+i] = sum;
420 sum += c4[k][j] * block2[8*(2*k+1)+i];
421 block3[8*(2*j+1)+i] = sum;
425 /* clamp and store the result */
433 dest[i * linesize + j] = (int)rint(v);
438 void idct248_error(const char *name,
439 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
441 int it, i, it1, ti, ti1, err_max, v;
445 /* just one test to see if code is correct (precision is less
448 for(it=0;it<NB_ITS;it++) {
450 /* XXX: use forward transform to generate values */
452 block1[i] = (random() % 256) - 128;
457 idct248_ref(img_dest1, 8, block);
461 idct248_put(img_dest, 8, block);
464 v = abs((int)img_dest[i] - (int)img_dest1[i]);
466 printf("%d %d\n", img_dest[i], img_dest1[i]);
475 printf(" %3d", img_dest1[i*8+j]);
484 printf(" %3d", img_dest[i*8+j]);
490 printf("%s %s: err_inf=%d\n",
491 1 ? "IDCT248" : "DCT248",
497 for(it=0;it<NB_ITS_SPEED;it++) {
500 // memcpy(block, block1, sizeof(DCTELEM) * 64);
501 // do not memcpy especially not fastmemcpy because it does movntq !!!
502 idct248_put(img_dest, 8, block);
505 ti1 = gettime() - ti;
506 } while (ti1 < 1000000);
509 printf("%s %s: %0.1f kdct/s\n",
510 1 ? "IDCT248" : "DCT248",
511 name, (double)it1 * 1000.0 / (double)ti1);
516 printf("dct-test [-i] [<test-number>]\n"
517 "test-number 0 -> test with random matrixes\n"
518 " 1 -> test with random sparse matrixes\n"
519 " 2 -> do 3. test from mpeg4 std\n"
520 "-i test IDCT implementations\n"
521 "-4 test IDCT248 implementations\n");
524 int main(int argc, char **argv)
526 int test_idct = 0, test_248_dct = 0;
533 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
534 for(i=0;i<MAX_NEG_CROP;i++) {
536 cropTbl[i + MAX_NEG_CROP + 256] = 255;
540 c = getopt(argc, argv, "ih4");
557 if(optind <argc) test= atoi(argv[optind]);
559 printf("ffmpeg DCT/IDCT test\n");
562 idct248_error("SIMPLE-C", ff_simple_idct248_put);
564 for (i=0;algos[i].name;i++)
565 if (algos[i].is_idct == test_idct) {
566 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);