2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
37 #include "simple_idct.h"
41 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
46 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
48 /* reference fdct/idct */
49 extern void fdct(DCTELEM *block);
50 extern void idct(DCTELEM *block);
51 extern void ff_idct_xvid_mmx(DCTELEM *block);
52 extern void ff_idct_xvid_mmx2(DCTELEM *block);
53 extern void init_fdct();
55 extern void ff_mmx_idct(DCTELEM *data);
56 extern void ff_mmxext_idct(DCTELEM *data);
58 extern void odivx_idct_c (short *block);
61 extern void ff_bfin_idct (DCTELEM *block) ;
62 extern void ff_bfin_fdct (DCTELEM *block) ;
65 extern void fdct_altivec (DCTELEM *block);
66 //extern void idct_altivec (DCTELEM *block);?? no routine
71 enum { FDCT, IDCT } is_idct;
72 void (* func) (DCTELEM *block);
73 void (* ref) (DCTELEM *block);
74 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
77 #ifndef FAAN_POSTSCALE
78 #define FAAN_SCALE SCALE_PERM
80 #define FAAN_SCALE NO_PERM
83 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
86 struct algo algos[] = {
87 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
88 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
89 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
90 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
91 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
92 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
93 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM),
96 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
98 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
102 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
103 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
105 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
106 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
107 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
111 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
115 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
116 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
122 #define AANSCALE_BITS 12
123 static const unsigned short aanscales[64] = {
124 /* precomputed values scaled up by 14 bits */
125 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
126 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
127 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
128 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
129 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
130 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
131 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
132 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
135 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
137 int64_t gettime(void)
140 gettimeofday(&tv,NULL);
141 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
145 #define NB_ITS_SPEED 50000
147 static short idct_mmx_perm[64];
149 static short idct_simple_mmx_perm[64]={
150 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
151 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
152 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
153 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
154 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
155 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
156 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
157 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
160 void idct_mmx_init(void)
164 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
165 for (i = 0; i < 64; i++) {
166 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
167 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
171 static DCTELEM block[64] __attribute__ ((aligned (8)));
172 static DCTELEM block1[64] __attribute__ ((aligned (8)));
173 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
175 void dct_error(const char *name, int is_idct,
176 void (*fdct_func)(DCTELEM *block),
177 void (*fdct_ref)(DCTELEM *block), int form, int test)
181 int64_t err2, ti, ti1, it1;
182 int64_t sysErr[64], sysErrMax=0;
184 int blockSumErrMax=0, blockSumErr;
190 for(i=0; i<64; i++) sysErr[i]=0;
191 for(it=0;it<NB_ITS;it++) {
197 block1[i] = (random() % 512) -256;
206 int num= (random()%10)+1;
208 block1[random()%64] = (random() % 512) -256;
211 block1[0]= (random()%4096)-2048;
212 block1[63]= (block1[0]&1)^1;
216 #if 0 // simulate mismatch control
221 if((sum&1)==0) block1[63]^=1;
226 block_org[i]= block1[i];
228 if (form == MMX_PERM) {
230 block[idct_mmx_perm[i]] = block1[i];
231 } else if (form == MMX_SIMPLE_PERM) {
233 block[idct_simple_mmx_perm[i]] = block1[i];
239 #if 0 // simulate mismatch control for tested IDCT but not the ref
244 if((sum&1)==0) block[63]^=1;
249 emms_c(); /* for ff_mmx_idct */
251 if (form == SCALE_PERM) {
252 for(i=0; i<64; i++) {
253 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
254 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
262 v = abs(block[i] - block1[i]);
266 sysErr[i] += block[i] - block1[i];
268 if( abs(block[i])>maxout) maxout=abs(block[i]);
270 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
271 #if 0 // print different matrix pairs
275 if((i&7)==0) printf("\n");
276 printf("%4d ", block_org[i]);
279 if((i&7)==0) printf("\n");
280 printf("%4d ", block[i] - block1[i]);
285 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
287 #if 1 // dump systematic errors
289 if(i%8==0) printf("\n");
290 printf("%5d ", (int)sysErr[i]);
295 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
296 is_idct ? "IDCT" : "DCT",
297 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
305 block1[i] = (random() % 512) -256;
315 block1[0] = (random() % 512) -256;
316 block1[1] = (random() % 512) -256;
317 block1[2] = (random() % 512) -256;
318 block1[3] = (random() % 512) -256;
322 if (form == MMX_PERM) {
324 block[idct_mmx_perm[i]] = block1[i];
325 } else if(form == MMX_SIMPLE_PERM) {
327 block[idct_simple_mmx_perm[i]] = block1[i];
336 for(it=0;it<NB_ITS_SPEED;it++) {
339 // memcpy(block, block1, sizeof(DCTELEM) * 64);
340 // do not memcpy especially not fastmemcpy because it does movntq !!!
344 ti1 = gettime() - ti;
345 } while (ti1 < 1000000);
348 printf("%s %s: %0.1f kdct/s\n",
349 is_idct ? "IDCT" : "DCT",
350 name, (double)it1 * 1000.0 / (double)ti1);
354 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
355 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
357 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
360 static double c8[8][8];
361 static double c4[4][4];
362 double block1[64], block2[64], block3[64];
372 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
373 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
374 sum += c8[i][j] * c8[i][j];
381 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
382 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
383 sum += c4[i][j] * c4[i][j];
392 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
393 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
402 sum += c8[k][j] * block1[8*i+k];
413 sum += c4[k][j] * block2[8*(2*k)+i];
414 block3[8*(2*j)+i] = sum;
419 sum += c4[k][j] * block2[8*(2*k+1)+i];
420 block3[8*(2*j+1)+i] = sum;
424 /* clamp and store the result */
432 dest[i * linesize + j] = (int)rint(v);
437 void idct248_error(const char *name,
438 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
440 int it, i, it1, ti, ti1, err_max, v;
444 /* just one test to see if code is correct (precision is less
447 for(it=0;it<NB_ITS;it++) {
449 /* XXX: use forward transform to generate values */
451 block1[i] = (random() % 256) - 128;
456 idct248_ref(img_dest1, 8, block);
460 idct248_put(img_dest, 8, block);
463 v = abs((int)img_dest[i] - (int)img_dest1[i]);
465 printf("%d %d\n", img_dest[i], img_dest1[i]);
474 printf(" %3d", img_dest1[i*8+j]);
483 printf(" %3d", img_dest[i*8+j]);
489 printf("%s %s: err_inf=%d\n",
490 1 ? "IDCT248" : "DCT248",
496 for(it=0;it<NB_ITS_SPEED;it++) {
499 // memcpy(block, block1, sizeof(DCTELEM) * 64);
500 // do not memcpy especially not fastmemcpy because it does movntq !!!
501 idct248_put(img_dest, 8, block);
504 ti1 = gettime() - ti;
505 } while (ti1 < 1000000);
508 printf("%s %s: %0.1f kdct/s\n",
509 1 ? "IDCT248" : "DCT248",
510 name, (double)it1 * 1000.0 / (double)ti1);
515 printf("dct-test [-i] [<test-number>]\n"
516 "test-number 0 -> test with random matrixes\n"
517 " 1 -> test with random sparse matrixes\n"
518 " 2 -> do 3. test from mpeg4 std\n"
519 "-i test IDCT implementations\n"
520 "-4 test IDCT248 implementations\n");
523 int main(int argc, char **argv)
525 int test_idct = 0, test_248_dct = 0;
532 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
533 for(i=0;i<MAX_NEG_CROP;i++) {
535 cropTbl[i + MAX_NEG_CROP + 256] = 255;
539 c = getopt(argc, argv, "ih4");
556 if(optind <argc) test= atoi(argv[optind]);
558 printf("ffmpeg DCT/IDCT test\n");
561 idct248_error("SIMPLE-C", simple_idct248_put);
563 for (i=0;algos[i].name;i++)
564 if (algos[i].is_idct == test_idct) {
565 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);