2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
36 #include "simple_idct.h"
40 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
45 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
47 /* reference fdct/idct */
48 extern void fdct(DCTELEM *block);
49 extern void idct(DCTELEM *block);
50 extern void ff_idct_xvid_mmx(DCTELEM *block);
51 extern void ff_idct_xvid_mmx2(DCTELEM *block);
52 extern void init_fdct();
54 extern void ff_mmx_idct(DCTELEM *data);
55 extern void ff_mmxext_idct(DCTELEM *data);
57 extern void odivx_idct_c (short *block);
60 extern void ff_bfin_idct (DCTELEM *block) ;
61 extern void ff_bfin_fdct (DCTELEM *block) ;
64 extern void fdct_altivec (DCTELEM *block);
65 //extern void idct_altivec (DCTELEM *block);?? no routine
70 enum { FDCT, IDCT } is_idct;
71 void (* func) (DCTELEM *block);
72 void (* ref) (DCTELEM *block);
73 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
76 #ifndef FAAN_POSTSCALE
77 #define FAAN_SCALE SCALE_PERM
79 #define FAAN_SCALE NO_PERM
82 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
85 struct algo algos[] = {
86 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
87 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
88 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
89 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
90 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
91 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM),
94 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
95 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
96 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
98 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
99 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
100 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
101 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
102 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
106 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
110 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
111 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
117 #define AANSCALE_BITS 12
118 static const unsigned short aanscales[64] = {
119 /* precomputed values scaled up by 14 bits */
120 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
121 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
122 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
123 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
124 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
125 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
126 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
127 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
130 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
132 int64_t gettime(void)
135 gettimeofday(&tv,NULL);
136 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
140 #define NB_ITS_SPEED 50000
142 static short idct_mmx_perm[64];
144 static short idct_simple_mmx_perm[64]={
145 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
146 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
147 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
148 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
149 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
150 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
151 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
152 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
155 void idct_mmx_init(void)
159 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
160 for (i = 0; i < 64; i++) {
161 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
162 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
166 static DCTELEM block[64] __attribute__ ((aligned (8)));
167 static DCTELEM block1[64] __attribute__ ((aligned (8)));
168 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
170 void dct_error(const char *name, int is_idct,
171 void (*fdct_func)(DCTELEM *block),
172 void (*fdct_ref)(DCTELEM *block), int form, int test)
176 int64_t err2, ti, ti1, it1;
177 int64_t sysErr[64], sysErrMax=0;
179 int blockSumErrMax=0, blockSumErr;
185 for(i=0; i<64; i++) sysErr[i]=0;
186 for(it=0;it<NB_ITS;it++) {
192 block1[i] = (random() % 512) -256;
201 int num= (random()%10)+1;
203 block1[random()%64] = (random() % 512) -256;
206 block1[0]= (random()%4096)-2048;
207 block1[63]= (block1[0]&1)^1;
211 #if 0 // simulate mismatch control
216 if((sum&1)==0) block1[63]^=1;
221 block_org[i]= block1[i];
223 if (form == MMX_PERM) {
225 block[idct_mmx_perm[i]] = block1[i];
226 } else if (form == MMX_SIMPLE_PERM) {
228 block[idct_simple_mmx_perm[i]] = block1[i];
234 #if 0 // simulate mismatch control for tested IDCT but not the ref
239 if((sum&1)==0) block[63]^=1;
244 emms_c(); /* for ff_mmx_idct */
246 if (form == SCALE_PERM) {
247 for(i=0; i<64; i++) {
248 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
249 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
257 v = abs(block[i] - block1[i]);
261 sysErr[i] += block[i] - block1[i];
263 if( abs(block[i])>maxout) maxout=abs(block[i]);
265 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
266 #if 0 // print different matrix pairs
270 if((i&7)==0) printf("\n");
271 printf("%4d ", block_org[i]);
274 if((i&7)==0) printf("\n");
275 printf("%4d ", block[i] - block1[i]);
280 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
282 #if 1 // dump systematic errors
284 if(i%8==0) printf("\n");
285 printf("%5d ", (int)sysErr[i]);
290 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
291 is_idct ? "IDCT" : "DCT",
292 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
300 block1[i] = (random() % 512) -256;
310 block1[0] = (random() % 512) -256;
311 block1[1] = (random() % 512) -256;
312 block1[2] = (random() % 512) -256;
313 block1[3] = (random() % 512) -256;
317 if (form == MMX_PERM) {
319 block[idct_mmx_perm[i]] = block1[i];
320 } else if(form == MMX_SIMPLE_PERM) {
322 block[idct_simple_mmx_perm[i]] = block1[i];
331 for(it=0;it<NB_ITS_SPEED;it++) {
334 // memcpy(block, block1, sizeof(DCTELEM) * 64);
335 // dont memcpy especially not fastmemcpy because it does movntq !!!
339 ti1 = gettime() - ti;
340 } while (ti1 < 1000000);
343 printf("%s %s: %0.1f kdct/s\n",
344 is_idct ? "IDCT" : "DCT",
345 name, (double)it1 * 1000.0 / (double)ti1);
349 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
350 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
352 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
355 static double c8[8][8];
356 static double c4[4][4];
357 double block1[64], block2[64], block3[64];
367 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
368 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
369 sum += c8[i][j] * c8[i][j];
376 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
377 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
378 sum += c4[i][j] * c4[i][j];
387 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
388 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
397 sum += c8[k][j] * block1[8*i+k];
408 sum += c4[k][j] * block2[8*(2*k)+i];
409 block3[8*(2*j)+i] = sum;
414 sum += c4[k][j] * block2[8*(2*k+1)+i];
415 block3[8*(2*j+1)+i] = sum;
419 /* clamp and store the result */
427 dest[i * linesize + j] = (int)rint(v);
432 void idct248_error(const char *name,
433 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
435 int it, i, it1, ti, ti1, err_max, v;
439 /* just one test to see if code is correct (precision is less
442 for(it=0;it<NB_ITS;it++) {
444 /* XXX: use forward transform to generate values */
446 block1[i] = (random() % 256) - 128;
451 idct248_ref(img_dest1, 8, block);
455 idct248_put(img_dest, 8, block);
458 v = abs((int)img_dest[i] - (int)img_dest1[i]);
460 printf("%d %d\n", img_dest[i], img_dest1[i]);
469 printf(" %3d", img_dest1[i*8+j]);
478 printf(" %3d", img_dest[i*8+j]);
484 printf("%s %s: err_inf=%d\n",
485 1 ? "IDCT248" : "DCT248",
491 for(it=0;it<NB_ITS_SPEED;it++) {
494 // memcpy(block, block1, sizeof(DCTELEM) * 64);
495 // dont memcpy especially not fastmemcpy because it does movntq !!!
496 idct248_put(img_dest, 8, block);
499 ti1 = gettime() - ti;
500 } while (ti1 < 1000000);
503 printf("%s %s: %0.1f kdct/s\n",
504 1 ? "IDCT248" : "DCT248",
505 name, (double)it1 * 1000.0 / (double)ti1);
510 printf("dct-test [-i] [<test-number>]\n"
511 "test-number 0 -> test with random matrixes\n"
512 " 1 -> test with random sparse matrixes\n"
513 " 2 -> do 3. test from mpeg4 std\n"
514 "-i test IDCT implementations\n"
515 "-4 test IDCT248 implementations\n");
518 int main(int argc, char **argv)
520 int test_idct = 0, test_248_dct = 0;
527 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
528 for(i=0;i<MAX_NEG_CROP;i++) {
530 cropTbl[i + MAX_NEG_CROP + 256] = 255;
534 c = getopt(argc, argv, "ih4");
551 if(optind <argc) test= atoi(argv[optind]);
553 printf("ffmpeg DCT/IDCT test\n");
556 idct248_error("SIMPLE-C", simple_idct248_put);
558 for (i=0;algos[i].name;i++)
559 if (algos[i].is_idct == test_idct) {
560 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);