git.sesse.net Git - ffmpeg/blob - libavcodec/dct-test.c

   1 /*
   2  * (c) 2001 Fabrice Bellard
   3  *     2007 Marc Hoffman <marc.hoffman@analog.com>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * DCT test (c) 2001 Fabrice Bellard
  25  * Started from sample code by Juan J. Sierralta P.
  26  */
  27
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <sys/time.h>
  32 #include <unistd.h>
  33 #include <math.h>
  34
  35 #include "libavutil/cpu.h"
  36 #include "libavutil/common.h"
  37 #include "libavutil/lfg.h"
  38
  39 #include "simple_idct.h"
  40 #include "aandcttab.h"
  41 #include "faandct.h"
  42 #include "faanidct.h"
  43 #include "x86/idct_xvid.h"
  44 #include "dctref.h"
  45
  46 #undef printf
  47
  48 void ff_mmx_idct(DCTELEM *data);
  49 void ff_mmxext_idct(DCTELEM *data);
  50
  51 void odivx_idct_c(short *block);
  52
  53 // BFIN
  54 void ff_bfin_idct(DCTELEM *block);
  55 void ff_bfin_fdct(DCTELEM *block);
  56
  57 // ALTIVEC
  58 void fdct_altivec(DCTELEM *block);
  59 //void idct_altivec(DCTELEM *block);?? no routine
  60
  61 // ARM
  62 void ff_j_rev_dct_arm(DCTELEM *data);
  63 void ff_simple_idct_arm(DCTELEM *data);
  64 void ff_simple_idct_armv5te(DCTELEM *data);
  65 void ff_simple_idct_armv6(DCTELEM *data);
  66 void ff_simple_idct_neon(DCTELEM *data);
  67
  68 void ff_simple_idct_axp(DCTELEM *data);
  69
  70 struct algo {
  71     const char *name;
  72     void (*func)(DCTELEM *block);
  73     enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
  74                      SSE2_PERM, PARTTRANS_PERM } format;
  75     int mm_support;
  76     int nonspec;
  77 };
  78
  79 #ifndef FAAN_POSTSCALE
  80 #define FAAN_SCALE SCALE_PERM
  81 #else
  82 #define FAAN_SCALE NO_PERM
  83 #endif
  84
  85 static int cpu_flags;
  86
  87 static const struct algo fdct_tab[] = {
  88     { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
  89     { "FAAN",           ff_faandct,            FAAN_SCALE },
  90     { "IJG-AAN-INT",    fdct_ifast,            SCALE_PERM },
  91     { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
  92
  93 #if HAVE_MMX
  94     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
  95     { "MMX2",           ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMX2    },
  96     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
  97 #endif
  98
  99 #if HAVE_ALTIVEC
 100     { "altivecfdct",    fdct_altivec,          NO_PERM,   AV_CPU_FLAG_ALTIVEC },
 101 #endif
 102
 103 #if ARCH_BFIN
 104     { "BFINfdct",       ff_bfin_fdct,          NO_PERM  },
 105 #endif
 106
 107     { 0 }
 108 };
 109
 110 static const struct algo idct_tab[] = {
 111     { "FAANI",          ff_faanidct,           NO_PERM  },
 112     { "REF-DBL",        ff_ref_idct,           NO_PERM  },
 113     { "INT",            j_rev_dct,             MMX_PERM },
 114     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
 115
 116 #if HAVE_MMX
 117 #if CONFIG_GPL
 118     { "LIBMPEG2-MMX",   ff_mmx_idct,           MMX_PERM,  AV_CPU_FLAG_MMX,  1 },
 119     { "LIBMPEG2-MMX2",  ff_mmxext_idct,        MMX_PERM,  AV_CPU_FLAG_MMX2, 1 },
 120 #endif
 121     { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
 122     { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 },
 123     { "XVID-MMX2",      ff_idct_xvid_mmx2,     NO_PERM,   AV_CPU_FLAG_MMX2, 1 },
 124     { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
 125 #endif
 126
 127 #if ARCH_BFIN
 128     { "BFINidct",       ff_bfin_idct,          NO_PERM  },
 129 #endif
 130
 131 #if ARCH_ARM
 132     { "SIMPLE-ARM",     ff_simple_idct_arm,    NO_PERM  },
 133     { "INT-ARM",        ff_j_rev_dct_arm,      MMX_PERM },
 134 #endif
 135 #if HAVE_ARMV5TE
 136     { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM  },
 137 #endif
 138 #if HAVE_ARMV6
 139     { "SIMPLE-ARMV6",   ff_simple_idct_armv6,  MMX_PERM },
 140 #endif
 141 #if HAVE_NEON
 142     { "SIMPLE-NEON",    ff_simple_idct_neon,   PARTTRANS_PERM },
 143 #endif
 144
 145 #if ARCH_ALPHA
 146     { "SIMPLE-ALPHA",   ff_simple_idct_axp,    NO_PERM },
 147 #endif
 148
 149     { 0 }
 150 };
 151
 152 #define AANSCALE_BITS 12
 153
 154 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
 155
 156 static int64_t gettime(void)
 157 {
 158     struct timeval tv;
 159     gettimeofday(&tv, NULL);
 160     return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
 161 }
 162
 163 #define NB_ITS 20000
 164 #define NB_ITS_SPEED 50000
 165
 166 static short idct_mmx_perm[64];
 167
 168 static short idct_simple_mmx_perm[64] = {
 169     0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
 170     0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
 171     0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
 172     0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
 173     0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
 174     0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
 175     0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
 176     0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 177 };
 178
 179 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
 180
 181 static void idct_mmx_init(void)
 182 {
 183     int i;
 184
 185     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
 186     for (i = 0; i < 64; i++) {
 187         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
 188     }
 189 }
 190
 191 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
 192 DECLARE_ALIGNED(8,  static DCTELEM, block1)[64];
 193
 194 static inline void mmx_emms(void)
 195 {
 196 #if HAVE_MMX
 197     if (cpu_flags & AV_CPU_FLAG_MMX)
 198         __asm__ volatile ("emms\n\t");
 199 #endif
 200 }
 201
 202 static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng, int vals)
 203 {
 204     int i, j;
 205
 206     memset(block, 0, 64 * sizeof(*block));
 207
 208     switch (test) {
 209     case 0:
 210         for (i = 0; i < 64; i++)
 211             block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
 212         if (is_idct) {
 213             ff_ref_fdct(block);
 214             for (i = 0; i < 64; i++)
 215                 block[i] >>= 3;
 216         }
 217         break;
 218     case 1:
 219         j = av_lfg_get(prng) % 10 + 1;
 220         for (i = 0; i < j; i++)
 221             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % (2*vals) -vals;
 222         break;
 223     case 2:
 224         block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
 225         block[63] = (block[0] & 1) ^ 1;
 226         break;
 227     }
 228 }
 229
 230 static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
 231 {
 232     int i;
 233
 234     if (perm == MMX_PERM) {
 235         for (i = 0; i < 64; i++)
 236             dst[idct_mmx_perm[i]] = src[i];
 237     } else if (perm == MMX_SIMPLE_PERM) {
 238         for (i = 0; i < 64; i++)
 239             dst[idct_simple_mmx_perm[i]] = src[i];
 240     } else if (perm == SSE2_PERM) {
 241         for (i = 0; i < 64; i++)
 242             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
 243     } else if (perm == PARTTRANS_PERM) {
 244         for (i = 0; i < 64; i++)
 245             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 246     } else {
 247         for (i = 0; i < 64; i++)
 248             dst[i] = src[i];
 249     }
 250 }
 251
 252 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
 253 {
 254     void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
 255     int it, i, scale;
 256     int err_inf, v;
 257     int64_t err2, ti, ti1, it1, err_sum = 0;
 258     int64_t sysErr[64], sysErrMax = 0;
 259     int maxout = 0;
 260     int blockSumErrMax = 0, blockSumErr;
 261     AVLFG prng;
 262     const int vals=1<<bits;
 263     double omse, ome;
 264     int spec_err;
 265
 266     av_lfg_init(&prng, 1);
 267
 268     err_inf = 0;
 269     err2 = 0;
 270     for (i = 0; i < 64; i++)
 271         sysErr[i] = 0;
 272     for (it = 0; it < NB_ITS; it++) {
 273         init_block(block1, test, is_idct, &prng, vals);
 274         permute(block, block1, dct->format);
 275
 276         dct->func(block);
 277         mmx_emms();
 278
 279         if (dct->format == SCALE_PERM) {
 280             for (i = 0; i < 64; i++) {
 281                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
 282                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
 283             }
 284         }
 285
 286         ref(block1);
 287
 288         blockSumErr = 0;
 289         for (i = 0; i < 64; i++) {
 290             int err = block[i] - block1[i];
 291             err_sum += err;
 292             v = abs(err);
 293             if (v > err_inf)
 294                 err_inf = v;
 295             err2 += v * v;
 296             sysErr[i] += block[i] - block1[i];
 297             blockSumErr += v;
 298             if (abs(block[i]) > maxout)
 299                 maxout = abs(block[i]);
 300         }
 301         if (blockSumErrMax < blockSumErr)
 302             blockSumErrMax = blockSumErr;
 303     }
 304     for (i = 0; i < 64; i++)
 305         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
 306
 307     for (i = 0; i < 64; i++) {
 308         if (i % 8 == 0)
 309             printf("\n");
 310         printf("%7d ", (int) sysErr[i]);
 311     }
 312     printf("\n");
 313
 314     omse = (double) err2 / NB_ITS / 64;
 315     ome  = (double) err_sum / NB_ITS / 64;
 316
 317     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 318
 319     printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 320            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
 321            omse, ome, (double) sysErrMax / NB_ITS,
 322            maxout, blockSumErrMax);
 323
 324     if (spec_err && !dct->nonspec)
 325         return 1;
 326
 327     if (!speed)
 328         return 0;
 329
 330     /* speed test */
 331
 332     init_block(block, test, is_idct, &prng, vals);
 333     permute(block1, block, dct->format);
 334
 335     ti = gettime();
 336     it1 = 0;
 337     do {
 338         for (it = 0; it < NB_ITS_SPEED; it++) {
 339             memcpy(block, block1, sizeof(block));
 340             dct->func(block);
 341         }
 342         it1 += NB_ITS_SPEED;
 343         ti1 = gettime() - ti;
 344     } while (ti1 < 1000000);
 345     mmx_emms();
 346
 347     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
 348            (double) it1 * 1000.0 / (double) ti1);
 349
 350     return 0;
 351 }
 352
 353 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
 354 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
 355
 356 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
 357 {
 358     static int init;
 359     static double c8[8][8];
 360     static double c4[4][4];
 361     double block1[64], block2[64], block3[64];
 362     double s, sum, v;
 363     int i, j, k;
 364
 365     if (!init) {
 366         init = 1;
 367
 368         for (i = 0; i < 8; i++) {
 369             sum = 0;
 370             for (j = 0; j < 8; j++) {
 371                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 372                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
 373                 sum += c8[i][j] * c8[i][j];
 374             }
 375         }
 376
 377         for (i = 0; i < 4; i++) {
 378             sum = 0;
 379             for (j = 0; j < 4; j++) {
 380                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 381                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
 382                 sum += c4[i][j] * c4[i][j];
 383             }
 384         }
 385     }
 386
 387     /* butterfly */
 388     s = 0.5 * sqrt(2.0);
 389     for (i = 0; i < 4; i++) {
 390         for (j = 0; j < 8; j++) {
 391             block1[8 * (2 * i) + j] =
 392                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
 393             block1[8 * (2 * i + 1) + j] =
 394                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
 395         }
 396     }
 397
 398     /* idct8 on lines */
 399     for (i = 0; i < 8; i++) {
 400         for (j = 0; j < 8; j++) {
 401             sum = 0;
 402             for (k = 0; k < 8; k++)
 403                 sum += c8[k][j] * block1[8 * i + k];
 404             block2[8 * i + j] = sum;
 405         }
 406     }
 407
 408     /* idct4 */
 409     for (i = 0; i < 8; i++) {
 410         for (j = 0; j < 4; j++) {
 411             /* top */
 412             sum = 0;
 413             for (k = 0; k < 4; k++)
 414                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 415             block3[8 * (2 * j) + i] = sum;
 416
 417             /* bottom */
 418             sum = 0;
 419             for (k = 0; k < 4; k++)
 420                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 421             block3[8 * (2 * j + 1) + i] = sum;
 422         }
 423     }
 424
 425     /* clamp and store the result */
 426     for (i = 0; i < 8; i++) {
 427         for (j = 0; j < 8; j++) {
 428             v = block3[8 * i + j];
 429             if      (v < 0)   v = 0;
 430             else if (v > 255) v = 255;
 431             dest[i * linesize + j] = (int) rint(v);
 432         }
 433     }
 434 }
 435
 436 static void idct248_error(const char *name,
 437                           void (*idct248_put)(uint8_t *dest, int line_size,
 438                                               int16_t *block),
 439                           int speed)
 440 {
 441     int it, i, it1, ti, ti1, err_max, v;
 442     AVLFG prng;
 443
 444     av_lfg_init(&prng, 1);
 445
 446     /* just one test to see if code is correct (precision is less
 447        important here) */
 448     err_max = 0;
 449     for (it = 0; it < NB_ITS; it++) {
 450         /* XXX: use forward transform to generate values */
 451         for (i = 0; i < 64; i++)
 452             block1[i] = av_lfg_get(&prng) % 256 - 128;
 453         block1[0] += 1024;
 454
 455         for (i = 0; i < 64; i++)
 456             block[i] = block1[i];
 457         idct248_ref(img_dest1, 8, block);
 458
 459         for (i = 0; i < 64; i++)
 460             block[i] = block1[i];
 461         idct248_put(img_dest, 8, block);
 462
 463         for (i = 0; i < 64; i++) {
 464             v = abs((int) img_dest[i] - (int) img_dest1[i]);
 465             if (v == 255)
 466                 printf("%d %d\n", img_dest[i], img_dest1[i]);
 467             if (v > err_max)
 468                 err_max = v;
 469         }
 470 #if 0
 471         printf("ref=\n");
 472         for(i=0;i<8;i++) {
 473             int j;
 474             for(j=0;j<8;j++) {
 475                 printf(" %3d", img_dest1[i*8+j]);
 476             }
 477             printf("\n");
 478         }
 479
 480         printf("out=\n");
 481         for(i=0;i<8;i++) {
 482             int j;
 483             for(j=0;j<8;j++) {
 484                 printf(" %3d", img_dest[i*8+j]);
 485             }
 486             printf("\n");
 487         }
 488 #endif
 489     }
 490     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 491
 492     if (!speed)
 493         return;
 494
 495     ti = gettime();
 496     it1 = 0;
 497     do {
 498         for (it = 0; it < NB_ITS_SPEED; it++) {
 499             for (i = 0; i < 64; i++)
 500                 block[i] = block1[i];
 501             idct248_put(img_dest, 8, block);
 502         }
 503         it1 += NB_ITS_SPEED;
 504         ti1 = gettime() - ti;
 505     } while (ti1 < 1000000);
 506     mmx_emms();
 507
 508     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
 509            (double) it1 * 1000.0 / (double) ti1);
 510 }
 511
 512 static void help(void)
 513 {
 514     printf("dct-test [-i] [<test-number>] [<bits>]\n"
 515            "test-number 0 -> test with random matrixes\n"
 516            "            1 -> test with random sparse matrixes\n"
 517            "            2 -> do 3. test from mpeg4 std\n"
 518            "bits        Number of time domain bits to use, 8 is default\n"
 519            "-i          test IDCT implementations\n"
 520            "-4          test IDCT248 implementations\n"
 521            "-t          speed test\n");
 522 }
 523
 524 int main(int argc, char **argv)
 525 {
 526     int test_idct = 0, test_248_dct = 0;
 527     int c, i;
 528     int test = 1;
 529     int speed = 0;
 530     int err = 0;
 531     int bits=8;
 532
 533     cpu_flags = av_get_cpu_flags();
 534
 535     ff_ref_dct_init();
 536     idct_mmx_init();
 537
 538     for (i = 0; i < 256; i++)
 539         cropTbl[i + MAX_NEG_CROP] = i;
 540     for (i = 0; i < MAX_NEG_CROP; i++) {
 541         cropTbl[i] = 0;
 542         cropTbl[i + MAX_NEG_CROP + 256] = 255;
 543     }
 544
 545     for (;;) {
 546         c = getopt(argc, argv, "ih4t");
 547         if (c == -1)
 548             break;
 549         switch (c) {
 550         case 'i':
 551             test_idct = 1;
 552             break;
 553         case '4':
 554             test_248_dct = 1;
 555             break;
 556         case 't':
 557             speed = 1;
 558             break;
 559         default:
 560         case 'h':
 561             help();
 562             return 0;
 563         }
 564     }
 565
 566     if (optind < argc)
 567         test = atoi(argv[optind]);
 568     if(optind+1 < argc) bits= atoi(argv[optind+1]);
 569
 570     printf("ffmpeg DCT/IDCT test\n");
 571
 572     if (test_248_dct) {
 573         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
 574     } else {
 575         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
 576         for (i = 0; algos[i].name; i++)
 577             if (!(~cpu_flags & algos[i].mm_support)) {
 578                 err |= dct_error(&algos[i], test, test_idct, speed, bits);
 579             }
 580     }
 581
 582     return err;
 583 }