git.sesse.net Git - ffmpeg/blob - libavcodec/dct-test.c

   1 /*
   2  * (c) 2001 Fabrice Bellard
   3  *     2007 Marc Hoffman <marc.hoffman@analog.com>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * DCT test (c) 2001 Fabrice Bellard
  25  * Started from sample code by Juan J. Sierralta P.
  26  */
  27
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <unistd.h>
  32 #include <math.h>
  33
  34 #include "libavutil/cpu.h"
  35 #include "libavutil/common.h"
  36 #include "libavutil/lfg.h"
  37 #include "libavutil/time.h"
  38
  39 #include "simple_idct.h"
  40 #include "aandcttab.h"
  41 #include "faandct.h"
  42 #include "faanidct.h"
  43 #include "x86/idct_xvid.h"
  44 #include "dctref.h"
  45
  46 #undef printf
  47
  48 void ff_mmx_idct(DCTELEM *data);
  49 void ff_mmxext_idct(DCTELEM *data);
  50
  51 // BFIN
  52 void ff_bfin_idct(DCTELEM *block);
  53 void ff_bfin_fdct(DCTELEM *block);
  54
  55 // ALTIVEC
  56 void ff_fdct_altivec(DCTELEM *block);
  57 //void ff_idct_altivec(DCTELEM *block);?? no routine
  58
  59 // ARM
  60 void ff_j_rev_dct_arm(DCTELEM *data);
  61 void ff_simple_idct_arm(DCTELEM *data);
  62 void ff_simple_idct_armv5te(DCTELEM *data);
  63 void ff_simple_idct_armv6(DCTELEM *data);
  64 void ff_simple_idct_neon(DCTELEM *data);
  65
  66 void ff_simple_idct_axp(DCTELEM *data);
  67
  68 struct algo {
  69     const char *name;
  70     void (*func)(DCTELEM *block);
  71     enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
  72                      SSE2_PERM, PARTTRANS_PERM } format;
  73     int mm_support;
  74     int nonspec;
  75 };
  76
  77 static int cpu_flags;
  78
  79 static const struct algo fdct_tab[] = {
  80     { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
  81     { "FAAN",           ff_faandct,            NO_PERM    },
  82     { "IJG-AAN-INT",    ff_fdct_ifast,         SCALE_PERM },
  83     { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
  84
  85 #if HAVE_MMX
  86     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
  87     { "MMX2",           ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMX2    },
  88     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
  89 #endif
  90
  91 #if HAVE_ALTIVEC
  92     { "altivecfdct",    ff_fdct_altivec,       NO_PERM,   AV_CPU_FLAG_ALTIVEC },
  93 #endif
  94
  95 #if ARCH_BFIN
  96     { "BFINfdct",       ff_bfin_fdct,          NO_PERM  },
  97 #endif
  98
  99     { 0 }
 100 };
 101
 102 static const struct algo idct_tab[] = {
 103     { "FAANI",          ff_faanidct,           NO_PERM  },
 104     { "REF-DBL",        ff_ref_idct,           NO_PERM  },
 105     { "INT",            ff_j_rev_dct,          MMX_PERM },
 106     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
 107
 108 #if HAVE_MMX
 109 #if CONFIG_GPL
 110     { "LIBMPEG2-MMX",   ff_mmx_idct,           MMX_PERM,  AV_CPU_FLAG_MMX,  1 },
 111     { "LIBMPEG2-MMX2",  ff_mmxext_idct,        MMX_PERM,  AV_CPU_FLAG_MMX2, 1 },
 112 #endif
 113     { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
 114     { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 },
 115     { "XVID-MMX2",      ff_idct_xvid_mmx2,     NO_PERM,   AV_CPU_FLAG_MMX2, 1 },
 116     { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
 117 #endif
 118
 119 #if ARCH_BFIN
 120     { "BFINidct",       ff_bfin_idct,          NO_PERM  },
 121 #endif
 122
 123 #if ARCH_ARM
 124     { "SIMPLE-ARM",     ff_simple_idct_arm,    NO_PERM  },
 125     { "INT-ARM",        ff_j_rev_dct_arm,      MMX_PERM },
 126 #endif
 127 #if HAVE_ARMV5TE
 128     { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM  },
 129 #endif
 130 #if HAVE_ARMV6
 131     { "SIMPLE-ARMV6",   ff_simple_idct_armv6,  MMX_PERM },
 132 #endif
 133 #if HAVE_NEON
 134     { "SIMPLE-NEON",    ff_simple_idct_neon,   PARTTRANS_PERM },
 135 #endif
 136
 137 #if ARCH_ALPHA
 138     { "SIMPLE-ALPHA",   ff_simple_idct_axp,    NO_PERM },
 139 #endif
 140
 141     { 0 }
 142 };
 143
 144 #define AANSCALE_BITS 12
 145
 146 #define NB_ITS 20000
 147 #define NB_ITS_SPEED 50000
 148
 149 static short idct_mmx_perm[64];
 150
 151 static short idct_simple_mmx_perm[64] = {
 152     0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
 153     0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
 154     0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
 155     0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
 156     0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
 157     0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
 158     0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
 159     0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 160 };
 161
 162 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
 163
 164 static void idct_mmx_init(void)
 165 {
 166     int i;
 167
 168     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
 169     for (i = 0; i < 64; i++) {
 170         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
 171     }
 172 }
 173
 174 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
 175 DECLARE_ALIGNED(8,  static DCTELEM, block1)[64];
 176
 177 static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng)
 178 {
 179     int i, j;
 180
 181     memset(block, 0, 64 * sizeof(*block));
 182
 183     switch (test) {
 184     case 0:
 185         for (i = 0; i < 64; i++)
 186             block[i] = (av_lfg_get(prng) % 512) - 256;
 187         if (is_idct) {
 188             ff_ref_fdct(block);
 189             for (i = 0; i < 64; i++)
 190                 block[i] >>= 3;
 191         }
 192         break;
 193     case 1:
 194         j = av_lfg_get(prng) % 10 + 1;
 195         for (i = 0; i < j; i++)
 196             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
 197         break;
 198     case 2:
 199         block[ 0] = av_lfg_get(prng) % 4096 - 2048;
 200         block[63] = (block[0] & 1) ^ 1;
 201         break;
 202     }
 203 }
 204
 205 static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
 206 {
 207     int i;
 208
 209     if (perm == MMX_PERM) {
 210         for (i = 0; i < 64; i++)
 211             dst[idct_mmx_perm[i]] = src[i];
 212     } else if (perm == MMX_SIMPLE_PERM) {
 213         for (i = 0; i < 64; i++)
 214             dst[idct_simple_mmx_perm[i]] = src[i];
 215     } else if (perm == SSE2_PERM) {
 216         for (i = 0; i < 64; i++)
 217             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
 218     } else if (perm == PARTTRANS_PERM) {
 219         for (i = 0; i < 64; i++)
 220             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 221     } else {
 222         for (i = 0; i < 64; i++)
 223             dst[i] = src[i];
 224     }
 225 }
 226
 227 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
 228 {
 229     void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
 230     int it, i, scale;
 231     int err_inf, v;
 232     int64_t err2, ti, ti1, it1, err_sum = 0;
 233     int64_t sysErr[64], sysErrMax = 0;
 234     int maxout = 0;
 235     int blockSumErrMax = 0, blockSumErr;
 236     AVLFG prng;
 237     double omse, ome;
 238     int spec_err;
 239
 240     av_lfg_init(&prng, 1);
 241
 242     err_inf = 0;
 243     err2 = 0;
 244     for (i = 0; i < 64; i++)
 245         sysErr[i] = 0;
 246     for (it = 0; it < NB_ITS; it++) {
 247         init_block(block1, test, is_idct, &prng);
 248         permute(block, block1, dct->format);
 249
 250         dct->func(block);
 251         emms_c();
 252
 253         if (dct->format == SCALE_PERM) {
 254             for (i = 0; i < 64; i++) {
 255                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
 256                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
 257             }
 258         }
 259
 260         ref(block1);
 261
 262         blockSumErr = 0;
 263         for (i = 0; i < 64; i++) {
 264             int err = block[i] - block1[i];
 265             err_sum += err;
 266             v = abs(err);
 267             if (v > err_inf)
 268                 err_inf = v;
 269             err2 += v * v;
 270             sysErr[i] += block[i] - block1[i];
 271             blockSumErr += v;
 272             if (abs(block[i]) > maxout)
 273                 maxout = abs(block[i]);
 274         }
 275         if (blockSumErrMax < blockSumErr)
 276             blockSumErrMax = blockSumErr;
 277     }
 278     for (i = 0; i < 64; i++)
 279         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
 280
 281     for (i = 0; i < 64; i++) {
 282         if (i % 8 == 0)
 283             printf("\n");
 284         printf("%7d ", (int) sysErr[i]);
 285     }
 286     printf("\n");
 287
 288     omse = (double) err2 / NB_ITS / 64;
 289     ome  = (double) err_sum / NB_ITS / 64;
 290
 291     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 292
 293     printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 294            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
 295            omse, ome, (double) sysErrMax / NB_ITS,
 296            maxout, blockSumErrMax);
 297
 298     if (spec_err && !dct->nonspec)
 299         return 1;
 300
 301     if (!speed)
 302         return 0;
 303
 304     /* speed test */
 305     init_block(block, test, is_idct, &prng);
 306     permute(block1, block, dct->format);
 307
 308     ti = av_gettime();
 309     it1 = 0;
 310     do {
 311         for (it = 0; it < NB_ITS_SPEED; it++) {
 312             memcpy(block, block1, sizeof(block));
 313             dct->func(block);
 314         }
 315         it1 += NB_ITS_SPEED;
 316         ti1 = av_gettime() - ti;
 317     } while (ti1 < 1000000);
 318     emms_c();
 319
 320     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
 321            (double) it1 * 1000.0 / (double) ti1);
 322
 323     return 0;
 324 }
 325
 326 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
 327 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
 328
 329 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
 330 {
 331     static int init;
 332     static double c8[8][8];
 333     static double c4[4][4];
 334     double block1[64], block2[64], block3[64];
 335     double s, sum, v;
 336     int i, j, k;
 337
 338     if (!init) {
 339         init = 1;
 340
 341         for (i = 0; i < 8; i++) {
 342             sum = 0;
 343             for (j = 0; j < 8; j++) {
 344                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 345                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
 346                 sum += c8[i][j] * c8[i][j];
 347             }
 348         }
 349
 350         for (i = 0; i < 4; i++) {
 351             sum = 0;
 352             for (j = 0; j < 4; j++) {
 353                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 354                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
 355                 sum += c4[i][j] * c4[i][j];
 356             }
 357         }
 358     }
 359
 360     /* butterfly */
 361     s = 0.5 * sqrt(2.0);
 362     for (i = 0; i < 4; i++) {
 363         for (j = 0; j < 8; j++) {
 364             block1[8 * (2 * i) + j] =
 365                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
 366             block1[8 * (2 * i + 1) + j] =
 367                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
 368         }
 369     }
 370
 371     /* idct8 on lines */
 372     for (i = 0; i < 8; i++) {
 373         for (j = 0; j < 8; j++) {
 374             sum = 0;
 375             for (k = 0; k < 8; k++)
 376                 sum += c8[k][j] * block1[8 * i + k];
 377             block2[8 * i + j] = sum;
 378         }
 379     }
 380
 381     /* idct4 */
 382     for (i = 0; i < 8; i++) {
 383         for (j = 0; j < 4; j++) {
 384             /* top */
 385             sum = 0;
 386             for (k = 0; k < 4; k++)
 387                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 388             block3[8 * (2 * j) + i] = sum;
 389
 390             /* bottom */
 391             sum = 0;
 392             for (k = 0; k < 4; k++)
 393                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 394             block3[8 * (2 * j + 1) + i] = sum;
 395         }
 396     }
 397
 398     /* clamp and store the result */
 399     for (i = 0; i < 8; i++) {
 400         for (j = 0; j < 8; j++) {
 401             v = block3[8 * i + j];
 402             if      (v < 0)   v = 0;
 403             else if (v > 255) v = 255;
 404             dest[i * linesize + j] = (int) rint(v);
 405         }
 406     }
 407 }
 408
 409 static void idct248_error(const char *name,
 410                           void (*idct248_put)(uint8_t *dest, int line_size,
 411                                               int16_t *block),
 412                           int speed)
 413 {
 414     int it, i, it1, ti, ti1, err_max, v;
 415     AVLFG prng;
 416
 417     av_lfg_init(&prng, 1);
 418
 419     /* just one test to see if code is correct (precision is less
 420        important here) */
 421     err_max = 0;
 422     for (it = 0; it < NB_ITS; it++) {
 423         /* XXX: use forward transform to generate values */
 424         for (i = 0; i < 64; i++)
 425             block1[i] = av_lfg_get(&prng) % 256 - 128;
 426         block1[0] += 1024;
 427
 428         for (i = 0; i < 64; i++)
 429             block[i] = block1[i];
 430         idct248_ref(img_dest1, 8, block);
 431
 432         for (i = 0; i < 64; i++)
 433             block[i] = block1[i];
 434         idct248_put(img_dest, 8, block);
 435
 436         for (i = 0; i < 64; i++) {
 437             v = abs((int) img_dest[i] - (int) img_dest1[i]);
 438             if (v == 255)
 439                 printf("%d %d\n", img_dest[i], img_dest1[i]);
 440             if (v > err_max)
 441                 err_max = v;
 442         }
 443     }
 444     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 445
 446     if (!speed)
 447         return;
 448
 449     ti = av_gettime();
 450     it1 = 0;
 451     do {
 452         for (it = 0; it < NB_ITS_SPEED; it++) {
 453             for (i = 0; i < 64; i++)
 454                 block[i] = block1[i];
 455             idct248_put(img_dest, 8, block);
 456         }
 457         it1 += NB_ITS_SPEED;
 458         ti1 = av_gettime() - ti;
 459     } while (ti1 < 1000000);
 460     emms_c();
 461
 462     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
 463            (double) it1 * 1000.0 / (double) ti1);
 464 }
 465
 466 static void help(void)
 467 {
 468     printf("dct-test [-i] [<test-number>]\n"
 469            "test-number 0 -> test with random matrixes\n"
 470            "            1 -> test with random sparse matrixes\n"
 471            "            2 -> do 3. test from mpeg4 std\n"
 472            "-i          test IDCT implementations\n"
 473            "-4          test IDCT248 implementations\n"
 474            "-t          speed test\n");
 475 }
 476
 477 int main(int argc, char **argv)
 478 {
 479     int test_idct = 0, test_248_dct = 0;
 480     int c, i;
 481     int test = 1;
 482     int speed = 0;
 483     int err = 0;
 484
 485     cpu_flags = av_get_cpu_flags();
 486
 487     ff_ref_dct_init();
 488     idct_mmx_init();
 489
 490     for (;;) {
 491         c = getopt(argc, argv, "ih4t");
 492         if (c == -1)
 493             break;
 494         switch (c) {
 495         case 'i':
 496             test_idct = 1;
 497             break;
 498         case '4':
 499             test_248_dct = 1;
 500             break;
 501         case 't':
 502             speed = 1;
 503             break;
 504         default:
 505         case 'h':
 506             help();
 507             return 0;
 508         }
 509     }
 510
 511     if (optind < argc)
 512         test = atoi(argv[optind]);
 513
 514     printf("Libav DCT/IDCT test\n");
 515
 516     if (test_248_dct) {
 517         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
 518     } else {
 519         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
 520         for (i = 0; algos[i].name; i++)
 521             if (!(~cpu_flags & algos[i].mm_support)) {
 522                 err |= dct_error(&algos[i], test, test_idct, speed);
 523             }
 524     }
 525
 526     return err;
 527 }