git.sesse.net Git - ffmpeg/blob - libavcodec/dct-test.c

   1 /*
   2  * (c) 2001 Fabrice Bellard
   3  *     2007 Marc Hoffman <marc.hoffman@analog.com>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * DCT test (c) 2001 Fabrice Bellard
  25  * Started from sample code by Juan J. Sierralta P.
  26  */
  27
  28 #include "config.h"
  29 #include <stdlib.h>
  30 #include <stdio.h>
  31 #include <string.h>
  32 #if HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #include <math.h>
  36
  37 #include "libavutil/cpu.h"
  38 #include "libavutil/common.h"
  39 #include "libavutil/lfg.h"
  40 #include "libavutil/time.h"
  41
  42 #include "dct.h"
  43 #include "simple_idct.h"
  44 #include "aandcttab.h"
  45 #include "faandct.h"
  46 #include "faanidct.h"
  47 #include "x86/idct_xvid.h"
  48 #include "dctref.h"
  49
  50 // ALTIVEC
  51 void ff_fdct_altivec(int16_t *block);
  52
  53 // ARM
  54 void ff_j_rev_dct_arm(int16_t *data);
  55 void ff_simple_idct_arm(int16_t *data);
  56 void ff_simple_idct_armv5te(int16_t *data);
  57 void ff_simple_idct_armv6(int16_t *data);
  58 void ff_simple_idct_neon(int16_t *data);
  59
  60 struct algo {
  61     const char *name;
  62     void (*func)(int16_t *block);
  63     enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
  64                      SSE2_PERM, PARTTRANS_PERM } format;
  65     int cpu_flag;
  66     int nonspec;
  67 };
  68
  69 static const struct algo fdct_tab[] = {
  70     { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
  71     { "FAAN",           ff_faandct,            NO_PERM    },
  72     { "IJG-AAN-INT",    ff_fdct_ifast,         SCALE_PERM },
  73     { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
  74
  75 #if HAVE_MMX_INLINE
  76     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
  77 #endif
  78 #if HAVE_MMXEXT_INLINE
  79     { "MMXEXT",         ff_fdct_mmxext,        NO_PERM,   AV_CPU_FLAG_MMXEXT  },
  80 #endif
  81 #if HAVE_SSE2_INLINE
  82     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
  83 #endif
  84
  85 #if HAVE_ALTIVEC
  86     { "altivecfdct",    ff_fdct_altivec,       NO_PERM,   AV_CPU_FLAG_ALTIVEC },
  87 #endif
  88
  89     { 0 }
  90 };
  91
  92 static const struct algo idct_tab[] = {
  93     { "FAANI",          ff_faanidct,           NO_PERM  },
  94     { "REF-DBL",        ff_ref_idct,           NO_PERM  },
  95     { "INT",            ff_j_rev_dct,          MMX_PERM },
  96     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
  97
  98 #if HAVE_MMX_INLINE
  99     { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
 100     { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 },
 101 #endif
 102 #if HAVE_MMXEXT_INLINE
 103     { "XVID-MMXEXT",    ff_idct_xvid_mmxext,   NO_PERM,   AV_CPU_FLAG_MMXEXT, 1 },
 104 #endif
 105 #if HAVE_SSE2_INLINE
 106     { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
 107 #endif
 108
 109 #if ARCH_ARM
 110     { "SIMPLE-ARM",     ff_simple_idct_arm,    NO_PERM  },
 111     { "INT-ARM",        ff_j_rev_dct_arm,      MMX_PERM },
 112 #endif
 113 #if HAVE_ARMV5TE
 114     { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM,   AV_CPU_FLAG_ARMV5TE },
 115 #endif
 116 #if HAVE_ARMV6
 117     { "SIMPLE-ARMV6",   ff_simple_idct_armv6,  MMX_PERM,  AV_CPU_FLAG_ARMV6   },
 118 #endif
 119 #if HAVE_NEON && ARCH_ARM
 120     { "SIMPLE-NEON",    ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON },
 121 #endif
 122
 123     { 0 }
 124 };
 125
/* Fixed-point shift used by dct_error() when it rescales the output of
 * SCALE_PERM transforms with ff_aanscales[]. */
#define AANSCALE_BITS 12

/* Number of random blocks fed to each transform in the accuracy tests. */
#define NB_ITS 20000
/* Number of transform calls per timing batch in the speed tests; batches are
 * repeated until at least 1000000 av_gettime() units have elapsed. */
#define NB_ITS_SPEED 50000
 130
 131 static short idct_mmx_perm[64];
 132
 133 static short idct_simple_mmx_perm[64] = {
 134     0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
 135     0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
 136     0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
 137     0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
 138     0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
 139     0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
 140     0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
 141     0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 142 };
 143
 144 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
 145
 146 static void idct_mmx_init(void)
 147 {
 148     int i;
 149
 150     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
 151     for (i = 0; i < 64; i++) {
 152         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
 153     }
 154 }
 155
/* Shared 64-coefficient work buffers used by dct_error() and idct248_error().
 * In the accuracy loops `block` is handed to the transform under test and
 * `block1` holds the generated input (and, in dct_error(), the reference
 * result). The first macro argument is the requested alignment. */
DECLARE_ALIGNED(16, static int16_t, block)[64];
DECLARE_ALIGNED(8,  static int16_t, block1)[64];
 158
 159 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
 160 {
 161     int i, j;
 162
 163     memset(block, 0, 64 * sizeof(*block));
 164
 165     switch (test) {
 166     case 0:
 167         for (i = 0; i < 64; i++)
 168             block[i] = (av_lfg_get(prng) % 512) - 256;
 169         if (is_idct) {
 170             ff_ref_fdct(block);
 171             for (i = 0; i < 64; i++)
 172                 block[i] >>= 3;
 173         }
 174         break;
 175     case 1:
 176         j = av_lfg_get(prng) % 10 + 1;
 177         for (i = 0; i < j; i++)
 178             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
 179         break;
 180     case 2:
 181         block[ 0] = av_lfg_get(prng) % 4096 - 2048;
 182         block[63] = (block[0] & 1) ^ 1;
 183         break;
 184     }
 185 }
 186
 187 static void permute(int16_t dst[64], const int16_t src[64], int perm)
 188 {
 189     int i;
 190
 191     if (perm == MMX_PERM) {
 192         for (i = 0; i < 64; i++)
 193             dst[idct_mmx_perm[i]] = src[i];
 194     } else if (perm == MMX_SIMPLE_PERM) {
 195         for (i = 0; i < 64; i++)
 196             dst[idct_simple_mmx_perm[i]] = src[i];
 197     } else if (perm == SSE2_PERM) {
 198         for (i = 0; i < 64; i++)
 199             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
 200     } else if (perm == PARTTRANS_PERM) {
 201         for (i = 0; i < 64; i++)
 202             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 203     } else {
 204         for (i = 0; i < 64; i++)
 205             dst[i] = src[i];
 206     }
 207 }
 208
 209 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
 210 {
 211     void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
 212     int it, i, scale;
 213     int err_inf, v;
 214     int64_t err2, ti, ti1, it1, err_sum = 0;
 215     int64_t sysErr[64], sysErrMax = 0;
 216     int maxout = 0;
 217     int blockSumErrMax = 0, blockSumErr;
 218     AVLFG prng;
 219     double omse, ome;
 220     int spec_err;
 221
 222     av_lfg_init(&prng, 1);
 223
 224     err_inf = 0;
 225     err2 = 0;
 226     for (i = 0; i < 64; i++)
 227         sysErr[i] = 0;
 228     for (it = 0; it < NB_ITS; it++) {
 229         init_block(block1, test, is_idct, &prng);
 230         permute(block, block1, dct->format);
 231
 232         dct->func(block);
 233         emms_c();
 234
 235         if (dct->format == SCALE_PERM) {
 236             for (i = 0; i < 64; i++) {
 237                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
 238                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
 239             }
 240         }
 241
 242         ref(block1);
 243
 244         blockSumErr = 0;
 245         for (i = 0; i < 64; i++) {
 246             int err = block[i] - block1[i];
 247             err_sum += err;
 248             v = abs(err);
 249             if (v > err_inf)
 250                 err_inf = v;
 251             err2 += v * v;
 252             sysErr[i] += block[i] - block1[i];
 253             blockSumErr += v;
 254             if (abs(block[i]) > maxout)
 255                 maxout = abs(block[i]);
 256         }
 257         if (blockSumErrMax < blockSumErr)
 258             blockSumErrMax = blockSumErr;
 259     }
 260     for (i = 0; i < 64; i++)
 261         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
 262
 263     for (i = 0; i < 64; i++) {
 264         if (i % 8 == 0)
 265             printf("\n");
 266         printf("%7d ", (int) sysErr[i]);
 267     }
 268     printf("\n");
 269
 270     omse = (double) err2 / NB_ITS / 64;
 271     ome  = (double) err_sum / NB_ITS / 64;
 272
 273     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 274
 275     printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 276            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
 277            omse, ome, (double) sysErrMax / NB_ITS,
 278            maxout, blockSumErrMax);
 279
 280     if (spec_err && !dct->nonspec)
 281         return 1;
 282
 283     if (!speed)
 284         return 0;
 285
 286     /* speed test */
 287     init_block(block, test, is_idct, &prng);
 288     permute(block1, block, dct->format);
 289
 290     ti = av_gettime();
 291     it1 = 0;
 292     do {
 293         for (it = 0; it < NB_ITS_SPEED; it++) {
 294             memcpy(block, block1, sizeof(block));
 295             dct->func(block);
 296         }
 297         it1 += NB_ITS_SPEED;
 298         ti1 = av_gettime() - ti;
 299     } while (ti1 < 1000000);
 300     emms_c();
 301
 302     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
 303            (double) it1 * 1000.0 / (double) ti1);
 304
 305     return 0;
 306 }
 307
/* 8x8 pixel outputs compared by idct248_error(): img_dest receives the result
 * of the implementation under test, img_dest1 the result of idct248_ref(). */
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
 310
 311 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
 312 {
 313     static int init;
 314     static double c8[8][8];
 315     static double c4[4][4];
 316     double block1[64], block2[64], block3[64];
 317     double s, sum, v;
 318     int i, j, k;
 319
 320     if (!init) {
 321         init = 1;
 322
 323         for (i = 0; i < 8; i++) {
 324             sum = 0;
 325             for (j = 0; j < 8; j++) {
 326                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 327                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
 328                 sum += c8[i][j] * c8[i][j];
 329             }
 330         }
 331
 332         for (i = 0; i < 4; i++) {
 333             sum = 0;
 334             for (j = 0; j < 4; j++) {
 335                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 336                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
 337                 sum += c4[i][j] * c4[i][j];
 338             }
 339         }
 340     }
 341
 342     /* butterfly */
 343     s = 0.5 * sqrt(2.0);
 344     for (i = 0; i < 4; i++) {
 345         for (j = 0; j < 8; j++) {
 346             block1[8 * (2 * i) + j] =
 347                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
 348             block1[8 * (2 * i + 1) + j] =
 349                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
 350         }
 351     }
 352
 353     /* idct8 on lines */
 354     for (i = 0; i < 8; i++) {
 355         for (j = 0; j < 8; j++) {
 356             sum = 0;
 357             for (k = 0; k < 8; k++)
 358                 sum += c8[k][j] * block1[8 * i + k];
 359             block2[8 * i + j] = sum;
 360         }
 361     }
 362
 363     /* idct4 */
 364     for (i = 0; i < 8; i++) {
 365         for (j = 0; j < 4; j++) {
 366             /* top */
 367             sum = 0;
 368             for (k = 0; k < 4; k++)
 369                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 370             block3[8 * (2 * j) + i] = sum;
 371
 372             /* bottom */
 373             sum = 0;
 374             for (k = 0; k < 4; k++)
 375                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 376             block3[8 * (2 * j + 1) + i] = sum;
 377         }
 378     }
 379
 380     /* clamp and store the result */
 381     for (i = 0; i < 8; i++) {
 382         for (j = 0; j < 8; j++) {
 383             v = block3[8 * i + j];
 384             if      (v < 0)   v = 0;
 385             else if (v > 255) v = 255;
 386             dest[i * linesize + j] = (int) rint(v);
 387         }
 388     }
 389 }
 390
 391 static void idct248_error(const char *name,
 392                           void (*idct248_put)(uint8_t *dest, int line_size,
 393                                               int16_t *block),
 394                           int speed)
 395 {
 396     int it, i, it1, ti, ti1, err_max, v;
 397     AVLFG prng;
 398
 399     av_lfg_init(&prng, 1);
 400
 401     /* just one test to see if code is correct (precision is less
 402        important here) */
 403     err_max = 0;
 404     for (it = 0; it < NB_ITS; it++) {
 405         /* XXX: use forward transform to generate values */
 406         for (i = 0; i < 64; i++)
 407             block1[i] = av_lfg_get(&prng) % 256 - 128;
 408         block1[0] += 1024;
 409
 410         for (i = 0; i < 64; i++)
 411             block[i] = block1[i];
 412         idct248_ref(img_dest1, 8, block);
 413
 414         for (i = 0; i < 64; i++)
 415             block[i] = block1[i];
 416         idct248_put(img_dest, 8, block);
 417
 418         for (i = 0; i < 64; i++) {
 419             v = abs((int) img_dest[i] - (int) img_dest1[i]);
 420             if (v == 255)
 421                 printf("%d %d\n", img_dest[i], img_dest1[i]);
 422             if (v > err_max)
 423                 err_max = v;
 424         }
 425     }
 426     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 427
 428     if (!speed)
 429         return;
 430
 431     ti = av_gettime();
 432     it1 = 0;
 433     do {
 434         for (it = 0; it < NB_ITS_SPEED; it++) {
 435             for (i = 0; i < 64; i++)
 436                 block[i] = block1[i];
 437             idct248_put(img_dest, 8, block);
 438         }
 439         it1 += NB_ITS_SPEED;
 440         ti1 = av_gettime() - ti;
 441     } while (ti1 < 1000000);
 442     emms_c();
 443
 444     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
 445            (double) it1 * 1000.0 / (double) ti1);
 446 }
 447
 448 static void help(void)
 449 {
 450     printf("dct-test [-i] [<test-number>]\n"
 451            "test-number 0 -> test with random matrixes\n"
 452            "            1 -> test with random sparse matrixes\n"
 453            "            2 -> do 3. test from mpeg4 std\n"
 454            "-i          test IDCT implementations\n"
 455            "-4          test IDCT248 implementations\n"
 456            "-t          speed test\n");
 457 }
 458
 459 #if !HAVE_GETOPT
 460 #include "compat/getopt.c"
 461 #endif
 462
 463 int main(int argc, char **argv)
 464 {
 465     int test_idct = 0, test_248_dct = 0;
 466     int c, i;
 467     int test = 1;
 468     int speed = 0;
 469     int err = 0;
 470
 471     ff_ref_dct_init();
 472     idct_mmx_init();
 473
 474     for (;;) {
 475         c = getopt(argc, argv, "ih4t");
 476         if (c == -1)
 477             break;
 478         switch (c) {
 479         case 'i':
 480             test_idct = 1;
 481             break;
 482         case '4':
 483             test_248_dct = 1;
 484             break;
 485         case 't':
 486             speed = 1;
 487             break;
 488         default:
 489         case 'h':
 490             help();
 491             return 0;
 492         }
 493     }
 494
 495     if (optind < argc)
 496         test = atoi(argv[optind]);
 497
 498     printf("Libav DCT/IDCT test\n");
 499
 500     if (test_248_dct) {
 501         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
 502     } else {
 503         const int cpu_flags = av_get_cpu_flags();
 504         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
 505         for (i = 0; algos[i].name; i++)
 506             if (!(~cpu_flags & algos[i].cpu_flag)) {
 507                 err |= dct_error(&algos[i], test, test_idct, speed);
 508             }
 509     }
 510
 511     if (err)
 512         printf("Error: %d.\n", err);
 513
 514     return !!err;
 515 }