git.sesse.net Git - ffmpeg/blob - libavcodec/dct-test.c

   1 /*
   2  * (c) 2001 Fabrice Bellard
   3  *     2007 Marc Hoffman <marc.hoffman@analog.com>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * DCT test (c) 2001 Fabrice Bellard
  25  * Started from sample code by Juan J. Sierralta P.
  26  */
  27
  28 #include "config.h"
  29 #include <stdlib.h>
  30 #include <stdio.h>
  31 #include <string.h>
  32 #if HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #include <math.h>
  36
  37 #include "libavutil/cpu.h"
  38 #include "libavutil/common.h"
  39 #include "libavutil/lfg.h"
  40 #include "libavutil/time.h"
  41
  42 #include "dct.h"
  43 #include "idctdsp.h"
  44 #include "simple_idct.h"
  45 #include "aandcttab.h"
  46 #include "faandct.h"
  47 #include "faanidct.h"
  48 #include "ppc/fdct.h"
  49 #include "x86/fdct.h"
  50 #include "x86/idct_xvid.h"
  51 #include "x86/simple_idct.h"
  52 #include "dctref.h"
  53
  54 // ARM
  55 void ff_j_rev_dct_arm(int16_t *data);
  56 void ff_simple_idct_arm(int16_t *data);
  57 void ff_simple_idct_armv5te(int16_t *data);
  58 void ff_simple_idct_armv6(int16_t *data);
  59 void ff_simple_idct_neon(int16_t *data);
  60
  61 struct algo {
  62     const char *name;
  63     void (*func)(int16_t *block);
  64     enum idct_permutation_type perm_type;
  65     int cpu_flag;
  66     int nonspec;
  67 };
  68
  69 static const struct algo fdct_tab[] = {
  70     { "REF-DBL",     ff_ref_fdct,          FF_IDCT_PERM_NONE },
  71     { "FAAN",        ff_faandct,           FF_IDCT_PERM_NONE },
  72     { "IJG-AAN-INT", ff_fdct_ifast,        FF_IDCT_PERM_NONE },
  73     { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
  74
  75 #if HAVE_MMX_INLINE
  76     { "MMX",         ff_fdct_mmx,          FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
  77 #endif
  78 #if HAVE_MMXEXT_INLINE
  79     { "MMXEXT",      ff_fdct_mmxext,       FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT },
  80 #endif
  81 #if HAVE_SSE2_INLINE
  82     { "SSE2",        ff_fdct_sse2,         FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 },
  83 #endif
  84
  85 #if HAVE_ALTIVEC
  86     { "altivecfdct", ff_fdct_altivec,      FF_IDCT_PERM_NONE, AV_CPU_FLAG_ALTIVEC },
  87 #endif
  88
  89     { 0 }
  90 };
  91
  92 static const struct algo idct_tab[] = {
  93     { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
  94     { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
  95     { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
  96     { "SIMPLE-C",    ff_simple_idct_8,     FF_IDCT_PERM_NONE },
  97
  98 #if HAVE_MMX_INLINE
  99     { "SIMPLE-MMX",     ff_simple_idct_mmx,     FF_IDCT_PERM_SIMPLE,    AV_CPU_FLAG_MMX },
 100     { "XVID-MMX",       ff_idct_xvid_mmx,       FF_IDCT_PERM_NONE,      AV_CPU_FLAG_MMX,    1 },
 101 #endif
 102 #if HAVE_MMXEXT_INLINE
 103     { "XVID-MMXEXT",    ff_idct_xvid_mmxext,    FF_IDCT_PERM_NONE,      AV_CPU_FLAG_MMXEXT, 1 },
 104 #endif
 105 #if HAVE_SSE2_INLINE
 106     { "XVID-SSE2",      ff_idct_xvid_sse2,      FF_IDCT_PERM_SSE2,      AV_CPU_FLAG_SSE2,   1 },
 107 #endif
 108
 109 #if ARCH_ARM
 110     { "SIMPLE-ARM",     ff_simple_idct_arm,     FF_IDCT_PERM_NONE },
 111     { "INT-ARM",        ff_j_rev_dct_arm,       FF_IDCT_PERM_LIBMPEG2 },
 112 #endif
 113 #if HAVE_ARMV5TE
 114     { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te, FF_IDCT_PERM_NONE,      AV_CPU_FLAG_ARMV5TE },
 115 #endif
 116 #if HAVE_ARMV6
 117     { "SIMPLE-ARMV6",   ff_simple_idct_armv6,   FF_IDCT_PERM_LIBMPEG2,  AV_CPU_FLAG_ARMV6 },
 118 #endif
 119 #if HAVE_NEON && ARCH_ARM
 120     { "SIMPLE-NEON",    ff_simple_idct_neon,    FF_IDCT_PERM_PARTTRANS, AV_CPU_FLAG_NEON },
 121 #endif
 122
 123     { 0 }
 124 };
 125
 126 #define AANSCALE_BITS 12
 127
 128 #define NB_ITS 20000
 129 #define NB_ITS_SPEED 50000
 130
 131 static short idct_simple_mmx_perm[64] = {
 132     0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
 133     0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
 134     0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
 135     0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
 136     0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
 137     0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
 138     0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
 139     0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 140 };
 141
 142 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
 143
 144 DECLARE_ALIGNED(16, static int16_t, block)[64];
 145 DECLARE_ALIGNED(8,  static int16_t, block1)[64];
 146
 147 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
 148 {
 149     int i, j;
 150
 151     memset(block, 0, 64 * sizeof(*block));
 152
 153     switch (test) {
 154     case 0:
 155         for (i = 0; i < 64; i++)
 156             block[i] = (av_lfg_get(prng) % 512) - 256;
 157         if (is_idct) {
 158             ff_ref_fdct(block);
 159             for (i = 0; i < 64; i++)
 160                 block[i] >>= 3;
 161         }
 162         break;
 163     case 1:
 164         j = av_lfg_get(prng) % 10 + 1;
 165         for (i = 0; i < j; i++)
 166             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
 167         break;
 168     case 2:
 169         block[ 0] = av_lfg_get(prng) % 4096 - 2048;
 170         block[63] = (block[0] & 1) ^ 1;
 171         break;
 172     }
 173 }
 174
 175 static void permute(int16_t dst[64], const int16_t src[64],
 176                     enum idct_permutation_type perm_type)
 177 {
 178     int i;
 179
 180     switch (perm_type) {
 181     case FF_IDCT_PERM_LIBMPEG2:
 182         for (i = 0; i < 64; i++)
 183             dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
 184         break;
 185     case FF_IDCT_PERM_SIMPLE:
 186         for (i = 0; i < 64; i++)
 187             dst[idct_simple_mmx_perm[i]] = src[i];
 188         break;
 189     case FF_IDCT_PERM_SSE2:
 190         for (i = 0; i < 64; i++)
 191             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
 192         break;
 193     case FF_IDCT_PERM_PARTTRANS:
 194         for (i = 0; i < 64; i++)
 195             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 196         break;
 197     default:
 198         for (i = 0; i < 64; i++)
 199             dst[i] = src[i];
 200         break;
 201     }
 202 }
 203
 204 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
 205 {
 206     void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
 207     int it, i, scale;
 208     int err_inf, v;
 209     int64_t err2, ti, ti1, it1, err_sum = 0;
 210     int64_t sysErr[64], sysErrMax = 0;
 211     int maxout = 0;
 212     int blockSumErrMax = 0, blockSumErr;
 213     AVLFG prng;
 214     double omse, ome;
 215     int spec_err;
 216
 217     av_lfg_init(&prng, 1);
 218
 219     err_inf = 0;
 220     err2 = 0;
 221     for (i = 0; i < 64; i++)
 222         sysErr[i] = 0;
 223     for (it = 0; it < NB_ITS; it++) {
 224         init_block(block1, test, is_idct, &prng);
 225         permute(block, block1, dct->perm_type);
 226
 227         dct->func(block);
 228         emms_c();
 229
 230         if (!strcmp(dct->name, "IJG-AAN-INT")) {
 231             for (i = 0; i < 64; i++) {
 232                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
 233                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
 234             }
 235         }
 236
 237         ref(block1);
 238
 239         blockSumErr = 0;
 240         for (i = 0; i < 64; i++) {
 241             int err = block[i] - block1[i];
 242             err_sum += err;
 243             v = abs(err);
 244             if (v > err_inf)
 245                 err_inf = v;
 246             err2 += v * v;
 247             sysErr[i] += block[i] - block1[i];
 248             blockSumErr += v;
 249             if (abs(block[i]) > maxout)
 250                 maxout = abs(block[i]);
 251         }
 252         if (blockSumErrMax < blockSumErr)
 253             blockSumErrMax = blockSumErr;
 254     }
 255     for (i = 0; i < 64; i++)
 256         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
 257
 258     for (i = 0; i < 64; i++) {
 259         if (i % 8 == 0)
 260             printf("\n");
 261         printf("%7d ", (int) sysErr[i]);
 262     }
 263     printf("\n");
 264
 265     omse = (double) err2 / NB_ITS / 64;
 266     ome  = (double) err_sum / NB_ITS / 64;
 267
 268     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 269
 270     printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 271            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
 272            omse, ome, (double) sysErrMax / NB_ITS,
 273            maxout, blockSumErrMax);
 274
 275     if (spec_err && !dct->nonspec)
 276         return 1;
 277
 278     if (!speed)
 279         return 0;
 280
 281     /* speed test */
 282     init_block(block, test, is_idct, &prng);
 283     permute(block1, block, dct->perm_type);
 284
 285     ti = av_gettime();
 286     it1 = 0;
 287     do {
 288         for (it = 0; it < NB_ITS_SPEED; it++) {
 289             memcpy(block, block1, sizeof(block));
 290             dct->func(block);
 291         }
 292         it1 += NB_ITS_SPEED;
 293         ti1 = av_gettime() - ti;
 294     } while (ti1 < 1000000);
 295     emms_c();
 296
 297     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
 298            (double) it1 * 1000.0 / (double) ti1);
 299
 300     return 0;
 301 }
 302
 303 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
 304 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
 305
 306 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
 307 {
 308     static int init;
 309     static double c8[8][8];
 310     static double c4[4][4];
 311     double block1[64], block2[64], block3[64];
 312     double s, sum, v;
 313     int i, j, k;
 314
 315     if (!init) {
 316         init = 1;
 317
 318         for (i = 0; i < 8; i++) {
 319             sum = 0;
 320             for (j = 0; j < 8; j++) {
 321                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 322                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
 323                 sum += c8[i][j] * c8[i][j];
 324             }
 325         }
 326
 327         for (i = 0; i < 4; i++) {
 328             sum = 0;
 329             for (j = 0; j < 4; j++) {
 330                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 331                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
 332                 sum += c4[i][j] * c4[i][j];
 333             }
 334         }
 335     }
 336
 337     /* butterfly */
 338     s = 0.5 * sqrt(2.0);
 339     for (i = 0; i < 4; i++) {
 340         for (j = 0; j < 8; j++) {
 341             block1[8 * (2 * i) + j] =
 342                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
 343             block1[8 * (2 * i + 1) + j] =
 344                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
 345         }
 346     }
 347
 348     /* idct8 on lines */
 349     for (i = 0; i < 8; i++) {
 350         for (j = 0; j < 8; j++) {
 351             sum = 0;
 352             for (k = 0; k < 8; k++)
 353                 sum += c8[k][j] * block1[8 * i + k];
 354             block2[8 * i + j] = sum;
 355         }
 356     }
 357
 358     /* idct4 */
 359     for (i = 0; i < 8; i++) {
 360         for (j = 0; j < 4; j++) {
 361             /* top */
 362             sum = 0;
 363             for (k = 0; k < 4; k++)
 364                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 365             block3[8 * (2 * j) + i] = sum;
 366
 367             /* bottom */
 368             sum = 0;
 369             for (k = 0; k < 4; k++)
 370                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 371             block3[8 * (2 * j + 1) + i] = sum;
 372         }
 373     }
 374
 375     /* clamp and store the result */
 376     for (i = 0; i < 8; i++) {
 377         for (j = 0; j < 8; j++) {
 378             v = block3[8 * i + j];
 379             if      (v < 0)   v = 0;
 380             else if (v > 255) v = 255;
 381             dest[i * linesize + j] = (int) rint(v);
 382         }
 383     }
 384 }
 385
 386 static void idct248_error(const char *name,
 387                           void (*idct248_put)(uint8_t *dest, int line_size,
 388                                               int16_t *block),
 389                           int speed)
 390 {
 391     int it, i, it1, ti, ti1, err_max, v;
 392     AVLFG prng;
 393
 394     av_lfg_init(&prng, 1);
 395
 396     /* just one test to see if code is correct (precision is less
 397        important here) */
 398     err_max = 0;
 399     for (it = 0; it < NB_ITS; it++) {
 400         /* XXX: use forward transform to generate values */
 401         for (i = 0; i < 64; i++)
 402             block1[i] = av_lfg_get(&prng) % 256 - 128;
 403         block1[0] += 1024;
 404
 405         for (i = 0; i < 64; i++)
 406             block[i] = block1[i];
 407         idct248_ref(img_dest1, 8, block);
 408
 409         for (i = 0; i < 64; i++)
 410             block[i] = block1[i];
 411         idct248_put(img_dest, 8, block);
 412
 413         for (i = 0; i < 64; i++) {
 414             v = abs((int) img_dest[i] - (int) img_dest1[i]);
 415             if (v == 255)
 416                 printf("%d %d\n", img_dest[i], img_dest1[i]);
 417             if (v > err_max)
 418                 err_max = v;
 419         }
 420     }
 421     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 422
 423     if (!speed)
 424         return;
 425
 426     ti = av_gettime();
 427     it1 = 0;
 428     do {
 429         for (it = 0; it < NB_ITS_SPEED; it++) {
 430             for (i = 0; i < 64; i++)
 431                 block[i] = block1[i];
 432             idct248_put(img_dest, 8, block);
 433         }
 434         it1 += NB_ITS_SPEED;
 435         ti1 = av_gettime() - ti;
 436     } while (ti1 < 1000000);
 437     emms_c();
 438
 439     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
 440            (double) it1 * 1000.0 / (double) ti1);
 441 }
 442
 443 static void help(void)
 444 {
 445     printf("dct-test [-i] [<test-number>]\n"
 446            "test-number 0 -> test with random matrixes\n"
 447            "            1 -> test with random sparse matrixes\n"
 448            "            2 -> do 3. test from mpeg4 std\n"
 449            "-i          test IDCT implementations\n"
 450            "-4          test IDCT248 implementations\n"
 451            "-t          speed test\n");
 452 }
 453
 454 #if !HAVE_GETOPT
 455 #include "compat/getopt.c"
 456 #endif
 457
 458 int main(int argc, char **argv)
 459 {
 460     int test_idct = 0, test_248_dct = 0;
 461     int c, i;
 462     int test = 1;
 463     int speed = 0;
 464     int err = 0;
 465
 466     ff_ref_dct_init();
 467
 468     for (;;) {
 469         c = getopt(argc, argv, "ih4t");
 470         if (c == -1)
 471             break;
 472         switch (c) {
 473         case 'i':
 474             test_idct = 1;
 475             break;
 476         case '4':
 477             test_248_dct = 1;
 478             break;
 479         case 't':
 480             speed = 1;
 481             break;
 482         default:
 483         case 'h':
 484             help();
 485             return 0;
 486         }
 487     }
 488
 489     if (optind < argc)
 490         test = atoi(argv[optind]);
 491
 492     printf("Libav DCT/IDCT test\n");
 493
 494     if (test_248_dct) {
 495         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
 496     } else {
 497         const int cpu_flags = av_get_cpu_flags();
 498         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
 499         for (i = 0; algos[i].name; i++)
 500             if (!(~cpu_flags & algos[i].cpu_flag)) {
 501                 err |= dct_error(&algos[i], test, test_idct, speed);
 502             }
 503     }
 504
 505     if (err)
 506         printf("Error: %d.\n", err);
 507
 508     return !!err;
 509 }