git.sesse.net Git - ffmpeg/blob - libavcodec/dct-test.c

   1 /*
   2  * (c) 2001 Fabrice Bellard
   3  *     2007 Marc Hoffman <marc.hoffman@analog.com>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * DCT test (c) 2001 Fabrice Bellard
  25  * Started from sample code by Juan J. Sierralta P.
  26  */
  27
  28 #include "config.h"
  29 #include <stdlib.h>
  30 #include <stdio.h>
  31 #include <string.h>
  32 #if HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #include <math.h>
  36
  37 #include "libavutil/cpu.h"
  38 #include "libavutil/common.h"
  39 #include "libavutil/lfg.h"
  40 #include "libavutil/time.h"
  41
  42 #include "dct.h"
  43 #include "simple_idct.h"
  44 #include "aandcttab.h"
  45 #include "faandct.h"
  46 #include "faanidct.h"
  47 #include "x86/idct_xvid.h"
  48 #include "dctref.h"
  49
/*
 * Prototypes for platform-specific transforms that are referenced from the
 * tables below. Each one takes a pointer to a block of int16_t coefficients
 * and is only placed in a table when the matching ARCH_* / HAVE_* guard is set.
 */

// ALTIVEC
/* Listed in fdct_tab when HAVE_ALTIVEC is set. */
void ff_fdct_altivec(int16_t *block);

// ARM
/* Listed in idct_tab under the ARCH_ARM / HAVE_ARMV5TE / HAVE_ARMV6 /
 * HAVE_NEON guards. */
void ff_j_rev_dct_arm(int16_t *data);
void ff_simple_idct_arm(int16_t *data);
void ff_simple_idct_armv5te(int16_t *data);
void ff_simple_idct_armv6(int16_t *data);
void ff_simple_idct_neon(int16_t *data);
  59
  60 struct algo {
  61     const char *name;
  62     void (*func)(int16_t *block);
  63     enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
  64                      SSE2_PERM, PARTTRANS_PERM } format;
  65     int mm_support;
  66     int nonspec;
  67 };
  68
/* CPU capability flags, set once in main() from av_get_cpu_flags() and used
 * there to skip table entries whose mm_support bits are not all present. */
static int cpu_flags;
  70
/*
 * Forward DCT implementations under test.
 *
 * Columns: name, function, block format, required CPU flags. Trailing
 * members that are left out are zero-initialised. The list ends with an
 * all-zero entry; main() stops iterating at the first NULL name.
 */
static const struct algo fdct_tab[] = {
    { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
    { "FAAN",           ff_faandct,            NO_PERM    },
    { "IJG-AAN-INT",    ff_fdct_ifast,         SCALE_PERM },
    { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },

#if HAVE_MMX_INLINE
    { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
#endif
#if HAVE_MMXEXT_INLINE
    { "MMXEXT",         ff_fdct_mmxext,        NO_PERM,   AV_CPU_FLAG_MMXEXT  },
#endif
#if HAVE_SSE2_INLINE
    { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
#endif

#if HAVE_ALTIVEC
    { "altivecfdct",    ff_fdct_altivec,       NO_PERM,   AV_CPU_FLAG_ALTIVEC },
#endif

    /* terminator */
    { 0 }
};
  93
/*
 * Inverse DCT implementations under test.
 *
 * Columns: name, function, block format, required CPU flags, nonspec.
 * Entries with nonspec == 1 (the XVID variants) still have their error
 * statistics printed, but exceeding the accuracy limits does not make
 * dct_error() report a failure. The list ends with an all-zero entry.
 */
static const struct algo idct_tab[] = {
    { "FAANI",          ff_faanidct,           NO_PERM  },
    { "REF-DBL",        ff_ref_idct,           NO_PERM  },
    { "INT",            ff_j_rev_dct,          MMX_PERM },
    { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },

#if HAVE_MMX_INLINE
    { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
    { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 },
#endif
#if HAVE_MMXEXT_INLINE
    { "XVID-MMXEXT",    ff_idct_xvid_mmxext,   NO_PERM,   AV_CPU_FLAG_MMXEXT, 1 },
#endif
#if HAVE_SSE2_INLINE
    { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
#endif

#if ARCH_ARM
    { "SIMPLE-ARM",     ff_simple_idct_arm,    NO_PERM  },
    { "INT-ARM",        ff_j_rev_dct_arm,      MMX_PERM },
#endif
#if HAVE_ARMV5TE
    { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM,   AV_CPU_FLAG_ARMV5TE },
#endif
#if HAVE_ARMV6
    { "SIMPLE-ARMV6",   ff_simple_idct_armv6,  MMX_PERM,  AV_CPU_FLAG_ARMV6   },
#endif
#if HAVE_NEON && ARCH_ARM
    { "SIMPLE-NEON",    ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON },
#endif

    /* terminator */
    { 0 }
};
 127
/* Shift used by dct_error() when rescaling the output of SCALE_PERM
 * implementations with ff_aanscales. */
#define AANSCALE_BITS 12

/* Number of generated blocks per accuracy run. */
#define NB_ITS 20000
/* Number of transforms executed between two clock reads in the speed tests. */
#define NB_ITS_SPEED 50000

/* Destination index for each source index when the format is MMX_PERM;
 * filled in at startup by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Destination index for each source index when the format is
 * MMX_SIMPLE_PERM (see permute()). */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Destination column for each source column (low three index bits) when the
 * format is SSE2_PERM; the row bits are left untouched by permute(). */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
 147
 148 static void idct_mmx_init(void)
 149 {
 150     int i;
 151
 152     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
 153     for (i = 0; i < 64; i++) {
 154         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
 155     }
 156 }
 157
/*
 * Shared 8x8 coefficient buffers.
 *
 * In the accuracy loops block1 holds the generated input (and, in
 * dct_error(), later the reference output) while block is the buffer handed
 * to the implementation under test. In the dct_error() speed loop block1
 * keeps the prepared input and block is refreshed from it before each call.
 * The alignment is presumably required by the SIMD implementations - confirm
 * before relaxing it.
 */
DECLARE_ALIGNED(16, static int16_t, block)[64];
DECLARE_ALIGNED(8,  static int16_t, block1)[64];
 160
 161 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
 162 {
 163     int i, j;
 164
 165     memset(block, 0, 64 * sizeof(*block));
 166
 167     switch (test) {
 168     case 0:
 169         for (i = 0; i < 64; i++)
 170             block[i] = (av_lfg_get(prng) % 512) - 256;
 171         if (is_idct) {
 172             ff_ref_fdct(block);
 173             for (i = 0; i < 64; i++)
 174                 block[i] >>= 3;
 175         }
 176         break;
 177     case 1:
 178         j = av_lfg_get(prng) % 10 + 1;
 179         for (i = 0; i < j; i++)
 180             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
 181         break;
 182     case 2:
 183         block[ 0] = av_lfg_get(prng) % 4096 - 2048;
 184         block[63] = (block[0] & 1) ^ 1;
 185         break;
 186     }
 187 }
 188
 189 static void permute(int16_t dst[64], const int16_t src[64], int perm)
 190 {
 191     int i;
 192
 193     if (perm == MMX_PERM) {
 194         for (i = 0; i < 64; i++)
 195             dst[idct_mmx_perm[i]] = src[i];
 196     } else if (perm == MMX_SIMPLE_PERM) {
 197         for (i = 0; i < 64; i++)
 198             dst[idct_simple_mmx_perm[i]] = src[i];
 199     } else if (perm == SSE2_PERM) {
 200         for (i = 0; i < 64; i++)
 201             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
 202     } else if (perm == PARTTRANS_PERM) {
 203         for (i = 0; i < 64; i++)
 204             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 205     } else {
 206         for (i = 0; i < 64; i++)
 207             dst[i] = src[i];
 208     }
 209 }
 210
 211 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
 212 {
 213     void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
 214     int it, i, scale;
 215     int err_inf, v;
 216     int64_t err2, ti, ti1, it1, err_sum = 0;
 217     int64_t sysErr[64], sysErrMax = 0;
 218     int maxout = 0;
 219     int blockSumErrMax = 0, blockSumErr;
 220     AVLFG prng;
 221     double omse, ome;
 222     int spec_err;
 223
 224     av_lfg_init(&prng, 1);
 225
 226     err_inf = 0;
 227     err2 = 0;
 228     for (i = 0; i < 64; i++)
 229         sysErr[i] = 0;
 230     for (it = 0; it < NB_ITS; it++) {
 231         init_block(block1, test, is_idct, &prng);
 232         permute(block, block1, dct->format);
 233
 234         dct->func(block);
 235         emms_c();
 236
 237         if (dct->format == SCALE_PERM) {
 238             for (i = 0; i < 64; i++) {
 239                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
 240                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
 241             }
 242         }
 243
 244         ref(block1);
 245
 246         blockSumErr = 0;
 247         for (i = 0; i < 64; i++) {
 248             int err = block[i] - block1[i];
 249             err_sum += err;
 250             v = abs(err);
 251             if (v > err_inf)
 252                 err_inf = v;
 253             err2 += v * v;
 254             sysErr[i] += block[i] - block1[i];
 255             blockSumErr += v;
 256             if (abs(block[i]) > maxout)
 257                 maxout = abs(block[i]);
 258         }
 259         if (blockSumErrMax < blockSumErr)
 260             blockSumErrMax = blockSumErr;
 261     }
 262     for (i = 0; i < 64; i++)
 263         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
 264
 265     for (i = 0; i < 64; i++) {
 266         if (i % 8 == 0)
 267             printf("\n");
 268         printf("%7d ", (int) sysErr[i]);
 269     }
 270     printf("\n");
 271
 272     omse = (double) err2 / NB_ITS / 64;
 273     ome  = (double) err_sum / NB_ITS / 64;
 274
 275     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 276
 277     printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 278            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
 279            omse, ome, (double) sysErrMax / NB_ITS,
 280            maxout, blockSumErrMax);
 281
 282     if (spec_err && !dct->nonspec)
 283         return 1;
 284
 285     if (!speed)
 286         return 0;
 287
 288     /* speed test */
 289     init_block(block, test, is_idct, &prng);
 290     permute(block1, block, dct->format);
 291
 292     ti = av_gettime();
 293     it1 = 0;
 294     do {
 295         for (it = 0; it < NB_ITS_SPEED; it++) {
 296             memcpy(block, block1, sizeof(block));
 297             dct->func(block);
 298         }
 299         it1 += NB_ITS_SPEED;
 300         ti1 = av_gettime() - ti;
 301     } while (ti1 < 1000000);
 302     emms_c();
 303
 304     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
 305            (double) it1 * 1000.0 / (double) ti1);
 306
 307     return 0;
 308 }
 309
/* 8x8 pixel outputs compared by idct248_error(): img_dest receives the
 * result of the implementation under test, img_dest1 the result of
 * idct248_ref(). */
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
 312
 313 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
 314 {
 315     static int init;
 316     static double c8[8][8];
 317     static double c4[4][4];
 318     double block1[64], block2[64], block3[64];
 319     double s, sum, v;
 320     int i, j, k;
 321
 322     if (!init) {
 323         init = 1;
 324
 325         for (i = 0; i < 8; i++) {
 326             sum = 0;
 327             for (j = 0; j < 8; j++) {
 328                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 329                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
 330                 sum += c8[i][j] * c8[i][j];
 331             }
 332         }
 333
 334         for (i = 0; i < 4; i++) {
 335             sum = 0;
 336             for (j = 0; j < 4; j++) {
 337                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 338                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
 339                 sum += c4[i][j] * c4[i][j];
 340             }
 341         }
 342     }
 343
 344     /* butterfly */
 345     s = 0.5 * sqrt(2.0);
 346     for (i = 0; i < 4; i++) {
 347         for (j = 0; j < 8; j++) {
 348             block1[8 * (2 * i) + j] =
 349                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
 350             block1[8 * (2 * i + 1) + j] =
 351                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
 352         }
 353     }
 354
 355     /* idct8 on lines */
 356     for (i = 0; i < 8; i++) {
 357         for (j = 0; j < 8; j++) {
 358             sum = 0;
 359             for (k = 0; k < 8; k++)
 360                 sum += c8[k][j] * block1[8 * i + k];
 361             block2[8 * i + j] = sum;
 362         }
 363     }
 364
 365     /* idct4 */
 366     for (i = 0; i < 8; i++) {
 367         for (j = 0; j < 4; j++) {
 368             /* top */
 369             sum = 0;
 370             for (k = 0; k < 4; k++)
 371                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 372             block3[8 * (2 * j) + i] = sum;
 373
 374             /* bottom */
 375             sum = 0;
 376             for (k = 0; k < 4; k++)
 377                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 378             block3[8 * (2 * j + 1) + i] = sum;
 379         }
 380     }
 381
 382     /* clamp and store the result */
 383     for (i = 0; i < 8; i++) {
 384         for (j = 0; j < 8; j++) {
 385             v = block3[8 * i + j];
 386             if      (v < 0)   v = 0;
 387             else if (v > 255) v = 255;
 388             dest[i * linesize + j] = (int) rint(v);
 389         }
 390     }
 391 }
 392
 393 static void idct248_error(const char *name,
 394                           void (*idct248_put)(uint8_t *dest, int line_size,
 395                                               int16_t *block),
 396                           int speed)
 397 {
 398     int it, i, it1, ti, ti1, err_max, v;
 399     AVLFG prng;
 400
 401     av_lfg_init(&prng, 1);
 402
 403     /* just one test to see if code is correct (precision is less
 404        important here) */
 405     err_max = 0;
 406     for (it = 0; it < NB_ITS; it++) {
 407         /* XXX: use forward transform to generate values */
 408         for (i = 0; i < 64; i++)
 409             block1[i] = av_lfg_get(&prng) % 256 - 128;
 410         block1[0] += 1024;
 411
 412         for (i = 0; i < 64; i++)
 413             block[i] = block1[i];
 414         idct248_ref(img_dest1, 8, block);
 415
 416         for (i = 0; i < 64; i++)
 417             block[i] = block1[i];
 418         idct248_put(img_dest, 8, block);
 419
 420         for (i = 0; i < 64; i++) {
 421             v = abs((int) img_dest[i] - (int) img_dest1[i]);
 422             if (v == 255)
 423                 printf("%d %d\n", img_dest[i], img_dest1[i]);
 424             if (v > err_max)
 425                 err_max = v;
 426         }
 427     }
 428     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 429
 430     if (!speed)
 431         return;
 432
 433     ti = av_gettime();
 434     it1 = 0;
 435     do {
 436         for (it = 0; it < NB_ITS_SPEED; it++) {
 437             for (i = 0; i < 64; i++)
 438                 block[i] = block1[i];
 439             idct248_put(img_dest, 8, block);
 440         }
 441         it1 += NB_ITS_SPEED;
 442         ti1 = av_gettime() - ti;
 443     } while (ti1 < 1000000);
 444     emms_c();
 445
 446     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
 447            (double) it1 * 1000.0 / (double) ti1);
 448 }
 449
 450 static void help(void)
 451 {
 452     printf("dct-test [-i] [<test-number>]\n"
 453            "test-number 0 -> test with random matrixes\n"
 454            "            1 -> test with random sparse matrixes\n"
 455            "            2 -> do 3. test from mpeg4 std\n"
 456            "-i          test IDCT implementations\n"
 457            "-4          test IDCT248 implementations\n"
 458            "-t          speed test\n");
 459 }
 460
 461 #if !HAVE_GETOPT
 462 #include "compat/getopt.c"
 463 #endif
 464
 465 int main(int argc, char **argv)
 466 {
 467     int test_idct = 0, test_248_dct = 0;
 468     int c, i;
 469     int test = 1;
 470     int speed = 0;
 471     int err = 0;
 472
 473     cpu_flags = av_get_cpu_flags();
 474
 475     ff_ref_dct_init();
 476     idct_mmx_init();
 477
 478     for (;;) {
 479         c = getopt(argc, argv, "ih4t");
 480         if (c == -1)
 481             break;
 482         switch (c) {
 483         case 'i':
 484             test_idct = 1;
 485             break;
 486         case '4':
 487             test_248_dct = 1;
 488             break;
 489         case 't':
 490             speed = 1;
 491             break;
 492         default:
 493         case 'h':
 494             help();
 495             return 0;
 496         }
 497     }
 498
 499     if (optind < argc)
 500         test = atoi(argv[optind]);
 501
 502     printf("Libav DCT/IDCT test\n");
 503
 504     if (test_248_dct) {
 505         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
 506     } else {
 507         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
 508         for (i = 0; algos[i].name; i++)
 509             if (!(~cpu_flags & algos[i].mm_support)) {
 510                 err |= dct_error(&algos[i], test, test_idct, speed);
 511             }
 512     }
 513
 514     if (err)
 515         printf("Error: %d.\n", err);
 516
 517     return !!err;
 518 }