git.sesse.net Git - ffmpeg/blob - libavcodec/dct-test.c

   1 /*
   2  * (c) 2001 Fabrice Bellard
   3  *     2007 Marc Hoffman <marc.hoffman@analog.com>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * DCT test (c) 2001 Fabrice Bellard
  25  * Started from sample code by Juan J. Sierralta P.
  26  */
  27
  28 #include "config.h"
  29 #include <stdlib.h>
  30 #include <stdio.h>
  31 #include <string.h>
  32 #if HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #include <math.h>
  36
  37 #include "libavutil/cpu.h"
  38 #include "libavutil/common.h"
  39 #include "libavutil/lfg.h"
  40 #include "libavutil/time.h"
  41
  42 #include "dct.h"
  43 #include "idctdsp.h"
  44 #include "simple_idct.h"
  45 #include "aandcttab.h"
  46 #include "faandct.h"
  47 #include "faanidct.h"
  48 #include "x86/fdct.h"
  49 #include "x86/idct_xvid.h"
  50 #include "dctref.h"
  51
  52 // ALTIVEC
  53 void ff_fdct_altivec(int16_t *block);
  54
  55 // ARM
  56 void ff_j_rev_dct_arm(int16_t *data);
  57 void ff_simple_idct_arm(int16_t *data);
  58 void ff_simple_idct_armv5te(int16_t *data);
  59 void ff_simple_idct_armv6(int16_t *data);
  60 void ff_simple_idct_neon(int16_t *data);
  61
  62 struct algo {
  63     const char *name;
  64     void (*func)(int16_t *block);
  65     enum idct_permutation_type perm_type;
  66     int cpu_flag;
  67     int nonspec;
  68 };
  69
  70 static const struct algo fdct_tab[] = {
  71     { "REF-DBL",     ff_ref_fdct,          FF_IDCT_PERM_NONE },
  72     { "FAAN",        ff_faandct,           FF_IDCT_PERM_NONE },
  73     { "IJG-AAN-INT", ff_fdct_ifast,        FF_IDCT_PERM_NONE },
  74     { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
  75
  76 #if HAVE_MMX_INLINE
  77     { "MMX",         ff_fdct_mmx,          FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
  78 #endif
  79 #if HAVE_MMXEXT_INLINE
  80     { "MMXEXT",      ff_fdct_mmxext,       FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT },
  81 #endif
  82 #if HAVE_SSE2_INLINE
  83     { "SSE2",        ff_fdct_sse2,         FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 },
  84 #endif
  85
  86 #if HAVE_ALTIVEC
  87     { "altivecfdct", ff_fdct_altivec,      FF_IDCT_PERM_NONE, AV_CPU_FLAG_ALTIVEC },
  88 #endif
  89
  90     { 0 }
  91 };
  92
  93 static const struct algo idct_tab[] = {
  94     { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
  95     { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
  96     { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
  97     { "SIMPLE-C",    ff_simple_idct_8,     FF_IDCT_PERM_NONE },
  98
  99 #if HAVE_MMX_INLINE
 100     { "SIMPLE-MMX",     ff_simple_idct_mmx,     FF_IDCT_PERM_SIMPLE,    AV_CPU_FLAG_MMX },
 101     { "XVID-MMX",       ff_idct_xvid_mmx,       FF_IDCT_PERM_NONE,      AV_CPU_FLAG_MMX,    1 },
 102 #endif
 103 #if HAVE_MMXEXT_INLINE
 104     { "XVID-MMXEXT",    ff_idct_xvid_mmxext,    FF_IDCT_PERM_NONE,      AV_CPU_FLAG_MMXEXT, 1 },
 105 #endif
 106 #if HAVE_SSE2_INLINE
 107     { "XVID-SSE2",      ff_idct_xvid_sse2,      FF_IDCT_PERM_SSE2,      AV_CPU_FLAG_SSE2,   1 },
 108 #endif
 109
 110 #if ARCH_ARM
 111     { "SIMPLE-ARM",     ff_simple_idct_arm,     FF_IDCT_PERM_NONE },
 112     { "INT-ARM",        ff_j_rev_dct_arm,       FF_IDCT_PERM_LIBMPEG2 },
 113 #endif
 114 #if HAVE_ARMV5TE
 115     { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te, FF_IDCT_PERM_NONE,      AV_CPU_FLAG_ARMV5TE },
 116 #endif
 117 #if HAVE_ARMV6
 118     { "SIMPLE-ARMV6",   ff_simple_idct_armv6,   FF_IDCT_PERM_LIBMPEG2,  AV_CPU_FLAG_ARMV6 },
 119 #endif
 120 #if HAVE_NEON && ARCH_ARM
 121     { "SIMPLE-NEON",    ff_simple_idct_neon,    FF_IDCT_PERM_PARTTRANS, AV_CPU_FLAG_NEON },
 122 #endif
 123
 124     { 0 }
 125 };
 126
 127 #define AANSCALE_BITS 12
 128
 129 #define NB_ITS 20000
 130 #define NB_ITS_SPEED 50000
 131
 132 static short idct_simple_mmx_perm[64] = {
 133     0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
 134     0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
 135     0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
 136     0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
 137     0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
 138     0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
 139     0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
 140     0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 141 };
 142
 143 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
 144
 145 DECLARE_ALIGNED(16, static int16_t, block)[64];
 146 DECLARE_ALIGNED(8,  static int16_t, block1)[64];
 147
 148 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
 149 {
 150     int i, j;
 151
 152     memset(block, 0, 64 * sizeof(*block));
 153
 154     switch (test) {
 155     case 0:
 156         for (i = 0; i < 64; i++)
 157             block[i] = (av_lfg_get(prng) % 512) - 256;
 158         if (is_idct) {
 159             ff_ref_fdct(block);
 160             for (i = 0; i < 64; i++)
 161                 block[i] >>= 3;
 162         }
 163         break;
 164     case 1:
 165         j = av_lfg_get(prng) % 10 + 1;
 166         for (i = 0; i < j; i++)
 167             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
 168         break;
 169     case 2:
 170         block[ 0] = av_lfg_get(prng) % 4096 - 2048;
 171         block[63] = (block[0] & 1) ^ 1;
 172         break;
 173     }
 174 }
 175
 176 static void permute(int16_t dst[64], const int16_t src[64],
 177                     enum idct_permutation_type perm_type)
 178 {
 179     int i;
 180
 181     switch (perm_type) {
 182     case FF_IDCT_PERM_LIBMPEG2:
 183         for (i = 0; i < 64; i++)
 184             dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
 185         break;
 186     case FF_IDCT_PERM_SIMPLE:
 187         for (i = 0; i < 64; i++)
 188             dst[idct_simple_mmx_perm[i]] = src[i];
 189         break;
 190     case FF_IDCT_PERM_SSE2:
 191         for (i = 0; i < 64; i++)
 192             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
 193         break;
 194     case FF_IDCT_PERM_PARTTRANS:
 195         for (i = 0; i < 64; i++)
 196             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 197         break;
 198     default:
 199         for (i = 0; i < 64; i++)
 200             dst[i] = src[i];
 201         break;
 202     }
 203 }
 204
 205 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
 206 {
 207     void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
 208     int it, i, scale;
 209     int err_inf, v;
 210     int64_t err2, ti, ti1, it1, err_sum = 0;
 211     int64_t sysErr[64], sysErrMax = 0;
 212     int maxout = 0;
 213     int blockSumErrMax = 0, blockSumErr;
 214     AVLFG prng;
 215     double omse, ome;
 216     int spec_err;
 217
 218     av_lfg_init(&prng, 1);
 219
 220     err_inf = 0;
 221     err2 = 0;
 222     for (i = 0; i < 64; i++)
 223         sysErr[i] = 0;
 224     for (it = 0; it < NB_ITS; it++) {
 225         init_block(block1, test, is_idct, &prng);
 226         permute(block, block1, dct->perm_type);
 227
 228         dct->func(block);
 229         emms_c();
 230
 231         if (!strcmp(dct->name, "IJG-AAN-INT")) {
 232             for (i = 0; i < 64; i++) {
 233                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
 234                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
 235             }
 236         }
 237
 238         ref(block1);
 239
 240         blockSumErr = 0;
 241         for (i = 0; i < 64; i++) {
 242             int err = block[i] - block1[i];
 243             err_sum += err;
 244             v = abs(err);
 245             if (v > err_inf)
 246                 err_inf = v;
 247             err2 += v * v;
 248             sysErr[i] += block[i] - block1[i];
 249             blockSumErr += v;
 250             if (abs(block[i]) > maxout)
 251                 maxout = abs(block[i]);
 252         }
 253         if (blockSumErrMax < blockSumErr)
 254             blockSumErrMax = blockSumErr;
 255     }
 256     for (i = 0; i < 64; i++)
 257         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
 258
 259     for (i = 0; i < 64; i++) {
 260         if (i % 8 == 0)
 261             printf("\n");
 262         printf("%7d ", (int) sysErr[i]);
 263     }
 264     printf("\n");
 265
 266     omse = (double) err2 / NB_ITS / 64;
 267     ome  = (double) err_sum / NB_ITS / 64;
 268
 269     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 270
 271     printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 272            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
 273            omse, ome, (double) sysErrMax / NB_ITS,
 274            maxout, blockSumErrMax);
 275
 276     if (spec_err && !dct->nonspec)
 277         return 1;
 278
 279     if (!speed)
 280         return 0;
 281
 282     /* speed test */
 283     init_block(block, test, is_idct, &prng);
 284     permute(block1, block, dct->perm_type);
 285
 286     ti = av_gettime();
 287     it1 = 0;
 288     do {
 289         for (it = 0; it < NB_ITS_SPEED; it++) {
 290             memcpy(block, block1, sizeof(block));
 291             dct->func(block);
 292         }
 293         it1 += NB_ITS_SPEED;
 294         ti1 = av_gettime() - ti;
 295     } while (ti1 < 1000000);
 296     emms_c();
 297
 298     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
 299            (double) it1 * 1000.0 / (double) ti1);
 300
 301     return 0;
 302 }
 303
 304 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
 305 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
 306
 307 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
 308 {
 309     static int init;
 310     static double c8[8][8];
 311     static double c4[4][4];
 312     double block1[64], block2[64], block3[64];
 313     double s, sum, v;
 314     int i, j, k;
 315
 316     if (!init) {
 317         init = 1;
 318
 319         for (i = 0; i < 8; i++) {
 320             sum = 0;
 321             for (j = 0; j < 8; j++) {
 322                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 323                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
 324                 sum += c8[i][j] * c8[i][j];
 325             }
 326         }
 327
 328         for (i = 0; i < 4; i++) {
 329             sum = 0;
 330             for (j = 0; j < 4; j++) {
 331                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 332                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
 333                 sum += c4[i][j] * c4[i][j];
 334             }
 335         }
 336     }
 337
 338     /* butterfly */
 339     s = 0.5 * sqrt(2.0);
 340     for (i = 0; i < 4; i++) {
 341         for (j = 0; j < 8; j++) {
 342             block1[8 * (2 * i) + j] =
 343                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
 344             block1[8 * (2 * i + 1) + j] =
 345                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
 346         }
 347     }
 348
 349     /* idct8 on lines */
 350     for (i = 0; i < 8; i++) {
 351         for (j = 0; j < 8; j++) {
 352             sum = 0;
 353             for (k = 0; k < 8; k++)
 354                 sum += c8[k][j] * block1[8 * i + k];
 355             block2[8 * i + j] = sum;
 356         }
 357     }
 358
 359     /* idct4 */
 360     for (i = 0; i < 8; i++) {
 361         for (j = 0; j < 4; j++) {
 362             /* top */
 363             sum = 0;
 364             for (k = 0; k < 4; k++)
 365                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 366             block3[8 * (2 * j) + i] = sum;
 367
 368             /* bottom */
 369             sum = 0;
 370             for (k = 0; k < 4; k++)
 371                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 372             block3[8 * (2 * j + 1) + i] = sum;
 373         }
 374     }
 375
 376     /* clamp and store the result */
 377     for (i = 0; i < 8; i++) {
 378         for (j = 0; j < 8; j++) {
 379             v = block3[8 * i + j];
 380             if      (v < 0)   v = 0;
 381             else if (v > 255) v = 255;
 382             dest[i * linesize + j] = (int) rint(v);
 383         }
 384     }
 385 }
 386
 387 static void idct248_error(const char *name,
 388                           void (*idct248_put)(uint8_t *dest, int line_size,
 389                                               int16_t *block),
 390                           int speed)
 391 {
 392     int it, i, it1, ti, ti1, err_max, v;
 393     AVLFG prng;
 394
 395     av_lfg_init(&prng, 1);
 396
 397     /* just one test to see if code is correct (precision is less
 398        important here) */
 399     err_max = 0;
 400     for (it = 0; it < NB_ITS; it++) {
 401         /* XXX: use forward transform to generate values */
 402         for (i = 0; i < 64; i++)
 403             block1[i] = av_lfg_get(&prng) % 256 - 128;
 404         block1[0] += 1024;
 405
 406         for (i = 0; i < 64; i++)
 407             block[i] = block1[i];
 408         idct248_ref(img_dest1, 8, block);
 409
 410         for (i = 0; i < 64; i++)
 411             block[i] = block1[i];
 412         idct248_put(img_dest, 8, block);
 413
 414         for (i = 0; i < 64; i++) {
 415             v = abs((int) img_dest[i] - (int) img_dest1[i]);
 416             if (v == 255)
 417                 printf("%d %d\n", img_dest[i], img_dest1[i]);
 418             if (v > err_max)
 419                 err_max = v;
 420         }
 421     }
 422     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 423
 424     if (!speed)
 425         return;
 426
 427     ti = av_gettime();
 428     it1 = 0;
 429     do {
 430         for (it = 0; it < NB_ITS_SPEED; it++) {
 431             for (i = 0; i < 64; i++)
 432                 block[i] = block1[i];
 433             idct248_put(img_dest, 8, block);
 434         }
 435         it1 += NB_ITS_SPEED;
 436         ti1 = av_gettime() - ti;
 437     } while (ti1 < 1000000);
 438     emms_c();
 439
 440     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
 441            (double) it1 * 1000.0 / (double) ti1);
 442 }
 443
 444 static void help(void)
 445 {
 446     printf("dct-test [-i] [<test-number>]\n"
 447            "test-number 0 -> test with random matrixes\n"
 448            "            1 -> test with random sparse matrixes\n"
 449            "            2 -> do 3. test from mpeg4 std\n"
 450            "-i          test IDCT implementations\n"
 451            "-4          test IDCT248 implementations\n"
 452            "-t          speed test\n");
 453 }
 454
 455 #if !HAVE_GETOPT
 456 #include "compat/getopt.c"
 457 #endif
 458
 459 int main(int argc, char **argv)
 460 {
 461     int test_idct = 0, test_248_dct = 0;
 462     int c, i;
 463     int test = 1;
 464     int speed = 0;
 465     int err = 0;
 466
 467     ff_ref_dct_init();
 468
 469     for (;;) {
 470         c = getopt(argc, argv, "ih4t");
 471         if (c == -1)
 472             break;
 473         switch (c) {
 474         case 'i':
 475             test_idct = 1;
 476             break;
 477         case '4':
 478             test_248_dct = 1;
 479             break;
 480         case 't':
 481             speed = 1;
 482             break;
 483         default:
 484         case 'h':
 485             help();
 486             return 0;
 487         }
 488     }
 489
 490     if (optind < argc)
 491         test = atoi(argv[optind]);
 492
 493     printf("Libav DCT/IDCT test\n");
 494
 495     if (test_248_dct) {
 496         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
 497     } else {
 498         const int cpu_flags = av_get_cpu_flags();
 499         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
 500         for (i = 0; algos[i].name; i++)
 501             if (!(~cpu_flags & algos[i].cpu_flag)) {
 502                 err |= dct_error(&algos[i], test, test_idct, speed);
 503             }
 504     }
 505
 506     if (err)
 507         printf("Error: %d.\n", err);
 508
 509     return !!err;
 510 }