git.sesse.net Git - ffmpeg/blob - libavcodec/tests/dct.c

   1 /*
   2  * (c) 2001 Fabrice Bellard
   3  *     2007 Marc Hoffman <marc.hoffman@analog.com>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * DCT test (c) 2001 Fabrice Bellard
  25  * Started from sample code by Juan J. Sierralta P.
  26  */
  27
  28 #include "config.h"
  29 #include <stdlib.h>
  30 #include <stdio.h>
  31 #include <string.h>
  32 #if HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #include <math.h>
  36
  37 #include "libavutil/cpu.h"
  38 #include "libavutil/common.h"
  39 #include "libavutil/internal.h"
  40 #include "libavutil/lfg.h"
  41 #include "libavutil/mem_internal.h"
  42 #include "libavutil/time.h"
  43
  44 #include "libavcodec/dct.h"
  45 #include "libavcodec/idctdsp.h"
  46 #include "libavcodec/simple_idct.h"
  47 #include "libavcodec/xvididct.h"
  48 #include "libavcodec/aandcttab.h"
  49 #include "libavcodec/faandct.h"
  50 #include "libavcodec/faanidct.h"
  51 #include "libavcodec/dctref.h"
  52
  53 struct algo {
  54     const char *name;
  55     void (*func)(int16_t *block);
  56     enum idct_permutation_type perm_type;
  57     int cpu_flag;
  58     int nonspec;
  59 };
  60
  61 static const struct algo fdct_tab[] = {
  62     { "REF-DBL",     ff_ref_fdct,          FF_IDCT_PERM_NONE },
  63     { "IJG-AAN-INT", ff_fdct_ifast,        FF_IDCT_PERM_NONE },
  64     { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
  65 #if CONFIG_FAANDCT
  66     { "FAAN",        ff_faandct,           FF_IDCT_PERM_NONE },
  67 #endif /* CONFIG_FAANDCT */
  68 };
  69
  70 static void ff_prores_idct_wrap(int16_t *dst){
  71     LOCAL_ALIGNED(16, int16_t, qmat, [64]);
  72     int i;
  73
  74     for(i=0; i<64; i++){
  75         qmat[i]=4;
  76     }
  77     ff_prores_idct_10(dst, qmat);
  78     for(i=0; i<64; i++) {
  79          dst[i] -= 512;
  80     }
  81 }
  82
  83 static const struct algo idct_tab[] = {
  84     { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
  85     { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
  86     { "SIMPLE-C",    ff_simple_idct_int16_8bit,     FF_IDCT_PERM_NONE },
  87     { "SIMPLE-C10",  ff_simple_idct_int16_10bit,    FF_IDCT_PERM_NONE },
  88     { "SIMPLE-C12",  ff_simple_idct_int16_12bit,    FF_IDCT_PERM_NONE, 0, 1 },
  89     { "PR-C",        ff_prores_idct_wrap,  FF_IDCT_PERM_NONE, 0, 1 },
  90 #if CONFIG_FAANIDCT
  91     { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
  92 #endif /* CONFIG_FAANIDCT */
  93 #if CONFIG_MPEG4_DECODER
  94     { "XVID",        ff_xvid_idct,         FF_IDCT_PERM_NONE, 0, 1 },
  95 #endif /* CONFIG_MPEG4_DECODER */
  96 };
  97
  98 #if ARCH_AARCH64
  99 #include "aarch64/dct.c"
 100 #elif ARCH_ARM
 101 #include "arm/dct.c"
 102 #elif ARCH_PPC
 103 #include "ppc/dct.c"
 104 #elif ARCH_X86
 105 #include "x86/dct.c"
 106 #else
 107 static const struct algo fdct_tab_arch[] = { { 0 } };
 108 static const struct algo idct_tab_arch[] = { { 0 } };
 109 #endif
 110
 111 #define AANSCALE_BITS 12
 112
 113 #define NB_ITS 20000
 114 #define NB_ITS_SPEED 50000
 115
 116 DECLARE_ALIGNED(16, static int16_t, block)[64];
 117 DECLARE_ALIGNED(8,  static int16_t, block1)[64];
 118
 119 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
 120 {
 121     int i, j;
 122
 123     memset(block, 0, 64 * sizeof(*block));
 124
 125     switch (test) {
 126     case 0:
 127         for (i = 0; i < 64; i++)
 128             block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
 129         if (is_idct) {
 130             ff_ref_fdct(block);
 131             for (i = 0; i < 64; i++)
 132                 block[i] >>= 3;
 133         }
 134         break;
 135     case 1:
 136         j = av_lfg_get(prng) % 10 + 1;
 137         for (i = 0; i < j; i++) {
 138             int idx = av_lfg_get(prng) % 64;
 139             block[idx] = av_lfg_get(prng) % (2*vals) -vals;
 140         }
 141         break;
 142     case 2:
 143         block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
 144         block[63] = (block[0] & 1) ^ 1;
 145         break;
 146     }
 147 }
 148
 149 static void permute(int16_t dst[64], const int16_t src[64],
 150                     enum idct_permutation_type perm_type)
 151 {
 152     int i;
 153
 154 #if ARCH_X86
 155     if (permute_x86(dst, src, perm_type))
 156         return;
 157 #endif
 158
 159     switch (perm_type) {
 160     case FF_IDCT_PERM_LIBMPEG2:
 161         for (i = 0; i < 64; i++)
 162             dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
 163         break;
 164     case FF_IDCT_PERM_PARTTRANS:
 165         for (i = 0; i < 64; i++)
 166             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
 167         break;
 168     case FF_IDCT_PERM_TRANSPOSE:
 169         for (i = 0; i < 64; i++)
 170             dst[(i>>3) | ((i<<3)&0x38)] = src[i];
 171         break;
 172     default:
 173         for (i = 0; i < 64; i++)
 174             dst[i] = src[i];
 175         break;
 176     }
 177 }
 178
 179 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
 180 {
 181     void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
 182     int it, i, scale;
 183     int err_inf, v;
 184     int64_t err2, ti, ti1, it1, err_sum = 0;
 185     int64_t sysErr[64], sysErrMax = 0;
 186     int64_t err2_matrix[64], err2_max = 0;
 187     int maxout = 0;
 188     int blockSumErrMax = 0, blockSumErr;
 189     AVLFG prng;
 190     const int vals=1<<bits;
 191     double omse, ome;
 192     int spec_err;
 193
 194     av_lfg_init(&prng, 1);
 195
 196     err_inf = 0;
 197     err2 = 0;
 198     for (i = 0; i < 64; i++)
 199         err2_matrix[i] = sysErr[i] = 0;
 200     for (it = 0; it < NB_ITS; it++) {
 201         init_block(block1, test, is_idct, &prng, vals);
 202         permute(block, block1, dct->perm_type);
 203
 204         dct->func(block);
 205         emms_c();
 206
 207         if (!strcmp(dct->name, "IJG-AAN-INT")) {
 208             for (i = 0; i < 64; i++) {
 209                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
 210                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
 211             }
 212         }
 213
 214         ref(block1);
 215         if (!strcmp(dct->name, "PR-SSE2"))
 216             for (i = 0; i < 64; i++)
 217                 block1[i] = av_clip(block1[i], 4-512, 1019-512);
 218
 219         blockSumErr = 0;
 220         for (i = 0; i < 64; i++) {
 221             int err = block[i] - block1[i];
 222             err_sum += err;
 223             v = abs(err);
 224             if (v > err_inf)
 225                 err_inf = v;
 226             err2_matrix[i] += v * v;
 227             err2 += v * v;
 228             sysErr[i] += block[i] - block1[i];
 229             blockSumErr += v;
 230             if (abs(block[i]) > maxout)
 231                 maxout = abs(block[i]);
 232         }
 233         if (blockSumErrMax < blockSumErr)
 234             blockSumErrMax = blockSumErr;
 235     }
 236     for (i = 0; i < 64; i++) {
 237         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
 238         err2_max  = FFMAX(err2_max , FFABS(err2_matrix[i]));
 239     }
 240
 241     for (i = 0; i < 64; i++) {
 242         if (i % 8 == 0)
 243             printf("\n");
 244         printf("%7d ", (int) sysErr[i]);
 245     }
 246     printf("\n");
 247
 248     omse = (double) err2 / NB_ITS / 64;
 249     ome  = (double) err_sum / NB_ITS / 64;
 250
 251     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 252     if (test < 2)
 253         spec_err = is_idct && ((double) err2_max / NB_ITS > 0.06 || (double) sysErrMax / NB_ITS > 0.015);
 254
 255     printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
 256            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
 257            omse, ome, (double) sysErrMax / NB_ITS,
 258            maxout, blockSumErrMax);
 259
 260     if (spec_err && !dct->nonspec) {
 261         printf("Failed!\n");
 262         return 1;
 263     }
 264
 265     if (!speed)
 266         return 0;
 267
 268     /* speed test */
 269
 270     init_block(block, test, is_idct, &prng, vals);
 271     permute(block1, block, dct->perm_type);
 272
 273     ti = av_gettime_relative();
 274     it1 = 0;
 275     do {
 276         for (it = 0; it < NB_ITS_SPEED; it++) {
 277             memcpy(block, block1, sizeof(block));
 278             dct->func(block);
 279         }
 280         emms_c();
 281         it1 += NB_ITS_SPEED;
 282         ti1 = av_gettime_relative() - ti;
 283     } while (ti1 < 1000000);
 284
 285     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
 286            (double) it1 * 1000.0 / (double) ti1);
 287
 288     return 0;
 289 }
 290
 291 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
 292 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
 293
 294 static void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
 295 {
 296     static int init;
 297     static double c8[8][8];
 298     static double c4[4][4];
 299     double block1[64], block2[64], block3[64];
 300     double s, sum, v;
 301     int i, j, k;
 302
 303     if (!init) {
 304         init = 1;
 305
 306         for (i = 0; i < 8; i++) {
 307             sum = 0;
 308             for (j = 0; j < 8; j++) {
 309                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
 310                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
 311                 sum += c8[i][j] * c8[i][j];
 312             }
 313         }
 314
 315         for (i = 0; i < 4; i++) {
 316             sum = 0;
 317             for (j = 0; j < 4; j++) {
 318                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
 319                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
 320                 sum += c4[i][j] * c4[i][j];
 321             }
 322         }
 323     }
 324
 325     /* butterfly */
 326     s = 0.5 * sqrt(2.0);
 327     for (i = 0; i < 4; i++) {
 328         for (j = 0; j < 8; j++) {
 329             block1[8 * (2 * i) + j] =
 330                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
 331             block1[8 * (2 * i + 1) + j] =
 332                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
 333         }
 334     }
 335
 336     /* idct8 on lines */
 337     for (i = 0; i < 8; i++) {
 338         for (j = 0; j < 8; j++) {
 339             sum = 0;
 340             for (k = 0; k < 8; k++)
 341                 sum += c8[k][j] * block1[8 * i + k];
 342             block2[8 * i + j] = sum;
 343         }
 344     }
 345
 346     /* idct4 */
 347     for (i = 0; i < 8; i++) {
 348         for (j = 0; j < 4; j++) {
 349             /* top */
 350             sum = 0;
 351             for (k = 0; k < 4; k++)
 352                 sum += c4[k][j] * block2[8 * (2 * k) + i];
 353             block3[8 * (2 * j) + i] = sum;
 354
 355             /* bottom */
 356             sum = 0;
 357             for (k = 0; k < 4; k++)
 358                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
 359             block3[8 * (2 * j + 1) + i] = sum;
 360         }
 361     }
 362
 363     /* clamp and store the result */
 364     for (i = 0; i < 8; i++) {
 365         for (j = 0; j < 8; j++) {
 366             v = block3[8 * i + j];
 367             if      (v < 0)   v = 0;
 368             else if (v > 255) v = 255;
 369             dest[i * linesize + j] = (int) rint(v);
 370         }
 371     }
 372 }
 373
 374 static void idct248_error(const char *name,
 375                           void (*idct248_put)(uint8_t *dest,
 376                                               ptrdiff_t line_size,
 377                                               int16_t *block),
 378                           int speed)
 379 {
 380     int it, i, it1, ti, ti1, err_max, v;
 381     AVLFG prng;
 382
 383     av_lfg_init(&prng, 1);
 384
 385     /* just one test to see if code is correct (precision is less
 386        important here) */
 387     err_max = 0;
 388     for (it = 0; it < NB_ITS; it++) {
 389         /* XXX: use forward transform to generate values */
 390         for (i = 0; i < 64; i++)
 391             block1[i] = av_lfg_get(&prng) % 256 - 128;
 392         block1[0] += 1024;
 393
 394         for (i = 0; i < 64; i++)
 395             block[i] = block1[i];
 396         idct248_ref(img_dest1, 8, block);
 397
 398         for (i = 0; i < 64; i++)
 399             block[i] = block1[i];
 400         idct248_put(img_dest, 8, block);
 401
 402         for (i = 0; i < 64; i++) {
 403             v = abs((int) img_dest[i] - (int) img_dest1[i]);
 404             if (v == 255)
 405                 printf("%d %d\n", img_dest[i], img_dest1[i]);
 406             if (v > err_max)
 407                 err_max = v;
 408         }
 409 #if 0
 410         printf("ref=\n");
 411         for(i=0;i<8;i++) {
 412             int j;
 413             for(j=0;j<8;j++) {
 414                 printf(" %3d", img_dest1[i*8+j]);
 415             }
 416             printf("\n");
 417         }
 418
 419         printf("out=\n");
 420         for(i=0;i<8;i++) {
 421             int j;
 422             for(j=0;j<8;j++) {
 423                 printf(" %3d", img_dest[i*8+j]);
 424             }
 425             printf("\n");
 426         }
 427 #endif
 428     }
 429     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 430
 431     if (!speed)
 432         return;
 433
 434     ti = av_gettime_relative();
 435     it1 = 0;
 436     do {
 437         for (it = 0; it < NB_ITS_SPEED; it++) {
 438             for (i = 0; i < 64; i++)
 439                 block[i] = block1[i];
 440             idct248_put(img_dest, 8, block);
 441         }
 442         emms_c();
 443         it1 += NB_ITS_SPEED;
 444         ti1 = av_gettime_relative() - ti;
 445     } while (ti1 < 1000000);
 446
 447     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
 448            (double) it1 * 1000.0 / (double) ti1);
 449 }
 450
 451 static void help(void)
 452 {
 453     printf("dct-test [-i] [<test-number>] [<bits>]\n"
 454            "test-number 0 -> test with random matrixes\n"
 455            "            1 -> test with random sparse matrixes\n"
 456            "            2 -> do 3. test from MPEG-4 std\n"
 457            "bits        Number of time domain bits to use, 8 is default\n"
 458            "-i          test IDCT implementations\n"
 459            "-4          test IDCT248 implementations\n"
 460            "-t          speed test\n");
 461 }
 462
 463 #if !HAVE_GETOPT
 464 #include "compat/getopt.c"
 465 #endif
 466
 467 int main(int argc, char **argv)
 468 {
 469     int test_idct = 0, test_248_dct = 0;
 470     int c, i;
 471     int test = 1;
 472     int speed = 0;
 473     int err = 0;
 474     int bits=8;
 475
 476     ff_ref_dct_init();
 477
 478     for (;;) {
 479         c = getopt(argc, argv, "ih4t");
 480         if (c == -1)
 481             break;
 482         switch (c) {
 483         case 'i':
 484             test_idct = 1;
 485             break;
 486         case '4':
 487             test_248_dct = 1;
 488             break;
 489         case 't':
 490             speed = 1;
 491             break;
 492         default:
 493         case 'h':
 494             help();
 495             return 0;
 496         }
 497     }
 498
 499     if (optind < argc)
 500         test = atoi(argv[optind]);
 501     if(optind+1 < argc) bits= atoi(argv[optind+1]);
 502
 503     printf("ffmpeg DCT/IDCT test\n");
 504
 505     if (test_248_dct) {
 506         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
 507     } else {
 508         const int cpu_flags = av_get_cpu_flags();
 509         if (test_idct) {
 510             for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++)
 511                 err |= dct_error(&idct_tab[i], test, test_idct, speed, bits);
 512
 513             for (i = 0; idct_tab_arch[i].name; i++)
 514                 if (!(~cpu_flags & idct_tab_arch[i].cpu_flag))
 515                     err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits);
 516         }
 517 #if CONFIG_FDCTDSP
 518         else {
 519             for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++)
 520                 err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits);
 521
 522             for (i = 0; fdct_tab_arch[i].name; i++)
 523                 if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag))
 524                     err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits);
 525         }
 526 #endif /* CONFIG_FDCTDSP */
 527     }
 528
 529     if (err)
 530         printf("Error: %d.\n", err);
 531
 532     return !!err;
 533 }