/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters .
 */
29 #include "fastmemcpy.h"
32 #define NB_COMPONENTS 3
35 #define NB_PHASES (1 << PHASE_BITS)
37 #define FCENTER 1 /* index of the center of the filter */
38 //#define TEST 1 /* Test it */
40 #define POS_FRAC_BITS 16
41 #define POS_FRAC (1 << POS_FRAC_BITS)
42 /* 6 bits precision is needed for MMX */
45 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
47 struct ImgReSampleContext {
48 int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
50 int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
51 int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
55 static inline int get_phase(int pos)
57 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
60 /* This function must be optimized */
61 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
62 int src_width, int src_start, int src_incr,
65 int src_pos, phase, sum, i;
70 for(i=0;i<dst_width;i++) {
73 if ((src_pos >> POS_FRAC_BITS) < 0 ||
74 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
77 s = src + (src_pos >> POS_FRAC_BITS);
78 phase = get_phase(src_pos);
79 filter = filters + phase * NB_TAPS;
81 sum = s[0] * filter[0] +
89 for(j=0;j<NB_TAPS;j++)
90 sum += s[j] * filter[j];
93 sum = sum >> FILTER_BITS;
104 /* This function must be optimized */
105 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
106 int wrap, int16_t *filter)
112 for(i=0;i<dst_width;i++) {
114 sum = s[0 * wrap] * filter[0] +
115 s[1 * wrap] * filter[1] +
116 s[2 * wrap] * filter[2] +
117 s[3 * wrap] * filter[3];
124 for(j=0;j<NB_TAPS;j++) {
125 sum += s1[0] * filter[j];
130 sum = sum >> FILTER_BITS;
143 #include "i386/mmx.h"
145 #define FILTER4(reg) \
147 s = src + (src_pos >> POS_FRAC_BITS);\
148 phase = get_phase(src_pos);\
149 filter = filters + phase * NB_TAPS;\
151 punpcklbw_r2r(mm7, reg);\
152 movq_m2r(*filter, mm6);\
153 pmaddwd_r2r(reg, mm6);\
156 paddd_r2r(mm6, reg);\
157 psrad_i2r(FILTER_BITS, reg);\
158 src_pos += src_incr;\
161 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
163 /* XXX: do four pixels at a time */
164 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
165 const uint8_t *src, int src_width,
166 int src_start, int src_incr, int16_t *filters)
176 while (dst_width >= 4) {
183 packuswb_r2r(mm7, mm0);
184 packuswb_r2r(mm7, mm1);
185 packuswb_r2r(mm7, mm3);
186 packuswb_r2r(mm7, mm2);
198 while (dst_width > 0) {
200 packuswb_r2r(mm7, mm0);
209 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
210 int wrap, int16_t *filter)
227 while (dst_width >= 4) {
228 movq_m2r(s[0 * wrap], mm0);
229 punpcklbw_r2r(mm7, mm0);
230 movq_m2r(s[1 * wrap], mm1);
231 punpcklbw_r2r(mm7, mm1);
232 movq_m2r(s[2 * wrap], mm2);
233 punpcklbw_r2r(mm7, mm2);
234 movq_m2r(s[3 * wrap], mm3);
235 punpcklbw_r2r(mm7, mm3);
237 pmullw_m2r(coefs[0], mm0);
238 pmullw_m2r(coefs[1], mm1);
239 pmullw_m2r(coefs[2], mm2);
240 pmullw_m2r(coefs[3], mm3);
245 psraw_i2r(FILTER_BITS, mm0);
247 packuswb_r2r(mm7, mm0);
250 *(uint32_t *)dst = tmp.ud[0];
255 while (dst_width > 0) {
256 sum = s[0 * wrap] * filter[0] +
257 s[1 * wrap] * filter[1] +
258 s[2 * wrap] * filter[2] +
259 s[3 * wrap] * filter[3];
260 sum = sum >> FILTER_BITS;
276 vector unsigned char v;
281 vector signed short v;
285 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
286 int wrap, int16_t *filter)
290 vector unsigned char *tv, tmp, dstv, zero;
291 vec_ss_t srchv[4], srclv[4], fv[4];
292 vector signed short zeros, sumhv, sumlv;
298 The vec_madds later on does an implicit >>15 on the result.
299 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
300 a signed short, we have just enough bits to pre-shift our
301 filter constants <<7 to compensate for vec_madds.
303 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
304 fv[i].v = vec_splat(fv[i].v, 0);
307 zero = vec_splat_u8(0);
308 zeros = vec_splat_s16(0);
312 When we're resampling, we'd ideally like both our input buffers,
313 and output buffers to be 16-byte aligned, so we can do both aligned
314 reads and writes. Sadly we can't always have this at the moment, so
315 we opt for aligned writes, as unaligned writes have a huge overhead.
316 To do this, do enough scalar resamples to get dst 16-byte aligned.
318 i = (-(int)dst) & 0xf;
320 sum = s[0 * wrap] * filter[0] +
321 s[1 * wrap] * filter[1] +
322 s[2 * wrap] * filter[2] +
323 s[3 * wrap] * filter[3];
324 sum = sum >> FILTER_BITS;
325 if (sum<0) sum = 0; else if (sum>255) sum=255;
333 /* Do our altivec resampling on 16 pixels at once. */
334 while(dst_width>=16) {
336 Read 16 (potentially unaligned) bytes from each of
337 4 lines into 4 vectors, and split them into shorts.
338 Interleave the multipy/accumulate for the resample
339 filter with the loads to hide the 3 cycle latency
342 tv = (vector unsigned char *) &s[0 * wrap];
343 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
344 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
345 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
346 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
347 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
349 tv = (vector unsigned char *) &s[1 * wrap];
350 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
351 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
352 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
353 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
354 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
356 tv = (vector unsigned char *) &s[2 * wrap];
357 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
358 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
359 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
360 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
361 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
363 tv = (vector unsigned char *) &s[3 * wrap];
364 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
365 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
366 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
367 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
368 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
371 Pack the results into our destination vector,
372 and do an aligned write of that back to memory.
374 dstv = vec_packsu(sumhv, sumlv) ;
375 vec_st(dstv, 0, (vector unsigned char *) dst);
383 If there are any leftover pixels, resample them
384 with the slow scalar method.
387 sum = s[0 * wrap] * filter[0] +
388 s[1 * wrap] * filter[1] +
389 s[2 * wrap] * filter[2] +
390 s[3 * wrap] * filter[3];
391 sum = sum >> FILTER_BITS;
392 if (sum<0) sum = 0; else if (sum>255) sum=255;
401 /* slow version to handle limit cases. Does not need optimisation */
402 static void h_resample_slow(uint8_t *dst, int dst_width,
403 const uint8_t *src, int src_width,
404 int src_start, int src_incr, int16_t *filters)
406 int src_pos, phase, sum, j, v, i;
407 const uint8_t *s, *src_end;
410 src_end = src + src_width;
412 for(i=0;i<dst_width;i++) {
413 s = src + (src_pos >> POS_FRAC_BITS);
414 phase = get_phase(src_pos);
415 filter = filters + phase * NB_TAPS;
417 for(j=0;j<NB_TAPS;j++) {
420 else if (s >= src_end)
424 sum += v * filter[j];
427 sum = sum >> FILTER_BITS;
438 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
439 int src_width, int src_start, int src_incr,
445 n = (0 - src_start + src_incr - 1) / src_incr;
446 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
449 src_start += n * src_incr;
451 src_end = src_start + dst_width * src_incr;
452 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
453 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
459 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
460 h_resample_fast4_mmx(dst, n,
461 src, src_width, src_start, src_incr, filters);
464 h_resample_fast(dst, n,
465 src, src_width, src_start, src_incr, filters);
469 src_start += n * src_incr;
470 h_resample_slow(dst, dst_width,
471 src, src_width, src_start, src_incr, filters);
475 static void component_resample(ImgReSampleContext *s,
476 uint8_t *output, int owrap, int owidth, int oheight,
477 uint8_t *input, int iwrap, int iwidth, int iheight)
479 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
480 uint8_t *new_line, *src_line;
482 last_src_y = - FCENTER - 1;
483 /* position of the bottom of the filter in the source image */
484 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
485 ring_y = NB_TAPS; /* position in ring buffer */
486 for(y=0;y<oheight;y++) {
487 /* apply horizontal filter on new lines from input if needed */
488 src_y1 = src_y >> POS_FRAC_BITS;
489 while (last_src_y < src_y1) {
490 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
493 /* handle limit conditions : replicate line (slightly
494 inefficient because we filter multiple times) */
498 } else if (y1 >= iheight) {
501 src_line = input + y1 * iwrap;
502 new_line = s->line_buf + ring_y * owidth;
503 /* apply filter and handle limit cases correctly */
504 h_resample(new_line, owidth,
505 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
506 &s->h_filters[0][0]);
507 /* handle ring buffer wraping */
508 if (ring_y >= LINE_BUF_HEIGHT) {
509 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
513 /* apply vertical filter */
514 phase_y = get_phase(src_y);
516 /* desactivated MMX because loss of precision */
517 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
518 v_resample4_mmx(output, owidth,
519 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
520 &s->v_filters[phase_y][0]);
524 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
525 v_resample16_altivec(output, owidth,
526 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
527 &s->v_filters[phase_y][0]);
530 v_resample(output, owidth,
531 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
532 &s->v_filters[phase_y][0]);
539 /* XXX: the following filter is quite naive, but it seems to suffice
541 static void build_filter(int16_t *filter, float factor)
544 float x, y, tab[NB_TAPS], norm, mult;
546 /* if upsampling, only need to interpolate, no filter */
550 for(ph=0;ph<NB_PHASES;ph++) {
552 for(i=0;i<NB_TAPS;i++) {
554 x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
563 /* normalize so that an uniform color remains the same */
564 mult = (float)(1 << FILTER_BITS) / norm;
565 for(i=0;i<NB_TAPS;i++) {
566 v = (int)(tab[i] * mult);
567 filter[ph * NB_TAPS + i] = v;
572 ImgReSampleContext *img_resample_init(int owidth, int oheight,
573 int iwidth, int iheight)
575 return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
578 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
579 int iwidth, int iheight,
580 int topBand, int bottomBand,
581 int leftBand, int rightBand)
583 ImgReSampleContext *s;
585 s = av_mallocz(sizeof(ImgReSampleContext));
588 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
593 s->oheight = oheight;
595 s->iheight = iheight;
596 s->topBand = topBand;
597 s->bottomBand = bottomBand;
598 s->leftBand = leftBand;
599 s->rightBand = rightBand;
601 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
602 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
604 build_filter(&s->h_filters[0][0], (float) owidth / (float) (iwidth - leftBand - rightBand));
605 build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));
613 void img_resample(ImgReSampleContext *s,
614 AVPicture *output, const AVPicture *input)
619 shift = (i == 0) ? 0 : 1;
620 component_resample(s, output->data[i], output->linesize[i],
621 s->owidth >> shift, s->oheight >> shift,
622 input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
623 input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
624 (s->iheight - s->topBand - s->bottomBand) >> shift);
628 void img_resample_close(ImgReSampleContext *s)
630 av_free(s->line_buf);
636 void *av_mallocz(int size)
640 memset(ptr, 0, size);
644 void av_free(void *ptr)
646 /* XXX: this test should not be needed on most libcs */
654 uint8_t img[XSIZE * YSIZE];
659 uint8_t img1[XSIZE1 * YSIZE1];
660 uint8_t img2[XSIZE1 * YSIZE1];
662 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
665 f=fopen(filename,"w");
666 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
667 fwrite(img,1, xsize * ysize,f);
671 static void dump_filter(int16_t *filter)
675 for(ph=0;ph<NB_PHASES;ph++) {
677 for(i=0;i<NB_TAPS;i++) {
678 printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
688 int main(int argc, char **argv)
690 int x, y, v, i, xsize, ysize;
691 ImgReSampleContext *s;
692 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
695 /* build test image */
696 for(y=0;y<YSIZE;y++) {
697 for(x=0;x<XSIZE;x++) {
698 if (x < XSIZE/2 && y < YSIZE/2) {
699 if (x < XSIZE/4 && y < YSIZE/4) {
705 } else if (x < XSIZE/4) {
710 } else if (y < XSIZE/4) {
722 if (((x+3) % 4) <= 1 &&
729 } else if (x < XSIZE/2) {
730 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
731 } else if (y < XSIZE/2) {
732 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
734 v = ((x + y - XSIZE) * 255) / XSIZE;
736 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
739 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
740 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
742 xsize = (int)(XSIZE * fact);
743 ysize = (int)((YSIZE - 100) * fact);
744 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
745 printf("Factor=%0.2f\n", fact);
746 dump_filter(&s->h_filters[0][0]);
747 component_resample(s, img1, xsize, xsize, ysize,
748 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
749 img_resample_close(s);
751 sprintf(buf, "/tmp/out%d.pgm", i);
752 save_pgm(buf, img1, xsize, ysize);
757 printf("MMX test\n");
759 xsize = (int)(XSIZE * fact);
760 ysize = (int)(YSIZE * fact);
762 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
763 component_resample(s, img1, xsize, xsize, ysize,
764 img, XSIZE, XSIZE, YSIZE);
767 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
768 component_resample(s, img2, xsize, xsize, ysize,
769 img, XSIZE, XSIZE, YSIZE);
770 if (memcmp(img1, img2, xsize * ysize) != 0) {
771 fprintf(stderr, "mmx error\n");