git.sesse.net Git - ffmpeg/blob - libavcodec/imgresample.c

   1 /*
   2  * High quality image resampling with polyphase filters
   3  * Copyright (c) 2001 Fabrice Bellard.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18  */
  19
  20 /**
  21  * @file imgresample.c
  22  * High quality image resampling with polyphase filters.
  23  */
  24
  25 #include "avcodec.h"
  26 #include "dsputil.h"
  27
  28 #ifdef USE_FASTMEMCPY
  29 #include "fastmemcpy.h"
  30 #endif
  31
/* Number of picture components (planes). */
#define NB_COMPONENTS 3

/* The fractional part of a source position selects one of NB_PHASES filters;
   each filter has NB_TAPS coefficients. */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

/* Source positions are fixed-point values with POS_FRAC_BITS fractional bits. */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
/* Filter tables are built with a scale of 1 << FILTER_BITS, and every filtered
   sum is shifted right by FILTER_BITS before being clamped to 0..255. */
#define FILTER_BITS   8

/* Number of rows in the ring buffer of horizontally filtered lines; the
   allocation adds NB_TAPS more rows on top of this. */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
  46
  47 struct ImgReSampleContext {
  48     int iwidth, iheight, owidth, oheight;
  49     int topBand, bottomBand, leftBand, rightBand;
  50     int padtop, padbottom, padleft, padright;
  51     int pad_owidth, pad_oheight;
  52     int h_incr, v_incr;
  53     DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
  54     DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
  55     uint8_t *line_buf;
  56 };
  57
/* Filter table builder; its definition is not in the visible part of this file.
   img_resample_full_init() calls it with the output/input size ratio as
   'factor', NB_TAPS as 'tap_count', NB_PHASES as 'phase_count',
   1 << FILTER_BITS as 'scale' and 0 as 'type'.
   NOTE(review): presumably fills 'filter' with phase_count * tap_count
   coefficients - confirm against the definition. */
void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
  59
  60 static inline int get_phase(int pos)
  61 {
  62     return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
  63 }
  64
  65 /* This function must be optimized */
  66 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
  67                             int src_width, int src_start, int src_incr,
  68                             int16_t *filters)
  69 {
  70     int src_pos, phase, sum, i;
  71     const uint8_t *s;
  72     int16_t *filter;
  73
  74     src_pos = src_start;
  75     for(i=0;i<dst_width;i++) {
  76 #ifdef TEST
  77         /* test */
  78         if ((src_pos >> POS_FRAC_BITS) < 0 ||
  79             (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
  80             av_abort();
  81 #endif
  82         s = src + (src_pos >> POS_FRAC_BITS);
  83         phase = get_phase(src_pos);
  84         filter = filters + phase * NB_TAPS;
  85 #if NB_TAPS == 4
  86         sum = s[0] * filter[0] +
  87             s[1] * filter[1] +
  88             s[2] * filter[2] +
  89             s[3] * filter[3];
  90 #else
  91         {
  92             int j;
  93             sum = 0;
  94             for(j=0;j<NB_TAPS;j++)
  95                 sum += s[j] * filter[j];
  96         }
  97 #endif
  98         sum = sum >> FILTER_BITS;
  99         if (sum < 0)
 100             sum = 0;
 101         else if (sum > 255)
 102             sum = 255;
 103         dst[0] = sum;
 104         src_pos += src_incr;
 105         dst++;
 106     }
 107 }
 108
 109 /* This function must be optimized */
 110 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
 111                        int wrap, int16_t *filter)
 112 {
 113     int sum, i;
 114     const uint8_t *s;
 115
 116     s = src;
 117     for(i=0;i<dst_width;i++) {
 118 #if NB_TAPS == 4
 119         sum = s[0 * wrap] * filter[0] +
 120             s[1 * wrap] * filter[1] +
 121             s[2 * wrap] * filter[2] +
 122             s[3 * wrap] * filter[3];
 123 #else
 124         {
 125             int j;
 126             uint8_t *s1 = s;
 127
 128             sum = 0;
 129             for(j=0;j<NB_TAPS;j++) {
 130                 sum += s1[0] * filter[j];
 131                 s1 += wrap;
 132             }
 133         }
 134 #endif
 135         sum = sum >> FILTER_BITS;
 136         if (sum < 0)
 137             sum = 0;
 138         else if (sum > 255)
 139             sum = 255;
 140         dst[0] = sum;
 141         dst++;
 142         s++;
 143     }
 144 }
 145
 146 #ifdef HAVE_MMX
 147
 148 #include "i386/mmx.h"
 149
/*
 * Compute one horizontally filtered sample and leave it in the low 32 bits
 * of MMX register 'reg', then advance src_pos by src_incr.
 *
 * The expanding scope must provide src, src_pos, src_incr and filters, the
 * scratch variables s, phase and filter, and mm7 set to zero. mm6 is
 * overwritten.
 *
 * Steps: the source bytes at s are widened to 16-bit words against mm7, the
 * 4 coefficients of the selected phase are multiplied and pair-wise added
 * (pmaddwd), the two partial sums are added together and the result is
 * shifted right by FILTER_BITS.
 *
 * NOTE(review): movq_m2r(*s, reg) looks like an 8-byte load although only
 * the low 4 bytes are used after punpcklbw - confirm that reading 4 bytes
 * past the last tap is always inside the source buffer.
 */
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}
 165
 166 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
 167
/* XXX: do four pixels at a time */
/**
 * MMX variant of h_resample_fast() for 4-tap filters; h_resample() calls it
 * with the same arguments when MMX is available and NB_TAPS == 4.
 *
 * @param dst       output samples
 * @param dst_width number of output samples to produce
 * @param src       source line
 * @param src_width source line length (not used here)
 * @param src_start fixed-point position of the first output sample
 * @param src_incr  fixed-point position step per output sample
 * @param filters   NB_PHASES filters of 4 coefficients each
 */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    /* s, phase and filter are scratch variables used by FILTER4 */
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    /* mm7 = 0, needed by FILTER4 and by the packs below */
    pxor_r2r(mm7, mm7);

    /* main loop: four output samples per iteration */
    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        /* pack each result down so that byte 0 of the register holds the
           output sample; the four packs are independent of each other */
        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        /* store the low byte of each register through tmp */
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    /* remaining 0..3 samples, one at a time */
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}
 213
/**
 * MMX variant of v_resample() for 4-tap filters, four columns per iteration.
 *
 * The only call site in this file is disabled ("&& 0") with a remark about
 * loss of precision. NOTE(review): presumably because the products and sums
 * below are kept in 16-bit lanes - confirm before re-enabling.
 *
 * @param dst       output row
 * @param dst_width number of columns to produce
 * @param src       first of the 4 input rows
 * @param wrap      distance in bytes between consecutive input rows
 * @param filter    4 coefficients for the current vertical phase
 */
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    /* replicate each coefficient into all four 16-bit lanes */
    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    /* mm7 = 0, used to widen bytes to words and for the final pack */
    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        /* load the samples of the four rows and widen them to 16-bit words */
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        /* weight each row with its coefficient */
        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        /* add the four weighted rows and drop the filter scale */
        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        /* NOTE(review): 32-bit store through a casted uint8_t pointer; dst is
           not known to be 4-byte aligned here - confirm this is acceptable */
        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    /* remaining 0..3 columns: same scalar code as v_resample() */
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
 277 #endif
 278
 279 #ifdef HAVE_ALTIVEC
/* AltiVec vector of unsigned chars that can also be accessed as 16 bytes. */
typedef         union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

/* AltiVec vector of signed shorts that can also be accessed as 8 scalars;
   v_resample16_altivec() uses it to store one coefficient before splatting. */
typedef         union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;
 289
 290 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
 291                           int wrap, int16_t *filter)
 292 {
 293     int sum, i;
 294     const uint8_t *s;
 295     vector unsigned char *tv, tmp, dstv, zero;
 296     vec_ss_t srchv[4], srclv[4], fv[4];
 297     vector signed short zeros, sumhv, sumlv;
 298     s = src;
 299
 300     for(i=0;i<4;i++)
 301     {
 302         /*
 303            The vec_madds later on does an implicit >>15 on the result.
 304            Since FILTER_BITS is 8, and we have 15 bits of magnitude in
 305            a signed short, we have just enough bits to pre-shift our
 306            filter constants <<7 to compensate for vec_madds.
 307         */
 308         fv[i].s[0] = filter[i] << (15-FILTER_BITS);
 309         fv[i].v = vec_splat(fv[i].v, 0);
 310     }
 311
 312     zero = vec_splat_u8(0);
 313     zeros = vec_splat_s16(0);
 314
 315
 316     /*
 317        When we're resampling, we'd ideally like both our input buffers,
 318        and output buffers to be 16-byte aligned, so we can do both aligned
 319        reads and writes. Sadly we can't always have this at the moment, so
 320        we opt for aligned writes, as unaligned writes have a huge overhead.
 321        To do this, do enough scalar resamples to get dst 16-byte aligned.
 322     */
 323     i = (-(int)dst) & 0xf;
 324     while(i>0) {
 325         sum = s[0 * wrap] * filter[0] +
 326         s[1 * wrap] * filter[1] +
 327         s[2 * wrap] * filter[2] +
 328         s[3 * wrap] * filter[3];
 329         sum = sum >> FILTER_BITS;
 330         if (sum<0) sum = 0; else if (sum>255) sum=255;
 331         dst[0] = sum;
 332         dst++;
 333         s++;
 334         dst_width--;
 335         i--;
 336     }
 337
 338     /* Do our altivec resampling on 16 pixels at once. */
 339     while(dst_width>=16) {
 340         /*
 341            Read 16 (potentially unaligned) bytes from each of
 342            4 lines into 4 vectors, and split them into shorts.
 343            Interleave the multipy/accumulate for the resample
 344            filter with the loads to hide the 3 cycle latency
 345            the vec_madds have.
 346         */
 347         tv = (vector unsigned char *) &s[0 * wrap];
 348         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
 349         srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
 350         srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
 351         sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
 352         sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
 353
 354         tv = (vector unsigned char *) &s[1 * wrap];
 355         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
 356         srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
 357         srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
 358         sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
 359         sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
 360
 361         tv = (vector unsigned char *) &s[2 * wrap];
 362         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
 363         srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
 364         srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
 365         sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
 366         sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
 367
 368         tv = (vector unsigned char *) &s[3 * wrap];
 369         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
 370         srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
 371         srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
 372         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
 373         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
 374
 375         /*
 376            Pack the results into our destination vector,
 377            and do an aligned write of that back to memory.
 378         */
 379         dstv = vec_packsu(sumhv, sumlv) ;
 380         vec_st(dstv, 0, (vector unsigned char *) dst);
 381
 382         dst+=16;
 383         s+=16;
 384         dst_width-=16;
 385     }
 386
 387     /*
 388        If there are any leftover pixels, resample them
 389        with the slow scalar method.
 390     */
 391     while(dst_width>0) {
 392         sum = s[0 * wrap] * filter[0] +
 393         s[1 * wrap] * filter[1] +
 394         s[2 * wrap] * filter[2] +
 395         s[3 * wrap] * filter[3];
 396         sum = sum >> FILTER_BITS;
 397         if (sum<0) sum = 0; else if (sum>255) sum=255;
 398         dst[0] = sum;
 399         dst++;
 400         s++;
 401         dst_width--;
 402     }
 403 }
 404 #endif
 405
 406 /* slow version to handle limit cases. Does not need optimisation */
 407 static void h_resample_slow(uint8_t *dst, int dst_width,
 408                             const uint8_t *src, int src_width,
 409                             int src_start, int src_incr, int16_t *filters)
 410 {
 411     int src_pos, phase, sum, j, v, i;
 412     const uint8_t *s, *src_end;
 413     int16_t *filter;
 414
 415     src_end = src + src_width;
 416     src_pos = src_start;
 417     for(i=0;i<dst_width;i++) {
 418         s = src + (src_pos >> POS_FRAC_BITS);
 419         phase = get_phase(src_pos);
 420         filter = filters + phase * NB_TAPS;
 421         sum = 0;
 422         for(j=0;j<NB_TAPS;j++) {
 423             if (s < src)
 424                 v = src[0];
 425             else if (s >= src_end)
 426                 v = src_end[-1];
 427             else
 428                 v = s[0];
 429             sum += v * filter[j];
 430             s++;
 431         }
 432         sum = sum >> FILTER_BITS;
 433         if (sum < 0)
 434             sum = 0;
 435         else if (sum > 255)
 436             sum = 255;
 437         dst[0] = sum;
 438         src_pos += src_incr;
 439         dst++;
 440     }
 441 }
 442
 443 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
 444                        int src_width, int src_start, int src_incr,
 445                        int16_t *filters)
 446 {
 447     int n, src_end;
 448
 449     if (src_start < 0) {
 450         n = (0 - src_start + src_incr - 1) / src_incr;
 451         h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
 452         dst += n;
 453         dst_width -= n;
 454         src_start += n * src_incr;
 455     }
 456     src_end = src_start + dst_width * src_incr;
 457     if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
 458         n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
 459             src_incr;
 460     } else {
 461         n = dst_width;
 462     }
 463 #ifdef HAVE_MMX
 464     if ((mm_flags & MM_MMX) && NB_TAPS == 4)
 465         h_resample_fast4_mmx(dst, n,
 466                              src, src_width, src_start, src_incr, filters);
 467     else
 468 #endif
 469         h_resample_fast(dst, n,
 470                         src, src_width, src_start, src_incr, filters);
 471     if (n < dst_width) {
 472         dst += n;
 473         dst_width -= n;
 474         src_start += n * src_incr;
 475         h_resample_slow(dst, dst_width,
 476                         src, src_width, src_start, src_incr, filters);
 477     }
 478 }
 479
 480 static void component_resample(ImgReSampleContext *s,
 481                                uint8_t *output, int owrap, int owidth, int oheight,
 482                                uint8_t *input, int iwrap, int iwidth, int iheight)
 483 {
 484     int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
 485     uint8_t *new_line, *src_line;
 486
 487     last_src_y = - FCENTER - 1;
 488     /* position of the bottom of the filter in the source image */
 489     src_y = (last_src_y + NB_TAPS) * POS_FRAC;
 490     ring_y = NB_TAPS; /* position in ring buffer */
 491     for(y=0;y<oheight;y++) {
 492         /* apply horizontal filter on new lines from input if needed */
 493         src_y1 = src_y >> POS_FRAC_BITS;
 494         while (last_src_y < src_y1) {
 495             if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
 496                 ring_y = NB_TAPS;
 497             last_src_y++;
 498             /* handle limit conditions : replicate line (slightly
 499                inefficient because we filter multiple times) */
 500             y1 = last_src_y;
 501             if (y1 < 0) {
 502                 y1 = 0;
 503             } else if (y1 >= iheight) {
 504                 y1 = iheight - 1;
 505             }
 506             src_line = input + y1 * iwrap;
 507             new_line = s->line_buf + ring_y * owidth;
 508             /* apply filter and handle limit cases correctly */
 509             h_resample(new_line, owidth,
 510                        src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
 511                        &s->h_filters[0][0]);
 512             /* handle ring buffer wraping */
 513             if (ring_y >= LINE_BUF_HEIGHT) {
 514                 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
 515                        new_line, owidth);
 516             }
 517         }
 518         /* apply vertical filter */
 519         phase_y = get_phase(src_y);
 520 #ifdef HAVE_MMX
 521         /* desactivated MMX because loss of precision */
 522         if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
 523             v_resample4_mmx(output, owidth,
 524                             s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 525                             &s->v_filters[phase_y][0]);
 526         else
 527 #endif
 528 #ifdef HAVE_ALTIVEC
 529             if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
 530                 v_resample16_altivec(output, owidth,
 531                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 532                                 &s->v_filters[phase_y][0]);
 533         else
 534 #endif
 535             v_resample(output, owidth,
 536                        s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 537                        &s->v_filters[phase_y][0]);
 538
 539         src_y += s->v_incr;
 540
 541         output += owrap;
 542     }
 543 }
 544
 545 ImgReSampleContext *img_resample_init(int owidth, int oheight,
 546                                       int iwidth, int iheight)
 547 {
 548     return img_resample_full_init(owidth, oheight, iwidth, iheight,
 549             0, 0, 0, 0, 0, 0, 0, 0);
 550 }
 551
 552 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
 553                                       int iwidth, int iheight,
 554                                       int topBand, int bottomBand,
 555         int leftBand, int rightBand,
 556         int padtop, int padbottom,
 557         int padleft, int padright)
 558 {
 559     ImgReSampleContext *s;
 560
 561     if (!owidth || !oheight || !iwidth || !iheight)
 562         return NULL;
 563
 564     s = av_mallocz(sizeof(ImgReSampleContext));
 565     if (!s)
 566         return NULL;
 567     if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
 568         return NULL;
 569     s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
 570     if (!s->line_buf)
 571         goto fail;
 572
 573     s->owidth = owidth;
 574     s->oheight = oheight;
 575     s->iwidth = iwidth;
 576     s->iheight = iheight;
 577
 578     s->topBand = topBand;
 579     s->bottomBand = bottomBand;
 580     s->leftBand = leftBand;
 581     s->rightBand = rightBand;
 582
 583     s->padtop = padtop;
 584     s->padbottom = padbottom;
 585     s->padleft = padleft;
 586     s->padright = padright;
 587
 588     s->pad_owidth = owidth - (padleft + padright);
 589     s->pad_oheight = oheight - (padtop + padbottom);
 590
 591     s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
 592     s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
 593
 594     av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth  /
 595             (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
 596     av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
 597             (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
 598
 599     return s;
 600 fail:
 601     av_free(s);
 602     return NULL;
 603 }
 604
 605 void img_resample(ImgReSampleContext *s,
 606                   AVPicture *output, const AVPicture *input)
 607 {
 608     int i, shift;
 609     uint8_t* optr;
 610
 611     for (i=0;i<3;i++) {
 612         shift = (i == 0) ? 0 : 1;
 613
 614         optr = output->data[i] + (((output->linesize[i] *
 615                         s->padtop) + s->padleft) >> shift);
 616
 617         component_resample(s, optr, output->linesize[i],
 618                 s->pad_owidth >> shift, s->pad_oheight >> shift,
 619                 input->data[i] + (input->linesize[i] *
 620                     (s->topBand >> shift)) + (s->leftBand >> shift),
 621                 input->linesize[i], ((s->iwidth - s->leftBand -
 622                         s->rightBand) >> shift),
 623                            (s->iheight - s->topBand - s->bottomBand) >> shift);
 624     }
 625 }
 626
 627 void img_resample_close(ImgReSampleContext *s)
 628 {
 629     av_free(s->line_buf);
 630     av_free(s);
 631 }
 632
 633 #ifdef TEST
 634 #include <stdio.h>
 635
/* input: 8-bit test picture of XSIZE x YSIZE samples, filled by main() */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output: buffers of XSIZE1 x YSIZE1 samples, i.e. the input size times the
   largest scale factor used by main() (2.0); img2 holds the second result
   of the MMX comparison */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
 646
 647 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
 648 {
 649 #undef fprintf
 650     FILE *f;
 651     f=fopen(filename,"w");
 652     fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
 653     fwrite(img,1, xsize * ysize,f);
 654     fclose(f);
 655 #define fprintf please_use_av_log
 656 }
 657
 658 static void dump_filter(int16_t *filter)
 659 {
 660     int i, ph;
 661
 662     for(ph=0;ph<NB_PHASES;ph++) {
 663         av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
 664         for(i=0;i<NB_TAPS;i++) {
 665             av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
 666         }
 667         av_log(NULL, AV_LOG_INFO, "\n");
 668     }
 669 }
 670
#ifdef HAVE_MMX
/* CPU capability flags tested by the resampling code (mm_flags & MM_MMX).
   Defined here for the TEST build; main() sets it to switch the MMX code
   paths on and off. NOTE(review): presumably defined elsewhere in the
   library for normal builds - confirm. */
int mm_flags;
#endif
 674
 675 int main(int argc, char **argv)
 676 {
 677     int x, y, v, i, xsize, ysize;
 678     ImgReSampleContext *s;
 679     float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
 680     char buf[256];
 681
 682     /* build test image */
 683     for(y=0;y<YSIZE;y++) {
 684         for(x=0;x<XSIZE;x++) {
 685             if (x < XSIZE/2 && y < YSIZE/2) {
 686                 if (x < XSIZE/4 && y < YSIZE/4) {
 687                     if ((x % 10) <= 6 &&
 688                         (y % 10) <= 6)
 689                         v = 0xff;
 690                     else
 691                         v = 0x00;
 692                 } else if (x < XSIZE/4) {
 693                     if (x & 1)
 694                         v = 0xff;
 695                     else
 696                         v = 0;
 697                 } else if (y < XSIZE/4) {
 698                     if (y & 1)
 699                         v = 0xff;
 700                     else
 701                         v = 0;
 702                 } else {
 703                     if (y < YSIZE*3/8) {
 704                         if ((y+x) & 1)
 705                             v = 0xff;
 706                         else
 707                             v = 0;
 708                     } else {
 709                         if (((x+3) % 4) <= 1 &&
 710                             ((y+3) % 4) <= 1)
 711                             v = 0xff;
 712                         else
 713                             v = 0x00;
 714                     }
 715                 }
 716             } else if (x < XSIZE/2) {
 717                 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
 718             } else if (y < XSIZE/2) {
 719                 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
 720             } else {
 721                 v = ((x + y - XSIZE) * 255) / XSIZE;
 722             }
 723             img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
 724         }
 725     }
 726     save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
 727     for(i=0;i<sizeof(factors)/sizeof(float);i++) {
 728         fact = factors[i];
 729         xsize = (int)(XSIZE * fact);
 730         ysize = (int)((YSIZE - 100) * fact);
 731         s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
 732         av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
 733         dump_filter(&s->h_filters[0][0]);
 734         component_resample(s, img1, xsize, xsize, ysize,
 735                            img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
 736         img_resample_close(s);
 737
 738         snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
 739         save_pgm(buf, img1, xsize, ysize);
 740     }
 741
 742     /* mmx test */
 743 #ifdef HAVE_MMX
 744     av_log(NULL, AV_LOG_INFO, "MMX test\n");
 745     fact = 0.72;
 746     xsize = (int)(XSIZE * fact);
 747     ysize = (int)(YSIZE * fact);
 748     mm_flags = MM_MMX;
 749     s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
 750     component_resample(s, img1, xsize, xsize, ysize,
 751                        img, XSIZE, XSIZE, YSIZE);
 752
 753     mm_flags = 0;
 754     s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
 755     component_resample(s, img2, xsize, xsize, ysize,
 756                        img, XSIZE, XSIZE, YSIZE);
 757     if (memcmp(img1, img2, xsize * ysize) != 0) {
 758         av_log(NULL, AV_LOG_ERROR, "mmx error\n");
 759         exit(1);
 760     }
 761     av_log(NULL, AV_LOG_INFO, "MMX OK\n");
 762 #endif
 763     return 0;
 764 }
 765
 766 #endif