git.sesse.net Git - ffmpeg/blob - libavcodec/imgresample.c

   1 /*
   2  * High quality image resampling with polyphase filters
   3  * Copyright (c) 2001 Fabrice Bellard.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18  */
  19
  20 /**
  21  * @file imgresample.c
  22  * High quality image resampling with polyphase filters.
  23  */
  24
  25 #include "avcodec.h"
  26 #include "swscale.h"
  27 #include "dsputil.h"
  28
  29 #ifdef USE_FASTMEMCPY
  30 #include "fastmemcpy.h"
  31 #endif
  32
  33 #define NB_COMPONENTS 3
  34
  35 #define PHASE_BITS 4
  36 #define NB_PHASES  (1 << PHASE_BITS)
  37 #define NB_TAPS    4
  38 #define FCENTER    1  /* index of the center of the filter */
  39 //#define TEST    1  /* Test it */
  40
  41 #define POS_FRAC_BITS 16
  42 #define POS_FRAC      (1 << POS_FRAC_BITS)
  43 /* 6 bits precision is needed for MMX */
  44 #define FILTER_BITS   8
  45
  46 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
  47
  48 struct ImgReSampleContext {
  49     int iwidth, iheight, owidth, oheight;
  50     int topBand, bottomBand, leftBand, rightBand;
  51     int padtop, padbottom, padleft, padright;
  52     int pad_owidth, pad_oheight;
  53     int h_incr, v_incr;
  54     DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
  55     DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
  56     uint8_t *line_buf;
  57 };
  58
  59 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
  60
  61 static inline int get_phase(int pos)
  62 {
  63     return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
  64 }
  65
  66 /* This function must be optimized */
  67 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
  68                             int src_width, int src_start, int src_incr,
  69                             int16_t *filters)
  70 {
  71     int src_pos, phase, sum, i;
  72     const uint8_t *s;
  73     int16_t *filter;
  74
  75     src_pos = src_start;
  76     for(i=0;i<dst_width;i++) {
  77 #ifdef TEST
  78         /* test */
  79         if ((src_pos >> POS_FRAC_BITS) < 0 ||
  80             (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
  81             av_abort();
  82 #endif
  83         s = src + (src_pos >> POS_FRAC_BITS);
  84         phase = get_phase(src_pos);
  85         filter = filters + phase * NB_TAPS;
  86 #if NB_TAPS == 4
  87         sum = s[0] * filter[0] +
  88             s[1] * filter[1] +
  89             s[2] * filter[2] +
  90             s[3] * filter[3];
  91 #else
  92         {
  93             int j;
  94             sum = 0;
  95             for(j=0;j<NB_TAPS;j++)
  96                 sum += s[j] * filter[j];
  97         }
  98 #endif
  99         sum = sum >> FILTER_BITS;
 100         if (sum < 0)
 101             sum = 0;
 102         else if (sum > 255)
 103             sum = 255;
 104         dst[0] = sum;
 105         src_pos += src_incr;
 106         dst++;
 107     }
 108 }
 109
 110 /* This function must be optimized */
 111 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
 112                        int wrap, int16_t *filter)
 113 {
 114     int sum, i;
 115     const uint8_t *s;
 116
 117     s = src;
 118     for(i=0;i<dst_width;i++) {
 119 #if NB_TAPS == 4
 120         sum = s[0 * wrap] * filter[0] +
 121             s[1 * wrap] * filter[1] +
 122             s[2 * wrap] * filter[2] +
 123             s[3 * wrap] * filter[3];
 124 #else
 125         {
 126             int j;
 127             uint8_t *s1 = s;
 128
 129             sum = 0;
 130             for(j=0;j<NB_TAPS;j++) {
 131                 sum += s1[0] * filter[j];
 132                 s1 += wrap;
 133             }
 134         }
 135 #endif
 136         sum = sum >> FILTER_BITS;
 137         if (sum < 0)
 138             sum = 0;
 139         else if (sum > 255)
 140             sum = 255;
 141         dst[0] = sum;
 142         dst++;
 143         s++;
 144     }
 145 }
 146
 147 #ifdef HAVE_MMX
 148
 149 #include "i386/mmx.h"
 150
 151 #define FILTER4(reg) \
 152 {\
 153         s = src + (src_pos >> POS_FRAC_BITS);\
 154         phase = get_phase(src_pos);\
 155         filter = filters + phase * NB_TAPS;\
 156         movq_m2r(*s, reg);\
 157         punpcklbw_r2r(mm7, reg);\
 158         movq_m2r(*filter, mm6);\
 159         pmaddwd_r2r(reg, mm6);\
 160         movq_r2r(mm6, reg);\
 161         psrlq_i2r(32, reg);\
 162         paddd_r2r(mm6, reg);\
 163         psrad_i2r(FILTER_BITS, reg);\
 164         src_pos += src_incr;\
 165 }
 166
 167 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
 168
 169 /* XXX: do four pixels at a time */
 170 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
 171                                  const uint8_t *src, int src_width,
 172                                  int src_start, int src_incr, int16_t *filters)
 173 {
 174     int src_pos, phase;
 175     const uint8_t *s;
 176     int16_t *filter;
 177     mmx_t tmp;
 178
 179     src_pos = src_start;
 180     pxor_r2r(mm7, mm7);
 181
 182     while (dst_width >= 4) {
 183
 184         FILTER4(mm0);
 185         FILTER4(mm1);
 186         FILTER4(mm2);
 187         FILTER4(mm3);
 188
 189         packuswb_r2r(mm7, mm0);
 190         packuswb_r2r(mm7, mm1);
 191         packuswb_r2r(mm7, mm3);
 192         packuswb_r2r(mm7, mm2);
 193         movq_r2m(mm0, tmp);
 194         dst[0] = tmp.ub[0];
 195         movq_r2m(mm1, tmp);
 196         dst[1] = tmp.ub[0];
 197         movq_r2m(mm2, tmp);
 198         dst[2] = tmp.ub[0];
 199         movq_r2m(mm3, tmp);
 200         dst[3] = tmp.ub[0];
 201         dst += 4;
 202         dst_width -= 4;
 203     }
 204     while (dst_width > 0) {
 205         FILTER4(mm0);
 206         packuswb_r2r(mm7, mm0);
 207         movq_r2m(mm0, tmp);
 208         dst[0] = tmp.ub[0];
 209         dst++;
 210         dst_width--;
 211     }
 212     emms();
 213 }
 214
 215 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
 216                             int wrap, int16_t *filter)
 217 {
 218     int sum, i, v;
 219     const uint8_t *s;
 220     mmx_t tmp;
 221     mmx_t coefs[4];
 222
 223     for(i=0;i<4;i++) {
 224         v = filter[i];
 225         coefs[i].uw[0] = v;
 226         coefs[i].uw[1] = v;
 227         coefs[i].uw[2] = v;
 228         coefs[i].uw[3] = v;
 229     }
 230
 231     pxor_r2r(mm7, mm7);
 232     s = src;
 233     while (dst_width >= 4) {
 234         movq_m2r(s[0 * wrap], mm0);
 235         punpcklbw_r2r(mm7, mm0);
 236         movq_m2r(s[1 * wrap], mm1);
 237         punpcklbw_r2r(mm7, mm1);
 238         movq_m2r(s[2 * wrap], mm2);
 239         punpcklbw_r2r(mm7, mm2);
 240         movq_m2r(s[3 * wrap], mm3);
 241         punpcklbw_r2r(mm7, mm3);
 242
 243         pmullw_m2r(coefs[0], mm0);
 244         pmullw_m2r(coefs[1], mm1);
 245         pmullw_m2r(coefs[2], mm2);
 246         pmullw_m2r(coefs[3], mm3);
 247
 248         paddw_r2r(mm1, mm0);
 249         paddw_r2r(mm3, mm2);
 250         paddw_r2r(mm2, mm0);
 251         psraw_i2r(FILTER_BITS, mm0);
 252
 253         packuswb_r2r(mm7, mm0);
 254         movq_r2m(mm0, tmp);
 255
 256         *(uint32_t *)dst = tmp.ud[0];
 257         dst += 4;
 258         s += 4;
 259         dst_width -= 4;
 260     }
 261     while (dst_width > 0) {
 262         sum = s[0 * wrap] * filter[0] +
 263             s[1 * wrap] * filter[1] +
 264             s[2 * wrap] * filter[2] +
 265             s[3 * wrap] * filter[3];
 266         sum = sum >> FILTER_BITS;
 267         if (sum < 0)
 268             sum = 0;
 269         else if (sum > 255)
 270             sum = 255;
 271         dst[0] = sum;
 272         dst++;
 273         s++;
 274         dst_width--;
 275     }
 276     emms();
 277 }
 278 #endif
 279
 280 #ifdef HAVE_ALTIVEC
 281 typedef         union {
 282     vector unsigned char v;
 283     unsigned char c[16];
 284 } vec_uc_t;
 285
 286 typedef         union {
 287     vector signed short v;
 288     signed short s[8];
 289 } vec_ss_t;
 290
 291 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
 292                           int wrap, int16_t *filter)
 293 {
 294     int sum, i;
 295     const uint8_t *s;
 296     vector unsigned char *tv, tmp, dstv, zero;
 297     vec_ss_t srchv[4], srclv[4], fv[4];
 298     vector signed short zeros, sumhv, sumlv;
 299     s = src;
 300
 301     for(i=0;i<4;i++)
 302     {
 303         /*
 304            The vec_madds later on does an implicit >>15 on the result.
 305            Since FILTER_BITS is 8, and we have 15 bits of magnitude in
 306            a signed short, we have just enough bits to pre-shift our
 307            filter constants <<7 to compensate for vec_madds.
 308         */
 309         fv[i].s[0] = filter[i] << (15-FILTER_BITS);
 310         fv[i].v = vec_splat(fv[i].v, 0);
 311     }
 312
 313     zero = vec_splat_u8(0);
 314     zeros = vec_splat_s16(0);
 315
 316
 317     /*
 318        When we're resampling, we'd ideally like both our input buffers,
 319        and output buffers to be 16-byte aligned, so we can do both aligned
 320        reads and writes. Sadly we can't always have this at the moment, so
 321        we opt for aligned writes, as unaligned writes have a huge overhead.
 322        To do this, do enough scalar resamples to get dst 16-byte aligned.
 323     */
 324     i = (-(int)dst) & 0xf;
 325     while(i>0) {
 326         sum = s[0 * wrap] * filter[0] +
 327         s[1 * wrap] * filter[1] +
 328         s[2 * wrap] * filter[2] +
 329         s[3 * wrap] * filter[3];
 330         sum = sum >> FILTER_BITS;
 331         if (sum<0) sum = 0; else if (sum>255) sum=255;
 332         dst[0] = sum;
 333         dst++;
 334         s++;
 335         dst_width--;
 336         i--;
 337     }
 338
 339     /* Do our altivec resampling on 16 pixels at once. */
 340     while(dst_width>=16) {
 341         /*
 342            Read 16 (potentially unaligned) bytes from each of
 343            4 lines into 4 vectors, and split them into shorts.
 344            Interleave the multipy/accumulate for the resample
 345            filter with the loads to hide the 3 cycle latency
 346            the vec_madds have.
 347         */
 348         tv = (vector unsigned char *) &s[0 * wrap];
 349         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
 350         srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
 351         srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
 352         sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
 353         sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
 354
 355         tv = (vector unsigned char *) &s[1 * wrap];
 356         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
 357         srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
 358         srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
 359         sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
 360         sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
 361
 362         tv = (vector unsigned char *) &s[2 * wrap];
 363         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
 364         srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
 365         srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
 366         sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
 367         sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
 368
 369         tv = (vector unsigned char *) &s[3 * wrap];
 370         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
 371         srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
 372         srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
 373         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
 374         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
 375
 376         /*
 377            Pack the results into our destination vector,
 378            and do an aligned write of that back to memory.
 379         */
 380         dstv = vec_packsu(sumhv, sumlv) ;
 381         vec_st(dstv, 0, (vector unsigned char *) dst);
 382
 383         dst+=16;
 384         s+=16;
 385         dst_width-=16;
 386     }
 387
 388     /*
 389        If there are any leftover pixels, resample them
 390        with the slow scalar method.
 391     */
 392     while(dst_width>0) {
 393         sum = s[0 * wrap] * filter[0] +
 394         s[1 * wrap] * filter[1] +
 395         s[2 * wrap] * filter[2] +
 396         s[3 * wrap] * filter[3];
 397         sum = sum >> FILTER_BITS;
 398         if (sum<0) sum = 0; else if (sum>255) sum=255;
 399         dst[0] = sum;
 400         dst++;
 401         s++;
 402         dst_width--;
 403     }
 404 }
 405 #endif
 406
 407 /* slow version to handle limit cases. Does not need optimisation */
 408 static void h_resample_slow(uint8_t *dst, int dst_width,
 409                             const uint8_t *src, int src_width,
 410                             int src_start, int src_incr, int16_t *filters)
 411 {
 412     int src_pos, phase, sum, j, v, i;
 413     const uint8_t *s, *src_end;
 414     int16_t *filter;
 415
 416     src_end = src + src_width;
 417     src_pos = src_start;
 418     for(i=0;i<dst_width;i++) {
 419         s = src + (src_pos >> POS_FRAC_BITS);
 420         phase = get_phase(src_pos);
 421         filter = filters + phase * NB_TAPS;
 422         sum = 0;
 423         for(j=0;j<NB_TAPS;j++) {
 424             if (s < src)
 425                 v = src[0];
 426             else if (s >= src_end)
 427                 v = src_end[-1];
 428             else
 429                 v = s[0];
 430             sum += v * filter[j];
 431             s++;
 432         }
 433         sum = sum >> FILTER_BITS;
 434         if (sum < 0)
 435             sum = 0;
 436         else if (sum > 255)
 437             sum = 255;
 438         dst[0] = sum;
 439         src_pos += src_incr;
 440         dst++;
 441     }
 442 }
 443
 444 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
 445                        int src_width, int src_start, int src_incr,
 446                        int16_t *filters)
 447 {
 448     int n, src_end;
 449
 450     if (src_start < 0) {
 451         n = (0 - src_start + src_incr - 1) / src_incr;
 452         h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
 453         dst += n;
 454         dst_width -= n;
 455         src_start += n * src_incr;
 456     }
 457     src_end = src_start + dst_width * src_incr;
 458     if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
 459         n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
 460             src_incr;
 461     } else {
 462         n = dst_width;
 463     }
 464 #ifdef HAVE_MMX
 465     if ((mm_flags & MM_MMX) && NB_TAPS == 4)
 466         h_resample_fast4_mmx(dst, n,
 467                              src, src_width, src_start, src_incr, filters);
 468     else
 469 #endif
 470         h_resample_fast(dst, n,
 471                         src, src_width, src_start, src_incr, filters);
 472     if (n < dst_width) {
 473         dst += n;
 474         dst_width -= n;
 475         src_start += n * src_incr;
 476         h_resample_slow(dst, dst_width,
 477                         src, src_width, src_start, src_incr, filters);
 478     }
 479 }
 480
 481 static void component_resample(ImgReSampleContext *s,
 482                                uint8_t *output, int owrap, int owidth, int oheight,
 483                                uint8_t *input, int iwrap, int iwidth, int iheight)
 484 {
 485     int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
 486     uint8_t *new_line, *src_line;
 487
 488     last_src_y = - FCENTER - 1;
 489     /* position of the bottom of the filter in the source image */
 490     src_y = (last_src_y + NB_TAPS) * POS_FRAC;
 491     ring_y = NB_TAPS; /* position in ring buffer */
 492     for(y=0;y<oheight;y++) {
 493         /* apply horizontal filter on new lines from input if needed */
 494         src_y1 = src_y >> POS_FRAC_BITS;
 495         while (last_src_y < src_y1) {
 496             if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
 497                 ring_y = NB_TAPS;
 498             last_src_y++;
 499             /* handle limit conditions : replicate line (slightly
 500                inefficient because we filter multiple times) */
 501             y1 = last_src_y;
 502             if (y1 < 0) {
 503                 y1 = 0;
 504             } else if (y1 >= iheight) {
 505                 y1 = iheight - 1;
 506             }
 507             src_line = input + y1 * iwrap;
 508             new_line = s->line_buf + ring_y * owidth;
 509             /* apply filter and handle limit cases correctly */
 510             h_resample(new_line, owidth,
 511                        src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
 512                        &s->h_filters[0][0]);
 513             /* handle ring buffer wraping */
 514             if (ring_y >= LINE_BUF_HEIGHT) {
 515                 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
 516                        new_line, owidth);
 517             }
 518         }
 519         /* apply vertical filter */
 520         phase_y = get_phase(src_y);
 521 #ifdef HAVE_MMX
 522         /* desactivated MMX because loss of precision */
 523         if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
 524             v_resample4_mmx(output, owidth,
 525                             s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 526                             &s->v_filters[phase_y][0]);
 527         else
 528 #endif
 529 #ifdef HAVE_ALTIVEC
 530             if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
 531                 v_resample16_altivec(output, owidth,
 532                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 533                                 &s->v_filters[phase_y][0]);
 534         else
 535 #endif
 536             v_resample(output, owidth,
 537                        s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 538                        &s->v_filters[phase_y][0]);
 539
 540         src_y += s->v_incr;
 541
 542         output += owrap;
 543     }
 544 }
 545
 546 ImgReSampleContext *img_resample_init(int owidth, int oheight,
 547                                       int iwidth, int iheight)
 548 {
 549     return img_resample_full_init(owidth, oheight, iwidth, iheight,
 550             0, 0, 0, 0, 0, 0, 0, 0);
 551 }
 552
 553 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
 554                                       int iwidth, int iheight,
 555                                       int topBand, int bottomBand,
 556         int leftBand, int rightBand,
 557         int padtop, int padbottom,
 558         int padleft, int padright)
 559 {
 560     ImgReSampleContext *s;
 561
 562     if (!owidth || !oheight || !iwidth || !iheight)
 563         return NULL;
 564
 565     s = av_mallocz(sizeof(ImgReSampleContext));
 566     if (!s)
 567         return NULL;
 568     if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
 569         return NULL;
 570     s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
 571     if (!s->line_buf)
 572         goto fail;
 573
 574     s->owidth = owidth;
 575     s->oheight = oheight;
 576     s->iwidth = iwidth;
 577     s->iheight = iheight;
 578
 579     s->topBand = topBand;
 580     s->bottomBand = bottomBand;
 581     s->leftBand = leftBand;
 582     s->rightBand = rightBand;
 583
 584     s->padtop = padtop;
 585     s->padbottom = padbottom;
 586     s->padleft = padleft;
 587     s->padright = padright;
 588
 589     s->pad_owidth = owidth - (padleft + padright);
 590     s->pad_oheight = oheight - (padtop + padbottom);
 591
 592     s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
 593     s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
 594
 595     av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth  /
 596             (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
 597     av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
 598             (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
 599
 600     return s;
 601 fail:
 602     av_free(s);
 603     return NULL;
 604 }
 605
 606 void img_resample(ImgReSampleContext *s,
 607                   AVPicture *output, const AVPicture *input)
 608 {
 609     int i, shift;
 610     uint8_t* optr;
 611
 612     for (i=0;i<3;i++) {
 613         shift = (i == 0) ? 0 : 1;
 614
 615         optr = output->data[i] + (((output->linesize[i] *
 616                         s->padtop) + s->padleft) >> shift);
 617
 618         component_resample(s, optr, output->linesize[i],
 619                 s->pad_owidth >> shift, s->pad_oheight >> shift,
 620                 input->data[i] + (input->linesize[i] *
 621                     (s->topBand >> shift)) + (s->leftBand >> shift),
 622                 input->linesize[i], ((s->iwidth - s->leftBand -
 623                         s->rightBand) >> shift),
 624                            (s->iheight - s->topBand - s->bottomBand) >> shift);
 625     }
 626 }
 627
 628 void img_resample_close(ImgReSampleContext *s)
 629 {
 630     av_free(s->line_buf);
 631     av_free(s);
 632 }
 633
 634 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
 635                                   int dstW, int dstH, int dstFormat,
 636                                   int flags, SwsFilter *srcFilter,
 637                                   SwsFilter *dstFilter, double *param)
 638 {
 639     struct SwsContext *ctx;
 640
 641     ctx = av_malloc(sizeof(struct SwsContext));
 642     if (ctx == NULL) {
 643         av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
 644
 645         return NULL;
 646     }
 647
 648     if ((srcH != dstH) || (srcW != dstW)) {
 649         if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
 650             av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
 651         }
 652         ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
 653     } else {
 654         ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
 655         ctx->resampling_ctx->iheight = srcH;
 656         ctx->resampling_ctx->iwidth = srcW;
 657         ctx->resampling_ctx->oheight = dstH;
 658         ctx->resampling_ctx->owidth = dstW;
 659     }
 660     ctx->src_pix_fmt = srcFormat;
 661     ctx->dst_pix_fmt = dstFormat;
 662
 663     return ctx;
 664 }
 665
 666 void sws_freeContext(struct SwsContext *ctx)
 667 {
 668     if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
 669         (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
 670         img_resample_close(ctx->resampling_ctx);
 671     } else {
 672         av_free(ctx->resampling_ctx);
 673     }
 674     av_free(ctx);
 675 }
 676
 677 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
 678               int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
 679 {
 680     AVPicture src_pict, dst_pict;
 681     int i, res = 0;
 682     AVPicture picture_format_temp;
 683     AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
 684     uint8_t *buf1 = NULL, *buf2 = NULL;
 685     enum PixelFormat current_pix_fmt;
 686
 687     for (i = 0; i < 3; i++) {
 688         src_pict.data[i] = src[i];
 689         src_pict.linesize[i] = srcStride[i];
 690         dst_pict.data[i] = dst[i];
 691         dst_pict.linesize[i] = dstStride[i];
 692     }
 693     if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
 694         (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
 695         /* We have to rescale the picture, but only YUV420P rescaling is supported... */
 696
 697         if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
 698             int size;
 699
 700             /* create temporary picture for rescaling input*/
 701             size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
 702             buf1 = av_malloc(size);
 703             if (!buf1) {
 704                 res = -1;
 705                 goto the_end;
 706             }
 707             formatted_picture = &picture_format_temp;
 708             avpicture_fill((AVPicture*)formatted_picture, buf1,
 709                            PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
 710
 711             if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
 712                             &src_pict, ctx->src_pix_fmt,
 713                             ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
 714
 715                 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
 716                 res = -1;
 717                 goto the_end;
 718             }
 719         } else {
 720             formatted_picture = &src_pict;
 721         }
 722
 723         if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
 724             int size;
 725
 726             /* create temporary picture for rescaling output*/
 727             size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
 728             buf2 = av_malloc(size);
 729             if (!buf2) {
 730                 res = -1;
 731                 goto the_end;
 732             }
 733             resampled_picture = &picture_resample_temp;
 734             avpicture_fill((AVPicture*)resampled_picture, buf2,
 735                            PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
 736
 737         } else {
 738             resampled_picture = &dst_pict;
 739         }
 740
 741         /* ...and finally rescale!!! */
 742         img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
 743         current_pix_fmt = PIX_FMT_YUV420P;
 744     } else {
 745         resampled_picture = &src_pict;
 746         current_pix_fmt = ctx->src_pix_fmt;
 747     }
 748
 749     if (current_pix_fmt != ctx->dst_pix_fmt) {
 750         if (img_convert(&dst_pict, ctx->dst_pix_fmt,
 751                         resampled_picture, current_pix_fmt,
 752                         ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
 753
 754             av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
 755
 756             res = -1;
 757             goto the_end;
 758         }
 759     }
 760
 761 the_end:
 762     av_free(buf1);
 763     av_free(buf2);
 764     return res;
 765 }
 766
 767
 768 #ifdef TEST
 769 #include <stdio.h>
 770
 771 /* input */
 772 #define XSIZE 256
 773 #define YSIZE 256
 774 uint8_t img[XSIZE * YSIZE];
 775
 776 /* output */
 777 #define XSIZE1 512
 778 #define YSIZE1 512
 779 uint8_t img1[XSIZE1 * YSIZE1];
 780 uint8_t img2[XSIZE1 * YSIZE1];
 781
 782 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
 783 {
 784 #undef fprintf
 785     FILE *f;
 786     f=fopen(filename,"w");
 787     fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
 788     fwrite(img,1, xsize * ysize,f);
 789     fclose(f);
 790 #define fprintf please_use_av_log
 791 }
 792
 793 static void dump_filter(int16_t *filter)
 794 {
 795     int i, ph;
 796
 797     for(ph=0;ph<NB_PHASES;ph++) {
 798         av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
 799         for(i=0;i<NB_TAPS;i++) {
 800             av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
 801         }
 802         av_log(NULL, AV_LOG_INFO, "\n");
 803     }
 804 }
 805
 806 #ifdef HAVE_MMX
 807 int mm_flags;
 808 #endif
 809
 810 int main(int argc, char **argv)
 811 {
 812     int x, y, v, i, xsize, ysize;
 813     ImgReSampleContext *s;
 814     float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
 815     char buf[256];
 816
 817     /* build test image */
 818     for(y=0;y<YSIZE;y++) {
 819         for(x=0;x<XSIZE;x++) {
 820             if (x < XSIZE/2 && y < YSIZE/2) {
 821                 if (x < XSIZE/4 && y < YSIZE/4) {
 822                     if ((x % 10) <= 6 &&
 823                         (y % 10) <= 6)
 824                         v = 0xff;
 825                     else
 826                         v = 0x00;
 827                 } else if (x < XSIZE/4) {
 828                     if (x & 1)
 829                         v = 0xff;
 830                     else
 831                         v = 0;
 832                 } else if (y < XSIZE/4) {
 833                     if (y & 1)
 834                         v = 0xff;
 835                     else
 836                         v = 0;
 837                 } else {
 838                     if (y < YSIZE*3/8) {
 839                         if ((y+x) & 1)
 840                             v = 0xff;
 841                         else
 842                             v = 0;
 843                     } else {
 844                         if (((x+3) % 4) <= 1 &&
 845                             ((y+3) % 4) <= 1)
 846                             v = 0xff;
 847                         else
 848                             v = 0x00;
 849                     }
 850                 }
 851             } else if (x < XSIZE/2) {
 852                 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
 853             } else if (y < XSIZE/2) {
 854                 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
 855             } else {
 856                 v = ((x + y - XSIZE) * 255) / XSIZE;
 857             }
 858             img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
 859         }
 860     }
 861     save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
 862     for(i=0;i<sizeof(factors)/sizeof(float);i++) {
 863         fact = factors[i];
 864         xsize = (int)(XSIZE * fact);
 865         ysize = (int)((YSIZE - 100) * fact);
 866         s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
 867         av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
 868         dump_filter(&s->h_filters[0][0]);
 869         component_resample(s, img1, xsize, xsize, ysize,
 870                            img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
 871         img_resample_close(s);
 872
 873         snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
 874         save_pgm(buf, img1, xsize, ysize);
 875     }
 876
 877     /* mmx test */
 878 #ifdef HAVE_MMX
 879     av_log(NULL, AV_LOG_INFO, "MMX test\n");
 880     fact = 0.72;
 881     xsize = (int)(XSIZE * fact);
 882     ysize = (int)(YSIZE * fact);
 883     mm_flags = MM_MMX;
 884     s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
 885     component_resample(s, img1, xsize, xsize, ysize,
 886                        img, XSIZE, XSIZE, YSIZE);
 887
 888     mm_flags = 0;
 889     s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
 890     component_resample(s, img2, xsize, xsize, ysize,
 891                        img, XSIZE, XSIZE, YSIZE);
 892     if (memcmp(img1, img2, xsize * ysize) != 0) {
 893         av_log(NULL, AV_LOG_ERROR, "mmx error\n");
 894         exit(1);
 895     }
 896     av_log(NULL, AV_LOG_INFO, "MMX OK\n");
 897 #endif
 898     return 0;
 899 }
 900
 901 #endif