2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * High quality image resampling with polyphase filters .
32 #include "libvo/fastmemcpy.h"
/* Polyphase resampler configuration constants.
   NOTE(review): this extraction elides several companion #defines
   (NB_TAPS, PHASE_BITS, FILTER_BITS) that these macros reference —
   confirm against the full file. */
35 #define NB_COMPONENTS 3
/* number of filter phases = 2^PHASE_BITS */
38 #define NB_PHASES (1 << PHASE_BITS)
40 #define FCENTER 1 /* index of the center of the filter */
41 //#define TEST 1 /* Test it */
/* fixed-point source position: POS_FRAC_BITS fractional bits */
43 #define POS_FRAC_BITS 16
44 #define POS_FRAC (1 << POS_FRAC_BITS)
45 /* 6 bits precision is needed for MMX */
/* height of the ring buffer of horizontally-filtered lines */
48 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
/* Members of struct SwsContext (the enclosing struct declaration is
   elided from this extraction): the wrapped resampler instance plus the
   source/destination pixel formats recorded at context creation. */
51 struct ImgReSampleContext *resampling_ctx;
52 enum PixelFormat src_pix_fmt, dst_pix_fmt;
/* Per-instance resampler state. NOTE(review): the closing brace and
   some members (h_incr, v_incr, line_buf — used later in this file)
   are elided from this extraction. */
55 struct ImgReSampleContext {
/* input/output image dimensions in pixels */
56 int iwidth, iheight, owidth, oheight;
/* source crop bands (pixels removed from each edge of the input) */
57 int topBand, bottomBand, leftBand, rightBand;
/* padding added around the scaled output */
58 int padtop, padbottom, padleft, padright;
/* output size minus padding — the area actually written by scaling */
59 int pad_owidth, pad_oheight;
/* per-phase filter coefficient tables, 8-byte aligned for MMX loads */
61 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
62 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
66 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
/* Extract the filter phase index from a fixed-point source position:
   the top PHASE_BITS of the fractional part. (Braces elided in this
   extraction.) */
68 static inline int get_phase(int pos)
70 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
/* Horizontal resample of one line, fast path: valid only while the
   whole NB_TAPS window stays inside [0, src_width); limit cases are
   handled by h_resample_slow(). NOTE(review): several statements
   (locals, loop body close, store of the result) are elided from this
   extraction. */
73 /* This function must be optimized */
74 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
75 int src_width, int src_start, int src_incr,
78 int src_pos, phase, sum, i;
83 for(i=0;i<dst_width;i++) {
/* sanity check: window must not run off either end of the line */
86 if ((src_pos >> POS_FRAC_BITS) < 0 ||
87 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
/* integer part selects source pixel, fractional part selects phase */
90 s = src + (src_pos >> POS_FRAC_BITS);
91 phase = get_phase(src_pos);
92 filter = filters + phase * NB_TAPS;
94 sum = s[0] * filter[0] +
/* generic-tap accumulation (non-4-tap builds, by the look of it) */
102 for(j=0;j<NB_TAPS;j++)
103 sum += s[j] * filter[j];
/* drop the FILTER_BITS fixed-point scaling of the coefficients */
106 sum = sum >> FILTER_BITS;
/* Vertical resample: one output line from NB_TAPS input lines spaced
   `wrap` bytes apart, using a single phase's filter. NOTE(review):
   locals, clamping and the store are elided from this extraction. */
117 /* This function must be optimized */
118 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
119 int wrap, int16_t *filter)
125 for(i=0;i<dst_width;i++) {
/* unrolled 4-tap accumulation across the four source lines */
127 sum = s[0 * wrap] * filter[0] +
128 s[1 * wrap] * filter[1] +
129 s[2 * wrap] * filter[2] +
130 s[3 * wrap] * filter[3];
/* generic-tap variant */
137 for(j=0;j<NB_TAPS;j++) {
138 sum += s1[0] * filter[j];
143 sum = sum >> FILTER_BITS;
156 #include "i386/mmx.h"
/* FILTER4: MMX 4-tap horizontal filter step for one pixel — unpack
   bytes to words, multiply-accumulate against the current phase's
   coefficients, shift down by FILTER_BITS, advance src_pos. DUMP is a
   debug helper printing a register. NOTE(review): no comments are
   inserted inside the macro because every line is `\`-continued, and
   this extraction already elides some continuation lines. */
158 #define FILTER4(reg) \
160 s = src + (src_pos >> POS_FRAC_BITS);\
161 phase = get_phase(src_pos);\
162 filter = filters + phase * NB_TAPS;\
164 punpcklbw_r2r(mm7, reg);\
165 movq_m2r(*filter, mm6);\
166 pmaddwd_r2r(reg, mm6);\
169 paddd_r2r(mm6, reg);\
170 psrad_i2r(FILTER_BITS, reg);\
171 src_pos += src_incr;\
174 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
/* MMX fast-path horizontal resampler for NB_TAPS == 4: main loop does
   4 output pixels per iteration, tail loop handles the remainder.
   NOTE(review): register setup, FILTER4 invocations, stores and loop
   decrements are elided from this extraction. */
176 /* XXX: do four pixels at a time */
177 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
178 const uint8_t *src, int src_width,
179 int src_start, int src_incr, int16_t *filters)
189 while (dst_width >= 4) {
/* saturate each 32-bit result down to an unsigned byte */
196 packuswb_r2r(mm7, mm0);
197 packuswb_r2r(mm7, mm1);
198 packuswb_r2r(mm7, mm3);
199 packuswb_r2r(mm7, mm2);
/* scalar-ish tail: one pixel per iteration */
211 while (dst_width > 0) {
213 packuswb_r2r(mm7, mm0);
/* MMX vertical resampler for 4 taps: 8 pixels loaded per movq but
   (after punpcklbw) 4 pixels computed per iteration, with a plain C
   tail loop. NOTE(review): coefficient setup (`coefs`), the adds
   between pmullw results, stores and pointer advances are elided from
   this extraction. */
222 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
223 int wrap, int16_t *filter)
240 while (dst_width >= 4) {
/* load 4 source lines and widen bytes to 16-bit words */
241 movq_m2r(s[0 * wrap], mm0);
242 punpcklbw_r2r(mm7, mm0);
243 movq_m2r(s[1 * wrap], mm1);
244 punpcklbw_r2r(mm7, mm1);
245 movq_m2r(s[2 * wrap], mm2);
246 punpcklbw_r2r(mm7, mm2);
247 movq_m2r(s[3 * wrap], mm3);
248 punpcklbw_r2r(mm7, mm3);
/* per-line multiply by the splatted filter coefficients */
250 pmullw_m2r(coefs[0], mm0);
251 pmullw_m2r(coefs[1], mm1);
252 pmullw_m2r(coefs[2], mm2);
253 pmullw_m2r(coefs[3], mm3);
258 psraw_i2r(FILTER_BITS, mm0);
260 packuswb_r2r(mm7, mm0);
/* write 4 packed result bytes at once */
263 *(uint32_t *)dst = tmp.ud[0];
/* scalar tail, identical maths to v_resample() */
268 while (dst_width > 0) {
269 sum = s[0 * wrap] * filter[0] +
270 s[1 * wrap] * filter[1] +
271 s[2 * wrap] * filter[2] +
272 s[3 * wrap] * filter[3];
273 sum = sum >> FILTER_BITS;
/* Members of two AltiVec overlay unions (presumably vec_uc_t and
   vec_ss_t — the union declarations themselves are elided from this
   extraction; verify against the full file). */
289 vector unsigned char v;
294 vector signed short v;
/* AltiVec vertical resampler: scalar-resamples until dst is 16-byte
   aligned, then processes 16 pixels per iteration with vec_madds, then
   finishes leftovers in scalar. NOTE(review): this extraction elides
   comment delimiters, loop headers and pointer advances; the loop at
   line 356 reading s[0*wrap] but permuting with lvsl of s[i*wrap]
   should be checked against the full file. */
298 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
299 int wrap, int16_t *filter)
303 vector unsigned char *tv, tmp, dstv, zero;
304 vec_ss_t srchv[4], srclv[4], fv[4];
305 vector signed short zeros, sumhv, sumlv;
311 The vec_madds later on does an implicit >>15 on the result.
312 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
313 a signed short, we have just enough bits to pre-shift our
314 filter constants <<7 to compensate for vec_madds.
/* splat each pre-shifted coefficient across a whole vector */
316 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
317 fv[i].v = vec_splat(fv[i].v, 0);
320 zero = vec_splat_u8(0);
321 zeros = vec_splat_s16(0);
325 When we're resampling, we'd ideally like both our input buffers,
326 and output buffers to be 16-byte aligned, so we can do both aligned
327 reads and writes. Sadly we can't always have this at the moment, so
328 we opt for aligned writes, as unaligned writes have a huge overhead.
329 To do this, do enough scalar resamples to get dst 16-byte aligned.
/* number of scalar pixels needed to align dst to 16 bytes */
331 i = (-(int)dst) & 0xf;
333 sum = s[0 * wrap] * filter[0] +
334 s[1 * wrap] * filter[1] +
335 s[2 * wrap] * filter[2] +
336 s[3 * wrap] * filter[3];
337 sum = sum >> FILTER_BITS;
338 if (sum<0) sum = 0; else if (sum>255) sum=255;
346 /* Do our altivec resampling on 16 pixels at once. */
347 while(dst_width>=16) {
349 Read 16 (potentially unaligned) bytes from each of
350 4 lines into 4 vectors, and split them into shorts.
351 Interleave the multipy/accumulate for the resample
352 filter with the loads to hide the 3 cycle latency
355 tv = (vector unsigned char *) &s[0 * wrap];
356 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
357 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
358 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
359 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
360 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
362 tv = (vector unsigned char *) &s[1 * wrap];
363 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
364 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
365 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
366 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
367 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
369 tv = (vector unsigned char *) &s[2 * wrap];
370 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
371 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
372 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
373 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
374 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
376 tv = (vector unsigned char *) &s[3 * wrap];
377 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
378 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
379 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
380 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
381 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
384 Pack the results into our destination vector,
385 and do an aligned write of that back to memory.
387 dstv = vec_packsu(sumhv, sumlv) ;
388 vec_st(dstv, 0, (vector unsigned char *) dst);
396 If there are any leftover pixels, resample them
397 with the slow scalar method.
400 sum = s[0 * wrap] * filter[0] +
401 s[1 * wrap] * filter[1] +
402 s[2 * wrap] * filter[2] +
403 s[3 * wrap] * filter[3];
404 sum = sum >> FILTER_BITS;
405 if (sum<0) sum = 0; else if (sum>255) sum=255;
/* Horizontal resample, slow path: like h_resample_fast but clamps the
   tap window to [src, src_end) so it is safe at the line edges.
   NOTE(review): the clamp branches' bodies and the store are elided
   from this extraction. */
414 /* slow version to handle limit cases. Does not need optimisation */
415 static void h_resample_slow(uint8_t *dst, int dst_width,
416 const uint8_t *src, int src_width,
417 int src_start, int src_incr, int16_t *filters)
419 int src_pos, phase, sum, j, v, i;
420 const uint8_t *s, *src_end;
423 src_end = src + src_width;
425 for(i=0;i<dst_width;i++) {
426 s = src + (src_pos >> POS_FRAC_BITS);
427 phase = get_phase(src_pos);
428 filter = filters + phase * NB_TAPS;
/* per-tap: replicate the edge pixel when s falls outside the line */
430 for(j=0;j<NB_TAPS;j++) {
433 else if (s >= src_end)
437 sum += v * filter[j];
440 sum = sum >> FILTER_BITS;
/* Horizontal resample dispatcher: slow path for the leading pixels
   whose window starts before the line, fast path (MMX when available
   and NB_TAPS==4) for the interior, slow path again for the trailing
   edge. NOTE(review): several guard/else lines are elided from this
   extraction. */
451 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
452 int src_width, int src_start, int src_incr,
/* number of leading output pixels with src_pos still negative */
458 n = (0 - src_start + src_incr - 1) / src_incr;
459 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
462 src_start += n * src_incr;
/* trim the fast-path count so its window never crosses src_width */
464 src_end = src_start + dst_width * src_incr;
465 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
466 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
472 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
473 h_resample_fast4_mmx(dst, n,
474 src, src_width, src_start, src_incr, filters);
477 h_resample_fast(dst, n,
478 src, src_width, src_start, src_incr, filters);
/* remaining pixels at the right edge need the clamping slow path */
482 src_start += n * src_incr;
483 h_resample_slow(dst, dst_width,
484 src, src_width, src_start, src_incr, filters);
/* Resample one plane: horizontally filter input lines on demand into a
   ring buffer of LINE_BUF_HEIGHT+NB_TAPS lines, then vertically filter
   NB_TAPS buffered lines per output row. NOTE(review): ring reset,
   edge-replication branches, src_y/output advance and loop closes are
   elided from this extraction. */
488 static void component_resample(ImgReSampleContext *s,
489 uint8_t *output, int owrap, int owidth, int oheight,
490 uint8_t *input, int iwrap, int iwidth, int iheight)
492 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
493 uint8_t *new_line, *src_line;
/* start "before" the image so the first rows replicate the top line */
495 last_src_y = - FCENTER - 1;
496 /* position of the bottom of the filter in the source image */
497 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
498 ring_y = NB_TAPS; /* position in ring buffer */
499 for(y=0;y<oheight;y++) {
500 /* apply horizontal filter on new lines from input if needed */
501 src_y1 = src_y >> POS_FRAC_BITS;
502 while (last_src_y < src_y1) {
503 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
506 /* handle limit conditions : replicate line (slightly
507 inefficient because we filter multiple times) */
511 } else if (y1 >= iheight) {
514 src_line = input + y1 * iwrap;
515 new_line = s->line_buf + ring_y * owidth;
516 /* apply filter and handle limit cases correctly */
517 h_resample(new_line, owidth,
518 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
519 &s->h_filters[0][0]);
520 /* handle ring buffer wraping */
521 if (ring_y >= LINE_BUF_HEIGHT) {
522 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
526 /* apply vertical filter */
527 phase_y = get_phase(src_y);
529 /* desactivated MMX because loss of precision */
530 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
531 v_resample4_mmx(output, owidth,
532 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
533 &s->v_filters[phase_y][0]);
537 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
538 v_resample16_altivec(output, owidth,
539 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
540 &s->v_filters[phase_y][0]);
/* generic C fallback */
543 v_resample(output, owidth,
544 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
545 &s->v_filters[phase_y][0]);
/* Convenience constructor: full init with no cropping and no padding. */
553 ImgReSampleContext *img_resample_init(int owidth, int oheight,
554 int iwidth, int iheight)
556 return img_resample_full_init(owidth, oheight, iwidth, iheight,
557 0, 0, 0, 0, 0, 0, 0, 0);
/* Allocate and configure a resampling context with crop bands and
   output padding; builds the H and V polyphase filter tables.
   NOTE(review): NULL-check/cleanup paths and some assignments (iwidth,
   owidth, padtop, return) are elided from this extraction. */
560 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
561 int iwidth, int iheight,
562 int topBand, int bottomBand,
563 int leftBand, int rightBand,
564 int padtop, int padbottom,
565 int padleft, int padright)
567 ImgReSampleContext *s;
/* reject degenerate geometries up front */
569 if (!owidth || !oheight || !iwidth || !iheight)
572 s = av_mallocz(sizeof(ImgReSampleContext));
/* overflow guard for the line-buffer allocation below */
575 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
577 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
582 s->oheight = oheight;
584 s->iheight = iheight;
586 s->topBand = topBand;
587 s->bottomBand = bottomBand;
588 s->leftBand = leftBand;
589 s->rightBand = rightBand;
592 s->padbottom = padbottom;
593 s->padleft = padleft;
594 s->padright = padright;
596 s->pad_owidth = owidth - (padleft + padright);
597 s->pad_oheight = oheight - (padtop + padbottom);
/* fixed-point source step per output pixel/line */
599 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
600 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
/* filter factor is the scale ratio cropped-input -> padded-output */
602 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
603 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
604 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
605 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
/* Resample every plane of a YUV420P picture: plane 0 full resolution,
   chroma planes halved via `shift`. NOTE(review): the plane loop header
   and closing braces are elided from this extraction. */
613 void img_resample(ImgReSampleContext *s,
614 AVPicture *output, const AVPicture *input)
/* chroma (i>0) is subsampled by 2 in both directions */
620 shift = (i == 0) ? 0 : 1;
/* skip over the configured output padding */
622 optr = output->data[i] + (((output->linesize[i] *
623 s->padtop) + s->padleft) >> shift);
625 component_resample(s, optr, output->linesize[i],
626 s->pad_owidth >> shift, s->pad_oheight >> shift,
627 input->data[i] + (input->linesize[i] *
628 (s->topBand >> shift)) + (s->leftBand >> shift),
629 input->linesize[i], ((s->iwidth - s->leftBand -
630 s->rightBand) >> shift),
631 (s->iheight - s->topBand - s->bottomBand) >> shift);
/* Free a resampling context. NOTE(review): the av_free(s) of the
   context itself appears elided from this extraction. */
635 void img_resample_close(ImgReSampleContext *s)
637 av_free(s->line_buf);
/* Minimal swscale-compatible context constructor: only YUV420P
   rescaling is actually supported; when sizes match, a bare context
   recording the geometry/formats is created instead. NOTE(review):
   NULL checks, else branches and the return are elided from this
   extraction. */
641 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
642 int dstW, int dstH, int dstFormat,
643 int flags, SwsFilter *srcFilter,
644 SwsFilter *dstFilter, double *param)
646 struct SwsContext *ctx;
648 ctx = av_malloc(sizeof(struct SwsContext));
650 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
/* rescaling needed: warn if conversion through YUV420P is implied */
655 if ((srcH != dstH) || (srcW != dstW)) {
656 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
657 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
659 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
/* same-size path: record geometry only, no resampler filters */
661 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
662 ctx->resampling_ctx->iheight = srcH;
663 ctx->resampling_ctx->iwidth = srcW;
664 ctx->resampling_ctx->oheight = dstH;
665 ctx->resampling_ctx->owidth = dstW;
667 ctx->src_pix_fmt = srcFormat;
668 ctx->dst_pix_fmt = dstFormat;
/* Free a SwsContext: full resampler teardown when it was really
   scaling, plain free of the bare context otherwise. NOTE(review):
   the else branch and av_free(ctx) are partially elided from this
   extraction. */
673 void sws_freeContext(struct SwsContext *ctx)
675 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
676 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
677 img_resample_close(ctx->resampling_ctx);
679 av_free(ctx->resampling_ctx);
686 * Checks if context is valid or reallocs a new one instead.
687 * If context is NULL, just calls sws_getContext() to get a new one.
688 * Otherwise, checks if the parameters are the same already saved in context.
689 * If that is the case, returns the current context.
690 * Otherwise, frees context and gets a new one.
692 * Be warned that srcFilter, dstFilter are not checked, they are
693 * asumed to remain valid.
695 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
696 int srcW, int srcH, int srcFormat,
697 int dstW, int dstH, int dstFormat, int flags,
698 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
/* any mismatch invalidates the cached context */
701 if ((ctx->resampling_ctx->iwidth != srcW) ||
702 (ctx->resampling_ctx->iheight != srcH) ||
703 (ctx->src_pix_fmt != srcFormat) ||
704 (ctx->resampling_ctx->owidth != dstW) ||
705 (ctx->resampling_ctx->oheight != dstH) ||
706 (ctx->dst_pix_fmt != dstFormat))
708 sws_freeContext(ctx);
/* NOTE(review): the "parameters match -> return ctx" branch appears
   elided from this extraction; fall-through builds a fresh context. */
713 return sws_getContext(srcW, srcH, srcFormat,
714 dstW, dstH, dstFormat, flags,
715 srcFilter, dstFilter, param);
/* swscale-compatible scaling entry point: wrap raw plane pointers in
   AVPictures, convert to YUV420P if needed, rescale, then convert or
   copy to the destination format. NOTE(review): error paths, buffer
   frees and the return statement are elided from this extraction. */
720 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
721 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
723 AVPicture src_pict, dst_pict;
725 AVPicture picture_format_temp;
726 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
727 uint8_t *buf1 = NULL, *buf2 = NULL;
728 enum PixelFormat current_pix_fmt;
/* adapt the raw pointer/stride arrays into AVPicture structs */
730 for (i = 0; i < 4; i++) {
731 src_pict.data[i] = src[i];
732 src_pict.linesize[i] = srcStride[i];
733 dst_pict.data[i] = dst[i];
734 dst_pict.linesize[i] = dstStride[i];
736 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
737 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
738 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
740 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
743 /* create temporary picture for rescaling input*/
744 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
745 buf1 = av_malloc(size);
750 formatted_picture = &picture_format_temp;
751 avpicture_fill((AVPicture*)formatted_picture, buf1,
752 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
754 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
755 &src_pict, ctx->src_pix_fmt,
756 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
758 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
/* source already YUV420P: scale straight from the caller's picture */
763 formatted_picture = &src_pict;
766 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
769 /* create temporary picture for rescaling output*/
770 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
771 buf2 = av_malloc(size);
776 resampled_picture = &picture_resample_temp;
777 avpicture_fill((AVPicture*)resampled_picture, buf2,
778 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
781 resampled_picture = &dst_pict;
784 /* ...and finally rescale!!! */
785 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
786 current_pix_fmt = PIX_FMT_YUV420P;
/* no rescale needed: pass the source through */
788 resampled_picture = &src_pict;
789 current_pix_fmt = ctx->src_pix_fmt;
/* final conversion (or copy) into the caller's destination */
792 if (current_pix_fmt != ctx->dst_pix_fmt) {
793 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
794 resampled_picture, current_pix_fmt,
795 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
797 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
802 } else if (resampled_picture != &dst_pict) {
803 img_copy(&dst_pict, resampled_picture, current_pix_fmt,
804 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
/* Static buffers for the self-test build (likely under #ifdef TEST):
   source image and two resampled outputs for comparison. XSIZE/YSIZE/
   XSIZE1/YSIZE1 definitions are elided from this extraction. */
820 uint8_t img[XSIZE * YSIZE];
825 uint8_t img1[XSIZE1 * YSIZE1];
826 uint8_t img2[XSIZE1 * YSIZE1];
/* Dump a grayscale buffer as a binary PGM (P5) file for visual
   inspection. Test-only helper; fopen result is not checked and the
   fclose appears elided from this extraction. */
828 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
832 f=fopen(filename,"w");
833 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
834 fwrite(img,1, xsize * ysize,f);
/* Poison fprintf after save_pgm so later code must use av_log. */
836 #define fprintf please_use_av_log
/* Print every phase's coefficients, scaled back from fixed point
   (divide by 256, i.e. assumes FILTER_BITS == 8 here). */
839 static void dump_filter(int16_t *filter)
843 for(ph=0;ph<NB_PHASES;ph++) {
844 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
845 for(i=0;i<NB_TAPS;i++) {
846 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
848 av_log(NULL, AV_LOG_INFO, "\n");
/* Self-test driver: synthesizes a test pattern, resamples it at several
   scale factors writing /tmp/out*.pgm, then cross-checks the MMX path
   against a reference run. NOTE(review): many branch bodies, the `buf`
   declaration, mm_flags toggling between the two MMX runs, and the
   return are elided from this extraction. */
856 int main(int argc, char **argv)
858 int x, y, v, i, xsize, ysize;
859 ImgReSampleContext *s;
860 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
863 /* build test image */
864 for(y=0;y<YSIZE;y++) {
865 for(x=0;x<XSIZE;x++) {
866 if (x < XSIZE/2 && y < YSIZE/2) {
867 if (x < XSIZE/4 && y < YSIZE/4) {
873 } else if (x < XSIZE/4) {
878 } else if (y < XSIZE/4) {
890 if (((x+3) % 4) <= 1 &&
/* gradient quadrants */
897 } else if (x < XSIZE/2) {
898 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
899 } else if (y < XSIZE/2) {
900 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
902 v = ((x + y - XSIZE) * 255) / XSIZE;
904 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
907 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
/* resample at each factor, cropping 50 lines top and bottom */
908 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
910 xsize = (int)(XSIZE * fact);
911 ysize = (int)((YSIZE - 100) * fact);
912 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
913 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
914 dump_filter(&s->h_filters[0][0]);
915 component_resample(s, img1, xsize, xsize, ysize,
916 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
917 img_resample_close(s);
919 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
920 save_pgm(buf, img1, xsize, ysize);
/* MMX vs reference comparison: two identical resamples into img1/img2 */
925 av_log(NULL, AV_LOG_INFO, "MMX test\n");
927 xsize = (int)(XSIZE * fact);
928 ysize = (int)(YSIZE * fact);
930 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
931 component_resample(s, img1, xsize, xsize, ysize,
932 img, XSIZE, XSIZE, YSIZE);
935 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
936 component_resample(s, img2, xsize, xsize, ysize,
937 img, XSIZE, XSIZE, YSIZE);
938 if (memcmp(img1, img2, xsize * ysize) != 0) {
939 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
942 av_log(NULL, AV_LOG_INFO, "MMX OK\n");