2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * High quality image resampling with polyphase filters.
29 #include "libswscale/swscale.h"
32 #include "ppc/imgresample_altivec.h"
/* Compile-time constants of the polyphase resampler.
   NOTE(review): the definitions of PHASE_BITS, NB_TAPS and FILTER_BITS, which
   are used throughout the file, are not visible in this view. */
35 #define NB_COMPONENTS 3
/* number of sub-pixel phases, i.e. of filters stored per direction */
38 #define NB_PHASES (1 << PHASE_BITS)
40 #define FCENTER 1 /* index of the center of the filter */
41 //#define TEST 1 /* Test it */
/* source positions are fixed-point numbers with POS_FRAC_BITS fractional bits */
43 #define POS_FRAC_BITS 16
44 #define POS_FRAC (1 << POS_FRAC_BITS)
45 /* 6 bits precision is needed for MMX */
/* the line buffer is allocated with LINE_BUF_HEIGHT + NB_TAPS lines */
48 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
/* Members of the scaler context handled by the sws_* functions below:
   the logging class, the underlying resampler state and the source and
   destination pixel formats.
   NOTE(review): the enclosing struct header/footer is not visible in this view. */
51 const AVClass *av_class;
52 struct ImgReSampleContext *resampling_ctx;
53 enum PixelFormat src_pix_fmt, dst_pix_fmt;
/* State of one image resampler instance. */
56 typedef struct ImgReSampleContext {
/* full input and output picture sizes */
57 int iwidth, iheight, owidth, oheight;
/* cropping applied to the input on each side */
58 int topBand, bottomBand, leftBand, rightBand;
/* padding applied to the output on each side */
59 int padtop, padbottom, padleft, padright;
/* output size minus the padding */
60 int pad_owidth, pad_oheight;
62 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
63 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
/* NOTE(review): the h_incr, v_incr and line_buf members used elsewhere in the
   file, and the end of this typedef, are not visible in this view. */
/* Filter builder defined outside this file; it is called from
   img_resample_full_init() to fill h_filters and v_filters with NB_PHASES
   filters of NB_TAPS coefficients, scaled by 1 << FILTER_BITS. */
67 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
69 static inline int get_phase(int pos)
71 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
/* Horizontal resampling, fast path. For each of the dst_width output pixels
   the integer part of the fixed-point position src_pos selects the first
   source sample and its phase selects one filter of NB_TAPS coefficients;
   the weighted sum is then scaled back by FILTER_BITS. No bounds handling is
   done here except for the check below.
   NOTE(review): several lines of this function (declarations, the end of the
   unrolled sum, the store to dst and the position update) are not visible in
   this view. */
74 /* This function must be optimized */
75 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
76 int src_width, int src_start, int src_incr,
79 int src_pos, phase, sum, i;
84 for(i=0;i<dst_width;i++) {
/* range check: all NB_TAPS samples must lie inside the source line
   (presumably only compiled when TEST is defined - confirm) */
87 if ((src_pos >> POS_FRAC_BITS) < 0 ||
88 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
91 s = src + (src_pos >> POS_FRAC_BITS);
92 phase = get_phase(src_pos);
93 filter = filters + phase * NB_TAPS;
95 sum = s[0] * filter[0] +
/* generic version for any NB_TAPS */
103 for(j=0;j<NB_TAPS;j++)
104 sum += s[j] * filter[j];
107 sum = sum >> FILTER_BITS;
/* Vertical resampling of one output line of dst_width pixels. The taps of a
   pixel are read from lines that are wrap bytes apart in src, and the same
   filter of NB_TAPS coefficients is used for the whole line.
   NOTE(review): declarations, the store to dst and the pointer updates are
   not visible in this view. */
118 /* This function must be optimized */
119 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
120 int wrap, int16_t *filter)
126 for(i=0;i<dst_width;i++) {
/* unrolled sum for four taps */
128 sum = s[0 * wrap] * filter[0] +
129 s[1 * wrap] * filter[1] +
130 s[2 * wrap] * filter[2] +
131 s[3 * wrap] * filter[3];
/* generic version; s1 presumably advances by wrap on each tap - confirm */
138 for(j=0;j<NB_TAPS;j++) {
139 sum += s1[0] * filter[j];
144 sum = sum >> FILTER_BITS;
157 #include "i386/mmx.h"
/* FILTER4(reg): MMX computation of one horizontally filtered pixel into reg.
   It relies on the caller's src, src_pos, src_incr, filters, s, phase and
   filter variables, uses mm6 as scratch and mm7 as the second operand of the
   byte unpack, and advances src_pos by src_incr.
   NOTE(review): some lines of the macro are not visible in this view. */
159 #define FILTER4(reg) \
161 s = src + (src_pos >> POS_FRAC_BITS);\
162 phase = get_phase(src_pos);\
163 filter = filters + phase * NB_TAPS;\
165 punpcklbw_r2r(mm7, reg);\
166 movq_m2r(*filter, mm6);\
167 pmaddwd_r2r(reg, mm6);\
170 paddd_r2r(mm6, reg);\
171 psrad_i2r(FILTER_BITS, reg);\
172 src_pos += src_incr;\
/* Debug helper: store an MMX register into tmp and print it in hexadecimal. */
175 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
/* MMX variant of the horizontal fast path. Output pixels are handled in
   groups of four while at least four remain, then one at a time; the results
   are packed back to unsigned bytes with saturation (packuswb).
   NOTE(review): the FILTER4 invocations, the stores to dst and the loop
   counter updates are not visible in this view. */
177 /* XXX: do four pixels at a time */
178 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
179 const uint8_t *src, int src_width,
180 int src_start, int src_incr, int16_t *filters)
190 while (dst_width >= 4) {
197 packuswb_r2r(mm7, mm0);
198 packuswb_r2r(mm7, mm1);
199 packuswb_r2r(mm7, mm3);
200 packuswb_r2r(mm7, mm2);
/* remaining pixels */
212 while (dst_width > 0) {
214 packuswb_r2r(mm7, mm0);
/* MMX variant of the 4-tap vertical filter. Four pixels are processed per
   iteration with 16-bit multiplies (pmullw) and a 16-bit arithmetic shift;
   the remaining pixels use the scalar formula.
   NOTE(review): declarations, the set-up of coefs, the additions between the
   multiplies and the shift, and the pointer/counter updates are not visible
   in this view. */
223 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
224 int wrap, int16_t *filter)
241 while (dst_width >= 4) {
/* load one group of pixels from each of the four lines and widen to 16 bits */
242 movq_m2r(s[0 * wrap], mm0);
243 punpcklbw_r2r(mm7, mm0);
244 movq_m2r(s[1 * wrap], mm1);
245 punpcklbw_r2r(mm7, mm1);
246 movq_m2r(s[2 * wrap], mm2);
247 punpcklbw_r2r(mm7, mm2);
248 movq_m2r(s[3 * wrap], mm3);
249 punpcklbw_r2r(mm7, mm3);
/* weight each line by its coefficient */
251 pmullw_m2r(coefs[0], mm0);
252 pmullw_m2r(coefs[1], mm1);
253 pmullw_m2r(coefs[2], mm2);
254 pmullw_m2r(coefs[3], mm3);
259 psraw_i2r(FILTER_BITS, mm0);
261 packuswb_r2r(mm7, mm0);
/* NOTE(review): 32-bit store through a cast uint8_t pointer; alignment of dst
   is not established here */
264 *(uint32_t *)dst = tmp.ud[0];
/* scalar tail */
269 while (dst_width > 0) {
270 sum = s[0 * wrap] * filter[0] +
271 s[1 * wrap] * filter[1] +
272 s[2 * wrap] * filter[2] +
273 s[3 * wrap] * filter[3];
274 sum = sum >> FILTER_BITS;
286 #endif /* HAVE_MMX */
/* Horizontal resampling with per-tap bounds handling: unlike the fast path,
   each tap position is compared with the limits of the source line before
   being used, so filters may overlap the line borders.
   NOTE(review): the statements selecting the value v for taps outside the
   line, the store to dst and the position update are not visible in this
   view. */
288 /* slow version to handle limit cases. Does not need optimization */
289 static void h_resample_slow(uint8_t *dst, int dst_width,
290 const uint8_t *src, int src_width,
291 int src_start, int src_incr, int16_t *filters)
293 int src_pos, phase, sum, j, v, i;
294 const uint8_t *s, *src_end;
/* one past the last valid source sample */
297 src_end = src + src_width;
299 for(i=0;i<dst_width;i++) {
300 s = src + (src_pos >> POS_FRAC_BITS);
301 phase = get_phase(src_pos);
302 filter = filters + phase * NB_TAPS;
304 for(j=0;j<NB_TAPS;j++) {
307 else if (s >= src_end)
311 sum += v * filter[j];
314 sum = sum >> FILTER_BITS;
/* Horizontal resampling of one line. The line is split into up to three
   runs: leading pixels whose source position is still negative go through
   h_resample_slow(), pixels whose NB_TAPS samples all lie inside the source
   line go through a fast routine, and the remaining pixels go through
   h_resample_slow() again.
   NOTE(review): the conditions around the first and last run and the updates
   of dst/dst_width are not visible in this view. */
325 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
326 int src_width, int src_start, int src_incr,
/* number of steps needed for the position to become non-negative (rounded up) */
332 n = (0 - src_start + src_incr - 1) / src_incr;
333 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
336 src_start += n * src_incr;
/* position just after the last output pixel */
338 src_end = src_start + dst_width * src_incr;
339 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
340 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
346 if ((mm_flags & FF_MM_MMX) && NB_TAPS == 4)
347 h_resample_fast4_mmx(dst, n,
348 src, src_width, src_start, src_incr, filters);
351 h_resample_fast(dst, n,
352 src, src_width, src_start, src_incr, filters);
356 src_start += n * src_incr;
357 h_resample_slow(dst, dst_width,
358 src, src_width, src_start, src_incr, filters);
/* Resample one picture plane. For every output line, the input lines that
   are newly needed are filtered horizontally into s->line_buf (owidth bytes
   per line), then the NB_TAPS most recent buffered lines are combined by the
   vertical filter selected by the phase of src_y.
   output/owrap/owidth/oheight describe the destination plane,
   input/iwrap/iwidth/iheight the source plane.
   NOTE(review): several lines (ring index wrap, computation of y1, border
   clamping, the source of the memcpy, and the updates of src_y and output)
   are not visible in this view. */
362 static void component_resample(ImgReSampleContext *s,
363 uint8_t *output, int owrap, int owidth, int oheight,
364 uint8_t *input, int iwrap, int iwidth, int iheight)
366 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
367 uint8_t *new_line, *src_line;
369 last_src_y = - FCENTER - 1;
370 /* position of the bottom of the filter in the source image */
371 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
372 ring_y = NB_TAPS; /* position in ring buffer */
373 for(y=0;y<oheight;y++) {
374 /* apply horizontal filter on new lines from input if needed */
375 src_y1 = src_y >> POS_FRAC_BITS;
376 while (last_src_y < src_y1) {
377 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
380 /* handle limit conditions : replicate line (slightly
381 inefficient because we filter multiple times) */
385 } else if (y1 >= iheight) {
388 src_line = input + y1 * iwrap;
389 new_line = s->line_buf + ring_y * owidth;
390 /* apply filter and handle limit cases correctly */
391 h_resample(new_line, owidth,
392 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
393 &s->h_filters[0][0]);
394 /* handle ring buffer wrapping */
395 if (ring_y >= LINE_BUF_HEIGHT) {
396 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
400 /* apply vertical filter */
401 phase_y = get_phase(src_y);
403 /* deactivated MMX because of loss of precision (note the "&& 0") */
404 if ((mm_flags & FF_MM_MMX) && NB_TAPS == 4 && 0)
405 v_resample4_mmx(output, owidth,
406 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
407 &s->v_filters[phase_y][0]);
411 if ((mm_flags & FF_MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
412 v_resample16_altivec(output, owidth,
413 s->line_buf + (ring_y - NB_TAPS + 1) * owidth,
414 owidth, &s->v_filters[phase_y][0]);
417 v_resample(output, owidth,
418 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
419 &s->v_filters[phase_y][0]);
427 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
428 int iwidth, int iheight,
429 int topBand, int bottomBand,
430 int leftBand, int rightBand,
431 int padtop, int padbottom,
432 int padleft, int padright)
434 ImgReSampleContext *s;
436 if (!owidth || !oheight || !iwidth || !iheight)
439 s = av_mallocz(sizeof(ImgReSampleContext));
442 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
444 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
449 s->oheight = oheight;
451 s->iheight = iheight;
453 s->topBand = topBand;
454 s->bottomBand = bottomBand;
455 s->leftBand = leftBand;
456 s->rightBand = rightBand;
459 s->padbottom = padbottom;
460 s->padleft = padleft;
461 s->padright = padright;
463 s->pad_owidth = owidth - (padleft + padright);
464 s->pad_oheight = oheight - (padtop + padbottom);
466 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
467 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
469 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
470 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
471 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
472 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
480 ImgReSampleContext *img_resample_init(int owidth, int oheight,
481 int iwidth, int iheight)
483 return img_resample_full_init(owidth, oheight, iwidth, iheight,
484 0, 0, 0, 0, 0, 0, 0, 0);
/* Resample a whole picture plane by plane with component_resample().
   Plane 0 is used at full size; for the other planes every size and offset
   is shifted right by one, i.e. halved.
   NOTE(review): the declarations and the loop header over i are not visible
   in this view. */
487 void img_resample(ImgReSampleContext *s,
488 AVPicture *output, const AVPicture *input)
494 shift = (i == 0) ? 0 : 1;
/* skip the top and left padding of the output plane */
496 optr = output->data[i] + (((output->linesize[i] *
497 s->padtop) + s->padleft) >> shift);
/* the input pointer and sizes exclude the cropped bands */
499 component_resample(s, optr, output->linesize[i],
500 s->pad_owidth >> shift, s->pad_oheight >> shift,
501 input->data[i] + (input->linesize[i] *
502 (s->topBand >> shift)) + (s->leftBand >> shift),
503 input->linesize[i], ((s->iwidth - s->leftBand -
504 s->rightBand) >> shift),
505 (s->iheight - s->topBand - s->bottomBand) >> shift);
509 void img_resample_close(ImgReSampleContext *s)
511 av_free(s->line_buf);
/* Name callback referenced by context_class.
   NOTE(review): the body of this function is not visible in this view. */
515 static const char *context_to_name(void* ptr)
/* logging class installed in every SwsContext by sws_getContext() */
520 static const AVClass context_class = { "imgresample", context_to_name, NULL };
522 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
523 int dstW, int dstH, int dstFormat,
524 int flags, SwsFilter *srcFilter,
525 SwsFilter *dstFilter, double *param)
527 struct SwsContext *ctx;
529 ctx = av_malloc(sizeof(struct SwsContext));
531 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
535 ctx->av_class = &context_class;
537 if ((srcH != dstH) || (srcW != dstW)) {
538 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
539 av_log(ctx, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
541 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
543 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
544 ctx->resampling_ctx->iheight = srcH;
545 ctx->resampling_ctx->iwidth = srcW;
546 ctx->resampling_ctx->oheight = dstH;
547 ctx->resampling_ctx->owidth = dstW;
549 ctx->src_pix_fmt = srcFormat;
550 ctx->dst_pix_fmt = dstFormat;
555 void sws_freeContext(struct SwsContext *ctx)
559 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
560 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
561 img_resample_close(ctx->resampling_ctx);
563 av_free(ctx->resampling_ctx);
570 * Checks if context is valid or reallocs a new one instead.
571 * If context is NULL, just calls sws_getContext() to get a new one.
572 * Otherwise, checks if the parameters are the same already saved in context.
573 * If that is the case, returns the current context.
574 * Otherwise, frees context and gets a new one.
576 * Be warned that srcFilter, dstFilter are not checked, they are
577 * asumed to remain valid.
579 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
580 int srcW, int srcH, int srcFormat,
581 int dstW, int dstH, int dstFormat, int flags,
582 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
585 if ((ctx->resampling_ctx->iwidth != srcW) ||
586 (ctx->resampling_ctx->iheight != srcH) ||
587 (ctx->src_pix_fmt != srcFormat) ||
588 (ctx->resampling_ctx->owidth != dstW) ||
589 (ctx->resampling_ctx->oheight != dstH) ||
590 (ctx->dst_pix_fmt != dstFormat))
592 sws_freeContext(ctx);
597 return sws_getContext(srcW, srcH, srcFormat,
598 dstW, dstH, dstFormat, flags,
599 srcFilter, dstFilter, param);
/* Convert and/or rescale one picture. Rescaling is only done in YUV420P, so
   when the sizes differ the source is first converted to a temporary YUV420P
   picture if needed, resampled with img_resample(), and the result is
   converted to the destination format if needed. When the sizes are equal
   only the format conversion or a plain copy is done.
   srcSliceY and srcSliceH are not referenced in the visible lines.
   NOTE(review): the declarations of i and size, the allocation failure
   checks, the error exits, the release of buf1/buf2 and the return statement
   are not visible in this view. */
604 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
605 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
607 AVPicture src_pict, dst_pict;
609 AVPicture picture_format_temp;
610 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
611 uint8_t *buf1 = NULL, *buf2 = NULL;
612 enum PixelFormat current_pix_fmt;
/* wrap the caller's plane pointers and strides into AVPicture structures */
614 for (i = 0; i < 4; i++) {
615 src_pict.data[i] = src[i];
616 src_pict.linesize[i] = srcStride[i];
617 dst_pict.data[i] = dst[i];
618 dst_pict.linesize[i] = dstStride[i];
620 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
621 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
622 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
624 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
627 /* create temporary picture for rescaling input*/
628 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
629 buf1 = av_malloc(size);
634 formatted_picture = &picture_format_temp;
635 avpicture_fill((AVPicture*)formatted_picture, buf1,
636 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
638 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
639 &src_pict, ctx->src_pix_fmt,
640 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
642 av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
/* source is already YUV420P: resample it directly */
647 formatted_picture = &src_pict;
650 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
653 /* create temporary picture for rescaling output*/
654 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
655 buf2 = av_malloc(size);
660 resampled_picture = &picture_resample_temp;
661 avpicture_fill((AVPicture*)resampled_picture, buf2,
662 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
/* destination is YUV420P: resample straight into it */
665 resampled_picture = &dst_pict;
668 /* ...and finally rescale!!! */
669 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
670 current_pix_fmt = PIX_FMT_YUV420P;
/* same size: no resampling, continue from the source picture */
672 resampled_picture = &src_pict;
673 current_pix_fmt = ctx->src_pix_fmt;
/* final step: convert to the destination format, or copy if the data is not
   already in the destination picture */
676 if (current_pix_fmt != ctx->dst_pix_fmt) {
677 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
678 resampled_picture, current_pix_fmt,
679 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
681 av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
686 } else if (resampled_picture != &dst_pict) {
687 av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
688 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
/* Buffers of the self test: img holds the generated source picture, img1
   and img2 receive resampled results.
   NOTE(review): the definitions of XSIZE, YSIZE, XSIZE1 and YSIZE1 are not
   visible in this view. */
705 uint8_t img[XSIZE * YSIZE];
710 uint8_t img1[XSIZE1 * YSIZE1];
711 uint8_t img2[XSIZE1 * YSIZE1];
/**
 * Write an 8-bit grayscale picture as a binary PGM ("P5") file.
 *
 * @param filename path of the file to create or overwrite
 * @param img      xsize * ysize pixels stored line by line
 * @param xsize    picture width in pixels
 * @param ysize    picture height in pixels
 *
 * If the file cannot be opened the error is reported with perror() and
 * nothing is written.
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;

    /* "wb": the pixel data is binary and must not go through text-mode
       newline translation */
    f = fopen(filename, "wb");
    if (!f) {
        perror(filename);
        return;
    }
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, (size_t)xsize * (size_t)ysize, f);
    fclose(f);
#define fprintf please_use_av_log
}
724 static void dump_filter(int16_t *filter)
728 for(ph=0;ph<NB_PHASES;ph++) {
729 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
730 for(i=0;i<NB_TAPS;i++) {
731 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
733 av_log(NULL, AV_LOG_INFO, "\n");
/* Self test: builds a synthetic picture in img, resamples it with several
   scale factors (with 50 lines cropped at the top and bottom) and saves the
   results as /tmp/out<i>.pgm, then compares the MMX code path against a
   second run.
   NOTE(review): many lines (the pixel patterns of the test image, the
   assignments to fact, the declaration of buf, the statements between the
   two MMX-test runs and the end of the function) are not visible in this
   view. */
741 int main(int argc, char **argv)
743 int x, y, v, i, xsize, ysize;
744 ImgReSampleContext *s;
745 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
748 /* build test image */
749 for(y=0;y<YSIZE;y++) {
750 for(x=0;x<XSIZE;x++) {
751 if (x < XSIZE/2 && y < YSIZE/2) {
752 if (x < XSIZE/4 && y < YSIZE/4) {
758 } else if (x < XSIZE/4) {
/* NOTE(review): y is compared with XSIZE here and below; YSIZE looks
   intended, harmless only if both are equal - confirm */
763 } else if (y < XSIZE/4) {
775 if (((x+3) % 4) <= 1 &&
782 } else if (x < XSIZE/2) {
783 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
784 } else if (y < XSIZE/2) {
785 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
787 v = ((x + y - XSIZE) * 255) / XSIZE;
/* NOTE(review): for y == 0 this index is >= XSIZE * YSIZE, i.e. past the end
   of img[]; (YSIZE - 1 - y) and (XSIZE - 1 - x) look intended - confirm */
789 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
792 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
793 for(i=0;i<FF_ARRAY_ELEMS(factors);i++) {
795 xsize = (int)(XSIZE * fact);
796 ysize = (int)((YSIZE - 100) * fact);
/* crop 50 lines at the top and at the bottom of the input */
797 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
798 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
799 dump_filter(&s->h_filters[0][0]);
800 component_resample(s, img1, xsize, xsize, ysize,
801 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
802 img_resample_close(s);
804 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
805 save_pgm(buf, img1, xsize, ysize);
810 av_log(NULL, AV_LOG_INFO, "MMX test\n");
812 xsize = (int)(XSIZE * fact);
813 ysize = (int)(YSIZE * fact);
/* first run with the MMX routines enabled */
814 mm_flags = FF_MM_MMX;
815 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
816 component_resample(s, img1, xsize, xsize, ysize,
817 img, XSIZE, XSIZE, YSIZE);
/* NOTE(review): no img_resample_close() is visible for either context of the
   MMX test - possible leak, confirm */
820 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
821 component_resample(s, img2, xsize, xsize, ysize,
822 img, XSIZE, XSIZE, YSIZE);
/* both runs must produce identical pictures */
823 if (memcmp(img1, img2, xsize * ysize) != 0) {
824 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
827 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
828 #endif /* HAVE_MMX */