2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * High quality image resampling with polyphase filters .
32 #include "ppc/imgresample_altivec.h"
35 #define NB_COMPONENTS 3
38 #define NB_PHASES (1 << PHASE_BITS)
40 #define FCENTER 1 /* index of the center of the filter */
41 //#define TEST 1 /* Test it */
43 #define POS_FRAC_BITS 16
44 #define POS_FRAC (1 << POS_FRAC_BITS)
45 /* 6 bits precision is needed for MMX */
48 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
52 struct ImgReSampleContext *resampling_ctx;
53 enum PixelFormat src_pix_fmt, dst_pix_fmt;
56 struct ImgReSampleContext {
57 int iwidth, iheight, owidth, oheight;
58 int topBand, bottomBand, leftBand, rightBand;
59 int padtop, padbottom, padleft, padright;
60 int pad_owidth, pad_oheight;
62 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
63 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
67 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
69 static inline int get_phase(int pos)
71 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
74 /* This function must be optimized */
75 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
76 int src_width, int src_start, int src_incr,
79 int src_pos, phase, sum, i;
84 for(i=0;i<dst_width;i++) {
87 if ((src_pos >> POS_FRAC_BITS) < 0 ||
88 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
91 s = src + (src_pos >> POS_FRAC_BITS);
92 phase = get_phase(src_pos);
93 filter = filters + phase * NB_TAPS;
95 sum = s[0] * filter[0] +
103 for(j=0;j<NB_TAPS;j++)
104 sum += s[j] * filter[j];
107 sum = sum >> FILTER_BITS;
118 /* This function must be optimized */
119 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
120 int wrap, int16_t *filter)
126 for(i=0;i<dst_width;i++) {
128 sum = s[0 * wrap] * filter[0] +
129 s[1 * wrap] * filter[1] +
130 s[2 * wrap] * filter[2] +
131 s[3 * wrap] * filter[3];
138 for(j=0;j<NB_TAPS;j++) {
139 sum += s1[0] * filter[j];
144 sum = sum >> FILTER_BITS;
157 #include "i386/mmx.h"
159 #define FILTER4(reg) \
161 s = src + (src_pos >> POS_FRAC_BITS);\
162 phase = get_phase(src_pos);\
163 filter = filters + phase * NB_TAPS;\
165 punpcklbw_r2r(mm7, reg);\
166 movq_m2r(*filter, mm6);\
167 pmaddwd_r2r(reg, mm6);\
170 paddd_r2r(mm6, reg);\
171 psrad_i2r(FILTER_BITS, reg);\
172 src_pos += src_incr;\
175 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
177 /* XXX: do four pixels at a time */
178 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
179 const uint8_t *src, int src_width,
180 int src_start, int src_incr, int16_t *filters)
190 while (dst_width >= 4) {
197 packuswb_r2r(mm7, mm0);
198 packuswb_r2r(mm7, mm1);
199 packuswb_r2r(mm7, mm3);
200 packuswb_r2r(mm7, mm2);
212 while (dst_width > 0) {
214 packuswb_r2r(mm7, mm0);
223 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
224 int wrap, int16_t *filter)
241 while (dst_width >= 4) {
242 movq_m2r(s[0 * wrap], mm0);
243 punpcklbw_r2r(mm7, mm0);
244 movq_m2r(s[1 * wrap], mm1);
245 punpcklbw_r2r(mm7, mm1);
246 movq_m2r(s[2 * wrap], mm2);
247 punpcklbw_r2r(mm7, mm2);
248 movq_m2r(s[3 * wrap], mm3);
249 punpcklbw_r2r(mm7, mm3);
251 pmullw_m2r(coefs[0], mm0);
252 pmullw_m2r(coefs[1], mm1);
253 pmullw_m2r(coefs[2], mm2);
254 pmullw_m2r(coefs[3], mm3);
259 psraw_i2r(FILTER_BITS, mm0);
261 packuswb_r2r(mm7, mm0);
264 *(uint32_t *)dst = tmp.ud[0];
269 while (dst_width > 0) {
270 sum = s[0 * wrap] * filter[0] +
271 s[1 * wrap] * filter[1] +
272 s[2 * wrap] * filter[2] +
273 s[3 * wrap] * filter[3];
274 sum = sum >> FILTER_BITS;
286 #endif /* HAVE_MMX */
288 /* slow version to handle limit cases. Does not need optimization */
289 static void h_resample_slow(uint8_t *dst, int dst_width,
290 const uint8_t *src, int src_width,
291 int src_start, int src_incr, int16_t *filters)
293 int src_pos, phase, sum, j, v, i;
294 const uint8_t *s, *src_end;
297 src_end = src + src_width;
299 for(i=0;i<dst_width;i++) {
300 s = src + (src_pos >> POS_FRAC_BITS);
301 phase = get_phase(src_pos);
302 filter = filters + phase * NB_TAPS;
304 for(j=0;j<NB_TAPS;j++) {
307 else if (s >= src_end)
311 sum += v * filter[j];
314 sum = sum >> FILTER_BITS;
325 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
326 int src_width, int src_start, int src_incr,
332 n = (0 - src_start + src_incr - 1) / src_incr;
333 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
336 src_start += n * src_incr;
338 src_end = src_start + dst_width * src_incr;
339 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
340 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
346 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
347 h_resample_fast4_mmx(dst, n,
348 src, src_width, src_start, src_incr, filters);
351 h_resample_fast(dst, n,
352 src, src_width, src_start, src_incr, filters);
356 src_start += n * src_incr;
357 h_resample_slow(dst, dst_width,
358 src, src_width, src_start, src_incr, filters);
362 static void component_resample(ImgReSampleContext *s,
363 uint8_t *output, int owrap, int owidth, int oheight,
364 uint8_t *input, int iwrap, int iwidth, int iheight)
366 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
367 uint8_t *new_line, *src_line;
369 last_src_y = - FCENTER - 1;
370 /* position of the bottom of the filter in the source image */
371 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
372 ring_y = NB_TAPS; /* position in ring buffer */
373 for(y=0;y<oheight;y++) {
374 /* apply horizontal filter on new lines from input if needed */
375 src_y1 = src_y >> POS_FRAC_BITS;
376 while (last_src_y < src_y1) {
377 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
380 /* handle limit conditions : replicate line (slightly
381 inefficient because we filter multiple times) */
385 } else if (y1 >= iheight) {
388 src_line = input + y1 * iwrap;
389 new_line = s->line_buf + ring_y * owidth;
390 /* apply filter and handle limit cases correctly */
391 h_resample(new_line, owidth,
392 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
393 &s->h_filters[0][0]);
394 /* handle ring buffer wrapping */
395 if (ring_y >= LINE_BUF_HEIGHT) {
396 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
400 /* apply vertical filter */
401 phase_y = get_phase(src_y);
403 /* desactivated MMX because loss of precision */
404 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
405 v_resample4_mmx(output, owidth,
406 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
407 &s->v_filters[phase_y][0]);
411 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
412 v_resample16_altivec(output, owidth,
413 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
414 &s->v_filters[phase_y][0]);
417 v_resample(output, owidth,
418 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
419 &s->v_filters[phase_y][0]);
427 ImgReSampleContext *img_resample_init(int owidth, int oheight,
428 int iwidth, int iheight)
430 return img_resample_full_init(owidth, oheight, iwidth, iheight,
431 0, 0, 0, 0, 0, 0, 0, 0);
434 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
435 int iwidth, int iheight,
436 int topBand, int bottomBand,
437 int leftBand, int rightBand,
438 int padtop, int padbottom,
439 int padleft, int padright)
441 ImgReSampleContext *s;
443 if (!owidth || !oheight || !iwidth || !iheight)
446 s = av_mallocz(sizeof(ImgReSampleContext));
449 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
451 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
456 s->oheight = oheight;
458 s->iheight = iheight;
460 s->topBand = topBand;
461 s->bottomBand = bottomBand;
462 s->leftBand = leftBand;
463 s->rightBand = rightBand;
466 s->padbottom = padbottom;
467 s->padleft = padleft;
468 s->padright = padright;
470 s->pad_owidth = owidth - (padleft + padright);
471 s->pad_oheight = oheight - (padtop + padbottom);
473 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
474 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
476 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
477 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
478 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
479 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
487 void img_resample(ImgReSampleContext *s,
488 AVPicture *output, const AVPicture *input)
494 shift = (i == 0) ? 0 : 1;
496 optr = output->data[i] + (((output->linesize[i] *
497 s->padtop) + s->padleft) >> shift);
499 component_resample(s, optr, output->linesize[i],
500 s->pad_owidth >> shift, s->pad_oheight >> shift,
501 input->data[i] + (input->linesize[i] *
502 (s->topBand >> shift)) + (s->leftBand >> shift),
503 input->linesize[i], ((s->iwidth - s->leftBand -
504 s->rightBand) >> shift),
505 (s->iheight - s->topBand - s->bottomBand) >> shift);
509 void img_resample_close(ImgReSampleContext *s)
511 av_free(s->line_buf);
515 static const AVClass context_class = { "imgresample", NULL, NULL };
517 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
518 int dstW, int dstH, int dstFormat,
519 int flags, SwsFilter *srcFilter,
520 SwsFilter *dstFilter, double *param)
522 struct SwsContext *ctx;
524 ctx = av_malloc(sizeof(struct SwsContext));
526 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
530 ctx->av_class = &context_class;
532 if ((srcH != dstH) || (srcW != dstW)) {
533 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
534 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
536 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
538 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
539 ctx->resampling_ctx->iheight = srcH;
540 ctx->resampling_ctx->iwidth = srcW;
541 ctx->resampling_ctx->oheight = dstH;
542 ctx->resampling_ctx->owidth = dstW;
544 ctx->src_pix_fmt = srcFormat;
545 ctx->dst_pix_fmt = dstFormat;
550 void sws_freeContext(struct SwsContext *ctx)
554 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
555 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
556 img_resample_close(ctx->resampling_ctx);
558 av_free(ctx->resampling_ctx);
560 av_free(ctx->av_class);
566 * Checks if context is valid or reallocs a new one instead.
567 * If context is NULL, just calls sws_getContext() to get a new one.
568 * Otherwise, checks if the parameters are the same already saved in context.
569 * If that is the case, returns the current context.
570 * Otherwise, frees context and gets a new one.
572 * Be warned that srcFilter, dstFilter are not checked, they are
573 * asumed to remain valid.
575 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
576 int srcW, int srcH, int srcFormat,
577 int dstW, int dstH, int dstFormat, int flags,
578 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
581 if ((ctx->resampling_ctx->iwidth != srcW) ||
582 (ctx->resampling_ctx->iheight != srcH) ||
583 (ctx->src_pix_fmt != srcFormat) ||
584 (ctx->resampling_ctx->owidth != dstW) ||
585 (ctx->resampling_ctx->oheight != dstH) ||
586 (ctx->dst_pix_fmt != dstFormat))
588 sws_freeContext(ctx);
593 return sws_getContext(srcW, srcH, srcFormat,
594 dstW, dstH, dstFormat, flags,
595 srcFilter, dstFilter, param);
600 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
601 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
603 AVPicture src_pict, dst_pict;
605 AVPicture picture_format_temp;
606 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
607 uint8_t *buf1 = NULL, *buf2 = NULL;
608 enum PixelFormat current_pix_fmt;
610 for (i = 0; i < 4; i++) {
611 src_pict.data[i] = src[i];
612 src_pict.linesize[i] = srcStride[i];
613 dst_pict.data[i] = dst[i];
614 dst_pict.linesize[i] = dstStride[i];
616 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
617 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
618 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
620 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
623 /* create temporary picture for rescaling input*/
624 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
625 buf1 = av_malloc(size);
630 formatted_picture = &picture_format_temp;
631 avpicture_fill((AVPicture*)formatted_picture, buf1,
632 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
634 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
635 &src_pict, ctx->src_pix_fmt,
636 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
638 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
643 formatted_picture = &src_pict;
646 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
649 /* create temporary picture for rescaling output*/
650 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
651 buf2 = av_malloc(size);
656 resampled_picture = &picture_resample_temp;
657 avpicture_fill((AVPicture*)resampled_picture, buf2,
658 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
661 resampled_picture = &dst_pict;
664 /* ...and finally rescale!!! */
665 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
666 current_pix_fmt = PIX_FMT_YUV420P;
668 resampled_picture = &src_pict;
669 current_pix_fmt = ctx->src_pix_fmt;
672 if (current_pix_fmt != ctx->dst_pix_fmt) {
673 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
674 resampled_picture, current_pix_fmt,
675 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
677 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
682 } else if (resampled_picture != &dst_pict) {
683 av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
684 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
701 uint8_t img[XSIZE * YSIZE];
706 uint8_t img1[XSIZE1 * YSIZE1];
707 uint8_t img2[XSIZE1 * YSIZE1];
709 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
713 f=fopen(filename,"w");
714 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
715 fwrite(img,1, xsize * ysize,f);
717 #define fprintf please_use_av_log
720 static void dump_filter(int16_t *filter)
724 for(ph=0;ph<NB_PHASES;ph++) {
725 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
726 for(i=0;i<NB_TAPS;i++) {
727 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
729 av_log(NULL, AV_LOG_INFO, "\n");
737 int main(int argc, char **argv)
739 int x, y, v, i, xsize, ysize;
740 ImgReSampleContext *s;
741 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
744 /* build test image */
745 for(y=0;y<YSIZE;y++) {
746 for(x=0;x<XSIZE;x++) {
747 if (x < XSIZE/2 && y < YSIZE/2) {
748 if (x < XSIZE/4 && y < YSIZE/4) {
754 } else if (x < XSIZE/4) {
759 } else if (y < XSIZE/4) {
771 if (((x+3) % 4) <= 1 &&
778 } else if (x < XSIZE/2) {
779 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
780 } else if (y < XSIZE/2) {
781 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
783 v = ((x + y - XSIZE) * 255) / XSIZE;
785 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
788 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
789 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
791 xsize = (int)(XSIZE * fact);
792 ysize = (int)((YSIZE - 100) * fact);
793 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
794 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
795 dump_filter(&s->h_filters[0][0]);
796 component_resample(s, img1, xsize, xsize, ysize,
797 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
798 img_resample_close(s);
800 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
801 save_pgm(buf, img1, xsize, ysize);
806 av_log(NULL, AV_LOG_INFO, "MMX test\n");
808 xsize = (int)(XSIZE * fact);
809 ysize = (int)(YSIZE * fact);
811 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
812 component_resample(s, img1, xsize, xsize, ysize,
813 img, XSIZE, XSIZE, YSIZE);
816 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
817 component_resample(s, img2, xsize, xsize, ysize,
818 img, XSIZE, XSIZE, YSIZE);
819 if (memcmp(img1, img2, xsize * ysize) != 0) {
820 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
823 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
824 #endif /* HAVE_MMX */