2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * High quality image resampling with polyphase filters .
32 #include "ppc/imgresample_altivec.h"
35 #define NB_COMPONENTS 3
38 #define NB_PHASES (1 << PHASE_BITS)
40 #define FCENTER 1 /* index of the center of the filter */
41 //#define TEST 1 /* Test it */
43 #define POS_FRAC_BITS 16
44 #define POS_FRAC (1 << POS_FRAC_BITS)
45 /* 6 bits precision is needed for MMX */
48 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
52 struct ImgReSampleContext *resampling_ctx;
53 enum PixelFormat src_pix_fmt, dst_pix_fmt;
56 struct ImgReSampleContext {
57 int iwidth, iheight, owidth, oheight;
58 int topBand, bottomBand, leftBand, rightBand;
59 int padtop, padbottom, padleft, padright;
60 int pad_owidth, pad_oheight;
62 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
63 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
67 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
69 static inline int get_phase(int pos)
71 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
74 /* This function must be optimized */
75 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
76 int src_width, int src_start, int src_incr,
79 int src_pos, phase, sum, i;
84 for(i=0;i<dst_width;i++) {
87 if ((src_pos >> POS_FRAC_BITS) < 0 ||
88 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
91 s = src + (src_pos >> POS_FRAC_BITS);
92 phase = get_phase(src_pos);
93 filter = filters + phase * NB_TAPS;
95 sum = s[0] * filter[0] +
103 for(j=0;j<NB_TAPS;j++)
104 sum += s[j] * filter[j];
107 sum = sum >> FILTER_BITS;
118 /* This function must be optimized */
119 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
120 int wrap, int16_t *filter)
126 for(i=0;i<dst_width;i++) {
128 sum = s[0 * wrap] * filter[0] +
129 s[1 * wrap] * filter[1] +
130 s[2 * wrap] * filter[2] +
131 s[3 * wrap] * filter[3];
138 for(j=0;j<NB_TAPS;j++) {
139 sum += s1[0] * filter[j];
144 sum = sum >> FILTER_BITS;
157 #include "i386/mmx.h"
159 #define FILTER4(reg) \
161 s = src + (src_pos >> POS_FRAC_BITS);\
162 phase = get_phase(src_pos);\
163 filter = filters + phase * NB_TAPS;\
165 punpcklbw_r2r(mm7, reg);\
166 movq_m2r(*filter, mm6);\
167 pmaddwd_r2r(reg, mm6);\
170 paddd_r2r(mm6, reg);\
171 psrad_i2r(FILTER_BITS, reg);\
172 src_pos += src_incr;\
175 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
177 /* XXX: do four pixels at a time */
178 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
179 const uint8_t *src, int src_width,
180 int src_start, int src_incr, int16_t *filters)
190 while (dst_width >= 4) {
197 packuswb_r2r(mm7, mm0);
198 packuswb_r2r(mm7, mm1);
199 packuswb_r2r(mm7, mm3);
200 packuswb_r2r(mm7, mm2);
212 while (dst_width > 0) {
214 packuswb_r2r(mm7, mm0);
223 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
224 int wrap, int16_t *filter)
241 while (dst_width >= 4) {
242 movq_m2r(s[0 * wrap], mm0);
243 punpcklbw_r2r(mm7, mm0);
244 movq_m2r(s[1 * wrap], mm1);
245 punpcklbw_r2r(mm7, mm1);
246 movq_m2r(s[2 * wrap], mm2);
247 punpcklbw_r2r(mm7, mm2);
248 movq_m2r(s[3 * wrap], mm3);
249 punpcklbw_r2r(mm7, mm3);
251 pmullw_m2r(coefs[0], mm0);
252 pmullw_m2r(coefs[1], mm1);
253 pmullw_m2r(coefs[2], mm2);
254 pmullw_m2r(coefs[3], mm3);
259 psraw_i2r(FILTER_BITS, mm0);
261 packuswb_r2r(mm7, mm0);
264 *(uint32_t *)dst = tmp.ud[0];
269 while (dst_width > 0) {
270 sum = s[0 * wrap] * filter[0] +
271 s[1 * wrap] * filter[1] +
272 s[2 * wrap] * filter[2] +
273 s[3 * wrap] * filter[3];
274 sum = sum >> FILTER_BITS;
286 #endif /* HAVE_MMX */
288 /* slow version to handle limit cases. Does not need optimization */
289 static void h_resample_slow(uint8_t *dst, int dst_width,
290 const uint8_t *src, int src_width,
291 int src_start, int src_incr, int16_t *filters)
293 int src_pos, phase, sum, j, v, i;
294 const uint8_t *s, *src_end;
297 src_end = src + src_width;
299 for(i=0;i<dst_width;i++) {
300 s = src + (src_pos >> POS_FRAC_BITS);
301 phase = get_phase(src_pos);
302 filter = filters + phase * NB_TAPS;
304 for(j=0;j<NB_TAPS;j++) {
307 else if (s >= src_end)
311 sum += v * filter[j];
314 sum = sum >> FILTER_BITS;
325 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
326 int src_width, int src_start, int src_incr,
332 n = (0 - src_start + src_incr - 1) / src_incr;
333 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
336 src_start += n * src_incr;
338 src_end = src_start + dst_width * src_incr;
339 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
340 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
346 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
347 h_resample_fast4_mmx(dst, n,
348 src, src_width, src_start, src_incr, filters);
351 h_resample_fast(dst, n,
352 src, src_width, src_start, src_incr, filters);
356 src_start += n * src_incr;
357 h_resample_slow(dst, dst_width,
358 src, src_width, src_start, src_incr, filters);
362 static void component_resample(ImgReSampleContext *s,
363 uint8_t *output, int owrap, int owidth, int oheight,
364 uint8_t *input, int iwrap, int iwidth, int iheight)
366 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
367 uint8_t *new_line, *src_line;
369 last_src_y = - FCENTER - 1;
370 /* position of the bottom of the filter in the source image */
371 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
372 ring_y = NB_TAPS; /* position in ring buffer */
373 for(y=0;y<oheight;y++) {
374 /* apply horizontal filter on new lines from input if needed */
375 src_y1 = src_y >> POS_FRAC_BITS;
376 while (last_src_y < src_y1) {
377 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
380 /* handle limit conditions : replicate line (slightly
381 inefficient because we filter multiple times) */
385 } else if (y1 >= iheight) {
388 src_line = input + y1 * iwrap;
389 new_line = s->line_buf + ring_y * owidth;
390 /* apply filter and handle limit cases correctly */
391 h_resample(new_line, owidth,
392 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
393 &s->h_filters[0][0]);
394 /* handle ring buffer wrapping */
395 if (ring_y >= LINE_BUF_HEIGHT) {
396 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
400 /* apply vertical filter */
401 phase_y = get_phase(src_y);
403 /* desactivated MMX because loss of precision */
404 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
405 v_resample4_mmx(output, owidth,
406 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
407 &s->v_filters[phase_y][0]);
411 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
412 v_resample16_altivec(output, owidth,
413 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
414 &s->v_filters[phase_y][0]);
417 v_resample(output, owidth,
418 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
419 &s->v_filters[phase_y][0]);
427 ImgReSampleContext *img_resample_init(int owidth, int oheight,
428 int iwidth, int iheight)
430 return img_resample_full_init(owidth, oheight, iwidth, iheight,
431 0, 0, 0, 0, 0, 0, 0, 0);
434 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
435 int iwidth, int iheight,
436 int topBand, int bottomBand,
437 int leftBand, int rightBand,
438 int padtop, int padbottom,
439 int padleft, int padright)
441 ImgReSampleContext *s;
443 if (!owidth || !oheight || !iwidth || !iheight)
446 s = av_mallocz(sizeof(ImgReSampleContext));
449 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
451 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
456 s->oheight = oheight;
458 s->iheight = iheight;
460 s->topBand = topBand;
461 s->bottomBand = bottomBand;
462 s->leftBand = leftBand;
463 s->rightBand = rightBand;
466 s->padbottom = padbottom;
467 s->padleft = padleft;
468 s->padright = padright;
470 s->pad_owidth = owidth - (padleft + padright);
471 s->pad_oheight = oheight - (padtop + padbottom);
473 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
474 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
476 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
477 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
478 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
479 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
487 void img_resample(ImgReSampleContext *s,
488 AVPicture *output, const AVPicture *input)
494 shift = (i == 0) ? 0 : 1;
496 optr = output->data[i] + (((output->linesize[i] *
497 s->padtop) + s->padleft) >> shift);
499 component_resample(s, optr, output->linesize[i],
500 s->pad_owidth >> shift, s->pad_oheight >> shift,
501 input->data[i] + (input->linesize[i] *
502 (s->topBand >> shift)) + (s->leftBand >> shift),
503 input->linesize[i], ((s->iwidth - s->leftBand -
504 s->rightBand) >> shift),
505 (s->iheight - s->topBand - s->bottomBand) >> shift);
509 void img_resample_close(ImgReSampleContext *s)
511 av_free(s->line_buf);
515 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
516 int dstW, int dstH, int dstFormat,
517 int flags, SwsFilter *srcFilter,
518 SwsFilter *dstFilter, double *param)
520 struct SwsContext *ctx;
522 ctx = av_malloc(sizeof(struct SwsContext));
524 ctx->av_class = av_mallocz(sizeof(AVClass));
525 if (!ctx || !ctx->av_class) {
526 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
531 if ((srcH != dstH) || (srcW != dstW)) {
532 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
533 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
535 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
537 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
538 ctx->resampling_ctx->iheight = srcH;
539 ctx->resampling_ctx->iwidth = srcW;
540 ctx->resampling_ctx->oheight = dstH;
541 ctx->resampling_ctx->owidth = dstW;
543 ctx->src_pix_fmt = srcFormat;
544 ctx->dst_pix_fmt = dstFormat;
549 void sws_freeContext(struct SwsContext *ctx)
553 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
554 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
555 img_resample_close(ctx->resampling_ctx);
557 av_free(ctx->resampling_ctx);
559 av_free(ctx->av_class);
565 * Checks if context is valid or reallocs a new one instead.
566 * If context is NULL, just calls sws_getContext() to get a new one.
567 * Otherwise, checks if the parameters are the same already saved in context.
568 * If that is the case, returns the current context.
569 * Otherwise, frees context and gets a new one.
571 * Be warned that srcFilter, dstFilter are not checked, they are
572 * asumed to remain valid.
574 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
575 int srcW, int srcH, int srcFormat,
576 int dstW, int dstH, int dstFormat, int flags,
577 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
580 if ((ctx->resampling_ctx->iwidth != srcW) ||
581 (ctx->resampling_ctx->iheight != srcH) ||
582 (ctx->src_pix_fmt != srcFormat) ||
583 (ctx->resampling_ctx->owidth != dstW) ||
584 (ctx->resampling_ctx->oheight != dstH) ||
585 (ctx->dst_pix_fmt != dstFormat))
587 sws_freeContext(ctx);
592 return sws_getContext(srcW, srcH, srcFormat,
593 dstW, dstH, dstFormat, flags,
594 srcFilter, dstFilter, param);
599 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
600 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
602 AVPicture src_pict, dst_pict;
604 AVPicture picture_format_temp;
605 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
606 uint8_t *buf1 = NULL, *buf2 = NULL;
607 enum PixelFormat current_pix_fmt;
609 for (i = 0; i < 4; i++) {
610 src_pict.data[i] = src[i];
611 src_pict.linesize[i] = srcStride[i];
612 dst_pict.data[i] = dst[i];
613 dst_pict.linesize[i] = dstStride[i];
615 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
616 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
617 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
619 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
622 /* create temporary picture for rescaling input*/
623 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
624 buf1 = av_malloc(size);
629 formatted_picture = &picture_format_temp;
630 avpicture_fill((AVPicture*)formatted_picture, buf1,
631 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
633 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
634 &src_pict, ctx->src_pix_fmt,
635 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
637 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
642 formatted_picture = &src_pict;
645 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
648 /* create temporary picture for rescaling output*/
649 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
650 buf2 = av_malloc(size);
655 resampled_picture = &picture_resample_temp;
656 avpicture_fill((AVPicture*)resampled_picture, buf2,
657 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
660 resampled_picture = &dst_pict;
663 /* ...and finally rescale!!! */
664 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
665 current_pix_fmt = PIX_FMT_YUV420P;
667 resampled_picture = &src_pict;
668 current_pix_fmt = ctx->src_pix_fmt;
671 if (current_pix_fmt != ctx->dst_pix_fmt) {
672 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
673 resampled_picture, current_pix_fmt,
674 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
676 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
681 } else if (resampled_picture != &dst_pict) {
682 av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
683 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
700 uint8_t img[XSIZE * YSIZE];
705 uint8_t img1[XSIZE1 * YSIZE1];
706 uint8_t img2[XSIZE1 * YSIZE1];
708 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
712 f=fopen(filename,"w");
713 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
714 fwrite(img,1, xsize * ysize,f);
716 #define fprintf please_use_av_log
719 static void dump_filter(int16_t *filter)
723 for(ph=0;ph<NB_PHASES;ph++) {
724 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
725 for(i=0;i<NB_TAPS;i++) {
726 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
728 av_log(NULL, AV_LOG_INFO, "\n");
736 int main(int argc, char **argv)
738 int x, y, v, i, xsize, ysize;
739 ImgReSampleContext *s;
740 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
743 /* build test image */
744 for(y=0;y<YSIZE;y++) {
745 for(x=0;x<XSIZE;x++) {
746 if (x < XSIZE/2 && y < YSIZE/2) {
747 if (x < XSIZE/4 && y < YSIZE/4) {
753 } else if (x < XSIZE/4) {
758 } else if (y < XSIZE/4) {
770 if (((x+3) % 4) <= 1 &&
777 } else if (x < XSIZE/2) {
778 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
779 } else if (y < XSIZE/2) {
780 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
782 v = ((x + y - XSIZE) * 255) / XSIZE;
784 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
787 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
788 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
790 xsize = (int)(XSIZE * fact);
791 ysize = (int)((YSIZE - 100) * fact);
792 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
793 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
794 dump_filter(&s->h_filters[0][0]);
795 component_resample(s, img1, xsize, xsize, ysize,
796 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
797 img_resample_close(s);
799 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
800 save_pgm(buf, img1, xsize, ysize);
805 av_log(NULL, AV_LOG_INFO, "MMX test\n");
807 xsize = (int)(XSIZE * fact);
808 ysize = (int)(YSIZE * fact);
810 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
811 component_resample(s, img1, xsize, xsize, ysize,
812 img, XSIZE, XSIZE, YSIZE);
815 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
816 component_resample(s, img2, xsize, xsize, ysize,
817 img, XSIZE, XSIZE, YSIZE);
818 if (memcmp(img1, img2, xsize * ysize) != 0) {
819 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
822 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
823 #endif /* HAVE_MMX */