2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard.
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * High quality image resampling with polyphase filters .
29 #include "imgconvert.h"
30 #include "libswscale/swscale.h"
33 #include "ppc/imgresample_altivec.h"
36 #define NB_COMPONENTS 3
39 #define NB_PHASES (1 << PHASE_BITS)
41 #define FCENTER 1 /* index of the center of the filter */
42 //#define TEST 1 /* Test it */
44 #define POS_FRAC_BITS 16
45 #define POS_FRAC (1 << POS_FRAC_BITS)
46 /* 6 bits precision is needed for MMX */
49 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
52 const AVClass *av_class;
53 struct ImgReSampleContext *resampling_ctx;
54 enum PixelFormat src_pix_fmt, dst_pix_fmt;
57 typedef struct ImgReSampleContext {
58 int iwidth, iheight, owidth, oheight;
59 int topBand, bottomBand, leftBand, rightBand;
60 int padtop, padbottom, padleft, padright;
61 int pad_owidth, pad_oheight;
63 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
64 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
68 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
70 static inline int get_phase(int pos)
72 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
75 /* This function must be optimized */
76 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
77 int src_width, int src_start, int src_incr,
80 int src_pos, phase, sum, i;
85 for(i=0;i<dst_width;i++) {
88 if ((src_pos >> POS_FRAC_BITS) < 0 ||
89 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
92 s = src + (src_pos >> POS_FRAC_BITS);
93 phase = get_phase(src_pos);
94 filter = filters + phase * NB_TAPS;
96 sum = s[0] * filter[0] +
104 for(j=0;j<NB_TAPS;j++)
105 sum += s[j] * filter[j];
108 sum = sum >> FILTER_BITS;
119 /* This function must be optimized */
120 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
121 int wrap, int16_t *filter)
127 for(i=0;i<dst_width;i++) {
129 sum = s[0 * wrap] * filter[0] +
130 s[1 * wrap] * filter[1] +
131 s[2 * wrap] * filter[2] +
132 s[3 * wrap] * filter[3];
139 for(j=0;j<NB_TAPS;j++) {
140 sum += s1[0] * filter[j];
145 sum = sum >> FILTER_BITS;
160 #define FILTER4(reg) \
162 s = src + (src_pos >> POS_FRAC_BITS);\
163 phase = get_phase(src_pos);\
164 filter = filters + phase * NB_TAPS;\
166 punpcklbw_r2r(mm7, reg);\
167 movq_m2r(*filter, mm6);\
168 pmaddwd_r2r(reg, mm6);\
171 paddd_r2r(mm6, reg);\
172 psrad_i2r(FILTER_BITS, reg);\
173 src_pos += src_incr;\
176 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
178 /* XXX: do four pixels at a time */
179 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
180 const uint8_t *src, int src_width,
181 int src_start, int src_incr, int16_t *filters)
191 while (dst_width >= 4) {
198 packuswb_r2r(mm7, mm0);
199 packuswb_r2r(mm7, mm1);
200 packuswb_r2r(mm7, mm3);
201 packuswb_r2r(mm7, mm2);
213 while (dst_width > 0) {
215 packuswb_r2r(mm7, mm0);
224 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
225 int wrap, int16_t *filter)
234 coefs[i] = (tmp<<48) + (tmp<<32) + (tmp<<16) + tmp;
239 while (dst_width >= 4) {
240 movq_m2r(s[0 * wrap], mm0);
241 punpcklbw_r2r(mm7, mm0);
242 movq_m2r(s[1 * wrap], mm1);
243 punpcklbw_r2r(mm7, mm1);
244 movq_m2r(s[2 * wrap], mm2);
245 punpcklbw_r2r(mm7, mm2);
246 movq_m2r(s[3 * wrap], mm3);
247 punpcklbw_r2r(mm7, mm3);
249 pmullw_m2r(coefs[0], mm0);
250 pmullw_m2r(coefs[1], mm1);
251 pmullw_m2r(coefs[2], mm2);
252 pmullw_m2r(coefs[3], mm3);
257 psraw_i2r(FILTER_BITS, mm0);
259 packuswb_r2r(mm7, mm0);
262 *(uint32_t *)dst = tmp & 0xFFFFFFFF;
267 while (dst_width > 0) {
268 sum = s[0 * wrap] * filter[0] +
269 s[1 * wrap] * filter[1] +
270 s[2 * wrap] * filter[2] +
271 s[3 * wrap] * filter[3];
272 sum = sum >> FILTER_BITS;
284 #endif /* HAVE_MMX */
286 /* slow version to handle limit cases. Does not need optimization */
287 static void h_resample_slow(uint8_t *dst, int dst_width,
288 const uint8_t *src, int src_width,
289 int src_start, int src_incr, int16_t *filters)
291 int src_pos, phase, sum, j, v, i;
292 const uint8_t *s, *src_end;
295 src_end = src + src_width;
297 for(i=0;i<dst_width;i++) {
298 s = src + (src_pos >> POS_FRAC_BITS);
299 phase = get_phase(src_pos);
300 filter = filters + phase * NB_TAPS;
302 for(j=0;j<NB_TAPS;j++) {
305 else if (s >= src_end)
309 sum += v * filter[j];
312 sum = sum >> FILTER_BITS;
323 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
324 int src_width, int src_start, int src_incr,
330 n = (0 - src_start + src_incr - 1) / src_incr;
331 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
334 src_start += n * src_incr;
336 src_end = src_start + dst_width * src_incr;
337 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
338 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
344 if ((mm_flags & FF_MM_MMX) && NB_TAPS == 4)
345 h_resample_fast4_mmx(dst, n,
346 src, src_width, src_start, src_incr, filters);
349 h_resample_fast(dst, n,
350 src, src_width, src_start, src_incr, filters);
354 src_start += n * src_incr;
355 h_resample_slow(dst, dst_width,
356 src, src_width, src_start, src_incr, filters);
360 static void component_resample(ImgReSampleContext *s,
361 uint8_t *output, int owrap, int owidth, int oheight,
362 uint8_t *input, int iwrap, int iwidth, int iheight)
364 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
365 uint8_t *new_line, *src_line;
367 last_src_y = - FCENTER - 1;
368 /* position of the bottom of the filter in the source image */
369 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
370 ring_y = NB_TAPS; /* position in ring buffer */
371 for(y=0;y<oheight;y++) {
372 /* apply horizontal filter on new lines from input if needed */
373 src_y1 = src_y >> POS_FRAC_BITS;
374 while (last_src_y < src_y1) {
375 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
378 /* handle limit conditions : replicate line (slightly
379 inefficient because we filter multiple times) */
383 } else if (y1 >= iheight) {
386 src_line = input + y1 * iwrap;
387 new_line = s->line_buf + ring_y * owidth;
388 /* apply filter and handle limit cases correctly */
389 h_resample(new_line, owidth,
390 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
391 &s->h_filters[0][0]);
392 /* handle ring buffer wrapping */
393 if (ring_y >= LINE_BUF_HEIGHT) {
394 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
398 /* apply vertical filter */
399 phase_y = get_phase(src_y);
401 /* desactivated MMX because loss of precision */
402 if ((mm_flags & FF_MM_MMX) && NB_TAPS == 4 && 0)
403 v_resample4_mmx(output, owidth,
404 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
405 &s->v_filters[phase_y][0]);
409 if ((mm_flags & FF_MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
410 v_resample16_altivec(output, owidth,
411 s->line_buf + (ring_y - NB_TAPS + 1) * owidth,
412 owidth, &s->v_filters[phase_y][0]);
415 v_resample(output, owidth,
416 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
417 &s->v_filters[phase_y][0]);
425 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
426 int iwidth, int iheight,
427 int topBand, int bottomBand,
428 int leftBand, int rightBand,
429 int padtop, int padbottom,
430 int padleft, int padright)
432 ImgReSampleContext *s;
434 if (!owidth || !oheight || !iwidth || !iheight)
437 s = av_mallocz(sizeof(ImgReSampleContext));
440 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
442 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
447 s->oheight = oheight;
449 s->iheight = iheight;
451 s->topBand = topBand;
452 s->bottomBand = bottomBand;
453 s->leftBand = leftBand;
454 s->rightBand = rightBand;
457 s->padbottom = padbottom;
458 s->padleft = padleft;
459 s->padright = padright;
461 s->pad_owidth = owidth - (padleft + padright);
462 s->pad_oheight = oheight - (padtop + padbottom);
464 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
465 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
467 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
468 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
469 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
470 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
478 ImgReSampleContext *img_resample_init(int owidth, int oheight,
479 int iwidth, int iheight)
481 return img_resample_full_init(owidth, oheight, iwidth, iheight,
482 0, 0, 0, 0, 0, 0, 0, 0);
485 void img_resample(ImgReSampleContext *s,
486 AVPicture *output, const AVPicture *input)
492 shift = (i == 0) ? 0 : 1;
494 optr = output->data[i] + (((output->linesize[i] *
495 s->padtop) + s->padleft) >> shift);
497 component_resample(s, optr, output->linesize[i],
498 s->pad_owidth >> shift, s->pad_oheight >> shift,
499 input->data[i] + (input->linesize[i] *
500 (s->topBand >> shift)) + (s->leftBand >> shift),
501 input->linesize[i], ((s->iwidth - s->leftBand -
502 s->rightBand) >> shift),
503 (s->iheight - s->topBand - s->bottomBand) >> shift);
507 void img_resample_close(ImgReSampleContext *s)
509 av_free(s->line_buf);
513 static const char *context_to_name(void* ptr)
518 static const AVClass context_class = { "imgresample", context_to_name, NULL };
520 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
521 int dstW, int dstH, int dstFormat,
522 int flags, SwsFilter *srcFilter,
523 SwsFilter *dstFilter, double *param)
525 struct SwsContext *ctx;
527 ctx = av_malloc(sizeof(struct SwsContext));
529 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
533 ctx->av_class = &context_class;
535 if ((srcH != dstH) || (srcW != dstW)) {
536 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
537 av_log(ctx, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
539 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
541 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
542 ctx->resampling_ctx->iheight = srcH;
543 ctx->resampling_ctx->iwidth = srcW;
544 ctx->resampling_ctx->oheight = dstH;
545 ctx->resampling_ctx->owidth = dstW;
547 ctx->src_pix_fmt = srcFormat;
548 ctx->dst_pix_fmt = dstFormat;
553 void sws_freeContext(struct SwsContext *ctx)
557 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
558 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
559 img_resample_close(ctx->resampling_ctx);
561 av_free(ctx->resampling_ctx);
568 * Checks if context is valid or reallocs a new one instead.
569 * If context is NULL, just calls sws_getContext() to get a new one.
570 * Otherwise, checks if the parameters are the same already saved in context.
571 * If that is the case, returns the current context.
572 * Otherwise, frees context and gets a new one.
574 * Be warned that srcFilter, dstFilter are not checked, they are
575 * asumed to remain valid.
577 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
578 int srcW, int srcH, int srcFormat,
579 int dstW, int dstH, int dstFormat, int flags,
580 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
583 if ((ctx->resampling_ctx->iwidth != srcW) ||
584 (ctx->resampling_ctx->iheight != srcH) ||
585 (ctx->src_pix_fmt != srcFormat) ||
586 (ctx->resampling_ctx->owidth != dstW) ||
587 (ctx->resampling_ctx->oheight != dstH) ||
588 (ctx->dst_pix_fmt != dstFormat))
590 sws_freeContext(ctx);
595 return sws_getContext(srcW, srcH, srcFormat,
596 dstW, dstH, dstFormat, flags,
597 srcFilter, dstFilter, param);
602 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
603 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
605 AVPicture src_pict, dst_pict;
607 AVPicture picture_format_temp;
608 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
609 uint8_t *buf1 = NULL, *buf2 = NULL;
610 enum PixelFormat current_pix_fmt;
612 for (i = 0; i < 4; i++) {
613 src_pict.data[i] = src[i];
614 src_pict.linesize[i] = srcStride[i];
615 dst_pict.data[i] = dst[i];
616 dst_pict.linesize[i] = dstStride[i];
618 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
619 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
620 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
622 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
625 /* create temporary picture for rescaling input*/
626 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
627 buf1 = av_malloc(size);
632 formatted_picture = &picture_format_temp;
633 avpicture_fill((AVPicture*)formatted_picture, buf1,
634 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
636 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
637 &src_pict, ctx->src_pix_fmt,
638 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
640 av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
645 formatted_picture = &src_pict;
648 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
651 /* create temporary picture for rescaling output*/
652 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
653 buf2 = av_malloc(size);
658 resampled_picture = &picture_resample_temp;
659 avpicture_fill((AVPicture*)resampled_picture, buf2,
660 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
663 resampled_picture = &dst_pict;
666 /* ...and finally rescale!!! */
667 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
668 current_pix_fmt = PIX_FMT_YUV420P;
670 resampled_picture = &src_pict;
671 current_pix_fmt = ctx->src_pix_fmt;
674 if (current_pix_fmt != ctx->dst_pix_fmt) {
675 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
676 resampled_picture, current_pix_fmt,
677 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
679 av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
684 } else if (resampled_picture != &dst_pict) {
685 av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
686 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
703 uint8_t img[XSIZE * YSIZE];
708 uint8_t img1[XSIZE1 * YSIZE1];
709 uint8_t img2[XSIZE1 * YSIZE1];
711 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
715 f=fopen(filename,"w");
716 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
717 fwrite(img,1, xsize * ysize,f);
719 #define fprintf please_use_av_log
722 static void dump_filter(int16_t *filter)
726 for(ph=0;ph<NB_PHASES;ph++) {
727 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
728 for(i=0;i<NB_TAPS;i++) {
729 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
731 av_log(NULL, AV_LOG_INFO, "\n");
739 int main(int argc, char **argv)
741 int x, y, v, i, xsize, ysize;
742 ImgReSampleContext *s;
743 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
746 /* build test image */
747 for(y=0;y<YSIZE;y++) {
748 for(x=0;x<XSIZE;x++) {
749 if (x < XSIZE/2 && y < YSIZE/2) {
750 if (x < XSIZE/4 && y < YSIZE/4) {
756 } else if (x < XSIZE/4) {
761 } else if (y < XSIZE/4) {
773 if (((x+3) % 4) <= 1 &&
780 } else if (x < XSIZE/2) {
781 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
782 } else if (y < XSIZE/2) {
783 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
785 v = ((x + y - XSIZE) * 255) / XSIZE;
787 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
790 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
791 for(i=0;i<FF_ARRAY_ELEMS(factors);i++) {
793 xsize = (int)(XSIZE * fact);
794 ysize = (int)((YSIZE - 100) * fact);
795 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
796 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
797 dump_filter(&s->h_filters[0][0]);
798 component_resample(s, img1, xsize, xsize, ysize,
799 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
800 img_resample_close(s);
802 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
803 save_pgm(buf, img1, xsize, ysize);
808 av_log(NULL, AV_LOG_INFO, "MMX test\n");
810 xsize = (int)(XSIZE * fact);
811 ysize = (int)(YSIZE * fact);
812 mm_flags = FF_MM_MMX;
813 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
814 component_resample(s, img1, xsize, xsize, ysize,
815 img, XSIZE, XSIZE, YSIZE);
818 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
819 component_resample(s, img2, xsize, xsize, ysize,
820 img, XSIZE, XSIZE, YSIZE);
821 if (memcmp(img1, img2, xsize * ysize) != 0) {
822 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
825 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
826 #endif /* HAVE_MMX */