2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Fabrice Bellard
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/imgresample.c
24 * High quality image resampling with polyphase filters.
26 * WARNING: This file is deprecated and will be removed after FFmpeg 0.5
27 * release; do not waste your time improving it!
32 #include "imgconvert.h"
33 #include "libswscale/swscale.h"
36 #include "ppc/imgresample_altivec.h"
/* NOTE(review): presumably the number of picture planes handled by
   img_resample(); its use is not visible in this extract -- confirm. */
39 #define NB_COMPONENTS 3
/* Number of distinct filter phases, i.e. the number of coefficient rows in
   h_filters/v_filters (PHASE_BITS is defined on a line not shown here). */
42 #define NB_PHASES (1 << PHASE_BITS)
44 #define FCENTER 1 /* index of the center of the filter */
45 //#define TEST 1 /* Test it */
/* Source positions are fixed-point values with POS_FRAC_BITS fractional
   bits; POS_FRAC is the fixed-point representation of one source sample. */
47 #define POS_FRAC_BITS 16
48 #define POS_FRAC (1 << POS_FRAC_BITS)
49 /* 6 bits precision is needed for MMX */
/* Number of lines in the main part of the line ring buffer; the buffer is
   allocated with NB_TAPS additional lines beyond this. */
52 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
/* Members of struct SwsContext as used by the sws_* wrappers in this file
   (the struct's opening and closing lines are not part of this extract). */
/* Set to &context_class by sws_getContext(). */
55 const AVClass *av_class;
/* Resampler state; sws_getContext() always allocates it, even when no
   rescaling is needed, so that the source/destination sizes can be read
   back from it. */
56 struct ImgReSampleContext *resampling_ctx;
/* Pixel formats passed to sws_getContext() for the input and the output. */
57 enum PixelFormat src_pix_fmt, dst_pix_fmt;
/* State of one resampler instance, filled in by img_resample_full_init().
   Other members used by this file (line_buf, h_incr, v_incr) are declared on
   lines that are not part of this extract. */
60 typedef struct ImgReSampleContext {
/* Full input and output picture dimensions, in pixels. */
61 int iwidth, iheight, owidth, oheight;
/* Bands subtracted from the input dimensions before scaling (cropping). */
62 int topBand, bottomBand, leftBand, rightBand;
/* Padding subtracted from the output dimensions; the scaled picture is
   written at offset (padleft, padtop) in the output. */
63 int padtop, padbottom, padleft, padright;
/* Output dimensions with the padding removed: the size actually produced
   by the scaler. */
64 int pad_owidth, pad_oheight;
66 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
67 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
/* Filter-bank builder, declared here and defined outside this file.
   img_resample_full_init() calls it with factor = output size / input size,
   tap_count = NB_TAPS, phase_count = NB_PHASES, scale = 1 << FILTER_BITS and
   type = 0, writing NB_PHASES rows of NB_TAPS coefficients into `filter`. */
71 void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
/* Return the filter phase for fixed-point position `pos`: the PHASE_BITS
   most significant bits of its POS_FRAC_BITS-bit fractional part, i.e. a
   value in [0, NB_PHASES - 1] usable as a row index into a filter bank. */
73 static inline int get_phase(int pos)
75 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
78 /* This function must be optimized */
/* Horizontally filter one line for output pixels whose whole NB_TAPS-sample
   window lies inside the source line (no edge handling).
   dst/dst_width: output samples and how many to produce.
   src/src_width: source line and its length in samples.
   src_start/src_incr: fixed-point (POS_FRAC_BITS) start position and
   per-output-pixel step in the source.
   filters: NB_PHASES rows of NB_TAPS coefficients (see `filter` below). */
79 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
80 int src_width, int src_start, int src_incr,
83 int src_pos, phase, sum, i;
88 for(i=0;i<dst_width;i++) {
/* Sanity check that the filter window is inside the source line; the
   statement executed when it is not is on a line not shown here. */
91 if ((src_pos >> POS_FRAC_BITS) < 0 ||
92 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
/* Integer part of the position selects the first source sample, the
   fractional part selects the coefficient row. */
95 s = src + (src_pos >> POS_FRAC_BITS);
96 phase = get_phase(src_pos);
97 filter = filters + phase * NB_TAPS;
99 sum = s[0] * filter[0] +
/* Generic dot product over the taps (alternative to the unrolled form
   above; the preprocessor conditions selecting one are not shown). */
107 for(j=0;j<NB_TAPS;j++)
108 sum += s[j] * filter[j];
/* Remove the fixed-point scale of the coefficients. */
111 sum = sum >> FILTER_BITS;
122 /* This function must be optimized */
/* Vertically filter one output line: every output sample is the weighted
   sum of NB_TAPS samples taken from consecutive lines spaced `wrap` bytes
   apart, using the single coefficient row `filter`.
   dst/dst_width: output line and its length.
   src: first of the NB_TAPS input lines. */
123 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
124 int wrap, int16_t *filter)
130 for(i=0;i<dst_width;i++) {
/* Unrolled four-tap form. */
132 sum = s[0 * wrap] * filter[0] +
133 s[1 * wrap] * filter[1] +
134 s[2 * wrap] * filter[2] +
135 s[3 * wrap] * filter[3];
/* Generic form; s1 presumably advances by `wrap` per tap on a line not
   shown here -- confirm. */
142 for(j=0;j<NB_TAPS;j++) {
143 sum += s1[0] * filter[j];
/* Remove the fixed-point scale of the coefficients. */
148 sum = sum >> FILTER_BITS;
/* MMX helper used by h_resample_fast4_mmx(): computes one horizontally
   filtered sample into MMX register `reg`. It locates the source samples and
   coefficient row for the current src_pos, widens the source bytes to 16-bit
   words (unpacking against mm7, presumably zero -- confirm), multiplies and
   pairwise-adds them with the coefficients (pmaddwd), scales the sum down by
   FILTER_BITS and finally advances src_pos by src_incr. Uses mm6 as scratch
   and the caller's locals s, phase, filter, src_pos. Some steps are on lines
   not shown here. */
163 #define FILTER4(reg) \
165 s = src + (src_pos >> POS_FRAC_BITS);\
166 phase = get_phase(src_pos);\
167 filter = filters + phase * NB_TAPS;\
169 punpcklbw_r2r(mm7, reg);\
170 movq_m2r(*filter, mm6);\
171 pmaddwd_r2r(reg, mm6);\
174 paddd_r2r(mm6, reg);\
175 psrad_i2r(FILTER_BITS, reg);\
176 src_pos += src_incr;\
/* Debug helper: stores MMX register `reg` into the caller's `tmp` and prints
   its 64-bit contents in hexadecimal on stdout. */
179 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
181 /* XXX: do four pixels at a time */
/* MMX variant of h_resample_fast(), selected by h_resample() only when
   NB_TAPS == 4. Same parameters as h_resample_fast(). */
182 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
183 const uint8_t *src, int src_width,
184 int src_start, int src_incr, int16_t *filters)
/* Main loop: produces four output samples per iteration (the FILTER4
   invocations and the stores are on lines not shown here). */
194 while (dst_width >= 4) {
/* Saturate each result to the unsigned 8-bit range. */
201 packuswb_r2r(mm7, mm0);
202 packuswb_r2r(mm7, mm1);
203 packuswb_r2r(mm7, mm3);
204 packuswb_r2r(mm7, mm2);
/* Tail loop: remaining (fewer than four) samples, one per iteration. */
216 while (dst_width > 0) {
218 packuswb_r2r(mm7, mm0);
/* MMX variant of v_resample() for four taps. Same parameters as
   v_resample(). The only visible call site, in component_resample(), is
   disabled with "&& 0" because of loss of precision. */
227 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
228 int wrap, int16_t *filter)
/* Replicate one 16-bit value into all four 16-bit lanes of a 64-bit word
   (tmp presumably holds filter[i]; its assignment is not shown). */
237 coefs[i] = (tmp<<48) + (tmp<<32) + (tmp<<16) + tmp;
/* Main loop: four output samples per iteration. */
242 while (dst_width >= 4) {
/* Load samples from each of the four input lines and widen bytes to
   16-bit words. */
243 movq_m2r(s[0 * wrap], mm0);
244 punpcklbw_r2r(mm7, mm0);
245 movq_m2r(s[1 * wrap], mm1);
246 punpcklbw_r2r(mm7, mm1);
247 movq_m2r(s[2 * wrap], mm2);
248 punpcklbw_r2r(mm7, mm2);
249 movq_m2r(s[3 * wrap], mm3);
250 punpcklbw_r2r(mm7, mm3);
/* Multiply each line by its coefficient; pmullw keeps only the low 16 bits
   of every product. */
252 pmullw_m2r(coefs[0], mm0);
253 pmullw_m2r(coefs[1], mm1);
254 pmullw_m2r(coefs[2], mm2);
255 pmullw_m2r(coefs[3], mm3);
/* Scale down (16-bit arithmetic shift) and saturate to unsigned bytes. */
260 psraw_i2r(FILTER_BITS, mm0);
262 packuswb_r2r(mm7, mm0);
/* Store the four result bytes with a single 32-bit write.
   NOTE(review): this type-punned store assumes dst may be written as a
   uint32_t -- confirm alignment/aliasing expectations. */
265 *(uint32_t *)dst = tmp & 0xFFFFFFFF;
/* Tail loop: scalar computation for the remaining samples. */
270 while (dst_width > 0) {
271 sum = s[0 * wrap] * filter[0] +
272 s[1 * wrap] * filter[1] +
273 s[2 * wrap] * filter[2] +
274 s[3 * wrap] * filter[3];
275 sum = sum >> FILTER_BITS;
287 #endif /* HAVE_MMX */
289 /* slow version to handle limit cases. Does not need optimization */
/* Horizontal filter with edge handling: each tap position is compared
   against the bounds of the source line before a sample value `v` is chosen
   (the replacement values used outside the line are on lines not shown).
   Parameters are the same as for h_resample_fast(). */
290 static void h_resample_slow(uint8_t *dst, int dst_width,
291 const uint8_t *src, int src_width,
292 int src_start, int src_incr, int16_t *filters)
294 int src_pos, phase, sum, j, v, i;
295 const uint8_t *s, *src_end;
/* One past the last valid source sample. */
298 src_end = src + src_width;
300 for(i=0;i<dst_width;i++) {
/* NOTE(review): h_resample() calls this function with negative positions,
   so `s` can point before `src` here; it is only dereferenced after the
   bounds tests below, but forming such a pointer is formally undefined. */
301 s = src + (src_pos >> POS_FRAC_BITS);
302 phase = get_phase(src_pos);
303 filter = filters + phase * NB_TAPS;
305 for(j=0;j<NB_TAPS;j++) {
308 else if (s >= src_end)
312 sum += v * filter[j];
/* Remove the fixed-point scale of the coefficients. */
315 sum = sum >> FILTER_BITS;
/* Horizontally resample one line, splitting the work into up to three runs:
   a left-edge run and a right-edge run handled by h_resample_slow(), and a
   middle run handled by a fast routine that needs no bounds checks.
   Parameters are the same as for h_resample_fast(). */
326 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
327 int src_width, int src_start, int src_incr,
/* Left edge: n = ceil(-src_start / src_incr) is the number of output samples
   whose window starts before the first source sample. */
333 n = (0 - src_start + src_incr - 1) / src_incr;
334 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
337 src_start += n * src_incr;
/* Middle: if the remaining samples would run past the last position where a
   full window fits, n is recomputed as the number that still fit. */
339 src_end = src_start + dst_width * src_incr;
340 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
341 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
347 if ((mm_flags & FF_MM_MMX) && NB_TAPS == 4)
348 h_resample_fast4_mmx(dst, n,
349 src, src_width, src_start, src_incr, filters);
352 h_resample_fast(dst, n,
353 src, src_width, src_start, src_incr, filters);
/* Right edge: whatever is left goes through the bounds-checked routine. */
357 src_start += n * src_incr;
358 h_resample_slow(dst, dst_width,
359 src, src_width, src_start, src_incr, filters);
/* Resample one image plane.
   Input lines are first filtered horizontally into the ring buffer
   s->line_buf (each buffered line is owidth bytes); every output line is then
   produced by vertically filtering the NB_TAPS most recent buffered lines.
   output/owrap/owidth/oheight: destination plane, its stride and size.
   input/iwrap/iwidth/iheight: source plane, its stride and size. */
363 static void component_resample(ImgReSampleContext *s,
364 uint8_t *output, int owrap, int owidth, int oheight,
365 uint8_t *input, int iwrap, int iwidth, int iheight)
367 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
368 uint8_t *new_line, *src_line;
/* last_src_y: index of the last source line already filtered horizontally;
   it starts above the picture so the first filter window is filled. */
370 last_src_y = - FCENTER - 1;
371 /* position of the bottom of the filter in the source image */
372 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
373 ring_y = NB_TAPS; /* position in ring buffer */
374 for(y=0;y<oheight;y++) {
375 /* apply horizontal filter on new lines from input if needed */
376 src_y1 = src_y >> POS_FRAC_BITS;
377 while (last_src_y < src_y1) {
378 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
381 /* handle limit conditions : replicate line (slightly
382 inefficient because we filter multiple times) */
386 } else if (y1 >= iheight) {
389 src_line = input + y1 * iwrap;
390 new_line = s->line_buf + ring_y * owidth;
391 /* apply filter and handle limit cases correctly */
392 h_resample(new_line, owidth,
393 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
394 &s->h_filters[0][0]);
395 /* handle ring buffer wrapping */
/* Lines stored in the extra NB_TAPS slots are mirrored to the start of the
   buffer, so a vertical window is always contiguous in memory. */
396 if (ring_y >= LINE_BUF_HEIGHT) {
397 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
401 /* apply vertical filter */
402 phase_y = get_phase(src_y);
404 /* deactivated MMX because of loss of precision */
405 if ((mm_flags & FF_MM_MMX) && NB_TAPS == 4 && 0)
406 v_resample4_mmx(output, owidth,
407 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
408 &s->v_filters[phase_y][0]);
412 if ((mm_flags & FF_MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
413 v_resample16_altivec(output, owidth,
414 s->line_buf + (ring_y - NB_TAPS + 1) * owidth,
415 owidth, &s->v_filters[phase_y][0]);
/* Portable C fallback. */
418 v_resample(output, owidth,
419 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
420 &s->v_filters[phase_y][0]);
/* Allocate and set up a resampling context.
   owidth/oheight: full output size, padding included.
   iwidth/iheight: full input size, cropping bands included.
   topBand..rightBand: amounts removed from the input before scaling.
   padtop..padright: amounts reserved around the scaled picture in the output.
   Returns the new context; the values returned on the failure paths are on
   lines not shown here. Release with img_resample_close(). */
428 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
429 int iwidth, int iheight,
430 int topBand, int bottomBand,
431 int leftBand, int rightBand,
432 int padtop, int padbottom,
433 int padleft, int padright)
435 ImgReSampleContext *s;
/* Reject empty pictures. */
437 if (!owidth || !oheight || !iwidth || !iheight)
440 s = av_mallocz(sizeof(ImgReSampleContext));
/* Guard the line buffer size computation against overflow. */
443 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
/* Ring buffer of horizontally filtered lines, owidth bytes per line. */
445 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
450 s->oheight = oheight;
452 s->iheight = iheight;
454 s->topBand = topBand;
455 s->bottomBand = bottomBand;
456 s->leftBand = leftBand;
457 s->rightBand = rightBand;
460 s->padbottom = padbottom;
461 s->padleft = padleft;
462 s->padright = padright;
/* Size of the area that actually receives scaled pixels. */
464 s->pad_owidth = owidth - (padleft + padright);
465 s->pad_oheight = oheight - (padtop + padbottom);
/* Fixed-point step in the source per output pixel / line.
   NOTE(review): no visible check that pad_owidth and pad_oheight are
   non-zero (or positive); padding as large as the output size would divide
   by zero here -- confirm. */
467 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
468 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
/* Build the horizontal and vertical filter banks for the respective
   output/input scale ratios. */
470 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
471 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
472 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
473 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
/* Convenience wrapper: create a resampling context with no cropping and no
   padding (all eight band/pad arguments are zero). Returns whatever
   img_resample_full_init() returns. */
481 ImgReSampleContext *img_resample_init(int owidth, int oheight,
482 int iwidth, int iheight)
484 return img_resample_full_init(owidth, oheight, iwidth, iheight,
485 0, 0, 0, 0, 0, 0, 0, 0);
/* Resample a whole picture, plane by plane, from `input` into `output`
   using the sizes, bands and padding stored in `s`. */
488 void img_resample(ImgReSampleContext *s,
489 AVPicture *output, const AVPicture *input)
/* Plane 0 is processed at full size; all other planes have their sizes and
   offsets halved in both directions. */
495 shift = (i == 0) ? 0 : 1;
/* Start of the scaled area inside the output plane: skip the top and left
   padding. */
497 optr = output->data[i] + (((output->linesize[i] *
498 s->padtop) + s->padleft) >> shift);
/* The input pointer skips the top and left bands; the input size passed
   down excludes all four bands. */
500 component_resample(s, optr, output->linesize[i],
501 s->pad_owidth >> shift, s->pad_oheight >> shift,
502 input->data[i] + (input->linesize[i] *
503 (s->topBand >> shift)) + (s->leftBand >> shift),
504 input->linesize[i], ((s->iwidth - s->leftBand -
505 s->rightBand) >> shift),
506 (s->iheight - s->topBand - s->bottomBand) >> shift);
/* Release a context created by img_resample_full_init(): frees the line
   ring buffer (the release of `s` itself is on a line not shown here).
   NOTE(review): s is dereferenced without a NULL check -- confirm callers
   never pass NULL. */
510 void img_resample_close(ImgReSampleContext *s)
512 av_free(s->line_buf);
/* Name callback stored as the second member of context_class; its body is
   not part of this extract. */
516 static const char *context_to_name(void* ptr)
/* AVClass instance attached to every SwsContext created by sws_getContext(),
   which then passes the context to av_log(). */
521 static const AVClass context_class = { "imgresample", context_to_name, NULL };
/* Emulation of the libswscale constructor on top of the image resampler.
   srcW/srcH/srcFormat and dstW/dstH/dstFormat describe the input and output
   pictures. flags, srcFilter, dstFilter and param are not used in any of the
   lines visible here. Free the result with sws_freeContext(). */
523 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
524 int dstW, int dstH, int dstFormat,
525 int flags, SwsFilter *srcFilter,
526 SwsFilter *dstFilter, double *param)
528 struct SwsContext *ctx;
530 ctx = av_malloc(sizeof(struct SwsContext));
532 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
536 ctx->av_class = &context_class;
/* Sizes differ: a real resampler is needed. */
538 if ((srcH != dstH) || (srcW != dstW)) {
539 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
540 av_log(ctx, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
/* NOTE(review): no check of the img_resample_init() result is visible;
   sws_scale() and sws_freeContext() dereference resampling_ctx
   unconditionally -- confirm. */
542 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
/* Sizes are equal: allocate a bare context that only records the sizes.
   NOTE(review): the av_malloc() result is dereferenced on the very next
   line without a NULL check, and the memory is not zeroed, so every member
   other than the four set below is uninitialized. */
544 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
545 ctx->resampling_ctx->iheight = srcH;
546 ctx->resampling_ctx->iwidth = srcW;
547 ctx->resampling_ctx->oheight = dstH;
548 ctx->resampling_ctx->owidth = dstW;
550 ctx->src_pix_fmt = srcFormat;
551 ctx->dst_pix_fmt = dstFormat;
/* Free a context returned by sws_getContext(). */
556 void sws_freeContext(struct SwsContext *ctx)
/* Differing sizes mean resampling_ctx came from img_resample_init() and
   must be released with img_resample_close(); otherwise it is the bare
   av_malloc() block made by sws_getContext() (the else line between the two
   calls is not shown here). */
560 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
561 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
562 img_resample_close(ctx->resampling_ctx);
564 av_free(ctx->resampling_ctx);
571 * Checks if context is valid or reallocs a new one instead.
572 * If context is NULL, just calls sws_getContext() to get a new one.
573 * Otherwise, checks if the parameters are the same already saved in context.
574 * If that is the case, returns the current context.
575 * Otherwise, frees context and gets a new one.
577 * Be warned that srcFilter, dstFilter are not checked, they are
578 * assumed to remain valid.
580 struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
581 int srcW, int srcH, int srcFormat,
582 int dstW, int dstH, int dstFormat, int flags,
583 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
/* Any difference in size or pixel format invalidates the cached context.
   flags and param are not part of this comparison. */
586 if ((ctx->resampling_ctx->iwidth != srcW) ||
587 (ctx->resampling_ctx->iheight != srcH) ||
588 (ctx->src_pix_fmt != srcFormat) ||
589 (ctx->resampling_ctx->owidth != dstW) ||
590 (ctx->resampling_ctx->oheight != dstH) ||
591 (ctx->dst_pix_fmt != dstFormat))
593 sws_freeContext(ctx);
/* Build a fresh context from the requested parameters. */
598 return sws_getContext(srcW, srcH, srcFormat,
599 dstW, dstH, dstFormat, flags,
600 srcFilter, dstFilter, param);
/* Emulation of sws_scale(): convert and/or rescale one picture.
   The resampler only works on YUV420P, so the picture is converted to
   YUV420P first if needed, rescaled, and then converted to the destination
   format if needed. When the sizes are equal only the format conversion (or
   a plain copy) is performed.
   src/srcStride and dst/dstStride: plane pointers and strides; the first
   four entries of each array are read.
   srcSliceY and srcSliceH are not used in any of the lines visible here.
   The return statements are on lines not shown here. */
605 int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
606 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
608 AVPicture src_pict, dst_pict;
610 AVPicture picture_format_temp;
611 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
/* Temporary YUV420P buffers for the input (buf1) and output (buf2) of the
   resampler; their release is on lines not shown here. */
612 uint8_t *buf1 = NULL, *buf2 = NULL;
613 enum PixelFormat current_pix_fmt;
/* Wrap the caller's plane arrays into AVPicture structures. */
615 for (i = 0; i < 4; i++) {
616 src_pict.data[i] = src[i];
617 src_pict.linesize[i] = srcStride[i];
618 dst_pict.data[i] = dst[i];
619 dst_pict.linesize[i] = dstStride[i];
621 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
622 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
623 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
625 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
628 /* create temporary picture for rescaling input*/
629 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
630 buf1 = av_malloc(size);
635 formatted_picture = &picture_format_temp;
636 avpicture_fill((AVPicture*)formatted_picture, buf1,
637 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
639 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
640 &src_pict, ctx->src_pix_fmt,
641 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
643 av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
/* Input is already YUV420P: resample straight from the caller's planes. */
648 formatted_picture = &src_pict;
651 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
654 /* create temporary picture for rescaling output*/
655 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
656 buf2 = av_malloc(size);
661 resampled_picture = &picture_resample_temp;
662 avpicture_fill((AVPicture*)resampled_picture, buf2,
663 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
/* Output is YUV420P: resample straight into the caller's planes. */
666 resampled_picture = &dst_pict;
669 /* ...and finally rescale!!! */
670 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
671 current_pix_fmt = PIX_FMT_YUV420P;
/* No rescaling needed: continue with the source picture as it is. */
673 resampled_picture = &src_pict;
674 current_pix_fmt = ctx->src_pix_fmt;
/* Final step: convert to the destination format, or copy if the data is
   already in that format but not yet in the destination planes. */
677 if (current_pix_fmt != ctx->dst_pix_fmt) {
678 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
679 resampled_picture, current_pix_fmt,
680 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
682 av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
687 } else if (resampled_picture != &dst_pict) {
688 av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
689 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
/* Test-program buffers: `img` is the synthetic source image built by main();
   `img1` and `img2` receive resampled output (main() compares them with
   memcmp() in its MMX test). */
706 uint8_t img[XSIZE * YSIZE];
711 uint8_t img1[XSIZE1 * YSIZE1];
712 uint8_t img2[XSIZE1 * YSIZE1];
/* Test helper: write an 8-bit grayscale image of xsize x ysize pixels to
   `filename` as a binary PGM ("P5") file with a maximum value of 255. */
714 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
/* NOTE(review): the stream is used on the next line without a NULL check,
   and it is opened in text mode ("w") although raw bytes are written; on
   platforms that translate line endings "wb" would be required. */
718 f=fopen(filename,"w");
719 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
720 fwrite(img,1, xsize * ysize,f);
/* Turn any later use of fprintf into a reference to please_use_av_log. */
722 #define fprintf please_use_av_log
/* Test helper: log a filter bank, one line per phase, with each of the
   NB_TAPS coefficients printed as a real number.
   NOTE(review): the coefficients are divided by 256.0, which matches the
   1 << FILTER_BITS scale used when the filters are built only if
   FILTER_BITS is 8 -- confirm. */
725 static void dump_filter(int16_t *filter)
729 for(ph=0;ph<NB_PHASES;ph++) {
730 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
731 for(i=0;i<NB_TAPS;i++) {
732 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
734 av_log(NULL, AV_LOG_INFO, "\n");
/* Test program: builds a synthetic image, resamples it with several scale
   factors, writes the input and every result to /tmp as PGM files, and (when
   HAVE_MMX is set) compares the output produced with mm_flags = FF_MM_MMX
   against a second run stored in img2. */
742 int main(int argc, char **argv)
744 int x, y, v, i, xsize, ysize;
745 ImgReSampleContext *s;
/* Scale factors to test, from 1/2 to 2. */
746 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
749 /* build test image */
750 for(y=0;y<YSIZE;y++) {
751 for(x=0;x<XSIZE;x++) {
752 if (x < XSIZE/2 && y < YSIZE/2) {
753 if (x < XSIZE/4 && y < YSIZE/4) {
759 } else if (x < XSIZE/4) {
/* NOTE(review): y is compared against XSIZE-based bounds here and twice
   below; YSIZE may have been intended -- confirm. */
764 } else if (y < XSIZE/4) {
776 if (((x+3) % 4) <= 1 &&
783 } else if (x < XSIZE/2) {
784 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
785 } else if (y < XSIZE/2) {
786 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
788 v = ((x + y - XSIZE) * 255) / XSIZE;
/* NOTE(review): with y == 0 (any x), and with y == 1 and x == 0, this
   index is >= XSIZE * YSIZE, i.e. past the end of img[]; mirroring would
   need (YSIZE - 1 - y) and (XSIZE - 1 - x). */
790 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
793 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
794 for(i=0;i<FF_ARRAY_ELEMS(factors);i++) {
/* Crop 50 lines at the top and bottom, then scale by `fact`. */
796 xsize = (int)(XSIZE * fact);
797 ysize = (int)((YSIZE - 100) * fact);
798 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
799 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
800 dump_filter(&s->h_filters[0][0]);
801 component_resample(s, img1, xsize, xsize, ysize,
802 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
803 img_resample_close(s);
805 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
806 save_pgm(buf, img1, xsize, ysize);
811 av_log(NULL, AV_LOG_INFO, "MMX test\n");
813 xsize = (int)(XSIZE * fact);
814 ysize = (int)(YSIZE * fact);
/* First run with MMX enabled, result in img1. */
815 mm_flags = FF_MM_MMX;
816 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
817 component_resample(s, img1, xsize, xsize, ysize,
818 img, XSIZE, XSIZE, YSIZE);
/* Second run, result in img2 (the mm_flags change between the two runs is
   on a line not shown here). */
821 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
822 component_resample(s, img2, xsize, xsize, ysize,
823 img, XSIZE, XSIZE, YSIZE);
/* Both runs must produce identical bytes. */
824 if (memcmp(img1, img2, xsize * ysize) != 0) {
825 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
828 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
829 #endif /* HAVE_MMX */