2 * High quality image resampling with polyphase filters
3 * Copyright (c) 2001 Gerard Lantau.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include "fastmemcpy.h"
/* Component count; presumably the number of planes walked by img_resample
   (that loop is not visible in this view). */
31 #define NB_COMPONENTS 3
/* One coefficient set per phase: first dimension of h_filters / v_filters. */
34 #define NB_PHASES (1 << PHASE_BITS)
36 #define FCENTER 1 /* index of the center of the filter */
/* Source positions are fixed point with POS_FRAC_BITS fractional bits;
   POS_FRAC is the value 1.0 in that representation. */
38 #define POS_FRAC_BITS 16
39 #define POS_FRAC (1 << POS_FRAC_BITS)
40 /* 6 bits precision is needed for MMX */
/* Height used for the ring buffer of horizontally filtered lines in
   component_resample; img_resample_init allocates NB_TAPS extra lines. */
43 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
/* Resampler state created by img_resample_init: picture dimensions and the
   per-phase filter tables (other members are not visible in this view). */
45 struct ImgReSampleContext {
/* input (i*) and output (o*) picture dimensions, as passed to img_resample_init */
46 int iwidth, iheight, owidth, oheight;
/* NB_TAPS coefficients for each of the NB_PHASES phases; filled by build_filter */
48 INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
49 INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
/* Return the filter phase of a fixed-point position: the top PHASE_BITS bits
   of its fractional part, i.e. a value in [0, NB_PHASES).
   NOTE(review): callers pass negative positions (e.g. -FCENTER * POS_FRAC);
   right-shifting a negative int is implementation-defined in C, so this
   relies on an arithmetic shift. */
53 static inline int get_phase(int pos)
55 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
/*
 * Horizontal filter without edge handling: for each of the dst_width output
 * samples, NB_TAPS consecutive source bytes starting at the integer part of
 * src_pos are weighted by the coefficient set of the current phase.
 *
 * dst, dst_width : output line and number of samples to produce
 * src, src_width : source line and its width
 * src_start, src_incr : presumably the initial fixed-point position and the
 *                       per-sample step (their use is not visible in this view)
 * filters : table of NB_PHASES sets of NB_TAPS coefficients
 */
58 /* This function must be optimized */
59 static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
60 int src_start, int src_incr, INT16 *filters)
62 int src_pos, phase, sum, i;
67 for(i=0;i<dst_width;i++) {
/* range test: the integer position must stay within [0, src_width - NB_TAPS]
   so that all taps read inside the line (the action taken on failure is not
   visible in this view) */
70 if ((src_pos >> POS_FRAC_BITS) < 0 ||
71 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
/* first tap address and coefficient set for this position */
74 s = src + (src_pos >> POS_FRAC_BITS);
75 phase = get_phase(src_pos);
76 filter = filters + phase * NB_TAPS;
/* two forms of the tap sum appear below, an unrolled one and a generic loop
   over NB_TAPS; how one is selected is not visible in this view */
78 sum = s[0] * filter[0] +
86 for(j=0;j<NB_TAPS;j++)
87 sum += s[j] * filter[j];
/* drop the FILTER_BITS fractional bits of the fixed-point coefficients */
90 sum = sum >> FILTER_BITS;
/*
 * Vertical filter: every output sample combines source samples that are
 * `wrap` bytes apart (one per buffered line), weighted by filter[].
 * The remaining parameters of the signature are not visible in this view.
 */
101 /* This function must be optimized */
102 static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
109 for(i=0;i<dst_width;i++) {
/* unrolled 4-tap form: taps come from four lines at the same column */
111 sum = s[0 * wrap] * filter[0] +
112 s[1 * wrap] * filter[1] +
113 s[2 * wrap] * filter[2] +
114 s[3 * wrap] * filter[3];
/* generic form over NB_TAPS; s1 is presumably advanced by wrap on each
   iteration (that statement is not visible in this view) */
121 for(j=0;j<NB_TAPS;j++) {
122 sum += s1[0] * filter[j];
/* drop the FILTER_BITS fractional bits of the fixed-point coefficients */
127 sum = sum >> FILTER_BITS;
140 #include "i386/mmx.h"
/*
 * FILTER4(reg): filter one output sample with MMX, leaving the result in reg.
 * Locates the source pointer and the coefficient set from src_pos, widens the
 * source bytes using mm7 (presumably zero -- TODO confirm at the call site),
 * multiply-accumulates against the coefficients loaded into mm6, adds the
 * partial sums, shifts right by FILTER_BITS and advances src_pos by src_incr.
 * Expects s, phase, filter, src, src_pos, src_incr and filters in scope and
 * uses mm6 as scratch. Some steps of the macro are not visible in this view.
 */
142 #define FILTER4(reg) \
144 s = src + (src_pos >> POS_FRAC_BITS);\
145 phase = get_phase(src_pos);\
146 filter = filters + phase * NB_TAPS;\
148 punpcklbw_r2r(mm7, reg);\
149 movq_m2r(*filter, mm6);\
150 pmaddwd_r2r(reg, mm6);\
153 paddd_r2r(mm6, reg);\
154 psrad_i2r(FILTER_BITS, reg);\
155 src_pos += src_incr;\
/* Debug helper: stores an MMX register into the local `tmp` and prints it as
   a 64-bit hex value; requires a `tmp` with a `uq` member in scope. */
158 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
/*
 * MMX horizontal filter with the same parameters as h_resample_fast.
 * A main loop produces four output samples per iteration (results in
 * mm0..mm3), then a tail loop handles the remaining samples one at a time.
 * The filtering and store steps between the visible lines are not in view.
 */
160 /* XXX: do four pixels at a time */
161 static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
162 int src_start, int src_incr, INT16 *filters)
/* main loop: four samples per iteration */
172 while (dst_width >= 4) {
/* pack each result with unsigned saturation */
179 packuswb_r2r(mm7, mm0);
180 packuswb_r2r(mm7, mm1);
181 packuswb_r2r(mm7, mm3);
182 packuswb_r2r(mm7, mm2);
/* tail loop: remaining samples, one per iteration */
194 while (dst_width > 0) {
196 packuswb_r2r(mm7, mm0);
/*
 * MMX vertical 4-tap filter: handles four columns per iteration, then
 * finishes the remaining columns with scalar code.
 * NOTE(review): component_resample currently disables this path
 * ("loss of precision"); pmullw keeps only 16 bits of each product.
 */
205 static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
223 while (dst_width >= 4) {
/* load one group of samples from each of the four lines and widen the low
   four bytes to 16-bit words (mm7 presumably holds zero -- TODO confirm).
   NOTE(review): each movq reads 8 bytes although only 4 are used; confirm
   the buffer tolerates the over-read at the end of a line. */
224 movq_m2r(s[0 * wrap], mm0);
225 punpcklbw_r2r(mm7, mm0);
226 movq_m2r(s[1 * wrap], mm1);
227 punpcklbw_r2r(mm7, mm1);
228 movq_m2r(s[2 * wrap], mm2);
229 punpcklbw_r2r(mm7, mm2);
230 movq_m2r(s[3 * wrap], mm3);
231 punpcklbw_r2r(mm7, mm3);
/* weight each line by its tap coefficient */
233 pmullw_m2r(coefs[0], mm0);
234 pmullw_m2r(coefs[1], mm1);
235 pmullw_m2r(coefs[2], mm2);
236 pmullw_m2r(coefs[3], mm3);
/* scale back by FILTER_BITS and pack to bytes with unsigned saturation
   (the summation of mm0..mm3 is not visible in this view) */
241 psraw_i2r(FILTER_BITS, mm0);
243 packuswb_r2r(mm7, mm0);
/* store four output bytes at once.
   NOTE(review): the UINT32 cast assumes dst may be written as a 32-bit word
   (alignment / aliasing) -- confirm. */
246 *(UINT32 *)dst = tmp.ud[0];
/* scalar tail for the remaining columns */
251 while (dst_width > 0) {
252 sum = s[0 * wrap] * filter[0] +
253 s[1 * wrap] * filter[1] +
254 s[2 * wrap] * filter[2] +
255 s[3 * wrap] * filter[3];
256 sum = sum >> FILTER_BITS;
/*
 * Edge-safe horizontal filter, same parameters as h_resample_fast.
 * Each tap address is tested against the bounds of the source line before
 * the tap value v is chosen, so positions near either end of the line can
 * be filtered. Presumably out-of-range taps reuse the nearest edge sample;
 * the branch bodies are not visible in this view.
 */
270 /* slow version to handle limit cases. Does not need optimisation */
271 static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
272 int src_start, int src_incr, INT16 *filters)
274 int src_pos, phase, sum, j, v, i;
/* one past the last valid source sample */
278 src_end = src + src_width;
280 for(i=0;i<dst_width;i++) {
/* first tap address and coefficient set for this position */
281 s = src + (src_pos >> POS_FRAC_BITS);
282 phase = get_phase(src_pos);
283 filter = filters + phase * NB_TAPS;
285 for(j=0;j<NB_TAPS;j++) {
/* tap lies at or beyond the end of the line */
288 else if (s >= src_end)
292 sum += v * filter[j];
/* drop the FILTER_BITS fractional bits of the fixed-point coefficients */
295 sum = sum >> FILTER_BITS;
/*
 * Horizontal resampling of one line with correct edge handling.
 * The output line is split into up to three runs: samples whose position is
 * still before the start of the source line (h_resample_slow), samples whose
 * taps all lie inside the line (a fast routine), and the remaining samples
 * near the end of the line (h_resample_slow again). The conditions guarding
 * each run and the updates of dst / dst_width are not visible in this view.
 */
306 static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
307 int src_start, int src_incr, INT16 *filters)
/* number of steps needed for the position to reach 0 (rounded up) */
312 n = (0 - src_start + src_incr - 1) / src_incr;
313 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
316 src_start += n * src_incr;
/* fixed-point position just after the last requested sample */
318 src_end = src_start + dst_width * src_incr;
/* if the run would pass the last position at which all taps fit, compute
   how many samples can still use the fast path */
319 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
320 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
/* the MMX routine only exists for 4-tap filters */
326 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
327 h_resample_fast4_mmx(dst, n,
328 src, src_width, src_start, src_incr, filters);
331 h_resample_fast(dst, n,
332 src, src_width, src_start, src_incr, filters);
/* whatever remains touches the end of the line: use the edge-safe version */
336 src_start += n * src_incr;
337 h_resample_slow(dst, dst_width,
338 src, src_width, src_start, src_incr, filters);
/*
 * Resample one image component from `input` to `output`.
 * For every output row, the source lines needed by the vertical filter are
 * first filtered horizontally into a ring buffer of lines (s->line_buf, each
 * line owidth bytes), then the vertical filter is applied across NB_TAPS
 * buffered lines.
 *
 * s : resampler context (filters, increments, line buffer)
 * output, owrap, owidth, oheight : destination plane, its wrap and size
 * input, iwrap, iwidth, iheight : source plane, its wrap and size
 *
 * The per-row advance of output and src_y is not visible in this view.
 */
342 static void component_resample(ImgReSampleContext *s,
343 UINT8 *output, int owrap, int owidth, int oheight,
344 UINT8 *input, int iwrap, int iwidth, int iheight)
346 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
347 UINT8 *new_line, *src_line;
/* index of the last source line already filtered horizontally (none yet) */
349 last_src_y = - FCENTER - 1;
350 /* position of the bottom of the filter in the source image */
351 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
352 ring_y = NB_TAPS; /* position in ring buffer */
353 for(y=0;y<oheight;y++) {
354 /* apply horizontal filter on new lines from input if needed */
355 src_y1 = src_y >> POS_FRAC_BITS;
356 while (last_src_y < src_y1) {
/* advance in the ring buffer; the wrap-around action is not visible here */
357 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
360 /* handle limit conditions : replicate line (slightly
361 inefficient because we filter multiple times) */
365 } else if (y1 >= iheight) {
368 src_line = input + y1 * iwrap;
369 new_line = s->line_buf + ring_y * owidth;
370 /* apply filter and handle limit cases correctly */
371 h_resample(new_line, owidth,
372 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
373 &s->h_filters[0][0]);
374 /* handle ring buffer wrapping */
/* lines stored at index >= LINE_BUF_HEIGHT are also written to the slot
   LINE_BUF_HEIGHT lines earlier (source and size arguments of the copy are
   not visible here), presumably so the vertical taps stay contiguous */
375 if (ring_y >= LINE_BUF_HEIGHT) {
376 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
380 /* apply vertical filter */
381 phase_y = get_phase(src_y);
383 /* deactivated MMX because of loss of precision */
/* the trailing "&& 0" makes this condition always false */
384 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
385 v_resample4_mmx(output, owidth,
386 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
387 &s->v_filters[phase_y][0]);
/* the filter window is the NB_TAPS most recent lines in the ring buffer */
390 v_resample(output, owidth,
391 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
392 &s->v_filters[phase_y][0]);
399 /* XXX: the following filter is quite naive, but it seems to suffice
/*
 * Fill `filter` with NB_PHASES sets of NB_TAPS fixed-point coefficients.
 *
 * filter : destination, indexed as filter[ph * NB_TAPS + i]
 * factor : size ratio; img_resample_init passes output size / input size
 *
 * The kernel evaluated at x is not visible in this view.
 */
401 static void build_filter(INT16 *filter, float factor)
404 float x, y, tab[NB_TAPS], norm, mult;
406 /* if upsampling, only need to interpolate, no filter */
410 for(ph=0;ph<NB_PHASES;ph++) {
412 for(i=0;i<NB_TAPS;i++) {
/* kernel argument: tap offset from the filter center minus the fractional
   phase, scaled by factor and pi */
414 x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
423 /* normalize so that a uniform color remains the same */
424 mult = (float)(1 << FILTER_BITS) / norm;
425 for(i=0;i<NB_TAPS;i++) {
/* NOTE(review): the cast truncates toward zero, so the stored coefficients
   of a phase may not sum to exactly 1 << FILTER_BITS */
426 v = (int)(tab[i] * mult);
427 filter[ph * NB_TAPS + i] = v;
/*
 * Allocate and set up a resampling context converting iwidth x iheight
 * pictures to owidth x oheight.
 * Returns the new context; failure handling is not visible in this view.
 */
432 ImgReSampleContext *img_resample_init(int owidth, int oheight,
433 int iwidth, int iheight)
435 ImgReSampleContext *s;
437 s = av_mallocz(sizeof(ImgReSampleContext));
/* ring buffer of horizontally filtered lines, owidth bytes each */
440 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
445 s->oheight = oheight;
447 s->iheight = iheight;
/* fixed-point source step per output sample.
   NOTE(review): iwidth * POS_FRAC is an int product (POS_FRAC is 1 << 16), so
   it overflows for dimensions of 32768 or more, and a zero output dimension
   divides by zero -- confirm callers validate sizes. */
449 s->h_incr = (iwidth * POS_FRAC) / owidth;
450 s->v_incr = (iheight * POS_FRAC) / oheight;
/* one filter table per direction, built from the output/input size ratio */
452 build_filter(&s->h_filters[0][0], (float)owidth / (float)iwidth);
453 build_filter(&s->v_filters[0][0], (float)oheight / (float)iheight);
/*
 * Resample every component of `input` into `output` using context s.
 * Component 0 uses the full context dimensions; the other components use
 * dimensions shifted right by one, i.e. half width and height (this assumes
 * subsampled secondary planes -- TODO confirm the expected pixel format).
 * The loop over i is not visible in this view.
 */
461 void img_resample(ImgReSampleContext *s,
462 AVPicture *output, AVPicture *input)
467 shift = (i == 0) ? 0 : 1;
468 component_resample(s, output->data[i], output->linesize[i],
469 s->owidth >> shift, s->oheight >> shift,
470 input->data[i], input->linesize[i],
471 s->iwidth >> shift, s->iheight >> shift);
/* Counterpart of img_resample_init; the body is not visible in this view
   (presumably releases the line buffer and the context -- TODO confirm). */
475 void img_resample_close(ImgReSampleContext *s)
/* Test-harness allocator returning zero-filled memory of `size` bytes.
   The allocation call itself is not visible in this view.
   NOTE(review): confirm ptr is checked for NULL before the memset. */
483 void *av_mallocz(int size)
487 memset(ptr, 0, size);
/* Test source image, filled and read by main. */
494 UINT8 img[XSIZE * YSIZE];
/* Destination buffers for the resampled results compared in main. */
499 UINT8 img1[XSIZE1 * YSIZE1];
500 UINT8 img2[XSIZE1 * YSIZE1];
/*
 * Write an 8-bit grey image of xsize x ysize bytes to `filename` as a binary
 * PGM ("P5") file with a maximum value of 255.
 * NOTE(review): the file is opened in text mode ("w") although raw bytes are
 * written; "wb" would be safer on platforms that distinguish the two.
 * NOTE(review): no check of the fopen result is visible before f is used.
 */
502 void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize)
505 f=fopen(filename,"w");
/* header: magic, dimensions, maximum sample value */
506 fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
/* pixel data, one byte per sample */
507 fwrite(img,1, xsize * ysize,f);
/* Print the NB_PHASES x NB_TAPS coefficient table as decimal values.
   The division by 256.0 matches the fixed-point scale only if FILTER_BITS
   is 8 (its definition is not visible in this view -- TODO confirm). */
511 static void dump_filter(INT16 *filter)
515 for(ph=0;ph<NB_PHASES;ph++) {
517 for(i=0;i<NB_TAPS;i++) {
518 printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
/*
 * Test driver: builds a synthetic XSIZE x YSIZE image, resamples it with
 * several scale factors, writes the results as PGM files under /tmp, then
 * resamples twice more and compares both outputs ("MMX test").
 * Only part of the function is visible in this view.
 */
528 int main(int argc, char **argv)
530 int x, y, v, i, xsize, ysize;
531 ImgReSampleContext *s;
/* scale factors exercised by the test, from 1/2 to 2 */
532 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
535 /* build test image */
536 for(y=0;y<YSIZE;y++) {
537 for(x=0;x<XSIZE;x++) {
/* the top-left quadrant is subdivided into four pattern areas */
538 if (x < XSIZE/2 && y < YSIZE/2) {
539 if (x < XSIZE/4 && y < YSIZE/4) {
545 } else if (x < XSIZE/4) {
/* NOTE(review): a y bound expressed with XSIZE; equivalent only when
   XSIZE == YSIZE */
550 } else if (y < XSIZE/4) {
562 if (((x+3) % 4) <= 1 &&
/* the remaining quadrants hold ramps.
   NOTE(review): in the next two branches x (resp. y) is below XSIZE/2, so
   the difference is negative and v wraps when stored into the UINT8 image
   -- confirm this is intended. */
569 } else if (x < XSIZE/2) {
570 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
571 } else if (y < XSIZE/2) {
572 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
574 v = ((x + y - XSIZE) * 255) / XSIZE;
576 img[y * XSIZE + x] = v;
579 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
/* one resampling run and one output file per factor */
580 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
582 xsize = (int)(XSIZE * fact);
583 ysize = (int)(YSIZE * fact);
584 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
585 printf("Factor=%0.2f\n", fact);
586 dump_filter(&s->h_filters[0][0]);
587 component_resample(s, img1, xsize, xsize, ysize,
588 img, XSIZE, XSIZE, YSIZE);
589 img_resample_close(s);
/* NOTE(review): the declaration and size of buf are not visible in this view */
591 sprintf(buf, "/tmp/out%d.pgm", i);
592 save_pgm(buf, img1, xsize, ysize);
597 printf("MMX test\n");
599 xsize = (int)(XSIZE * fact);
600 ysize = (int)(YSIZE * fact);
/* two runs into img1 and img2; presumably mm_flags is changed between them
   (those assignments are not visible in this view) */
602 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
603 component_resample(s, img1, xsize, xsize, ysize,
604 img, XSIZE, XSIZE, YSIZE);
607 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
608 component_resample(s, img2, xsize, xsize, ysize,
609 img, XSIZE, XSIZE, YSIZE);
/* both runs must produce identical bytes */
610 if (memcmp(img1, img2, xsize * ysize) != 0) {
611 fprintf(stderr, "mmx error\n");