2 * Misc image conversion routines
3 * Copyright (c) 2001, 2002 Fabrice Bellard.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "fastmemcpy.h"
29 /* XXX: totally non optimized */
/* Conversion helper taking three plane pointers (lum, cb, cr), one source
 * pointer and the picture size.
 * NOTE(review): judging by the name this turns packed YUV 4:2:2 into planar
 * YUV 4:2:0; the per-pixel statements are not visible in this excerpt, so
 * which bytes are read and written must be confirmed against the full file. */
31 static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
32 UINT8 *src, int width, int height)
/* Rows are handled in pairs: y advances by two per outer iteration. */
37 for(y=0;y<height;y+=2) {
/* First inner pass of the row pair, two pixels per iteration. */
38 for(x=0;x<width;x+=2) {
/* Second inner pass of the same row pair, again two pixels per iteration. */
48 for(x=0;x<width;x+=2) {
/* Rounding term for fixed-point values carrying SCALEBITS fractional bits:
 * half of one unit. SCALEBITS itself is defined outside this excerpt. */
58 #define ONE_HALF (1 << (SCALEBITS - 1))
/* Convert a real constant x to fixed point with SCALEBITS fractional bits.
 * The +0.5 before the truncating int cast rounds to nearest for the
 * non-negative constants this file passes in. */
59 #define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5))
/* Fill the lum, cb and cr planes from the picture at src.
 * A luma value is written for every pixel; one cb/cr pair is written per
 * inner iteration from the r1/g1/b1 values.
 * NOTE(review): the loads of r/g/b, the accumulation into r1/g1/b1 and all
 * pointer stepping are not visible in this excerpt. The name implies 3-byte
 * R,G,B source pixels - confirm against the full file. */
61 static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
62 UINT8 *src, int width, int height)
64 int wrap, wrap3, x, y;
65 int r, g, b, r1, g1, b1;
/* Two rows per outer iteration, two pixels per inner iteration. */
71 for(y=0;y<height;y+=2) {
72 for(x=0;x<width;x+=2) {
/* Luma = 0.299 R + 0.587 G + 0.114 B in fixed point; ONE_HALF rounds the
 * result before the shift removes the fractional bits. */
79 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
80 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
87 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
88 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* NOTE(review): lum[0]/lum[1] are written a second time here, presumably
 * after lum has been moved to the next row - that move is not visible. */
98 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
99 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
106 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
107 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* Chroma from r1/g1/b1: the extra shift by 2 divides by four (presumably
 * because four pixels were summed), then +128 re-centres the result. */
109 cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
110 FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
111 cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
112 FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
/* Fill the lum, cb and cr planes from the picture at src, using the same
 * fixed-point luma/chroma formulas as the other *_to_yuv420p helpers.
 * NOTE(review): how r/g/b are fetched from src is not visible here; the name
 * and the wrap4 local suggest 4-byte R,G,B,A source pixels - confirm. */
124 static void rgba32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
125 UINT8 *src, int width, int height)
127 int wrap, wrap4, x, y;
128 int r, g, b, r1, g1, b1;
/* Outer loop: one pair of rows; inner loop: one pair of columns. */
134 for(y=0;y<height;y+=2) {
135 for(x=0;x<width;x+=2) {
/* Weighted sum 0.299/0.587/0.114 of r, g, b, rounded with ONE_HALF. */
142 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
143 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
150 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
151 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* Second pair of luma writes; presumably targets the row below once lum
 * has been advanced (the advance is not visible in this excerpt). */
161 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
162 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
169 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
170 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* One cb/cr sample per inner iteration. SCALEBITS + 2 both removes the
 * fractional bits and divides the r1/g1/b1 values by four; 128 is the
 * chroma zero level. */
172 cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
173 FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
174 cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
175 FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
/* Fill the lum, cb and cr planes from the picture at src.
 * The arithmetic visible here is the same as in the RGB variants; only the
 * (not visible) byte order used when loading r, g and b can differ.
 * NOTE(review): the name implies 3-byte B,G,R source pixels - confirm. */
187 static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
188 UINT8 *src, int width, int height)
190 int wrap, wrap3, x, y;
191 int r, g, b, r1, g1, b1;
/* The picture is walked in steps of two rows and two columns. */
197 for(y=0;y<height;y+=2) {
198 for(x=0;x<width;x+=2) {
/* Fixed-point luma: 0.299 R + 0.587 G + 0.114 B, rounded to nearest. */
205 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
206 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
213 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
214 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* NOTE(review): repeated lum[0]/lum[1] stores; lum is presumably moved to
 * the following row between the two pairs (not visible here). */
224 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
225 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
232 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
233 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* Chroma is derived from r1/g1/b1 with an additional divide-by-four folded
 * into the shift, then offset by 128. */
235 cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
236 FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
237 cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
238 FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
/* Fill the lum, cb and cr planes from the picture at src.
 * NOTE(review): the statements that read src are not visible; the name and
 * the wrap4 local suggest 4-byte B,G,R,A source pixels - confirm. */
250 static void bgra32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
251 UINT8 *src, int width, int height)
253 int wrap, wrap4, x, y;
254 int r, g, b, r1, g1, b1;
/* Each inner iteration covers two columns of a two-row band. */
260 for(y=0;y<height;y+=2) {
261 for(x=0;x<width;x+=2) {
/* Luma from the current r, g, b using FIX()-scaled weights that sum to 1;
 * ONE_HALF makes the final shift round instead of truncate. */
268 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
269 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
276 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
277 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* Same two stores again; presumably for the second row of the band after
 * lum has been repositioned (repositioning not visible). */
287 lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
288 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
295 lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
296 FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
/* Single chroma pair for the group: r1/g1/b1 are scaled down by four via
 * the +2 in the shift count and biased by 128. */
298 cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
299 FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
300 cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
301 FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
313 /* XXX: use generic filter ? */
/* Reduce a plane by averaging pairs of samples taken from s1 and s2 at the
 * same index.
 * NOTE(review): the setup of s1/s2/d is not visible here; s1 and s2
 * presumably address two consecutive source rows, which would make this a
 * vertical 2:1 reduction - confirm against the full file. */
315 static void shrink2(UINT8 *dst, int dst_wrap,
316 UINT8 *src, int src_wrap,
317 int width, int height)
/* One iteration per output row. */
322 for(;height > 0; height--) {
/* Main loop: four output samples per iteration. */
326 for(w = width;w >= 4; w-=4) {
/* Truncating average (no rounding term) of the two samples. */
327 d[0] = (s1[0] + s2[0]) >> 1;
328 d[1] = (s1[1] + s2[1]) >> 1;
329 d[2] = (s1[2] + s2[2]) >> 1;
330 d[3] = (s1[3] + s2[3]) >> 1;
/* Single-sample form of the same average, presumably for the columns left
 * over after the four-wide loop (its enclosing loop is not visible). */
336 d[0] = (s1[0] + s2[0]) >> 1;
/* Reduce a plane 2:1 in both directions: every output sample is the rounded
 * mean of a 2x2 group (two neighbouring samples from s1 and two from s2).
 * NOTE(review): the setup of s1/s2/d and the pointer stepping lie outside
 * this excerpt; s1 and s2 presumably address two consecutive source rows. */
347 static void shrink22(UINT8 *dst, int dst_wrap,
348 UINT8 *src, int src_wrap,
349 int width, int height)
/* One iteration per output row. */
354 for(;height > 0; height--) {
/* Main loop: four output samples (eight source columns) per iteration. */
358 for(w = width;w >= 4; w-=4) {
/* Four samples are summed, so the mean needs the +2 rounding term and a
 * shift by 2. A shift by 1 would produce values up to 511, which wrap when
 * stored into a UINT8. */
359 d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
360 d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 2;
361 d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 2;
362 d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 2;
/* Single-sample form of the same mean, presumably for leftover columns. */
368 d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
/* Plane resize helper. img_convert() applies it to both chroma planes when
 * converting YUV410P to YUV420P.
 * NOTE(review): the loop bodies are not visible in this excerpt; the name
 * suggests each source sample is expanded to a 2x2 group - confirm. */
379 static void grow22(UINT8 *dst, int dst_wrap,
380 UINT8 *src, int src_wrap,
381 int width, int height)
/* One iteration per row counted by height. */
386 for(;height > 0; height--) {
/* Main loop consumes width in steps of four. */
389 for(w = width;w >= 4; w-=4) {
406 /* 1x2 -> 2x1. width and height are given for the source picture */
/* Chroma plane resampler used by img_convert() for YUV411P -> YUV420P.
 * NOTE(review): what is done with the averaged value c, and how s1/s2/d are
 * set up and advanced, is not visible in this excerpt. */
407 static void conv411(UINT8 *dst, int dst_wrap,
408 UINT8 *src, int src_wrap,
409 int width, int height)
/* Two source rows are consumed per iteration. */
414 for(;height > 0; height -= 2) {
/* One iteration per source column. */
418 for(w = width;w > 0; w--) {
/* Truncating average of the samples at s1 and s2 (presumably the same
 * column in the two rows of the pair). */
419 c = (s1[0] + s2[0]) >> 1;
/* Copy a plane of height rows, width bytes each, from src to dst.
 * NOTE(review): the per-row advance of dst and src (presumably by dst_wrap
 * and src_wrap) is not visible in this excerpt. */
431 static void img_copy(UINT8 *dst, int dst_wrap,
432 UINT8 *src, int src_wrap,
433 int width, int height)
/* Each row is copied as width contiguous bytes. */
435 for(;height > 0; height--) {
436 memcpy(dst, src, width);
/* Number of fractional bits kept by the YUV -> RGB fixed-point arithmetic. */
442 #define SCALE_BITS 10
/* Conversion coefficients, written with 16 fractional bits and reduced to
 * SCALE_BITS by the right shift. As real numbers before truncation:
 * C_Y ~ 1.164, C_RV ~ 1.793, C_BU ~ 2.113, C_GU ~ 0.213, C_GV ~ 0.533. */
444 #define C_Y (76309 >> (16 - SCALE_BITS))
445 #define C_RV (117504 >> (16 - SCALE_BITS))
446 #define C_BU (138453 >> (16 - SCALE_BITS))
447 #define C_GU (13954 >> (16 - SCALE_BITS))
448 #define C_GV (34903 >> (16 - SCALE_BITS))
/* Produce one pixel from luma sample y1: remove the 16 offset, scale by C_Y,
 * add the per-channel chroma terms and map the result through cm.
 * Relies on y, cm, r_add, g_add and b_add existing in the expanding scope;
 * r, g and b are the lvalues that receive the three channel bytes.
 * y1 is not parenthesised in the expansion, so pass a simple expression. */
450 #define RGBOUT(r, g, b, y1)\
452 y = (y1 - 16) * C_Y;\
453 r = cm[(y + r_add) >> SCALE_BITS];\
454 g = cm[(y + g_add) >> SCALE_BITS];\
455 b = cm[(y + b_add) >> SCALE_BITS];\
458 /* XXX: no chroma interpolation is done */
/* Convert a planar picture (src planes 0/1/2 read as Y/Cb/Cr) into packed
 * 4-byte pixels in dst, stored as B, G, R, A with A forced to 255.
 * One Cb/Cr pair colours a 2x2 group of output pixels.
 * NOTE(review): the initialisation of d, d1 and width2 and the pointer
 * stepping inside the inner loop are not visible in this excerpt; width2 is
 * presumably width / 2. */
459 static void yuv420p_to_bgra32(AVPicture *dst, AVPicture *src,
460 int width, int height)
462 UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
463 int w, y, cb, cr, r_add, g_add, b_add, width2;
/* cm points MAX_NEG_CROP entries into cropTbl; presumably a clamping lookup
 * that tolerates negative indices - confirm against cropTbl's definition. */
464 UINT8 *cm = cropTbl + MAX_NEG_CROP;
467 y1_ptr = src->data[0];
468 cb_ptr = src->data[1];
469 cr_ptr = src->data[2];
/* Two luma rows and two output rows per iteration. */
471 for(;height > 0; height -= 2) {
473 d2 = d + dst->linesize[0];
474 y2_ptr = y1_ptr + src->linesize[0];
475 for(w = width2; w > 0; w --) {
/* Centre chroma on zero and precompute its contribution to each channel,
 * including the rounding half-unit, once per 2x2 group. */
476 cb = cb_ptr[0] - 128;
477 cr = cr_ptr[0] - 128;
478 r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
479 g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
480 b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
482 /* output 4 pixels */
/* RGBOUT takes (r, g, b) destinations: red lands in byte 2 and blue in
 * byte 0 of each 4-byte pixel. */
483 RGBOUT(d1[2], d1[1], d1[0], y1_ptr[0]);
484 RGBOUT(d1[6], d1[5], d1[4], y1_ptr[1]);
485 RGBOUT(d2[2], d2[1], d2[0], y2_ptr[0]);
486 RGBOUT(d2[6], d2[5], d2[4], y2_ptr[1]);
/* Fully opaque alpha for all four pixels. */
488 d1[3] = d1[7] = d2[3] = d2[7] = 255;
/* End of the row pair: step down two output/luma rows and one chroma row.
 * The subtractions presumably undo the horizontal advance made inside the
 * inner loop. */
497 d += 2 * dst->linesize[0];
498 y1_ptr += 2 * src->linesize[0] - width;
499 cb_ptr += src->linesize[1] - width2;
500 cr_ptr += src->linesize[2] - width2;
504 /* XXX: no chroma interpolation is done */
/* Convert a planar picture (src planes 0/1/2 read as Y/Cb/Cr) into packed
 * 4-byte pixels in dst, stored as R, G, B, A with A forced to 255.
 * Each Cb/Cr sample is shared by a 2x2 group of output pixels.
 * NOTE(review): d, d1 and width2 are initialised on lines not visible here;
 * width2 is presumably half of width. */
505 static void yuv420p_to_rgba32(AVPicture *dst, AVPicture *src,
506 int width, int height)
508 UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
509 int w, y, cb, cr, r_add, g_add, b_add, width2;
/* Lookup table pointer offset by MAX_NEG_CROP; presumably used to clamp
 * results that fall outside 0..255. */
510 UINT8 *cm = cropTbl + MAX_NEG_CROP;
513 y1_ptr = src->data[0];
514 cb_ptr = src->data[1];
515 cr_ptr = src->data[2];
/* Process the picture in bands of two rows. */
517 for(;height > 0; height -= 2) {
/* d2 / y2_ptr address the second row of the band. */
519 d2 = d + dst->linesize[0];
520 y2_ptr = y1_ptr + src->linesize[0];
521 for(w = width2; w > 0; w --) {
/* Chroma terms are computed once and reused for the four pixels below. */
522 cb = cb_ptr[0] - 128;
523 cr = cr_ptr[0] - 128;
524 r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
525 g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
526 b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
528 /* output 4 pixels */
/* Channel bytes are written in R, G, B order; pixels are 4 bytes apart. */
529 RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
530 RGBOUT(d1[4], d1[5], d1[6], y1_ptr[1]);
531 RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
532 RGBOUT(d2[4], d2[5], d2[6], y2_ptr[1]);
/* Alpha byte of every pixel is set to opaque. */
534 d1[3] = d1[7] = d2[3] = d2[7] = 255;
/* Move to the next band: two rows down for output and luma, one row down
 * for each chroma plane. */
543 d += 2 * dst->linesize[0];
544 y1_ptr += 2 * src->linesize[0] - width;
545 cb_ptr += src->linesize[1] - width2;
546 cr_ptr += src->linesize[2] - width2;
550 /* XXX: no chroma interpolation is done */
/* Convert a planar picture (src planes 0/1/2 read as Y/Cb/Cr) into packed
 * 3-byte R, G, B pixels in dst. One Cb/Cr sample serves a 2x2 pixel group.
 * NOTE(review): the setup of d, d1 and width2 and the inner pointer stepping
 * are not visible in this excerpt; width2 is presumably width / 2. */
551 static void yuv420p_to_rgb24(AVPicture *dst, AVPicture *src,
552 int width, int height)
554 UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
555 int w, y, cb, cr, r_add, g_add, b_add, width2;
/* Table used by RGBOUT to map each computed channel value to a byte. */
556 UINT8 *cm = cropTbl + MAX_NEG_CROP;
559 y1_ptr = src->data[0];
560 cb_ptr = src->data[1];
561 cr_ptr = src->data[2];
/* Each iteration emits two output rows. */
563 for(;height > 0; height -= 2) {
565 d2 = d + dst->linesize[0];
566 y2_ptr = y1_ptr + src->linesize[0];
567 for(w = width2; w > 0; w --) {
/* Zero-centred chroma and the resulting per-channel offsets (with the
 * rounding constant already added). */
568 cb = cb_ptr[0] - 128;
569 cr = cr_ptr[0] - 128;
570 r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
571 g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
572 b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
574 /* output 4 pixels */
/* Pixels are 3 bytes apart: bytes 0-2 hold the first pixel of a row,
 * bytes 3-5 the second. */
575 RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
576 RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
577 RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
578 RGBOUT(d2[3], d2[4], d2[5], y2_ptr[1]);
/* Advance to the next pair of rows. */
587 d += 2 * dst->linesize[0];
588 y1_ptr += 2 * src->linesize[0] - width;
589 cb_ptr += src->linesize[1] - width2;
590 cr_ptr += src->linesize[2] - width2;
594 /* XXX: no chroma interpolation is done */
/* Convert a planar picture (src planes 0/1/2 read as Y/Cb/Cr) into packed
 * 3-byte R, G, B pixels in dst. Rows are handled one at a time and each
 * Cb/Cr sample is shared by two horizontally adjacent pixels.
 * NOTE(review): the setup of d, d1 and width2 and the inner pointer stepping
 * are not visible in this excerpt; width2 is presumably width / 2. */
595 static void yuv422p_to_rgb24(AVPicture *dst, AVPicture *src,
596 int width, int height)
598 UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1;
599 int w, y, cb, cr, r_add, g_add, b_add, width2;
/* Table used by RGBOUT to map each computed channel value to a byte. */
600 UINT8 *cm = cropTbl + MAX_NEG_CROP;
603 y1_ptr = src->data[0];
604 cb_ptr = src->data[1];
605 cr_ptr = src->data[2];
/* One output row per iteration (chroma has full vertical resolution). */
607 for(;height > 0; height --) {
609 for(w = width2; w > 0; w --) {
/* Per-pair chroma offsets, rounding constant included. */
610 cb = cb_ptr[0] - 128;
611 cr = cr_ptr[0] - 128;
612 r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
613 g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
614 b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
616 /* output 2 pixels */
617 RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
618 RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
/* Next row in every plane; the subtractions presumably undo the horizontal
 * advance made inside the inner loop. */
625 d += dst->linesize[0];
626 y1_ptr += src->linesize[0] - width;
627 cb_ptr += src->linesize[1] - width2;
628 cr_ptr += src->linesize[2] - width2;
632 /* XXX: always use linesize. Return -1 if not supported */
/* Convert picture src (format pix_fmt) into dst (format dst_pix_fmt).
 * Paths visible here: same-format copy, several sources -> YUV420P,
 * YUV420P/YUV422P -> RGB24, YUV420P -> RGBA32 and YUV420P -> BGRA32.
 * NOTE(review): the switch statements, several case labels, the plane-size
 * arguments of the helper calls and all return statements are not visible
 * in this excerpt. */
633 int img_convert(AVPicture *dst, int dst_pix_fmt,
634 AVPicture *src, int pix_fmt,
635 int width, int height)
/* PIX_FMT_ANY is rejected for both the source and the destination. */
639 assert(pix_fmt != PIX_FMT_ANY && dst_pix_fmt != PIX_FMT_ANY);
/* Identical formats: plane-by-plane copy. */
641 if (dst_pix_fmt == pix_fmt) {
643 case PIX_FMT_YUV420P:
649 img_copy(dst->data[i], dst->linesize[i],
650 src->data[i], src->linesize[i],
/* Destination is YUV420P: luma is copied where the source is planar YUV and
 * only the chroma planes are resampled. */
657 } else if (dst_pix_fmt == PIX_FMT_YUV420P) {
660 case PIX_FMT_YUV411P:
661 img_copy(dst->data[0], dst->linesize[0],
662 src->data[0], src->linesize[0],
664 conv411(dst->data[1], dst->linesize[1],
665 src->data[1], src->linesize[1],
667 conv411(dst->data[2], dst->linesize[2],
668 src->data[2], src->linesize[2],
671 case PIX_FMT_YUV410P:
672 img_copy(dst->data[0], dst->linesize[0],
673 src->data[0], src->linesize[0],
675 grow22(dst->data[1], dst->linesize[1],
676 src->data[1], src->linesize[1],
678 grow22(dst->data[2], dst->linesize[2],
679 src->data[2], src->linesize[2],
682 case PIX_FMT_YUV420P:
684 img_copy(dst->data[i], dst->linesize[i],
685 src->data[i], src->linesize[i],
689 case PIX_FMT_YUV422P:
690 img_copy(dst->data[0], dst->linesize[0],
691 src->data[0], src->linesize[0],
696 shrink2(dst->data[i], dst->linesize[i],
697 src->data[i], src->linesize[i],
701 case PIX_FMT_YUV444P:
702 img_copy(dst->data[0], dst->linesize[0],
703 src->data[0], src->linesize[0],
708 shrink22(dst->data[i], dst->linesize[i],
709 src->data[i], src->linesize[i],
/* Packed sources: only plane 0 of src is passed, and no linesize is handed
 * to these helpers. NOTE(review): the case labels selecting each call are
 * not visible in this excerpt. */
714 yuv422_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
715 src->data[0], width, height);
718 rgb24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
719 src->data[0], width, height);
722 rgba32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
723 src->data[0], width, height);
726 bgr24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
727 src->data[0], width, height);
730 bgra32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
731 src->data[0], width, height);
/* Destination is packed RGB24. */
736 } else if (dst_pix_fmt == PIX_FMT_RGB24) {
738 case PIX_FMT_YUV420P:
739 yuv420p_to_rgb24(dst, src, width, height);
741 case PIX_FMT_YUV422P:
742 yuv422p_to_rgb24(dst, src, width, height);
/* Destination is packed RGBA32. */
747 } else if (dst_pix_fmt == PIX_FMT_RGBA32) {
749 case PIX_FMT_YUV420P:
750 yuv420p_to_rgba32(dst, src, width, height);
/* Destination is packed BGRA32. */
755 } else if (dst_pix_fmt == PIX_FMT_BGRA32) {
757 case PIX_FMT_YUV420P:
758 yuv420p_to_bgra32(dst, src, width, height);
/* MMX fragment that filters four pixels of a line in place.
 * It loads 4 bytes from each of lum_m4, lum_m3, lum_m2, lum_m1 and lum,
 * widens them by interleaving with mm7 (presumably all-zero), saves the
 * original lum_m2 bytes into lum_m4, and stores the packed result back to
 * lum_m2.
 * NOTE(review): the arithmetic between the unpack steps and psubusw/packuswb
 * is not visible in this excerpt. */
771 #define DEINT_INPLACE_LINE_LUM \
772 movd_m2r(lum_m4[0],mm0);\
773 movd_m2r(lum_m3[0],mm1);\
774 movd_m2r(lum_m2[0],mm2);\
775 movd_m2r(lum_m1[0],mm3);\
776 movd_m2r(lum[0],mm4);\
777 punpcklbw_r2r(mm7,mm0);\
778 movd_r2m(mm2,lum_m4[0]);\
779 punpcklbw_r2r(mm7,mm1);\
780 punpcklbw_r2r(mm7,mm2);\
781 punpcklbw_r2r(mm7,mm3);\
782 punpcklbw_r2r(mm7,mm4);\
789 psubusw_r2r(mm0,mm1);\
791 packuswb_r2r(mm7,mm1);\
792 movd_r2m(mm1,lum_m2[0]);
/* MMX fragment that filters four pixels into a separate destination.
 * It loads 4 bytes from each of lum_m4, lum_m3, lum_m2, lum_m1 and lum,
 * widens them by interleaving with mm7 (presumably all-zero), and stores
 * the packed result to dst; none of the input lines is written.
 * NOTE(review): the arithmetic between the unpack steps and psubusw/packuswb
 * is not visible in this excerpt. */
794 #define DEINT_LINE_LUM \
795 movd_m2r(lum_m4[0],mm0);\
796 movd_m2r(lum_m3[0],mm1);\
797 movd_m2r(lum_m2[0],mm2);\
798 movd_m2r(lum_m1[0],mm3);\
799 movd_m2r(lum[0],mm4);\
800 punpcklbw_r2r(mm7,mm0);\
801 punpcklbw_r2r(mm7,mm1);\
802 punpcklbw_r2r(mm7,mm2);\
803 punpcklbw_r2r(mm7,mm3);\
804 punpcklbw_r2r(mm7,mm4);\
811 psubusw_r2r(mm0,mm1);\
813 packuswb_r2r(mm7,mm1);\
814 movd_r2m(mm1,dst[0]);
817 /* filter parameters: [-1 4 2 4 -1] // 8 */
/* Filter one output line into dst from five input lines (lum_m4 .. lum).
 * NOTE(review): the rest of the parameter list, the terms contributed by
 * lum_m4 and lum, and the pointer increments are not visible here. */
818 static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
/* Lookup table pointer offset by MAX_NEG_CROP; presumably clamps the
 * filtered value to a byte. */
822 UINT8 *cm = cropTbl + MAX_NEG_CROP;
/* Scalar path: one sample per iteration. */
825 for(;size > 0;size--) {
/* Weights 4, 2, 4 for the three middle lines. */
827 sum += lum_m3[0] << 2;
828 sum += lum_m2[0] << 1;
829 sum += lum_m1[0] << 2;
/* +4 rounds before the divide by 8. */
831 dst[0] = cm[(sum + 4) >> 3];
/* Alternative loop handling four samples per iteration; presumably the MMX
 * build of this function (the selecting #if is not visible). */
841 for (;size > 3; size-=4) {
/* In-place variant of the line filter: the result overwrites lum_m2 rather
 * than going to a separate destination.
 * NOTE(review): the rest of the parameter list, the terms contributed by
 * lum_m4 and lum, and the pointer increments are not visible here. */
852 static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
/* Lookup table pointer offset by MAX_NEG_CROP; presumably clamps the
 * filtered value to a byte. */
856 UINT8 *cm = cropTbl + MAX_NEG_CROP;
/* Scalar path: one sample per iteration. */
859 for(;size > 0;size--) {
/* Weights 4, 2, 4 for the three middle lines. */
861 sum += lum_m3[0] << 2;
862 sum += lum_m2[0] << 1;
864 sum += lum_m1[0] << 2;
/* Rounded divide by 8, written back over the middle line. */
866 lum_m2[0] = cm[(sum + 4) >> 3];
/* Four samples per iteration using the MMX macro, which also saves the
 * original lum_m2 bytes into lum_m4 before overwriting lum_m2. */
875 for (;size > 3; size-=4) {
876 DEINT_INPLACE_LINE_LUM
886 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
887 top field is copied as is, but the bottom field is deinterlaced
888 against the top field. */
/* Deinterlace one plane from src1 into dst: alternate lines are copied
 * unchanged and the lines in between are produced by deinterlace_line().
 * NOTE(review): the initialisation of src_m2/src_m1, the advance of dst and
 * part of the pointer rotation inside the loop are not visible here. */
889 static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
890 UINT8 *src1, int src_wrap,
891 int width, int height)
/* Sliding window of five consecutive source lines. */
893 UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
898 src_0=&src_m1[src_wrap];
899 src_p1=&src_0[src_wrap];
900 src_p2=&src_p1[src_wrap];
/* Two lines per iteration; the final pair is handled after the loop. */
901 for(y=0;y<(height-2);y+=2) {
/* Line copied as-is. */
902 memcpy(dst,src_m1,width);
/* Following line filtered from the five-line window. */
904 deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
908 src_p1 += 2*src_wrap;
909 src_p2 += 2*src_wrap;
/* Last pair: copy one line, then filter with src_0 standing in for the two
 * lines that would lie beyond the bottom of the plane. */
912 memcpy(dst,src_m1,width);
915 deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
/* In-place version of the plane deinterlacer: filtered lines are written
 * back into the plane at src1, with a one-line scratch buffer passed as the
 * topmost input of the filter.
 * NOTE(review): no check of the av_malloc() result and no release of buf is
 * visible in this excerpt - confirm both exist in the full function. The
 * initialisation of src_m1 is also not visible. */
918 static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap,
919 int width, int height)
921 UINT8 *src_m1, *src_0, *src_p1, *src_p2;
/* Scratch space for one line of width bytes. */
924 buf = (UINT8*)av_malloc(width);
/* Seed the scratch line with a copy of the line at src_m1. */
927 memcpy(buf,src_m1,width);
928 src_0=&src_m1[src_wrap];
929 src_p1=&src_0[src_wrap];
930 src_p2=&src_p1[src_wrap];
/* Two lines per iteration; the final pair is handled after the loop. */
931 for(y=0;y<(height-2);y+=2) {
/* buf plays the role of the line two above src_0; the result lands in the
 * line passed third (src_0). */
932 deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
935 src_p1 += 2*src_wrap;
936 src_p2 += 2*src_wrap;
/* Last pair: src_0 stands in for the lines below the bottom of the plane. */
939 deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
944 /* deinterlace - if not supported return -1 */
/* Deinterlace picture src into dst, plane by plane.
 * Only YUV420P, YUV422P and YUV444P pass the format check, and both width
 * and height must be multiples of 4.
 * NOTE(review): the return statements, the per-plane size adjustment inside
 * the switch and the condition choosing between the in-place and copying
 * helpers are not visible in this excerpt. */
945 int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
946 int pix_fmt, int width, int height)
/* Reject every format other than the three planar YUV ones. */
950 if (pix_fmt != PIX_FMT_YUV420P &&
951 pix_fmt != PIX_FMT_YUV422P &&
952 pix_fmt != PIX_FMT_YUV444P)
/* Reject sizes that are not multiples of 4 in both dimensions. */
954 if ((width & 3) != 0 || (height & 3) != 0)
/* Load the rounding constant into mm6, presumably for the MMX line
 * filters (MMX builds only - the guarding #if is not visible). */
965 movq_m2r(rounder,mm6);
973 case PIX_FMT_YUV420P:
977 case PIX_FMT_YUV422P:
/* In-place helper: the plane in src is modified directly. */
985 deinterlace_bottom_field_inplace(src->data[i], src->linesize[i],
/* Copying helper: the plane is filtered from src into dst. */
988 deinterlace_bottom_field(dst->data[i],dst->linesize[i],
989 src->data[i], src->linesize[i],