2 * software RGB to RGB converter
3 * software PAL8 to RGB converter
4 * software YUV to YUV converter
5 * software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lot of big-endian byte order fixes by Alex Beregszaszi
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/**
 * Convert packed 24-bit pixels at src into 32-bit pixels at dst (see the
 * in-body comment for the component order).
 *
 * NOTE(review): this listing omits most of the body (pixel loop and stores);
 * src_size is presumably the input length in bytes -- TODO confirm.
 */
29 static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size)
/* Read cursor over the input buffer. */
32 const uint8_t *s = src;
38 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
/**
 * Convert packed 32-bit pixels at src into 24-bit pixels at dst (see the
 * in-body comment for the component order).
 *
 * NOTE(review): this listing omits most of the body (pixel loop and stores);
 * src_size is presumably the input length in bytes -- TODO confirm.
 */
53 static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
/* Read cursor over the input buffer. */
56 const uint8_t *s = src;
63 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
79 original by Strepto/Astral
80 ported to gcc & bugfixed: A'rpi
81 MMX2, 3DNOW optimization by Nick Kurshev
82 32-bit C version, and and&add trick by Michael Niedermayer
/**
 * Widen 15-bit pixels to 16-bit pixels: within every 16-bit value, bits 5..14
 * are moved up by one position (to 6..15), bits 0..4 are kept and bit 5
 * becomes zero.
 *
 * NOTE(review): loop headers and pointer increments are not visible in this
 * listing; the comments below describe only the statements shown.
 */
84 static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
86 register const uint8_t* s=src;
87 register uint8_t* d=dst;
88 register const uint8_t *end;
89 const uint8_t *mm_end;
/*
 * Wide path: handles two 16-bit pixels per 32-bit word. Adding the masked
 * upper fields (x & 0x7FE07FE0) to the value doubles them, i.e. shifts them
 * left by one; bit 15 of each half is masked off first so no carry crosses
 * from the low pixel into the high one.
 * NOTE(review): the load/store go through uint8_t pointers cast to
 * uint32_t pointers -- this assumes suitable alignment; confirm for callers.
 */
93 register unsigned x= *((const uint32_t *)s);
94 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
/* Narrow path: the same and&add trick applied to a single 16-bit pixel. */
99 register unsigned short x= *((const uint16_t *)s);
100 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
/**
 * Narrow 16-bit pixels to 15-bit pixels: within every 16-bit value, bits
 * 6..15 are moved down by one position (to 5..14), bit 5 is dropped and bits
 * 0..4 are kept unchanged.
 *
 * NOTE(review): loop headers and pointer increments are not visible in this
 * listing; the comments below describe only the statements shown.
 */
104 static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
106 register const uint8_t* s=src;
107 register uint8_t* d=dst;
108 register const uint8_t *end;
109 const uint8_t *mm_end;
/*
 * Wide path: two 16-bit pixels per 32-bit word. The mask 0x7FE07FE0 applied
 * after the shift discards the bit that would otherwise leak from the high
 * pixel into bit 15 of the low pixel.
 */
114 register uint32_t x= *((const uint32_t*)s);
115 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
/* Narrow path: the same conversion for a single 16-bit pixel. */
120 register uint16_t x= *((const uint16_t*)s);
121 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
/**
 * Pack 32-bit pixels into 16-bit pixels with a 5/6/5 bit layout.
 *
 * NOTE(review): the loop header and end-pointer setup are not visible in
 * this listing.
 */
125 static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
127 const uint8_t *s = src;
129 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit pixel and advance the read cursor by four bytes. */
133 register int rgb = *(const uint32_t*)s; s += 4;
/*
 * Bits 3..7   of the word -> output bits 0..4,
 * bits 10..15 of the word -> output bits 5..10,
 * bits 19..23 of the word -> output bits 11..15.
 * Bits 24..31 are ignored.
 */
134 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
/**
 * Pack 32-bit pixels into 16-bit pixels with a 5/6/5 bit layout, placing the
 * first and third source bytes in the opposite output fields compared with
 * rgb32to16_c.
 *
 * NOTE(review): the loop header and end-pointer setup are not visible in
 * this listing.
 */
138 static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
140 const uint8_t *s = src;
142 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit pixel and advance the read cursor by four bytes. */
145 register int rgb = *(const uint32_t*)s; s += 4;
/*
 * Bits 3..7   of the word -> output bits 11..15,
 * bits 10..15 of the word -> output bits 5..10,
 * bits 19..23 of the word -> output bits 0..4.
 */
146 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
/**
 * Pack 32-bit pixels into 16-bit words with a 5/5/5 bit layout (output bit
 * 15 is always zero).
 *
 * NOTE(review): the loop header and end-pointer setup are not visible in
 * this listing.
 */
150 static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
152 const uint8_t *s = src;
154 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit pixel and advance the read cursor by four bytes. */
157 register int rgb = *(const uint32_t*)s; s += 4;
/*
 * Bits 3..7   of the word -> output bits 0..4,
 * bits 11..15 of the word -> output bits 5..9,
 * bits 19..23 of the word -> output bits 10..14.
 */
158 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
/**
 * Pack 32-bit pixels into 16-bit words with a 5/5/5 bit layout, placing the
 * first and third source bytes in the opposite output fields compared with
 * rgb32to15_c.
 *
 * NOTE(review): the loop header and end-pointer setup are not visible in
 * this listing.
 */
162 static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
164 const uint8_t *s = src;
166 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit pixel and advance the read cursor by four bytes. */
169 register int rgb = *(const uint32_t*)s; s += 4;
/*
 * Bits 3..7   of the word -> output bits 10..14,
 * bits 11..15 of the word -> output bits 5..9,
 * bits 19..23 of the word -> output bits 0..4.
 */
170 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
/**
 * Pack 24-bit pixels into 16-bit pixels with a 5/6/5 bit layout.
 *
 * NOTE(review): the loop and the loads of b, g and r are not visible in this
 * listing. The packing expression is the same as the one shown in
 * rgb24to16_c, so any difference between the two functions must lie in the
 * order in which b, g and r are read -- confirm against the full source.
 */
174 static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
176 const uint8_t *s = src;
178 uint16_t *d = (uint16_t *)dst;
/* b -> bits 0..4, top six bits of g -> bits 5..10, top five bits of r -> bits 11..15. */
184 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
/**
 * Pack 24-bit pixels into 16-bit pixels with a 5/6/5 bit layout.
 *
 * NOTE(review): the loop and the loads of b, g and r are not visible in this
 * listing. The packing expression is the same as the one shown in
 * rgb24tobgr16_c, so any difference between the two functions must lie in
 * the order in which b, g and r are read -- confirm against the full source.
 */
188 static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
190 const uint8_t *s = src;
192 uint16_t *d = (uint16_t *)dst;
/* b -> bits 0..4, top six bits of g -> bits 5..10, top five bits of r -> bits 11..15. */
198 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
/**
 * Pack 24-bit pixels into 16-bit words with a 5/5/5 bit layout (output bit
 * 15 is always zero).
 *
 * NOTE(review): the loop and the loads of b, g and r are not visible in this
 * listing. The packing expression is the same as the one shown in
 * rgb24to15_c, so any difference between the two functions must lie in the
 * order in which b, g and r are read -- confirm against the full source.
 */
202 static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
204 const uint8_t *s = src;
206 uint16_t *d = (uint16_t *)dst;
/* Top five bits of b -> bits 0..4, of g -> bits 5..9, of r -> bits 10..14. */
212 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
/**
 * Pack 24-bit pixels into 16-bit words with a 5/5/5 bit layout (output bit
 * 15 is always zero).
 *
 * NOTE(review): the loop and the loads of b, g and r are not visible in this
 * listing. The packing expression is the same as the one shown in
 * rgb24tobgr15_c, so any difference between the two functions must lie in
 * the order in which b, g and r are read -- confirm against the full source.
 */
216 static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
218 const uint8_t *s = src;
220 uint16_t *d = (uint16_t *)dst;
/* Top five bits of b -> bits 0..4, of g -> bits 5..9, of r -> bits 10..14. */
226 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
/**
 * Expand 16-bit words holding three 5-bit fields into three output bytes per
 * pixel.
 *
 * NOTE(review): the declarations of d and end, the loop header and the load
 * of bgr are not visible in this listing.
 */
230 static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
234 const uint16_t *s = (const uint16_t*)src;
/* End marker: src_size/2 16-bit elements past the start. */
235 end = s + src_size/2;
237 register uint16_t bgr;
/*
 * Each 5-bit field is placed in the top five bits of the output byte and its
 * three most significant bits are replicated into the low three bits, so a
 * full-scale field (0x1F) becomes 0xFF. Fields are emitted lowest first:
 * bits 0..4, then bits 5..9, then bits 10..14.
 */
239 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
240 *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
241 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
/**
 * Expand 16-bit words holding 5/6/5-bit fields into three output bytes per
 * pixel.
 *
 * NOTE(review): the declaration of end, the loop header and the load of bgr
 * are not visible in this listing.
 */
245 static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
248 uint8_t *d = (uint8_t *)dst;
249 const uint16_t *s = (const uint16_t *)src;
/* End marker: src_size/2 16-bit elements past the start. */
250 end = s + src_size/2;
252 register uint16_t bgr;
/*
 * Each field is shifted into the top of the output byte and its most
 * significant bits are replicated into the vacated low bits (three bits for
 * the 5-bit fields, two bits for the 6-bit field). Fields are emitted lowest
 * first: bits 0..4, then bits 5..10, then bits 11..15.
 */
254 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
255 *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
256 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
/**
 * Expand 16-bit words holding three 5-bit fields into 32-bit output pixels.
 *
 * NOTE(review): the declarations of d and end, the loop header, the load of
 * bgr, the store of the fourth byte of each pixel and the condition choosing
 * between the two store orders below are not visible in this listing
 * (presumably the order depends on byte order -- TODO confirm).
 */
260 static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
264 const uint16_t *s = (const uint16_t *)src;
/* End marker: src_size/2 16-bit elements past the start. */
265 end = s + src_size/2;
267 register uint16_t bgr;
/*
 * Variant A: highest field first (bits 10..14, then 5..9, then 0..4). Each
 * 5-bit field fills the top five bits of its byte; its top three bits are
 * replicated into the low three bits.
 */
271 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
272 *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
273 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
/* Variant B: the same three bytes in the opposite order (lowest field first). */
275 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
276 *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
277 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
/**
 * Expand 16-bit words holding 5/6/5-bit fields into 32-bit output pixels.
 *
 * NOTE(review): the declarations of d and end, the loop header, the load of
 * bgr, the store of the fourth byte of each pixel and the condition choosing
 * between the two store orders below are not visible in this listing
 * (presumably the order depends on byte order -- TODO confirm).
 */
283 static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
287 const uint16_t *s = (const uint16_t*)src;
/* End marker: src_size/2 16-bit elements past the start. */
288 end = s + src_size/2;
290 register uint16_t bgr;
/*
 * Variant A: highest field first (bits 11..15, then 5..10, then 0..4). Each
 * field fills the top of its byte; its most significant bits are replicated
 * into the vacated low bits.
 */
294 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
295 *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
296 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
/* Variant B: the same three bytes in the opposite order (lowest field first). */
298 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
299 *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
300 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
/**
 * Process the buffer in 32-bit words, keeping bytes 1 and 3 of each word in
 * place (mask 0xff00ff00) and combining them with the word shifted right and
 * left by 16 bits.
 *
 * NOTE(review): the listing skips a statement between the declaration of v
 * and the store (original lines 312 and 314), so the exact value of v at the
 * store cannot be confirmed from here.
 */
306 static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, int src_size)
/*
 * The index is biased so the loop can compare against the constant 15:
 * s[idx] and d[idx] address byte offset idx - (15 - src_size) of src/dst,
 * which starts at 0 and advances by 4 while it is below src_size.
 */
308 int idx = 15 - src_size;
309 const uint8_t *s = src-idx;
310 uint8_t *d = dst-idx;
311 for (; idx<15; idx+=4) {
/*
 * NOTE(review): v is a signed int, so v<<16 and v>>16 below are signed
 * shifts; the word is also accessed through a cast uint8_t pointer, which
 * assumes suitable alignment -- confirm both are acceptable here.
 */
312 register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
314 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
/**
 * Walk the buffer in 3-byte groups, copying the middle byte unchanged and
 * writing the first source byte to the third destination position.
 *
 * NOTE(review): the declaration of i and the statements that handle
 * dst[i + 0] are not visible in this listing (presumably the third source
 * byte is saved first so the swap also works in place -- TODO confirm).
 */
318 static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
321 for (i=0; i<src_size; i+=3) {
324 dst[i + 1] = src[i + 1];
325 dst[i + 2] = src[i + 0];
/**
 * Interleave planar Y, U and V rows into a packed output where each 32-bit
 * group holds two luma samples and one U/V pair (Y0 U0 Y1 V0 from the least
 * significant byte up, in the non-swapped variants below).
 *
 * vertLumPerChroma is used as a power-of-two row count in the mask test at
 * the end of the row loop.
 *
 * NOTE(review): the conditions selecting between the alternative packing
 * paths, the declarations of y, i, k and l, and all pointer advances are not
 * visible in this listing.
 */
330 static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
331 const uint8_t *vsrc, uint8_t *dst,
332 int width, int height,
333 int lumStride, int chromStride,
334 int dstStride, int vertLumPerChroma)
/* One chroma sample per two luma samples horizontally. */
337 const int chromWidth = width >> 1;
338 for (y=0; y<height; y++) {
/*
 * Path 1: one 64-bit store per iteration, covering four luma samples and two
 * U/V pairs (note the i += 2 step).
 */
341 uint64_t *ldst = (uint64_t *) dst;
342 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
343 for (i = 0; i < chromWidth; i += 2) {
/*
 * NOTE(review): vc[0] and vc[1] are promoted to int before the shift, so
 * (vc[n] << 24) overflows a signed int for values >= 128. If k and l are
 * 64-bit (their declarations are not shown), a negative intermediate would
 * also sign-extend into the upper half and corrupt k + (l << 32). A cast to
 * unsigned before the shift would avoid both -- confirm and fix.
 */
345 k = yc[0] + (uc[0] << 8) +
346 (yc[1] << 16) + (vc[0] << 24);
347 l = yc[2] + (uc[1] << 8) +
348 (yc[3] << 16) + (vc[1] << 24);
349 *ldst++ = k + (l << 32);
/* Path 2: one 32-bit store per chroma sample. */
356 int i, *idst = (int32_t *) dst;
357 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
358 for (i = 0; i < chromWidth; i++) {
/* Variant with yc[0] in the most significant byte of the word. */
360 *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
361 (yc[1] << 8) + (vc[0] << 0);
/* Variant with yc[0] in the least significant byte of the word. */
363 *idst++ = yc[0] + (uc[0] << 8) +
364 (yc[1] << 16) + (vc[0] << 24);
/*
 * True on the last row of every group of vertLumPerChroma rows (for
 * power-of-two values); the guarded statements are not visible here.
 */
371 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
381 * Height should be a multiple of 2 and width should be a multiple of 16.
382 * (If this is a problem for anyone then tell me, and I will fix it.)
/*
 * Thin wrapper: forwards all arguments to yuvPlanartoyuy2_c with
 * vertLumPerChroma fixed to 2.
 * NOTE(review): the last line of the parameter list (which must declare
 * dstStride, used in the call below) is not visible in this listing.
 */
384 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
385 const uint8_t *vsrc, uint8_t *dst,
386 int width, int height,
387 int lumStride, int chromStride,
390 //FIXME interpolate chroma
391 yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
392 chromStride, dstStride, 2);
/**
 * Interleave planar Y, U and V rows into a packed output where each 32-bit
 * group holds one U/V pair and two luma samples (U0 Y0 V0 Y1 from the least
 * significant byte up, in the non-swapped variants below).
 *
 * vertLumPerChroma is used as a power-of-two row count in the mask test at
 * the end of the row loop.
 *
 * NOTE(review): the conditions selecting between the alternative packing
 * paths, the declarations of y, i, k and l, and all pointer advances are not
 * visible in this listing.
 */
395 static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
396 const uint8_t *vsrc, uint8_t *dst,
397 int width, int height,
398 int lumStride, int chromStride,
399 int dstStride, int vertLumPerChroma)
/* One chroma sample per two luma samples horizontally. */
402 const int chromWidth = width >> 1;
403 for (y=0; y<height; y++) {
/*
 * Path 1: one 64-bit store per iteration, covering two U/V pairs and four
 * luma samples (note the i += 2 step).
 */
406 uint64_t *ldst = (uint64_t *) dst;
407 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
408 for (i = 0; i < chromWidth; i += 2) {
/*
 * NOTE(review): yc[1] and yc[3] are promoted to int before the shift, so
 * (yc[n] << 24) overflows a signed int for values >= 128. If k and l are
 * 64-bit (their declarations are not shown), a negative intermediate would
 * also sign-extend into the upper half and corrupt k + (l << 32). A cast to
 * unsigned before the shift would avoid both -- confirm and fix.
 */
410 k = uc[0] + (yc[0] << 8) +
411 (vc[0] << 16) + (yc[1] << 24);
412 l = uc[1] + (yc[2] << 8) +
413 (vc[1] << 16) + (yc[3] << 24);
414 *ldst++ = k + (l << 32);
/* Path 2: one 32-bit store per chroma sample. */
421 int i, *idst = (int32_t *) dst;
422 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
423 for (i = 0; i < chromWidth; i++) {
/* Variant with uc[0] in the most significant byte of the word. */
425 *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
426 (vc[0] << 8) + (yc[1] << 0);
/* Variant with uc[0] in the least significant byte of the word. */
428 *idst++ = uc[0] + (yc[0] << 8) +
429 (vc[0] << 16) + (yc[1] << 24);
/*
 * True on the last row of every group of vertLumPerChroma rows (for
 * power-of-two values); the guarded statements are not visible here.
 */
436 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
446 * Height should be a multiple of 2 and width should be a multiple of 16
447 * (If this is a problem for anyone then tell me, and I will fix it.)
/*
 * Thin wrapper: forwards all arguments to yuvPlanartouyvy_c with
 * vertLumPerChroma fixed to 2.
 * NOTE(review): the last line of the parameter list (which must declare
 * dstStride, used in the call below) is not visible in this listing.
 */
449 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
450 const uint8_t *vsrc, uint8_t *dst,
451 int width, int height,
452 int lumStride, int chromStride,
455 //FIXME interpolate chroma
456 yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
457 chromStride, dstStride, 2);
461 * Width should be a multiple of 16.
/*
 * Thin wrapper: forwards all arguments to yuvPlanartouyvy_c with
 * vertLumPerChroma fixed to 1.
 * NOTE(review): the last line of the parameter list (which must declare
 * dstStride, used in the call below) is not visible in this listing.
 */
463 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
464 const uint8_t *vsrc, uint8_t *dst,
465 int width, int height,
466 int lumStride, int chromStride,
469 yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
470 chromStride, dstStride, 1);
474 * Width should be a multiple of 16.
/*
 * Thin wrapper: forwards all arguments to yuvPlanartoyuy2_c with
 * vertLumPerChroma fixed to 1.
 * NOTE(review): the last line of the parameter list (which must declare
 * dstStride, used in the call below) is not visible in this listing.
 */
476 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
477 const uint8_t *vsrc, uint8_t *dst,
478 int width, int height,
479 int lumStride, int chromStride,
482 yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
483 chromStride, dstStride, 1);
487 * Height should be a multiple of 2 and width should be a multiple of 16.
488 * (If this is a problem for anyone then tell me, and I will fix it.)
/*
 * Split packed 4-byte groups (luma at byte offsets 0 and 2, chroma at 1 and
 * 3) into separate Y, U and V planes, handling two input rows per outer
 * iteration.
 * NOTE(review): the end of the parameter list, the declarations of y and i,
 * and the pointer advances between the two inner loops are not visible in
 * this listing.
 */
490 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
491 uint8_t *udst, uint8_t *vdst,
492 int width, int height,
493 int lumStride, int chromStride,
/* One chroma sample per two luma samples horizontally. */
497 const int chromWidth = width >> 1;
498 for (y=0; y<height; y+=2) {
/* First inner loop: extract luma and both chroma samples. */
500 for (i=0; i<chromWidth; i++) {
501 ydst[2*i+0] = src[4*i+0];
502 udst[i] = src[4*i+1];
503 ydst[2*i+1] = src[4*i+2];
504 vdst[i] = src[4*i+3];
/* Second inner loop: extract luma only; the chroma bytes are ignored. */
509 for (i=0; i<chromWidth; i++) {
510 ydst[2*i+0] = src[4*i+0];
511 ydst[2*i+1] = src[4*i+2];
/**
 * Upscale a plane using 3:1 weighted averages of neighbouring samples: each
 * output row is indexed up to 2*srcWidth-1, and the row loop writes two
 * output rows (dst and dst + dstStride) per iteration.
 *
 * NOTE(review): the declarations of x and y, the store to dst[0] for the
 * edge rows and all src/dst pointer advances are not visible in this
 * listing.
 */
520 static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
521 int srcHeight, int srcStride, int dstStride)
/* Edge row: horizontal interpolation only, between src[x] and src[x+1]. */
528 for (x=0; x<srcWidth-1; x++) {
529 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
530 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
/* Last output sample of the row is a plain copy of the last input sample. */
532 dst[2*srcWidth-1]= src[srcWidth-1];
/* Row loop: blends the row at src with the row at src + srcStride. */
536 for (y=1; y<srcHeight; y++) {
537 const int mmxSize = 1;
/* Left edge column: vertical interpolation only. */
539 dst[0 ]= (3*src[0] + src[srcStride])>>2;
540 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
/*
 * Interior: each output sample mixes one sample of the upper row with the
 * diagonally opposite sample of the lower row, weighted 3:1 towards the
 * nearer one.
 */
542 for (x=mmxSize-1; x<srcWidth-1; x++) {
543 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
544 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
545 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
546 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
/* Right edge column: vertical interpolation only. */
548 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
549 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
/* Second edge row: the same horizontal-only interpolation as at the top. */
558 for (x=0; x<srcWidth-1; x++) {
559 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
560 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
562 dst[2*srcWidth-1]= src[srcWidth-1];
566 * Height should be a multiple of 2 and width should be a multiple of 16.
567 * (If this is a problem for anyone then tell me, and I will fix it.)
568 * Chrominance data is only taken from every second line, others are ignored.
569 * FIXME: Write HQ version.
/*
 * Split packed 4-byte groups (chroma at byte offsets 0 and 2, luma at 1 and
 * 3) into separate Y, U and V planes, handling two input rows per outer
 * iteration.
 * NOTE(review): the end of the parameter list, the declarations of y and i,
 * and the pointer advances between the two inner loops are not visible in
 * this listing.
 */
571 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
572 uint8_t *udst, uint8_t *vdst,
573 int width, int height,
574 int lumStride, int chromStride,
/* One chroma sample per two luma samples horizontally. */
578 const int chromWidth = width >> 1;
579 for (y=0; y<height; y+=2) {
/* First inner loop: extract both chroma samples and luma. */
581 for (i=0; i<chromWidth; i++) {
582 udst[i] = src[4*i+0];
583 ydst[2*i+0] = src[4*i+1];
584 vdst[i] = src[4*i+2];
585 ydst[2*i+1] = src[4*i+3];
/* Second inner loop: extract luma only; the chroma bytes are ignored. */
590 for (i=0; i<chromWidth; i++) {
591 ydst[2*i+0] = src[4*i+1];
592 ydst[2*i+1] = src[4*i+3];
602 * Height should be a multiple of 2 and width should be a multiple of 2.
603 * (If this is a problem for anyone then tell me, and I will fix it.)
604 * Chrominance data is only taken from every second line,
605 * others are ignored in the C version.
606 * FIXME: Write HQ version.
/*
 * Convert packed 3-byte pixels into planar Y/U/V, handling two input rows
 * per outer iteration and two pixels (six bytes) per inner iteration.
 * Unlike its neighbours, this function is not declared static.
 * NOTE(review): the initialisation of y, the loads of the second pixel of
 * each pair, all stores to ydst/udst/vdst and the pointer advances are not
 * visible in this listing. RY..BV and RGB2YUV_SHIFT are defined elsewhere.
 */
608 void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
609 uint8_t *vdst, int width, int height, int lumStride,
610 int chromStride, int srcStride)
/* One chroma sample per two luma samples horizontally. */
613 const int chromWidth = width >> 1;
615 for (; y<height; y+=2) {
/* First inner loop: luma and chroma are computed from the first pixel of each pair. */
617 for (i=0; i<chromWidth; i++) {
618 unsigned int b = src[6*i+0];
619 unsigned int g = src[6*i+1];
620 unsigned int r = src[6*i+2];
/* Fixed-point weighted sums; luma is offset by 16, chroma by 128. */
622 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
623 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
624 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
/* Luma only is recomputed here (presumably for the second pixel of the pair). */
634 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
/* Second inner loop: only luma is computed in the visible lines. */
643 for (i=0; i<chromWidth; i++) {
644 unsigned int b = src[6*i+0];
645 unsigned int g = src[6*i+1];
646 unsigned int r = src[6*i+2];
648 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
656 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
/*
 * Interleave two byte planes: for each of height rows, output byte 2*w comes
 * from src1[w] and output byte 2*w+1 from src2[w], for w in [0, width).
 * NOTE(review): the declarations of h and w and the per-row pointer advances
 * (presumably by the three stride parameters) are not visible in this
 * listing.
 */
666 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
667 uint8_t *dest, int width,
668 int height, int src1Stride,
669 int src2Stride, int dstStride)
673 for (h=0; h < height; h++) {
675 for (w=0; w < width; w++) {
676 dest[2*w+0] = src1[w];
677 dest[2*w+1] = src2[w];
/*
 * Upsample two planes by pixel replication: every source byte is written to
 * two adjacent destination bytes, and every source row (index y>>1) feeds
 * two destination rows.
 * NOTE(review): the declarations of x, y, w and h, the row-loop headers and
 * the initialisation of x are not visible in this listing.
 */
685 static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
686 uint8_t *dst1, uint8_t *dst2,
687 int width, int height,
688 int srcStride1, int srcStride2,
689 int dstStride1, int dstStride2)
/* Loop bounds are half the given width and height. */
693 w=width/2; h=height/2;
/* First plane: src1 -> dst1. */
695 const uint8_t* s1=src1+srcStride1*(y>>1);
696 uint8_t* d=dst1+dstStride1*y;
698 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
/* Second plane: src2 -> dst2, same replication. */
701 const uint8_t* s2=src2+srcStride2*(y>>1);
702 uint8_t* d=dst2+dstStride2*y;
704 for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
/*
 * Combine three source planes into one packed destination, row by row.
 * NOTE(review): only the per-row pointer setup is visible in this listing;
 * the row loop header and the inner packing loop are not shown.
 */
708 static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
709 const uint8_t *src3, uint8_t *dst,
710 int width, int height,
711 int srcStride1, int srcStride2,
712 int srcStride3, int dstStride)
/*
 * src1 is addressed once per output row; src2 and src3 use row index y>>2,
 * so each of their rows is shared by four consecutive output rows.
 */
718 const uint8_t* yp=src1+srcStride1*y;
719 const uint8_t* up=src2+srcStride2*(y>>2);
720 const uint8_t* vp=src3+srcStride3*(y>>2);
721 uint8_t* d=dst+dstStride*y;
/*
 * Copy every second byte of src into consecutive bytes of dst.
 * NOTE(review): the loop and any adjustment of src, dst and count before it
 * are not visible in this listing, so the range of count values is unknown
 * from here.
 */
737 static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
744 dst[count]= src[2*count];
/*
 * De-interleave 4-byte groups of src: byte 0 of each group goes to dst0 and
 * byte 2 goes to dst1.
 * NOTE(review): the end of the parameter list (which must declare count),
 * the loop and any pointer adjustments before it are not visible in this
 * listing.
 */
749 static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
757 dst0[count]= src[4*count+0];
758 dst1[count]= src[4*count+2];
/*
 * De-interleave 4-byte groups from two source rows, averaging them: bytes 0
 * of each group feed dst0 and bytes 2 feed dst1. The average is (a + b) >> 1,
 * i.e. it truncates rather than rounds.
 * NOTE(review): the loop and any pointer adjustments before it are not
 * visible in this listing.
 */
763 static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
764 uint8_t *dst0, uint8_t *dst1, int count)
772 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
773 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
/*
 * De-interleave 4-byte groups of src into dst0 (group byte 0) and dst1
 * (group byte 2), relative to wherever src points when the stores run.
 * NOTE(review): the visible stores use the same offsets as extract_even2_c,
 * so the "odd" selection presumably comes from a src adjustment that is not
 * shown -- TODO confirm. The end of the parameter list and the loop are also
 * not visible in this listing.
 */
778 static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
787 dst0[count]= src[4*count+0];
788 dst1[count]= src[4*count+2];
/*
 * De-interleave 4-byte groups from two source rows, averaging them with
 * (a + b) >> 1 (truncating): group byte 0 feeds dst0 and group byte 2 feeds
 * dst1, relative to wherever src0/src1 point when the stores run.
 * NOTE(review): the visible stores use the same offsets as
 * extract_even2avg_c, so the "odd" selection presumably comes from pointer
 * adjustments that are not shown -- TODO confirm. The loop is also not
 * visible in this listing.
 */
793 static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
794 uint8_t *dst0, uint8_t *dst1, int count)
804 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
805 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
/*
 * Split a packed source into Y, U and V planes row by row: luma comes from
 * the even bytes of each row, chroma from extract_odd2avg_c applied to the
 * previous and the current source row.
 * NOTE(review): the declaration of y, the condition guarding the chroma
 * call and the per-row pointer advances are not visible in this listing.
 * The chroma call reads src - srcStride, so it must not run for the first
 * row -- confirm the guard in the full source.
 */
810 static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
811 const uint8_t *src, int width, int height,
812 int lumStride, int chromStride, int srcStride)
/*
 * Half the width, rounded up for odd widths (relies on >> of a negative int
 * being an arithmetic shift, which is implementation-defined in C).
 */
815 const int chromWidth= -((-width)>>1);
817 for (y=0; y<height; y++) {
818 extract_even_c(src, ydst, width);
820 extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
/*
 * Split a packed source into Y, U and V planes row by row: luma comes from
 * the even bytes of each row, chroma from extract_odd2_c on the same row.
 * NOTE(review): the declaration of y and the per-row pointer advances are
 * not visible in this listing.
 */
830 static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
831 const uint8_t *src, int width, int height,
832 int lumStride, int chromStride, int srcStride)
/*
 * Half the width, rounded up for odd widths (relies on >> of a negative int
 * being an arithmetic shift, which is implementation-defined in C).
 */
835 const int chromWidth= -((-width)>>1);
837 for (y=0; y<height; y++) {
838 extract_even_c(src, ydst, width);
839 extract_odd2_c(src, udst, vdst, chromWidth);
/*
 * Split a packed source into Y, U and V planes row by row: luma comes from
 * the odd bytes of each row (extract_even_c starting at src + 1), chroma
 * from extract_even2avg_c applied to the previous and the current source
 * row.
 * NOTE(review): the declaration of y, the condition guarding the chroma
 * call and the per-row pointer advances are not visible in this listing.
 * The chroma call reads src - srcStride, so it must not run for the first
 * row -- confirm the guard in the full source.
 */
848 static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
849 const uint8_t *src, int width, int height,
850 int lumStride, int chromStride, int srcStride)
/*
 * Half the width, rounded up for odd widths (relies on >> of a negative int
 * being an arithmetic shift, which is implementation-defined in C).
 */
853 const int chromWidth= -((-width)>>1);
855 for (y=0; y<height; y++) {
856 extract_even_c(src + 1, ydst, width);
858 extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
/*
 * Split a packed source into Y, U and V planes row by row: luma comes from
 * the odd bytes of each row (extract_even_c starting at src + 1), chroma
 * from extract_even2_c on the same row.
 * NOTE(review): the declaration of y and the per-row pointer advances are
 * not visible in this listing.
 */
868 static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
869 const uint8_t *src, int width, int height,
870 int lumStride, int chromStride, int srcStride)
/*
 * Half the width, rounded up for odd widths (relies on >> of a negative int
 * being an arithmetic shift, which is implementation-defined in C).
 */
873 const int chromWidth= -((-width)>>1);
875 for (y=0; y<height; y++) {
876 extract_even_c(src + 1, ydst, width);
877 extract_even2_c(src, udst, vdst, chromWidth);
/*
 * Point the converter function pointers (declared elsewhere) at the C
 * implementations defined in this file.
 * NOTE(review): uyvytoyv12_c is defined in this file, but no assignment for
 * it appears in the visible lines; the listing skips one line between the
 * yvu9_to_yuy2 and uyvytoyuv420 assignments -- confirm it is registered
 * there.
 */
886 static inline void rgb2rgb_init_c(void)
/* Packed RGB <-> packed RGB converters. */
888 rgb15to16 = rgb15to16_c;
889 rgb15tobgr24 = rgb15tobgr24_c;
890 rgb15to32 = rgb15to32_c;
891 rgb16tobgr24 = rgb16tobgr24_c;
892 rgb16to32 = rgb16to32_c;
893 rgb16to15 = rgb16to15_c;
894 rgb24tobgr16 = rgb24tobgr16_c;
895 rgb24tobgr15 = rgb24tobgr15_c;
896 rgb24tobgr32 = rgb24tobgr32_c;
897 rgb32to16 = rgb32to16_c;
898 rgb32to15 = rgb32to15_c;
899 rgb32tobgr24 = rgb32tobgr24_c;
900 rgb24to15 = rgb24to15_c;
901 rgb24to16 = rgb24to16_c;
902 rgb24tobgr24 = rgb24tobgr24_c;
903 shuffle_bytes_2103 = shuffle_bytes_2103_c;
904 rgb32tobgr16 = rgb32tobgr16_c;
905 rgb32tobgr15 = rgb32tobgr15_c;
/* Planar/packed YUV converters and plane helpers. */
906 yv12toyuy2 = yv12toyuy2_c;
907 yv12touyvy = yv12touyvy_c;
908 yuv422ptoyuy2 = yuv422ptoyuy2_c;
909 yuv422ptouyvy = yuv422ptouyvy_c;
910 yuy2toyv12 = yuy2toyv12_c;
911 planar2x = planar2x_c;
912 rgb24toyv12 = rgb24toyv12_c;
913 interleaveBytes = interleaveBytes_c;
914 vu9_to_vu12 = vu9_to_vu12_c;
915 yvu9_to_yuy2 = yvu9_to_yuy2_c;
/* Packed 4:2:2 -> planar splitters. */
917 uyvytoyuv420 = uyvytoyuv420_c;
918 uyvytoyuv422 = uyvytoyuv422_c;
919 yuyvtoyuv420 = yuyvtoyuv420_c;
920 yuyvtoyuv422 = yuyvtoyuv422_c;