2 * software RGB to RGB converter
3 * pluralize by software PAL8 to RGB converter
4 * software YUV to YUV converter
5 * software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lot of big-endian byte order fixes by Alex Beregszaszi
10 * This file is part of Libav.
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/*
 * Expand packed 24-bit pixels read from src into 32-bit pixels written to dst.
 * NOTE(review): src_size is presumably the source length in bytes; the loop
 * that consumes it is not visible here -- confirm before relying on it.
 */
29 static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
/* s is the read cursor over the source bytes. */
32 const uint8_t *s = src;
38 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
/*
 * Pack 32-bit pixels read from src into 24-bit pixels written to dst.
 * NOTE(review): src_size is presumably the source length in bytes; the loop
 * that consumes it is not visible here -- confirm before relying on it.
 */
53 static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
/* s is the read cursor over the source bytes. */
56 const uint8_t *s = src;
63 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
79 original by Strepto/Astral
80 ported to gcc & bugfixed: A'rpi
81 MMX2, 3DNOW optimization by Nick Kurshev
82 32-bit C version and the and&add trick by Michael Niedermayer
/*
 * Convert 15-bit pixels to 16-bit pixels: bits 5..14 of each input pixel move
 * up by one position while bits 0..4 stay where they are.
 *
 * The and&add trick: (x & 0x7FFF) + (x & 0x7FE0) adds the upper field to
 * itself, which doubles it (a left shift by one) without touching the low
 * five bits. Input bit 15 is masked off and result bit 5 is always zero.
 */
84 static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
86 register const uint8_t* s=src;
87 register uint8_t* d=dst;
88 register const uint8_t *end;
89 const uint8_t *mm_end;
/*
 * Wide step: two 16-bit pixels are handled in one 32-bit word. The low
 * pixel's sum is at most 0x1F + 2*0x7FE0 = 0xFFDF, so it cannot carry into
 * the high pixel.
 */
93 register unsigned x= *((const uint32_t *)s);
94 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
/* Narrow step: the same operation on a single 16-bit pixel. */
99 register unsigned short x= *((const uint16_t *)s);
100 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
/*
 * Convert 16-bit pixels to 15-bit pixels: bits 6..15 of each input pixel move
 * down to bits 5..14, bits 0..4 are kept, and input bit 5 is discarded.
 * Bit 15 of every output pixel is zero.
 */
104 static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
106 register const uint8_t* s=src;
107 register uint8_t* d=dst;
108 register const uint8_t *end;
109 const uint8_t *mm_end;
/*
 * Wide step: two pixels per 32-bit word. The mask 0x7FE07FE0 is applied after
 * the shift, so the bit that crosses from the high pixel into bit 15 of the
 * low pixel is cleared.
 */
114 register uint32_t x= *((const uint32_t*)s);
115 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
/* Narrow step: the same operation on a single 16-bit pixel. */
120 register uint16_t x= *((const uint16_t*)s);
121 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
/*
 * Reduce 32-bit pixels to 16-bit pixels. Each input word is read as one
 * 32-bit value (4 source bytes per pixel) and one uint16_t is written.
 *
 * Output layout, taken from the masks and shifts below:
 *   output bits  0..4  <- input bits  3..7   (top 5 bits of byte 0)
 *   output bits  5..10 <- input bits 10..15  (top 6 bits of byte 1)
 *   output bits 11..15 <- input bits 19..23  (top 5 bits of byte 2)
 * Input bits 24..31 are not used.
 */
125 static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
127 const uint8_t *s = src;
129 uint16_t *d = (uint16_t *)dst;
133 register int rgb = *(const uint32_t*)s; s += 4;
134 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
/*
 * Reduce 32-bit pixels to 16-bit pixels with the outer two fields swapped
 * relative to rgb32to16_c.
 *
 * Output layout, taken from the masks and shifts below:
 *   output bits 11..15 <- input bits  3..7   (top 5 bits of byte 0)
 *   output bits  5..10 <- input bits 10..15  (top 6 bits of byte 1)
 *   output bits  0..4  <- input bits 19..23  (top 5 bits of byte 2)
 * Input bits 24..31 are not used.
 */
138 static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
140 const uint8_t *s = src;
142 uint16_t *d = (uint16_t *)dst;
145 register int rgb = *(const uint32_t*)s; s += 4;
146 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
/*
 * Reduce 32-bit pixels to 15-bit pixels stored in uint16_t.
 *
 * Output layout, taken from the masks and shifts below:
 *   output bits  0..4  <- input bits  3..7   (top 5 bits of byte 0)
 *   output bits  5..9  <- input bits 11..15  (top 5 bits of byte 1)
 *   output bits 10..14 <- input bits 19..23  (top 5 bits of byte 2)
 * Output bit 15 is zero; input bits 24..31 are not used.
 */
150 static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
152 const uint8_t *s = src;
154 uint16_t *d = (uint16_t *)dst;
157 register int rgb = *(const uint32_t*)s; s += 4;
158 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
/*
 * Reduce 32-bit pixels to 15-bit pixels with the outer two fields swapped
 * relative to rgb32to15_c.
 *
 * Output layout, taken from the masks and shifts below:
 *   output bits 10..14 <- input bits  3..7   (top 5 bits of byte 0)
 *   output bits  5..9  <- input bits 11..15  (top 5 bits of byte 1)
 *   output bits  0..4  <- input bits 19..23  (top 5 bits of byte 2)
 * Output bit 15 is zero; input bits 24..31 are not used.
 */
162 static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
164 const uint8_t *s = src;
166 uint16_t *d = (uint16_t *)dst;
169 register int rgb = *(const uint32_t*)s; s += 4;
170 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
/*
 * Reduce 24-bit pixels to 16-bit pixels (5-6-5 split).
 * NOTE(review): the loads of b, g and r are not visible here; the order in
 * which they are read from s decides the channel order -- confirm.
 */
174 static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
176 const uint8_t *s = src;
178 uint16_t *d = (uint16_t *)dst;
/* b -> bits 0..4 (top 5 bits), g -> bits 5..10 (top 6 bits), r -> bits 11..15 (top 5 bits). */
184 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
/*
 * Reduce 24-bit pixels to 16-bit pixels (5-6-5 split).
 * The packing expression is identical to the one in rgb24tobgr16_c.
 * NOTE(review): the loads of b, g and r are not visible here; presumably they
 * are read in the opposite order to rgb24tobgr16_c -- confirm.
 */
188 static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
190 const uint8_t *s = src;
192 uint16_t *d = (uint16_t *)dst;
/* b -> bits 0..4 (top 5 bits), g -> bits 5..10 (top 6 bits), r -> bits 11..15 (top 5 bits). */
198 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
/*
 * Reduce 24-bit pixels to 15-bit pixels (5-5-5 split) stored in uint16_t.
 * NOTE(review): the loads of b, g and r are not visible here; the order in
 * which they are read from s decides the channel order -- confirm.
 */
202 static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
204 const uint8_t *s = src;
206 uint16_t *d = (uint16_t *)dst;
/* b -> bits 0..4, g -> bits 5..9, r -> bits 10..14 (top 5 bits of each); bit 15 is zero. */
212 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
/*
 * Reduce 24-bit pixels to 15-bit pixels (5-5-5 split) stored in uint16_t.
 * The packing expression is identical to the one in rgb24tobgr15_c.
 * NOTE(review): the loads of b, g and r are not visible here; presumably they
 * are read in the opposite order to rgb24tobgr15_c -- confirm.
 */
216 static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
218 const uint8_t *s = src;
220 uint16_t *d = (uint16_t *)dst;
/* b -> bits 0..4, g -> bits 5..9, r -> bits 10..14 (top 5 bits of each); bit 15 is zero. */
226 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
231 I use a less accurate approximation here by simply left-shifting the input
232 value and filling the low-order bits with zeroes. This method improves PNG
233 compression, but this scheme cannot reproduce white exactly, since it does
234 not generate an all-ones maximum value; the net effect is to darken the
237 The better method should be "left bit replication":
247 | leftmost bits repeated to fill open bits
/*
 * Expand 15-bit pixels (one uint16_t each) to three bytes per pixel.
 * src_size is a byte count: the end pointer is s + src_size/2 uint16_t
 * elements.
 *
 * Each 5-bit field is left-aligned in its output byte with the low three
 * bits zero, so a full-scale field becomes 0xF8, not 0xFF.
 */
251 static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
255 const uint16_t *s = (const uint16_t*)src;
256 end = s + src_size/2;
258 register uint16_t bgr;
/* Bytes are emitted in the order: bits 0..4, bits 5..9, bits 10..14. */
260 *d++ = (bgr&0x1F)<<3;
261 *d++ = (bgr&0x3E0)>>2;
262 *d++ = (bgr&0x7C00)>>7;
/*
 * Expand 16-bit pixels (one uint16_t each, 5-6-5 split) to three bytes per
 * pixel. src_size is a byte count: the end pointer is s + src_size/2
 * uint16_t elements.
 *
 * Each field is left-aligned in its output byte with the low bits zero.
 */
266 static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
269 uint8_t *d = (uint8_t *)dst;
270 const uint16_t *s = (const uint16_t *)src;
271 end = s + src_size/2;
273 register uint16_t bgr;
/* Bytes are emitted in the order: bits 0..4, bits 5..10, bits 11..15. */
275 *d++ = (bgr&0x1F)<<3;
276 *d++ = (bgr&0x7E0)>>3;
277 *d++ = (bgr&0xF800)>>8;
/*
 * Expand 15-bit pixels (one uint16_t each) towards a 32-bit output format.
 * src_size is a byte count: the end pointer is s + src_size/2 uint16_t
 * elements.
 *
 * Two write orders for the three colour bytes are present below.
 * NOTE(review): presumably a preprocessor conditional that is not visible
 * here (likely byte order) selects one of them, and the fourth byte of each
 * output pixel is written on a line that is not shown -- confirm.
 */
281 static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
285 const uint16_t *s = (const uint16_t *)src;
286 end = s + src_size/2;
288 register uint16_t bgr;
/* Order A: bits 10..14 first, then bits 5..9, then bits 0..4. */
292 *d++ = (bgr&0x7C00)>>7;
293 *d++ = (bgr&0x3E0)>>2;
294 *d++ = (bgr&0x1F)<<3;
/* Order B: bits 0..4 first, then bits 5..9, then bits 10..14. */
296 *d++ = (bgr&0x1F)<<3;
297 *d++ = (bgr&0x3E0)>>2;
298 *d++ = (bgr&0x7C00)>>7;
/*
 * Expand 16-bit pixels (one uint16_t each, 5-6-5 split) towards a 32-bit
 * output format. src_size is a byte count: the end pointer is
 * s + src_size/2 uint16_t elements.
 *
 * Two write orders for the three colour bytes are present below.
 * NOTE(review): presumably a preprocessor conditional that is not visible
 * here (likely byte order) selects one of them, and the fourth byte of each
 * output pixel is written on a line that is not shown -- confirm.
 */
304 static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
308 const uint16_t *s = (const uint16_t*)src;
309 end = s + src_size/2;
311 register uint16_t bgr;
/* Order A: bits 11..15 first, then bits 5..10, then bits 0..4. */
315 *d++ = (bgr&0xF800)>>8;
316 *d++ = (bgr&0x7E0)>>3;
317 *d++ = (bgr&0x1F)<<3;
/* Order B: bits 0..4 first, then bits 5..10, then bits 11..15. */
319 *d++ = (bgr&0x1F)<<3;
320 *d++ = (bgr&0x7E0)>>3;
321 *d++ = (bgr&0xF800)>>8;
/*
 * Process src in 32-bit words, keeping bytes 1 and 3 of each word in place
 * and exchanging the two 16-bit halves for the remaining bytes.
 *
 * Indexing: idx starts at 15 - src_size and both pointers are biased by
 * -idx, so s[idx] and d[idx] address offset 0 on the first iteration and the
 * loop ends once the offset reaches src_size (idx reaches 15).
 * NOTE(review): src_size is a long but idx is an int, so very large sizes
 * would be truncated -- confirm callers stay within int range.
 */
327 static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
329 int idx = 15 - src_size;
330 const uint8_t *s = src-idx;
331 uint8_t *d = dst-idx;
332 for (; idx<15; idx+=4) {
/* g holds the bytes selected by 0xff00ff00, which are passed through unchanged. */
333 register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
/*
 * NOTE(review): for this sum to be a clean byte swap, v must already be
 * reduced to the 0x00ff00ff bytes; presumably that masking happens on a line
 * not visible here -- confirm.
 */
335 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
/*
 * Walk 3-byte pixels and write them to dst with the outer bytes exchanged.
 * The visible stores copy the middle byte unchanged and move source byte 0
 * to destination byte 2.
 * NOTE(review): the store to dst[i + 0] is not visible here; presumably it
 * receives src[i + 2] via a temporary so that src == dst also works -- confirm.
 */
339 static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
342 for (i=0; i<src_size; i+=3) {
345 dst[i + 1] = src[i + 1];
346 dst[i + 2] = src[i + 0];
/*
 * Interleave separate Y, U and V planes into packed 4-byte groups whose
 * memory order is Y0 U Y1 V (one U and one V sample per two luma samples).
 *
 * width/height describe the luma plane; chromWidth is width/2.
 * vertLumPerChroma controls how many luma rows share one chroma row; the
 * callers visible in this file pass 1 or 2.
 */
351 static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
352 const uint8_t *vsrc, uint8_t *dst,
353 long width, long height,
354 long lumStride, long chromStride,
355 long dstStride, long vertLumPerChroma)
358 const int chromWidth = width >> 1;
359 for (y=0; y<height; y++) {
/*
 * 64-bit variant: two 4-byte groups (four luma, two U, two V samples) are
 * assembled as k (low half) and l (high half) and stored with one write.
 * The byte positions given by the shifts assume the low-order byte is stored
 * first.
 * NOTE(review): (vc[0] << 24) and (vc[1] << 24) are evaluated as int. For
 * chroma values >= 0x80 the result does not fit in int, and if k is a 64-bit
 * type (its declaration is not visible) a negative intermediate would
 * sign-extend and disturb the upper half of k + (l << 32). Consider an
 * unsigned cast -- confirm against the declaration of k and l.
 */
362 uint64_t *ldst = (uint64_t *) dst;
363 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
364 for (i = 0; i < chromWidth; i += 2) {
366 k = yc[0] + (uc[0] << 8) +
367 (yc[1] << 16) + (vc[0] << 24);
368 l = yc[2] + (uc[1] << 8) +
369 (yc[3] << 16) + (vc[1] << 24);
370 *ldst++ = k + (l << 32);
/*
 * 32-bit variant: one 4-byte group per store. Two packings are present:
 * the first puts Y0 in the most significant byte, the second puts Y0 in the
 * least significant byte.
 * NOTE(review): presumably a byte-order conditional that is not visible here
 * selects between them -- confirm.
 */
377 int i, *idst = (int32_t *) dst;
378 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
379 for (i = 0; i < chromWidth; i++) {
381 *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
382 (yc[1] << 8) + (vc[0] << 0);
384 *idst++ = yc[0] + (uc[0] << 8) +
385 (yc[1] << 16) + (vc[0] << 24);
/*
 * True on the last luma row of each group of vertLumPerChroma rows (the test
 * relies on vertLumPerChroma being a power of two).
 * NOTE(review): the body is not visible; presumably it advances the chroma
 * source pointers -- confirm.
 */
392 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
402 * Height should be a multiple of 2 and width should be a multiple of 16.
403 * (If this is a problem for anyone then tell me, and I will fix it.)
/*
 * Thin wrapper: forwards all arguments to yuvPlanartoyuy2_c with
 * vertLumPerChroma fixed at 2, i.e. one chroma row is shared by two luma rows.
 */
405 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
406 const uint8_t *vsrc, uint8_t *dst,
407 long width, long height,
408 long lumStride, long chromStride,
411 //FIXME interpolate chroma
412 yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
413 chromStride, dstStride, 2);
/*
 * Interleave separate Y, U and V planes into packed 4-byte groups whose
 * memory order is U Y0 V Y1 (one U and one V sample per two luma samples).
 *
 * width/height describe the luma plane; chromWidth is width/2.
 * vertLumPerChroma controls how many luma rows share one chroma row; the
 * callers visible in this file pass 1 or 2.
 */
416 static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
417 const uint8_t *vsrc, uint8_t *dst,
418 long width, long height,
419 long lumStride, long chromStride,
420 long dstStride, long vertLumPerChroma)
423 const int chromWidth = width >> 1;
424 for (y=0; y<height; y++) {
/*
 * 64-bit variant: two 4-byte groups are assembled as k (low half) and
 * l (high half) and stored with one write. The byte positions given by the
 * shifts assume the low-order byte is stored first.
 * NOTE(review): (yc[1] << 24) and (yc[3] << 24) are evaluated as int. For
 * luma values >= 0x80 the result does not fit in int, and if k is a 64-bit
 * type (its declaration is not visible) a negative intermediate would
 * sign-extend and disturb the upper half of k + (l << 32). Consider an
 * unsigned cast -- confirm against the declaration of k and l.
 */
427 uint64_t *ldst = (uint64_t *) dst;
428 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
429 for (i = 0; i < chromWidth; i += 2) {
431 k = uc[0] + (yc[0] << 8) +
432 (vc[0] << 16) + (yc[1] << 24);
433 l = uc[1] + (yc[2] << 8) +
434 (vc[1] << 16) + (yc[3] << 24);
435 *ldst++ = k + (l << 32);
/*
 * 32-bit variant: one 4-byte group per store. Two packings are present:
 * the first puts U in the most significant byte, the second puts U in the
 * least significant byte.
 * NOTE(review): presumably a byte-order conditional that is not visible here
 * selects between them -- confirm.
 */
442 int i, *idst = (int32_t *) dst;
443 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
444 for (i = 0; i < chromWidth; i++) {
446 *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
447 (vc[0] << 8) + (yc[1] << 0);
449 *idst++ = uc[0] + (yc[0] << 8) +
450 (vc[0] << 16) + (yc[1] << 24);
/*
 * True on the last luma row of each group of vertLumPerChroma rows (the test
 * relies on vertLumPerChroma being a power of two).
 * NOTE(review): the body is not visible; presumably it advances the chroma
 * source pointers -- confirm.
 */
457 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
467 * Height should be a multiple of 2 and width should be a multiple of 16.
468 * (If this is a problem for anyone then tell me, and I will fix it.)
/*
 * Thin wrapper: forwards all arguments to yuvPlanartouyvy_c with
 * vertLumPerChroma fixed at 2, i.e. one chroma row is shared by two luma rows.
 */
470 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
471 const uint8_t *vsrc, uint8_t *dst,
472 long width, long height,
473 long lumStride, long chromStride,
476 //FIXME interpolate chroma
477 yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
478 chromStride, dstStride, 2);
482 * Width should be a multiple of 16.
/*
 * Thin wrapper: forwards all arguments to yuvPlanartouyvy_c with
 * vertLumPerChroma fixed at 1, i.e. every luma row has its own chroma row.
 */
484 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
485 const uint8_t *vsrc, uint8_t *dst,
486 long width, long height,
487 long lumStride, long chromStride,
490 yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
491 chromStride, dstStride, 1);
495 * Width should be a multiple of 16.
/*
 * Thin wrapper: forwards all arguments to yuvPlanartoyuy2_c with
 * vertLumPerChroma fixed at 1, i.e. every luma row has its own chroma row.
 */
497 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
498 const uint8_t *vsrc, uint8_t *dst,
499 long width, long height,
500 long lumStride, long chromStride,
503 yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
504 chromStride, dstStride, 1);
508 * Height should be a multiple of 2 and width should be a multiple of 16.
509 * (If this is a problem for anyone then tell me, and I will fix it.)
/*
 * Split packed 4-byte groups laid out as Y0 U Y1 V into separate Y, U and V
 * planes. Rows are handled in pairs (y advances by 2): the first inner loop
 * extracts luma and chroma, the second extracts luma only, so chroma is
 * taken from one row of each pair.
 */
511 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
512 uint8_t *udst, uint8_t *vdst,
513 long width, long height,
514 long lumStride, long chromStride,
518 const int chromWidth = width >> 1;
519 for (y=0; y<height; y+=2) {
/* Full extraction: bytes 0 and 2 are luma, byte 1 is U, byte 3 is V. */
521 for (i=0; i<chromWidth; i++) {
522 ydst[2*i+0] = src[4*i+0];
523 udst[i] = src[4*i+1];
524 ydst[2*i+1] = src[4*i+2];
525 vdst[i] = src[4*i+3];
/* Luma-only extraction: the chroma bytes of this row are skipped. */
530 for (i=0; i<chromWidth; i++) {
531 ydst[2*i+0] = src[4*i+0];
532 ydst[2*i+1] = src[4*i+2];
/*
 * Upscale a single plane to twice its width (and, from the use of dstStride
 * offsets, twice its height) using 3:1 weighted averages of neighbouring
 * source samples: the nearer sample gets weight 3, the farther weight 1, and
 * the sum is divided by 4 with >>2 (truncating).
 */
541 static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
542 long srcHeight, long srcStride, long dstStride)
/* Horizontal-only interpolation of one row; the rightmost output is a plain copy. */
549 for (x=0; x<srcWidth-1; x++) {
550 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
551 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
553 dst[2*srcWidth-1]= src[srcWidth-1];
/* Interior rows: each pass blends the source row at src with the row at src + srcStride. */
557 for (y=1; y<srcHeight; y++) {
558 const int mmxSize = 1;
/* Left edge column: vertical blend only. */
560 dst[0 ]= (3*src[0] + src[srcStride])>>2;
561 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
/*
 * Interior columns: each output blends two diagonally opposite source
 * samples (x with x+srcStride+1, or x+1 with x+srcStride).
 */
563 for (x=mmxSize-1; x<srcWidth-1; x++) {
564 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
565 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
566 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
567 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
/* Right edge column: vertical blend only. */
569 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
570 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
/* Horizontal-only interpolation again, same form as the first row above. */
580 for (x=0; x<srcWidth-1; x++) {
581 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
582 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
584 dst[2*srcWidth-1]= src[srcWidth-1];
/* NOTE(review): the body of this loop is not visible here. */
586 for (x=0; x<srcWidth; x++) {
594 * Height should be a multiple of 2 and width should be a multiple of 16.
595 * (If this is a problem for anyone then tell me, and I will fix it.)
596 * Chrominance data is only taken from every second line, others are ignored.
597 * FIXME: Write HQ version.
/*
 * Split packed 4-byte groups laid out as U Y0 V Y1 into separate Y, U and V
 * planes. Rows are handled in pairs (y advances by 2): the first inner loop
 * extracts luma and chroma, the second extracts luma only, so chroma is
 * taken from one row of each pair.
 */
599 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
600 uint8_t *udst, uint8_t *vdst,
601 long width, long height,
602 long lumStride, long chromStride,
606 const int chromWidth = width >> 1;
607 for (y=0; y<height; y+=2) {
/* Full extraction: byte 0 is U, byte 2 is V, bytes 1 and 3 are luma. */
609 for (i=0; i<chromWidth; i++) {
610 udst[i] = src[4*i+0];
611 ydst[2*i+0] = src[4*i+1];
612 vdst[i] = src[4*i+2];
613 ydst[2*i+1] = src[4*i+3];
/* Luma-only extraction: the chroma bytes of this row are skipped. */
618 for (i=0; i<chromWidth; i++) {
619 ydst[2*i+0] = src[4*i+1];
620 ydst[2*i+1] = src[4*i+3];
630 * Height should be a multiple of 2 and width should be a multiple of 2.
631 * (If this is a problem for anyone then tell me, and I will fix it.)
632 * Chrominance data is only taken from every second line,
633 * others are ignored in the C version.
634 * FIXME: Write HQ version.
/*
 * Convert 3-byte-per-pixel input into separate Y, U and V planes.
 * Rows are handled in pairs (y advances by 2). Each inner iteration covers
 * 6 source bytes, i.e. two pixels: the first loop derives Y, U and V, the
 * second derives Y only.
 *
 * The conversion is fixed-point: a weighted sum of r, g and b is shifted
 * right by RGB2YUV_SHIFT, then offset by 16 for luma or 128 for chroma.
 * The coefficients (RY, GY, BY, RU, ...) and RGB2YUV_SHIFT are defined
 * outside this function.
 */
636 static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
637 uint8_t *udst, uint8_t *vdst,
638 long width, long height,
639 long lumStride, long chromStride,
643 const int chromWidth = width >> 1;
645 for (; y<height; y+=2) {
/* Row with chroma: the first pixel of each pair supplies Y, U and V. */
647 for (i=0; i<chromWidth; i++) {
648 unsigned int b = src[6*i+0];
649 unsigned int g = src[6*i+1];
650 unsigned int r = src[6*i+2];
652 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
653 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
654 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
/* NOTE(review): presumably the second pixel of the pair, reloaded on lines not visible here -- luma only. */
664 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
/* Row without chroma: only luma is computed for both pixels of each pair. */
670 for (i=0; i<chromWidth; i++) {
671 unsigned int b = src[6*i+0];
672 unsigned int g = src[6*i+1];
673 unsigned int r = src[6*i+2];
675 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
683 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
/*
 * Interleave two byte planes into one: for every column w, dest receives
 * src1[w] at even offset 2*w and src2[w] at odd offset 2*w+1.
 * The outer loop runs once per row (height rows), the inner once per column
 * (width columns), so each destination row holds 2*width bytes.
 */
693 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
694 uint8_t *dest, long width,
695 long height, long src1Stride,
696 long src2Stride, long dstStride)
700 for (h=0; h < height; h++) {
702 for (w=0; w < width; w++) {
703 dest[2*w+0] = src1[w];
704 dest[2*w+1] = src2[w];
/*
 * Enlarge two planes (src1 -> dst1, src2 -> dst2) by sample repetition:
 * every source sample is written to two adjacent destination columns, and
 * destination row y reads source row y>>1, so each source row is used for
 * two destination rows.
 */
712 static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
713 uint8_t *dst1, uint8_t *dst2,
714 long width, long height,
715 long srcStride1, long srcStride2,
716 long dstStride1, long dstStride2)
/* Loop bounds are half of the width and height passed in. */
720 w=width/2; h=height/2;
/* First plane. */
722 const uint8_t* s1=src1+srcStride1*(y>>1);
723 uint8_t* d=dst1+dstStride1*y;
725 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
/* Second plane, same scheme. */
728 const uint8_t* s2=src2+srcStride2*(y>>1);
729 uint8_t* d=dst2+dstStride2*y;
731 for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
/*
 * Combine three source planes (src1, src2, src3) into one packed output.
 * For destination row y, src1 is read at row y while src2 and src3 are read
 * at row y>>2, so one row of src2/src3 serves four output rows.
 * NOTE(review): the stores into d are not visible here; presumably src1 is
 * luma and src2/src3 are chroma, as the local names yp/up/vp suggest -- confirm.
 */
735 static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
736 const uint8_t *src3, uint8_t *dst,
737 long width, long height,
738 long srcStride1, long srcStride2,
739 long srcStride3, long dstStride)
745 const uint8_t* yp=src1+srcStride1*y;
746 const uint8_t* up=src2+srcStride2*(y>>2);
747 const uint8_t* vp=src3+srcStride3*(y>>2);
748 uint8_t* d=dst+dstStride*y;
/* x2 is four times the column index x. */
751 const long x2 = x<<2;
/*
 * Copy every second byte of src into dst: dst[k] = src[2*k].
 * NOTE(review): how count is iterated is not visible here -- confirm the
 * loop direction and bounds before changing the indexing.
 */
764 static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
771 dst[count]= src[2*count];
/*
 * Split 4-byte groups of src into two outputs: byte 0 of each group goes to
 * dst0 and byte 2 goes to dst1.
 * NOTE(review): how count is iterated is not visible here -- confirm.
 */
776 static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
784 dst0[count]= src[4*count+0];
785 dst1[count]= src[4*count+2];
/*
 * Like extract_even2_c, but each output byte is the average of the
 * corresponding bytes from two sources (src0 and src1). The average is
 * (a + b) >> 1, which rounds down.
 * NOTE(review): how count is iterated is not visible here -- confirm.
 */
790 static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
791 uint8_t *dst0, uint8_t *dst1, int count)
799 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
800 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
/*
 * Split 4-byte groups of src into two outputs, dst0 and dst1.
 * The visible stores use offsets 0 and 2, the same as extract_even2_c.
 * NOTE(review): presumably src is advanced by one byte on a line not visible
 * here, so that these offsets address the odd bytes of each group -- confirm.
 */
805 static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
814 dst0[count]= src[4*count+0];
815 dst1[count]= src[4*count+2];
/*
 * Averaging counterpart of extract_odd2_c: each output byte is
 * (src0 byte + src1 byte) >> 1, rounding down.
 * The visible stores use offsets 0 and 2, the same as extract_even2avg_c.
 * NOTE(review): presumably src0 and src1 are advanced by one byte on lines
 * not visible here, so that these offsets address the odd bytes -- confirm.
 */
820 static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
821 uint8_t *dst0, uint8_t *dst1, int count)
831 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
832 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
/*
 * Split a packed source into Y, U and V planes, averaging chroma across two
 * source rows. Luma comes from the even bytes of each row (extract_even_c);
 * chroma comes from extract_odd2avg_c applied to the previous and current row.
 */
837 static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
838 const uint8_t *src, long width, long height,
839 long lumStride, long chromStride, long srcStride)
/* width/2 rounded up; relies on >> of a negative value being an arithmetic shift. */
842 const long chromWidth= -((-width)>>1);
844 for (y=0; y<height; y++) {
845 extract_even_c(src, ydst, width);
/*
 * NOTE(review): src - srcStride would point before the current row's buffer
 * on the first row; presumably a condition not visible here restricts this
 * call to every second row -- confirm.
 */
847 extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
/*
 * Split a packed source into Y, U and V planes row by row, with no vertical
 * chroma averaging. Luma comes from the even bytes of each row
 * (extract_even_c); chroma comes from extract_odd2_c on the same row.
 */
857 static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
858 const uint8_t *src, long width, long height,
859 long lumStride, long chromStride, long srcStride)
/* width/2 rounded up; relies on >> of a negative value being an arithmetic shift. */
862 const long chromWidth= -((-width)>>1);
864 for (y=0; y<height; y++) {
865 extract_even_c(src, ydst, width);
866 extract_odd2_c(src, udst, vdst, chromWidth);
/*
 * Split a packed source into Y, U and V planes, averaging chroma across two
 * source rows. Luma comes from the odd bytes of each row (extract_even_c
 * applied to src + 1); chroma comes from extract_even2avg_c applied to the
 * previous and current row.
 */
875 static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
876 const uint8_t *src, long width, long height,
877 long lumStride, long chromStride, long srcStride)
/* width/2 rounded up; relies on >> of a negative value being an arithmetic shift. */
880 const long chromWidth= -((-width)>>1);
882 for (y=0; y<height; y++) {
883 extract_even_c(src + 1, ydst, width);
/*
 * NOTE(review): src - srcStride would point before the current row's buffer
 * on the first row; presumably a condition not visible here restricts this
 * call to every second row -- confirm.
 */
885 extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
/*
 * Split a packed source into Y, U and V planes row by row, with no vertical
 * chroma averaging. Luma comes from the odd bytes of each row (extract_even_c
 * applied to src + 1); chroma comes from extract_even2_c on the same row.
 */
895 static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
896 const uint8_t *src, long width, long height,
897 long lumStride, long chromStride, long srcStride)
/* width/2 rounded up; relies on >> of a negative value being an arithmetic shift. */
900 const long chromWidth= -((-width)>>1);
902 for (y=0; y<height; y++) {
903 extract_even_c(src + 1, ydst, width);
904 extract_even2_c(src, udst, vdst, chromWidth);
/*
 * Install the plain-C implementations: each assignment stores one of the
 * *_c functions from this file into the variable of the same name without
 * the _c suffix.
 * NOTE(review): the left-hand names are presumably function pointers
 * declared outside this file -- confirm. uyvytoyv12_c has no visible
 * assignment in this list -- confirm it is installed on a line not shown.
 */
913 static inline void rgb2rgb_init_c(void)
/* Packed RGB <-> packed RGB conversions. */
915 rgb15to16 = rgb15to16_c;
916 rgb15tobgr24 = rgb15tobgr24_c;
917 rgb15to32 = rgb15to32_c;
918 rgb16tobgr24 = rgb16tobgr24_c;
919 rgb16to32 = rgb16to32_c;
920 rgb16to15 = rgb16to15_c;
921 rgb24tobgr16 = rgb24tobgr16_c;
922 rgb24tobgr15 = rgb24tobgr15_c;
923 rgb24tobgr32 = rgb24tobgr32_c;
924 rgb32to16 = rgb32to16_c;
925 rgb32to15 = rgb32to15_c;
926 rgb32tobgr24 = rgb32tobgr24_c;
927 rgb24to15 = rgb24to15_c;
928 rgb24to16 = rgb24to16_c;
929 rgb24tobgr24 = rgb24tobgr24_c;
930 shuffle_bytes_2103 = shuffle_bytes_2103_c;
931 rgb32tobgr16 = rgb32tobgr16_c;
932 rgb32tobgr15 = rgb32tobgr15_c;
/* Planar <-> packed YUV conversions, plane scaling and RGB -> YUV. */
933 yv12toyuy2 = yv12toyuy2_c;
934 yv12touyvy = yv12touyvy_c;
935 yuv422ptoyuy2 = yuv422ptoyuy2_c;
936 yuv422ptouyvy = yuv422ptouyvy_c;
937 yuy2toyv12 = yuy2toyv12_c;
938 planar2x = planar2x_c;
939 rgb24toyv12 = rgb24toyv12_c;
940 interleaveBytes = interleaveBytes_c;
941 vu9_to_vu12 = vu9_to_vu12_c;
942 yvu9_to_yuy2 = yvu9_to_yuy2_c;
/* Packed-to-planar splitters built on the extract_* helpers. */
944 uyvytoyuv420 = uyvytoyuv420_c;
945 uyvytoyuv422 = uyvytoyuv422_c;
946 yuyvtoyuv420 = yuyvtoyuv420_c;
947 yuyvtoyuv422 = yuyvtoyuv422_c;