]> git.sesse.net Git - ffmpeg/blob - libswscale/rgb2rgb.c
cosmetics: typo pallete --> palette
[ffmpeg] / libswscale / rgb2rgb.c
1 /*
2  *
3  *  rgb2rgb.c, Software RGB to RGB convertor
4  *  pluralize by Software PAL8 to RGB convertor
5  *               Software YUV to YUV convertor
6  *               Software YUV to RGB convertor
7  *  Written by Nick Kurshev.
8  *  palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25  * 
26  * the C code (not assembly, mmx, ...) of this file can be used
27  * under the LGPL license too
28  */
29 #include <inttypes.h>
30 #include "config.h"
31 #include "rgb2rgb.h"
32 #include "swscale.h"
33 #include "swscale_internal.h"
34 #include "x86_cpu.h"
35 #include "bswap.h"
36 #ifdef USE_FASTMEMCPY
37 #include "libvo/fastmemcpy.h"
38 #endif
39
40 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
41
/*
 * Conversion hooks.
 *
 * Every pointer below is assigned by sws_rgb2rgb_init(), which picks the
 * _C, _MMX, _MMX2 or _3DNOW flavour of the corresponding routine.  Until
 * that function has run the pointers are null.
 */

/* Packed RGB depth changes (source depth -> destination depth). */
void (*rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);

/*
 * Packed RGB -> BGR conversions.  rgb24tobgr32 and rgb32tobgr24 are not
 * hooks: they are ordinary functions defined further down in this file.
 */
void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);

/* Planar YUV -> packed YUV. */
void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
                   uint8_t *dst,
                   long width, long height,
                   long lumStride, long chromStride, long dstStride);
void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
                   uint8_t *dst,
                   long width, long height,
                   long lumStride, long chromStride, long dstStride);
void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
                      uint8_t *dst,
                      long width, long height,
                      long lumStride, long chromStride, long dstStride);

/* Packed sources -> planar YUV. */
void (*yuy2toyv12)(const uint8_t *src,
                   uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                   long width, long height,
                   long lumStride, long chromStride, long srcStride);
void (*rgb24toyv12)(const uint8_t *src,
                    uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                    long width, long height,
                    long lumStride, long chromStride, long srcStride);

/* Plane helpers. */
void (*planar2x)(const uint8_t *src, uint8_t *dst,
                 long width, long height,
                 long srcStride, long dstStride);
void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst,
                        long width, long height,
                        long src1Stride, long src2Stride, long dstStride);
void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                    uint8_t *dst1, uint8_t *dst2,
                    long width, long height,
                    long srcStride1, long srcStride2,
                    long dstStride1, long dstStride2);
void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                     uint8_t *dst,
                     long width, long height,
                     long srcStride1, long srcStride2,
                     long srcStride3, long dstStride);
93
#if defined(ARCH_X86) && defined(CONFIG_GPL)
/*
 * 8-byte aligned 64-bit constants, only built for x86 GPL configurations.
 * Presumably these are the operands of the MMX code in rgb2rgb_template.c
 * (not visible from this file) -- confirm before renaming or removing any.
 */
static const uint64_t mmx_null  __attribute__((aligned(8))) = 0x0000000000000000ULL;
static const uint64_t mmx_one   __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;

/* Masks for two 32-bit pixels per quadword. */
static const uint64_t mask32b  attribute_used __attribute__((aligned(8))) = 0x000000FF000000FFULL;
static const uint64_t mask32g  attribute_used __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
static const uint64_t mask32r  attribute_used __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
static const uint64_t mask32   __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
static const uint64_t mask3216br __attribute__((aligned(8)))=0x00F800F800F800F8ULL;
static const uint64_t mask3216g  __attribute__((aligned(8)))=0x0000FC000000FC00ULL;
static const uint64_t mask3215g  __attribute__((aligned(8)))=0x0000F8000000F800ULL;
static const uint64_t mul3216  __attribute__((aligned(8))) = 0x2000000420000004ULL;
static const uint64_t mul3215  __attribute__((aligned(8))) = 0x2000000820000008ULL;

/* Masks for 24-bit pixels packed into a quadword. */
static const uint64_t mask24b  attribute_used __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL;
static const uint64_t mask24g  attribute_used __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL;
static const uint64_t mask24r  attribute_used __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL;
static const uint64_t mask24l  __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
static const uint64_t mask24h  __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
static const uint64_t mask24hh  __attribute__((aligned(8))) = 0xffff000000000000ULL;
static const uint64_t mask24hhh  __attribute__((aligned(8))) = 0xffffffff00000000ULL;
static const uint64_t mask24hhhh  __attribute__((aligned(8))) = 0xffffffffffff0000ULL;

/* Masks for four 15/16-bit pixels per quadword. */
static const uint64_t mask15b  __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
static const uint64_t mask15s  __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
static const uint64_t mask15g  __attribute__((aligned(8))) = 0x03E003E003E003E0ULL;
static const uint64_t mask15r  __attribute__((aligned(8))) = 0x7C007C007C007C00ULL;
#define mask16b mask15b
static const uint64_t mask16g  __attribute__((aligned(8))) = 0x07E007E007E007E0ULL;
static const uint64_t mask16r  __attribute__((aligned(8))) = 0xF800F800F800F800ULL;

/* Per-channel masks for one 15/16-bit value in each 32-bit half. */
static const uint64_t red_16mask  __attribute__((aligned(8))) = 0x0000f8000000f800ULL;
static const uint64_t green_16mask __attribute__((aligned(8)))= 0x000007e0000007e0ULL;
static const uint64_t blue_16mask __attribute__((aligned(8))) = 0x0000001f0000001fULL;
/*
 * Both halves must carry the same 15-bit mask.  The low halves used to hold
 * the 16-bit values (f800 / 07e0), which let one extra bit per channel leak
 * through for the pixel processed in the low dword.
 */
static const uint64_t red_15mask  __attribute__((aligned(8))) = 0x00007c0000007c00ULL;
static const uint64_t green_15mask __attribute__((aligned(8)))= 0x000003e0000003e0ULL;
static const uint64_t blue_15mask __attribute__((aligned(8))) = 0x0000001f0000001fULL;

#ifdef FAST_BGR2YV12
/* 7 bit coefficients (see the FAST_BGR2YV12 define near the top of the file). */
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
/* 15 bit coefficients. */
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111       attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;

#if 0
static volatile uint64_t __attribute__((aligned(8))) b5Dither;
static volatile uint64_t __attribute__((aligned(8))) g5Dither;
static volatile uint64_t __attribute__((aligned(8))) g6Dither;
static volatile uint64_t __attribute__((aligned(8))) r5Dither;

static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103LL,
        0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602LL,
        0x0004000400040004LL,};
#endif
#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */
157
/*
 * Fixed-point RGB -> YUV weights with RGB2YUV_SHIFT fractional bits.
 * The naming is <input channel><output plane>, e.g. GY is the weight of
 * green in Y.  The conversion adds 0.5 and truncates towards zero.
 */
#define RGB2YUV_SHIFT 8
#define RGB2YUV_COEFF(c) ((int)((c)*(1<<RGB2YUV_SHIFT)+0.5))

#define BY RGB2YUV_COEFF( 0.098)
#define BV RGB2YUV_COEFF(-0.071)
#define BU RGB2YUV_COEFF( 0.439)
#define GY RGB2YUV_COEFF( 0.504)
#define GV RGB2YUV_COEFF(-0.368)
#define GU RGB2YUV_COEFF(-0.291)
#define RY RGB2YUV_COEFF( 0.257)
#define RV RGB2YUV_COEFF( 0.439)
#define RU RGB2YUV_COEFF(-0.148)
168
169 //Note: we have C, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
170 //Plain C versions
171 #undef HAVE_MMX
172 #undef HAVE_MMX2
173 #undef HAVE_3DNOW
174 #undef HAVE_SSE2
175 #define RENAME(a) a ## _C
176 #include "rgb2rgb_template.c"
177
178 #if defined(ARCH_X86) && defined(CONFIG_GPL)
179
180 //MMX versions
181 #undef RENAME
182 #define HAVE_MMX
183 #undef HAVE_MMX2
184 #undef HAVE_3DNOW
185 #undef HAVE_SSE2
186 #define RENAME(a) a ## _MMX
187 #include "rgb2rgb_template.c"
188
189 //MMX2 versions
190 #undef RENAME
191 #define HAVE_MMX
192 #define HAVE_MMX2
193 #undef HAVE_3DNOW
194 #undef HAVE_SSE2
195 #define RENAME(a) a ## _MMX2
196 #include "rgb2rgb_template.c"
197
198 //3DNOW versions
199 #undef RENAME
200 #define HAVE_MMX
201 #undef HAVE_MMX2
202 #define HAVE_3DNOW
203 #undef HAVE_SSE2
204 #define RENAME(a) a ## _3DNOW
205 #include "rgb2rgb_template.c"
206
207 #endif //ARCH_X86 || ARCH_X86_64
208
209 /*
210  rgb15->rgb16 Original by Strepto/Astral
211  ported to gcc & bugfixed : A'rpi
212  MMX2, 3DNOW optimization by Nick Kurshev
213  32bit c version, and and&add trick by Michael Niedermayer
214 */
215
216 void sws_rgb2rgb_init(int flags){
217 #if (defined(HAVE_MMX2) || defined(HAVE_3DNOW) || defined(HAVE_MMX))  && defined(CONFIG_GPL)
218         if(flags & SWS_CPU_CAPS_MMX2){
219                 rgb15to16= rgb15to16_MMX2;
220                 rgb15to24= rgb15to24_MMX2;
221                 rgb15to32= rgb15to32_MMX2;
222                 rgb16to24= rgb16to24_MMX2;
223                 rgb16to32= rgb16to32_MMX2;
224                 rgb16to15= rgb16to15_MMX2;
225                 rgb24to16= rgb24to16_MMX2;
226                 rgb24to15= rgb24to15_MMX2;
227                 rgb24to32= rgb24to32_MMX2;
228                 rgb32to16= rgb32to16_MMX2;
229                 rgb32to15= rgb32to15_MMX2;
230                 rgb32to24= rgb32to24_MMX2;
231                 rgb24tobgr15= rgb24tobgr15_MMX2;
232                 rgb24tobgr16= rgb24tobgr16_MMX2;
233                 rgb24tobgr24= rgb24tobgr24_MMX2;
234                 rgb32tobgr32= rgb32tobgr32_MMX2;
235                 rgb32tobgr16= rgb32tobgr16_MMX2;
236                 rgb32tobgr15= rgb32tobgr15_MMX2;
237                 yv12toyuy2= yv12toyuy2_MMX2;
238                 yv12touyvy= yv12touyvy_MMX2;
239                 yuv422ptoyuy2= yuv422ptoyuy2_MMX2;
240                 yuy2toyv12= yuy2toyv12_MMX2;
241 //              uyvytoyv12= uyvytoyv12_MMX2;
242 //              yvu9toyv12= yvu9toyv12_MMX2;
243                 planar2x= planar2x_MMX2;
244                 rgb24toyv12= rgb24toyv12_MMX2;
245                 interleaveBytes= interleaveBytes_MMX2;
246                 vu9_to_vu12= vu9_to_vu12_MMX2;
247                 yvu9_to_yuy2= yvu9_to_yuy2_MMX2;
248         }else if(flags & SWS_CPU_CAPS_3DNOW){
249                 rgb15to16= rgb15to16_3DNOW;
250                 rgb15to24= rgb15to24_3DNOW;
251                 rgb15to32= rgb15to32_3DNOW;
252                 rgb16to24= rgb16to24_3DNOW;
253                 rgb16to32= rgb16to32_3DNOW;
254                 rgb16to15= rgb16to15_3DNOW;
255                 rgb24to16= rgb24to16_3DNOW;
256                 rgb24to15= rgb24to15_3DNOW;
257                 rgb24to32= rgb24to32_3DNOW;
258                 rgb32to16= rgb32to16_3DNOW;
259                 rgb32to15= rgb32to15_3DNOW;
260                 rgb32to24= rgb32to24_3DNOW;
261                 rgb24tobgr15= rgb24tobgr15_3DNOW;
262                 rgb24tobgr16= rgb24tobgr16_3DNOW;
263                 rgb24tobgr24= rgb24tobgr24_3DNOW;
264                 rgb32tobgr32= rgb32tobgr32_3DNOW;
265                 rgb32tobgr16= rgb32tobgr16_3DNOW;
266                 rgb32tobgr15= rgb32tobgr15_3DNOW;
267                 yv12toyuy2= yv12toyuy2_3DNOW;
268                 yv12touyvy= yv12touyvy_3DNOW;
269                 yuv422ptoyuy2= yuv422ptoyuy2_3DNOW;
270                 yuy2toyv12= yuy2toyv12_3DNOW;
271 //              uyvytoyv12= uyvytoyv12_3DNOW;
272 //              yvu9toyv12= yvu9toyv12_3DNOW;
273                 planar2x= planar2x_3DNOW;
274                 rgb24toyv12= rgb24toyv12_3DNOW;
275                 interleaveBytes= interleaveBytes_3DNOW;
276                 vu9_to_vu12= vu9_to_vu12_3DNOW;
277                 yvu9_to_yuy2= yvu9_to_yuy2_3DNOW;
278         }else if(flags & SWS_CPU_CAPS_MMX){
279                 rgb15to16= rgb15to16_MMX;
280                 rgb15to24= rgb15to24_MMX;
281                 rgb15to32= rgb15to32_MMX;
282                 rgb16to24= rgb16to24_MMX;
283                 rgb16to32= rgb16to32_MMX;
284                 rgb16to15= rgb16to15_MMX;
285                 rgb24to16= rgb24to16_MMX;
286                 rgb24to15= rgb24to15_MMX;
287                 rgb24to32= rgb24to32_MMX;
288                 rgb32to16= rgb32to16_MMX;
289                 rgb32to15= rgb32to15_MMX;
290                 rgb32to24= rgb32to24_MMX;
291                 rgb24tobgr15= rgb24tobgr15_MMX;
292                 rgb24tobgr16= rgb24tobgr16_MMX;
293                 rgb24tobgr24= rgb24tobgr24_MMX;
294                 rgb32tobgr32= rgb32tobgr32_MMX;
295                 rgb32tobgr16= rgb32tobgr16_MMX;
296                 rgb32tobgr15= rgb32tobgr15_MMX;
297                 yv12toyuy2= yv12toyuy2_MMX;
298                 yv12touyvy= yv12touyvy_MMX;
299                 yuv422ptoyuy2= yuv422ptoyuy2_MMX;
300                 yuy2toyv12= yuy2toyv12_MMX;
301 //              uyvytoyv12= uyvytoyv12_MMX;
302 //              yvu9toyv12= yvu9toyv12_MMX;
303                 planar2x= planar2x_MMX;
304                 rgb24toyv12= rgb24toyv12_MMX;
305                 interleaveBytes= interleaveBytes_MMX;
306                 vu9_to_vu12= vu9_to_vu12_MMX;
307                 yvu9_to_yuy2= yvu9_to_yuy2_MMX;
308         }else
309 #endif /* defined(HAVE_MMX2) || defined(HAVE_3DNOW) || defined(HAVE_MMX) */
310         {
311                 rgb15to16= rgb15to16_C;
312                 rgb15to24= rgb15to24_C;
313                 rgb15to32= rgb15to32_C;
314                 rgb16to24= rgb16to24_C;
315                 rgb16to32= rgb16to32_C;
316                 rgb16to15= rgb16to15_C;
317                 rgb24to16= rgb24to16_C;
318                 rgb24to15= rgb24to15_C;
319                 rgb24to32= rgb24to32_C;
320                 rgb32to16= rgb32to16_C;
321                 rgb32to15= rgb32to15_C;
322                 rgb32to24= rgb32to24_C;
323                 rgb24tobgr15= rgb24tobgr15_C;
324                 rgb24tobgr16= rgb24tobgr16_C;
325                 rgb24tobgr24= rgb24tobgr24_C;
326                 rgb32tobgr32= rgb32tobgr32_C;
327                 rgb32tobgr16= rgb32tobgr16_C;
328                 rgb32tobgr15= rgb32tobgr15_C;
329                 yv12toyuy2= yv12toyuy2_C;
330                 yv12touyvy= yv12touyvy_C;
331                 yuv422ptoyuy2= yuv422ptoyuy2_C;
332                 yuy2toyv12= yuy2toyv12_C;
333 //              uyvytoyv12= uyvytoyv12_C;
334 //              yvu9toyv12= yvu9toyv12_C;
335                 planar2x= planar2x_C;
336                 rgb24toyv12= rgb24toyv12_C;
337                 interleaveBytes= interleaveBytes_C;
338                 vu9_to_vu12= vu9_to_vu12_C;
339                 yvu9_to_yuy2= yvu9_to_yuy2_C;
340         }
341 }
342
/**
 * Expand 8-bit palette indices into 4-byte pixels, reversing the order of
 * the three colour bytes of each palette entry.
 * Palette is assumed to contain BGR32 (4 bytes per entry).
 *
 * The remaining byte of every output pixel (dst[3], or dst[0] when
 * WORDS_BIGENDIAN is defined) is left untouched.
 */
void palette8torgb32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    long n;

    for (n = 0; n < num_pixels; n++, dst += 4) {
        const uint8_t *entry = palette + src[n] * 4;

#ifdef WORDS_BIGENDIAN
        dst[3] = entry[2];
        dst[2] = entry[1];
        dst[1] = entry[0];
#else
        dst[0] = entry[2];
        dst[1] = entry[1];
        dst[2] = entry[0];
#endif
    }
}
371
/**
 * Expand 8-bit palette indices into 4-byte pixels, keeping the byte order of
 * the first three bytes of each 4-byte palette entry.
 *
 * The remaining byte of every output pixel (dst[3], or dst[0] when
 * WORDS_BIGENDIAN is defined) is left untouched.
 */
void palette8tobgr32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    long n;

    for (n = 0; n < num_pixels; n++, dst += 4) {
        const uint8_t *entry = palette + src[n] * 4;

#ifdef WORDS_BIGENDIAN
        dst[3] = entry[0];
        dst[2] = entry[1];
        dst[1] = entry[2];
#else
        dst[0] = entry[0];
        dst[1] = entry[1];
        dst[2] = entry[2];
#endif
    }
}
392
/**
 * Expand 8-bit palette indices into 3-byte pixels, reversing the order of
 * the three colour bytes of each palette entry.
 * Palette is assumed to contain BGR32 (4 bytes per entry).
 *
 * Bytes are copied one at a time: a single 32-bit store per pixel would
 * write one byte too much and might cause alignment issues on some
 * architectures.
 */
void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    long n;

    for (n = 0; n < num_pixels; n++, dst += 3) {
        const uint8_t *entry = palette + src[n] * 4;

        dst[0] = entry[2];
        dst[1] = entry[1];
        dst[2] = entry[0];
    }
}
413
/**
 * Expand 8-bit palette indices into 3-byte pixels, copying the first three
 * bytes of each 4-byte palette entry in order.
 *
 * Bytes are copied one at a time: a single 32-bit store per pixel would
 * write one byte too much and might cause alignment issues on some
 * architectures.
 */
void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    long n;

    for (n = 0; n < num_pixels; n++, dst += 3) {
        const uint8_t *entry = palette + src[n] * 4;

        dst[0] = entry[0];
        dst[1] = entry[1];
        dst[2] = entry[2];
    }
}
431
/**
 * Look up each 8-bit index in a table of 16-bit entries and store the entry
 * unchanged.
 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette.
 *
 * dst and palette are accessed as uint16_t arrays.
 */
void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t       *out = (uint16_t *)dst;
    long left;

    for (left = num_pixels; left > 0; left--)
        *out++ = lut[*src++];
}
/**
 * Look up each 8-bit index in a table of 16-bit entries and store the entry
 * after passing it through bswap_16().
 *
 * dst and palette are accessed as uint16_t arrays.
 */
void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t       *out = (uint16_t *)dst;
    long left;

    for (left = num_pixels; left > 0; left--)
        *out++ = bswap_16(lut[*src++]);
}
447
/**
 * Look up each 8-bit index in a table of 16-bit entries and store the entry
 * unchanged.
 * Palette is assumed to contain BGR15, see rgb32to15 to convert the palette.
 *
 * dst and palette are accessed as uint16_t arrays.
 */
void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t       *out = (uint16_t *)dst;
    long left;

    for (left = num_pixels; left > 0; left--)
        *out++ = lut[*src++];
}
/**
 * Look up each 8-bit index in a table of 16-bit entries and store the entry
 * after passing it through bswap_16().
 *
 * dst and palette are accessed as uint16_t arrays.
 */
void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t       *out = (uint16_t *)dst;
    long left;

    for (left = num_pixels; left > 0; left--)
        *out++ = bswap_16(lut[*src++]);
}
463
/**
 * Convert 4-byte pixels to 3-byte pixels with the colour bytes in reverse
 * order; the fourth source byte of each pixel is dropped.
 *
 * @param src_size size of the source in bytes; src_size >> 2 pixels are
 *                 converted
 */
void rgb32tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in  = src;
    uint8_t       *out = dst;
    long remaining;

    for (remaining = src_size >> 2; remaining > 0; remaining--, in += 4, out += 3) {
#ifdef WORDS_BIGENDIAN
        /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
        out[0] = in[1];
        out[1] = in[2];
        out[2] = in[3];
#else
        out[0] = in[2];
        out[1] = in[1];
        out[2] = in[0];
#endif
    }
}
482
/**
 * Convert 3-byte pixels to 4-byte pixels with the colour bytes in reverse
 * order and the extra byte set to 0.
 *
 * Only complete source pixels are converted: trailing bytes that do not
 * form a whole 3-byte pixel are ignored.  (The loop used to run while
 * 3*i < src_size, which read past the end of src and wrote one extra
 * output pixel whenever src_size was not a multiple of 3.)
 *
 * @param src_size size of the source in bytes
 */
void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
{
    long i;
    long num_pixels = src_size / 3;

    for (i = 0; i < num_pixels; i++) {
#ifdef WORDS_BIGENDIAN
        /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
        dst[4*i + 0] = 0;
        dst[4*i + 1] = src[3*i + 0];
        dst[4*i + 2] = src[3*i + 1];
        dst[4*i + 3] = src[3*i + 2];
#else
        dst[4*i + 0] = src[3*i + 2];
        dst[4*i + 1] = src[3*i + 1];
        dst[4*i + 2] = src[3*i + 0];
        dst[4*i + 3] = 0;
#endif
    }
}
502
/**
 * Expand 16-bit 5:6:5 pixels into 4-byte pixels.
 *
 * Each field is moved to the top of its output byte (low bits are zero).
 * Without WORDS_BIGENDIAN the output bytes are: bits 11-15, bits 5-10,
 * bits 0-4, 0; with it the order is reversed.
 *
 * @param src_size size of the source in bytes; src is read as uint16_t
 */
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in    = (const uint16_t *)src;
    const long      count = src_size / 2;
    long n;

    for (n = 0; n < count; n++) {
        const unsigned px  = in[n];
        uint8_t       *out = dst + 4 * n;

#ifdef WORDS_BIGENDIAN
        out[0] = 0;
        out[1] = (px & 0x1F) << 3;
        out[2] = (px & 0x7E0) >> 3;
        out[3] = (px & 0xF800) >> 8;
#else
        out[0] = (px & 0xF800) >> 8;
        out[1] = (px & 0x7E0) >> 3;
        out[2] = (px & 0x1F) << 3;
        out[3] = 0;
#endif
    }
}
526
/**
 * Expand 16-bit 5:6:5 pixels into 3-byte pixels.
 *
 * Output bytes per pixel: bits 11-15, bits 5-10, bits 0-4 of the source
 * value, each moved to the top of its byte (low bits are zero).
 *
 * @param src_size size of the source in bytes; src is read as uint16_t
 */
void rgb16tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in    = (const uint16_t *)src;
    const long      count = src_size / 2;
    long n;

    for (n = 0; n < count; n++) {
        const unsigned px  = in[n];
        uint8_t       *out = dst + 3 * n;

        out[0] = (px & 0xF800) >> 8;
        out[1] = (px & 0x7E0) >> 3;
        out[2] = (px & 0x1F) << 3;
    }
}
542
/**
 * Swap the two 5-bit outer fields of 16-bit 5:6:5 pixels; the 6-bit middle
 * field stays in place.
 *
 * Pixels are read and written as whole native-endian 16-bit values, like
 * the other 15/16-bit converters in this file.  (The previous code loaded
 * and stored a single byte per pixel, so the upper field was always read as
 * zero and the high byte of dst was never written.)
 *
 * @param src_size size of the source in bytes
 */
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in  = (const uint16_t *)src;
    uint16_t       *out = (uint16_t *)dst;
    long num_pixels = src_size >> 1;
    long i;

    for (i = 0; i < num_pixels; i++) {
        unsigned rgb = in[i];
        unsigned r = rgb & 0x1F;
        unsigned g = (rgb & 0x7E0) >> 5;
        unsigned b = (rgb & 0xF800) >> 11;

        out[i] = b | (g << 5) | (r << 11);
    }
}
559
/**
 * Convert 16-bit 5:6:5 pixels to 15-bit 5:5:5 pixels with the two outer
 * fields swapped.
 *
 * Pixels are read and written as whole native-endian 16-bit values (the
 * previous code accessed a single byte per pixel).  The 6-bit middle field
 * is reduced to 5 bits by dropping its least significant bit; the previous
 * code masked off the most significant bit instead.
 *
 * @param src_size size of the source in bytes
 */
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in  = (const uint16_t *)src;
    uint16_t       *out = (uint16_t *)dst;
    long num_pixels = src_size >> 1;
    long i;

    for (i = 0; i < num_pixels; i++) {
        unsigned rgb = in[i];
        unsigned r = rgb & 0x1F;
        unsigned g = (rgb & 0x7E0) >> 5;
        unsigned b = (rgb & 0xF800) >> 11;

        out[i] = b | ((g >> 1) << 5) | (r << 10);
    }
}
576
/**
 * Expand 15-bit 5:5:5 pixels into 4-byte pixels.
 *
 * Each field is moved to the top of its output byte (low bits are zero).
 * Without WORDS_BIGENDIAN the output bytes are: bits 10-14, bits 5-9,
 * bits 0-4, 0; with it the order is reversed.
 *
 * @param src_size size of the source in bytes; src is read as uint16_t
 */
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in    = (const uint16_t *)src;
    const long      count = src_size / 2;
    long n;

    for (n = 0; n < count; n++) {
        const unsigned px  = in[n];
        uint8_t       *out = dst + 4 * n;

#ifdef WORDS_BIGENDIAN
        out[0] = 0;
        out[1] = (px & 0x1F) << 3;
        out[2] = (px & 0x3E0) >> 2;
        out[3] = (px & 0x7C00) >> 7;
#else
        out[0] = (px & 0x7C00) >> 7;
        out[1] = (px & 0x3E0) >> 2;
        out[2] = (px & 0x1F) << 3;
        out[3] = 0;
#endif
    }
}
600
/**
 * Expand 15-bit 5:5:5 pixels into 3-byte pixels.
 *
 * Output bytes per pixel: bits 10-14, bits 5-9, bits 0-4 of the source
 * value, each moved to the top of its byte (low bits are zero).
 *
 * @param src_size size of the source in bytes; src is read as uint16_t
 */
void rgb15tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in    = (const uint16_t *)src;
    const long      count = src_size / 2;
    long n;

    for (n = 0; n < count; n++) {
        const unsigned px  = in[n];
        uint8_t       *out = dst + 3 * n;

        out[0] = (px & 0x7C00) >> 7;
        out[1] = (px & 0x3E0) >> 2;
        out[2] = (px & 0x1F) << 3;
    }
}
616
/**
 * Convert 15-bit 5:5:5 pixels to 16-bit 5:6:5 pixels with the two outer
 * fields swapped.
 *
 * Pixels are read and written as whole native-endian 16-bit values (the
 * previous code accessed a single byte per pixel).  The 5-bit middle field
 * is widened to 6 bits by shifting it up one bit; the previous code left it
 * in the low 5 bits of the 6-bit field, halving its intensity.
 *
 * @param src_size size of the source in bytes
 */
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in  = (const uint16_t *)src;
    uint16_t       *out = (uint16_t *)dst;
    long num_pixels = src_size >> 1;
    long i;

    for (i = 0; i < num_pixels; i++) {
        unsigned rgb = in[i];
        unsigned r = rgb & 0x1F;
        unsigned g = (rgb & 0x3E0) >> 5;
        unsigned b = (rgb & 0x7C00) >> 10;

        out[i] = b | (g << 6) | (r << 11);
    }
}
633
/**
 * Swap the two outer 5-bit fields of 15-bit 5:5:5 pixels; the middle field
 * stays in place and bit 15 of the result is always 0.
 *
 * Pixels are read and written as whole native-endian 16-bit values, like
 * the other 15/16-bit converters in this file.  (The previous code loaded
 * and stored a single byte per pixel, so the upper field was always read as
 * zero and the high byte of dst was never written.)
 *
 * @param src_size size of the source in bytes
 */
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in  = (const uint16_t *)src;
    uint16_t       *out = (uint16_t *)dst;
    long num_pixels = src_size >> 1;
    long i;

    for (i = 0; i < num_pixels; i++) {
        unsigned rgb = in[i];
        unsigned r = rgb & 0x1F;
        unsigned g = (rgb & 0x3E0) >> 5;
        unsigned b = (rgb & 0x7C00) >> 10;

        out[i] = b | (g << 5) | (r << 10);
    }
}
650
/**
 * Swap the outer fields of 8-bit 3:3:2 pixels.
 *
 * The source byte is split into a 3-bit low field (bits 0-2), a 3-bit
 * middle field (bits 3-5) and a 2-bit high field (bits 6-7).  In the output
 * the former high field occupies bits 0-2 (shifted up by one to fill three
 * bits), the middle field stays in place, and the former low field occupies
 * bits 6-7.
 *
 * The 3-bit field is narrowed to 2 bits by dropping its least significant
 * bit.  The previous code kept the two low bits instead, so e.g. a value of
 * 4 (of 7) was converted to 0.
 *
 * @param src_size number of pixels (one byte each)
 */
void rgb8tobgr8(const uint8_t *src, uint8_t *dst, long src_size)
{
    long i;
    long num_pixels = src_size;

    for (i = 0; i < num_pixels; i++) {
        unsigned rgb = src[i];
        unsigned r = rgb & 0x07;
        unsigned g = (rgb & 0x38) >> 3;
        unsigned b = (rgb & 0xC0) >> 6;

        dst[i] = (b << 1) | (g << 3) | ((r >> 1) << 6);
    }
}
665 }