]> git.sesse.net Git - ffmpeg/blob - libswscale/rgb2rgb.c
2dbcf4e71943421d5787b7e156e2327ebd34658f
[ffmpeg] / libswscale / rgb2rgb.c
1 /*
2  *
3  *  rgb2rgb.c, Software RGB to RGB convertor
4  *  pluralize by Software PAL8 to RGB convertor
5  *               Software YUV to YUV convertor
6  *               Software YUV to RGB convertor
7  *  Written by Nick Kurshev.
8  *  palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 #include <inttypes.h>
25 #include "config.h"
26 #include "rgb2rgb.h"
27 #include "swscale.h"
28 #include "swscale_internal.h"
29 #include "x86_cpu.h"
30 #include "bswap.h"
31 #ifdef USE_FASTMEMCPY
32 #include "libvo/fastmemcpy.h"
33 #endif
34
35 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
36
/*
 * Packed RGB <-> packed RGB converters.
 * Every pointer shares the signature (src, dst, src_size) and is bound to a
 * concrete implementation by sws_rgb2rgb_init().
 * rgb24tobgr32 and rgb32tobgr24 are not pointers; they are ordinary functions
 * defined further down in this file.
 */
void (*rgb24to32)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to24)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to24)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to24)   (const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, long src_size);

/* Same, with the outer colour components swapped. */
void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
57
/*
 * Planar / packed YUV converters and helpers.
 * Like the RGB pointers, these are bound by sws_rgb2rgb_init().
 */

/* Three separate planes (ysrc, usrc, vsrc) -> one packed destination. */
void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
                   const uint8_t *vsrc, uint8_t *dst,
                   long width, long height,
                   long lumStride, long chromStride, long dstStride);
void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc,
                   const uint8_t *vsrc, uint8_t *dst,
                   long width, long height,
                   long lumStride, long chromStride, long dstStride);
void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
                      const uint8_t *vsrc, uint8_t *dst,
                      long width, long height,
                      long lumStride, long chromStride, long dstStride);

/* One packed source -> three separate planes (ydst, udst, vdst). */
void (*yuy2toyv12)(const uint8_t *src,
                   uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                   long width, long height,
                   long lumStride, long chromStride, long srcStride);
void (*rgb24toyv12)(const uint8_t *src,
                    uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                    long width, long height,
                    long lumStride, long chromStride, long srcStride);

/* Single-plane and multi-plane helpers. */
void (*planar2x)(const uint8_t *src, uint8_t *dst,
                 long width, long height,
                 long srcStride, long dstStride);
void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst,
                        long width, long height,
                        long src1Stride, long src2Stride, long dstStride);
void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                    uint8_t *dst1, uint8_t *dst2,
                    long width, long height,
                    long srcStride1, long srcStride2,
                    long dstStride1, long dstStride2);
void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2,
                     const uint8_t *src3, uint8_t *dst,
                     long width, long height,
                     long srcStride1, long srcStride2,
                     long srcStride3, long dstStride);
88
89 #if defined(ARCH_X86) || defined(ARCH_X86_64)
90 static const uint64_t mmx_null  __attribute__((aligned(8))) = 0x0000000000000000ULL;
91 static const uint64_t mmx_one   __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
92 static const uint64_t mask32b  attribute_used __attribute__((aligned(8))) = 0x000000FF000000FFULL;
93 static const uint64_t mask32g  attribute_used __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
94 static const uint64_t mask32r  attribute_used __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
95 static const uint64_t mask32   __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
96 static const uint64_t mask3216br __attribute__((aligned(8)))=0x00F800F800F800F8ULL;
97 static const uint64_t mask3216g  __attribute__((aligned(8)))=0x0000FC000000FC00ULL;
98 static const uint64_t mask3215g  __attribute__((aligned(8)))=0x0000F8000000F800ULL;
99 static const uint64_t mul3216  __attribute__((aligned(8))) = 0x2000000420000004ULL;
100 static const uint64_t mul3215  __attribute__((aligned(8))) = 0x2000000820000008ULL;
101 static const uint64_t mask24b  attribute_used __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL;
102 static const uint64_t mask24g  attribute_used __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL;
103 static const uint64_t mask24r  attribute_used __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL;
104 static const uint64_t mask24l  __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
105 static const uint64_t mask24h  __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
106 static const uint64_t mask24hh  __attribute__((aligned(8))) = 0xffff000000000000ULL;
107 static const uint64_t mask24hhh  __attribute__((aligned(8))) = 0xffffffff00000000ULL;
108 static const uint64_t mask24hhhh  __attribute__((aligned(8))) = 0xffffffffffff0000ULL;
109 static const uint64_t mask15b  __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
110 static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
111 static const uint64_t mask15s  __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
112 static const uint64_t mask15g  __attribute__((aligned(8))) = 0x03E003E003E003E0ULL;
113 static const uint64_t mask15r  __attribute__((aligned(8))) = 0x7C007C007C007C00ULL;
114 #define mask16b mask15b
115 static const uint64_t mask16g  __attribute__((aligned(8))) = 0x07E007E007E007E0ULL;
116 static const uint64_t mask16r  __attribute__((aligned(8))) = 0xF800F800F800F800ULL;
/*
 * Field masks for a 16-bit (5-6-5) or 15-bit (5-5-5) pixel, replicated into
 * both 32-bit halves of a quadword so that the same field is selected in
 * either half.
 *
 * red_15mask and green_15mask previously carried the 5-6-5 mask (0xf800 /
 * 0x07e0) in their low half, so the two halves selected different bits.
 * Both halves now use the 5-5-5 masks 0x7c00 / 0x03e0.
 */
static const uint64_t red_16mask   __attribute__((aligned(8))) = 0x0000f8000000f800ULL;
static const uint64_t green_16mask __attribute__((aligned(8))) = 0x000007e0000007e0ULL;
static const uint64_t blue_16mask  __attribute__((aligned(8))) = 0x0000001f0000001fULL;
static const uint64_t red_15mask   __attribute__((aligned(8))) = 0x00007c0000007c00ULL;
static const uint64_t green_15mask __attribute__((aligned(8))) = 0x000003e0000003e0ULL;
static const uint64_t blue_15mask  __attribute__((aligned(8))) = 0x0000001f0000001fULL;
123
124 #ifdef FAST_BGR2YV12
125 static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
126 static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
127 static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
128 #else
129 static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
130 static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
131 static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
132 #endif
133 static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
134 static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
135 static const uint64_t w1111       attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
136
137 #if 0
138 static volatile uint64_t __attribute__((aligned(8))) b5Dither;
139 static volatile uint64_t __attribute__((aligned(8))) g5Dither;
140 static volatile uint64_t __attribute__((aligned(8))) g6Dither;
141 static volatile uint64_t __attribute__((aligned(8))) r5Dither;
142
143 static uint64_t __attribute__((aligned(8))) dither4[2]={
144         0x0103010301030103LL,
145         0x0200020002000200LL,};
146
147 static uint64_t __attribute__((aligned(8))) dither8[2]={
148         0x0602060206020602LL,
149         0x0004000400040004LL,};
150 #endif
151 #endif /* defined(ARCH_X86) || defined(ARCH_X86_64) */
152
/* Number of fractional bits in the fixed-point RGB -> YUV coefficients. */
#define RGB2YUV_SHIFT 8

/*
 * Coefficient names are <input channel><output plane>, e.g. GY is the green
 * contribution to Y.  Each value is the real coefficient scaled by
 * 2^RGB2YUV_SHIFT, offset by 0.5 and truncated towards zero by the int cast.
 */
/* Y plane */
#define RY ((int)(0.5 + (1<<RGB2YUV_SHIFT)*( 0.257)))
#define GY ((int)(0.5 + (1<<RGB2YUV_SHIFT)*( 0.504)))
#define BY ((int)(0.5 + (1<<RGB2YUV_SHIFT)*( 0.098)))
/* U plane */
#define RU ((int)(0.5 + (1<<RGB2YUV_SHIFT)*(-0.148)))
#define GU ((int)(0.5 + (1<<RGB2YUV_SHIFT)*(-0.291)))
#define BU ((int)(0.5 + (1<<RGB2YUV_SHIFT)*( 0.439)))
/* V plane */
#define RV ((int)(0.5 + (1<<RGB2YUV_SHIFT)*( 0.439)))
#define GV ((int)(0.5 + (1<<RGB2YUV_SHIFT)*(-0.368)))
#define BV ((int)(0.5 + (1<<RGB2YUV_SHIFT)*(-0.071)))
163
// The template file is included once per CPU flavour.  Before each inclusion
// the HAVE_* feature macros are set to describe that flavour and RENAME() is
// redefined to append a matching suffix, which is how names such as
// rgb15to16_C and rgb15to16_MMX2 used by sws_rgb2rgb_init() come about.
// Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one.

// Plain C versions: every SIMD feature macro is cleared.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_SSE2
#define RENAME(a) a ## _C
#include "rgb2rgb_template.c"

// The SIMD flavours are only instantiated on x86 / x86-64 builds.
#if defined(ARCH_X86) || defined(ARCH_X86_64)

// MMX versions: MMX only.
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_SSE2
#define RENAME(a) a ## _MMX
#include "rgb2rgb_template.c"

// MMX2 versions: MMX plus MMX2.
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_SSE2
#define RENAME(a) a ## _MMX2
#include "rgb2rgb_template.c"

// 3DNOW versions: MMX plus 3DNOW, without MMX2.
// HAVE_MMX and HAVE_3DNOW stay defined after this point; the #if at the top
// of sws_rgb2rgb_init() tests them.
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#undef HAVE_SSE2
#define RENAME(a) a ## _3DNOW
#include "rgb2rgb_template.c"

#endif // ARCH_X86 || ARCH_X86_64
203
204 /*
205  rgb15->rgb16 Original by Strepto/Astral
206  ported to gcc & bugfixed : A'rpi
207  MMX2, 3DNOW optimization by Nick Kurshev
208  32bit c version, and and&add trick by Michael Niedermayer
209 */
210
211 void sws_rgb2rgb_init(int flags){
212 #if defined(HAVE_MMX2) || defined(HAVE_3DNOW) || defined(HAVE_MMX)
213         if(flags & SWS_CPU_CAPS_MMX2){
214                 rgb15to16= rgb15to16_MMX2;
215                 rgb15to24= rgb15to24_MMX2;
216                 rgb15to32= rgb15to32_MMX2;
217                 rgb16to24= rgb16to24_MMX2;
218                 rgb16to32= rgb16to32_MMX2;
219                 rgb16to15= rgb16to15_MMX2;
220                 rgb24to16= rgb24to16_MMX2;
221                 rgb24to15= rgb24to15_MMX2;
222                 rgb24to32= rgb24to32_MMX2;
223                 rgb32to16= rgb32to16_MMX2;
224                 rgb32to15= rgb32to15_MMX2;
225                 rgb32to24= rgb32to24_MMX2;
226                 rgb24tobgr15= rgb24tobgr15_MMX2;
227                 rgb24tobgr16= rgb24tobgr16_MMX2;
228                 rgb24tobgr24= rgb24tobgr24_MMX2;
229                 rgb32tobgr32= rgb32tobgr32_MMX2;
230                 rgb32tobgr16= rgb32tobgr16_MMX2;
231                 rgb32tobgr15= rgb32tobgr15_MMX2;
232                 yv12toyuy2= yv12toyuy2_MMX2;
233                 yv12touyvy= yv12touyvy_MMX2;
234                 yuv422ptoyuy2= yuv422ptoyuy2_MMX2;
235                 yuy2toyv12= yuy2toyv12_MMX2;
236 //              uyvytoyv12= uyvytoyv12_MMX2;
237 //              yvu9toyv12= yvu9toyv12_MMX2;
238                 planar2x= planar2x_MMX2;
239                 rgb24toyv12= rgb24toyv12_MMX2;
240                 interleaveBytes= interleaveBytes_MMX2;
241                 vu9_to_vu12= vu9_to_vu12_MMX2;
242                 yvu9_to_yuy2= yvu9_to_yuy2_MMX2;
243         }else if(flags & SWS_CPU_CAPS_3DNOW){
244                 rgb15to16= rgb15to16_3DNOW;
245                 rgb15to24= rgb15to24_3DNOW;
246                 rgb15to32= rgb15to32_3DNOW;
247                 rgb16to24= rgb16to24_3DNOW;
248                 rgb16to32= rgb16to32_3DNOW;
249                 rgb16to15= rgb16to15_3DNOW;
250                 rgb24to16= rgb24to16_3DNOW;
251                 rgb24to15= rgb24to15_3DNOW;
252                 rgb24to32= rgb24to32_3DNOW;
253                 rgb32to16= rgb32to16_3DNOW;
254                 rgb32to15= rgb32to15_3DNOW;
255                 rgb32to24= rgb32to24_3DNOW;
256                 rgb24tobgr15= rgb24tobgr15_3DNOW;
257                 rgb24tobgr16= rgb24tobgr16_3DNOW;
258                 rgb24tobgr24= rgb24tobgr24_3DNOW;
259                 rgb32tobgr32= rgb32tobgr32_3DNOW;
260                 rgb32tobgr16= rgb32tobgr16_3DNOW;
261                 rgb32tobgr15= rgb32tobgr15_3DNOW;
262                 yv12toyuy2= yv12toyuy2_3DNOW;
263                 yv12touyvy= yv12touyvy_3DNOW;
264                 yuv422ptoyuy2= yuv422ptoyuy2_3DNOW;
265                 yuy2toyv12= yuy2toyv12_3DNOW;
266 //              uyvytoyv12= uyvytoyv12_3DNOW;
267 //              yvu9toyv12= yvu9toyv12_3DNOW;
268                 planar2x= planar2x_3DNOW;
269                 rgb24toyv12= rgb24toyv12_3DNOW;
270                 interleaveBytes= interleaveBytes_3DNOW;
271                 vu9_to_vu12= vu9_to_vu12_3DNOW;
272                 yvu9_to_yuy2= yvu9_to_yuy2_3DNOW;
273         }else if(flags & SWS_CPU_CAPS_MMX){
274                 rgb15to16= rgb15to16_MMX;
275                 rgb15to24= rgb15to24_MMX;
276                 rgb15to32= rgb15to32_MMX;
277                 rgb16to24= rgb16to24_MMX;
278                 rgb16to32= rgb16to32_MMX;
279                 rgb16to15= rgb16to15_MMX;
280                 rgb24to16= rgb24to16_MMX;
281                 rgb24to15= rgb24to15_MMX;
282                 rgb24to32= rgb24to32_MMX;
283                 rgb32to16= rgb32to16_MMX;
284                 rgb32to15= rgb32to15_MMX;
285                 rgb32to24= rgb32to24_MMX;
286                 rgb24tobgr15= rgb24tobgr15_MMX;
287                 rgb24tobgr16= rgb24tobgr16_MMX;
288                 rgb24tobgr24= rgb24tobgr24_MMX;
289                 rgb32tobgr32= rgb32tobgr32_MMX;
290                 rgb32tobgr16= rgb32tobgr16_MMX;
291                 rgb32tobgr15= rgb32tobgr15_MMX;
292                 yv12toyuy2= yv12toyuy2_MMX;
293                 yv12touyvy= yv12touyvy_MMX;
294                 yuv422ptoyuy2= yuv422ptoyuy2_MMX;
295                 yuy2toyv12= yuy2toyv12_MMX;
296 //              uyvytoyv12= uyvytoyv12_MMX;
297 //              yvu9toyv12= yvu9toyv12_MMX;
298                 planar2x= planar2x_MMX;
299                 rgb24toyv12= rgb24toyv12_MMX;
300                 interleaveBytes= interleaveBytes_MMX;
301                 vu9_to_vu12= vu9_to_vu12_MMX;
302                 yvu9_to_yuy2= yvu9_to_yuy2_MMX;
303         }else
304 #endif /* defined(HAVE_MMX2) || defined(HAVE_3DNOW) || defined(HAVE_MMX) */
305         {
306                 rgb15to16= rgb15to16_C;
307                 rgb15to24= rgb15to24_C;
308                 rgb15to32= rgb15to32_C;
309                 rgb16to24= rgb16to24_C;
310                 rgb16to32= rgb16to32_C;
311                 rgb16to15= rgb16to15_C;
312                 rgb24to16= rgb24to16_C;
313                 rgb24to15= rgb24to15_C;
314                 rgb24to32= rgb24to32_C;
315                 rgb32to16= rgb32to16_C;
316                 rgb32to15= rgb32to15_C;
317                 rgb32to24= rgb32to24_C;
318                 rgb24tobgr15= rgb24tobgr15_C;
319                 rgb24tobgr16= rgb24tobgr16_C;
320                 rgb24tobgr24= rgb24tobgr24_C;
321                 rgb32tobgr32= rgb32tobgr32_C;
322                 rgb32tobgr16= rgb32tobgr16_C;
323                 rgb32tobgr15= rgb32tobgr15_C;
324                 yv12toyuy2= yv12toyuy2_C;
325                 yv12touyvy= yv12touyvy_C;
326                 yuv422ptoyuy2= yuv422ptoyuy2_C;
327                 yuy2toyv12= yuy2toyv12_C;
328 //              uyvytoyv12= uyvytoyv12_C;
329 //              yvu9toyv12= yvu9toyv12_C;
330                 planar2x= planar2x_C;
331                 rgb24toyv12= rgb24toyv12_C;
332                 interleaveBytes= interleaveBytes_C;
333                 vu9_to_vu12= vu9_to_vu12_C;
334                 yvu9_to_yuy2= yvu9_to_yuy2_C;
335         }
336 }
337
/**
 * Expand 8-bit palette indices to 4-byte pixels, reversing the order of the
 * first three bytes of each palette entry.
 * The palette is assumed to contain bgr32 (4 bytes per entry).
 * Only three bytes of every output pixel are written; the remaining byte
 * (offset 3, or offset 0 on big-endian builds) is left untouched.
 */
void palette8torgb32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        long n;

        for(n=0; n<num_pixels; n++, dst+=4)
        {
                const uint8_t *entry = &palette[ src[n]*4 ];
#ifdef WORDS_BIGENDIAN
                dst[3]= entry[2];
                dst[2]= entry[1];
                dst[1]= entry[0];
#else
                dst[0]= entry[2];
                dst[1]= entry[1];
                dst[2]= entry[0];
#endif
        }
}
366
/**
 * Expand 8-bit palette indices to 4-byte pixels, copying the first three
 * bytes of each 4-byte palette entry in their stored order.
 * Only three bytes of every output pixel are written; the remaining byte
 * (offset 3, or offset 0 on big-endian builds) is left untouched.
 */
void palette8tobgr32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        long n;

        for(n=0; n<num_pixels; n++, dst+=4)
        {
                const uint8_t *entry = &palette[ src[n]*4 ];
#ifdef WORDS_BIGENDIAN
                dst[3]= entry[0];
                dst[2]= entry[1];
                dst[1]= entry[2];
#else
                dst[0]= entry[0];
                dst[1]= entry[1];
                dst[2]= entry[2];
#endif
        }
}
387
/**
 * Expand 8-bit palette indices to 3-byte pixels, reversing the order of the
 * first three bytes of each palette entry.
 * The palette is assumed to contain bgr32 (4 bytes per entry).
 * Bytes are stored one at a time, so exactly 3*num_pixels bytes are written
 * and no alignment is required of dst.
 */
void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        long n;

        for(n=0; n<num_pixels; n++, dst+=3)
        {
                const uint8_t *entry = &palette[ src[n]*4 ];
                dst[0]= entry[2];
                dst[1]= entry[1];
                dst[2]= entry[0];
        }
}
408
/**
 * Expand 8-bit palette indices to 3-byte pixels, copying the first three
 * bytes of each 4-byte palette entry in their stored order.
 * Bytes are stored one at a time, so exactly 3*num_pixels bytes are written
 * and no alignment is required of dst.
 */
void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        long n;

        for(n=0; n<num_pixels; n++, dst+=3)
        {
                const uint8_t *entry = &palette[ src[n]*4 ];
                dst[0]= entry[0];
                dst[1]= entry[1];
                dst[2]= entry[2];
        }
}
426
/**
 * Expand 8-bit palette indices to 16-bit pixels by direct table lookup.
 * The palette is assumed to contain bgr16 (one 16-bit word per entry); see
 * rgb32to16 to convert the palette.
 * Both dst and palette are accessed as arrays of uint16_t.
 */
void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        const uint16_t *lut = (const uint16_t *)palette;
        uint16_t *out = (uint16_t *)dst;
        long n = 0;

        while(n < num_pixels)
        {
                out[n] = lut[ src[n] ];
                n++;
        }
}
/**
 * Expand 8-bit palette indices to 16-bit pixels, passing each looked-up
 * 16-bit palette word through bswap_16() before storing it.
 * Both dst and palette are accessed as arrays of uint16_t.
 */
void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        const uint16_t *lut = (const uint16_t *)palette;
        uint16_t *out = (uint16_t *)dst;
        long n = 0;

        while(n < num_pixels)
        {
                out[n] = bswap_16(lut[ src[n] ]);
                n++;
        }
}
442
/**
 * Expand 8-bit palette indices to 15-bit pixels (stored in 16-bit words) by
 * direct table lookup.
 * The palette is assumed to contain bgr15 (one 16-bit word per entry); see
 * rgb32to15 to convert the palette.
 * Both dst and palette are accessed as arrays of uint16_t.
 */
void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        const uint16_t *lut = (const uint16_t *)palette;
        uint16_t *out = (uint16_t *)dst;
        long n = 0;

        while(n < num_pixels)
        {
                out[n] = lut[ src[n] ];
                n++;
        }
}
/**
 * Expand 8-bit palette indices to 15-bit pixels (stored in 16-bit words),
 * passing each looked-up palette word through bswap_16() before storing it.
 * Both dst and palette are accessed as arrays of uint16_t.
 */
void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
        const uint16_t *lut = (const uint16_t *)palette;
        uint16_t *out = (uint16_t *)dst;
        long n = 0;

        while(n < num_pixels)
        {
                out[n] = bswap_16(lut[ src[n] ]);
                n++;
        }
}
458
/**
 * Convert 4-byte pixels to 3-byte pixels.
 * src_size is the source size in bytes; src_size/4 pixels are converted and
 * a trailing partial pixel is ignored.
 * Little-endian builds drop source byte 3 and reverse bytes 0..2; big-endian
 * builds drop source byte 0 and copy bytes 1..3 in order.
 */
void rgb32tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
{
        long left;

        for(left = src_size >> 2; left > 0; left--, src += 4, dst += 3)
        {
#ifdef WORDS_BIGENDIAN
                /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
                dst[0] = src[1];
                dst[1] = src[2];
                dst[2] = src[3];
#else
                dst[0] = src[2];
                dst[1] = src[1];
                dst[2] = src[0];
#endif
        }
}
477
/**
 * Convert 3-byte pixels to 4-byte pixels, reversing the component order and
 * storing 0 in the added byte.
 *
 * @param src       source pixels, 3 bytes each
 * @param dst       destination, 4 bytes are written per converted pixel
 * @param src_size  source size in bytes
 *
 * Only complete source pixels (src_size/3 of them) are converted.  The
 * previous loop condition (3*i < src_size) also processed a trailing partial
 * pixel, reading up to two bytes past the end of src and writing one extra
 * output pixel; truncating matches what rgb32tobgr24 does with src_size>>2.
 */
void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
{
        long i;
        long num_pixels = src_size / 3;

        for(i=0; i<num_pixels; i++)
        {
                #ifdef WORDS_BIGENDIAN
                        /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
                        dst[4*i + 0] = 0;
                        dst[4*i + 1] = src[3*i + 0];
                        dst[4*i + 2] = src[3*i + 1];
                        dst[4*i + 3] = src[3*i + 2];
                #else
                        dst[4*i + 0] = src[3*i + 2];
                        dst[4*i + 1] = src[3*i + 1];
                        dst[4*i + 2] = src[3*i + 0];
                        dst[4*i + 3] = 0;
                #endif
        }
}
497
/**
 * Expand 16-bit 5-6-5 pixels to 4 bytes each.
 * src is read as an array of uint16_t; src_size is its size in bytes.
 * Each field is moved to the top of its own byte (low bits zero).  On
 * little-endian builds the byte order is bits 15..11, bits 10..5, bits 4..0,
 * then a zero byte; big-endian builds emit the same four bytes reversed.
 */
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *in = (const uint16_t *)src;
        const uint16_t *const stop = in + src_size/2;

        for(; in < stop; in++, dst += 4)
        {
                const uint16_t px = *in;
                const uint8_t top5 = (px&0xF800)>>8;
                const uint8_t mid6 = (px&0x7E0)>>3;
                const uint8_t low5 = (px&0x1F)<<3;
#ifdef WORDS_BIGENDIAN
                dst[0] = 0;
                dst[1] = low5;
                dst[2] = mid6;
                dst[3] = top5;
#else
                dst[0] = top5;
                dst[1] = mid6;
                dst[2] = low5;
                dst[3] = 0;
#endif
        }
}
521
/**
 * Expand 16-bit 5-6-5 pixels to 3 bytes each.
 * src is read as an array of uint16_t; src_size is its size in bytes.
 * The bytes written per pixel are, in order: bits 15..11, bits 10..5 and
 * bits 4..0, each moved to the top of its byte with the low bits zero.
 */
void rgb16tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *in = (const uint16_t *)src;
        const uint16_t *const stop = in + src_size/2;

        for(; in < stop; in++, dst += 3)
        {
                const uint16_t px = *in;
                dst[0] = (px&0xF800)>>8;
                dst[1] = (px&0x7E0)>>3;
                dst[2] = (px&0x1F)<<3;
        }
}
537
/**
 * Swap the two outer 5-bit fields of 16-bit 5-6-5 pixels; the 6-bit middle
 * field stays in place.
 *
 * @param src       source pixels, accessed as an array of uint16_t
 * @param dst       destination pixels, accessed as an array of uint16_t
 * @param src_size  source size in bytes; src_size/2 pixels are converted
 *
 * The previous version indexed the uint8_t pointers directly (src[2*i],
 * dst[2*i]), so only the low byte of each pixel was read and only one byte
 * of each output pixel was written.  Whole 16-bit words are used now.
 */
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *s = (const uint16_t *)src;
        uint16_t *d = (uint16_t *)dst;
        long num_pixels = src_size >> 1;
        long i;

        for(i=0; i<num_pixels; i++)
        {
                unsigned rgb = s[i];
                unsigned low  = rgb & 0x1F;       /* bits 4..0   */
                unsigned mid  = rgb & 0x7E0;      /* bits 10..5, kept in place */
                unsigned high = rgb >> 11;        /* bits 15..11 */

                d[i] = high | mid | (low << 11);
        }
}
554
/**
 * Convert 16-bit 5-6-5 pixels to 15-bit 5-5-5 pixels while swapping the two
 * outer 5-bit fields.
 *
 * @param src       source pixels, accessed as an array of uint16_t
 * @param dst       destination pixels, accessed as an array of uint16_t
 * @param src_size  source size in bytes; src_size/2 pixels are converted
 *
 * Fixes relative to the previous version:
 *  - pixels are read and written as whole 16-bit words (src[2*i] / dst[2*i]
 *    on the byte pointers touched only one byte per pixel);
 *  - the 6-bit middle field is reduced to 5 bits by dropping its least
 *    significant bit; masking with 0x1F dropped the most significant one.
 * Bit 15 of every output pixel is 0.
 */
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *s = (const uint16_t *)src;
        uint16_t *d = (uint16_t *)dst;
        long num_pixels = src_size >> 1;
        long i;

        for(i=0; i<num_pixels; i++)
        {
                unsigned rgb = s[i];
                unsigned low  = rgb & 0x1F;           /* bits 4..0   */
                unsigned mid  = (rgb & 0x7C0) >> 6;   /* top 5 of bits 10..5 */
                unsigned high = rgb >> 11;            /* bits 15..11 */

                d[i] = high | (mid << 5) | (low << 10);
        }
}
571
/**
 * Expand 15-bit 5-5-5 pixels (stored in 16-bit words) to 4 bytes each.
 * src is read as an array of uint16_t; src_size is its size in bytes.
 * Each field is moved to the top of its own byte (low bits zero); bit 15 of
 * the source is ignored.  On little-endian builds the byte order is
 * bits 14..10, bits 9..5, bits 4..0, then a zero byte; big-endian builds
 * emit the same four bytes reversed.
 */
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *in = (const uint16_t *)src;
        const uint16_t *const stop = in + src_size/2;

        for(; in < stop; in++, dst += 4)
        {
                const uint16_t px = *in;
                const uint8_t top5 = (px&0x7C00)>>7;
                const uint8_t mid5 = (px&0x3E0)>>2;
                const uint8_t low5 = (px&0x1F)<<3;
#ifdef WORDS_BIGENDIAN
                dst[0] = 0;
                dst[1] = low5;
                dst[2] = mid5;
                dst[3] = top5;
#else
                dst[0] = top5;
                dst[1] = mid5;
                dst[2] = low5;
                dst[3] = 0;
#endif
        }
}
595
/**
 * Expand 15-bit 5-5-5 pixels (stored in 16-bit words) to 3 bytes each.
 * src is read as an array of uint16_t; src_size is its size in bytes.
 * The bytes written per pixel are, in order: bits 14..10, bits 9..5 and
 * bits 4..0, each moved to the top of its byte with the low bits zero.
 * Bit 15 of the source is ignored.
 */
void rgb15tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *in = (const uint16_t *)src;
        const uint16_t *const stop = in + src_size/2;

        for(; in < stop; in++, dst += 3)
        {
                const uint16_t px = *in;
                dst[0] = (px&0x7C00)>>7;
                dst[1] = (px&0x3E0)>>2;
                dst[2] = (px&0x1F)<<3;
        }
}
611
/**
 * Convert 15-bit 5-5-5 pixels to 16-bit 5-6-5 pixels while swapping the two
 * outer 5-bit fields.
 *
 * @param src       source pixels, accessed as an array of uint16_t
 * @param dst       destination pixels, accessed as an array of uint16_t
 * @param src_size  source size in bytes; src_size/2 pixels are converted
 *
 * Fixes relative to the previous version:
 *  - pixels are read and written as whole 16-bit words (src[2*i] / dst[2*i]
 *    on the byte pointers touched only one byte per pixel);
 *  - the 5-bit middle field is placed in the upper five bits of the 6-bit
 *    destination field (lowest bit 0); it used to land in the lower five
 *    bits, halving its value.
 * Bit 15 of the source is ignored.
 */
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *s = (const uint16_t *)src;
        uint16_t *d = (uint16_t *)dst;
        long num_pixels = src_size >> 1;
        long i;

        for(i=0; i<num_pixels; i++)
        {
                unsigned rgb = s[i];
                unsigned low  = rgb & 0x1F;            /* bits 4..0   */
                unsigned mid  = (rgb & 0x3E0) >> 5;    /* bits 9..5   */
                unsigned high = (rgb & 0x7C00) >> 10;  /* bits 14..10 */

                d[i] = high | (mid << 6) | (low << 11);
        }
}
628
/**
 * Swap the two outer 5-bit fields of 15-bit 5-5-5 pixels; the middle field
 * stays in place.
 *
 * @param src       source pixels, accessed as an array of uint16_t
 * @param dst       destination pixels, accessed as an array of uint16_t
 * @param src_size  source size in bytes; src_size/2 pixels are converted
 *
 * The previous version indexed the uint8_t pointers directly (src[2*i],
 * dst[2*i]), so only the low byte of each pixel was read and only one byte
 * of each output pixel was written.  Whole 16-bit words are used now.
 * Bit 15 of the source is ignored and bit 15 of the output is 0.
 */
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
{
        const uint16_t *s = (const uint16_t *)src;
        uint16_t *d = (uint16_t *)dst;
        long num_pixels = src_size >> 1;
        long i;

        for(i=0; i<num_pixels; i++)
        {
                unsigned rgb = s[i];
                unsigned low  = rgb & 0x1F;            /* bits 4..0   */
                unsigned mid  = rgb & 0x3E0;           /* bits 9..5, kept in place */
                unsigned high = (rgb & 0x7C00) >> 10;  /* bits 14..10 */

                d[i] = high | mid | (low << 10);
        }
}
645
/**
 * Reverse the field order of 8-bit packed pixels.
 * The source byte is split into a 3-bit field (bits 2..0), a 3-bit field
 * (bits 5..3) and a 2-bit field (bits 7..6).  The output byte holds the
 * former top field in bits 2..0, the middle field unchanged in bits 5..3 and
 * the former bottom field in bits 7..6.
 *
 * @param src       source pixels, one byte each
 * @param dst       destination pixels, one byte each
 * @param src_size  number of bytes (= pixels) to convert
 *
 * Widening the 2-bit field to 3 bits shifts it left by one (lowest bit 0).
 * Narrowing the 3-bit field to 2 bits now keeps its two most significant
 * bits; the previous mask (r & 0x03) kept the two least significant bits, so
 * e.g. an input field of 4 became 0 instead of 2.
 */
void rgb8tobgr8(const uint8_t *src, uint8_t *dst, long src_size)
{
        long i;
        long num_pixels = src_size;

        for(i=0; i<num_pixels; i++)
        {
                unsigned rgb = src[i];
                unsigned r = rgb & 0x07;         /* bits 2..0 */
                unsigned g = (rgb & 0x38) >> 3;  /* bits 5..3 */
                unsigned b = (rgb & 0xC0) >> 6;  /* bits 7..6 */

                dst[i] = (b << 1) | (g << 3) | ((r >> 1) << 6);
        }
}
660 }