]> git.sesse.net Git - vlc/blob - modules/video_chroma/i420_rgb.h
- video_chromas: more SSE2 and MMX support and optimization, added SSE2 i420 -> RGB...
[vlc] / modules / video_chroma / i420_rgb.h
1 /*****************************************************************************
2  * i420_rgb.h : YUV to bitmap RGB conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2004 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
/**
 * Entry count of the RGB palette (colormap).
 *
 * It sizes the p_rgb_r, p_rgb_g and p_rgb_b arrays of chroma_sys_t.
 */
#define CMAP_RGB2_SIZE 256
26
/**
 * chroma_sys_t: chroma method descriptor
 *
 * This structure is part of the chroma transformation descriptor; it holds
 * the properties that are specific to the yuv2rgb conversion.
 */
struct chroma_sys_t
{
    /* NOTE(review): presumably the line rendering buffer and the horizontal
     * scaling offsets that the SCALE_WIDTH macros walk through; confirm
     * against the conversion functions. */
    uint8_t *p_buffer;
    int     *p_offset;

#ifdef MODULE_NAME_IS_i420_rgb
    /* Pre-calculated conversion tables */
    void     *p_base;                     /**< base for all conversion tables */
    uint8_t  *p_rgb8;                     /**< RGB 8 bits table */
    uint16_t *p_rgb16;                    /**< RGB 16 bits table */
    uint32_t *p_rgb32;                    /**< RGB 32 bits table */

    /*
     * To get the RGB value for palette entry i, use
     * (p_rgb_r[i], p_rgb_g[i], p_rgb_b[i]). Note these are 16 bits per pixel.
     * For 8bpp entries, shift right 8 bits.
     */
    uint16_t p_rgb_r[CMAP_RGB2_SIZE];     /**< Red values of palette */
    uint16_t p_rgb_g[CMAP_RGB2_SIZE];     /**< Green values of palette */
    uint16_t p_rgb_b[CMAP_RGB2_SIZE];     /**< Blue values of palette */
#endif
};
54
55 /*****************************************************************************
56  * Prototypes
57  *****************************************************************************/
58 #ifdef MODULE_NAME_IS_i420_rgb
59 void E_(I420_RGB8)         ( vout_thread_t *, picture_t *, picture_t * );
60 void E_(I420_RGB16_dither) ( vout_thread_t *, picture_t *, picture_t * );
61 void E_(I420_RGB16)        ( vout_thread_t *, picture_t *, picture_t * );
62 void E_(I420_RGB32)        ( vout_thread_t *, picture_t *, picture_t * );
63 #else // if defined(MODULE_NAME_IS_i420_rgb_mmx)
64 void E_(I420_R5G5B5)       ( vout_thread_t *, picture_t *, picture_t * );
65 void E_(I420_R5G6B5)       ( vout_thread_t *, picture_t *, picture_t * );
66 void E_(I420_A8R8G8B8)     ( vout_thread_t *, picture_t *, picture_t * );
67 void E_(I420_B8G8R8A8)     ( vout_thread_t *, picture_t *, picture_t * );
68 #endif
69
/*****************************************************************************
 * CONVERT_*_PIXEL: pixel conversion macros
 *****************************************************************************
 * These conversion routines are used by the YUV conversion functions.
 * Conversions are made from p_y, p_u and p_v, which are modified, to
 * p_buffer (p_pic for the CONVERT_4YUV_* macros), which is also modified.
 * CONVERT_4YUV_PIXEL is used for 8bpp dithering; CONVERT_4YUV_PIXEL_SCALE
 * does the same but also scales the output.
 *****************************************************************************/
/*
 * CONVERT_Y_PIXEL: convert one pixel for which only a Y sample is read.
 *
 * Reads one luma sample through p_y (post-incremented), points p_ybase at
 * p_yuv + that sample, then stores in *p_buffer (post-incremented) the
 * bitwise OR of three p_ybase entries, one per colour component. i_red,
 * i_green and i_blue must already hold the chroma contributions (see
 * CONVERT_YUV_PIXEL). BPP is accepted but not used by the expansion.
 * The expansion is a bare statement sequence that ends with a semicolon.
 */
#define CONVERT_Y_PIXEL( BPP )                                                \
    p_ybase = p_yuv + *p_y++;                                                 \
    *p_buffer++ =                                                             \
        p_ybase[RED_OFFSET-((V_RED_COEF*128)>>SHIFT) + i_red]                 \
      | p_ybase[GREEN_OFFSET-(((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT)       \
                + i_green ]                                                   \
      | p_ybase[BLUE_OFFSET-((U_BLUE_COEF*128)>>SHIFT) + i_blue];
84
/*
 * CONVERT_YUV_PIXEL: convert one pixel for which Y, U and V samples are read.
 *
 * Reads one U and one V sample (both pointers are post-incremented), derives
 * the per-component chroma contributions i_red, i_green and i_blue from them,
 * then expands CONVERT_Y_PIXEL to emit the pixel. The contributions stay in
 * i_red/i_green/i_blue, so a following CONVERT_Y_PIXEL reuses them.
 *
 * The macro deliberately ends without a line continuation so that whatever
 * follows it in the file can never be spliced into its body.
 */
#define CONVERT_YUV_PIXEL( BPP )                                              \
    /* Y, U and V samples are present */                                      \
    i_uval  = *p_u++;                                                         \
    i_vval  = *p_v++;                                                         \
    i_red   = (V_RED_COEF * i_vval) >> SHIFT;                                 \
    i_green = (U_GREEN_COEF * i_uval + V_GREEN_COEF * i_vval) >> SHIFT;       \
    i_blue  = (U_BLUE_COEF * i_uval) >> SHIFT;                                \
    CONVERT_Y_PIXEL( BPP )

/*
 * CONVERT_Y_PIXEL_DITHER: dithered variant of CONVERT_Y_PIXEL.
 *
 * Same as CONVERT_Y_PIXEL, except that p_dither[i_real_y] is added to each
 * of the three constant terms before they are shifted right by SHIFT.
 * Reads one luma sample through p_y and writes one element through p_buffer
 * (both post-incremented). BPP is accepted but not used by the expansion.
 * The expansion is a bare statement sequence that ends with a semicolon.
 */
#define CONVERT_Y_PIXEL_DITHER( BPP )                                         \
    p_ybase = p_yuv + *p_y++;                                                 \
    *p_buffer++ =                                                             \
        p_ybase[RED_OFFSET-((V_RED_COEF*128+p_dither[i_real_y])>>SHIFT)       \
                + i_red]                                                      \
      | p_ybase[GREEN_OFFSET                                                  \
                -(((U_GREEN_COEF+V_GREEN_COEF)*128+p_dither[i_real_y])>>SHIFT)\
                + i_green ]                                                   \
      | p_ybase[BLUE_OFFSET-((U_BLUE_COEF*128+p_dither[i_real_y])>>SHIFT)     \
                + i_blue];
100
/*
 * CONVERT_YUV_PIXEL_DITHER: dithered variant of CONVERT_YUV_PIXEL.
 *
 * Reads one U and one V sample (both pointers are post-incremented), derives
 * the chroma contributions i_red, i_green and i_blue from them, then expands
 * CONVERT_Y_PIXEL_DITHER to emit the pixel.
 *
 * The macro deliberately ends without a line continuation so that whatever
 * follows it in the file can never be spliced into its body.
 */
#define CONVERT_YUV_PIXEL_DITHER( BPP )                                       \
    /* Y, U and V samples are present */                                      \
    i_uval  = *p_u++;                                                         \
    i_vval  = *p_v++;                                                         \
    i_red   = (V_RED_COEF * i_vval) >> SHIFT;                                 \
    i_green = (U_GREEN_COEF * i_uval + V_GREEN_COEF * i_vval) >> SHIFT;       \
    i_blue  = (U_BLUE_COEF * i_uval) >> SHIFT;                                \
    CONVERT_Y_PIXEL_DITHER( BPP )

/*
 * CONVERT_4YUV_PIXEL: convert four horizontally adjacent pixels through the
 * p_lookup table, applying a different dither value to each of them.
 *
 * For each pixel the table index is
 *     (((Y + dither1x[i_real_y]) >> 4) << 7)
 *   + ((U + dither2x[i_real_y]) >> 5) * 9
 *   +  ((V + dither2x[i_real_y]) >> 5)
 * where x is the pixel's position (0..3) within the group. p_y advances after
 * every pixel, p_u and p_v advance after the second and the fourth pixel
 * (each chroma sample serves two pixels), and p_pic advances after every
 * store. CHROMA is accepted but not used by the expansion.
 *
 * The macro deliberately ends without a line continuation so that whatever
 * follows it in the file can never be spliced into its body.
 */
#define CONVERT_4YUV_PIXEL( CHROMA )                                          \
    /* pixel 0: first chroma pair, pointers kept for pixel 1 */               \
    *p_pic++ = p_lookup[ (((*p_y++ + dither10[i_real_y]) >> 4) << 7)          \
                       + ((*p_u + dither20[i_real_y]) >> 5) * 9               \
                       + ((*p_v + dither20[i_real_y]) >> 5) ];                \
    /* pixel 1: same chroma pair, then move on to the next one */             \
    *p_pic++ = p_lookup[ (((*p_y++ + dither11[i_real_y]) >> 4) << 7)          \
                       + ((*p_u++ + dither21[i_real_y]) >> 5) * 9             \
                       + ((*p_v++ + dither21[i_real_y]) >> 5) ];              \
    /* pixel 2: second chroma pair, pointers kept for pixel 3 */              \
    *p_pic++ = p_lookup[ (((*p_y++ + dither12[i_real_y]) >> 4) << 7)          \
                       + ((*p_u + dither22[i_real_y]) >> 5) * 9               \
                       + ((*p_v + dither22[i_real_y]) >> 5) ];                \
    /* pixel 3: same chroma pair, then move on to the next one */             \
    *p_pic++ = p_lookup[ (((*p_y++ + dither13[i_real_y]) >> 4) << 7)          \
                       + ((*p_u++ + dither23[i_real_y]) >> 5) * 9             \
                       + ((*p_v++ + dither23[i_real_y]) >> 5) ];

/*
 * CONVERT_4YUV_PIXEL_SCALE: like CONVERT_4YUV_PIXEL, but the source pointers
 * are advanced according to the offset array instead of by fixed steps.
 *
 * Four output pixels are produced through p_lookup, each with its own dither
 * value. After every pixel two consecutive p_offset entries are consumed:
 * the first one is added to p_y, the second one is added to both p_u and
 * p_v. CHROMA is accepted but not used by the expansion.
 *
 * The macro deliberately ends without a line continuation so that whatever
 * follows it in the file can never be spliced into its body.
 */
#define CONVERT_4YUV_PIXEL_SCALE( CHROMA )                                    \
    /* pixel 0 */                                                             \
    *p_pic++ = p_lookup[ (((*p_y + dither10[i_real_y]) >> 4) << 7)            \
                       + ((*p_u + dither20[i_real_y]) >> 5) * 9               \
                       + ((*p_v + dither20[i_real_y]) >> 5) ];                \
    p_y += *p_offset++;                                                       \
    p_u += *p_offset;           /* U and V share the same offset entry */     \
    p_v += *p_offset++;                                                       \
    /* pixel 1 */                                                             \
    *p_pic++ = p_lookup[ (((*p_y + dither11[i_real_y]) >> 4) << 7)            \
                       + ((*p_u + dither21[i_real_y]) >> 5) * 9               \
                       + ((*p_v + dither21[i_real_y]) >> 5) ];                \
    p_y += *p_offset++;                                                       \
    p_u += *p_offset;                                                         \
    p_v += *p_offset++;                                                       \
    /* pixel 2 */                                                             \
    *p_pic++ = p_lookup[ (((*p_y + dither12[i_real_y]) >> 4) << 7)            \
                       + ((*p_u + dither22[i_real_y]) >> 5) * 9               \
                       + ((*p_v + dither22[i_real_y]) >> 5) ];                \
    p_y += *p_offset++;                                                       \
    p_u += *p_offset;                                                         \
    p_v += *p_offset++;                                                       \
    /* pixel 3 */                                                             \
    *p_pic++ = p_lookup[ (((*p_y + dither13[i_real_y]) >> 4) << 7)            \
                       + ((*p_u + dither23[i_real_y]) >> 5) * 9               \
                       + ((*p_v + dither23[i_real_y]) >> 5) ];                \
    p_y += *p_offset++;                                                       \
    p_u += *p_offset;                                                         \
    p_v += *p_offset++;

/*****************************************************************************
 * SCALE_WIDTH: scale a line horizontally
 *****************************************************************************
 * This macro scales a line using the rendering buffer and the offset array.
 * When b_hscale is set, p_buffer and p_offset are rewound to their start
 * values, then for each of the p_vout->output.i_width output pixels the
 * current buffer element is copied to the picture and the buffer pointer is
 * moved forward by the next offset entry; finally i_right_margin bytes are
 * skipped in the picture. When b_hscale is not set, the picture pointer is
 * simply advanced by p_dest->p->i_pitch bytes. i_x is used as a scratch loop
 * counter. It works for 1, 2 and 4 Bpp.
 *
 * The macro deliberately ends without a line continuation so that whatever
 * follows it in the file can never be spliced into its body.
 *****************************************************************************/

/* Private helper: copy one element and step the buffer by one offset entry */
#define SCALE_WIDTH_COPY_PIXEL                                                \
    *p_pic++ = *p_buffer;   p_buffer += *p_offset++;

#define SCALE_WIDTH                                                           \
    if( b_hscale )                                                            \
    {                                                                         \
        /* Horizontal scaling, conversion has been done to buffer.            \
         * Rewind buffer and offset, then copy and scale line */              \
        p_buffer = p_buffer_start;                                            \
        p_offset = p_offset_start;                                            \
        /* Full groups of 16 pixels, unrolled */                              \
        for( i_x = p_vout->output.i_width / 16; i_x--; )                      \
        {                                                                     \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
            SCALE_WIDTH_COPY_PIXEL  SCALE_WIDTH_COPY_PIXEL                    \
        }                                                                     \
        /* Remaining (width modulo 16) pixels */                              \
        for( i_x = p_vout->output.i_width & 15; i_x--; )                      \
        {                                                                     \
            SCALE_WIDTH_COPY_PIXEL                                            \
        }                                                                     \
        p_pic = (void*)((uint8_t*)p_pic + i_right_margin );                   \
    }                                                                         \
    else                                                                      \
    {                                                                         \
        /* No scaling, conversion has been done directly in picture memory.   \
         * Increment of picture pointer to end of line is still needed */     \
        p_pic = (void*)((uint8_t*)p_pic + p_dest->p->i_pitch );               \
    }

/*****************************************************************************
 * SCALE_WIDTH_DITHER: scale a line horizontally for dithered 8 bpp
 *****************************************************************************
 * This macro converts one line, scaling it with the offset array when
 * b_hscale is set. Pixels are produced in groups of 16 (four expansions of
 * CONVERT_4YUV_PIXEL_SCALE or CONVERT_4YUV_PIXEL per iteration); the loop
 * count is the output width (scaled) or the render width (unscaled) divided
 * by 16, so a remainder of fewer than 16 pixels is not converted. Afterwards
 * i_right_margin bytes are skipped in the picture and i_real_y, the row index
 * into the dither matrices, is incremented modulo 4.
 *
 * The macro deliberately ends without a line continuation so that whatever
 * follows it in the file can never be spliced into its body.
 *****************************************************************************/
#define SCALE_WIDTH_DITHER( CHROMA )                                          \
    if( b_hscale )                                                            \
    {                                                                         \
        /* Horizontal scaling - we can't use a buffer due to dithering */     \
        p_offset = p_offset_start;                                            \
        for( i_x = p_vout->output.i_width / 16; i_x--; )                      \
        {                                                                     \
            CONVERT_4YUV_PIXEL_SCALE( CHROMA )                                \
            CONVERT_4YUV_PIXEL_SCALE( CHROMA )                                \
            CONVERT_4YUV_PIXEL_SCALE( CHROMA )                                \
            CONVERT_4YUV_PIXEL_SCALE( CHROMA )                                \
        }                                                                     \
    }                                                                         \
    else                                                                      \
    {                                                                         \
        /* No scaling: source and destination advance in lockstep */          \
        for( i_x = p_vout->render.i_width / 16; i_x--;  )                     \
        {                                                                     \
            CONVERT_4YUV_PIXEL( CHROMA )                                      \
            CONVERT_4YUV_PIXEL( CHROMA )                                      \
            CONVERT_4YUV_PIXEL( CHROMA )                                      \
            CONVERT_4YUV_PIXEL( CHROMA )                                      \
        }                                                                     \
    }                                                                         \
    /* Increment of picture pointer to end of line is still needed */         \
    p_pic = (void*)((uint8_t*)p_pic + i_right_margin );                       \
                                                                              \
    /* Increment the Y coordinate in the matrix, modulo 4 */                  \
    i_real_y = (i_real_y + 1) & 0x3;

/*****************************************************************************
 * SCALE_HEIGHT: handle vertical scaling
 *****************************************************************************
 * This macro handles vertical scaling for a picture. CHROMA may be 420, 422
 * or 444 for RGB conversion, or 400 for gray conversion. It works for 1, 2, 3
 * and 4 Bpp.
 *
 * CHROMA  chroma layout of the source, compared against 420, 422 and 444
 * BPP     bytes per output pixel; output.i_width * BPP bytes are copied when
 *         a line is duplicated
 *
 * i_vscale selects the mode: -1 skips source lines (p_y, p_u, p_v and i_y
 * move forward), 1 duplicates the line found at p_pic_start into p_pic, any
 * other value leaves the pointers alone. i_scale_count is the running
 * accumulator that decides how many lines are skipped or duplicated.
 *
 * Both macro arguments are parenthesized in the expansion, so expressions
 * such as "1 + 1" may safely be passed. The macro deliberately ends without a
 * line continuation so that whatever follows it in the file can never be
 * spliced into its body.
 *
 * NOTE(review): the chroma rewind below is also applied for CHROMA == 422;
 * confirm that this is intended for sources with one chroma line per luma
 * line.
 *****************************************************************************/
#define SCALE_HEIGHT( CHROMA, BPP )                                           \
    /* When i_y is even, rewind the U and V pointers by one chroma line */    \
    if( (((CHROMA) == 420) || ((CHROMA) == 422)) && !(i_y & 0x1) )            \
    {                                                                         \
        p_u -= i_chroma_width;                                                \
        p_v -= i_chroma_width;                                                \
    }                                                                         \
                                                                              \
    /*                                                                        \
     * Handle vertical scaling. The current line can be copied or next one    \
     * can be ignored.                                                        \
     */                                                                       \
    switch( i_vscale )                                                        \
    {                                                                         \
    case -1:                             /* vertical scaling factor is < 1 */ \
        while( (i_scale_count -= p_vout->output.i_height) > 0 )               \
        {                                                                     \
            /* Height reduction: skip next source line */                     \
            p_y += p_vout->render.i_width;                                    \
            i_y++;                                                            \
            if( ((CHROMA) == 420) || ((CHROMA) == 422) )                      \
            {                                                                 \
                /* Chroma only moves on when the new line index is odd */     \
                if( i_y & 0x1 )                                               \
                {                                                             \
                    p_u += i_chroma_width;                                    \
                    p_v += i_chroma_width;                                    \
                }                                                             \
            }                                                                 \
            else if( (CHROMA) == 444 )                                        \
            {                                                                 \
                p_u += p_vout->render.i_width;                                \
                p_v += p_vout->render.i_width;                                \
            }                                                                 \
        }                                                                     \
        i_scale_count += p_vout->render.i_height;                             \
        break;                                                                \
    case 1:                              /* vertical scaling factor is > 1 */ \
        while( (i_scale_count -= p_vout->render.i_height) > 0 )               \
        {                                                                     \
            /* Height increment: copy previous picture line */                \
            p_vout->p_libvlc->pf_memcpy( p_pic, p_pic_start,                  \
                                      p_vout->output.i_width * (BPP) );       \
            p_pic = (void*)((uint8_t*)p_pic + p_dest->p->i_pitch );           \
        }                                                                     \
        i_scale_count += p_vout->output.i_height;                             \
        break;                                                                \
    default:                              /* any other value: nothing to do */\
        break;                                                                \
    }

/*****************************************************************************
 * SCALE_HEIGHT_DITHER: handle vertical scaling for dithered 8 bpp
 *****************************************************************************
 * This macro handles vertical scaling for a picture. CHROMA may be 420,
 * 422 or 444 for RGB conversion, or 400 for gray conversion.
 *
 * i_vscale selects the mode: -1 skips source lines (p_y, p_u, p_v and i_y
 * move forward), 1 rewinds the source pointers by one line and converts that
 * line again with SCALE_WIDTH_DITHER, any other value leaves the pointers
 * alone. i_scale_count is the running accumulator that decides how many
 * lines are skipped or repeated.
 *
 * CHROMA is parenthesized in the expansion, so an expression may safely be
 * passed. The macro deliberately ends without a line continuation so that
 * whatever follows it in the file can never be spliced into its body.
 *
 * NOTE(review): the chroma rewind below is also applied for CHROMA == 422;
 * confirm that this is intended for sources with one chroma line per luma
 * line.
 *****************************************************************************/
#define SCALE_HEIGHT_DITHER( CHROMA )                                         \
    /* When i_y is even, rewind the U and V pointers by one chroma line */    \
    if( (((CHROMA) == 420) || ((CHROMA) == 422)) && !(i_y & 0x1) )            \
    {                                                                         \
        p_u -= i_chroma_width;                                                \
        p_v -= i_chroma_width;                                                \
    }                                                                         \
                                                                              \
    /*                                                                        \
     * Handle vertical scaling. The current line can be copied or next one    \
     * can be ignored.                                                        \
     */                                                                       \
    switch( i_vscale )                                                        \
    {                                                                         \
    case -1:                             /* vertical scaling factor is < 1 */ \
        while( (i_scale_count -= p_vout->output.i_height) > 0 )               \
        {                                                                     \
            /* Height reduction: skip next source line */                     \
            p_y += p_vout->render.i_width;                                    \
            i_y++;                                                            \
            if( ((CHROMA) == 420) || ((CHROMA) == 422) )                      \
            {                                                                 \
                /* Chroma only moves on when the new line index is odd */     \
                if( i_y & 0x1 )                                               \
                {                                                             \
                    p_u += i_chroma_width;                                    \
                    p_v += i_chroma_width;                                    \
                }                                                             \
            }                                                                 \
            else if( (CHROMA) == 444 )                                        \
            {                                                                 \
                p_u += p_vout->render.i_width;                                \
                p_v += p_vout->render.i_width;                                \
            }                                                                 \
        }                                                                     \
        i_scale_count += p_vout->render.i_height;                             \
        break;                                                                \
    case 1:                              /* vertical scaling factor is > 1 */ \
        while( (i_scale_count -= p_vout->render.i_height) > 0 )               \
        {                                                                     \
            /* Height increment: step back one source line and convert it    \
             * again, since a dithered line cannot simply be copied */        \
            p_y -= p_vout->render.i_width;                                    \
            p_u -= i_chroma_width;                                            \
            p_v -= i_chroma_width;                                            \
            SCALE_WIDTH_DITHER( CHROMA );                                     \
        }                                                                     \
        i_scale_count += p_vout->output.i_height;                             \
        break;                                                                \
    default:                              /* any other value: nothing to do */\
        break;                                                                \
    }