]> git.sesse.net Git - vlc/blob - src/video_output/video_yuv.c
. disparition de la plupart des printf
[vlc] / src / video_output / video_yuv.c
1 /*****************************************************************************
2  * video_yuv.c: YUV transformation functions
3  * Provides functions to perform the YUV conversion. The functions provided here
4  * are a complete and portable C implementation, and may be replaced in certain
5  * case by optimized functions.
6  *****************************************************************************
7  * Copyright (C) 1999, 2000 VideoLAN
8  *
9  * Authors:
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
24  *****************************************************************************/
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include "defs.h"
30
31 #include <math.h>                                            /* exp(), pow() */
32 #include <errno.h>                                                 /* ENOMEM */
33 #include <stdlib.h>                                                /* free() */
34 #include <string.h>                                            /* strerror() */
35
36 #include "config.h"
37 #include "common.h"
38 #include "threads.h"
39 #include "mtime.h"
40 #include "plugins.h"
41 #include "video.h"
42 #include "video_output.h"
43 #include "video_yuv.h"
44
45 #include "intf_msg.h"
46
47 /*****************************************************************************
48  * Constants
49  *****************************************************************************/
50
51 /* Margins and offsets in conversion tables - Margins are used in case a YUV to
52  * RGB conversion would give a value outside the 0-255 range. Offsets have been
53  * calculated to avoid using the same cache line for 2 tables. Each sub-table
54  * is 2*MARGIN + 256 entries long and stores pixels; each *_OFFSET is the start
    * of its sub-table (see the ranges below) plus the corresponding margin. */
55 #define RED_MARGIN      178
56 #define GREEN_MARGIN    135
57 #define BLUE_MARGIN     224
58 #define RED_OFFSET      1501                                 /* 1323 to 1935 */
59 #define GREEN_OFFSET    135                                      /* 0 to 526 */
60 #define BLUE_OFFSET     818                                   /* 594 to 1298 */
61 #define RGB_TABLE_SIZE  1935                             /* total table size */
62
63 #define GRAY_MARGIN     384                            /* 2*384 + 256 = 1024 */
64 #define GRAY_TABLE_SIZE 1024                             /* total table size */
65
66 #define PALETTE_TABLE_SIZE 2176          /* YUV -> 8bpp palette lookup table */
67
68 /* Fixed-point coefficients used for YUV pixel conversions: each chroma factor
    * is divided by 1.164, scaled by (1<<SHIFT) and truncated to an int. Results
    * of multiplications by these values must be shifted right by SHIFT. */
69 #define SHIFT 20
70 #define U_GREEN_COEF    ((int)(-0.391 * (1<<SHIFT) / 1.164))
71 #define U_BLUE_COEF     ((int)(2.018 * (1<<SHIFT) / 1.164))
72 #define V_RED_COEF      ((int)(1.596 * (1<<SHIFT) / 1.164))
73 #define V_GREEN_COEF    ((int)(-0.813 * (1<<SHIFT) / 1.164))
74
75 #ifdef HAVE_MMX
76 /* 64-bit packed operands, only compiled when HAVE_MMX is defined. Each holds
    * four identical 16-bit words, except mmx_10w which holds 0x10 in every byte.
    * 0x253f is 1.164 * 2^13, truncated. Nothing here enforces cache line
    * alignment; it is only hoped for. Their users are outside this part of the
    * file. */
77 static unsigned long long mmx_80w     = 0x0080008000800080;
78 static unsigned long long mmx_10w     = 0x1010101010101010;
79 static unsigned long long mmx_00ffw   = 0x00ff00ff00ff00ff;
80 static unsigned long long mmx_Y_coeff = 0x253f253f253f253f;
81
82 /* Chroma coefficients as signed 16-bit words scaled by 2^13 and truncated:
    * -0.391, 2.018, 1.596 and -0.813 respectively. Alignment is only hoped for. */
83 static unsigned long long mmx_U_green = 0xf37df37df37df37d;
84 static unsigned long long mmx_U_blue  = 0x4093409340934093;
85 static unsigned long long mmx_V_red   = 0x3312331233123312;
86 static unsigned long long mmx_V_green = 0xe5fce5fce5fce5fc;
87
88 /* Byte masks keeping the top 5 bits (0xf8) and top 6 bits (0xfc) of every
    * byte, and two shift counts of 3. Presumably used to pack 5:6:5 RGB pixels -
    * TODO confirm against the MMX code. Alignment is only hoped for. */
89 static unsigned long long mmx_redmask = 0xf8f8f8f8f8f8f8f8;
90 static unsigned long long mmx_grnmask = 0xfcfcfcfcfcfcfcfc;
91 static unsigned long long mmx_grnshift   = 0x03;
92 static unsigned long long mmx_blueshift  = 0x03;
93 #endif
94
95 /*****************************************************************************
96  * Local prototypes
97  *****************************************************************************/
98 static void     SetGammaTable     ( int *pi_table, double f_gamma );
99 static void     SetYUV            ( vout_thread_t *p_vout );
100 static void     SetOffset         ( int i_width, int i_height, int i_pic_width, int i_pic_height,
101                                     boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset );
102 /* Converters, named Convert<source><Gray|RGB><depth>: they all share one
     * argument list apart from the pointee type of p_pic (u8, u16, u32, or void
     * for the 24-bit variants). */
103 static void     ConvertY4Gray8    ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
104                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
105                                     int i_matrix_coefficients );
106 static void     ConvertY4Gray16   ( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
107                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
108                                     int i_matrix_coefficients );
109 static void     ConvertY4Gray24   ( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
110                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
111                                     int i_matrix_coefficients );
112 static void     ConvertY4Gray32   ( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
113                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
114                                     int i_matrix_coefficients );
115 static void     ConvertYUV420RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
116                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
117                                     int i_matrix_coefficients );
118 static void     ConvertYUV422RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
119                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
120                                     int i_matrix_coefficients );
121 static void     ConvertYUV444RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
122                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
123                                     int i_matrix_coefficients );
124 static void     ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
125                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
126                                     int i_matrix_coefficients );
127 static void     ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
128                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
129                                     int i_matrix_coefficients );
130 static void     ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
131                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
132                                     int i_matrix_coefficients );
133 static void     ConvertYUV420RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
134                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
135                                     int i_matrix_coefficients );
136 static void     ConvertYUV422RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
137                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
138                                     int i_matrix_coefficients );
139 static void     ConvertYUV444RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
140                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
141                                     int i_matrix_coefficients );
142 static void     ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
143                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
144                                     int i_matrix_coefficients );
145 static void     ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
146                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
147                                     int i_matrix_coefficients );
148 static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
149                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
150                                     int i_matrix_coefficients );
151
152 /*****************************************************************************
153  * CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL: pixel conversion blocks
154  *****************************************************************************
155  * These statement macros are used by the YUV conversion functions. They read
156  * from p_y, p_u, p_v and write to p_buffer; every pointer they use is
157  * post-incremented. They expand in the caller's scope and expect p_yuv,
     * p_ybase, i_uval, i_vval, i_red, i_green and i_blue to be declared there.
     * BPP is not used by either macro.
     *
     * CONVERT_YUV_PIXEL loads one U and one V sample, computes the fixed-point
     * chroma terms i_red, i_green and i_blue, then converts one Y sample.
     * CONVERT_Y_PIXEL converts one Y sample, reusing whatever chroma terms are
     * currently in i_red, i_green and i_blue. The output pixel is the bitwise OR
     * of three look-ups in the table starting at p_yuv + Y. The
     * "- ((COEF * 128) >> SHIFT)" terms cancel the chroma term of a U/V sample
     * equal to 128, so such a sample adds no displacement to the table index.
158  *****************************************************************************/
159 #define CONVERT_Y_PIXEL( BPP )                                                \
160     /* Only Y sample is present */                                            \
161     p_ybase = p_yuv + *p_y++;                                                 \
162     *p_buffer++ = p_ybase[RED_OFFSET-((V_RED_COEF*128)>>SHIFT) + i_red] |     \
163         p_ybase[GREEN_OFFSET-(((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT)       \
164         + i_green ] | p_ybase[BLUE_OFFSET-((U_BLUE_COEF*128)>>SHIFT) + i_blue];
165
166 #define CONVERT_YUV_PIXEL( BPP )                                              \
167     /* Y, U and V samples are present */                                      \
168     i_uval =    *p_u++;                                                       \
169     i_vval =    *p_v++;                                                       \
170     i_red =     (V_RED_COEF * i_vval) >> SHIFT;                               \
171     i_green =   (U_GREEN_COEF * i_uval + V_GREEN_COEF * i_vval) >> SHIFT;     \
172     i_blue =    (U_BLUE_COEF * i_uval) >> SHIFT;                              \
173     CONVERT_Y_PIXEL( BPP )                                                    \
174
175 /*****************************************************************************
176  * CONVERT_4YUV_PIXELS, CONVERT_4YUV_PIXELS_SCALE: dither 4 pixels in 8 bpp
177  *****************************************************************************
178  * These macros dither 4 pixels in 8 bpp, with or without horiz. scaling.
     * They expand in the caller's scope; CHROMA is not used by either of them.
     *
     * Each output pixel is p_lookup[ index ], where
     *   index = (((Y + d1) >> 4) << 7) + ((U + d2) >> 5) * 9 + ((V + d2) >> 5)
     * d1 is taken from dither10..dither13 and d2 from dither20..dither23, one
     * table per pixel position, all indexed by i_real_y.
     *
     * CONVERT_4YUV_PIXELS reads 4 consecutive Y samples and 2 U/V pairs: p_u and
     * p_v are incremented after the 2nd and the 4th pixel only.
     * CONVERT_4YUV_PIXELS_SCALE reads the samples without incrementing, then for
     * each pixel adds *p_offset to b_jump_uv and to p_y, adds
     * (*p_offset & b_jump_uv) to p_u and p_v, and steps to the next p_offset
     * entry.
179  *****************************************************************************/
180 #define CONVERT_4YUV_PIXELS( CHROMA )                                         \
181     *p_pic++ = p_lookup[                                                      \
182         (((*p_y++ + dither10[i_real_y]) >> 4) << 7)                           \
183       + ((*p_u + dither20[i_real_y]) >> 5) * 9                                \
184       + ((*p_v + dither20[i_real_y]) >> 5) ];                                 \
185     *p_pic++ = p_lookup[                                                      \
186         (((*p_y++ + dither11[i_real_y]) >> 4) << 7)                           \
187       + ((*p_u++ + dither21[i_real_y]) >> 5) * 9                              \
188       + ((*p_v++ + dither21[i_real_y]) >> 5) ];                               \
189     *p_pic++ = p_lookup[                                                      \
190         (((*p_y++ + dither12[i_real_y]) >> 4) << 7)                           \
191       + ((*p_u + dither22[i_real_y]) >> 5) * 9                                \
192       + ((*p_v + dither22[i_real_y]) >> 5) ];                                 \
193     *p_pic++ = p_lookup[                                                      \
194         (((*p_y++ + dither13[i_real_y]) >> 4) << 7)                           \
195       + ((*p_u++ + dither23[i_real_y]) >> 5) * 9                              \
196       + ((*p_v++ + dither23[i_real_y]) >> 5) ];                               \
197
198 #define CONVERT_4YUV_PIXELS_SCALE( CHROMA )                                   \
199     *p_pic++ = p_lookup[                                                      \
200         (((*p_y + dither10[i_real_y]) >> 4) << 7)                             \
201         + ((*p_u + dither20[i_real_y])   >> 5) * 9                            \
202         + ((*p_v + dither20[i_real_y])   >> 5) ];                             \
203     b_jump_uv += *p_offset;                                                   \
204     p_y += *p_offset;                                                         \
205     p_u += *p_offset   & b_jump_uv;                                           \
206     p_v += *p_offset++ & b_jump_uv;                                           \
207     *p_pic++ = p_lookup[                                                      \
208         (((*p_y + dither11[i_real_y]) >> 4) << 7)                             \
209         + ((*p_u + dither21[i_real_y])   >> 5) * 9                            \
210         + ((*p_v + dither21[i_real_y])   >> 5) ];                             \
211     b_jump_uv += *p_offset;                                                   \
212     p_y += *p_offset;                                                         \
213     p_u += *p_offset   & b_jump_uv;                                           \
214     p_v += *p_offset++ & b_jump_uv;                                           \
215     *p_pic++ = p_lookup[                                                      \
216         (((*p_y + dither12[i_real_y]) >> 4) << 7)                             \
217         + ((*p_u + dither22[i_real_y])   >> 5) * 9                            \
218         + ((*p_v + dither22[i_real_y])   >> 5) ];                             \
219     b_jump_uv += *p_offset;                                                   \
220     p_y += *p_offset;                                                         \
221     p_u += *p_offset   & b_jump_uv;                                           \
222     p_v += *p_offset++ & b_jump_uv;                                           \
223     *p_pic++ = p_lookup[                                                      \
224         (((*p_y + dither13[i_real_y]) >> 4) << 7)                             \
225         + ((*p_u + dither23[i_real_y])   >> 5) * 9                            \
226         + ((*p_v + dither23[i_real_y])   >> 5) ];                             \
227     b_jump_uv += *p_offset;                                                   \
228     p_y += *p_offset;                                                         \
229     p_u += *p_offset   & b_jump_uv;                                           \
230     p_v += *p_offset++ & b_jump_uv;                                           \
231
232 /*****************************************************************************
233  * SCALE_WIDTH: scale a line horizontally
234  *****************************************************************************
235  * This macro scales a line using rendering buffer and offset array. It works
236  * for 1, 2 and 4 Bpp. It expands in the caller's scope.
     *
     * If b_horizontal_scaling is set, the converted line is read back starting
     * at p_buffer_start: each output pixel copies *p_buffer, then p_buffer
     * advances by the next entry of the offset array (rewound to
     * p_offset_start). Pixels are written in groups of 16, i_pic_width / 16
     * groups per line, so a remainder of i_pic_width modulo 16 is not written.
     * p_pic is then advanced by i_pic_line_width.
     * Otherwise nothing is copied and p_pic is advanced by
     * i_pic_width + i_pic_line_width.
237  *****************************************************************************/
238 #define SCALE_WIDTH                                                           \
239     if( b_horizontal_scaling )                                                \
240     {                                                                         \
241         /* Horizontal scaling, conversion has been done to buffer.            \
242          * Rewind buffer and offset, then copy and scale line */              \
243         p_buffer = p_buffer_start;                                            \
244         p_offset = p_offset_start;                                            \
245         for( i_x = i_pic_width / 16; i_x--; )                                 \
246         {                                                                     \
247             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
248             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
249             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
250             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
251             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
252             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
253             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
254             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
255             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
256             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
257             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
258             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
259             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
260             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
261             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
262             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
263         }                                                                     \
264         p_pic += i_pic_line_width;                                            \
265     }                                                                         \
266     else                                                                      \
267     {                                                                         \
268         /* No scaling, conversion has been done directly in picture memory.   \
269          * Increment of picture pointer to end of line is still needed */     \
270         p_pic += i_pic_width + i_pic_line_width;                              \
271     }                                                                         \
272
273
274 /*****************************************************************************
275  * SCALE_WIDTH_DITHER: scale a line horizontally for dithered 8 bpp
276  *****************************************************************************
277  * This macro converts one line to dithered 8 bpp, scaling it with an offset
     * array when needed. It expands in the caller's scope; CHROMA is only passed
     * on to the CONVERT_4YUV_PIXELS* macros.
     *
     * If b_horizontal_scaling is set, p_offset is rewound to p_offset_start,
     * b_jump_uv is cleared and i_pic_width / 16 iterations each emit 16 pixels
     * through CONVERT_4YUV_PIXELS_SCALE. Otherwise i_width / 16 iterations each
     * emit 16 pixels through CONVERT_4YUV_PIXELS. In both cases p_pic is then
     * advanced by i_pic_line_width and i_real_y moves to the next of its 4
     * values (0 to 3, wrapping).
278  *****************************************************************************/
279 #define SCALE_WIDTH_DITHER( CHROMA )                                          \
280     if( b_horizontal_scaling )                                                \
281     {                                                                         \
282         /* Horizontal scaling, but we can't use a buffer due to dither */     \
283         p_offset = p_offset_start;                                            \
284         b_jump_uv = 0;                                                        \
285         for( i_x = i_pic_width / 16; i_x--; )                                 \
286         {                                                                     \
287             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
288             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
289             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
290             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
291         }                                                                     \
292     }                                                                         \
293     else                                                                      \
294     {                                                                         \
295         for( i_x = i_width / 16; i_x--;  )                                    \
296         {                                                                     \
297             CONVERT_4YUV_PIXELS( CHROMA )                                     \
298             CONVERT_4YUV_PIXELS( CHROMA )                                     \
299             CONVERT_4YUV_PIXELS( CHROMA )                                     \
300             CONVERT_4YUV_PIXELS( CHROMA )                                     \
301         }                                                                     \
302     }                                                                         \
303     /* Increment of picture pointer to end of line is still needed */         \
304     p_pic += i_pic_line_width;                                                \
305     i_real_y = (i_real_y + 1) & 0x3;                                          \
306
307 /*****************************************************************************
308  * SCALE_HEIGHT: handle vertical scaling
309  *****************************************************************************
310  * This macro handles vertical scaling for a picture. CHROMA may be 420, 422 or
311  * 444 for RGB conversion, or 400 for gray conversion. It works for 1, 2, 3
312  * and 4 Bpp.
     *
     * It expands in the caller's scope and dispatches on i_vertical_scaling:
     *  -1  height reduction: while (i_scale_count -= i_pic_height) stays >= 0,
     *      source lines are skipped by advancing p_y by i_width; p_u, p_v and
     *      i_y are advanced according to CHROMA and to the parity of i_y.
     *      i_height is then added back to i_scale_count.
     *   1  height increase: while (i_scale_count -= i_height) stays > 0, the
     *      line at p_pic_start is copied to p_pic with 64-bit stores, 16 pixels
     *      of BPP bytes per iteration and i_pic_width >> 4 iterations per line
     *      (a remainder of i_pic_width modulo 16 is not copied); both pointers
     *      are then advanced by i_pic_line_width. i_pic_height is then added
     *      back to i_scale_count.
     * Any other value leaves everything untouched.
     *
     * The U/V rewind at the top of the macro is commented out; for 420 and 422
     * a rewind by i_chroma_width is done in the even-i_y branch of case -1.
     * NOTE(review): "((u64 *) p_pic)++" increments the result of a cast, which
     * is not an lvalue in ISO C; this relies on an old GCC extension - confirm
     * the supported compilers before touching this code.
313  *****************************************************************************/
314 #define SCALE_HEIGHT( CHROMA, BPP )                                           \
315                                                                               \
316     /* If line is odd, rewind 4:2:0 U and V samples */                        \
317     /*if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )                */\
318     /*{                                                                         */\
319       /*  p_u -= i_chroma_width;                                                */\
320       /*  p_v -= i_chroma_width;                                                */\
321     /*}                                                                         */\
322                                                                               \
323     /*                                                                        \
324      * Handle vertical scaling. The current line can be copied or next one    \
325      * can be ignored.                                                        \
326      */                                                                       \
327     switch( i_vertical_scaling )                                              \
328     {                                                                         \
329     case -1:                             /* vertical scaling factor is < 1 */ \
330         if( i_y & 0x1 )                                                       \
331         {                                                                     \
332             while( (i_scale_count -= i_pic_height) >= 0 )                     \
333             {                                                                 \
334             /* Height reduction: skip next source line */                     \
335                 p_y += i_width;                                               \
336                 if( (CHROMA == 420) || (CHROMA == 422) )                      \
337                 {                                                             \
338                     if( (i_scale_count -= i_pic_height) >= 0 )                \
339                     {                                                         \
340                         p_y += i_width;                                       \
341                         i_y += 2;                                             \
342                         p_u += i_chroma_width;                                \
343                         p_v += i_chroma_width;                                \
344                         continue;                                             \
345                     }                                                         \
346                     else                                                      \
347                     {                                                         \
348                         i_y++;                                                \
349                         break;                                                \
350                     }                                                         \
351                 }                                                             \
352                 else if( CHROMA == 444 )                                      \
353                 {                                                             \
354                     i_y++;                                                    \
355                     p_u += i_width;                                           \
356                     p_v += i_width;                                           \
357                 }                                                             \
358             }                                                                 \
359         }                                                                     \
360         else                                                                  \
361         {                                                                     \
362             if( CHROMA == 420 || CHROMA == 422 )                              \
363             {                                                                 \
364                 p_u -= i_chroma_width;                                        \
365                 p_v -= i_chroma_width;                                        \
366             }                                                                 \
367             while( (i_scale_count -= i_pic_height) >= 0 )                     \
368             {                                                                 \
369                 /* Height reduction: skip next source line */                 \
370                 p_y += i_width;                                               \
371                 if( (CHROMA == 420) || (CHROMA == 422) )                      \
372                 {                                                             \
373                     p_u += i_chroma_width;                                    \
374                     p_v += i_chroma_width;                                    \
375                     if( (i_scale_count -= i_pic_height) >= 0 )                \
376                     {                                                         \
377                         p_y += i_width;                                       \
378                         i_y+=2;                                               \
379                         continue;                                             \
380                     }                                                         \
381                     else                                                      \
382                     {                                                         \
383                         i_y++;                                                \
384                         break;                                                \
385                     }                                                         \
386                 }                                                             \
387                 else if( CHROMA == 444 )                                      \
388                 {                                                             \
389                     i_y++;                                                    \
390                     p_u += i_width;                                           \
391                     p_v += i_width;                                           \
392                 }                                                             \
393             }                                                                 \
394         }                                                                     \
395         i_scale_count += i_height;                                            \
396         break;                                                                \
397     case 1:                              /* vertical scaling factor is > 1 */ \
398         while( (i_scale_count -= i_height) > 0 )                              \
399         {                                                                     \
400             /* Height increment: copy previous picture line */                \
401             for( i_x = i_pic_width >> 4; i_x--; )                             \
402             {                                                                 \
403                 *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
404                 *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
405                 if( BPP > 1 )                               /* 2, 3, 4 Bpp */ \
406                 {                                                             \
407                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
408                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
409                 }                                                             \
410                 if( BPP > 2 )                                  /* 3, 4 Bpp */ \
411                 {                                                             \
412                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
413                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
414                 }                                                             \
415                 if( BPP > 3 )                                     /* 4 Bpp */ \
416                 {                                                             \
417                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
418                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
419                 }                                                             \
420             }                                                                 \
421             p_pic +=        i_pic_line_width;                                 \
422             p_pic_start +=  i_pic_line_width;                                 \
423         }                                                                     \
424         i_scale_count += i_pic_height;                                        \
425         break;                                                                \
426     }                                                                         \
427
/*****************************************************************************
 * SCALE_HEIGHT_DITHER: handle vertical scaling for dithered 8 bpp
 *****************************************************************************
 * This macro handles vertical scaling for a picture. CHROMA may be 420, 422 or
 * 444 for RGB conversion, or 400 for gray conversion.
 * It is not self-contained: it reads and updates variables of the function in
 * which it is expanded (i_y, p_y, p_u, p_v, p_pic, i_width, i_chroma_width,
 * i_height, i_pic_height, i_pic_line_width, i_vertical_scaling and
 * i_scale_count) and expands SCALE_WIDTH_DITHER when a line must be repeated.
 *****************************************************************************/
#define SCALE_HEIGHT_DITHER( CHROMA )                                         \
                                                                              \
    /* 4:2:0 and 4:2:2 input: when i_y is even, step U and V one chroma       \
     * line back */                                                           \
    if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )                \
    {                                                                         \
        p_u -= i_chroma_width;                                                \
        p_v -= i_chroma_width;                                                \
    }                                                                         \
                                                                              \
    if( i_vertical_scaling == -1 )                                            \
    {                                                                         \
        /* Scaling factor < 1: skip source lines while the counter allows */  \
        for( i_scale_count -= i_pic_height;                                   \
             i_scale_count >= 0;                                              \
             i_scale_count -= i_pic_height )                                  \
        {                                                                     \
            p_y += i_width;                                                   \
            i_y++;                                                            \
            if( CHROMA == 444 )                                               \
            {                                                                 \
                /* Full resolution chroma: one chroma line per luma line */   \
                p_u += i_width;                                               \
                p_v += i_width;                                               \
            }                                                                 \
            else if( ((CHROMA == 420) || (CHROMA == 422)) && (i_y & 0x1) )    \
            {                                                                 \
                /* Chroma only moves on when the new line index is odd */     \
                p_u += i_chroma_width;                                        \
                p_v += i_chroma_width;                                        \
            }                                                                 \
        }                                                                     \
        i_scale_count += i_height;                                            \
    }                                                                         \
    else if( i_vertical_scaling == 1 )                                        \
    {                                                                         \
        /* Scaling factor > 1: output the same source line again, then put    \
         * the source pointers back and move to the next picture line */      \
        for( i_scale_count -= i_height;                                       \
             i_scale_count > 0;                                               \
             i_scale_count -= i_height )                                      \
        {                                                                     \
            SCALE_WIDTH_DITHER( CHROMA );                                     \
            p_y -= i_width;                                                   \
            p_u -= i_chroma_width;                                            \
            p_v -= i_chroma_width;                                            \
            p_pic += i_pic_line_width;                                        \
        }                                                                     \
        i_scale_count += i_pic_height;                                        \
    }
483
484 /*****************************************************************************
485  * vout_InitYUV: allocate and initialize translations tables
486  *****************************************************************************
487  * This function will allocate memory to store translation tables, depending
488  * of the screen depth.
489  *****************************************************************************/
490 int vout_InitYUV( vout_thread_t *p_vout )
491 {
492     size_t      tables_size;                        /* tables size, in bytes */
493
494     /* Computes tables size - 3 Bpp use 32 bits pixel entries in tables */
495     switch( p_vout->i_bytes_per_pixel )
496     {
497     case 1:
498         tables_size = sizeof( u8 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : PALETTE_TABLE_SIZE);
499         break;
500     case 2:
501         tables_size = sizeof( u16 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
502         break;
503     case 3:
504     case 4:
505     default:
506         tables_size = sizeof( u32 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
507         break;
508     }
509
510     /* Allocate memory */
511     p_vout->yuv.p_base = malloc( tables_size );
512     if( p_vout->yuv.p_base == NULL )
513     {
514         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
515         return( 1 );
516     }
517
518     /* Allocate memory for conversion buffer and offset array */
519     p_vout->yuv.p_buffer = malloc( VOUT_MAX_WIDTH * p_vout->i_bytes_per_pixel );
520     if( p_vout->yuv.p_buffer == NULL )
521     {
522         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
523         free( p_vout->yuv.p_base );
524         return( 1 );
525     }
526     p_vout->yuv.p_offset = malloc( p_vout->i_width * sizeof( int ) );
527     if( p_vout->yuv.p_offset == NULL )
528     {
529         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
530         free( p_vout->yuv.p_base );
531         free( p_vout->yuv.p_buffer );
532         return( 1 );
533     }
534
535     /* Initialize tables */
536     SetYUV( p_vout );
537     return( 0 );
538 }
539
540 /*****************************************************************************
541  * vout_ResetTables: re-initialize translations tables
542  *****************************************************************************
543  * This function will initialize the tables allocated by vout_CreateTables and
544  * set functions pointers.
545  *****************************************************************************/
546 int vout_ResetYUV( vout_thread_t *p_vout )
547 {
548     vout_EndYUV( p_vout );
549     return( vout_InitYUV( p_vout ) );
550 }
551
552 /*****************************************************************************
553  * vout_EndYUV: destroy translations tables
554  *****************************************************************************
555  * Free memory allocated by vout_CreateTables.
556  *****************************************************************************/
557 void vout_EndYUV( vout_thread_t *p_vout )
558 {
559     free( p_vout->yuv.p_base );
560     free( p_vout->yuv.p_buffer );
561     free( p_vout->yuv.p_offset );
562 }
563
564 /* following functions are local */
565
566 /*****************************************************************************
567  * SetGammaTable: return intensity table transformed by gamma curve.
568  *****************************************************************************
569  * pi_table is a table of 256 entries from 0 to 255.
570  *****************************************************************************/
571 static void SetGammaTable( int *pi_table, double f_gamma )
572 {
573     int         i_y;                                       /* base intensity */
574
575     /* Use exp(gamma) instead of gamma */
576     f_gamma = exp( f_gamma );
577
578     /* Build gamma table */
579     for( i_y = 0; i_y < 256; i_y++ )
580     {
581         pi_table[ i_y ] = pow( (double)i_y / 256, f_gamma ) * 256;
582     }
583  }
584
585 /*****************************************************************************
586  * SetYUV: compute tables and set function pointers
587 + *****************************************************************************/
588 static void SetYUV( vout_thread_t *p_vout )
589 {
590     int         pi_gamma[256];                                /* gamma table */
591     int         i_index;                                  /* index in tables */
592
593     /* Build gamma table */
594     SetGammaTable( pi_gamma, p_vout->f_gamma );
595
596     /*
597      * Set pointers and build YUV tables
598      */
599     if( p_vout->b_grayscale )
600     {
601         /* Grayscale: build gray table */
602         switch( p_vout->i_bytes_per_pixel )
603         {
604         case 1:
605             {
606                 u16 bright[256], transp[256];
607
608                 p_vout->yuv.yuv.p_gray8 =  (u8 *)p_vout->yuv.p_base + GRAY_MARGIN;
609                 for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
610                 {
611                     p_vout->yuv.yuv.p_gray8[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
612                     p_vout->yuv.yuv.p_gray8[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
613                 }
614                 for( i_index = 0; i_index < 256; i_index++)
615                 {
616                     p_vout->yuv.yuv.p_gray8[ i_index ] = pi_gamma[ i_index ];
617                     bright[ i_index ] = i_index << 8;
618                     transp[ i_index ] = 0;
619                 }
620                 /* the colors have been allocated, we can set the palette */
621                 p_vout->p_set_palette( p_vout, bright, bright, bright, transp );
622                 p_vout->i_white_pixel = 0xff;
623                 p_vout->i_black_pixel = 0x00;
624                 p_vout->i_gray_pixel = 0x44;
625                 p_vout->i_blue_pixel = 0x3b;
626
627                 break;
628             }
629         case 2:
630             p_vout->yuv.yuv.p_gray16 =  (u16 *)p_vout->yuv.p_base + GRAY_MARGIN;
631             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
632             {
633                 p_vout->yuv.yuv.p_gray16[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
634                 p_vout->yuv.yuv.p_gray16[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
635             }
636             for( i_index = 0; i_index < 256; i_index++)
637             {
638                 p_vout->yuv.yuv.p_gray16[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
639             }
640             break;
641         case 3:
642         case 4:
643             p_vout->yuv.yuv.p_gray32 =  (u32 *)p_vout->yuv.p_base + GRAY_MARGIN;
644             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
645             {
646                 p_vout->yuv.yuv.p_gray32[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
647                 p_vout->yuv.yuv.p_gray32[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
648             }
649             for( i_index = 0; i_index < 256; i_index++)
650             {
651                 p_vout->yuv.yuv.p_gray32[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
652             }
653             break;
654          }
655     }
656     else
657     {
658         /* Color: build red, green and blue tables */
659         switch( p_vout->i_bytes_per_pixel )
660         {
661         case 1:
662             {
663                 #define RGB_MIN 0
664                 #define RGB_MAX 255
665                 #define CLIP( x ) ( ((x < 0) ? 0 : (x > 255) ? 255 : x) << 8 )
666
667                 int y,u,v;
668                 int r,g,b;
669                 int uvr, uvg, uvb;
670                 int i = 0, j = 0;
671                 u16 red[256], green[256], blue[256], transp[256];
672                 unsigned char lookup[PALETTE_TABLE_SIZE];
673
674                 p_vout->yuv.yuv.p_rgb8 = (u8 *)p_vout->yuv.p_base;
675
676                 /* this loop calculates the intersection of an YUV box
677                  * and the RGB cube. */
678                 for ( y = 0; y <= 256; y += 16 )
679                 {
680                     for ( u = 0; u <= 256; u += 32 )
681                     for ( v = 0; v <= 256; v += 32 )
682                     {
683                         uvr = (V_RED_COEF*(v-128)) >> SHIFT;
684                         uvg = (U_GREEN_COEF*(u-128) + V_GREEN_COEF*(v-128)) >> SHIFT;
685                         uvb = (U_BLUE_COEF*(u-128)) >> SHIFT;
686                         r = y + uvr;
687                         g = y + uvg;
688                         b = y + uvb;
689
690                         if( r >= RGB_MIN && g >= RGB_MIN && b >= RGB_MIN
691                                 && r <= RGB_MAX && g <= RGB_MAX && b <= RGB_MAX )
692                         {
693                             /* this one should never happen unless someone fscked up my code */
694                             if(j == 256) { intf_ErrMsg( "vout error: no colors left to build palette\n" ); break; }
695
696                             /* clip the colors */
697                             red[j] = CLIP( r );
698                             green[j] = CLIP( g );
699                             blue[j] = CLIP( b );
700                             transp[j] = 0;
701
702                             /* allocate color */
703                             lookup[i] = 1;
704                             p_vout->yuv.yuv.p_rgb8[i++] = j;
705                             j++;
706                         }
707                         else
708                         {
709                             lookup[i] = 0;
710                             p_vout->yuv.yuv.p_rgb8[i++] = 0;
711                         }
712                     }
713                     i += 128-81;
714                 }
715
716                 /* the colors have been allocated, we can set the palette */
717                 /* there will eventually be a way to know which colors
718                  * couldn't be allocated and try to find a replacement */
719                 p_vout->p_set_palette( p_vout, red, green, blue, transp );
720
721                 p_vout->i_white_pixel = 0xff;
722                 p_vout->i_black_pixel = 0x00;
723                 p_vout->i_gray_pixel = 0x44;
724                 p_vout->i_blue_pixel = 0x3b;
725
726                 i = 0;
727                 /* this loop allocates colors that got outside
728                  * the RGB cube */
729                 for ( y = 0; y <= 256; y += 16 )
730                 {
731                     for ( u = 0; u <= 256; u += 32 )
732                     for ( v = 0; v <= 256; v += 32 )
733                     {
734                         int u2, v2;
735                         int dist, mindist = 100000000;
736
737                         if( lookup[i] || y==0)
738                         {
739                             i++;
740                             continue;
741                         }
742
743                         /* heavy. yeah. */
744                         for( u2 = 0; u2 <= 256; u2 += 32 )
745                         for( v2 = 0; v2 <= 256; v2 += 32 )
746                         {
747                             j = ((y>>4)<<7) + (u2>>5)*9 + (v2>>5);
748                             dist = (u-u2)*(u-u2) + (v-v2)*(v-v2);
749                             if( lookup[j] )
750                             /* find the nearest color */
751                             if( dist < mindist )
752                             {
753                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
754                                 mindist = dist;
755                             }
756                             j -= 128;
757                             if( lookup[j] )
758                             /* find the nearest color */
759                             if( dist + 128 < mindist )
760                             {
761                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
762                                 mindist = dist + 128;
763                             }
764                         }
765                         i++;
766                     }
767                     i += 128-81;
768                 }
769
770                 break;
771             }
772         case 2:
773             p_vout->yuv.yuv.p_rgb16 = (u16 *)p_vout->yuv.p_base;
774             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
775             {
776                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
777                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
778             }
779             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
780             {
781                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
782                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
783             }
784             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
785             {
786                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
787                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
788             }
789             for( i_index = 0; i_index < 256; i_index++ )
790             {
791                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
792                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
793                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
794             }
795             break;
796         case 3:
797         case 4:
798             p_vout->yuv.yuv.p_rgb32 = (u32 *)p_vout->yuv.p_base;
799             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
800             {
801                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
802                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
803             }
804             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
805             {
806                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
807                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
808             }
809             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
810             {
811                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
812                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
813             }
814             for( i_index = 0; i_index < 256; i_index++ )
815             {
816                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
817                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
818                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
819             }
820             break;
821         }
822     }
823
824     /*
825      * Set functions pointers
826      */
827     if( p_vout->b_grayscale )
828     {
829         /* Grayscale */
830         switch( p_vout->i_bytes_per_pixel )
831         {
832         case 1:
833             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray8;
834             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray8;
835             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray8;
836             break;
837         case 2:
838             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray16;
839             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray16;
840             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray16;
841             break;
842         case 3:
843             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray24;
844             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray24;
845             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray24;
846             break;
847         case 4:
848             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray32;
849             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray32;
850             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray32;
851             break;
852         }
853     }
854     else
855     {
856         /* Color */
857         switch( p_vout->i_bytes_per_pixel )
858         {
859         case 1:
860             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertYUV420RGB8;
861             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertYUV422RGB8;
862             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertYUV444RGB8;
863             break;
864         case 2:
865             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB16;
866             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB16;
867             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB16;
868             break;
869         case 3:
870             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB24;
871             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB24;
872             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB24;
873             break;
874         case 4:
875             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB32;
876             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB32;
877             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB32;
878             break;
879         }
880     }
881 }
882
883 /*****************************************************************************
884  * SetOffset: build offset array for conversion functions
885  *****************************************************************************
886  * This function will build an offset array used in later conversion functions.
887  * It will also set horizontal and vertical scaling indicators.
888  *****************************************************************************/
889 static void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height,
890                        boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset )
891 {
892     int i_x;                                    /* x position in destination */
893     int i_scale_count;                                     /* modulo counter */
894
895     /*
896      * Prepare horizontal offset array
897      */
898     if( i_pic_width - i_width > 0 )
899     {
900         /* Prepare scaling array for horizontal extension */
901         *pb_h_scaling =  1;
902         i_scale_count =         i_pic_width;
903         for( i_x = i_width; i_x--; )
904         {
905             while( (i_scale_count -= i_width) > 0 )
906             {
907                 *p_offset++ = 0;
908             }
909             *p_offset++ = 1;
910             i_scale_count += i_pic_width;
911         }
912     }
913     else if( i_pic_width - i_width < 0 )
914     {
915         /* Prepare scaling array for horizontal reduction */
916         *pb_h_scaling =  1;
917         i_scale_count =         i_pic_width;
918         for( i_x = i_pic_width; i_x--; )
919         {
920             *p_offset = 1;
921             while( (i_scale_count -= i_pic_width) >= 0 )
922             {
923                 *p_offset += 1;
924             }
925             p_offset++;
926             i_scale_count += i_width;
927         }
928     }
929     else
930     {
931         /* No horizontal scaling: YUV conversion is done directly to picture */
932         *pb_h_scaling = 0;
933     }
934
935     /*
936      * Set vertical scaling indicator
937      */
938     if( i_pic_height - i_height > 0 )
939     {
940         *pi_v_scaling = 1;
941     }
942     else if( i_pic_height - i_height < 0 )
943     {
944         *pi_v_scaling = -1;
945     }
946     else
947     {
948         *pi_v_scaling = 0;
949     }
950 }
951
952 /*****************************************************************************
953  * ConvertY4Gray8: grayscale YUV 4:x:x to RGB 8 bpp
954  *****************************************************************************/
955 static void ConvertY4Gray8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y,
956                             yuv_data_t *p_u, yuv_data_t *p_v, int i_width,
957                             int i_height, int i_pic_width, int i_pic_height,
958                             int i_pic_line_width, int i_matrix_coefficients )
959 {
960     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
961     int         i_vertical_scaling;                 /* vertical scaling type */
962     int         i_x, i_y;                 /* horizontal and vertical indexes */
963     int         i_scale_count;                       /* scale modulo counter */
964     int         i_chroma_width;                    /* chroma width, not used */
965     u8 *        p_gray;                             /* base conversion table */
966     u8 *        p_pic_start;       /* beginning of the current line for copy */
967     u8 *        p_buffer_start;                   /* conversion buffer start */
968     u8 *        p_buffer;                       /* conversion buffer pointer */
969     int *       p_offset_start;                        /* offset array start */
970     int *       p_offset;                            /* offset array pointer */
971
972     /*
973      * Initialize some values  - i_pic_line_width will store the line skip
974      */
975     i_pic_line_width -= i_pic_width;
976     p_gray =            p_vout->yuv.yuv.p_gray8;
977     p_buffer_start =    p_vout->yuv.p_buffer;
978     p_offset_start =    p_vout->yuv.p_offset;
979     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
980                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
981
982     /*
983      * Perform conversion
984      */
985     i_scale_count = i_pic_height;
986     for( i_y = 0; i_y < i_height; i_y++ )
987     {
988         /* Mark beginnning of line for possible later line copy, and initialize
989          * buffer */
990         p_pic_start =   p_pic;
991         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
992
993         /* Do YUV conversion to buffer - YUV picture is always formed of 16
994          * pixels wide blocks */
995         for( i_x = i_width / 16; i_x--;  )
996         {
997             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
998             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
999             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1000             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1001             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1002             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1003             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1004             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1005         }
1006
1007         /* Do horizontal and vertical scaling */
1008         SCALE_WIDTH;
1009         SCALE_HEIGHT(400, 1);
1010     }
1011 }
1012
1013 /*****************************************************************************
1014  * ConvertY4Gray16: grayscale YUV 4:x:x to RGB 2 Bpp
1015  *****************************************************************************/
1016 static void ConvertY4Gray16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1017                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1018                              int i_matrix_coefficients )
1019 {
1020     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1021     int         i_vertical_scaling;                 /* vertical scaling type */
1022     int         i_x, i_y;                 /* horizontal and vertical indexes */
1023     int         i_scale_count;                       /* scale modulo counter */
1024     int         i_chroma_width;                    /* chroma width, not used */
1025     u16 *       p_gray;                             /* base conversion table */
1026     u16 *       p_pic_start;       /* beginning of the current line for copy */
1027     u16 *       p_buffer_start;                   /* conversion buffer start */
1028     u16 *       p_buffer;                       /* conversion buffer pointer */
1029     int *       p_offset_start;                        /* offset array start */
1030     int *       p_offset;                            /* offset array pointer */
1031
1032     /*
1033      * Initialize some values  - i_pic_line_width will store the line skip
1034      */
1035     i_pic_line_width -= i_pic_width;
1036     p_gray =            p_vout->yuv.yuv.p_gray16;
1037     p_buffer_start =    p_vout->yuv.p_buffer;
1038     p_offset_start =    p_vout->yuv.p_offset;
1039     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1040                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1041
1042     /*
1043      * Perform conversion
1044      */
1045     i_scale_count = i_pic_height;
1046     for( i_y = 0; i_y < i_height; i_y++ )
1047     {
1048         /* Mark beginnning of line for possible later line copy, and initialize
1049          * buffer */
1050         p_pic_start =   p_pic;
1051         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1052
1053         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1054          * pixels wide blocks */
1055         for( i_x = i_width / 16; i_x--;  )
1056         {
1057             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1058             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1059             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1060             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1061             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1062             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1063             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1064             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1065         }
1066
1067         /* Do horizontal and vertical scaling */
1068         SCALE_WIDTH;
1069         SCALE_HEIGHT(400, 2);
1070     }
1071 }
1072
/*****************************************************************************
 * ConvertY4Gray24: grayscale YUV 4:x:x to RGB 3 Bpp
 *****************************************************************************
 * Placeholder: the body is empty, so a call converts nothing and leaves the
 * memory pointed to by p_pic untouched. The parameter list is the one used by
 * the other bit depths, except that the picture pointer is untyped (void *).
 *****************************************************************************/
static void ConvertY4Gray24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                             int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
                             int i_matrix_coefficients )
{
    /* XXX?? not implemented: every parameter is currently ignored */
}
1082
1083 /*****************************************************************************
1084  * ConvertY4Gray32: grayscale YUV 4:x:x to RGB 4 Bpp
1085  *****************************************************************************/
1086 static void ConvertY4Gray32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1087                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1088                              int i_matrix_coefficients )
1089 {
1090     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1091     int         i_vertical_scaling;                 /* vertical scaling type */
1092     int         i_x, i_y;                 /* horizontal and vertical indexes */
1093     int         i_scale_count;                       /* scale modulo counter */
1094     int         i_chroma_width;                    /* chroma width, not used */
1095     u32 *       p_gray;                             /* base conversion table */
1096     u32 *       p_pic_start;       /* beginning of the current line for copy */
1097     u32 *       p_buffer_start;                   /* conversion buffer start */
1098     u32 *       p_buffer;                       /* conversion buffer pointer */
1099     int *       p_offset_start;                        /* offset array start */
1100     int *       p_offset;                            /* offset array pointer */
1101
1102     /*
1103      * Initialize some values  - i_pic_line_width will store the line skip
1104      */
1105     i_pic_line_width -= i_pic_width;
1106     p_gray =            p_vout->yuv.yuv.p_gray32;
1107     p_buffer_start =    p_vout->yuv.p_buffer;
1108     p_offset_start =    p_vout->yuv.p_offset;
1109     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1110                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1111
1112     /*
1113      * Perform conversion
1114      */
1115     i_scale_count = i_pic_height;
1116     for( i_y = 0; i_y < i_height; i_y++ )
1117     {
1118         /* Mark beginnning of line for possible later line copy, and initialize
1119          * buffer */
1120         p_pic_start =   p_pic;
1121         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1122
1123         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1124          * pixels wide blocks */
1125         for( i_x = i_width / 16; i_x--;  )
1126         {
1127             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1128             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1129             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1130             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1131             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1132             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1133             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1134             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1135         }
1136
1137         /* Do horizontal and vertical scaling */
1138         SCALE_WIDTH;
1139         SCALE_HEIGHT(400, 4);
1140     }
1141 }
1142
1143 /*****************************************************************************
1144  * ConvertYUV420RGB8: color YUV 4:2:0 to RGB 8 bpp
1145  *****************************************************************************/
1146 static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1147                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1148                                 int i_matrix_coefficients )
1149 {
1150     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1151     int         i_vertical_scaling;                 /* vertical scaling type */
1152     int         i_x, i_y;                 /* horizontal and vertical indexes */
1153     int         i_scale_count;                       /* scale modulo counter */
1154     int         b_jump_uv;                       /* should we jump u and v ? */
1155     int         i_real_y;                                           /* y % 4 */
1156     u8 *        p_lookup;                                    /* lookup table */
1157     int         i_chroma_width;                              /* chroma width */
1158     int *       p_offset_start;                        /* offset array start */
1159     int *       p_offset;                            /* offset array pointer */
1160
1161     int dither10[4] = {  0x0,  0x8,  0x2,  0xa };
1162     int dither11[4] = {  0xc,  0x4,  0xe,  0x6 };
1163     int dither12[4] = {  0x3,  0xb,  0x1,  0x9 };
1164     int dither13[4] = {  0xf,  0x7,  0xd,  0x5 };
1165
1166     int dither20[4] = {  0x0, 0x10,  0x4, 0x14 };
1167     int dither21[4] = { 0x18,  0x8, 0x1c,  0xc };
1168     int dither22[4] = {  0x6, 0x16,  0x2, 0x12 };
1169     int dither23[4] = { 0x1e,  0xe, 0x1a,  0xa };
1170
1171     #if 0
1172     /* other matrices that can be interesting, either for debugging or for
1173      * various effects */
1174     int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} };
1175     int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
1176     int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
1177     int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
1178     int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
1179     int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
1180     #endif
1181
1182     /*
1183      * Initialize some values  - i_pic_line_width will store the line skip
1184      */
1185     i_pic_line_width -= i_pic_width;
1186     i_chroma_width =    i_width / 2;
1187     p_offset_start =    p_vout->yuv.p_offset;
1188     p_lookup =          p_vout->yuv.p_base;
1189     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1190                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1191
1192     /*
1193      * Perform conversion
1194      */
1195     i_scale_count = i_pic_height;
1196     i_real_y = 0;
1197     for( i_y = 0; i_y < i_height; i_y++ )
1198     {
1199         /* Do horizontal and vertical scaling */
1200         SCALE_WIDTH_DITHER( 420 );
1201         SCALE_HEIGHT_DITHER( 420 );
1202     }
1203 }
1204
1205 /*****************************************************************************
1206  * ConvertYUV422RGB8: color YUV 4:2:2 to RGB 8 bpp
1207  *****************************************************************************/
1208 static void ConvertYUV422RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1209                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1210                                 int i_matrix_coefficients )
1211 {
1212     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1213     int         i_vertical_scaling;                 /* vertical scaling type */
1214     int         i_x, i_y;                 /* horizontal and vertical indexes */
1215     int         i_scale_count;                       /* scale modulo counter */
1216     int         i_uval, i_vval;                           /* U and V samples */
1217     int         i_red, i_green, i_blue;          /* U and V modified samples */
1218     int         i_chroma_width;                              /* chroma width */
1219     u8 *        p_yuv;                              /* base conversion table */
1220     u8 *        p_ybase;                     /* Y dependant conversion table */
1221     u8 *        p_pic_start;       /* beginning of the current line for copy */
1222     u8 *        p_buffer_start;                   /* conversion buffer start */
1223     u8 *        p_buffer;                       /* conversion buffer pointer */
1224     int *       p_offset_start;                        /* offset array start */
1225     int *       p_offset;                            /* offset array pointer */
1226
1227     /*
1228      * Initialize some values  - i_pic_line_width will store the line skip
1229      */
1230     i_pic_line_width -= i_pic_width;
1231     i_chroma_width =    i_width / 2;
1232     p_yuv =             p_vout->yuv.yuv.p_rgb8;
1233     p_buffer_start =    p_vout->yuv.p_buffer;
1234     p_offset_start =    p_vout->yuv.p_offset;
1235     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1236                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1237
1238     /*
1239      * Perform conversion
1240      */
1241     i_scale_count = i_pic_height;
1242     for( i_y = 0; i_y < i_height; i_y++ )
1243     {
1244         /* Mark beginnning of line for possible later line copy, and initialize
1245          * buffer */
1246         p_pic_start =   p_pic;
1247         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1248
1249         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1250          * pixels wide blocks */
1251         for( i_x = i_width / 16; i_x--;  )
1252         {
1253             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1254             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1255             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1256             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1257             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1258             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1259             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1260             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1261         }
1262
1263         /* Do horizontal and vertical scaling */
1264         SCALE_WIDTH;
1265         SCALE_HEIGHT(422, 1);
1266     }
1267 }
1268
1269 /*****************************************************************************
1270  * ConvertYUV444RGB8: color YUV 4:4:4 to RGB 8 bpp
1271  *****************************************************************************/
1272 static void ConvertYUV444RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1273                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1274                                 int i_matrix_coefficients )
1275 {
1276     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1277     int         i_vertical_scaling;                 /* vertical scaling type */
1278     int         i_x, i_y;                 /* horizontal and vertical indexes */
1279     int         i_scale_count;                       /* scale modulo counter */
1280     int         i_uval, i_vval;                           /* U and V samples */
1281     int         i_red, i_green, i_blue;          /* U and V modified samples */
1282     int         i_chroma_width;                    /* chroma width, not used */
1283     u8 *        p_yuv;                              /* base conversion table */
1284     u8 *        p_ybase;                     /* Y dependant conversion table */
1285     u8 *        p_pic_start;       /* beginning of the current line for copy */
1286     u8 *        p_buffer_start;                   /* conversion buffer start */
1287     u8 *        p_buffer;                       /* conversion buffer pointer */
1288     int *       p_offset_start;                        /* offset array start */
1289     int *       p_offset;                            /* offset array pointer */
1290
1291     /*
1292      * Initialize some values  - i_pic_line_width will store the line skip
1293      */
1294     i_pic_line_width -= i_pic_width;
1295     p_yuv =             p_vout->yuv.yuv.p_rgb8;
1296     p_buffer_start =    p_vout->yuv.p_buffer;
1297     p_offset_start =    p_vout->yuv.p_offset;
1298     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1299                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1300
1301     /*
1302      * Perform conversion
1303      */
1304     i_scale_count = i_pic_height;
1305     for( i_y = 0; i_y < i_height; i_y++ )
1306     {
1307         /* Mark beginnning of line for possible later line copy, and initialize
1308          * buffer */
1309         p_pic_start =   p_pic;
1310         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1311
1312         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1313          * pixels wide blocks */
1314         for( i_x = i_width / 16; i_x--;  )
1315         {
1316             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1317             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1318             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1319             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1320             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1321             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1322             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1323             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1324         }
1325
1326         /* Do horizontal and vertical scaling */
1327         SCALE_WIDTH;
1328         SCALE_HEIGHT(444, 1);
1329     }
1330 }
1331
1332 /*****************************************************************************
1333  * ConvertYUV420RGB16: color YUV 4:2:0 to RGB 2 Bpp
1334  *****************************************************************************/
1335 static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1336                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1337                                 int i_matrix_coefficients )
1338 {
1339 #if 0
1340     /* MMX version */
1341     int                 i_chroma_width, i_chroma_skip;      /* width and eol for chroma */
1342
1343     i_chroma_width =    i_width / 2;
1344     i_chroma_skip =     i_skip / 2;
1345     ConvertYUV420RGB16MMX( p_y, p_u, p_v, i_width, i_height,
1346                            (i_width + i_skip) * sizeof( yuv_data_t ),
1347                            (i_chroma_width + i_chroma_skip) * sizeof( yuv_data_t),
1348                            i_scale, (u8 *)p_pic, 0, 0, (i_width + i_pic_eol) * sizeof( u16 ),
1349                            p_vout->i_screen_depth == 15 );
1350 #endif
1351     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1352     int         i_vertical_scaling;                 /* vertical scaling type */
1353     int         i_x, i_y;                 /* horizontal and vertical indexes */
1354     int         i_scale_count;                       /* scale modulo counter */
1355 #ifndef HAVE_MMX
1356     int         i_uval, i_vval;                           /* U and V samples */
1357     int         i_red, i_green, i_blue;          /* U and V modified samples */
1358 #endif
1359     int         i_chroma_width;                              /* chroma width */
1360     u16 *       p_yuv;                              /* base conversion table */
1361 #ifndef HAVE_MMX
1362     u16 *       p_ybase;                     /* Y dependant conversion table */
1363 #endif
1364     u16 *       p_pic_start;       /* beginning of the current line for copy */
1365     u16 *       p_buffer_start;                   /* conversion buffer start */
1366     u16 *       p_buffer;                       /* conversion buffer pointer */
1367     int *       p_offset_start;                        /* offset array start */
1368     int *       p_offset;                            /* offset array pointer */
1369
1370     /*
1371      * Initialize some values  - i_pic_line_width will store the line skip
1372      */
1373     i_pic_line_width -= i_pic_width;
1374     i_chroma_width =    i_width / 2;
1375     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1376     p_buffer_start =    p_vout->yuv.p_buffer;
1377     p_offset_start =    p_vout->yuv.p_offset;
1378     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1379                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1380
1381     /*
1382      * Perform conversion
1383      */
1384     i_scale_count = i_pic_height;
1385     for( i_y = 0; i_y < i_height; i_y++ )
1386     {
1387         /* Mark beginnning of line for possible later line copy, and initialize
1388          * buffer */
1389         p_pic_start =   p_pic;
1390         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1391
1392
1393 #ifndef HAVE_MMX
1394
1395         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1396          * pixels wide blocks */
1397         for( i_x = i_width / 16; i_x--;  )
1398         {
1399             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1400             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1401             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1402             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1403             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1404             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1405             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1406             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1407         }
1408         SCALE_WIDTH;
1409         SCALE_HEIGHT(420, 2);
1410     }
1411     
1412 #else
1413         for ( i_x = i_width / 8; i_x--; )
1414         {
1415         __asm__ (
1416             "movd      (%1), %%mm0       # Load 4 Cb       00 00 00 00 u3 u2 u1 u0\n\t"
1417             "movd      (%2), %%mm1       # Load 4 Cr       00 00 00 00 v3 v2 v1 v0\n\t"
1418             "pxor      %%mm4, %%mm4      # zero mm4\n\t"
1419             "movq      (%0), %%mm6       # Load 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1420           //"movl      $0, (%3)          # cache preload for image\n\t"
1421              : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
1422
1423         __asm__ (
1424             ".align 8 \n\t"
1425             /* Do the multiply part of the conversion for even and odd pixels,
1426              * register usage:
1427              * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
1428              * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels,
1429              * mm6 -> Y even, mm7 -> Y odd */
1430             /* convert the chroma part */
1431             "punpcklbw %%mm4, %%mm0      # scatter 4 Cb    00 u3 00 u2 00 u1 00 u0\n\t"
1432             "punpcklbw %%mm4, %%mm1      # scatter 4 Cr    00 v3 00 v2 00 v1 00 v0\n\t"
1433             "psubsw    mmx_80w, %%mm0    # Cb -= 128\n\t"
1434             "psubsw    mmx_80w, %%mm1    # Cr -= 128\n\t"
1435             "psllw     $3, %%mm0         # Promote precision\n\t"
1436             "psllw     $3, %%mm1         # Promote precision\n\t"
1437             "movq      %%mm0, %%mm2      # Copy 4 Cb       00 u3 00 u2 00 u1 00 u0\n\t"
1438             "movq      %%mm1, %%mm3      # Copy 4 Cr       00 v3 00 v2 00 v1 00 v0\n\t"
1439             "pmulhw    mmx_U_green, %%mm2# Mul Cb with green coeff -> Cb green\n\t"
1440             "pmulhw    mmx_V_green, %%mm3# Mul Cr with green coeff -> Cr green\n\t"
1441             "pmulhw    mmx_U_blue, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0\n\t"
1442             "pmulhw    mmx_V_red, %%mm1  # Mul Cr -> Cred  00 r3 00 r2 00 r1 00 r0\n\t"
1443             "paddsw    %%mm3, %%mm2      # Cb green + Cr green -> Cgreen\n\t"
1444             /* convert the luma part */
1445             "psubusb   mmx_10w, %%mm6    # Y -= 16\n\t"
1446             "movq      %%mm6, %%mm7      # Copy 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1447             "pand      mmx_00ffw, %%mm6  # get Y even      00 Y6 00 Y4 00 Y2 00 Y0\n\t"
1448             "psrlw     $8, %%mm7         # get Y odd       00 Y7 00 Y5 00 Y3 00 Y1\n\t"
1449             "psllw     $3, %%mm6         # Promote precision\n\t"
1450             "psllw     $3, %%mm7         # Promote precision\n\t"
1451             "pmulhw    mmx_Y_coeff, %%mm6# Mul 4 Y even    00 y6 00 y4 00 y2 00 y0\n\t"
1452             "pmulhw    mmx_Y_coeff, %%mm7# Mul 4 Y odd     00 y7 00 y5 00 y3 00 y1\n\t"
1453             /* Do the addition part of the conversion for even and odd pixels,
1454              * register usage:
1455              * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
1456              * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels,
1457              * mm6 -> Y even, mm7 -> Y odd */                                                                                                                                        /* Do horizontal and vertical scaling */
1458             "movq      %%mm0, %%mm3      # Copy Cblue\n\t"
1459             "movq      %%mm1, %%mm4      # Copy Cred\n\t"
1460             "movq      %%mm2, %%mm5      # Copy Cgreen\n\t"
1461             "paddsw    %%mm6, %%mm0      # Y even + Cblue  00 B6 00 B4 00 B2 00 B0\n\t"
1462             "paddsw    %%mm7, %%mm3      # Y odd  + Cblue  00 B7 00 B5 00 B3 00 B1\n\t"
1463             "paddsw    %%mm6, %%mm1      # Y even + Cred   00 R6 00 R4 00 R2 00 R0\n\t"
1464             "paddsw    %%mm7, %%mm4      # Y odd  + Cred   00 R7 00 R5 00 R3 00 R1\n\t"
1465             "paddsw    %%mm6, %%mm2      # Y even + Cgreen 00 G6 00 G4 00 G2 00 G0\n\t"
1466             "paddsw    %%mm7, %%mm5      # Y odd  + Cgreen 00 G7 00 G5 00 G3 00 G1\n\t"
1467             /* Limit RGB even to 0..255 */
1468             "packuswb  %%mm0, %%mm0      # B6 B4 B2 B0 | B6 B4 B2 B0\n\t"
1469             "packuswb  %%mm1, %%mm1      # R6 R4 R2 R0 | R6 R4 R2 R0\n\t"
1470             "packuswb  %%mm2, %%mm2      # G6 G4 G2 G0 | G6 G4 G2 G0\n\t"
1471             /* Limit RGB odd to 0..255 */
1472             "packuswb  %%mm3, %%mm3      # B7 B5 B3 B1 | B7 B5 B3 B1\n\t"
1473             "packuswb  %%mm4, %%mm4      # R7 R5 R3 R1 | R7 R5 R3 R1\n\t"
1474             "packuswb  %%mm5, %%mm5      # G7 G5 G3 G1 | G7 G5 G3 G1\n\t"
1475             /* Interleave RGB even and odd */
1476             "punpcklbw %%mm3, %%mm0      #                 B7 B6 B5 B4 B3 B2 B1 B0\n\t"
1477             "punpcklbw %%mm4, %%mm1      #                 R7 R6 R5 R4 R3 R2 R1 R0\n\t"
1478             "punpcklbw %%mm5, %%mm2      #                 G7 G6 G5 G4 G3 G2 G1 G0\n\t"
1479             /* mask unneeded bits off */
1480             "pand      mmx_redmask, %%mm0# b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0\n\t"
1481             "pand      mmx_grnmask, %%mm2# g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0\n\t"
1482             "pand      mmx_redmask, %%mm1# r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0\n\t"
1483             "psrlw     mmx_blueshift,%%mm0#0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3\n\t"
1484             "pxor      %%mm4, %%mm4      # zero mm4\n\t"
1485             "movq      %%mm0, %%mm5      # Copy B7-B0\n\t"
1486             "movq      %%mm2, %%mm7      # Copy G7-G0\n\t"
1487             /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
1488             "punpcklbw %%mm4, %%mm2      #  0_0_0_0  0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
1489             "punpcklbw %%mm1, %%mm0      # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
1490             "psllw     mmx_blueshift,%%mm2#  0_0_0_0 0_g7g6g5 g4g3g2_0  0_0_0_0\n\t"
1491             "por       %%mm2, %%mm0      # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
1492             "movq      8(%0), %%mm6      # Load 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1493             "movq      %%mm0, (%3)       # store pixel 0-3\n\t"
1494             /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
1495             "punpckhbw %%mm4, %%mm7      #  0_0_0_0  0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
1496             "punpckhbw %%mm1, %%mm5      # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
1497             "psllw     mmx_blueshift,%%mm7#  0_0_0_0 0_g7g6g5 g4g3g2_0  0_0_0_0\n\t"
1498             "movd      4(%1), %%mm0      # Load 4 Cb       00 00 00 00 u3 u2 u1 u0\n\t"
1499             "por       %%mm7, %%mm5      # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
1500             "movd      4(%2), %%mm1      # Load 4 Cr       00 00 00 00 v3 v2 v1 v0\n\t"
1501             "movq      %%mm5, 8(%3)      # store pixel 4-7\n\t"
1502             : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
1503         p_y += 8;
1504         p_u += 4;
1505         p_v += 4;
1506         p_buffer += 8;
1507         }
1508         
1509         SCALE_WIDTH;
1510         SCALE_HEIGHT(420, 2);
1511     }
1512     __asm__ ("emms\n\t");
1513 #endif
1514 }
1515
1516 /*****************************************************************************
1517  * ConvertYUV422RGB16: color YUV 4:2:2 to RGB 2 Bpp
1518  *****************************************************************************/
1519 static void ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1520                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1521                                 int i_matrix_coefficients )
1522 {
1523     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1524     int         i_vertical_scaling;                 /* vertical scaling type */
1525     int         i_x, i_y;                 /* horizontal and vertical indexes */
1526     int         i_scale_count;                       /* scale modulo counter */
1527     int         i_uval, i_vval;                           /* U and V samples */
1528     int         i_red, i_green, i_blue;          /* U and V modified samples */
1529     int         i_chroma_width;                              /* chroma width */
1530     u16 *       p_yuv;                              /* base conversion table */
1531     u16 *       p_ybase;                     /* Y dependant conversion table */
1532     u16 *       p_pic_start;       /* beginning of the current line for copy */
1533     u16 *       p_buffer_start;                   /* conversion buffer start */
1534     u16 *       p_buffer;                       /* conversion buffer pointer */
1535     int *       p_offset_start;                        /* offset array start */
1536     int *       p_offset;                            /* offset array pointer */
1537
1538     /*
1539      * Initialize some values  - i_pic_line_width will store the line skip
1540      */
1541     i_pic_line_width -= i_pic_width;
1542     i_chroma_width =    i_width / 2;
1543     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1544     p_buffer_start =    p_vout->yuv.p_buffer;
1545     p_offset_start =    p_vout->yuv.p_offset;
1546     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1547                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1548
1549     /*
1550      * Perform conversion
1551      */
1552     i_scale_count = i_pic_height;
1553     for( i_y = 0; i_y < i_height; i_y++ )
1554     {
1555         /* Mark beginnning of line for possible later line copy, and initialize
1556          * buffer */
1557         p_pic_start =   p_pic;
1558         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1559
1560         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1561          * pixels wide blocks */
1562         for( i_x = i_width / 16; i_x--;  )
1563         {
1564             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1565             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1566             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1567             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1568             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1569             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1570             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1571             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1572         }
1573
1574         /* Do horizontal and vertical scaling */
1575         SCALE_WIDTH;
1576         SCALE_HEIGHT(422, 2);
1577     }
1578 }
1579
1580 /*****************************************************************************
1581  * ConvertYUV444RGB16: color YUV 4:4:4 to RGB 2 Bpp
1582  *****************************************************************************/
1583 static void ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1584                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1585                                 int i_matrix_coefficients )
1586 {
1587     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1588     int         i_vertical_scaling;                 /* vertical scaling type */
1589     int         i_x, i_y;                 /* horizontal and vertical indexes */
1590     int         i_scale_count;                       /* scale modulo counter */
1591     int         i_uval, i_vval;                           /* U and V samples */
1592     int         i_red, i_green, i_blue;          /* U and V modified samples */
1593     int         i_chroma_width;                    /* chroma width, not used */
1594     u16 *       p_yuv;                              /* base conversion table */
1595     u16 *       p_ybase;                     /* Y dependant conversion table */
1596     u16 *       p_pic_start;       /* beginning of the current line for copy */
1597     u16 *       p_buffer_start;                   /* conversion buffer start */
1598     u16 *       p_buffer;                       /* conversion buffer pointer */
1599     int *       p_offset_start;                        /* offset array start */
1600     int *       p_offset;                            /* offset array pointer */
1601
1602     /*
1603      * Initialize some values  - i_pic_line_width will store the line skip
1604      */
1605     i_pic_line_width -= i_pic_width;
1606     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1607     p_buffer_start =    p_vout->yuv.p_buffer;
1608     p_offset_start =    p_vout->yuv.p_offset;
1609     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1610                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1611
1612     /*
1613      * Perform conversion
1614      */
1615     i_scale_count = i_pic_height;
1616     for( i_y = 0; i_y < i_height; i_y++ )
1617     {
1618         /* Mark beginnning of line for possible later line copy, and initialize
1619          * buffer */
1620         p_pic_start =   p_pic;
1621         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1622
1623         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1624          * pixels wide blocks */
1625         for( i_x = i_width / 16; i_x--;  )
1626         {
1627             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1628             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1629             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1630             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1631             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1632             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1633             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1634             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1635         }
1636
1637         /* Do horizontal and vertical scaling */
1638         SCALE_WIDTH;
1639         SCALE_HEIGHT(444, 2);
1640     }
1641 }
1642
1643 /*****************************************************************************
1644  * ConvertYUV420RGB24: color YUV 4:2:0 to RGB 3 Bpp
1645  *****************************************************************************/
1646 static void ConvertYUV420RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1647                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1648                                 int i_matrix_coefficients )
1649 {
1650     /* XXX?? */
1651 }
1652
1653 /*****************************************************************************
1654  * ConvertYUV422RGB24: color YUV 4:2:2 to RGB 3 Bpp
1655  *****************************************************************************/
1656 static void ConvertYUV422RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1657                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1658                                 int i_matrix_coefficients )
1659 {
1660     /* XXX?? */
1661 }
1662
1663 /*****************************************************************************
1664  * ConvertYUV444RGB24: color YUV 4:4:4 to RGB 3 Bpp
1665  *****************************************************************************/
1666 static void ConvertYUV444RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1667                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1668                                 int i_matrix_coefficients )
1669 {
1670     /* XXX?? */
1671 }
1672
1673 /*****************************************************************************
1674  * ConvertYUV420RGB32: color YUV 4:2:0 to RGB 4 Bpp
1675  *****************************************************************************/
1676 static void ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1677                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1678                                 int i_matrix_coefficients )
1679 {
1680     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1681     int         i_vertical_scaling;                 /* vertical scaling type */
1682     int         i_x, i_y;                 /* horizontal and vertical indexes */
1683     int         i_scale_count;                       /* scale modulo counter */
1684     int         i_uval, i_vval;                           /* U and V samples */
1685     int         i_red, i_green, i_blue;          /* U and V modified samples */
1686     int         i_chroma_width;                              /* chroma width */
1687     u32 *       p_yuv;                              /* base conversion table */
1688     u32 *       p_ybase;                     /* Y dependant conversion table */
1689     u32 *       p_pic_start;       /* beginning of the current line for copy */
1690     u32 *       p_buffer_start;                   /* conversion buffer start */
1691     u32 *       p_buffer;                       /* conversion buffer pointer */
1692     int *       p_offset_start;                        /* offset array start */
1693     int *       p_offset;                            /* offset array pointer */
1694
1695     /*
1696      * Initialize some values  - i_pic_line_width will store the line skip
1697      */
1698     i_pic_line_width -= i_pic_width;
1699     i_chroma_width =    i_width / 2;
1700     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1701     p_buffer_start =    p_vout->yuv.p_buffer;
1702     p_offset_start =    p_vout->yuv.p_offset;
1703     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1704                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1705
1706     /*
1707      * Perform conversion
1708      */
1709     i_scale_count = i_pic_height;
1710     for( i_y = 0; i_y < i_height; i_y++ )
1711     {
1712         /* Mark beginnning of line for possible later line copy, and initialize
1713          * buffer */
1714         p_pic_start =   p_pic;
1715         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1716
1717         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1718          * pixels wide blocks */
1719         for( i_x = i_width / 16; i_x--;  )
1720         {
1721             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1722             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1723             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1724             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1725             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1726             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1727             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1728             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1729         }
1730
1731         /* Do horizontal and vertical scaling */
1732         SCALE_WIDTH;
1733         SCALE_HEIGHT(420, 4);
1734     }
1735 }
1736
1737 /*****************************************************************************
1738  * ConvertYUV422RGB32: color YUV 4:2:2 to RGB 4 Bpp
1739  *****************************************************************************/
1740 static void ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1741                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1742                                 int i_matrix_coefficients )
1743 {
1744     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1745     int         i_vertical_scaling;                 /* vertical scaling type */
1746     int         i_x, i_y;                 /* horizontal and vertical indexes */
1747     int         i_scale_count;                       /* scale modulo counter */
1748     int         i_uval, i_vval;                           /* U and V samples */
1749     int         i_red, i_green, i_blue;          /* U and V modified samples */
1750     int         i_chroma_width;                              /* chroma width */
1751     u32 *       p_yuv;                              /* base conversion table */
1752     u32 *       p_ybase;                     /* Y dependant conversion table */
1753     u32 *       p_pic_start;       /* beginning of the current line for copy */
1754     u32 *       p_buffer_start;                   /* conversion buffer start */
1755     u32 *       p_buffer;                       /* conversion buffer pointer */
1756     int *       p_offset_start;                        /* offset array start */
1757     int *       p_offset;                            /* offset array pointer */
1758
1759     /*
1760      * Initialize some values  - i_pic_line_width will store the line skip
1761      */
1762     i_pic_line_width -= i_pic_width;
1763     i_chroma_width =    i_width / 2;
1764     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1765     p_buffer_start =    p_vout->yuv.p_buffer;
1766     p_offset_start =    p_vout->yuv.p_offset;
1767     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1768                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1769
1770     /*
1771      * Perform conversion
1772      */
1773     i_scale_count = i_pic_height;
1774     for( i_y = 0; i_y < i_height; i_y++ )
1775     {
1776         /* Mark beginnning of line for possible later line copy, and initialize
1777          * buffer */
1778         p_pic_start =   p_pic;
1779         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1780
1781         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1782          * pixels wide blocks */
1783         for( i_x = i_width / 16; i_x--;  )
1784         {
1785             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1786             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1787             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1788             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1789             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1790             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1791             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1792             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1793         }
1794
1795         /* Do horizontal and vertical scaling */
1796         SCALE_WIDTH;
1797         SCALE_HEIGHT(422, 4);
1798     }
1799 }
1800
1801 /*****************************************************************************
1802  * ConvertYUV444RGB32: color YUV 4:4:4 to RGB 4 Bpp
1803  *****************************************************************************/
1804 static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1805                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1806                                 int i_matrix_coefficients )
1807 {
1808     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1809     int         i_vertical_scaling;                 /* vertical scaling type */
1810     int         i_x, i_y;                 /* horizontal and vertical indexes */
1811     int         i_scale_count;                       /* scale modulo counter */
1812     int         i_uval, i_vval;                           /* U and V samples */
1813     int         i_red, i_green, i_blue;          /* U and V modified samples */
1814     int         i_chroma_width;                    /* chroma width, not used */
1815     u32 *       p_yuv;                              /* base conversion table */
1816     u32 *       p_ybase;                     /* Y dependant conversion table */
1817     u32 *       p_pic_start;       /* beginning of the current line for copy */
1818     u32 *       p_buffer_start;                   /* conversion buffer start */
1819     u32 *       p_buffer;                       /* conversion buffer pointer */
1820     int *       p_offset_start;                        /* offset array start */
1821     int *       p_offset;                            /* offset array pointer */
1822
1823     /*
1824      * Initialize some values  - i_pic_line_width will store the line skip
1825      */
1826     i_pic_line_width -= i_pic_width;
1827     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1828     p_buffer_start =    p_vout->yuv.p_buffer;
1829     p_offset_start =    p_vout->yuv.p_offset;
1830     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1831                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1832
1833     /*
1834      * Perform conversion
1835      */
1836     i_scale_count = i_pic_height;
1837     for( i_y = 0; i_y < i_height; i_y++ )
1838     {
1839         /* Mark beginnning of line for possible later line copy, and initialize
1840          * buffer */
1841         p_pic_start =   p_pic;
1842         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1843
1844         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1845          * pixels wide blocks */
1846         for( i_x = i_width / 16; i_x--;  )
1847         {
1848             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1849             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1850             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1851             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1852             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1853             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1854             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1855             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1856         }
1857
1858         /* Do horizontal and vertical scaling */
1859         SCALE_WIDTH;
1860         SCALE_HEIGHT(444, 4);
1861     }
1862 }
1863
/*-------------------- walken code follows ----------------------------------*/

/*
 * YUV to RGB routines.
 *
 * These routines calculate r, g and b values from each pixel's y, u and v.
 * These r, g and b values are then passed through a table lookup to take the
 * gamma curve into account and find the corresponding pixel value.
 *
 * The table must store more than 3*256 values because of the possibility
 * of overflow in the yuv->rgb calculation. Actually the calculated r,g,b
 * values are in the following intervals:
 * -176 to 255+176 for red
 * -133 to 255+133 for green
 * -222 to 255+222 for blue
 *
 * If the input y,u,v values are right, the r,g,b results are not expected
 * to move out of the 0 to 255 interval but who knows what will happen in
 * real use...
 *
 * The red, green and blue conversion tables are stored in a single 1935-entry
 * array. The respective positions of each component in the array have been
 * calculated to minimize the cache interactions of the 3 tables.
 */
1888
#if 0
/* XXX?? everything down to the matching #endif is compiled out */

/*
 * yuvToRgb24Pixel: look up one pixel and store it as 3 bytes.
 *
 * tableY is the conversion table already advanced by the pixel's Y sample.
 * The red, green and blue entries are fetched around indexes 1501, 135 and
 * 818 respectively. Each index is shifted by the scaled chroma term, minus
 * the same term computed for a chroma value of 128. The three entries are
 * OR-ed together and stored in dest, least significant byte first.
 *
 * Returns dest advanced by the 3 bytes just written.
 */
static char * yuvToRgb24Pixel (char * dest, const int * tableY,
                               int uvRed, int uvGreen, int uvBlue)
{
    int tmp24;

    tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
             tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
                    uvGreen] |
             tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);

    *(dest++) = (char) tmp24;
    *(dest++) = (char) (tmp24 >> 8);
    *(dest++) = (char) (tmp24 >> 16);

    return dest;
}

/*
 * yuvToRgb24: convert one run of pixels to 3 bytes per pixel.
 *
 * Y      luma samples, one per pixel; width of them are read
 * U, V   chroma samples; one (U,V) couple is read for every two pixels, plus
 *        one more couple for the last pixel when width is odd
 * dest   output, receives 3 bytes per pixel
 * table  conversion table indexed by yuvToRgb24Pixel
 * width  number of pixels to convert; nothing is read or written when it is
 *        zero or negative
 *
 * Samples are read and bytes are written in strictly increasing order.
 */
static void yuvToRgb24 (unsigned char * Y,
                        unsigned char * U, unsigned char * V,
                        char * dest, int table[1935], int width)
{
    int u;
    int v;
    int uvRed;
    int uvGreen;
    int uvBlue;

    while (width > 0) {
        /* Chroma contribution, shared by the next one or two pixels */
        u = *(U++);
        v = *(V++);
        uvRed = (V_RED_COEF*v) >> SHIFT;
        uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
        uvBlue = (U_BLUE_COEF*u) >> SHIFT;

        dest = yuvToRgb24Pixel (dest, table + *(Y++),
                                uvRed, uvGreen, uvBlue);

        /* The second pixel of the couple is missing at the end of an odd
         * width run */
        if (width > 1) {
            dest = yuvToRgb24Pixel (dest, table + *(Y++),
                                    uvRed, uvGreen, uvBlue);
        }

        width -= 2;
    }
}
#endif