]> git.sesse.net Git - vlc/blob - src/video_output/video_yuv.c
Petites optims dans SCALE_HEIGHT pour les images de hauteur réduite.
[vlc] / src / video_output / video_yuv.c
1 /*****************************************************************************
2  * video_yuv.c: YUV transformation functions
3  * Provides functions to perform the YUV conversion. The functions provided here
4  * are a complete and portable C implementation, and may be replaced in certain
5  * cases by optimized functions.
6  *****************************************************************************
7  * Copyright (C) 1999, 2000 VideoLAN
8  *
9  * Authors:
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
24  *****************************************************************************/
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include "defs.h"
30
31 #include <math.h>                                            /* exp(), pow() */
32 #include <errno.h>                                                 /* ENOMEM */
33 #include <stdlib.h>                                                /* free() */
34 #include <string.h>                                            /* strerror() */
35
36 #include "config.h"
37 #include "common.h"
38 #include "threads.h"
39 #include "mtime.h"
40 #include "plugins.h"
41 #include "video.h"
42 #include "video_output.h"
43 #include "video_yuv.h"
44
45 #include "intf_msg.h"
46
47 /*****************************************************************************
48  * Constants
49  *****************************************************************************/
50
51 /* Margins and offsets in conversion tables - margins are used in case a YUV to
52  * RGB conversion would give a value outside the 0-255 range. Offsets have been
53  * calculated to avoid using the same cache line for 2 tables. Each table spans
54  * OFFSET - MARGIN to OFFSET + 256 + MARGIN (2*MARGIN + 256 pixel entries). */
55 #define RED_MARGIN      178
56 #define GREEN_MARGIN    135
57 #define BLUE_MARGIN     224
58 #define RED_OFFSET      1501                                 /* 1323 to 1935 */
59 #define GREEN_OFFSET    135                                      /* 0 to 526 */
60 #define BLUE_OFFSET     818                                   /* 594 to 1298 */
61 #define RGB_TABLE_SIZE  1935                             /* total table size */
62
63 #define GRAY_MARGIN     384
64 #define GRAY_TABLE_SIZE 1024           /* total size: 2 * GRAY_MARGIN + 256 */
65
66 #define PALETTE_TABLE_SIZE 2176          /* YUV -> 8bpp palette lookup table */
67
68 /* fixed-point chroma coefficients: SHIFT fractional bits, divided by 1.164 */
69 #define SHIFT 20
70 #define U_GREEN_COEF    ((int)(-0.391 * (1<<SHIFT) / 1.164))
71 #define U_BLUE_COEF     ((int)(2.018 * (1<<SHIFT) / 1.164))
72 #define V_RED_COEF      ((int)(1.596 * (1<<SHIFT) / 1.164))
73 #define V_GREEN_COEF    ((int)(-0.813 * (1<<SHIFT) / 1.164))
74
75 #define MMX  /* NOTE(review): defined unconditionally, so the #ifdef below is always taken */
76 #ifdef MMX
77 /* 64-bit packed constants; hope these constant values are cache line aligned */
78 static unsigned long long mmx_80w     = 0x0080008000800080; /* 0x0080 in each 16-bit word */
79 static unsigned long long mmx_10w     = 0x1010101010101010; /* 0x10 in each byte */
80 static unsigned long long mmx_00ffw   = 0x00ff00ff00ff00ff; /* 0x00ff in each 16-bit word */
81 static unsigned long long mmx_Y_coeff = 0x253f253f253f253f; /* 0x253f in each 16-bit word */
82
83 /* one 16-bit value repeated four times; hope these are cache line aligned */
84 static unsigned long long mmx_U_green = 0xf37df37df37df37d;
85 static unsigned long long mmx_U_blue  = 0x4093409340934093;
86 static unsigned long long mmx_V_red   = 0x3312331233123312;
87 static unsigned long long mmx_V_green = 0xe5fce5fce5fce5fc;
88
89 /* per-byte masks and shift counts; hope these are cache line aligned */
90 static unsigned long long mmx_redmask = 0xf8f8f8f8f8f8f8f8; /* 0xf8 in each byte */
91 static unsigned long long mmx_grnmask = 0xfcfcfcfcfcfcfcfc; /* 0xfc in each byte */
92 static unsigned long long mmx_grnshift   = 0x03;
93 static unsigned long long mmx_blueshift  = 0x03;
94 #endif
95
96 /*****************************************************************************
97  * Local prototypes: table setup helpers, then one converter per format/depth
98  *****************************************************************************/
99 static void     SetGammaTable     ( int *pi_table, double f_gamma );
100 static void     SetYUV            ( vout_thread_t *p_vout );
101 static void     SetOffset         ( int i_width, int i_height, int i_pic_width, int i_pic_height,
102                                     boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset );
103
104 static void     ConvertY4Gray8    ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
105                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
106                                     int i_matrix_coefficients );
107 static void     ConvertY4Gray16   ( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
108                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
109                                     int i_matrix_coefficients );
110 static void     ConvertY4Gray24   ( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
111                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
112                                     int i_matrix_coefficients );
113 static void     ConvertY4Gray32   ( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
114                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
115                                     int i_matrix_coefficients );
116 static void     ConvertYUV420RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
117                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
118                                     int i_matrix_coefficients );
119 static void     ConvertYUV422RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
120                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
121                                     int i_matrix_coefficients );
122 static void     ConvertYUV444RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
123                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
124                                     int i_matrix_coefficients );
125 static void     ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
126                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
127                                     int i_matrix_coefficients );
128 static void     ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
129                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
130                                     int i_matrix_coefficients );
131 static void     ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
132                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
133                                     int i_matrix_coefficients );
134 static void     ConvertYUV420RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
135                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
136                                     int i_matrix_coefficients );
137 static void     ConvertYUV422RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
138                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
139                                     int i_matrix_coefficients );
140 static void     ConvertYUV444RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
141                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
142                                     int i_matrix_coefficients );
143 static void     ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
144                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
145                                     int i_matrix_coefficients );
146 static void     ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
147                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
148                                     int i_matrix_coefficients );
149 static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
150                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
151                                     int i_matrix_coefficients );
152
153 /*****************************************************************************
154  * CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL: pixel conversion blocks
155  *****************************************************************************
156  * These macros are expanded inside the YUV conversion functions. They read
157  * from p_y, p_u and p_v and write to p_buffer, advancing every pointer they
158  * use. The BPP argument is not referenced by either expansion.
159  *****************************************************************************/
160 #define CONVERT_Y_PIXEL( BPP )                                                \
161     /* Only Y is read here; i_red, i_green, i_blue come from the caller */    \
162     p_ybase = p_yuv + *p_y++;                                                 \
163     *p_buffer++ = p_ybase[RED_OFFSET-((V_RED_COEF*128)>>SHIFT) + i_red] |     \
164         p_ybase[GREEN_OFFSET-(((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT)       \
165         + i_green ] | p_ybase[BLUE_OFFSET-((U_BLUE_COEF*128)>>SHIFT) + i_blue];
166
167 #define CONVERT_YUV_PIXEL( BPP )                                              \
168     /* Read one U/V pair, derive the chroma terms, then convert one Y */      \
169     i_uval =    *p_u++;                                                       \
170     i_vval =    *p_v++;                                                       \
171     i_red =     (V_RED_COEF * i_vval) >> SHIFT;                               \
172     i_green =   (U_GREEN_COEF * i_uval + V_GREEN_COEF * i_vval) >> SHIFT;     \
173     i_blue =    (U_BLUE_COEF * i_uval) >> SHIFT;                              \
174     CONVERT_Y_PIXEL( BPP )                                                    \
175
176 /*****************************************************************************
177  * CONVERT_4YUV_PIXELS, CONVERT_4YUV_PIXELS_SCALE: dither 4 pixels in 8 bpp
178  *****************************************************************************
179  * These macros dither 4 pixels in 8 bpp, with or without horizontal scaling.
180  *****************************************************************************/
181 #define CONVERT_4YUV_PIXELS( CHROMA )         /* CHROMA is not referenced */  \
182     *p_pic++ = p_lookup[                  /* pixel 0: U and V not advanced */ \
183         (((*p_y++ + dither10[i_real_y]) >> 4) << 7)                           \
184       + ((*p_u + dither20[i_real_y]) >> 5) * 9                                \
185       + ((*p_v + dither20[i_real_y]) >> 5) ];                                 \
186     *p_pic++ = p_lookup[                /* pixel 1: U and V advance after use */ \
187         (((*p_y++ + dither11[i_real_y]) >> 4) << 7)                           \
188       + ((*p_u++ + dither21[i_real_y]) >> 5) * 9                              \
189       + ((*p_v++ + dither21[i_real_y]) >> 5) ];                               \
190     *p_pic++ = p_lookup[                  /* pixel 2: U and V not advanced */ \
191         (((*p_y++ + dither12[i_real_y]) >> 4) << 7)                           \
192       + ((*p_u + dither22[i_real_y]) >> 5) * 9                                \
193       + ((*p_v + dither22[i_real_y]) >> 5) ];                                 \
194     *p_pic++ = p_lookup[                /* pixel 3: U and V advance after use */ \
195         (((*p_y++ + dither13[i_real_y]) >> 4) << 7)                           \
196       + ((*p_u++ + dither23[i_real_y]) >> 5) * 9                              \
197       + ((*p_v++ + dither23[i_real_y]) >> 5) ];                               \
198
199 #define CONVERT_4YUV_PIXELS_SCALE( CHROMA )   /* CHROMA is not referenced */  \
200     *p_pic++ = p_lookup[                   /* pixel 0, dither column 0 */     \
201         (((*p_y + dither10[i_real_y]) >> 4) << 7)                             \
202         + ((*p_u + dither20[i_real_y])   >> 5) * 9                            \
203         + ((*p_v + dither20[i_real_y])   >> 5) ];                             \
204     b_jump_uv += *p_offset;             /* running sum of source steps */     \
205     p_y += *p_offset;                   /* luma follows every step */         \
206     p_u += *p_offset   & b_jump_uv;     /* chroma moves by (step & sum) */    \
207     p_v += *p_offset++ & b_jump_uv;     /* then go to the next offset */      \
208     *p_pic++ = p_lookup[                   /* pixel 1, dither column 1 */     \
209         (((*p_y + dither11[i_real_y]) >> 4) << 7)                             \
210         + ((*p_u + dither21[i_real_y])   >> 5) * 9                            \
211         + ((*p_v + dither21[i_real_y])   >> 5) ];                             \
212     b_jump_uv += *p_offset;                                                   \
213     p_y += *p_offset;                                                         \
214     p_u += *p_offset   & b_jump_uv;                                           \
215     p_v += *p_offset++ & b_jump_uv;                                           \
216     *p_pic++ = p_lookup[                   /* pixel 2, dither column 2 */     \
217         (((*p_y + dither12[i_real_y]) >> 4) << 7)                             \
218         + ((*p_u + dither22[i_real_y])   >> 5) * 9                            \
219         + ((*p_v + dither22[i_real_y])   >> 5) ];                             \
220     b_jump_uv += *p_offset;                                                   \
221     p_y += *p_offset;                                                         \
222     p_u += *p_offset   & b_jump_uv;                                           \
223     p_v += *p_offset++ & b_jump_uv;                                           \
224     *p_pic++ = p_lookup[                   /* pixel 3, dither column 3 */     \
225         (((*p_y + dither13[i_real_y]) >> 4) << 7)                             \
226         + ((*p_u + dither23[i_real_y])   >> 5) * 9                            \
227         + ((*p_v + dither23[i_real_y])   >> 5) ];                             \
228     b_jump_uv += *p_offset;                                                   \
229     p_y += *p_offset;                                                         \
230     p_u += *p_offset   & b_jump_uv;                                           \
231     p_v += *p_offset++ & b_jump_uv;                                           \
232
233 /*****************************************************************************
234  * SCALE_WIDTH: scale a line horizontally
235  *****************************************************************************
236  * This macro scales a line using the rendering buffer and the offset array,
237  * 16 pixels per loop pass (i_pic_width / 16 passes). It works for 1, 2 and 4 Bpp.
238  *****************************************************************************/
239 #define SCALE_WIDTH                                                           \
240     if( b_horizontal_scaling )                                                \
241     {                                                                         \
242         /* Horizontal scaling, conversion has been done to buffer.            \
243          * Rewind buffer and offset, then copy and scale line */              \
244         p_buffer = p_buffer_start;                                            \
245         p_offset = p_offset_start;                                            \
246         for( i_x = i_pic_width / 16; i_x--; )                                 \
247         {                                                                     \
248             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
249             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
250             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
251             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
252             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
253             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
254             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
255             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
256             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
257             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
258             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
259             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
260             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
261             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
262             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
263             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
264         }                                                                     \
265         p_pic += i_pic_line_width;      /* skip to start of next output line */ \
266     }                                                                         \
267     else                                                                      \
268     {                                                                         \
269         /* No scaling, conversion has been done directly in picture memory.   \
270          * Increment of picture pointer to end of line is still needed */     \
271         p_pic += i_pic_width + i_pic_line_width;                              \
272     }                                                                         \
273
274
275 /*****************************************************************************
276  * SCALE_WIDTH_DITHER: scale a line horizontally for dithered 8 bpp
277  *****************************************************************************
278  * This macro converts one line, 16 pixels per pass, using the offset array when scaling.
279  *****************************************************************************/
280 #define SCALE_WIDTH_DITHER( CHROMA )                                          \
281     if( b_horizontal_scaling )                                                \
282     {                                                                         \
283         /* Horizontal scaling, but we can't use a buffer due to dither */     \
284         p_offset = p_offset_start;                                            \
285         b_jump_uv = 0;                                                        \
286         for( i_x = i_pic_width / 16; i_x--; )                                 \
287         {                                                                     \
288             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
289             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
290             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
291             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
292         }                                                                     \
293     }                                                                         \
294     else                                                                      \
295     {                                                                         \
296         for( i_x = i_width / 16; i_x--;  )                                    \
297         {                                                                     \
298             CONVERT_4YUV_PIXELS( CHROMA )                                     \
299             CONVERT_4YUV_PIXELS( CHROMA )                                     \
300             CONVERT_4YUV_PIXELS( CHROMA )                                     \
301             CONVERT_4YUV_PIXELS( CHROMA )                                     \
302         }                                                                     \
303     }                                                                         \
304     /* Increment of picture pointer to end of line is still needed */         \
305     p_pic += i_pic_line_width;                                                \
306     i_real_y = (i_real_y + 1) & 0x3;      /* dither row index wraps 0..3 */   \
307
308 /*****************************************************************************
309  * SCALE_HEIGHT: handle vertical scaling
310  *****************************************************************************
311  * This macro handles vertical scaling for a picture. CHROMA may be 420, 422 or
312  * 444 for RGB conversion, or 400 for gray conversion. It works for 1, 2, 3
313  * and 4 Bpp.
314  *****************************************************************************/
315 #define SCALE_HEIGHT( CHROMA, BPP )                                           \
316     /* Vertical scaling: -1 skips source lines, 1 repeats the output line.    \
317      * NOTE(review): case 1 applies ++ to a cast expression, which is a       \
318      * compiler extension rather than standard C -- confirm the toolchain.    \
319      */                                                                       \
320     switch( i_vertical_scaling )                                              \
321     {                                                                         \
322     case -1:                             /* vertical scaling factor is < 1 */ \
323         if( i_y & 0x1 )                                                       \
324         {                                                                     \
325             while( (i_scale_count -= i_pic_height) >= 0 )                     \
326             {                                                                 \
327             /* Height reduction: skip next source line */                     \
328                 p_y += i_width;                                               \
329                 if( (CHROMA == 420) || (CHROMA == 422) )                      \
330                 {                                                             \
331                     if( (i_scale_count -= i_pic_height) >= 0 )                \
332                     {                                                         \
333                         p_y += i_width;                                       \
334                         i_y+=2;                                               \
335                         p_u += i_chroma_width;                                \
336                         p_v += i_chroma_width;                                \
337                         continue;                                             \
338                     }                                                         \
339                     else                                                      \
340                     {                                                         \
341                         i_y++;                                                \
342                         break;                                                \
343                     }                                                         \
344                 }                                                             \
345                 else if( CHROMA == 444 )                                      \
346                 {                                                             \
347                     i_y++;                                                    \
348                     p_u += i_width;                                           \
349                     p_v += i_width;                                           \
350                 }                                                             \
351             }                                                                 \
352         }                                                                     \
353         else                                                                  \
354         {                                                                     \
355             if( CHROMA == 420 || CHROMA == 422 )                              \
356             {                                                                 \
357                 p_u -= i_chroma_width;    /* rewind chroma by one line */     \
358                 p_v -= i_chroma_width;                                        \
359             }                                                                 \
360             while( (i_scale_count -= i_pic_height) >= 0 )                     \
361             {                                                                 \
362             /* Height reduction: skip next source line */                     \
363                 p_y += i_width;                                               \
364                 if( (CHROMA == 420) || (CHROMA == 422) )                      \
365                 {                                                             \
366                     p_u += i_chroma_width;                                    \
367                     p_v += i_chroma_width;                                    \
368                     if( (i_scale_count -= i_pic_height) >= 0 )                \
369                     {                                                         \
370                         p_y += i_width;                                       \
371                         i_y+=2;                                               \
372                         continue;                                             \
373                     }                                                         \
374                     else                                                      \
375                     {                                                         \
376                         i_y++;                                                \
377                         break;                                                \
378                     }                                                         \
379                 }                                                             \
380                 else if( CHROMA == 444 )                                      \
381                 {                                                             \
382                     i_y++;                                                    \
383                     p_u += i_width;                                           \
384                     p_v += i_width;                                           \
385                 }                                                             \
386             }                                                                 \
387         }                                                                     \
388         i_scale_count += i_height;                                            \
389         break;                                                                \
390     case 1:                              /* vertical scaling factor is > 1 */ \
391         while( (i_scale_count -= i_height) > 0 )                              \
392         {                                                                     \
393             /* Height increment: copy previous picture line, 16 pixels/pass */ \
394             for( i_x = i_pic_width >> 4; i_x--; )                             \
395             {                                                                 \
396                 *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
397                 *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
398                 if( BPP > 1 )                               /* 2, 3, 4 Bpp */ \
399                 {                                                             \
400                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
401                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
402                 }                                                             \
403                 if( BPP > 2 )                                  /* 3, 4 Bpp */ \
404                 {                                                             \
405                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
406                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
407                 }                                                             \
408                 if( BPP > 3 )                                     /* 4 Bpp */ \
409                 {                                                             \
410                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
411                     *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
412                 }                                                             \
413             }                                                                 \
414             p_pic +=        i_pic_line_width;                                 \
415             p_pic_start +=  i_pic_line_width;                                 \
416         }                                                                     \
417         i_scale_count += i_pic_height;                                        \
418         break;                                                                \
419     }                                                                         \
420
/*****************************************************************************
 * SCALE_HEIGHT_DITHER: handle vertical scaling for dithered 8 bpp
 *****************************************************************************
 * This macro handles vertical scaling for a picture. CHROMA may be 420, 422 or
 * 444 for RGB conversion, or 400 for gray conversion.
 *
 * The macro expands to bare statements and works on variables of the
 * enclosing scope: it modifies i_y, p_y, p_u, p_v, p_pic and i_scale_count,
 * and reads i_width, i_height, i_pic_height, i_chroma_width,
 * i_pic_line_width and i_vertical_scaling. The enlargement case also expands
 * SCALE_WIDTH_DITHER( CHROMA ), which is defined elsewhere in this file.
 * When i_vertical_scaling is neither -1 nor 1 the switch does nothing.
 *****************************************************************************/
#define SCALE_HEIGHT_DITHER( CHROMA )                                         \
                                                                              \
    /* When i_y is even (420 and 422 only), step p_u and p_v back by */       \
    /* i_chroma_width */                                                      \
    if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )                \
    {                                                                         \
        p_u -= i_chroma_width;                                                \
        p_v -= i_chroma_width;                                                \
    }                                                                         \
                                                                              \
    /*                                                                        \
     * Handle vertical scaling. The current line can be copied or next one    \
     * can be ignored.                                                        \
     */                                                                       \
                                                                              \
    switch( i_vertical_scaling )                                              \
    {                                                                         \
    case -1:                             /* vertical scaling factor is < 1 */ \
        /* i_scale_count is the modulo counter deciding how many source */    \
        /* lines are dropped before the next one is converted */              \
        while( (i_scale_count -= i_pic_height) >= 0 )                         \
        {                                                                     \
            /* Height reduction: skip next source line */                     \
            p_y += i_width;                                                   \
            i_y++;                                                            \
            if( (CHROMA == 420) || (CHROMA == 422) )                          \
            {                                                                 \
                /* chroma pointers only advance when the new i_y is odd */    \
                if( i_y & 0x1 )                                               \
                {                                                             \
                    p_u += i_chroma_width;                                    \
                    p_v += i_chroma_width;                                    \
                }                                                             \
            }                                                                 \
            else if( CHROMA == 444 )                                          \
            {                                                                 \
                /* 444: chroma pointers advance by i_width with p_y */        \
                p_u += i_width;                                               \
                p_v += i_width;                                               \
            }                                                                 \
        }                                                                     \
        i_scale_count += i_height;                                            \
        break;                                                                \
    case 1:                              /* vertical scaling factor is > 1 */ \
        while( (i_scale_count -= i_height) > 0 )                              \
        {                                                                     \
            /* Height increase: expand SCALE_WIDTH_DITHER once more, then */  \
            /* move the source pointers back and p_pic forward; presumably */ \
            /* this draws the same source line again - confirm against */     \
            /* SCALE_WIDTH_DITHER */                                          \
            SCALE_WIDTH_DITHER( CHROMA );                                     \
            p_y -= i_width;                                                   \
            p_u -= i_chroma_width;                                            \
            p_v -= i_chroma_width;                                            \
            p_pic +=        i_pic_line_width;                                 \
        }                                                                     \
        i_scale_count += i_pic_height;                                        \
        break;                                                                \
    }                                                                         \
477
478 /*****************************************************************************
479  * vout_InitYUV: allocate and initialize translations tables
480  *****************************************************************************
481  * This function will allocate memory to store translation tables, depending
482  * of the screen depth.
483  *****************************************************************************/
484 int vout_InitYUV( vout_thread_t *p_vout )
485 {
486     size_t      tables_size;                        /* tables size, in bytes */
487
488     /* Computes tables size - 3 Bpp use 32 bits pixel entries in tables */
489     switch( p_vout->i_bytes_per_pixel )
490     {
491     case 1:
492         tables_size = sizeof( u8 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : PALETTE_TABLE_SIZE);
493         break;
494     case 2:
495         tables_size = sizeof( u16 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
496         break;
497     case 3:
498     case 4:
499     default:
500         tables_size = sizeof( u32 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
501         break;
502     }
503
504     /* Allocate memory */
505     p_vout->yuv.p_base = malloc( tables_size );
506     if( p_vout->yuv.p_base == NULL )
507     {
508         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
509         return( 1 );
510     }
511
512     /* Allocate memory for conversion buffer and offset array */
513     p_vout->yuv.p_buffer = malloc( VOUT_MAX_WIDTH * p_vout->i_bytes_per_pixel );
514     if( p_vout->yuv.p_buffer == NULL )
515     {
516         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
517         free( p_vout->yuv.p_base );
518         return( 1 );
519     }
520     p_vout->yuv.p_offset = malloc( p_vout->i_width * sizeof( int ) );
521     if( p_vout->yuv.p_offset == NULL )
522     {
523         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
524         free( p_vout->yuv.p_base );
525         free( p_vout->yuv.p_buffer );
526         return( 1 );
527     }
528
529     /* Initialize tables */
530     SetYUV( p_vout );
531     return( 0 );
532 }
533
534 /*****************************************************************************
535  * vout_ResetTables: re-initialize translations tables
536  *****************************************************************************
537  * This function will initialize the tables allocated by vout_CreateTables and
538  * set functions pointers.
539  *****************************************************************************/
540 int vout_ResetYUV( vout_thread_t *p_vout )
541 {
542     vout_EndYUV( p_vout );
543     return( vout_InitYUV( p_vout ) );
544 }
545
546 /*****************************************************************************
547  * vout_EndYUV: destroy translations tables
548  *****************************************************************************
549  * Free memory allocated by vout_CreateTables.
550  *****************************************************************************/
551 void vout_EndYUV( vout_thread_t *p_vout )
552 {
553     free( p_vout->yuv.p_base );
554     free( p_vout->yuv.p_buffer );
555     free( p_vout->yuv.p_offset );
556 }
557
558 /* following functions are local */
559
560 /*****************************************************************************
561  * SetGammaTable: return intensity table transformed by gamma curve.
562  *****************************************************************************
563  * pi_table is a table of 256 entries from 0 to 255.
564  *****************************************************************************/
565 static void SetGammaTable( int *pi_table, double f_gamma )
566 {
567     int         i_y;                                       /* base intensity */
568
569     /* Use exp(gamma) instead of gamma */
570     f_gamma = exp( f_gamma );
571
572     /* Build gamma table */
573     for( i_y = 0; i_y < 256; i_y++ )
574     {
575         pi_table[ i_y ] = pow( (double)i_y / 256, f_gamma ) * 256;
576     }
577  }
578
579 /*****************************************************************************
580  * SetYUV: compute tables and set function pointers
581 + *****************************************************************************/
582 static void SetYUV( vout_thread_t *p_vout )
583 {
584     int         pi_gamma[256];                                /* gamma table */
585     int         i_index;                                  /* index in tables */
586
587     /* Build gamma table */
588     SetGammaTable( pi_gamma, p_vout->f_gamma );
589
590     /*
591      * Set pointers and build YUV tables
592      */
593     if( p_vout->b_grayscale )
594     {
595         /* Grayscale: build gray table */
596         switch( p_vout->i_bytes_per_pixel )
597         {
598         case 1:
599             {
600                 u16 bright[256], transp[256];
601
602                 p_vout->yuv.yuv.p_gray8 =  (u8 *)p_vout->yuv.p_base + GRAY_MARGIN;
603                 for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
604                 {
605                     p_vout->yuv.yuv.p_gray8[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
606                     p_vout->yuv.yuv.p_gray8[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
607                 }
608                 for( i_index = 0; i_index < 256; i_index++)
609                 {
610                     p_vout->yuv.yuv.p_gray8[ i_index ] = pi_gamma[ i_index ];
611                     bright[ i_index ] = i_index << 8;
612                     transp[ i_index ] = 0;
613                 }
614                 /* the colors have been allocated, we can set the palette */
615                 p_vout->p_set_palette( p_vout, bright, bright, bright, transp );
616                 p_vout->i_white_pixel = 0xff;
617                 p_vout->i_black_pixel = 0x00;
618                 p_vout->i_gray_pixel = 0x44;
619                 p_vout->i_blue_pixel = 0x3b;
620
621                 break;
622             }
623         case 2:
624             p_vout->yuv.yuv.p_gray16 =  (u16 *)p_vout->yuv.p_base + GRAY_MARGIN;
625             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
626             {
627                 p_vout->yuv.yuv.p_gray16[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
628                 p_vout->yuv.yuv.p_gray16[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
629             }
630             for( i_index = 0; i_index < 256; i_index++)
631             {
632                 p_vout->yuv.yuv.p_gray16[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
633             }
634             break;
635         case 3:
636         case 4:
637             p_vout->yuv.yuv.p_gray32 =  (u32 *)p_vout->yuv.p_base + GRAY_MARGIN;
638             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
639             {
640                 p_vout->yuv.yuv.p_gray32[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
641                 p_vout->yuv.yuv.p_gray32[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
642             }
643             for( i_index = 0; i_index < 256; i_index++)
644             {
645                 p_vout->yuv.yuv.p_gray32[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
646             }
647             break;
648          }
649     }
650     else
651     {
652         /* Color: build red, green and blue tables */
653         switch( p_vout->i_bytes_per_pixel )
654         {
655         case 1:
656             {
657                 #define RGB_MIN 0
658                 #define RGB_MAX 255
659                 #define CLIP( x ) ( ((x < 0) ? 0 : (x > 255) ? 255 : x) << 8 )
660
661                 int y,u,v;
662                 int r,g,b;
663                 int uvr, uvg, uvb;
664                 int i = 0, j = 0;
665                 u16 red[256], green[256], blue[256], transp[256];
666                 unsigned char lookup[PALETTE_TABLE_SIZE];
667
668                 p_vout->yuv.yuv.p_rgb8 = (u8 *)p_vout->yuv.p_base;
669
670                 /* this loop calculates the intersection of an YUV box
671                  * and the RGB cube. */
672                 for ( y = 0; y <= 256; y += 16 )
673                 {
674                     for ( u = 0; u <= 256; u += 32 )
675                     for ( v = 0; v <= 256; v += 32 )
676                     {
677                         uvr = (V_RED_COEF*(v-128)) >> SHIFT;
678                         uvg = (U_GREEN_COEF*(u-128) + V_GREEN_COEF*(v-128)) >> SHIFT;
679                         uvb = (U_BLUE_COEF*(u-128)) >> SHIFT;
680                         r = y + uvr;
681                         g = y + uvg;
682                         b = y + uvb;
683
684                         if( r >= RGB_MIN && g >= RGB_MIN && b >= RGB_MIN
685                                 && r <= RGB_MAX && g <= RGB_MAX && b <= RGB_MAX )
686                         {
687                             /* this one should never happen unless someone fscked up my code */
688                             if(j == 256) { intf_ErrMsg( "vout error: no colors left to build palette\n" ); break; }
689
690                             /* clip the colors */
691                             red[j] = CLIP( r );
692                             green[j] = CLIP( g );
693                             blue[j] = CLIP( b );
694                             transp[j] = 0;
695
696                             /* allocate color */
697                             lookup[i] = 1;
698                             p_vout->yuv.yuv.p_rgb8[i++] = j;
699                             j++;
700                         }
701                         else
702                         {
703                             lookup[i] = 0;
704                             p_vout->yuv.yuv.p_rgb8[i++] = 0;
705                         }
706                     }
707                     i += 128-81;
708                 }
709
710                 /* the colors have been allocated, we can set the palette */
711                 /* there will eventually be a way to know which colors
712                  * couldn't be allocated and try to find a replacement */
713                 p_vout->p_set_palette( p_vout, red, green, blue, transp );
714
715                 p_vout->i_white_pixel = 0xff;
716                 p_vout->i_black_pixel = 0x00;
717                 p_vout->i_gray_pixel = 0x44;
718                 p_vout->i_blue_pixel = 0x3b;
719
720                 i = 0;
721                 /* this loop allocates colors that got outside
722                  * the RGB cube */
723                 for ( y = 0; y <= 256; y += 16 )
724                 {
725                     for ( u = 0; u <= 256; u += 32 )
726                     for ( v = 0; v <= 256; v += 32 )
727                     {
728                         int u2, v2;
729                         int dist, mindist = 100000000;
730
731                         if( lookup[i] || y==0)
732                         {
733                             i++;
734                             continue;
735                         }
736
737                         /* heavy. yeah. */
738                         for( u2 = 0; u2 <= 256; u2 += 32 )
739                         for( v2 = 0; v2 <= 256; v2 += 32 )
740                         {
741                             j = ((y>>4)<<7) + (u2>>5)*9 + (v2>>5);
742                             dist = (u-u2)*(u-u2) + (v-v2)*(v-v2);
743                             if( lookup[j] )
744                             /* find the nearest color */
745                             if( dist < mindist )
746                             {
747                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
748                                 mindist = dist;
749                             }
750                             j -= 128;
751                             if( lookup[j] )
752                             /* find the nearest color */
753                             if( dist + 128 < mindist )
754                             {
755                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
756                                 mindist = dist + 128;
757                             }
758                         }
759                         i++;
760                     }
761                     i += 128-81;
762                 }
763
764                 break;
765             }
766         case 2:
767             p_vout->yuv.yuv.p_rgb16 = (u16 *)p_vout->yuv.p_base;
768             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
769             {
770                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
771                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
772             }
773             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
774             {
775                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
776                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
777             }
778             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
779             {
780                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
781                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
782             }
783             for( i_index = 0; i_index < 256; i_index++ )
784             {
785                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
786                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
787                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
788             }
789             break;
790         case 3:
791         case 4:
792             p_vout->yuv.yuv.p_rgb32 = (u32 *)p_vout->yuv.p_base;
793             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
794             {
795                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
796                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
797             }
798             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
799             {
800                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
801                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
802             }
803             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
804             {
805                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
806                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
807             }
808             for( i_index = 0; i_index < 256; i_index++ )
809             {
810                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
811                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
812                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
813             }
814             break;
815         }
816     }
817
818     /*
819      * Set functions pointers
820      */
821     if( p_vout->b_grayscale )
822     {
823         /* Grayscale */
824         switch( p_vout->i_bytes_per_pixel )
825         {
826         case 1:
827             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray8;
828             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray8;
829             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray8;
830             break;
831         case 2:
832             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray16;
833             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray16;
834             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray16;
835             break;
836         case 3:
837             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray24;
838             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray24;
839             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray24;
840             break;
841         case 4:
842             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray32;
843             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray32;
844             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray32;
845             break;
846         }
847     }
848     else
849     {
850         /* Color */
851         switch( p_vout->i_bytes_per_pixel )
852         {
853         case 1:
854             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertYUV420RGB8;
855             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertYUV422RGB8;
856             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertYUV444RGB8;
857             break;
858         case 2:
859             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB16;
860             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB16;
861             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB16;
862             break;
863         case 3:
864             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB24;
865             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB24;
866             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB24;
867             break;
868         case 4:
869             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB32;
870             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB32;
871             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB32;
872             break;
873         }
874     }
875 }
876
877 /*****************************************************************************
878  * SetOffset: build offset array for conversion functions
879  *****************************************************************************
880  * This function will build an offset array used in later conversion functions.
881  * It will also set horizontal and vertical scaling indicators.
882  *****************************************************************************/
883 static void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height,
884                        boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset )
885 {
886     int i_x;                                    /* x position in destination */
887     int i_scale_count;                                     /* modulo counter */
888
889     /*
890      * Prepare horizontal offset array
891      */
892     if( i_pic_width - i_width > 0 )
893     {
894         /* Prepare scaling array for horizontal extension */
895         *pb_h_scaling =  1;
896         i_scale_count =         i_pic_width;
897         for( i_x = i_width; i_x--; )
898         {
899             while( (i_scale_count -= i_width) > 0 )
900             {
901                 *p_offset++ = 0;
902             }
903             *p_offset++ = 1;
904             i_scale_count += i_pic_width;
905         }
906     }
907     else if( i_pic_width - i_width < 0 )
908     {
909         /* Prepare scaling array for horizontal reduction */
910         *pb_h_scaling =  1;
911         i_scale_count =         i_pic_width;
912         for( i_x = i_pic_width; i_x--; )
913         {
914             *p_offset = 1;
915             while( (i_scale_count -= i_pic_width) >= 0 )
916             {
917                 *p_offset += 1;
918             }
919             p_offset++;
920             i_scale_count += i_width;
921         }
922     }
923     else
924     {
925         /* No horizontal scaling: YUV conversion is done directly to picture */
926         *pb_h_scaling = 0;
927     }
928
929     /*
930      * Set vertical scaling indicator
931      */
932     if( i_pic_height - i_height > 0 )
933     {
934         *pi_v_scaling = 1;
935     }
936     else if( i_pic_height - i_height < 0 )
937     {
938         *pi_v_scaling = -1;
939     }
940     else
941     {
942         *pi_v_scaling = 0;
943     }
944 }
945
/*****************************************************************************
 * ConvertY4Gray8: grayscale YUV 4:x:x to RGB 8 bpp
 *****************************************************************************
 * Converts the luminance plane p_y (i_width x i_height) to 8 bits pixels in
 * p_pic, one source line at a time, through the p_vout->yuv.yuv.p_gray8
 * table. Scaling to i_pic_width x i_pic_height is delegated to the
 * SCALE_WIDTH and SCALE_HEIGHT macros, which operate on the local variables
 * declared below.
 * p_u, p_v and i_matrix_coefficients are not referenced by the statements
 * written in this function.
 *****************************************************************************/
static void ConvertY4Gray8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y,
                            yuv_data_t *p_u, yuv_data_t *p_v, int i_width,
                            int i_height, int i_pic_width, int i_pic_height,
                            int i_pic_line_width, int i_matrix_coefficients )
{
    boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
    int         i_vertical_scaling;                 /* vertical scaling type */
    int         i_x, i_y;                 /* horizontal and vertical indexes */
    int         i_scale_count;                       /* scale modulo counter */
    /* never assigned here; presumably only declared because the scaling
     * macros name it - confirm against SCALE_HEIGHT */
    int         i_chroma_width;                    /* chroma width, not used */
    u8 *        p_gray;                             /* base conversion table */
    u8 *        p_pic_start;       /* beginning of the current line for copy */
    u8 *        p_buffer_start;                   /* conversion buffer start */
    u8 *        p_buffer;                       /* conversion buffer pointer */
    int *       p_offset_start;                        /* offset array start */
    int *       p_offset;                            /* offset array pointer */

    /*
     * Initialize some values  - i_pic_line_width will store the line skip
     */
    i_pic_line_width -= i_pic_width;
    p_gray =            p_vout->yuv.yuv.p_gray8;
    p_buffer_start =    p_vout->yuv.p_buffer;
    p_offset_start =    p_vout->yuv.p_offset;
    /* Fill the offset array and find out which scalings are required */
    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
               &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = i_pic_height;
    for( i_y = 0; i_y < i_height; i_y++ )
    {
        /* Mark beginning of line for possible later line copy, and initialize
         * buffer: the line is converted to the intermediate buffer when it
         * has to be scaled horizontally, directly to the picture otherwise */
        p_pic_start =   p_pic;
        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;

        /* Do YUV conversion to buffer - YUV picture is always formed of 16
         * pixels wide blocks. The loop is unrolled: each iteration converts
         * 16 pixels, so i_width % 16 trailing pixels would be left out */
        for( i_x = i_width / 16; i_x--;  )
        {
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
        }

        /* Do horizontal and vertical scaling (400: no chroma, 1 byte per
         * pixel) */
        SCALE_WIDTH;
        SCALE_HEIGHT(400, 1);
    }
}
1006
/*****************************************************************************
 * ConvertY4Gray16: grayscale YUV 4:x:x to RGB 2 Bpp
 *****************************************************************************
 * Converts the luminance plane p_y (i_width x i_height) to 16 bits pixels in
 * p_pic, one source line at a time, through the p_vout->yuv.yuv.p_gray16
 * table. Scaling to i_pic_width x i_pic_height is delegated to the
 * SCALE_WIDTH and SCALE_HEIGHT macros, which operate on the local variables
 * declared below.
 * p_u, p_v and i_matrix_coefficients are not referenced by the statements
 * written in this function.
 *****************************************************************************/
static void ConvertY4Gray16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                             int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
                             int i_matrix_coefficients )
{
    boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
    int         i_vertical_scaling;                 /* vertical scaling type */
    int         i_x, i_y;                 /* horizontal and vertical indexes */
    int         i_scale_count;                       /* scale modulo counter */
    /* never assigned here; presumably only declared because the scaling
     * macros name it - confirm against SCALE_HEIGHT */
    int         i_chroma_width;                    /* chroma width, not used */
    u16 *       p_gray;                             /* base conversion table */
    u16 *       p_pic_start;       /* beginning of the current line for copy */
    u16 *       p_buffer_start;                   /* conversion buffer start */
    u16 *       p_buffer;                       /* conversion buffer pointer */
    int *       p_offset_start;                        /* offset array start */
    int *       p_offset;                            /* offset array pointer */

    /*
     * Initialize some values  - i_pic_line_width will store the line skip
     * (NOTE(review): presumably counted in pixels, since it is later added to
     * u16 pointers - confirm against the callers)
     */
    i_pic_line_width -= i_pic_width;
    p_gray =            p_vout->yuv.yuv.p_gray16;
    p_buffer_start =    p_vout->yuv.p_buffer;
    p_offset_start =    p_vout->yuv.p_offset;
    /* Fill the offset array and find out which scalings are required */
    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
               &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = i_pic_height;
    for( i_y = 0; i_y < i_height; i_y++ )
    {
        /* Mark beginning of line for possible later line copy, and initialize
         * buffer: the line is converted to the intermediate buffer when it
         * has to be scaled horizontally, directly to the picture otherwise */
        p_pic_start =   p_pic;
        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;

        /* Do YUV conversion to buffer - YUV picture is always formed of 16
         * pixels wide blocks. The loop is unrolled: each iteration converts
         * 16 pixels, so i_width % 16 trailing pixels would be left out */
        for( i_x = i_width / 16; i_x--;  )
        {
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
        }

        /* Do horizontal and vertical scaling (400: no chroma, 2 bytes per
         * pixel) */
        SCALE_WIDTH;
        SCALE_HEIGHT(400, 2);
    }
}
1066
1067 /*****************************************************************************
1068  * ConvertY4Gray24: grayscale YUV 4:x:x to RGB 3 Bpp
      *****************************************************************************
      * Placeholder only: the body is empty, so a call converts nothing, reads
      * none of its parameters and leaves the memory behind p_pic untouched.
1069  *****************************************************************************/
1070 static void ConvertY4Gray24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1071                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1072                              int i_matrix_coefficients )
1073 {
1074     /* XXX?? not implemented: no 3 bytes-per-pixel output is produced */
1075 }
1076
1077 /*****************************************************************************
1078  * ConvertY4Gray32: grayscale YUV 4:x:x to RGB 4 Bpp
      *****************************************************************************
      * Every luminance sample read from p_y is translated into one u32 pixel
      * through the p_vout->yuv.yuv.p_gray32 table. p_u, p_v and
      * i_matrix_coefficients are not referenced in the function text itself.
      *
      * For each of the i_height source lines, the converted pixels go straight
      * to p_pic, or to p_vout->yuv.p_buffer when SetOffset() reported horizontal
      * scaling; SCALE_WIDTH and SCALE_HEIGHT then finish the line.
      *
      * NOTE(review): SCALE_WIDTH and SCALE_HEIGHT are macros defined outside this
      * function. They presumably use the locals declared below by name
      * (p_pic_start, p_offset and i_chroma_width are never read in the function
      * text) - check the macro definitions before renaming or removing a local.
1079  *****************************************************************************/
1080 static void ConvertY4Gray32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1081                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1082                              int i_matrix_coefficients )
1083 {
1084     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1085     int         i_vertical_scaling;                 /* vertical scaling type */
1086     int         i_x, i_y;                 /* horizontal and vertical indexes */
1087     int         i_scale_count;                       /* scale modulo counter */
1088     int         i_chroma_width;                    /* chroma width, not used */
1089     u32 *       p_gray;                             /* base conversion table */
1090     u32 *       p_pic_start;       /* beginning of the current line for copy */
1091     u32 *       p_buffer_start;                   /* conversion buffer start */
1092     u32 *       p_buffer;                       /* conversion buffer pointer */
1093     int *       p_offset_start;                        /* offset array start */
1094     int *       p_offset;                            /* offset array pointer */
1095
1096     /*
1097      * Initialize some values  - i_pic_line_width will store the line skip
          * (the value received minus i_pic_width). SetOffset() fills in both
          * scaling flags and is handed the offset array.
1098      */
1099     i_pic_line_width -= i_pic_width;
1100     p_gray =            p_vout->yuv.yuv.p_gray32;
1101     p_buffer_start =    p_vout->yuv.p_buffer;
1102     p_offset_start =    p_vout->yuv.p_offset;
1103     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1104                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1105
1106     /*
1107      * Perform conversion - one iteration per source line
1108      */
1109     i_scale_count = i_pic_height;
1110     for( i_y = 0; i_y < i_height; i_y++ )
1111     {
1112         /* Mark beginning of line for possible later line copy, and initialize
1113          * buffer: write directly into the picture unless horizontal scaling
              * requires the intermediate buffer */
1114         p_pic_start =   p_pic;
1115         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1116
1117         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1118          * pixels wide blocks. The loop runs i_width / 16 times and each pass
              * is unrolled into 16 table lookups, so any remainder of i_width
              * modulo 16 is not converted here */
1119         for( i_x = i_width / 16; i_x--;  )
1120         {
1121             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1122             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1123             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1124             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1125             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1126             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1127             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1128             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1129         }
1130
1131         /* Do horizontal and vertical scaling. NOTE(review): the second
              * SCALE_HEIGHT argument (4) presumably is the output pixel size in
              * bytes - confirm against the macro definition */
1132         SCALE_WIDTH;
1133         SCALE_HEIGHT(400, 4);
1134     }
1135 }
1136
1137 /*****************************************************************************
1138  * ConvertYUV420RGB8: color YUV 4:2:0 to RGB 8 bpp
      *****************************************************************************
      * Unlike the other converters of this file, no pixel is computed in the
      * function text: each of the i_height iterations consists only of the
      * SCALE_WIDTH_DITHER and SCALE_HEIGHT_DITHER macros, which are defined
      * outside this function.
      *
      * NOTE(review): those macros presumably use the locals and dither tables
      * declared below by name (b_jump_uv, i_x, p_lookup, i_chroma_width,
      * p_offset and the dither arrays are never read in the function text) -
      * check the macro definitions before renaming or removing any of them.
1139  *****************************************************************************/
1140 static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1141                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1142                                 int i_matrix_coefficients )
1143 {
1144     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1145     int         i_vertical_scaling;                 /* vertical scaling type */
1146     int         i_x, i_y;                 /* horizontal and vertical indexes */
1147     int         i_scale_count;                       /* scale modulo counter */
1148     int         b_jump_uv;                       /* should we jump u and v ? */
1149     int         i_real_y;                                           /* y % 4 */
1150     u8 *        p_lookup;                                    /* lookup table */
1151     int         i_chroma_width;                              /* chroma width */
1152     int *       p_offset_start;                        /* offset array start */
1153     int *       p_offset;                            /* offset array pointer */
1154
         /* 4x4 dither matrix holding the values 0x0..0xf, one array per row */
1155     int dither10[4] = {  0x0,  0x8,  0x2,  0xa };
1156     int dither11[4] = {  0xc,  0x4,  0xe,  0x6 };
1157     int dither12[4] = {  0x3,  0xb,  0x1,  0x9 };
1158     int dither13[4] = {  0xf,  0x7,  0xd,  0x5 };
1159
         /* same matrix with every entry doubled (0x0..0x1e) */
1160     int dither20[4] = {  0x0, 0x10,  0x4, 0x14 };
1161     int dither21[4] = { 0x18,  0x8, 0x1c,  0xc };
1162     int dither22[4] = {  0x6, 0x16,  0x2, 0x12 };
1163     int dither23[4] = { 0x1e,  0xe, 0x1a,  0xa };
1164
1165     #if 0
1166     /* other matrices that can be interesting, either for debugging or for
1167      * various effects. Disabled code. NOTE(review): the first one equals the
          * dither10..dither13 table above except for its second row, which ends
          * with 16 where the active table has 0x6 - possibly a typo */
1168     int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} };
1169     int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
1170     int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
1171     int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
1172     int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
1173     int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
1174     #endif
1175
1176     /*
1177      * Initialize some values  - i_pic_line_width will store the line skip
          * (the value received minus i_pic_width). The lookup table is
          * p_vout->yuv.p_base; SetOffset() fills in both scaling flags and is
          * handed the offset array.
1178      */
1179     i_pic_line_width -= i_pic_width;
1180     i_chroma_width =    i_width / 2;
1181     p_offset_start =    p_vout->yuv.p_offset;
1182     p_lookup =          p_vout->yuv.p_base;
1183     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1184                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1185
1186     /*
1187      * Perform conversion - one iteration per source line
1188      */
1189     i_scale_count = i_pic_height;
1190     i_real_y = 0;
1191     for( i_y = 0; i_y < i_height; i_y++ )
1192     {
1193         /* Do horizontal and vertical scaling; conversion and dithering are
              * presumably performed inside these macros - confirm against their
              * definitions */
1194         SCALE_WIDTH_DITHER( 420 );
1195         SCALE_HEIGHT_DITHER( 420 );
1196     }
1197 }
1198
1199 /*****************************************************************************
1200  * ConvertYUV422RGB8: color YUV 4:2:2 to RGB 8 bpp
      *****************************************************************************
      * For each of the i_height source lines, the pixels are produced by the
      * CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL macros, either straight into p_pic or
      * into p_vout->yuv.p_buffer when SetOffset() reported horizontal scaling;
      * SCALE_WIDTH and SCALE_HEIGHT then finish the line.
      *
      * NOTE(review): all four macros are defined outside this function. They
      * presumably use the locals declared below by name (i_uval, i_vval, i_red,
      * i_green, i_blue, p_yuv, p_ybase, p_pic_start, p_offset and
      * i_chroma_width are never read in the function text) - check the macro
      * definitions before renaming or removing a local.
1201  *****************************************************************************/
1202 static void ConvertYUV422RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1203                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1204                                 int i_matrix_coefficients )
1205 {
1206     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1207     int         i_vertical_scaling;                 /* vertical scaling type */
1208     int         i_x, i_y;                 /* horizontal and vertical indexes */
1209     int         i_scale_count;                       /* scale modulo counter */
1210     int         i_uval, i_vval;                           /* U and V samples */
1211     int         i_red, i_green, i_blue;          /* U and V modified samples */
1212     int         i_chroma_width;                              /* chroma width */
1213     u8 *        p_yuv;                              /* base conversion table */
1214     u8 *        p_ybase;                     /* Y dependent conversion table */
1215     u8 *        p_pic_start;       /* beginning of the current line for copy */
1216     u8 *        p_buffer_start;                   /* conversion buffer start */
1217     u8 *        p_buffer;                       /* conversion buffer pointer */
1218     int *       p_offset_start;                        /* offset array start */
1219     int *       p_offset;                            /* offset array pointer */
1220
1221     /*
1222      * Initialize some values  - i_pic_line_width will store the line skip
          * (the value received minus i_pic_width). SetOffset() fills in both
          * scaling flags and is handed the offset array.
1223      */
1224     i_pic_line_width -= i_pic_width;
1225     i_chroma_width =    i_width / 2;
1226     p_yuv =             p_vout->yuv.yuv.p_rgb8;
1227     p_buffer_start =    p_vout->yuv.p_buffer;
1228     p_offset_start =    p_vout->yuv.p_offset;
1229     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1230                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1231
1232     /*
1233      * Perform conversion - one iteration per source line
1234      */
1235     i_scale_count = i_pic_height;
1236     for( i_y = 0; i_y < i_height; i_y++ )
1237     {
1238         /* Mark beginning of line for possible later line copy, and initialize
1239          * buffer: write directly into the picture unless horizontal scaling
              * requires the intermediate buffer */
1240         p_pic_start =   p_pic;
1241         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1242
1243         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1244          * pixels wide blocks. The loop runs i_width / 16 times with 8
              * CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL pairs per pass.
              * NOTE(review): presumably CONVERT_YUV_PIXEL fetches a new U/V pair
              * and CONVERT_Y_PIXEL reuses it for the next pixel - confirm */
1245         for( i_x = i_width / 16; i_x--;  )
1246         {
1247             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1248             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1249             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1250             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1251             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1252             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1253             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1254             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1255         }
1256
1257         /* Do horizontal and vertical scaling */
1258         SCALE_WIDTH;
1259         SCALE_HEIGHT(422, 1);
1260     }
1261 }
1262
1263 /*****************************************************************************
1264  * ConvertYUV444RGB8: color YUV 4:4:4 to RGB 8 bpp
      *****************************************************************************
      * For each of the i_height source lines, the pixels are produced by the
      * CONVERT_YUV_PIXEL macro, either straight into p_pic or into
      * p_vout->yuv.p_buffer when SetOffset() reported horizontal scaling;
      * SCALE_WIDTH and SCALE_HEIGHT then finish the line.
      *
      * NOTE(review): the three macros are defined outside this function. They
      * presumably use the locals declared below by name (i_uval, i_vval, i_red,
      * i_green, i_blue, p_yuv, p_ybase, p_pic_start, p_offset and
      * i_chroma_width are never read in the function text) - check the macro
      * definitions before renaming or removing a local.
1265  *****************************************************************************/
1266 static void ConvertYUV444RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1267                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1268                                 int i_matrix_coefficients )
1269 {
1270     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1271     int         i_vertical_scaling;                 /* vertical scaling type */
1272     int         i_x, i_y;                 /* horizontal and vertical indexes */
1273     int         i_scale_count;                       /* scale modulo counter */
1274     int         i_uval, i_vval;                           /* U and V samples */
1275     int         i_red, i_green, i_blue;          /* U and V modified samples */
1276     int         i_chroma_width;                    /* chroma width, not used */
1277     u8 *        p_yuv;                              /* base conversion table */
1278     u8 *        p_ybase;                     /* Y dependent conversion table */
1279     u8 *        p_pic_start;       /* beginning of the current line for copy */
1280     u8 *        p_buffer_start;                   /* conversion buffer start */
1281     u8 *        p_buffer;                       /* conversion buffer pointer */
1282     int *       p_offset_start;                        /* offset array start */
1283     int *       p_offset;                            /* offset array pointer */
1284
1285     /*
1286      * Initialize some values  - i_pic_line_width will store the line skip
          * (the value received minus i_pic_width). SetOffset() fills in both
          * scaling flags and is handed the offset array. i_chroma_width is left
          * unassigned in this function.
1287      */
1288     i_pic_line_width -= i_pic_width;
1289     p_yuv =             p_vout->yuv.yuv.p_rgb8;
1290     p_buffer_start =    p_vout->yuv.p_buffer;
1291     p_offset_start =    p_vout->yuv.p_offset;
1292     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1293                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1294
1295     /*
1296      * Perform conversion - one iteration per source line
1297      */
1298     i_scale_count = i_pic_height;
1299     for( i_y = 0; i_y < i_height; i_y++ )
1300     {
1301         /* Mark beginning of line for possible later line copy, and initialize
1302          * buffer: write directly into the picture unless horizontal scaling
              * requires the intermediate buffer */
1303         p_pic_start =   p_pic;
1304         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1305
1306         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1307          * pixels wide blocks. The loop runs i_width / 16 times with 16
              * CONVERT_YUV_PIXEL invocations per pass */
1308         for( i_x = i_width / 16; i_x--;  )
1309         {
1310             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1311             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1312             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1313             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1314             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1315             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1316             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1317             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1318         }
1319
1320         /* Do horizontal and vertical scaling */
1321         SCALE_WIDTH;
1322         SCALE_HEIGHT(444, 1);
1323     }
1324 }
1325
1326 /*****************************************************************************
1327  * ConvertYUV420RGB16: color YUV 4:2:0 to RGB 2 Bpp
      *****************************************************************************
      * For each of the i_height source lines, the pixels are written either
      * straight into p_pic or into p_vout->yuv.p_buffer when SetOffset()
      * reported horizontal scaling; SCALE_WIDTH and SCALE_HEIGHT then finish the
      * line. Two implementations of the per-line conversion are selected at
      * compile time:
      *  - MMX not defined: table-driven C code built from the CONVERT_YUV_PIXEL
      *    and CONVERT_Y_PIXEL macros, 16 pixels per inner loop pass;
      *  - MMX defined: inline assembly handling 8 pixels per inner loop pass.
      *
      * NOTE(review): SCALE_WIDTH, SCALE_HEIGHT and the CONVERT_* macros are
      * defined outside this function. They presumably use the locals declared
      * below by name (i_uval, i_vval, i_red, i_green, i_blue, p_yuv, p_ybase,
      * p_pic_start, p_offset and i_chroma_width are never read in the function
      * text) - check the macro definitions before renaming or removing a local.
1328  *****************************************************************************/
1329 static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1330                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1331                                 int i_matrix_coefficients )
1332 {
1333 #if 0
1334     /* MMX version - disabled call to an external routine. It names i_skip,
          * i_scale and i_pic_eol, none of which is declared in this function */
1335     int                 i_chroma_width, i_chroma_skip;      /* width and eol for chroma */
1336
1337     i_chroma_width =    i_width / 2;
1338     i_chroma_skip =     i_skip / 2;
1339     ConvertYUV420RGB16MMX( p_y, p_u, p_v, i_width, i_height,
1340                            (i_width + i_skip) * sizeof( yuv_data_t ),
1341                            (i_chroma_width + i_chroma_skip) * sizeof( yuv_data_t),
1342                            i_scale, (u8 *)p_pic, 0, 0, (i_width + i_pic_eol) * sizeof( u16 ),
1343                            p_vout->i_screen_depth == 15 );
1344 #endif
1345     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1346     int         i_vertical_scaling;                 /* vertical scaling type */
1347     int         i_x, i_y;                 /* horizontal and vertical indexes */
1348     int         i_scale_count;                       /* scale modulo counter */
1349     int         i_uval, i_vval;                           /* U and V samples */
1350     int         i_red, i_green, i_blue;          /* U and V modified samples */
1351     int         i_chroma_width;                              /* chroma width */
1352     u16 *       p_yuv;                              /* base conversion table */
1353     u16 *       p_ybase;                     /* Y dependent conversion table */
1354     u16 *       p_pic_start;       /* beginning of the current line for copy */
1355     u16 *       p_buffer_start;                   /* conversion buffer start */
1356     u16 *       p_buffer;                       /* conversion buffer pointer */
1357     int *       p_offset_start;                        /* offset array start */
1358     int *       p_offset;                            /* offset array pointer */
1359
1360     /*
1361      * Initialize some values  - i_pic_line_width will store the line skip
          * (the value received minus i_pic_width). SetOffset() fills in both
          * scaling flags and is handed the offset array.
1362      */
1363     i_pic_line_width -= i_pic_width;
1364     i_chroma_width =    i_width / 2;
1365     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1366     p_buffer_start =    p_vout->yuv.p_buffer;
1367     p_offset_start =    p_vout->yuv.p_offset;
1368     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1369                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1370
1371     /*
1372      * Perform conversion - one iteration per source line
1373      */
1374     i_scale_count = i_pic_height;
1375     for( i_y = 0; i_y < i_height; i_y++ )
1376     {
1377         /* Mark beginning of line for possible later line copy, and initialize
1378          * buffer: write directly into the picture unless horizontal scaling
              * requires the intermediate buffer */
1379         p_pic_start =   p_pic;
1380         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1381
1382         /* The rest of the i_y loop body, including its closing brace, is
              * provided separately by each preprocessor branch below */
1383 #ifndef MMX
1384
1385         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1386          * pixels wide blocks. The loop runs i_width / 16 times with 8
              * CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL pairs per pass.
              * NOTE(review): presumably CONVERT_YUV_PIXEL fetches a new U/V pair
              * and CONVERT_Y_PIXEL reuses it for the next pixel - confirm */
1387         for( i_x = i_width / 16; i_x--;  )
1388         {
1389             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1390             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1391             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1392             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1393             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1394             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1395             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1396             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1397         }
              /* Do horizontal and vertical scaling */
1398         SCALE_WIDTH;
1399         SCALE_HEIGHT(420, 2);
1400     }
1401     
1402 #else
              /* MMX conversion: each pass reads 8 Y, 4 Cb and 4 Cr samples and
               * stores 8 u16 pixels at p_buffer.
               * NOTE(review): neither asm statement declares outputs or a clobber
               * list although mm0-mm7 and the memory at p_buffer are modified;
               * see the GCC extended asm documentation before touching this */
1403         for ( i_x = i_width / 8; i_x--; )
1404         {
              /* Load the samples of this block and clear mm4 */
1405         __asm__ (
1406             "movd      (%1), %%mm0       # Load 4 Cb       00 00 00 00 u3 u2 u1 u0\n\t"
1407             "movd      (%2), %%mm1       # Load 4 Cr       00 00 00 00 v3 v2 v1 v0\n\t"
1408             "pxor      %%mm4, %%mm4      # zero mm4\n\t"
1409             "movq      (%0), %%mm6       # Load 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1410           //"movl      $0, (%3)          # cache preload for image\n\t"
1411              : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
1412
1413         __asm__ (
1414             ".align 8 \n\t"
1415             /* Do the multiply part of the conversion for even and odd pixels,
1416              * register usage:
1417              * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
1418              * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels,
1419              * mm6 -> Y even, mm7 -> Y odd */
1420             /* convert the chroma part */
1421             "punpcklbw %%mm4, %%mm0      # scatter 4 Cb    00 u3 00 u2 00 u1 00 u0\n\t"
1422             "punpcklbw %%mm4, %%mm1      # scatter 4 Cr    00 v3 00 v2 00 v1 00 v0\n\t"
1423             "psubsw    mmx_80w, %%mm0    # Cb -= 128\n\t"
1424             "psubsw    mmx_80w, %%mm1    # Cr -= 128\n\t"
1425             "psllw     $3, %%mm0         # Promote precision\n\t"
1426             "psllw     $3, %%mm1         # Promote precision\n\t"
1427             "movq      %%mm0, %%mm2      # Copy 4 Cb       00 u3 00 u2 00 u1 00 u0\n\t"
1428             "movq      %%mm1, %%mm3      # Copy 4 Cr       00 v3 00 v2 00 v1 00 v0\n\t"
1429             "pmulhw    mmx_U_green, %%mm2# Mul Cb with green coeff -> Cb green\n\t"
1430             "pmulhw    mmx_V_green, %%mm3# Mul Cr with green coeff -> Cr green\n\t"
1431             "pmulhw    mmx_U_blue, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0\n\t"
1432             "pmulhw    mmx_V_red, %%mm1  # Mul Cr -> Cred  00 r3 00 r2 00 r1 00 r0\n\t"
1433             "paddsw    %%mm3, %%mm2      # Cb green + Cr green -> Cgreen\n\t"
1434             /* convert the luma part */
1435             "psubusb   mmx_10w, %%mm6    # Y -= 16\n\t"
1436             "movq      %%mm6, %%mm7      # Copy 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1437             "pand      mmx_00ffw, %%mm6  # get Y even      00 Y6 00 Y4 00 Y2 00 Y0\n\t"
1438             "psrlw     $8, %%mm7         # get Y odd       00 Y7 00 Y5 00 Y3 00 Y1\n\t"
1439             "psllw     $3, %%mm6         # Promote precision\n\t"
1440             "psllw     $3, %%mm7         # Promote precision\n\t"
1441             "pmulhw    mmx_Y_coeff, %%mm6# Mul 4 Y even    00 y6 00 y4 00 y2 00 y0\n\t"
1442             "pmulhw    mmx_Y_coeff, %%mm7# Mul 4 Y odd     00 y7 00 y5 00 y3 00 y1\n\t"
1443             /* Do the addition part of the conversion for even and odd pixels,
1444              * register usage:
1445              * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
1446              * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels,
1447              * mm6 -> Y even, mm7 -> Y odd */
1448             "movq      %%mm0, %%mm3      # Copy Cblue\n\t"
1449             "movq      %%mm1, %%mm4      # Copy Cred\n\t"
1450             "movq      %%mm2, %%mm5      # Copy Cgreen\n\t"
1451             "paddsw    %%mm6, %%mm0      # Y even + Cblue  00 B6 00 B4 00 B2 00 B0\n\t"
1452             "paddsw    %%mm7, %%mm3      # Y odd  + Cblue  00 B7 00 B5 00 B3 00 B1\n\t"
1453             "paddsw    %%mm6, %%mm1      # Y even + Cred   00 R6 00 R4 00 R2 00 R0\n\t"
1454             "paddsw    %%mm7, %%mm4      # Y odd  + Cred   00 R7 00 R5 00 R3 00 R1\n\t"
1455             "paddsw    %%mm6, %%mm2      # Y even + Cgreen 00 G6 00 G4 00 G2 00 G0\n\t"
1456             "paddsw    %%mm7, %%mm5      # Y odd  + Cgreen 00 G7 00 G5 00 G3 00 G1\n\t"
1457             /* Limit RGB even to 0..255 */
1458             "packuswb  %%mm0, %%mm0      # B6 B4 B2 B0 | B6 B4 B2 B0\n\t"
1459             "packuswb  %%mm1, %%mm1      # R6 R4 R2 R0 | R6 R4 R2 R0\n\t"
1460             "packuswb  %%mm2, %%mm2      # G6 G4 G2 G0 | G6 G4 G2 G0\n\t"
1461             /* Limit RGB odd to 0..255 */
1462             "packuswb  %%mm3, %%mm3      # B7 B5 B3 B1 | B7 B5 B3 B1\n\t"
1463             "packuswb  %%mm4, %%mm4      # R7 R5 R3 R1 | R7 R5 R3 R1\n\t"
1464             "packuswb  %%mm5, %%mm5      # G7 G5 G3 G1 | G7 G5 G3 G1\n\t"
1465             /* Interleave RGB even and odd */
1466             "punpcklbw %%mm3, %%mm0      #                 B7 B6 B5 B4 B3 B2 B1 B0\n\t"
1467             "punpcklbw %%mm4, %%mm1      #                 R7 R6 R5 R4 R3 R2 R1 R0\n\t"
1468             "punpcklbw %%mm5, %%mm2      #                 G7 G6 G5 G4 G3 G2 G1 G0\n\t"
1469             /* mask unneeded bits off (the mmx_redmask constant is applied to
                  * both the blue register mm0 and the red register mm1) */
1470             "pand      mmx_redmask, %%mm0# b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0\n\t"
1471             "pand      mmx_grnmask, %%mm2# g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0\n\t"
1472             "pand      mmx_redmask, %%mm1# r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0\n\t"
1473             "psrlw     mmx_blueshift,%%mm0#0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3\n\t"
1474             "pxor      %%mm4, %%mm4      # zero mm4\n\t"
1475             "movq      %%mm0, %%mm5      # Copy B7-B0\n\t"
1476             "movq      %%mm2, %%mm7      # Copy G7-G0\n\t"
1477             /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
1478             "punpcklbw %%mm4, %%mm2      #  0_0_0_0  0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
1479             "punpcklbw %%mm1, %%mm0      # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
1480             "psllw     mmx_blueshift,%%mm2#  0_0_0_0 0_g7g6g5 g4g3g2_0  0_0_0_0\n\t"
1481             "por       %%mm2, %%mm0      # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
1482             "movq      8(%0), %%mm6      # Load 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1483             "movq      %%mm0, (%3)       # store pixel 0-3\n\t"
1484             /* convert rgb24 plane to rgb16 pack for pixel 4-7.
                  * NOTE(review): the loads from 8(%0), 4(%1) and 4(%2) interleaved
                  * here fetch the samples of the next block, but the first asm
                  * statement of the next pass loads the same registers again from
                  * the advanced pointers; on the last block they read samples
                  * located after the ones converted by this loop */
1485             "punpckhbw %%mm4, %%mm7      #  0_0_0_0  0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
1486             "punpckhbw %%mm1, %%mm5      # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
1487             "psllw     mmx_blueshift,%%mm7#  0_0_0_0 0_g7g6g5 g4g3g2_0  0_0_0_0\n\t"
1488             "movd      4(%1), %%mm0      # Load 4 Cb       00 00 00 00 u3 u2 u1 u0\n\t"
1489             "por       %%mm7, %%mm5      # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
1490             "movd      4(%2), %%mm1      # Load 4 Cr       00 00 00 00 v3 v2 v1 v0\n\t"
1491             "movq      %%mm5, 8(%3)      # store pixel 4-7\n\t"
1492             : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
              /* Step over the 8 Y, 4 Cb, 4 Cr samples consumed and the 8 pixels
               * produced */
1493         p_y += 8;
1494         p_u += 4;
1495         p_v += 4;
1496         p_buffer += 8;
1497         }
1498         /* Do horizontal and vertical scaling */
1499         SCALE_WIDTH;
1500         SCALE_HEIGHT(420, 2);
1501     }
         /* Executed once after all lines: emms empties the MMX state (see the
          * Intel instruction set reference) */
1502     __asm__ ("emms\n\t");
1503 #endif
1504 }
1505
1506 /*****************************************************************************
1507  * ConvertYUV422RGB16: color YUV 4:2:2 to RGB 2 Bpp
      *****************************************************************************
      * For each of the i_height source lines, the pixels are produced by the
      * CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL macros, either straight into p_pic or
      * into p_vout->yuv.p_buffer when SetOffset() reported horizontal scaling;
      * SCALE_WIDTH and SCALE_HEIGHT then finish the line.
      *
      * NOTE(review): all four macros are defined outside this function. They
      * presumably use the locals declared below by name (i_uval, i_vval, i_red,
      * i_green, i_blue, p_yuv, p_ybase, p_pic_start, p_offset and
      * i_chroma_width are never read in the function text) - check the macro
      * definitions before renaming or removing a local.
1508  *****************************************************************************/
1509 static void ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1510                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1511                                 int i_matrix_coefficients )
1512 {
1513     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1514     int         i_vertical_scaling;                 /* vertical scaling type */
1515     int         i_x, i_y;                 /* horizontal and vertical indexes */
1516     int         i_scale_count;                       /* scale modulo counter */
1517     int         i_uval, i_vval;                           /* U and V samples */
1518     int         i_red, i_green, i_blue;          /* U and V modified samples */
1519     int         i_chroma_width;                              /* chroma width */
1520     u16 *       p_yuv;                              /* base conversion table */
1521     u16 *       p_ybase;                     /* Y dependent conversion table */
1522     u16 *       p_pic_start;       /* beginning of the current line for copy */
1523     u16 *       p_buffer_start;                   /* conversion buffer start */
1524     u16 *       p_buffer;                       /* conversion buffer pointer */
1525     int *       p_offset_start;                        /* offset array start */
1526     int *       p_offset;                            /* offset array pointer */
1527
1528     /*
1529      * Initialize some values  - i_pic_line_width will store the line skip
          * (the value received minus i_pic_width). SetOffset() fills in both
          * scaling flags and is handed the offset array.
1530      */
1531     i_pic_line_width -= i_pic_width;
1532     i_chroma_width =    i_width / 2;
1533     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1534     p_buffer_start =    p_vout->yuv.p_buffer;
1535     p_offset_start =    p_vout->yuv.p_offset;
1536     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1537                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1538
1539     /*
1540      * Perform conversion - one iteration per source line
1541      */
1542     i_scale_count = i_pic_height;
1543     for( i_y = 0; i_y < i_height; i_y++ )
1544     {
1545         /* Mark beginning of line for possible later line copy, and initialize
1546          * buffer: write directly into the picture unless horizontal scaling
              * requires the intermediate buffer */
1547         p_pic_start =   p_pic;
1548         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1549
1550         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1551          * pixels wide blocks. The loop runs i_width / 16 times with 8
              * CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL pairs per pass.
              * NOTE(review): presumably CONVERT_YUV_PIXEL fetches a new U/V pair
              * and CONVERT_Y_PIXEL reuses it for the next pixel - confirm */
1552         for( i_x = i_width / 16; i_x--;  )
1553         {
1554             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1555             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1556             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1557             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1558             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1559             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1560             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1561             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1562         }
1563
1564         /* Do horizontal and vertical scaling */
1565         SCALE_WIDTH;
1566         SCALE_HEIGHT(422, 2);
1567     }
1568 }
1569
1570 /*****************************************************************************
1571  * ConvertYUV444RGB16: color YUV 4:4:4 to RGB 2 Bpp
      *****************************************************************************
      * For each of the i_height source lines, the pixels are produced by the
      * CONVERT_YUV_PIXEL macro, either straight into p_pic or into
      * p_vout->yuv.p_buffer when SetOffset() reported horizontal scaling;
      * SCALE_WIDTH and SCALE_HEIGHT then finish the line.
      *
      * NOTE(review): the three macros are defined outside this function. They
      * presumably use the locals declared below by name (i_uval, i_vval, i_red,
      * i_green, i_blue, p_yuv, p_ybase, p_pic_start, p_offset and
      * i_chroma_width are never read in the function text) - check the macro
      * definitions before renaming or removing a local.
1572  *****************************************************************************/
1573 static void ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1574                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1575                                 int i_matrix_coefficients )
1576 {
1577     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1578     int         i_vertical_scaling;                 /* vertical scaling type */
1579     int         i_x, i_y;                 /* horizontal and vertical indexes */
1580     int         i_scale_count;                       /* scale modulo counter */
1581     int         i_uval, i_vval;                           /* U and V samples */
1582     int         i_red, i_green, i_blue;          /* U and V modified samples */
1583     int         i_chroma_width;                    /* chroma width, not used */
1584     u16 *       p_yuv;                              /* base conversion table */
1585     u16 *       p_ybase;                     /* Y dependent conversion table */
1586     u16 *       p_pic_start;       /* beginning of the current line for copy */
1587     u16 *       p_buffer_start;                   /* conversion buffer start */
1588     u16 *       p_buffer;                       /* conversion buffer pointer */
1589     int *       p_offset_start;                        /* offset array start */
1590     int *       p_offset;                            /* offset array pointer */
1591
1592     /*
1593      * Initialize some values  - i_pic_line_width will store the line skip
          * (the value received minus i_pic_width). SetOffset() fills in both
          * scaling flags and is handed the offset array. i_chroma_width is left
          * unassigned in this function.
1594      */
1595     i_pic_line_width -= i_pic_width;
1596     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1597     p_buffer_start =    p_vout->yuv.p_buffer;
1598     p_offset_start =    p_vout->yuv.p_offset;
1599     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1600                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1601
1602     /*
1603      * Perform conversion - one iteration per source line
1604      */
1605     i_scale_count = i_pic_height;
1606     for( i_y = 0; i_y < i_height; i_y++ )
1607     {
1608         /* Mark beginning of line for possible later line copy, and initialize
1609          * buffer: write directly into the picture unless horizontal scaling
              * requires the intermediate buffer */
1610         p_pic_start =   p_pic;
1611         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1612
1613         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1614          * pixels wide blocks. The loop runs i_width / 16 times with 16
              * CONVERT_YUV_PIXEL invocations per pass */
1615         for( i_x = i_width / 16; i_x--;  )
1616         {
1617             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1618             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1619             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1620             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1621             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1622             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1623             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1624             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1625         }
1626
1627         /* Do horizontal and vertical scaling */
1628         SCALE_WIDTH;
1629         SCALE_HEIGHT(444, 2);
1630     }
1631 }
1632
1633 /*****************************************************************************
1634  * ConvertYUV420RGB24: color YUV 4:2:0 to RGB 3 Bpp
      *****************************************************************************
      * Not implemented: the body is empty, so a call returns immediately without
      * reading the p_y, p_u, p_v planes and without writing anything to p_pic.
      * The parameter list is the same as the one of ConvertYUV420RGB32, except
      * that p_pic is an untyped pointer here.
1635  *****************************************************************************/
1636 static void ConvertYUV420RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1637                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1638                                 int i_matrix_coefficients )
1639 {
1640     /* XXX?? - empty body, no conversion is performed */
1641 }
1642
1643 /*****************************************************************************
1644  * ConvertYUV422RGB24: color YUV 4:2:2 to RGB 3 Bpp
      *****************************************************************************
      * Not implemented: the body is empty, so a call returns immediately without
      * reading the p_y, p_u, p_v planes and without writing anything to p_pic.
      * The parameter list is the same as the one of ConvertYUV422RGB32, except
      * that p_pic is an untyped pointer here.
1645  *****************************************************************************/
1646 static void ConvertYUV422RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1647                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1648                                 int i_matrix_coefficients )
1649 {
1650     /* XXX?? - empty body, no conversion is performed */
1651 }
1652
1653 /*****************************************************************************
1654  * ConvertYUV444RGB24: color YUV 4:4:4 to RGB 3 Bpp
      *****************************************************************************
      * Not implemented: the body is empty, so a call returns immediately without
      * reading the p_y, p_u, p_v planes and without writing anything to p_pic.
      * The parameter list is the same as the one of ConvertYUV444RGB32, except
      * that p_pic is an untyped pointer here.
1655  *****************************************************************************/
1656 static void ConvertYUV444RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1657                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1658                                 int i_matrix_coefficients )
1659 {
1660     /* XXX?? - empty body, no conversion is performed */
1661 }
1662
1663 /*****************************************************************************
1664  * ConvertYUV420RGB32: color YUV 4:2:0 to RGB 4 Bpp
      *****************************************************************************
      * Converts a picture one source line at a time. Each of the i_height lines
      * is handled in blocks of 16 pixels (i_width / 16 blocks per line, so a
      * remainder of fewer than 16 pixels is not converted by this loop). The
      * conversion target is p_pic itself, or the p_vout->yuv.p_buffer conversion
      * buffer when SetOffset reports that horizontal scaling is required.
      * SCALE_WIDTH and SCALE_HEIGHT are then expanded once per source line.
      *
      * p_vout supplies the RGB32 lookup table, the conversion buffer and the
      * offset array. p_pic is the destination. p_y, p_u and p_v are not
      * referenced directly here; presumably the macros read the source planes
      * through them. i_width/i_height and i_pic_width/i_pic_height are both
      * handed to SetOffset (presumably source and destination sizes - TODO
      * confirm). i_matrix_coefficients is not referenced in the visible body.
      *
      * NOTE(review): CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL, SCALE_WIDTH and
      * SCALE_HEIGHT are macros defined elsewhere in this file. Several locals
      * below are never referenced outside those macro expansions, so they
      * presumably depend on these exact names - confirm before renaming.
1665  *****************************************************************************/
1666 static void ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1667                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1668                                 int i_matrix_coefficients )
1669 {
1670     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1671     int         i_vertical_scaling;                 /* vertical scaling type */
1672     int         i_x, i_y;                 /* horizontal and vertical indexes */
1673     int         i_scale_count;                       /* scale modulo counter */
1674     int         i_uval, i_vval;                           /* U and V samples */
1675     int         i_red, i_green, i_blue;          /* U and V modified samples */
1676     int         i_chroma_width;                              /* chroma width */
1677     u32 *       p_yuv;                              /* base conversion table */
1678     u32 *       p_ybase;                     /* Y-dependent conversion table */
1679     u32 *       p_pic_start;       /* beginning of the current line for copy */
1680     u32 *       p_buffer_start;                   /* conversion buffer start */
1681     u32 *       p_buffer;                       /* conversion buffer pointer */
1682     int *       p_offset_start;                        /* offset array start */
1683     int *       p_offset;                            /* offset array pointer */
1684
1685     /*
1686      * Initialize some values  - i_pic_line_width will store the line skip
          * (line width minus picture width). The chroma width is half of the
          * source width.
1687      */
1688     i_pic_line_width -= i_pic_width;
1689     i_chroma_width =    i_width / 2;
1690     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1691     p_buffer_start =    p_vout->yuv.p_buffer;
1692     p_offset_start =    p_vout->yuv.p_offset;
         /* SetOffset sets both scaling flags through its output arguments and
          * receives the offset array (presumably filling it for SCALE_WIDTH) */
1693     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1694                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1695
1696     /*
1697      * Perform conversion
1698      */
1699     i_scale_count = i_pic_height;
1700     for( i_y = 0; i_y < i_height; i_y++ )
1701     {
1702         /* Mark beginning of line for possible later line copy, and initialize
1703          * buffer: the conversion buffer is only used when the line has to be
              * scaled horizontally, otherwise the target is the picture itself */
1704         p_pic_start =   p_pic;
1705         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1706
1707         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1708          * pixels wide blocks. One block is eight CONVERT_YUV_PIXEL /
              * CONVERT_Y_PIXEL pairs (presumably the second pixel of each pair
              * reuses the chroma of the first - TODO confirm in the macros) */
1709         for( i_x = i_width / 16; i_x--;  )
1710         {
1711             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1712             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1713             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1714             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1715             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1716             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1717             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1718             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1719         }
1720
1721         /* Do horizontal and vertical scaling */
1722         SCALE_WIDTH;
1723         SCALE_HEIGHT(420, 4);
1724     }
1725 }
1726
1727 /*****************************************************************************
1728  * ConvertYUV422RGB32: color YUV 4:2:2 to RGB 4 Bpp
      *****************************************************************************
      * Converts a picture one source line at a time. Each of the i_height lines
      * is handled in blocks of 16 pixels (i_width / 16 blocks per line, so a
      * remainder of fewer than 16 pixels is not converted by this loop). The
      * conversion target is p_pic itself, or the p_vout->yuv.p_buffer conversion
      * buffer when SetOffset reports that horizontal scaling is required.
      * SCALE_WIDTH and SCALE_HEIGHT are then expanded once per source line.
      *
      * p_vout supplies the RGB32 lookup table, the conversion buffer and the
      * offset array. p_pic is the destination. p_y, p_u and p_v are not
      * referenced directly here; presumably the macros read the source planes
      * through them. i_width/i_height and i_pic_width/i_pic_height are both
      * handed to SetOffset (presumably source and destination sizes - TODO
      * confirm). i_matrix_coefficients is not referenced in the visible body.
      *
      * NOTE(review): CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL, SCALE_WIDTH and
      * SCALE_HEIGHT are macros defined elsewhere in this file. Several locals
      * below are never referenced outside those macro expansions, so they
      * presumably depend on these exact names - confirm before renaming.
1729  *****************************************************************************/
1730 static void ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1731                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1732                                 int i_matrix_coefficients )
1733 {
1734     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1735     int         i_vertical_scaling;                 /* vertical scaling type */
1736     int         i_x, i_y;                 /* horizontal and vertical indexes */
1737     int         i_scale_count;                       /* scale modulo counter */
1738     int         i_uval, i_vval;                           /* U and V samples */
1739     int         i_red, i_green, i_blue;          /* U and V modified samples */
1740     int         i_chroma_width;                              /* chroma width */
1741     u32 *       p_yuv;                              /* base conversion table */
1742     u32 *       p_ybase;                     /* Y-dependent conversion table */
1743     u32 *       p_pic_start;       /* beginning of the current line for copy */
1744     u32 *       p_buffer_start;                   /* conversion buffer start */
1745     u32 *       p_buffer;                       /* conversion buffer pointer */
1746     int *       p_offset_start;                        /* offset array start */
1747     int *       p_offset;                            /* offset array pointer */
1748
1749     /*
1750      * Initialize some values  - i_pic_line_width will store the line skip
          * (line width minus picture width). The chroma width is half of the
          * source width.
1751      */
1752     i_pic_line_width -= i_pic_width;
1753     i_chroma_width =    i_width / 2;
1754     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1755     p_buffer_start =    p_vout->yuv.p_buffer;
1756     p_offset_start =    p_vout->yuv.p_offset;
         /* SetOffset sets both scaling flags through its output arguments and
          * receives the offset array (presumably filling it for SCALE_WIDTH) */
1757     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1758                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1759
1760     /*
1761      * Perform conversion
1762      */
1763     i_scale_count = i_pic_height;
1764     for( i_y = 0; i_y < i_height; i_y++ )
1765     {
1766         /* Mark beginning of line for possible later line copy, and initialize
1767          * buffer: the conversion buffer is only used when the line has to be
              * scaled horizontally, otherwise the target is the picture itself */
1768         p_pic_start =   p_pic;
1769         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1770
1771         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1772          * pixels wide blocks. One block is eight CONVERT_YUV_PIXEL /
              * CONVERT_Y_PIXEL pairs (presumably the second pixel of each pair
              * reuses the chroma of the first - TODO confirm in the macros) */
1773         for( i_x = i_width / 16; i_x--;  )
1774         {
1775             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1776             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1777             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1778             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1779             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1780             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1781             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1782             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1783         }
1784
1785         /* Do horizontal and vertical scaling */
1786         SCALE_WIDTH;
1787         SCALE_HEIGHT(422, 4);
1788     }
1789 }
1790
1791 /*****************************************************************************
1792  * ConvertYUV444RGB32: color YUV 4:4:4 to RGB 4 Bpp
      *****************************************************************************
      * Converts a picture one source line at a time. Each of the i_height lines
      * is handled in blocks of 16 pixels (i_width / 16 blocks per line, so a
      * remainder of fewer than 16 pixels is not converted by this loop). The
      * conversion target is p_pic itself, or the p_vout->yuv.p_buffer conversion
      * buffer when SetOffset reports that horizontal scaling is required.
      * SCALE_WIDTH and SCALE_HEIGHT are then expanded once per source line.
      *
      * p_vout supplies the RGB32 lookup table, the conversion buffer and the
      * offset array. p_pic is the destination. p_y, p_u and p_v are not
      * referenced directly here; presumably the macros read the source planes
      * through them. i_width/i_height and i_pic_width/i_pic_height are both
      * handed to SetOffset (presumably source and destination sizes - TODO
      * confirm). i_matrix_coefficients is not referenced in the visible body.
      *
      * NOTE(review): CONVERT_YUV_PIXEL, SCALE_WIDTH and SCALE_HEIGHT are macros
      * defined elsewhere in this file. Several locals below are never
      * referenced outside those macro expansions, so they presumably depend on
      * these exact names - confirm before renaming.
1793  *****************************************************************************/
1794 static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1795                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1796                                 int i_matrix_coefficients )
1797 {
1798     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1799     int         i_vertical_scaling;                 /* vertical scaling type */
1800     int         i_x, i_y;                 /* horizontal and vertical indexes */
1801     int         i_scale_count;                       /* scale modulo counter */
1802     int         i_uval, i_vval;                           /* U and V samples */
1803     int         i_red, i_green, i_blue;          /* U and V modified samples */
         /* i_chroma_width is never assigned in this function; presumably it is
          * only declared so that the SCALE_HEIGHT expansion compiles - confirm
          * that the 444 variant of the macro never reads it */
1804     int         i_chroma_width;                    /* chroma width, not used */
1805     u32 *       p_yuv;                              /* base conversion table */
1806     u32 *       p_ybase;                     /* Y-dependent conversion table */
1807     u32 *       p_pic_start;       /* beginning of the current line for copy */
1808     u32 *       p_buffer_start;                   /* conversion buffer start */
1809     u32 *       p_buffer;                       /* conversion buffer pointer */
1810     int *       p_offset_start;                        /* offset array start */
1811     int *       p_offset;                            /* offset array pointer */
1812
1813     /*
1814      * Initialize some values  - i_pic_line_width will store the line skip
          * (line width minus picture width).
1815      */
1816     i_pic_line_width -= i_pic_width;
1817     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1818     p_buffer_start =    p_vout->yuv.p_buffer;
1819     p_offset_start =    p_vout->yuv.p_offset;
         /* SetOffset sets both scaling flags through its output arguments and
          * receives the offset array (presumably filling it for SCALE_WIDTH) */
1820     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1821                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1822
1823     /*
1824      * Perform conversion
1825      */
1826     i_scale_count = i_pic_height;
1827     for( i_y = 0; i_y < i_height; i_y++ )
1828     {
1829         /* Mark beginning of line for possible later line copy, and initialize
1830          * buffer: the conversion buffer is only used when the line has to be
              * scaled horizontally, otherwise the target is the picture itself */
1831         p_pic_start =   p_pic;
1832         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1833
1834         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1835          * pixels wide blocks. One block is sixteen CONVERT_YUV_PIXEL
              * expansions, one per pixel */
1836         for( i_x = i_width / 16; i_x--;  )
1837         {
1838             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1839             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1840             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1841             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1842             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1843             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1844             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1845             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1846         }
1847
1848         /* Do horizontal and vertical scaling */
1849         SCALE_WIDTH;
1850         SCALE_HEIGHT(444, 4);
1851     }
1852 }
1853
1854 /*-------------------- walken code follows ----------------------------------*/
1855
1856 /*
1857  * YUV to RGB routines.
1858  *
1859  * these routines calculate r, g and b values from each pixel's y, u and v.
1860  * these r, g and b values are then passed through a table lookup to take the
1861  * gamma curve into account and find the corresponding pixel value.
1862  *
1863  * the table must store more than 3*256 values because of the possibility
1864  * of overflow in the yuv->rgb calculation. actually the calculated r,g,b
1865  * values are in the following intervals :
1866  * -176 to 255+176 for red
1867  * -133 to 255+133 for green
1868  * -222 to 255+222 for blue
1869  *
1870  * If the input y,u,v values are right, the r,g,b results are not expected
1871  * to move out of the 0 to 255 interval but who knows what will happen in
1872  * real use...
1873  *
1874  * the red, green and blue conversion tables are stored in a single 1935-entry
1875  * array. The respective positions of each component in the array have been
1876  * calculated to minimize the cache interactions of the 3 tables.
1877  */
1878
1879 #if 0
1880 /* XXX??
      * yuvToRgb24: convert `width` pixels of one line to packed 3-byte pixels.
      * This function is currently compiled out by the surrounding #if 0.
      *
      * One U and one V sample are read for every two Y samples. For each pixel,
      * three entries of `table`, all indexed relative to table + Y, are OR-ed
      * together into tmp24, which is then stored to dest as three bytes, least
      * significant byte first.
      *
      * The line is handled in three stages: groups of 8 pixels (width >> 3),
      * then the remaining pairs ((width & 7) >> 1), then one last pixel when
      * width is odd.
      *
      * NOTE(review): 1501, 135 and 818 are presumably the positions of the red,
      * green and blue sub-tables inside the 1935-entry array, and the
      * "(COEF*128)>>SHIFT" terms presumably re-centre the chroma samples around
      * 128 - confirm against the code that builds the table. The *_COEF and
      * SHIFT macros are not defined in this block.
      */
1881 static void yuvToRgb24 (unsigned char * Y,
1882                         unsigned char * U, unsigned char * V,
1883                         char * dest, int table[1935], int width)
1884 {
1885     int i;
1886     int u;
1887     int v;
1888     int uvRed;
1889     int uvGreen;
1890     int uvBlue;
1891     int * tableY;
1892     int tmp24;
1893
         /* Main loop: 8 pixels (4 chroma pairs) per iteration */
1894     i = width >> 3;
1895     while (i--) {
             /* Chroma contributions shared by the next two pixels */
1896         u = *(U++);
1897         v = *(V++);
1898         uvRed = (V_RED_COEF*v) >> SHIFT;
1899         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1900         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1901
             /* First pixel of the pair: look up the three components from the
              * Y-relative table and store them as 3 bytes */
1902         tableY = table + *(Y++);
1903         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1904                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1905                         uvGreen] |
1906                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1907         *(dest++) = tmp24;
1908         *(dest++) = tmp24 >> 8;
1909         *(dest++) = tmp24 >> 16;
1910
             /* Second pixel of the pair: same chroma, next Y sample */
1911         tableY = table + *(Y++);
1912         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1913                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1914                         uvGreen] |
1915                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1916         *(dest++) = tmp24;
1917         *(dest++) = tmp24 >> 8;
1918         *(dest++) = tmp24 >> 16;
1919
             /* Second chroma pair of the group */
1920         u = *(U++);
1921         v = *(V++);
1922         uvRed = (V_RED_COEF*v) >> SHIFT;
1923         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1924         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1925
1926         tableY = table + *(Y++);
1927         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1928                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1929                         uvGreen] |
1930                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1931         *(dest++) = tmp24;
1932         *(dest++) = tmp24 >> 8;
1933         *(dest++) = tmp24 >> 16;
1934
1935         tableY = table + *(Y++);
1936         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1937                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1938                         uvGreen] |
1939                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1940         *(dest++) = tmp24;
1941         *(dest++) = tmp24 >> 8;
1942         *(dest++) = tmp24 >> 16;
1943
             /* Third chroma pair of the group */
1944         u = *(U++);
1945         v = *(V++);
1946         uvRed = (V_RED_COEF*v) >> SHIFT;
1947         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1948         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1949
1950         tableY = table + *(Y++);
1951         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1952                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1953                         uvGreen] |
1954                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1955         *(dest++) = tmp24;
1956         *(dest++) = tmp24 >> 8;
1957         *(dest++) = tmp24 >> 16;
1958
1959         tableY = table + *(Y++);
1960         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1961                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1962                         uvGreen] |
1963                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1964         *(dest++) = tmp24;
1965         *(dest++) = tmp24 >> 8;
1966         *(dest++) = tmp24 >> 16;
1967
             /* Fourth chroma pair of the group */
1968         u = *(U++);
1969         v = *(V++);
1970         uvRed = (V_RED_COEF*v) >> SHIFT;
1971         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1972         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1973
1974         tableY = table + *(Y++);
1975         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1976                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1977                         uvGreen] |
1978                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1979         *(dest++) = tmp24;
1980         *(dest++) = tmp24 >> 8;
1981         *(dest++) = tmp24 >> 16;
1982
1983         tableY = table + *(Y++);
1984         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1985                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1986                         uvGreen] |
1987                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1988         *(dest++) = tmp24;
1989         *(dest++) = tmp24 >> 8;
1990         *(dest++) = tmp24 >> 16;
1991     }
1992
         /* Remaining complete pairs (0 to 3) after the 8-pixel groups */
1993     i = (width & 7) >> 1;
1994     while (i--) {
1995         u = *(U++);
1996         v = *(V++);
1997         uvRed = (V_RED_COEF*v) >> SHIFT;
1998         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1999         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
2000
2001         tableY = table + *(Y++);
2002         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
2003                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
2004                         uvGreen] |
2005                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
2006         *(dest++) = tmp24;
2007         *(dest++) = tmp24 >> 8;
2008         *(dest++) = tmp24 >> 16;
2009
2010         tableY = table + *(Y++);
2011         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
2012                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
2013                         uvGreen] |
2014                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
2015         *(dest++) = tmp24;
2016         *(dest++) = tmp24 >> 8;
2017         *(dest++) = tmp24 >> 16;
2018     }
2019
         /* Odd width: one last pixel with its own U and V samples */
2020     if (width & 1) {
2021         u = *(U++);
2022         v = *(V++);
2023         uvRed = (V_RED_COEF*v) >> SHIFT;
2024         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
2025         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
2026
2027         tableY = table + *(Y++);
2028         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
2029                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
2030                         uvGreen] |
2031                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
2032         *(dest++) = tmp24;
2033         *(dest++) = tmp24 >> 8;
2034         *(dest++) = tmp24 >> 16;
2035     }
2036 }
2037 #endif