]> git.sesse.net Git - vlc/blob - src/video_output/video_yuv.c
Ajout cradement d'une yuv en mmx 4:2:0 en 16 bpp dans video_yuv.c. Pour
[vlc] / src / video_output / video_yuv.c
1 /*****************************************************************************
2  * video_yuv.c: YUV transformation functions
3  * Provides functions to perform the YUV conversion. The functions provided here
4  * are a complete and portable C implementation, and may be replaced in certain
5  * case by optimized functions.
6  *****************************************************************************
7  * Copyright (C) 1999, 2000 VideoLAN
8  *
9  * Authors:
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
24  *****************************************************************************/
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include "defs.h"
30
31 #include <math.h>                                            /* exp(), pow() */
32 #include <errno.h>                                                 /* ENOMEM */
33 #include <stdlib.h>                                                /* free() */
34 #include <string.h>                                            /* strerror() */
35
36 #include "config.h"
37 #include "common.h"
38 #include "threads.h"
39 #include "mtime.h"
40 #include "plugins.h"
41 #include "video.h"
42 #include "video_output.h"
43 #include "video_yuv.h"
44
45 #include "intf_msg.h"
46
47 /*****************************************************************************
48  * Constants
49  *****************************************************************************/
50
51 /* Margins and offsets in conversion tables - Margins are used in case an
52  * RGB conversion would give a value outside the 0-255 range. Offsets have been
53  * calculated to avoid using the same cache line for 2 tables. Conversion tables
54  * are 2*MARGIN + 256 entries long and store pixels. */
55 #define RED_MARGIN      178
56 #define GREEN_MARGIN    135
57 #define BLUE_MARGIN     224
/* Each channel's sub-table spans OFFSET - MARGIN to OFFSET + 256 + MARGIN;
 * that arithmetic gives the index ranges noted beside each offset below. */
58 #define RED_OFFSET      1501                                 /* 1323 to 1935 */
59 #define GREEN_OFFSET    135                                      /* 0 to 526 */
60 #define BLUE_OFFSET     818                                   /* 594 to 1298 */
61 #define RGB_TABLE_SIZE  1935                             /* total table size */
62
/* Gray table: 2 * GRAY_MARGIN + 256 = 1024 entries */
63 #define GRAY_MARGIN     384
64 #define GRAY_TABLE_SIZE 1024                             /* total table size */
65
66 #define PALETTE_TABLE_SIZE 2176          /* YUV -> 8bpp palette lookup table */
67
68 /* macros used for YUV pixel conversions */
/* Fixed-point arithmetic: each coefficient is scaled by 2^SHIFT and divided
 * by 1.164, and products are shifted right by SHIFT after multiplication.
 * NOTE(review): 1.164 is presumably the luma gain that the lookup tables
 * apply separately - confirm against the table set-up code. */
69 #define SHIFT 20
70 #define U_GREEN_COEF    ((int)(-0.391 * (1<<SHIFT) / 1.164))
71 #define U_BLUE_COEF     ((int)(2.018 * (1<<SHIFT) / 1.164))
72 #define V_RED_COEF      ((int)(1.596 * (1<<SHIFT) / 1.164))
73 #define V_GREEN_COEF    ((int)(-0.813 * (1<<SHIFT) / 1.164))
74
75 #define MMX
/* NOTE(review): MMX is defined unconditionally on the line before the #ifdef
 * that tests it, so the block below is always compiled. Presumably this was
 * meant to come from the build configuration - confirm before porting. */
76 #ifdef MMX
77 /* hope these constant values are cache line aligned */
/* 64-bit constants holding one repeated pattern: 0x0080 and 0x00ff in each
 * 16-bit word, 0x10 in every byte for mmx_10w (despite the "w" suffix), and
 * 0x253f in each 16-bit word for mmx_Y_coeff. */
78 static unsigned long long mmx_80w     = 0x0080008000800080;
79 static unsigned long long mmx_10w     = 0x1010101010101010;
80 static unsigned long long mmx_00ffw   = 0x00ff00ff00ff00ff;
81 static unsigned long long mmx_Y_coeff = 0x253f253f253f253f;
82
83 /* hope these constant values are cache line aligned */
/* One 16-bit coefficient repeated in each word. Together with mmx_Y_coeff the
 * values match 1.164, -0.391, 2.018, 1.596 and -0.813 multiplied by 2^13
 * (negative ones in two's complement).
 * NOTE(review): how the MMX code consumes them is not visible here. */
84 static unsigned long long mmx_U_green = 0xf37df37df37df37d;
85 static unsigned long long mmx_U_blue  = 0x4093409340934093;
86 static unsigned long long mmx_V_red   = 0x3312331233123312;
87 static unsigned long long mmx_V_green = 0xe5fce5fce5fce5fc;
88
89 /* hope these constant values are cache line aligned */
/* 0xf8 clears the low 3 bits of every byte and 0xfc the low 2 bits; both
 * shift counts are 3.
 * NOTE(review): presumably used to pack RGB 5-6-5 pixels - confirm. */
90 static unsigned long long mmx_redmask = 0xf8f8f8f8f8f8f8f8;
91 static unsigned long long mmx_grnmask = 0xfcfcfcfcfcfcfcfc;
92 static unsigned long long mmx_grnshift   = 0x03;
93 static unsigned long long mmx_blueshift  = 0x03;
94 #endif
95
96 /*****************************************************************************
97  * Local prototypes
98  *****************************************************************************/
99 static void     SetGammaTable     ( int *pi_table, double f_gamma );
100 static void     SetYUV            ( vout_thread_t *p_vout );
101 static void     SetOffset         ( int i_width, int i_height, int i_pic_width, int i_pic_height,
102                                     boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset );
103
/* Conversion routines, one per source format (gray, YUV 4:2:0, 4:2:2, 4:4:4)
 * and output depth (8, 16, 24 or 32 bits). They all share one parameter list
 * and differ only in the type of p_pic (u8, u16, void or u32 pointer).
 * NOTE(review): judging by how the scaling macros in this file use the same
 * names, i_width/i_height describe the source planes, i_pic_width/i_pic_height
 * the rendered picture, and i_pic_line_width the amount added to p_pic at the
 * end of each output line - confirm against the function bodies. */
104 static void     ConvertY4Gray8    ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
105                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
106                                     int i_matrix_coefficients );
107 static void     ConvertY4Gray16   ( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
108                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
109                                     int i_matrix_coefficients );
110 static void     ConvertY4Gray24   ( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
111                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
112                                     int i_matrix_coefficients );
113 static void     ConvertY4Gray32   ( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
114                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
115                                     int i_matrix_coefficients );
116 static void     ConvertYUV420RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
117                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
118                                     int i_matrix_coefficients );
119 static void     ConvertYUV422RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
120                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
121                                     int i_matrix_coefficients );
122 static void     ConvertYUV444RGB8 ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
123                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
124                                     int i_matrix_coefficients );
125 static void     ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
126                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
127                                     int i_matrix_coefficients );
128 static void     ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
129                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
130                                     int i_matrix_coefficients );
131 static void     ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
132                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
133                                     int i_matrix_coefficients );
134 static void     ConvertYUV420RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
135                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
136                                     int i_matrix_coefficients );
137 static void     ConvertYUV422RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
138                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
139                                     int i_matrix_coefficients );
140 static void     ConvertYUV444RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
141                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
142                                     int i_matrix_coefficients );
143 static void     ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
144                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
145                                     int i_matrix_coefficients );
146 static void     ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
147                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
148                                     int i_matrix_coefficients );
149 static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
150                                     int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
151                                     int i_matrix_coefficients );
152
153 /*****************************************************************************
154  * CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL: pixel conversion blocks
155  *****************************************************************************
156  * These conversion routines are used by YUV conversion functions.
157  * conversion are made from p_y, p_u, p_v, which are modified, to p_buffer,
158  * which is also modified.
159  *****************************************************************************/
160 #define CONVERT_Y_PIXEL( BPP )                                                \
    /* Emit one pixel from the luma sample at *p_y, reusing the chroma       \
     * contributions already held in i_red, i_green and i_blue. p_ybase is   \
     * p_yuv advanced by the luma value; three table entries are read from   \
     * it and OR-ed together into *p_buffer. Each index subtracts the        \
     * contribution that a chroma value of 128 would give, so chroma 128     \
     * lands exactly on the channel OFFSET. Post-increments p_y and          \
     * p_buffer. BPP is not used in the expansion. */                        \
161     /* Only Y sample is present */                                            \
162     p_ybase = p_yuv + *p_y++;                                                 \
163     *p_buffer++ = p_ybase[RED_OFFSET-((V_RED_COEF*128)>>SHIFT) + i_red] |     \
164         p_ybase[GREEN_OFFSET-(((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT)       \
165         + i_green ] | p_ybase[BLUE_OFFSET-((U_BLUE_COEF*128)>>SHIFT) + i_blue];
166
167 #define CONVERT_YUV_PIXEL( BPP )                                              \
    /* Read one U and one V sample (post-incrementing p_u and p_v), compute  \
     * the fixed-point red, green and blue chroma contributions into i_red,  \
     * i_green and i_blue, then emit one pixel through CONVERT_Y_PIXEL. The  \
     * contributions stay in those locals for later CONVERT_Y_PIXEL uses.    \
     * The trailing backslash on the last line continues the macro onto the  \
     * line that follows it. */                                               \
168     /* Y, U and V samples are present */                                      \
169     i_uval =    *p_u++;                                                       \
170     i_vval =    *p_v++;                                                       \
171     i_red =     (V_RED_COEF * i_vval) >> SHIFT;                               \
172     i_green =   (U_GREEN_COEF * i_uval + V_GREEN_COEF * i_vval) >> SHIFT;     \
173     i_blue =    (U_BLUE_COEF * i_uval) >> SHIFT;                              \
174     CONVERT_Y_PIXEL( BPP )                                                    \
175
176 /*****************************************************************************
177  * CONVERT_4YUV_PIXELS, CONVERT_4YUV_PIXELS_SCALE: dither 4 pixels in 8 bpp
178  *****************************************************************************
179  * These macros dither 4 pixels in 8 bpp, with or without horiz. scaling
180  *****************************************************************************/
181 #define CONVERT_4YUV_PIXELS( CHROMA )                                         \
    /* Write 4 output pixels to p_pic. Each pixel is p_lookup[] indexed by   \
     * ((Y + dither) >> 4) << 7, plus ((U + dither) >> 5) * 9, plus          \
     * ((V + dither) >> 5). Luma uses dither10..dither13 and chroma uses     \
     * dither20..dither23, all indexed by i_real_y. p_y advances after       \
     * every pixel; p_u and p_v advance only after the 2nd and 4th pixel,    \
     * so each chroma sample serves two pixels. CHROMA is not used in the    \
     * expansion. */                                                          \
182     *p_pic++ = p_lookup[                                                      \
183         (((*p_y++ + dither10[i_real_y]) >> 4) << 7)                           \
184       + ((*p_u + dither20[i_real_y]) >> 5) * 9                                \
185       + ((*p_v + dither20[i_real_y]) >> 5) ];                                 \
186     *p_pic++ = p_lookup[                                                      \
187         (((*p_y++ + dither11[i_real_y]) >> 4) << 7)                           \
188       + ((*p_u++ + dither21[i_real_y]) >> 5) * 9                              \
189       + ((*p_v++ + dither21[i_real_y]) >> 5) ];                               \
190     *p_pic++ = p_lookup[                                                      \
191         (((*p_y++ + dither12[i_real_y]) >> 4) << 7)                           \
192       + ((*p_u + dither22[i_real_y]) >> 5) * 9                                \
193       + ((*p_v + dither22[i_real_y]) >> 5) ];                                 \
194     *p_pic++ = p_lookup[                                                      \
195         (((*p_y++ + dither13[i_real_y]) >> 4) << 7)                           \
196       + ((*p_u++ + dither23[i_real_y]) >> 5) * 9                              \
197       + ((*p_v++ + dither23[i_real_y]) >> 5) ];                               \
198
199 #define CONVERT_4YUV_PIXELS_SCALE( CHROMA )                                   \
    /* Horizontally scaled variant: write 4 output pixels using the same     \
     * p_lookup[] index formula, but read the samples without advancing.     \
     * After each pixel the current *p_offset entry is added to b_jump_uv    \
     * and to p_y, p_u and p_v advance by (*p_offset & b_jump_uv), and       \
     * p_offset moves to the next entry. CHROMA is not used.                 \
     * NOTE(review): presumably the low bit of b_jump_uv alternates so that  \
     * chroma advances on every second luma step - confirm against the       \
     * offset table built by SetOffset. */                                    \
200     *p_pic++ = p_lookup[                                                      \
201         (((*p_y + dither10[i_real_y]) >> 4) << 7)                             \
202         + ((*p_u + dither20[i_real_y])   >> 5) * 9                            \
203         + ((*p_v + dither20[i_real_y])   >> 5) ];                             \
204     b_jump_uv += *p_offset;                                                   \
205     p_y += *p_offset;                                                         \
206     p_u += *p_offset   & b_jump_uv;                                           \
207     p_v += *p_offset++ & b_jump_uv;                                           \
208     *p_pic++ = p_lookup[                                                      \
209         (((*p_y + dither11[i_real_y]) >> 4) << 7)                             \
210         + ((*p_u + dither21[i_real_y])   >> 5) * 9                            \
211         + ((*p_v + dither21[i_real_y])   >> 5) ];                             \
212     b_jump_uv += *p_offset;                                                   \
213     p_y += *p_offset;                                                         \
214     p_u += *p_offset   & b_jump_uv;                                           \
215     p_v += *p_offset++ & b_jump_uv;                                           \
216     *p_pic++ = p_lookup[                                                      \
217         (((*p_y + dither12[i_real_y]) >> 4) << 7)                             \
218         + ((*p_u + dither22[i_real_y])   >> 5) * 9                            \
219         + ((*p_v + dither22[i_real_y])   >> 5) ];                             \
220     b_jump_uv += *p_offset;                                                   \
221     p_y += *p_offset;                                                         \
222     p_u += *p_offset   & b_jump_uv;                                           \
223     p_v += *p_offset++ & b_jump_uv;                                           \
224     *p_pic++ = p_lookup[                                                      \
225         (((*p_y + dither13[i_real_y]) >> 4) << 7)                             \
226         + ((*p_u + dither23[i_real_y])   >> 5) * 9                            \
227         + ((*p_v + dither23[i_real_y])   >> 5) ];                             \
228     b_jump_uv += *p_offset;                                                   \
229     p_y += *p_offset;                                                         \
230     p_u += *p_offset   & b_jump_uv;                                           \
231     p_v += *p_offset++ & b_jump_uv;                                           \
232
233 /*****************************************************************************
234  * SCALE_WIDTH: scale a line horizontally
235  *****************************************************************************
236  * This macro scales a line using rendering buffer and offset array. It works
237  * for 1, 2 and 4 Bpp.
238  *****************************************************************************/
239 #define SCALE_WIDTH                                                           \
    /* Finish one output line. With b_horizontal_scaling set, p_buffer and   \
     * p_offset are rewound to their start values and the line is copied to  \
     * p_pic one pixel at a time, p_buffer advancing by the matching offset  \
     * entry after each pixel; the copy is unrolled 16 times, so only        \
     * i_pic_width / 16 groups are written and any remainder of i_pic_width  \
     * modulo 16 is left untouched. Without scaling, p_pic is simply moved   \
     * past the line. Both paths add i_pic_line_width to p_pic. */            \
240     if( b_horizontal_scaling )                                                \
241     {                                                                         \
242         /* Horizontal scaling, conversion has been done to buffer.            \
243          * Rewind buffer and offset, then copy and scale line */              \
244         p_buffer = p_buffer_start;                                            \
245         p_offset = p_offset_start;                                            \
246         for( i_x = i_pic_width / 16; i_x--; )                                 \
247         {                                                                     \
248             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
249             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
250             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
251             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
252             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
253             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
254             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
255             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
256             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
257             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
258             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
259             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
260             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
261             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
262             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
263             *p_pic++ = *p_buffer;   p_buffer += *p_offset++;                  \
264         }                                                                     \
265         p_pic += i_pic_line_width;                                            \
266     }                                                                         \
267     else                                                                      \
268     {                                                                         \
269         /* No scaling, conversion has been done directly in picture memory.   \
270          * Increment of picture pointer to end of line is still needed */     \
271         p_pic += i_pic_width + i_pic_line_width;                              \
272     }                                                                         \
273
274
275 /*****************************************************************************
276  * SCALE_WIDTH_DITHER: scale a line horizontally for dithered 8 bpp
277  *****************************************************************************
278  * This macro scales a line using an offset array.
279  *****************************************************************************/
280 #define SCALE_WIDTH_DITHER( CHROMA )                                          \
    /* Convert and write one dithered 8 bpp line straight into p_pic. With   \
     * b_horizontal_scaling set, p_offset is rewound, b_jump_uv is cleared   \
     * and i_pic_width / 16 groups of 16 pixels are produced by              \
     * CONVERT_4YUV_PIXELS_SCALE; otherwise i_width / 16 groups are          \
     * produced by CONVERT_4YUV_PIXELS. Afterwards p_pic gains               \
     * i_pic_line_width and i_real_y cycles through 0..3. */                  \
281     if( b_horizontal_scaling )                                                \
282     {                                                                         \
283         /* Horizontal scaling, but we can't use a buffer due to dither */     \
284         p_offset = p_offset_start;                                            \
285         b_jump_uv = 0;                                                        \
286         for( i_x = i_pic_width / 16; i_x--; )                                 \
287         {                                                                     \
288             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
289             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
290             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
291             CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
292         }                                                                     \
293     }                                                                         \
294     else                                                                      \
295     {                                                                         \
296         for( i_x = i_width / 16; i_x--;  )                                    \
297         {                                                                     \
298             CONVERT_4YUV_PIXELS( CHROMA )                                     \
299             CONVERT_4YUV_PIXELS( CHROMA )                                     \
300             CONVERT_4YUV_PIXELS( CHROMA )                                     \
301             CONVERT_4YUV_PIXELS( CHROMA )                                     \
302         }                                                                     \
303     }                                                                         \
304     /* Increment of picture pointer to end of line is still needed */         \
305     p_pic += i_pic_line_width;                                                \
306     i_real_y = (i_real_y + 1) & 0x3;                                          \
307
/*****************************************************************************
 * SCALE_HEIGHT: handle vertical scaling
 *****************************************************************************
 * This macro handles vertical scaling for a picture. CHROMA may be 420, 422
 * or 444 for RGB conversion, or 400 for gray conversion. BPP is the number of
 * bytes per output pixel and must be 1, 2, 3 or 4.
 *
 * It is expanded inside the conversion loops and uses these caller locals:
 * p_y, p_u, p_v, p_pic, p_pic_start, i_x, i_y, i_width, i_height,
 * i_pic_width, i_pic_height, i_pic_line_width, i_chroma_width,
 * i_vertical_scaling and i_scale_count.
 *
 * Line duplication is done with memcpy() and plain pointer assignment rather
 * than by incrementing the result of a (u64 *) cast: a cast does not yield an
 * lvalue in C, so that construct is rejected by current compilers. The number
 * of bytes copied per step (16 pixels of BPP bytes) is unchanged.
 *****************************************************************************/
#define SCALE_HEIGHT( CHROMA, BPP )                                           \
    /* If the current line index is even, step U and V back by one chroma    \
     * line (presumably so that the next line reuses the same samples) */     \
    if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )                \
    {                                                                         \
        p_u -= i_chroma_width;                                                \
        p_v -= i_chroma_width;                                                \
    }                                                                         \
                                                                              \
    /*                                                                        \
     * Handle vertical scaling. The current line can be copied or next one    \
     * can be ignored. Other values of i_vertical_scaling do nothing.         \
     */                                                                       \
    switch( i_vertical_scaling )                                              \
    {                                                                         \
    case -1:                             /* vertical scaling factor is < 1 */ \
        while( (i_scale_count -= i_pic_height) >= 0 )                         \
        {                                                                     \
            /* Height reduction: skip next source line */                     \
            p_y += i_width;                                                   \
            i_y++;                                                            \
            if( (CHROMA == 420) || (CHROMA == 422) )                          \
            {                                                                 \
                if( i_y & 0x1 )                                               \
                {                                                             \
                    p_u += i_chroma_width;                                    \
                    p_v += i_chroma_width;                                    \
                }                                                             \
            }                                                                 \
            else if( CHROMA == 444 )                                          \
            {                                                                 \
                p_u += i_width;                                               \
                p_v += i_width;                                               \
            }                                                                 \
        }                                                                     \
        i_scale_count += i_height;                                            \
        break;                                                                \
    case 1:                              /* vertical scaling factor is > 1 */ \
        while( (i_scale_count -= i_height) > 0 )                              \
        {                                                                     \
            /* Height increment: copy previous picture line, 16 pixels       \
             * (16 * BPP bytes) at a time. Source and destination belong to  \
             * different lines, so the copied ranges do not overlap. */       \
            for( i_x = i_pic_width / 16; i_x--; )                             \
            {                                                                 \
                memcpy( p_pic, p_pic_start, 16 * (BPP) );                     \
                p_pic = (void *)( (unsigned char *)p_pic + 16 * (BPP) );      \
                p_pic_start =                                                 \
                    (void *)( (unsigned char *)p_pic_start + 16 * (BPP) );    \
            }                                                                 \
            p_pic +=        i_pic_line_width;                                 \
            p_pic_start +=  i_pic_line_width;                                 \
        }                                                                     \
        i_scale_count += i_pic_height;                                        \
        break;                                                                \
    }
381
382 /*****************************************************************************
383  * SCALE_HEIGHT_DITHER: handle vertical scaling for dithered 8 bpp
384  *****************************************************************************
385  * This macro handles vertical scaling for a picture. CHROMA may be 420, 422 or
386  * 444 for RGB conversion, or 400 for gray conversion.
387  *****************************************************************************/
388 #define SCALE_HEIGHT_DITHER( CHROMA )                                         \
    /* Vertical scaling for the dithered 8 bpp path. Height reduction skips  \
     * source lines by advancing p_y, p_u and p_v. Height increase converts  \
     * the line again with SCALE_WIDTH_DITHER instead of copying pixels,     \
     * then steps p_y back by i_width and p_u/p_v back by i_chroma_width.    \
     * Other values of i_vertical_scaling do nothing. */                      \
389                                                                               \
390     /* If line is even (i_y bit 0 clear), rewind 4:2:0 U and V samples */     \
391     if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )                \
392     {                                                                         \
393         p_u -= i_chroma_width;                                                \
394         p_v -= i_chroma_width;                                                \
395     }                                                                         \
396                                                                               \
397     /*                                                                        \
398      * Handle vertical scaling. The current line can be copied or next one    \
399      * can be ignored.                                                        \
400      */                                                                       \
401                                                                               \
402     switch( i_vertical_scaling )                                              \
403     {                                                                         \
404     case -1:                             /* vertical scaling factor is < 1 */ \
405         while( (i_scale_count -= i_pic_height) >= 0 )                         \
406         {                                                                     \
407             /* Height reduction: skip next source line */                     \
408             p_y += i_width;                                                   \
409             i_y++;                                                            \
410             if( (CHROMA == 420) || (CHROMA == 422) )                          \
411             {                                                                 \
412                 if( i_y & 0x1 )                                               \
413                 {                                                             \
414                     p_u += i_chroma_width;                                    \
415                     p_v += i_chroma_width;                                    \
416                 }                                                             \
417             }                                                                 \
418             else if( CHROMA == 444 )                                          \
419             {                                                                 \
420                 p_u += i_width;                                               \
421                 p_v += i_width;                                               \
422             }                                                                 \
423         }                                                                     \
424         i_scale_count += i_height;                                            \
425         break;                                                                \
426     case 1:                              /* vertical scaling factor is > 1 */ \
427         while( (i_scale_count -= i_height) > 0 )                              \
428         {                                                                     \
            /* Height increment: convert the same source line once more */    \
429             SCALE_WIDTH_DITHER( CHROMA );                                     \
430             p_y -= i_width;                                                   \
431             p_u -= i_chroma_width;                                            \
432             p_v -= i_chroma_width;                                            \
            /* NOTE(review): SCALE_WIDTH_DITHER has already added            \
             * i_pic_line_width to p_pic; it is added a second time here -   \
             * confirm that this is intended */                               \
433             p_pic +=        i_pic_line_width;                                 \
434         }                                                                     \
435         i_scale_count += i_pic_height;                                        \
436         break;                                                                \
437     }                                                                         \
438
439 /*****************************************************************************
440  * vout_InitYUV: allocate and initialize translations tables
441  *****************************************************************************
442  * This function will allocate memory to store translation tables, depending
443  * of the screen depth.
444  *****************************************************************************/
445 int vout_InitYUV( vout_thread_t *p_vout )
446 {
447     size_t      tables_size;                        /* tables size, in bytes */
448
449     /* Computes tables size - 3 Bpp use 32 bits pixel entries in tables */
450     switch( p_vout->i_bytes_per_pixel )
451     {
452     case 1:
453         tables_size = sizeof( u8 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : PALETTE_TABLE_SIZE);
454         break;
455     case 2:
456         tables_size = sizeof( u16 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
457         break;
458     case 3:
459     case 4:
460     default:
461         tables_size = sizeof( u32 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
462         break;
463     }
464
465     /* Allocate memory */
466     p_vout->yuv.p_base = malloc( tables_size );
467     if( p_vout->yuv.p_base == NULL )
468     {
469         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
470         return( 1 );
471     }
472
473     /* Allocate memory for conversion buffer and offset array */
474     p_vout->yuv.p_buffer = malloc( VOUT_MAX_WIDTH * p_vout->i_bytes_per_pixel );
475     if( p_vout->yuv.p_buffer == NULL )
476     {
477         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
478         free( p_vout->yuv.p_base );
479         return( 1 );
480     }
481     p_vout->yuv.p_offset = malloc( p_vout->i_width * sizeof( int ) );
482     if( p_vout->yuv.p_offset == NULL )
483     {
484         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
485         free( p_vout->yuv.p_base );
486         free( p_vout->yuv.p_buffer );
487         return( 1 );
488     }
489
490     /* Initialize tables */
491     SetYUV( p_vout );
492     return( 0 );
493 }
494
495 /*****************************************************************************
496  * vout_ResetTables: re-initialize translations tables
497  *****************************************************************************
498  * This function will initialize the tables allocated by vout_CreateTables and
499  * set functions pointers.
500  *****************************************************************************/
501 int vout_ResetYUV( vout_thread_t *p_vout )
502 {
503     vout_EndYUV( p_vout );
504     return( vout_InitYUV( p_vout ) );
505 }
506
507 /*****************************************************************************
508  * vout_EndYUV: destroy translations tables
509  *****************************************************************************
510  * Free memory allocated by vout_CreateTables.
511  *****************************************************************************/
512 void vout_EndYUV( vout_thread_t *p_vout )
513 {
514     free( p_vout->yuv.p_base );
515     free( p_vout->yuv.p_buffer );
516     free( p_vout->yuv.p_offset );
517 }
518
519 /* following functions are local */
520
521 /*****************************************************************************
522  * SetGammaTable: return intensity table transformed by gamma curve.
523  *****************************************************************************
524  * pi_table is a table of 256 entries from 0 to 255.
525  *****************************************************************************/
526 static void SetGammaTable( int *pi_table, double f_gamma )
527 {
528     int         i_y;                                       /* base intensity */
529
530     /* Use exp(gamma) instead of gamma */
531     f_gamma = exp( f_gamma );
532
533     /* Build gamma table */
534     for( i_y = 0; i_y < 256; i_y++ )
535     {
536         pi_table[ i_y ] = pow( (double)i_y / 256, f_gamma ) * 256;
537     }
538  }
539
540 /*****************************************************************************
541  * SetYUV: compute tables and set function pointers
542 + *****************************************************************************/
543 static void SetYUV( vout_thread_t *p_vout )
544 {
545     int         pi_gamma[256];                                /* gamma table */
546     int         i_index;                                  /* index in tables */
547
548     /* Build gamma table */
549     SetGammaTable( pi_gamma, p_vout->f_gamma );
550
551     /*
552      * Set pointers and build YUV tables
553      */
554     if( p_vout->b_grayscale )
555     {
556         /* Grayscale: build gray table */
557         switch( p_vout->i_bytes_per_pixel )
558         {
559         case 1:
560             {
561                 u16 bright[256], transp[256];
562
563                 p_vout->yuv.yuv.p_gray8 =  (u8 *)p_vout->yuv.p_base + GRAY_MARGIN;
564                 for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
565                 {
566                     p_vout->yuv.yuv.p_gray8[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
567                     p_vout->yuv.yuv.p_gray8[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
568                 }
569                 for( i_index = 0; i_index < 256; i_index++)
570                 {
571                     p_vout->yuv.yuv.p_gray8[ i_index ] = pi_gamma[ i_index ];
572                     bright[ i_index ] = i_index << 8;
573                     transp[ i_index ] = 0;
574                 }
575                 /* the colors have been allocated, we can set the palette */
576                 p_vout->p_set_palette( p_vout, bright, bright, bright, transp );
577                 p_vout->i_white_pixel = 0xff;
578                 p_vout->i_black_pixel = 0x00;
579                 p_vout->i_gray_pixel = 0x44;
580                 p_vout->i_blue_pixel = 0x3b;
581
582                 break;
583             }
584         case 2:
585             p_vout->yuv.yuv.p_gray16 =  (u16 *)p_vout->yuv.p_base + GRAY_MARGIN;
586             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
587             {
588                 p_vout->yuv.yuv.p_gray16[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
589                 p_vout->yuv.yuv.p_gray16[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
590             }
591             for( i_index = 0; i_index < 256; i_index++)
592             {
593                 p_vout->yuv.yuv.p_gray16[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
594             }
595             break;
596         case 3:
597         case 4:
598             p_vout->yuv.yuv.p_gray32 =  (u32 *)p_vout->yuv.p_base + GRAY_MARGIN;
599             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
600             {
601                 p_vout->yuv.yuv.p_gray32[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
602                 p_vout->yuv.yuv.p_gray32[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
603             }
604             for( i_index = 0; i_index < 256; i_index++)
605             {
606                 p_vout->yuv.yuv.p_gray32[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
607             }
608             break;
609          }
610     }
611     else
612     {
613         /* Color: build red, green and blue tables */
614         switch( p_vout->i_bytes_per_pixel )
615         {
616         case 1:
617             {
618                 #define RGB_MIN 0
619                 #define RGB_MAX 255
620                 #define CLIP( x ) ( ((x < 0) ? 0 : (x > 255) ? 255 : x) << 8 )
621
622                 int y,u,v;
623                 int r,g,b;
624                 int uvr, uvg, uvb;
625                 int i = 0, j = 0;
626                 u16 red[256], green[256], blue[256], transp[256];
627                 unsigned char lookup[PALETTE_TABLE_SIZE];
628
629                 p_vout->yuv.yuv.p_rgb8 = (u8 *)p_vout->yuv.p_base;
630
631                 /* this loop calculates the intersection of an YUV box
632                  * and the RGB cube. */
633                 for ( y = 0; y <= 256; y += 16 )
634                 {
635                     for ( u = 0; u <= 256; u += 32 )
636                     for ( v = 0; v <= 256; v += 32 )
637                     {
638                         uvr = (V_RED_COEF*(v-128)) >> SHIFT;
639                         uvg = (U_GREEN_COEF*(u-128) + V_GREEN_COEF*(v-128)) >> SHIFT;
640                         uvb = (U_BLUE_COEF*(u-128)) >> SHIFT;
641                         r = y + uvr;
642                         g = y + uvg;
643                         b = y + uvb;
644
645                         if( r >= RGB_MIN && g >= RGB_MIN && b >= RGB_MIN
646                                 && r <= RGB_MAX && g <= RGB_MAX && b <= RGB_MAX )
647                         {
648                             /* this one should never happen unless someone fscked up my code */
649                             if(j == 256) { intf_ErrMsg( "vout error: no colors left to build palette\n" ); break; }
650
651                             /* clip the colors */
652                             red[j] = CLIP( r );
653                             green[j] = CLIP( g );
654                             blue[j] = CLIP( b );
655                             transp[j] = 0;
656
657                             /* allocate color */
658                             lookup[i] = 1;
659                             p_vout->yuv.yuv.p_rgb8[i++] = j;
660                             j++;
661                         }
662                         else
663                         {
664                             lookup[i] = 0;
665                             p_vout->yuv.yuv.p_rgb8[i++] = 0;
666                         }
667                     }
668                     i += 128-81;
669                 }
670
671                 /* the colors have been allocated, we can set the palette */
672                 /* there will eventually be a way to know which colors
673                  * couldn't be allocated and try to find a replacement */
674                 p_vout->p_set_palette( p_vout, red, green, blue, transp );
675
676                 p_vout->i_white_pixel = 0xff;
677                 p_vout->i_black_pixel = 0x00;
678                 p_vout->i_gray_pixel = 0x44;
679                 p_vout->i_blue_pixel = 0x3b;
680
681                 i = 0;
682                 /* this loop allocates colors that got outside
683                  * the RGB cube */
684                 for ( y = 0; y <= 256; y += 16 )
685                 {
686                     for ( u = 0; u <= 256; u += 32 )
687                     for ( v = 0; v <= 256; v += 32 )
688                     {
689                         int u2, v2;
690                         int dist, mindist = 100000000;
691
692                         if( lookup[i] || y==0)
693                         {
694                             i++;
695                             continue;
696                         }
697
698                         /* heavy. yeah. */
699                         for( u2 = 0; u2 <= 256; u2 += 32 )
700                         for( v2 = 0; v2 <= 256; v2 += 32 )
701                         {
702                             j = ((y>>4)<<7) + (u2>>5)*9 + (v2>>5);
703                             dist = (u-u2)*(u-u2) + (v-v2)*(v-v2);
704                             if( lookup[j] )
705                             /* find the nearest color */
706                             if( dist < mindist )
707                             {
708                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
709                                 mindist = dist;
710                             }
711                             j -= 128;
712                             if( lookup[j] )
713                             /* find the nearest color */
714                             if( dist + 128 < mindist )
715                             {
716                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
717                                 mindist = dist + 128;
718                             }
719                         }
720                         i++;
721                     }
722                     i += 128-81;
723                 }
724
725                 break;
726             }
727         case 2:
728             p_vout->yuv.yuv.p_rgb16 = (u16 *)p_vout->yuv.p_base;
729             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
730             {
731                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
732                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
733             }
734             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
735             {
736                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
737                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
738             }
739             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
740             {
741                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
742                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
743             }
744             for( i_index = 0; i_index < 256; i_index++ )
745             {
746                 p_vout->yuv.yuv.p_rgb16[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
747                 p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
748                 p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
749             }
750             break;
751         case 3:
752         case 4:
753             p_vout->yuv.yuv.p_rgb32 = (u32 *)p_vout->yuv.p_base;
754             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
755             {
756                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
757                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
758             }
759             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
760             {
761                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
762                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
763             }
764             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
765             {
766                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
767                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
768             }
769             for( i_index = 0; i_index < 256; i_index++ )
770             {
771                 p_vout->yuv.yuv.p_rgb32[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
772                 p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
773                 p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
774             }
775             break;
776         }
777     }
778
779     /*
780      * Set functions pointers
781      */
782     if( p_vout->b_grayscale )
783     {
784         /* Grayscale */
785         switch( p_vout->i_bytes_per_pixel )
786         {
787         case 1:
788             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray8;
789             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray8;
790             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray8;
791             break;
792         case 2:
793             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray16;
794             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray16;
795             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray16;
796             break;
797         case 3:
798             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray24;
799             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray24;
800             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray24;
801             break;
802         case 4:
803             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray32;
804             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray32;
805             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray32;
806             break;
807         }
808     }
809     else
810     {
811         /* Color */
812         switch( p_vout->i_bytes_per_pixel )
813         {
814         case 1:
815             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertYUV420RGB8;
816             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertYUV422RGB8;
817             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertYUV444RGB8;
818             break;
819         case 2:
820             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB16;
821             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB16;
822             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB16;
823             break;
824         case 3:
825             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB24;
826             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB24;
827             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB24;
828             break;
829         case 4:
830             p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB32;
831             p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB32;
832             p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB32;
833             break;
834         }
835     }
836 }
837
838 /*****************************************************************************
839  * SetOffset: build offset array for conversion functions
840  *****************************************************************************
841  * This function will build an offset array used in later conversion functions.
842  * It will also set horizontal and vertical scaling indicators.
843  *****************************************************************************/
844 static void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height,
845                        boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset )
846 {
847     int i_x;                                    /* x position in destination */
848     int i_scale_count;                                     /* modulo counter */
849
850     /*
851      * Prepare horizontal offset array
852      */
853     if( i_pic_width - i_width > 0 )
854     {
855         /* Prepare scaling array for horizontal extension */
856         *pb_h_scaling =  1;
857         i_scale_count =         i_pic_width;
858         for( i_x = i_width; i_x--; )
859         {
860             while( (i_scale_count -= i_width) > 0 )
861             {
862                 *p_offset++ = 0;
863             }
864             *p_offset++ = 1;
865             i_scale_count += i_pic_width;
866         }
867     }
868     else if( i_pic_width - i_width < 0 )
869     {
870         /* Prepare scaling array for horizontal reduction */
871         *pb_h_scaling =  1;
872         i_scale_count =         i_pic_width;
873         for( i_x = i_pic_width; i_x--; )
874         {
875             *p_offset = 1;
876             while( (i_scale_count -= i_pic_width) >= 0 )
877             {
878                 *p_offset += 1;
879             }
880             p_offset++;
881             i_scale_count += i_width;
882         }
883     }
884     else
885     {
886         /* No horizontal scaling: YUV conversion is done directly to picture */
887         *pb_h_scaling = 0;
888     }
889
890     /*
891      * Set vertical scaling indicator
892      */
893     if( i_pic_height - i_height > 0 )
894     {
895         *pi_v_scaling = 1;
896     }
897     else if( i_pic_height - i_height < 0 )
898     {
899         *pi_v_scaling = -1;
900     }
901     else
902     {
903         *pi_v_scaling = 0;
904     }
905 }
906
907 /*****************************************************************************
908  * ConvertY4Gray8: grayscale YUV 4:x:x to RGB 8 bpp
909  *****************************************************************************/
910 static void ConvertY4Gray8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y,
911                             yuv_data_t *p_u, yuv_data_t *p_v, int i_width,
912                             int i_height, int i_pic_width, int i_pic_height,
913                             int i_pic_line_width, int i_matrix_coefficients )
914 {
915     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
916     int         i_vertical_scaling;                 /* vertical scaling type */
917     int         i_x, i_y;                 /* horizontal and vertical indexes */
918     int         i_scale_count;                       /* scale modulo counter */
919     int         i_chroma_width;                    /* chroma width, not used */
920     u8 *        p_gray;                             /* base conversion table */
921     u8 *        p_pic_start;       /* beginning of the current line for copy */
922     u8 *        p_buffer_start;                   /* conversion buffer start */
923     u8 *        p_buffer;                       /* conversion buffer pointer */
924     int *       p_offset_start;                        /* offset array start */
925     int *       p_offset;                            /* offset array pointer */
926
927     /*
928      * Initialize some values  - i_pic_line_width will store the line skip
929      */
930     i_pic_line_width -= i_pic_width;
931     p_gray =            p_vout->yuv.yuv.p_gray8;
932     p_buffer_start =    p_vout->yuv.p_buffer;
933     p_offset_start =    p_vout->yuv.p_offset;
934     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
935                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
936
937     /*
938      * Perform conversion
939      */
940     i_scale_count = i_pic_height;
941     for( i_y = 0; i_y < i_height; i_y++ )
942     {
943         /* Mark beginnning of line for possible later line copy, and initialize
944          * buffer */
945         p_pic_start =   p_pic;
946         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
947
948         /* Do YUV conversion to buffer - YUV picture is always formed of 16
949          * pixels wide blocks */
950         for( i_x = i_width / 16; i_x--;  )
951         {
952             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
953             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
954             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
955             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
956             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
957             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
958             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
959             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
960         }
961
962         /* Do horizontal and vertical scaling */
963         SCALE_WIDTH;
964         SCALE_HEIGHT(400, 1);
965     }
966 }
967
968 /*****************************************************************************
969  * ConvertY4Gray16: grayscale YUV 4:x:x to RGB 2 Bpp
970  *****************************************************************************/
971 static void ConvertY4Gray16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
972                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
973                              int i_matrix_coefficients )
974 {
975     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
976     int         i_vertical_scaling;                 /* vertical scaling type */
977     int         i_x, i_y;                 /* horizontal and vertical indexes */
978     int         i_scale_count;                       /* scale modulo counter */
979     int         i_chroma_width;                    /* chroma width, not used */
980     u16 *       p_gray;                             /* base conversion table */
981     u16 *       p_pic_start;       /* beginning of the current line for copy */
982     u16 *       p_buffer_start;                   /* conversion buffer start */
983     u16 *       p_buffer;                       /* conversion buffer pointer */
984     int *       p_offset_start;                        /* offset array start */
985     int *       p_offset;                            /* offset array pointer */
986
987     /*
988      * Initialize some values  - i_pic_line_width will store the line skip
989      */
990     i_pic_line_width -= i_pic_width;
991     p_gray =            p_vout->yuv.yuv.p_gray16;
992     p_buffer_start =    p_vout->yuv.p_buffer;
993     p_offset_start =    p_vout->yuv.p_offset;
994     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
995                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
996
997     /*
998      * Perform conversion
999      */
1000     i_scale_count = i_pic_height;
1001     for( i_y = 0; i_y < i_height; i_y++ )
1002     {
1003         /* Mark beginnning of line for possible later line copy, and initialize
1004          * buffer */
1005         p_pic_start =   p_pic;
1006         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1007
1008         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1009          * pixels wide blocks */
1010         for( i_x = i_width / 16; i_x--;  )
1011         {
1012             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1013             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1014             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1015             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1016             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1017             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1018             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1019             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1020         }
1021
1022         /* Do horizontal and vertical scaling */
1023         SCALE_WIDTH;
1024         SCALE_HEIGHT(400, 2);
1025     }
1026 }
1027
1028 /*****************************************************************************
1029  * ConvertY4Gray24: grayscale YUV 4:x:x to RGB 3 Bpp
1030  *****************************************************************************/
1031 static void ConvertY4Gray24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1032                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1033                              int i_matrix_coefficients )
1034 {
1035     /* XXX?? */
1036 }
1037
1038 /*****************************************************************************
1039  * ConvertY4Gray32: grayscale YUV 4:x:x to RGB 4 Bpp
1040  *****************************************************************************/
1041 static void ConvertY4Gray32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1042                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1043                              int i_matrix_coefficients )
1044 {
1045     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1046     int         i_vertical_scaling;                 /* vertical scaling type */
1047     int         i_x, i_y;                 /* horizontal and vertical indexes */
1048     int         i_scale_count;                       /* scale modulo counter */
1049     int         i_chroma_width;                    /* chroma width, not used */
1050     u32 *       p_gray;                             /* base conversion table */
1051     u32 *       p_pic_start;       /* beginning of the current line for copy */
1052     u32 *       p_buffer_start;                   /* conversion buffer start */
1053     u32 *       p_buffer;                       /* conversion buffer pointer */
1054     int *       p_offset_start;                        /* offset array start */
1055     int *       p_offset;                            /* offset array pointer */
1056
1057     /*
1058      * Initialize some values  - i_pic_line_width will store the line skip
1059      */
1060     i_pic_line_width -= i_pic_width;
1061     p_gray =            p_vout->yuv.yuv.p_gray32;
1062     p_buffer_start =    p_vout->yuv.p_buffer;
1063     p_offset_start =    p_vout->yuv.p_offset;
1064     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1065                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1066
1067     /*
1068      * Perform conversion
1069      */
1070     i_scale_count = i_pic_height;
1071     for( i_y = 0; i_y < i_height; i_y++ )
1072     {
1073         /* Mark beginnning of line for possible later line copy, and initialize
1074          * buffer */
1075         p_pic_start =   p_pic;
1076         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1077
1078         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1079          * pixels wide blocks */
1080         for( i_x = i_width / 16; i_x--;  )
1081         {
1082             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1083             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1084             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1085             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1086             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1087             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1088             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1089             *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
1090         }
1091
1092         /* Do horizontal and vertical scaling */
1093         SCALE_WIDTH;
1094         SCALE_HEIGHT(400, 4);
1095     }
1096 }
1097
1098 /*****************************************************************************
1099  * ConvertYUV420RGB8: color YUV 4:2:0 to RGB 8 bpp
1100  *****************************************************************************/
1101 static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1102                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1103                                 int i_matrix_coefficients )
1104 {
1105     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1106     int         i_vertical_scaling;                 /* vertical scaling type */
1107     int         i_x, i_y;                 /* horizontal and vertical indexes */
1108     int         i_scale_count;                       /* scale modulo counter */
1109     int         b_jump_uv;                       /* should we jump u and v ? */
1110     int         i_real_y;                                           /* y % 4 */
1111     u8 *        p_lookup;                                    /* lookup table */
1112     int         i_chroma_width;                              /* chroma width */
1113     int *       p_offset_start;                        /* offset array start */
1114     int *       p_offset;                            /* offset array pointer */
1115
1116     int dither10[4] = {  0x0,  0x8,  0x2,  0xa };
1117     int dither11[4] = {  0xc,  0x4,  0xe,  0x6 };
1118     int dither12[4] = {  0x3,  0xb,  0x1,  0x9 };
1119     int dither13[4] = {  0xf,  0x7,  0xd,  0x5 };
1120
1121     int dither20[4] = {  0x0, 0x10,  0x4, 0x14 };
1122     int dither21[4] = { 0x18,  0x8, 0x1c,  0xc };
1123     int dither22[4] = {  0x6, 0x16,  0x2, 0x12 };
1124     int dither23[4] = { 0x1e,  0xe, 0x1a,  0xa };
1125
1126     #if 0
1127     /* other matrices that can be interesting, either for debugging or for
1128      * various effects */
1129     int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} };
1130     int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
1131     int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
1132     int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
1133     int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
1134     int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
1135     #endif
1136
1137     /*
1138      * Initialize some values  - i_pic_line_width will store the line skip
1139      */
1140     i_pic_line_width -= i_pic_width;
1141     i_chroma_width =    i_width / 2;
1142     p_offset_start =    p_vout->yuv.p_offset;
1143     p_lookup =          p_vout->yuv.p_base;
1144     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1145                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1146
1147     /*
1148      * Perform conversion
1149      */
1150     i_scale_count = i_pic_height;
1151     i_real_y = 0;
1152     for( i_y = 0; i_y < i_height; i_y++ )
1153     {
1154         /* Do horizontal and vertical scaling */
1155         SCALE_WIDTH_DITHER( 420 );
1156         SCALE_HEIGHT_DITHER( 420 );
1157     }
1158 }
1159
1160 /*****************************************************************************
1161  * ConvertYUV422RGB8: color YUV 4:2:2 to RGB 8 bpp
1162  *****************************************************************************/
1163 static void ConvertYUV422RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1164                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1165                                 int i_matrix_coefficients )
1166 {
1167     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1168     int         i_vertical_scaling;                 /* vertical scaling type */
1169     int         i_x, i_y;                 /* horizontal and vertical indexes */
1170     int         i_scale_count;                       /* scale modulo counter */
1171     int         i_uval, i_vval;                           /* U and V samples */
1172     int         i_red, i_green, i_blue;          /* U and V modified samples */
1173     int         i_chroma_width;                              /* chroma width */
1174     u8 *        p_yuv;                              /* base conversion table */
1175     u8 *        p_ybase;                     /* Y dependant conversion table */
1176     u8 *        p_pic_start;       /* beginning of the current line for copy */
1177     u8 *        p_buffer_start;                   /* conversion buffer start */
1178     u8 *        p_buffer;                       /* conversion buffer pointer */
1179     int *       p_offset_start;                        /* offset array start */
1180     int *       p_offset;                            /* offset array pointer */
1181
1182     /*
1183      * Initialize some values  - i_pic_line_width will store the line skip
1184      */
1185     i_pic_line_width -= i_pic_width;
1186     i_chroma_width =    i_width / 2;
1187     p_yuv =             p_vout->yuv.yuv.p_rgb8;
1188     p_buffer_start =    p_vout->yuv.p_buffer;
1189     p_offset_start =    p_vout->yuv.p_offset;
1190     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1191                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1192
1193     /*
1194      * Perform conversion
1195      */
1196     i_scale_count = i_pic_height;
1197     for( i_y = 0; i_y < i_height; i_y++ )
1198     {
1199         /* Mark beginnning of line for possible later line copy, and initialize
1200          * buffer */
1201         p_pic_start =   p_pic;
1202         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1203
1204         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1205          * pixels wide blocks */
1206         for( i_x = i_width / 16; i_x--;  )
1207         {
1208             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1209             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1210             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1211             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1212             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1213             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1214             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1215             CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
1216         }
1217
1218         /* Do horizontal and vertical scaling */
1219         SCALE_WIDTH;
1220         SCALE_HEIGHT(422, 1);
1221     }
1222 }
1223
1224 /*****************************************************************************
1225  * ConvertYUV444RGB8: color YUV 4:4:4 to RGB 8 bpp
1226  *****************************************************************************/
1227 static void ConvertYUV444RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1228                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1229                                 int i_matrix_coefficients )
1230 {
1231     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1232     int         i_vertical_scaling;                 /* vertical scaling type */
1233     int         i_x, i_y;                 /* horizontal and vertical indexes */
1234     int         i_scale_count;                       /* scale modulo counter */
1235     int         i_uval, i_vval;                           /* U and V samples */
1236     int         i_red, i_green, i_blue;          /* U and V modified samples */
1237     int         i_chroma_width;                    /* chroma width, not used */
1238     u8 *        p_yuv;                              /* base conversion table */
1239     u8 *        p_ybase;                     /* Y dependant conversion table */
1240     u8 *        p_pic_start;       /* beginning of the current line for copy */
1241     u8 *        p_buffer_start;                   /* conversion buffer start */
1242     u8 *        p_buffer;                       /* conversion buffer pointer */
1243     int *       p_offset_start;                        /* offset array start */
1244     int *       p_offset;                            /* offset array pointer */
1245
1246     /*
1247      * Initialize some values  - i_pic_line_width will store the line skip
1248      */
1249     i_pic_line_width -= i_pic_width;
1250     p_yuv =             p_vout->yuv.yuv.p_rgb8;
1251     p_buffer_start =    p_vout->yuv.p_buffer;
1252     p_offset_start =    p_vout->yuv.p_offset;
1253     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1254                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1255
1256     /*
1257      * Perform conversion
1258      */
1259     i_scale_count = i_pic_height;
1260     for( i_y = 0; i_y < i_height; i_y++ )
1261     {
1262         /* Mark beginnning of line for possible later line copy, and initialize
1263          * buffer */
1264         p_pic_start =   p_pic;
1265         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1266
1267         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1268          * pixels wide blocks */
1269         for( i_x = i_width / 16; i_x--;  )
1270         {
1271             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1272             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1273             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1274             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1275             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1276             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1277             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1278             CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
1279         }
1280
1281         /* Do horizontal and vertical scaling */
1282         SCALE_WIDTH;
1283         SCALE_HEIGHT(444, 1);
1284     }
1285 }
1286
1287 /*****************************************************************************
1288  * ConvertYUV420RGB16: color YUV 4:2:0 to RGB 2 Bpp
1289  *****************************************************************************/
1290 static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1291                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1292                                 int i_matrix_coefficients )
1293 {
1294 #if 0
1295     /* MMX version */
1296     int                 i_chroma_width, i_chroma_skip;      /* width and eol for chroma */
1297
1298     i_chroma_width =    i_width / 2;
1299     i_chroma_skip =     i_skip / 2;
1300     ConvertYUV420RGB16MMX( p_y, p_u, p_v, i_width, i_height,
1301                            (i_width + i_skip) * sizeof( yuv_data_t ),
1302                            (i_chroma_width + i_chroma_skip) * sizeof( yuv_data_t),
1303                            i_scale, (u8 *)p_pic, 0, 0, (i_width + i_pic_eol) * sizeof( u16 ),
1304                            p_vout->i_screen_depth == 15 );
1305 #endif
1306     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1307     int         i_vertical_scaling;                 /* vertical scaling type */
1308     int         i_x, i_y;                 /* horizontal and vertical indexes */
1309     int         i_scale_count;                       /* scale modulo counter */
1310     int         i_uval, i_vval;                           /* U and V samples */
1311     int         i_red, i_green, i_blue;          /* U and V modified samples */
1312     int         i_chroma_width;                              /* chroma width */
1313     u16 *       p_yuv;                              /* base conversion table */
1314     u16 *       p_ybase;                     /* Y dependant conversion table */
1315     u16 *       p_pic_start;       /* beginning of the current line for copy */
1316     u16 *       p_buffer_start;                   /* conversion buffer start */
1317     u16 *       p_buffer;                       /* conversion buffer pointer */
1318     int *       p_offset_start;                        /* offset array start */
1319     int *       p_offset;                            /* offset array pointer */
1320
1321     /*
1322      * Initialize some values  - i_pic_line_width will store the line skip
1323      */
1324     i_pic_line_width -= i_pic_width;
1325     i_chroma_width =    i_width / 2;
1326     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1327     p_buffer_start =    p_vout->yuv.p_buffer;
1328     p_offset_start =    p_vout->yuv.p_offset;
1329     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1330                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1331
1332     /*
1333      * Perform conversion
1334      */
1335     i_scale_count = i_pic_height;
1336     for( i_y = 0; i_y < i_height; i_y++ )
1337     {
1338         /* Mark beginnning of line for possible later line copy, and initialize
1339          * buffer */
1340         p_pic_start =   p_pic;
1341         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1342
1343
1344 #ifndef MMX
1345
1346         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1347          * pixels wide blocks */
1348         for( i_x = i_width / 16; i_x--;  )
1349         {
1350             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1351             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1352             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1353             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1354             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1355             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1356             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1357             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1358         }
1359         SCALE_WIDTH;
1360         SCALE_HEIGHT(420, 2);
1361     }
1362     
1363 #else
1364         for ( i_x = i_width / 8; i_x--; )
1365         {
1366         __asm__ (
1367             "movd      (%1), %%mm0       # Load 4 Cb       00 00 00 00 u3 u2 u1 u0\n\t"
1368             "movd      (%2), %%mm1       # Load 4 Cr       00 00 00 00 v3 v2 v1 v0\n\t"
1369             "pxor      %%mm4, %%mm4      # zero mm4\n\t"
1370             "movq      (%0), %%mm6       # Load 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1371           //"movl      $0, (%3)          # cache preload for image\n\t"
1372              : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
1373
1374         __asm__ (
1375             ".align 8 \n\t"
1376             /* Do the multiply part of the conversion for even and odd pixels,
1377              * register usage:
1378              * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
1379              * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels,
1380              * mm6 -> Y even, mm7 -> Y odd */
1381             /* convert the chroma part */
1382             "punpcklbw %%mm4, %%mm0      # scatter 4 Cb    00 u3 00 u2 00 u1 00 u0\n\t"
1383             "punpcklbw %%mm4, %%mm1      # scatter 4 Cr    00 v3 00 v2 00 v1 00 v0\n\t"
1384             "psubsw    mmx_80w, %%mm0    # Cb -= 128\n\t"
1385             "psubsw    mmx_80w, %%mm1    # Cr -= 128\n\t"
1386             "psllw     $3, %%mm0         # Promote precision\n\t"
1387             "psllw     $3, %%mm1         # Promote precision\n\t"
1388             "movq      %%mm0, %%mm2      # Copy 4 Cb       00 u3 00 u2 00 u1 00 u0\n\t"
1389             "movq      %%mm1, %%mm3      # Copy 4 Cr       00 v3 00 v2 00 v1 00 v0\n\t"
1390             "pmulhw    mmx_U_green, %%mm2# Mul Cb with green coeff -> Cb green\n\t"
1391             "pmulhw    mmx_V_green, %%mm3# Mul Cr with green coeff -> Cr green\n\t"
1392             "pmulhw    mmx_U_blue, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0\n\t"
1393             "pmulhw    mmx_V_red, %%mm1  # Mul Cr -> Cred  00 r3 00 r2 00 r1 00 r0\n\t"
1394             "paddsw    %%mm3, %%mm2      # Cb green + Cr green -> Cgreen\n\t"
1395             /* convert the luma part */
1396             "psubusb   mmx_10w, %%mm6    # Y -= 16\n\t"
1397             "movq      %%mm6, %%mm7      # Copy 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1398             "pand      mmx_00ffw, %%mm6  # get Y even      00 Y6 00 Y4 00 Y2 00 Y0\n\t"
1399             "psrlw     $8, %%mm7         # get Y odd       00 Y7 00 Y5 00 Y3 00 Y1\n\t"
1400             "psllw     $3, %%mm6         # Promote precision\n\t"
1401             "psllw     $3, %%mm7         # Promote precision\n\t"
1402             "pmulhw    mmx_Y_coeff, %%mm6# Mul 4 Y even    00 y6 00 y4 00 y2 00 y0\n\t"
1403             "pmulhw    mmx_Y_coeff, %%mm7# Mul 4 Y odd     00 y7 00 y5 00 y3 00 y1\n\t"
1404             /* Do the addition part of the conversion for even and odd pixels,
1405              * register usage:
1406              * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
1407              * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd  pixels,
1408              * mm6 -> Y even, mm7 -> Y odd */                                                                                                                                        /* Do horizontal and vertical scaling */
1409             "movq      %%mm0, %%mm3      # Copy Cblue\n\t"
1410             "movq      %%mm1, %%mm4      # Copy Cred\n\t"
1411             "movq      %%mm2, %%mm5      # Copy Cgreen\n\t"
1412             "paddsw    %%mm6, %%mm0      # Y even + Cblue  00 B6 00 B4 00 B2 00 B0\n\t"
1413             "paddsw    %%mm7, %%mm3      # Y odd  + Cblue  00 B7 00 B5 00 B3 00 B1\n\t"
1414             "paddsw    %%mm6, %%mm1      # Y even + Cred   00 R6 00 R4 00 R2 00 R0\n\t"
1415             "paddsw    %%mm7, %%mm4      # Y odd  + Cred   00 R7 00 R5 00 R3 00 R1\n\t"
1416             "paddsw    %%mm6, %%mm2      # Y even + Cgreen 00 G6 00 G4 00 G2 00 G0\n\t"
1417             "paddsw    %%mm7, %%mm5      # Y odd  + Cgreen 00 G7 00 G5 00 G3 00 G1\n\t"
1418             /* Limit RGB even to 0..255 */
1419             "packuswb  %%mm0, %%mm0      # B6 B4 B2 B0 | B6 B4 B2 B0\n\t"
1420             "packuswb  %%mm1, %%mm1      # R6 R4 R2 R0 | R6 R4 R2 R0\n\t"
1421             "packuswb  %%mm2, %%mm2      # G6 G4 G2 G0 | G6 G4 G2 G0\n\t"
1422             /* Limit RGB odd to 0..255 */
1423             "packuswb  %%mm3, %%mm3      # B7 B5 B3 B1 | B7 B5 B3 B1\n\t"
1424             "packuswb  %%mm4, %%mm4      # R7 R5 R3 R1 | R7 R5 R3 R1\n\t"
1425             "packuswb  %%mm5, %%mm5      # G7 G5 G3 G1 | G7 G5 G3 G1\n\t"
1426             /* Interleave RGB even and odd */
1427             "punpcklbw %%mm3, %%mm0      #                 B7 B6 B5 B4 B3 B2 B1 B0\n\t"
1428             "punpcklbw %%mm4, %%mm1      #                 R7 R6 R5 R4 R3 R2 R1 R0\n\t"
1429             "punpcklbw %%mm5, %%mm2      #                 G7 G6 G5 G4 G3 G2 G1 G0\n\t"
1430             /* mask unneeded bits off */
1431             "pand      mmx_redmask, %%mm0# b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0\n\t"
1432             "pand      mmx_grnmask, %%mm2# g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0\n\t"
1433             "pand      mmx_redmask, %%mm1# r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0\n\t"
1434             "psrlw     mmx_blueshift,%%mm0#0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3\n\t"
1435             "pxor      %%mm4, %%mm4      # zero mm4\n\t"
1436             "movq      %%mm0, %%mm5      # Copy B7-B0\n\t"
1437             "movq      %%mm2, %%mm7      # Copy G7-G0\n\t"
1438             /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
1439             "punpcklbw %%mm4, %%mm2      #  0_0_0_0  0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
1440             "punpcklbw %%mm1, %%mm0      # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
1441             "psllw     mmx_blueshift,%%mm2#  0_0_0_0 0_g7g6g5 g4g3g2_0  0_0_0_0\n\t"
1442             "por       %%mm2, %%mm0      # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
1443             "movq      8(%0), %%mm6      # Load 8 Y        Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
1444             "movq      %%mm0, (%3)       # store pixel 0-3\n\t"
1445             /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
1446             "punpckhbw %%mm4, %%mm7      #  0_0_0_0  0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
1447             "punpckhbw %%mm1, %%mm5      # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
1448             "psllw     mmx_blueshift,%%mm7#  0_0_0_0 0_g7g6g5 g4g3g2_0  0_0_0_0\n\t"
1449             "movd      4(%1), %%mm0      # Load 4 Cb       00 00 00 00 u3 u2 u1 u0\n\t"
1450             "por       %%mm7, %%mm5      # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
1451             "movd      4(%2), %%mm1      # Load 4 Cr       00 00 00 00 v3 v2 v1 v0\n\t"
1452             "movq      %%mm5, 8(%3)      # store pixel 4-7\n\t"
1453             : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
1454         p_y += 8;
1455         p_u += 4;
1456         p_v += 4;
1457         p_buffer += 8;
1458         }
1459         
1460         SCALE_WIDTH;
1461         SCALE_HEIGHT(420, 2);
1462     }
1463     __asm__ ("emms\n\t");
1464 #endif
1465 }
1466
1467 /*****************************************************************************
1468  * ConvertYUV422RGB16: color YUV 4:2:2 to RGB 2 Bpp
1469  *****************************************************************************/
1470 static void ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1471                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1472                                 int i_matrix_coefficients )
1473 {
1474     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1475     int         i_vertical_scaling;                 /* vertical scaling type */
1476     int         i_x, i_y;                 /* horizontal and vertical indexes */
1477     int         i_scale_count;                       /* scale modulo counter */
1478     int         i_uval, i_vval;                           /* U and V samples */
1479     int         i_red, i_green, i_blue;          /* U and V modified samples */
1480     int         i_chroma_width;                              /* chroma width */
1481     u16 *       p_yuv;                              /* base conversion table */
1482     u16 *       p_ybase;                     /* Y dependant conversion table */
1483     u16 *       p_pic_start;       /* beginning of the current line for copy */
1484     u16 *       p_buffer_start;                   /* conversion buffer start */
1485     u16 *       p_buffer;                       /* conversion buffer pointer */
1486     int *       p_offset_start;                        /* offset array start */
1487     int *       p_offset;                            /* offset array pointer */
1488
1489     /*
1490      * Initialize some values  - i_pic_line_width will store the line skip
1491      */
1492     i_pic_line_width -= i_pic_width;
1493     i_chroma_width =    i_width / 2;
1494     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1495     p_buffer_start =    p_vout->yuv.p_buffer;
1496     p_offset_start =    p_vout->yuv.p_offset;
1497     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1498                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1499
1500     /*
1501      * Perform conversion
1502      */
1503     i_scale_count = i_pic_height;
1504     for( i_y = 0; i_y < i_height; i_y++ )
1505     {
1506         /* Mark beginnning of line for possible later line copy, and initialize
1507          * buffer */
1508         p_pic_start =   p_pic;
1509         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1510
1511         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1512          * pixels wide blocks */
1513         for( i_x = i_width / 16; i_x--;  )
1514         {
1515             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1516             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1517             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1518             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1519             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1520             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1521             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1522             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
1523         }
1524
1525         /* Do horizontal and vertical scaling */
1526         SCALE_WIDTH;
1527         SCALE_HEIGHT(422, 2);
1528     }
1529 }
1530
1531 /*****************************************************************************
1532  * ConvertYUV444RGB16: color YUV 4:4:4 to RGB 2 Bpp
1533  *****************************************************************************/
1534 static void ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1535                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1536                                 int i_matrix_coefficients )
1537 {
1538     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1539     int         i_vertical_scaling;                 /* vertical scaling type */
1540     int         i_x, i_y;                 /* horizontal and vertical indexes */
1541     int         i_scale_count;                       /* scale modulo counter */
1542     int         i_uval, i_vval;                           /* U and V samples */
1543     int         i_red, i_green, i_blue;          /* U and V modified samples */
1544     int         i_chroma_width;                    /* chroma width, not used */
1545     u16 *       p_yuv;                              /* base conversion table */
1546     u16 *       p_ybase;                     /* Y dependant conversion table */
1547     u16 *       p_pic_start;       /* beginning of the current line for copy */
1548     u16 *       p_buffer_start;                   /* conversion buffer start */
1549     u16 *       p_buffer;                       /* conversion buffer pointer */
1550     int *       p_offset_start;                        /* offset array start */
1551     int *       p_offset;                            /* offset array pointer */
1552
1553     /*
1554      * Initialize some values  - i_pic_line_width will store the line skip
1555      */
1556     i_pic_line_width -= i_pic_width;
1557     p_yuv =             p_vout->yuv.yuv.p_rgb16;
1558     p_buffer_start =    p_vout->yuv.p_buffer;
1559     p_offset_start =    p_vout->yuv.p_offset;
1560     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1561                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1562
1563     /*
1564      * Perform conversion
1565      */
1566     i_scale_count = i_pic_height;
1567     for( i_y = 0; i_y < i_height; i_y++ )
1568     {
1569         /* Mark beginnning of line for possible later line copy, and initialize
1570          * buffer */
1571         p_pic_start =   p_pic;
1572         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1573
1574         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1575          * pixels wide blocks */
1576         for( i_x = i_width / 16; i_x--;  )
1577         {
1578             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1579             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1580             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1581             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1582             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1583             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1584             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1585             CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
1586         }
1587
1588         /* Do horizontal and vertical scaling */
1589         SCALE_WIDTH;
1590         SCALE_HEIGHT(444, 2);
1591     }
1592 }
1593
1594 /*****************************************************************************
1595  * ConvertYUV420RGB24: color YUV 4:2:0 to RGB 3 Bpp
1596  *****************************************************************************/
1597 static void ConvertYUV420RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1598                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1599                                 int i_matrix_coefficients )
1600 {
1601     /* XXX?? */
1602 }
1603
1604 /*****************************************************************************
1605  * ConvertYUV422RGB24: color YUV 4:2:2 to RGB 3 Bpp
1606  *****************************************************************************/
1607 static void ConvertYUV422RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1608                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1609                                 int i_matrix_coefficients )
1610 {
1611     /* XXX?? */
1612 }
1613
1614 /*****************************************************************************
1615  * ConvertYUV444RGB24: color YUV 4:4:4 to RGB 3 Bpp
1616  *****************************************************************************/
1617 static void ConvertYUV444RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1618                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1619                                 int i_matrix_coefficients )
1620 {
1621     /* XXX?? */
1622 }
1623
1624 /*****************************************************************************
1625  * ConvertYUV420RGB32: color YUV 4:2:0 to RGB 4 Bpp
      *****************************************************************************
      * Walks the i_height source lines. For each line the inner loop runs
      * i_width / 16 times and expands eight CONVERT_YUV_PIXEL(4), CONVERT_Y_PIXEL(4)
      * pairs per pass; SCALE_WIDTH and SCALE_HEIGHT(420, 4) are then expanded once.
      *
      * All four macros are defined outside this function.
      * NOTE(review): several locals below are never named in the visible body
      * (i_uval, i_vval, i_red, i_green, i_blue, p_ybase, p_offset, ...), so the
      * macros presumably use them by name - check the macro definitions before
      * renaming or removing any local.
      *
      * Parameters:
      *  p_vout                     supplies the p_rgb32 conversion table, the
      *                             conversion buffer and the offset array
      *  p_pic                      destination, 32 bits per pixel
      *  p_y, p_u, p_v              source planes (not referenced directly in the
      *                             visible body; presumably read by the macros)
      *  i_width, i_height          source size: loop bounds, also given to SetOffset
      *  i_pic_width, i_pic_height  given to SetOffset along with the source size
      *                             (presumably the output size - TODO confirm)
      *  i_pic_line_width           reduced by i_pic_width on entry (line skip)
      *  i_matrix_coefficients      not referenced in the visible body
1626  *****************************************************************************/
1627 static void ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1628                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1629                                 int i_matrix_coefficients )
1630 {
1631     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1632     int         i_vertical_scaling;                 /* vertical scaling type */
1633     int         i_x, i_y;                 /* horizontal and vertical indexes */
1634     int         i_scale_count;                       /* scale modulo counter */
1635     int         i_uval, i_vval;                           /* U and V samples */
1636     int         i_red, i_green, i_blue;          /* U and V modified samples */
1637     int         i_chroma_width;                              /* chroma width */
1638     u32 *       p_yuv;                              /* base conversion table */
1639     u32 *       p_ybase;                     /* Y dependent conversion table */
1640     u32 *       p_pic_start;       /* beginning of the current line for copy */
1641     u32 *       p_buffer_start;                   /* conversion buffer start */
1642     u32 *       p_buffer;                       /* conversion buffer pointer */
1643     int *       p_offset_start;                        /* offset array start */
1644     int *       p_offset;                            /* offset array pointer */
1645
1646     /*
1647      * Initialize some values  - i_pic_line_width will store the line skip
1648      */
1649     i_pic_line_width -= i_pic_width;
1650     i_chroma_width =    i_width / 2;
1651     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1652     p_buffer_start =    p_vout->yuv.p_buffer;
1653     p_offset_start =    p_vout->yuv.p_offset;
         /* SetOffset receives both sizes and fills in the two scaling flags;
          * it is also handed the offset array */
1654     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1655                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1656
1657     /*
1658      * Perform conversion
1659      */
1660     i_scale_count = i_pic_height;
1661     for( i_y = 0; i_y < i_height; i_y++ )
1662     {
1663         /* Mark beginning of line for possible later line copy, and initialize
1664          * buffer: the intermediate buffer is the target when horizontal
              * scaling is enabled, otherwise the picture itself */
1665         p_pic_start =   p_pic;
1666         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1667
1668         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1669          * pixels wide blocks. The loop runs i_width / 16 times, so a
              * remainder of i_width modulo 16 is not covered by it */
1670         for( i_x = i_width / 16; i_x--;  )
1671         {
1672             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1673             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1674             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1675             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1676             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1677             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1678             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1679             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1680         }
1681
1682         /* Do horizontal and vertical scaling */
1683         SCALE_WIDTH;
1684         SCALE_HEIGHT(420, 4);
1685     }
1686 }
1687
1688 /*****************************************************************************
1689  * ConvertYUV422RGB32: color YUV 4:2:2 to RGB 4 Bpp
      *****************************************************************************
      * Same statements as ConvertYUV420RGB32; the only difference in the visible
      * body is the first argument given to SCALE_HEIGHT (422 instead of 420).
      *
      * For each of the i_height source lines the inner loop runs i_width / 16
      * times and expands eight CONVERT_YUV_PIXEL(4), CONVERT_Y_PIXEL(4) pairs per
      * pass; SCALE_WIDTH and SCALE_HEIGHT(422, 4) are then expanded once.
      *
      * The macros are defined outside this function.
      * NOTE(review): locals that are never named in the visible body (i_uval,
      * i_vval, i_red, i_green, i_blue, p_ybase, p_offset, ...) are presumably
      * used by the macros by name - check their definitions before renaming.
      *
      * Parameters:
      *  p_vout                     supplies the p_rgb32 conversion table, the
      *                             conversion buffer and the offset array
      *  p_pic                      destination, 32 bits per pixel
      *  p_y, p_u, p_v              source planes (not referenced directly in the
      *                             visible body; presumably read by the macros)
      *  i_width, i_height          source size: loop bounds, also given to SetOffset
      *  i_pic_width, i_pic_height  given to SetOffset along with the source size
      *                             (presumably the output size - TODO confirm)
      *  i_pic_line_width           reduced by i_pic_width on entry (line skip)
      *  i_matrix_coefficients      not referenced in the visible body
1690  *****************************************************************************/
1691 static void ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1692                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1693                                 int i_matrix_coefficients )
1694 {
1695     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1696     int         i_vertical_scaling;                 /* vertical scaling type */
1697     int         i_x, i_y;                 /* horizontal and vertical indexes */
1698     int         i_scale_count;                       /* scale modulo counter */
1699     int         i_uval, i_vval;                           /* U and V samples */
1700     int         i_red, i_green, i_blue;          /* U and V modified samples */
1701     int         i_chroma_width;                              /* chroma width */
1702     u32 *       p_yuv;                              /* base conversion table */
1703     u32 *       p_ybase;                     /* Y dependent conversion table */
1704     u32 *       p_pic_start;       /* beginning of the current line for copy */
1705     u32 *       p_buffer_start;                   /* conversion buffer start */
1706     u32 *       p_buffer;                       /* conversion buffer pointer */
1707     int *       p_offset_start;                        /* offset array start */
1708     int *       p_offset;                            /* offset array pointer */
1709
1710     /*
1711      * Initialize some values  - i_pic_line_width will store the line skip
1712      */
1713     i_pic_line_width -= i_pic_width;
1714     i_chroma_width =    i_width / 2;
1715     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1716     p_buffer_start =    p_vout->yuv.p_buffer;
1717     p_offset_start =    p_vout->yuv.p_offset;
         /* SetOffset receives both sizes and fills in the two scaling flags;
          * it is also handed the offset array */
1718     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1719                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1720
1721     /*
1722      * Perform conversion
1723      */
1724     i_scale_count = i_pic_height;
1725     for( i_y = 0; i_y < i_height; i_y++ )
1726     {
1727         /* Mark beginning of line for possible later line copy, and initialize
1728          * buffer: the intermediate buffer is the target when horizontal
              * scaling is enabled, otherwise the picture itself */
1729         p_pic_start =   p_pic;
1730         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1731
1732         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1733          * pixels wide blocks. The loop runs i_width / 16 times, so a
              * remainder of i_width modulo 16 is not covered by it */
1734         for( i_x = i_width / 16; i_x--;  )
1735         {
1736             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1737             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1738             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1739             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1740             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1741             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1742             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1743             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
1744         }
1745
1746         /* Do horizontal and vertical scaling */
1747         SCALE_WIDTH;
1748         SCALE_HEIGHT(422, 4);
1749     }
1750 }
1751
1752 /*****************************************************************************
1753  * ConvertYUV444RGB32: color YUV 4:4:4 to RGB 4 Bpp
      *****************************************************************************
      * For each of the i_height source lines the inner loop runs i_width / 16
      * times and expands sixteen CONVERT_YUV_PIXEL(4) per pass (CONVERT_Y_PIXEL
      * is not used in this variant); SCALE_WIDTH and SCALE_HEIGHT(444, 4) are
      * then expanded once.
      *
      * The macros are defined outside this function.
      * NOTE(review): locals that are never named in the visible body (i_uval,
      * i_vval, i_red, i_green, i_blue, p_ybase, p_offset, ...) are presumably
      * used by the macros by name - check their definitions before renaming.
      * NOTE(review): i_chroma_width is declared but never assigned here; it may
      * still be named inside a macro expansion, so confirm before removing it.
      *
      * Parameters:
      *  p_vout                     supplies the p_rgb32 conversion table, the
      *                             conversion buffer and the offset array
      *  p_pic                      destination, 32 bits per pixel
      *  p_y, p_u, p_v              source planes (not referenced directly in the
      *                             visible body; presumably read by the macros)
      *  i_width, i_height          source size: loop bounds, also given to SetOffset
      *  i_pic_width, i_pic_height  given to SetOffset along with the source size
      *                             (presumably the output size - TODO confirm)
      *  i_pic_line_width           reduced by i_pic_width on entry (line skip)
      *  i_matrix_coefficients      not referenced in the visible body
1754  *****************************************************************************/
1755 static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
1756                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
1757                                 int i_matrix_coefficients )
1758 {
1759     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
1760     int         i_vertical_scaling;                 /* vertical scaling type */
1761     int         i_x, i_y;                 /* horizontal and vertical indexes */
1762     int         i_scale_count;                       /* scale modulo counter */
1763     int         i_uval, i_vval;                           /* U and V samples */
1764     int         i_red, i_green, i_blue;          /* U and V modified samples */
1765     int         i_chroma_width;                    /* chroma width, not used */
1766     u32 *       p_yuv;                              /* base conversion table */
1767     u32 *       p_ybase;                     /* Y dependent conversion table */
1768     u32 *       p_pic_start;       /* beginning of the current line for copy */
1769     u32 *       p_buffer_start;                   /* conversion buffer start */
1770     u32 *       p_buffer;                       /* conversion buffer pointer */
1771     int *       p_offset_start;                        /* offset array start */
1772     int *       p_offset;                            /* offset array pointer */
1773
1774     /*
1775      * Initialize some values  - i_pic_line_width will store the line skip
1776      */
1777     i_pic_line_width -= i_pic_width;
1778     p_yuv =             p_vout->yuv.yuv.p_rgb32;
1779     p_buffer_start =    p_vout->yuv.p_buffer;
1780     p_offset_start =    p_vout->yuv.p_offset;
         /* SetOffset receives both sizes and fills in the two scaling flags;
          * it is also handed the offset array */
1781     SetOffset( i_width, i_height, i_pic_width, i_pic_height,
1782                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
1783
1784     /*
1785      * Perform conversion
1786      */
1787     i_scale_count = i_pic_height;
1788     for( i_y = 0; i_y < i_height; i_y++ )
1789     {
1790         /* Mark beginning of line for possible later line copy, and initialize
1791          * buffer: the intermediate buffer is the target when horizontal
              * scaling is enabled, otherwise the picture itself */
1792         p_pic_start =   p_pic;
1793         p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
1794
1795         /* Do YUV conversion to buffer - YUV picture is always formed of 16
1796          * pixels wide blocks. The loop runs i_width / 16 times, so a
              * remainder of i_width modulo 16 is not covered by it */
1797         for( i_x = i_width / 16; i_x--;  )
1798         {
1799             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1800             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1801             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1802             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1803             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1804             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1805             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1806             CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
1807         }
1808
1809         /* Do horizontal and vertical scaling */
1810         SCALE_WIDTH;
1811         SCALE_HEIGHT(444, 4);
1812     }
1813 }
1814
1815 /*-------------------- walken code follows ----------------------------------*/
1816
1817 /*
1818  * YUV to RGB routines.
1819  *
1820  * these routines calculate r, g and b values from each pixel's y, u and v.
1821  * these r, g and b values are then passed through a table lookup to take the
1822  * gamma curve into account and find the corresponding pixel value.
1823  *
1824  * the table must store more than 3*256 values because of the possibility
1825  * of overflow in the yuv->rgb calculation. actually the calculated r,g,b
1826  * values are in the following intervals :
1827  * -176 to 255+176 for red
1828  * -133 to 255+133 for green
1829  * -222 to 255+222 for blue
1830  *
1831  * If the input y,u,v values are right, the r,g,b results are not expected
1832  * to move out of the 0 to 255 interval but who knows what will happen in
1833  * real use...
1834  *
1835  * the red, green and blue conversion tables are stored in a single 1935-entry
1836  * array. The respective positions of each component in the array have been
1837  * calculated to minimize the cache interactions of the 3 tables.
1838  */
1839
1840 #if 0
1841 /* XXX??
      * yuvToRgb24: convert `width` pixels to 3 bytes per pixel.
      * The whole function sits between #if 0 and #endif, so it is never compiled.
      *
      * Y, U, V : source samples; one U and one V sample are read for every two
      *           Y samples, plus one more U/V pair for a trailing odd pixel.
      * dest    : output, advanced by 3 bytes per pixel; the looked-up value is
      *           stored as three chars: value, value >> 8, value >> 16.
      * table   : lookup table, addressed relative to table + Y sample; the
      *           three lookups of a pixel are OR-ed together.
      * width   : number of pixels to convert.
      *
      * V_RED_COEF, U_GREEN_COEF, V_GREEN_COEF, U_BLUE_COEF and SHIFT are not
      * defined in this block.
      * NOTE(review): 1501, 135 and 818 look like the bases of the red, green and
      * blue sub-tables inside the 1935-entry table - confirm against the code
      * that builds the table.
      */
1842 static void yuvToRgb24 (unsigned char * Y,
1843                         unsigned char * U, unsigned char * V,
1844                         char * dest, int table[1935], int width)
1845 {
1846     int i;
1847     int u;
1848     int v;
1849     int uvRed;
1850     int uvGreen;
1851     int uvBlue;
1852     int * tableY;
1853     int tmp24;
1854
         /* Unrolled main loop: each pass handles 8 pixels, i.e. 4 U/V samples
          * with 2 Y samples each */
1855     i = width >> 3;
1856     while (i--) {
             /* chroma contribution shared by the next two pixels */
1857         u = *(U++);
1858         v = *(V++);
1859         uvRed = (V_RED_COEF*v) >> SHIFT;
1860         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1861         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1862
             /* first pixel of the pair: three lookups relative to the Y sample */
1863         tableY = table + *(Y++);
1864         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1865                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1866                         uvGreen] |
1867                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1868         *(dest++) = tmp24;
1869         *(dest++) = tmp24 >> 8;
1870         *(dest++) = tmp24 >> 16;
1871
             /* second pixel of the pair: same chroma terms, next Y sample */
1872         tableY = table + *(Y++);
1873         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1874                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1875                         uvGreen] |
1876                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1877         *(dest++) = tmp24;
1878         *(dest++) = tmp24 >> 8;
1879         *(dest++) = tmp24 >> 16;
1880
1881         u = *(U++);
1882         v = *(V++);
1883         uvRed = (V_RED_COEF*v) >> SHIFT;
1884         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1885         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1886
1887         tableY = table + *(Y++);
1888         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1889                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1890                         uvGreen] |
1891                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1892         *(dest++) = tmp24;
1893         *(dest++) = tmp24 >> 8;
1894         *(dest++) = tmp24 >> 16;
1895
1896         tableY = table + *(Y++);
1897         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1898                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1899                         uvGreen] |
1900                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1901         *(dest++) = tmp24;
1902         *(dest++) = tmp24 >> 8;
1903         *(dest++) = tmp24 >> 16;
1904
1905         u = *(U++);
1906         v = *(V++);
1907         uvRed = (V_RED_COEF*v) >> SHIFT;
1908         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1909         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1910
1911         tableY = table + *(Y++);
1912         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1913                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1914                         uvGreen] |
1915                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1916         *(dest++) = tmp24;
1917         *(dest++) = tmp24 >> 8;
1918         *(dest++) = tmp24 >> 16;
1919
1920         tableY = table + *(Y++);
1921         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1922                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1923                         uvGreen] |
1924                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1925         *(dest++) = tmp24;
1926         *(dest++) = tmp24 >> 8;
1927         *(dest++) = tmp24 >> 16;
1928
1929         u = *(U++);
1930         v = *(V++);
1931         uvRed = (V_RED_COEF*v) >> SHIFT;
1932         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1933         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1934
1935         tableY = table + *(Y++);
1936         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1937                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1938                         uvGreen] |
1939                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1940         *(dest++) = tmp24;
1941         *(dest++) = tmp24 >> 8;
1942         *(dest++) = tmp24 >> 16;
1943
1944         tableY = table + *(Y++);
1945         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1946                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1947                         uvGreen] |
1948                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1949         *(dest++) = tmp24;
1950         *(dest++) = tmp24 >> 8;
1951         *(dest++) = tmp24 >> 16;
1952     }
1953
         /* Remaining whole pairs of pixels after the 8-pixel passes */
1954     i = (width & 7) >> 1;
1955     while (i--) {
1956         u = *(U++);
1957         v = *(V++);
1958         uvRed = (V_RED_COEF*v) >> SHIFT;
1959         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1960         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1961
1962         tableY = table + *(Y++);
1963         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1964                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1965                         uvGreen] |
1966                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1967         *(dest++) = tmp24;
1968         *(dest++) = tmp24 >> 8;
1969         *(dest++) = tmp24 >> 16;
1970
1971         tableY = table + *(Y++);
1972         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1973                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1974                         uvGreen] |
1975                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1976         *(dest++) = tmp24;
1977         *(dest++) = tmp24 >> 8;
1978         *(dest++) = tmp24 >> 16;
1979     }
1980
         /* Odd width: one last pixel, which reads one more U and V sample */
1981     if (width & 1) {
1982         u = *(U++);
1983         v = *(V++);
1984         uvRed = (V_RED_COEF*v) >> SHIFT;
1985         uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
1986         uvBlue = (U_BLUE_COEF*u) >> SHIFT;
1987
1988         tableY = table + *(Y++);
1989         tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
1990                  tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
1991                         uvGreen] |
1992                  tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
1993         *(dest++) = tmp24;
1994         *(dest++) = tmp24 >> 8;
1995         *(dest++) = tmp24 >> 16;
1996     }
1997 }
1998 #endif