]> git.sesse.net Git - vlc/blobdiff - src/video_output/video_yuv.c
. nouveaux plugins - ne fonctionnent pas encore tous
[vlc] / src / video_output / video_yuv.c
index d3c414411670c4bb4d483262bc3ad356451341fa..2c3bc260427da5d22119db66316e960844707708 100644 (file)
@@ -1,77 +1,86 @@
-/*******************************************************************************
+/*****************************************************************************
  * video_yuv.c: YUV transformation functions
- * (c)1999 VideoLAN
- *******************************************************************************
  * Provides functions to perform the YUV conversion. The functions provided here
  * are a complete and portable C implementation, and may be replaced in certain
  * case by optimized functions.
- *******************************************************************************/
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ *
+ * Authors:
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *****************************************************************************/
 
-/*******************************************************************************
+/*****************************************************************************
  * Preamble
- *******************************************************************************/
-#include <math.h>
-#include <errno.h>
-#include <string.h>
-#include <stdlib.h>
+ *****************************************************************************/
+#include "defs.h"
+
+#include <math.h>                                            /* exp(), pow() */
+#include <errno.h>                                                 /* ENOMEM */
+#include <stdlib.h>                                                /* free() */
+#include <string.h>                                            /* strerror() */
 
-#include "common.h"
 #include "config.h"
+#include "common.h"
+#include "threads.h"
 #include "mtime.h"
-#include "vlc_thread.h"
+#include "plugins.h"
 #include "video.h"
 #include "video_output.h"
 #include "video_yuv.h"
+
 #include "intf_msg.h"
 
-/*******************************************************************************
+/*****************************************************************************
  * Constants
- *******************************************************************************/
+ *****************************************************************************/
 
-/* RGB/YUV inversion matrix (ISO/IEC 13818-2 section 6.3.6, table 6.9) */
-//?? no more used ?
-const int MATRIX_COEFFICIENTS_TABLE[8][4] =
-{
-  {117504, 138453, 13954, 34903},       /* no sequence_display_extension */
-  {117504, 138453, 13954, 34903},       /* ITU-R Rec. 709 (1990) */
-  {104597, 132201, 25675, 53279},       /* unspecified */
-  {104597, 132201, 25675, 53279},       /* reserved */
-  {104448, 132798, 24759, 53109},       /* FCC */
-  {104597, 132201, 25675, 53279},       /* ITU-R Rec. 624-4 System B, G */
-  {104597, 132201, 25675, 53279},       /* SMPTE 170M */
-  {117579, 136230, 16907, 35559}        /* SMPTE 240M (1987) */
-};
-
-/* Margins and offsets in convertion tables - Margins are used in case a RGB
- * RGB convertion would give a value outside the 0-255 range. Offsets have been
- * calculated to avoid using the same cache line for 2 tables. Convertion tables
+/* Margins and offsets in conversion tables - Margins are used in case an
+ * RGB conversion would give a value outside the 0-255 range. Offsets have been
+ * calculated to avoid using the same cache line for 2 tables. Conversion tables
  * are 2*MARGIN + 256 long and stores pixels.*/
 #define RED_MARGIN      178
 #define GREEN_MARGIN    135
 #define BLUE_MARGIN     224
-#define RED_OFFSET      1501                                   /* 1323 to 1935 */
-#define GREEN_OFFSET    135                                        /* 0 to 526 */
-#define BLUE_OFFSET     818                                     /* 594 to 1298 */
-#define RGB_TABLE_SIZE  1935                               /* total table size */
+#define RED_OFFSET      1501                                 /* 1323 to 1935 */
+#define GREEN_OFFSET    135                                      /* 0 to 526 */
+#define BLUE_OFFSET     818                                   /* 594 to 1298 */
+#define RGB_TABLE_SIZE  1935                             /* total table size */
 
 #define GRAY_MARGIN     384
-#define GRAY_TABLE_SIZE 1024                               /* total table size */
+#define GRAY_TABLE_SIZE 1024                             /* total table size */
+
+#define PALETTE_TABLE_SIZE 2176          /* YUV -> 8bpp palette lookup table */
 
-//??
+/* macros used for YUV pixel conversions */
 #define SHIFT 20
 #define U_GREEN_COEF    ((int)(-0.391 * (1<<SHIFT) / 1.164))
 #define U_BLUE_COEF     ((int)(2.018 * (1<<SHIFT) / 1.164))
 #define V_RED_COEF      ((int)(1.596 * (1<<SHIFT) / 1.164))
 #define V_GREEN_COEF    ((int)(-0.813 * (1<<SHIFT) / 1.164))
 
-/*******************************************************************************
+//#define NODITHER
+
+/*****************************************************************************
  * Local prototypes
- *******************************************************************************/
-static int      BinaryLog         ( u32 i );
-static void     MaskToShift       ( int *pi_right, int *pi_left, u32 i_mask );
+ *****************************************************************************/
 static void     SetGammaTable     ( int *pi_table, double f_gamma );
 static void     SetYUV            ( vout_thread_t *p_vout );
-static void     SetOffset         ( int i_width, int i_height, int i_pic_width, int i_pic_height, 
+static void     SetOffset         ( int i_width, int i_height, int i_pic_width, int i_pic_height,
                                     boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset );
 
 static void     ConvertY4Gray8    ( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
@@ -124,37 +133,95 @@ static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
                                     int i_matrix_coefficients );
 
 /*****************************************************************************
- * CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL: pixel convertion blocks
+ * CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL: pixel conversion blocks
  *****************************************************************************
- * These convertion routines are used by YUV convertion functions.
- * Convertion are made from p_y, p_u, p_v, which are modified, to p_buffer,
+ * These conversion routines are used by YUV conversion functions.
+ * Conversions are made from p_y, p_u, p_v, which are modified, to p_buffer,
  * which is also modified.
  *****************************************************************************/
-#define CONVERT_Y_PIXEL                                                       \
+#define CONVERT_Y_PIXEL( BPP )                                                \
     /* Only Y sample is present */                                            \
     p_ybase = p_yuv + *p_y++;                                                 \
     *p_buffer++ = p_ybase[RED_OFFSET-((V_RED_COEF*128)>>SHIFT) + i_red] |     \
         p_ybase[GREEN_OFFSET-(((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT)       \
         + i_green ] | p_ybase[BLUE_OFFSET-((U_BLUE_COEF*128)>>SHIFT) + i_blue];
 
-#define CONVERT_YUV_PIXEL                                                     \
+#define CONVERT_YUV_PIXEL( BPP )                                              \
     /* Y, U and V samples are present */                                      \
     i_uval =    *p_u++;                                                       \
     i_vval =    *p_v++;                                                       \
     i_red =     (V_RED_COEF * i_vval) >> SHIFT;                               \
     i_green =   (U_GREEN_COEF * i_uval + V_GREEN_COEF * i_vval) >> SHIFT;     \
     i_blue =    (U_BLUE_COEF * i_uval) >> SHIFT;                              \
-    CONVERT_Y_PIXEL                                                           \
+    CONVERT_Y_PIXEL( BPP )                                                    \
+
+/*****************************************************************************
+ * CONVERT_4YUV_PIXELS, CONVERT_4YUV_PIXELS_SCALE: dither 4 pixels in 8 bpp
+ *****************************************************************************
+ * These macros dither 4 pixels in 8 bpp, with or without horizontal scaling.
+ *****************************************************************************/
+#define CONVERT_4YUV_PIXELS( CHROMA )                                         \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y++ + dither10[i_real_y]) >> 4) << 7)                           \
+      + ((*p_u + dither20[i_real_y]) >> 5) * 9                                \
+      + ((*p_v + dither20[i_real_y]) >> 5) ];                                 \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y++ + dither11[i_real_y]) >> 4) << 7)                           \
+      + ((*p_u++ + dither21[i_real_y]) >> 5) * 9                              \
+      + ((*p_v++ + dither21[i_real_y]) >> 5) ];                               \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y++ + dither12[i_real_y]) >> 4) << 7)                           \
+      + ((*p_u + dither22[i_real_y]) >> 5) * 9                                \
+      + ((*p_v + dither22[i_real_y]) >> 5) ];                                 \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y++ + dither13[i_real_y]) >> 4) << 7)                           \
+      + ((*p_u++ + dither23[i_real_y]) >> 5) * 9                              \
+      + ((*p_v++ + dither23[i_real_y]) >> 5) ];                               \
+
+#define CONVERT_4YUV_PIXELS_SCALE( CHROMA )                                   \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y + dither10[i_real_y]) >> 4) << 7)                             \
+        + ((*p_u + dither20[i_real_y])   >> 5) * 9                            \
+        + ((*p_v + dither20[i_real_y])   >> 5) ];                             \
+    b_jump_uv = (b_jump_uv + *p_offset) & 0x1;                                \
+    p_y += *p_offset;                                                         \
+    p_u += *p_offset   & b_jump_uv;                                           \
+    p_v += *p_offset++ & b_jump_uv;                                           \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y + dither11[i_real_y]) >> 4) << 7)                             \
+        + ((*p_u + dither21[i_real_y])   >> 5) * 9                            \
+        + ((*p_v + dither21[i_real_y])   >> 5) ];                             \
+    b_jump_uv = (b_jump_uv + *p_offset) & 0x1;                                \
+    p_y += *p_offset;                                                         \
+    p_u += *p_offset   & b_jump_uv;                                           \
+    p_v += *p_offset++ & b_jump_uv;                                           \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y + dither12[i_real_y]) >> 4) << 7)                             \
+        + ((*p_u + dither22[i_real_y])   >> 5) * 9                            \
+        + ((*p_v + dither22[i_real_y])   >> 5) ];                             \
+    b_jump_uv = (b_jump_uv + *p_offset) & 0x1;                                \
+    p_y += *p_offset;                                                         \
+    p_u += *p_offset   & b_jump_uv;                                           \
+    p_v += *p_offset++ & b_jump_uv;                                           \
+    *p_pic++ = p_lookup[                                                      \
+        (((*p_y + dither13[i_real_y]) >> 4) << 7)                             \
+        + ((*p_u + dither23[i_real_y])   >> 5) * 9                            \
+        + ((*p_v + dither23[i_real_y])   >> 5) ];                             \
+    b_jump_uv = (b_jump_uv + *p_offset) & 0x1;                                \
+    p_y += *p_offset;                                                         \
+    p_u += *p_offset   & b_jump_uv;                                           \
+    p_v += *p_offset++ & b_jump_uv;                                           \
 
 /*****************************************************************************
  * SCALE_WIDTH: scale a line horizontally
  *****************************************************************************
- * This macro scale a line using rendering buffer and offset array.
+ * This macro scales a line using rendering buffer and offset array. It works
+ * for 1, 2 and 4 Bpp.
  *****************************************************************************/
 #define SCALE_WIDTH                                                           \
     if( b_horizontal_scaling )                                                \
     {                                                                         \
-        /* Horizontal scaling, convertion has been done to buffer.            \
+        /* Horizontal scaling, conversion has been done to buffer.            \
          * Rewind buffer and offset, then copy and scale line */              \
         p_buffer = p_buffer_start;                                            \
         p_offset = p_offset_start;                                            \
@@ -181,18 +248,53 @@ static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
     }                                                                         \
     else                                                                      \
     {                                                                         \
-        /* No scaling, convertion has been done directly in picture memory.   \
+        /* No scaling, conversion has been done directly in picture memory.   \
          * Increment of picture pointer to end of line is still needed */     \
         p_pic += i_pic_width + i_pic_line_width;                              \
     }                                                                         \
 
+
+/*****************************************************************************
+ * SCALE_WIDTH_DITHER: scale a line horizontally for dithered 8 bpp
+ *****************************************************************************
+ * This macro scales a line using an offset array.
+ *****************************************************************************/
+#define SCALE_WIDTH_DITHER( CHROMA )                                          \
+    if( b_horizontal_scaling )                                                \
+    {                                                                         \
+        /* Horizontal scaling, but we can't use a buffer due to dither */     \
+        p_offset = p_offset_start;                                            \
+        b_jump_uv = 0;                                                        \
+        for( i_x = i_pic_width / 16; i_x--; )                                 \
+        {                                                                     \
+            CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
+            CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
+            CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
+            CONVERT_4YUV_PIXELS_SCALE( CHROMA )                               \
+        }                                                                     \
+    }                                                                         \
+    else                                                                      \
+    {                                                                         \
+        for( i_x = i_width / 16; i_x--;  )                                    \
+        {                                                                     \
+            CONVERT_4YUV_PIXELS( CHROMA )                                     \
+            CONVERT_4YUV_PIXELS( CHROMA )                                     \
+            CONVERT_4YUV_PIXELS( CHROMA )                                     \
+            CONVERT_4YUV_PIXELS( CHROMA )                                     \
+        }                                                                     \
+    }                                                                         \
+    /* Increment of picture pointer to end of line is still needed */         \
+    p_pic += i_pic_line_width;                                                \
+    i_real_y = (i_real_y + 1) & 0x3;                                          \
+
 /*****************************************************************************
  * SCALE_HEIGHT: handle vertical scaling
  *****************************************************************************
  * This macro handle vertical scaling for a picture. CHROMA may be 420, 422 or
- * 444 for RGB convertion, or 400 for gray convertion.
+ * 444 for RGB conversion, or 400 for gray conversion. It works for 1, 2, 3
+ * and 4 Bpp.
  *****************************************************************************/
-#define SCALE_HEIGHT( CHROMA )                                                \
+#define SCALE_HEIGHT( CHROMA, BPP )                                           \
     /* If line is odd, rewind 4:2:0 U and V samples */                        \
     if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )                \
     {                                                                         \
@@ -236,8 +338,21 @@ static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
             {                                                                 \
                 *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
                 *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
-                *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
-                *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );           \
+                if( BPP > 1 )                               /* 2, 3, 4 Bpp */ \
+                {                                                             \
+                    *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
+                    *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
+                }                                                             \
+                if( BPP > 2 )                                  /* 3, 4 Bpp */ \
+                {                                                             \
+                    *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
+                    *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
+                }                                                             \
+                if( BPP > 3 )                                     /* 4 Bpp */ \
+                {                                                             \
+                    *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
+                    *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ );       \
+                }                                                             \
             }                                                                 \
             p_pic +=        i_pic_line_width;                                 \
             p_pic_start +=  i_pic_line_width;                                 \
@@ -246,6 +361,63 @@ static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
         break;                                                                \
     }                                                                         \
 
+/*****************************************************************************
+ * SCALE_HEIGHT_DITHER: handle vertical scaling for dithered 8 bpp
+ *****************************************************************************
+ * This macro handles vertical scaling for a picture. CHROMA may be 420, 422 or
+ * 444 for RGB conversion, or 400 for gray conversion.
+ *****************************************************************************/
+#define SCALE_HEIGHT_DITHER( CHROMA )                                         \
+                                                                              \
+    /* If line is odd, rewind 4:2:0 U and V samples */                        \
+    if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )                \
+    {                                                                         \
+        p_u -= i_chroma_width;                                                \
+        p_v -= i_chroma_width;                                                \
+    }                                                                         \
+                                                                              \
+    /*                                                                        \
+     * Handle vertical scaling. The current line can be copied or next one    \
+     * can be ignored.                                                        \
+     */                                                                       \
+                                                                              \
+    switch( i_vertical_scaling )                                              \
+    {                                                                         \
+    case -1:                             /* vertical scaling factor is < 1 */ \
+        while( (i_scale_count -= i_pic_height) >= 0 )                         \
+        {                                                                     \
+            /* Height reduction: skip next source line */                     \
+            p_y += i_width;                                                   \
+            i_y++;                                                            \
+            if( (CHROMA == 420) || (CHROMA == 422) )                          \
+            {                                                                 \
+                if( i_y & 0x1 )                                               \
+                {                                                             \
+                    p_u += i_chroma_width;                                    \
+                    p_v += i_chroma_width;                                    \
+                }                                                             \
+            }                                                                 \
+            else if( CHROMA == 444 )                                          \
+            {                                                                 \
+                p_u += i_width;                                               \
+                p_v += i_width;                                               \
+            }                                                                 \
+        }                                                                     \
+        i_scale_count += i_height;                                            \
+        break;                                                                \
+    case 1:                              /* vertical scaling factor is > 1 */ \
+        while( (i_scale_count -= i_height) > 0 )                              \
+        {                                                                     \
+            SCALE_WIDTH_DITHER( CHROMA );                                     \
+            p_y -= i_width;                                                   \
+            p_u -= i_chroma_width;                                            \
+            p_v -= i_chroma_width;                                            \
+            p_pic +=        i_pic_line_width;                                 \
+        }                                                                     \
+        i_scale_count += i_pic_height;                                        \
+        break;                                                                \
+    }                                                                         \
+
 /*****************************************************************************
  * vout_InitYUV: allocate and initialize translations tables
  *****************************************************************************
@@ -255,60 +427,51 @@ static void     ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
 int vout_InitYUV( vout_thread_t *p_vout )
 {
     size_t      tables_size;                        /* tables size, in bytes */
-    
-    /* Computes tables size */
-    switch( p_vout->i_screen_depth )
+
+    /* Computes tables size - 3 Bpp use 32 bits pixel entries in tables */
+    switch( p_vout->i_bytes_per_pixel )
     {
-    case 8:
-        tables_size = sizeof( u8 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
-        break;        
-    case 15:
-    case 16:
+    case 1:
+        tables_size = sizeof( u8 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : PALETTE_TABLE_SIZE);
+        break;
+    case 2:
         tables_size = sizeof( u16 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
-        break;        
-    case 24:        
-    case 32:
-#ifndef DEBUG
-    default:        
-#endif
-        tables_size = sizeof( u32 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);        
-        break;        
-#ifdef DEBUG
+        break;
+    case 3:
+    case 4:
     default:
-        intf_DbgMsg("error: invalid screen depth %d\n", p_vout->i_screen_depth );
-        tables_size = 0;
-        break;        
-#endif      
+        tables_size = sizeof( u32 ) * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : RGB_TABLE_SIZE);
+        break;
     }
-    
+
     /* Allocate memory */
     p_vout->yuv.p_base = malloc( tables_size );
     if( p_vout->yuv.p_base == NULL )
     {
         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
-        return( 1 );                
+        return( 1 );
     }
 
-    /* Allocate memory for convertion buffer and offset array */
+    /* Allocate memory for conversion buffer and offset array */
     p_vout->yuv.p_buffer = malloc( VOUT_MAX_WIDTH * p_vout->i_bytes_per_pixel );
     if( p_vout->yuv.p_buffer == NULL )
     {
         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
         free( p_vout->yuv.p_base );
-        return( 1 );                
+        return( 1 );
     }
-    p_vout->yuv.p_offset = malloc( p_vout->i_width * sizeof( int ) );    
+    p_vout->yuv.p_offset = malloc( p_vout->i_width * sizeof( int ) );
     if( p_vout->yuv.p_offset == NULL )
     {
         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
         free( p_vout->yuv.p_base );
-        free( p_vout->yuv.p_buffer );        
-        return( 1 );                
+        free( p_vout->yuv.p_buffer );
+        return( 1 );
     }
 
     /* Initialize tables */
     SetYUV( p_vout );
-    return( 0 );    
+    return( 0 );
 }
 
 /*****************************************************************************
@@ -319,8 +482,8 @@ int vout_InitYUV( vout_thread_t *p_vout )
  *****************************************************************************/
 int vout_ResetYUV( vout_thread_t *p_vout )
 {
-    vout_EndYUV( p_vout );    
-    return( vout_InitYUV( p_vout ) );    
+    vout_EndYUV( p_vout );
+    return( vout_InitYUV( p_vout ) );
 }
 
 /*****************************************************************************
@@ -332,72 +495,11 @@ void vout_EndYUV( vout_thread_t *p_vout )
 {
     free( p_vout->yuv.p_base );
     free( p_vout->yuv.p_buffer );
-    free( p_vout->yuv.p_offset );    
+    free( p_vout->yuv.p_offset );
 }
 
 /* following functions are local */
 
-/*****************************************************************************
- * BinaryLog: computes the base 2 log of a binary value
- *****************************************************************************
- * This functions is used by MaskToShift during tables initialisation, to
- * get a bit index from a binary value.
- *****************************************************************************/
-static int BinaryLog(u32 i)
-{
-    int i_log;
-
-    i_log = 0;
-    if (i & 0xffff0000) 
-    {        
-        i_log = 16;
-    }    
-    if (i & 0xff00ff00) 
-    {        
-        i_log += 8;
-    }    
-    if (i & 0xf0f0f0f0) 
-    {        
-        i_log += 4;
-    }    
-    if (i & 0xcccccccc) 
-    {        
-        i_log += 2;
-    }    
-    if (i & 0xaaaaaaaa) 
-    {        
-        i_log++;
-    }    
-    if (i != ((u32)1 << i_log))
-    {        
-       intf_ErrMsg("internal error: binary log overflow\n");        
-    }    
-
-    return( i_log );
-}
-
-/*****************************************************************************
- * MaskToShift: Transform a color mask into right and left shifts
- *****************************************************************************
- * This function is used during table initialisation. It can return a value
- *****************************************************************************/
-static void MaskToShift (int *pi_right, int *pi_left, u32 i_mask)
-{
-    u32 i_low, i_high;                 /* lower hand higher bits of the mask */
-
-    /* Get bits */
-    i_low =  i_mask & (- i_mask);                   /* lower bit of the mask */
-    i_high = i_mask + i_low;                       /* higher bit of the mask */
-
-    /* Transform bits into an index */
-    i_low =  BinaryLog (i_low);
-    i_high = BinaryLog (i_high);
-
-    /* Update pointers and return */
-    *pi_left =   i_low;
-    *pi_right = (8 - i_high + i_low);
-}
-
 /*****************************************************************************
  * SetGammaTable: return intensity table transformed by gamma curve.
  *****************************************************************************
@@ -408,7 +510,7 @@ static void SetGammaTable( int *pi_table, double f_gamma )
     int         i_y;                                       /* base intensity */
 
     /* Use exp(gamma) instead of gamma */
-    f_gamma = exp(f_gamma );
+    f_gamma = exp( f_gamma );
 
     /* Build gamma table */
     for( i_y = 0; i_y < 256; i_y++ )
@@ -424,163 +526,237 @@ static void SetYUV( vout_thread_t *p_vout )
 {
     int         pi_gamma[256];                                /* gamma table */
     int         i_index;                                  /* index in tables */
-    int         i_red_right, i_red_left;                       /* red shifts */
-    int         i_green_right, i_green_left;                 /* green shifts */
-    int         i_blue_right, i_blue_left;                    /* blue shifts */
 
-    /* Build gamma table */    
+    /* Build gamma table */
     SetGammaTable( pi_gamma, p_vout->f_gamma );
-    
-    /*          
-     * Set color masks and shifts
-     */
-    switch( p_vout->i_screen_depth )
-    {
-    case 8:
-        MaskToShift( &i_red_right,   &i_red_left,   0xe0 );
-        MaskToShift( &i_green_right, &i_green_left, 0x1c );
-        MaskToShift( &i_blue_right,  &i_blue_left,  0x03 );        
-        break;        
-    case 15:
-        MaskToShift( &i_red_right,   &i_red_left,   0xf800 );
-        MaskToShift( &i_green_right, &i_green_left, 0x03e0 );
-        MaskToShift( &i_blue_right,  &i_blue_left,  0x001f );        
-        break;        
-    case 16:
-        MaskToShift( &i_red_right,   &i_red_left,   0xf800 );
-        MaskToShift( &i_green_right, &i_green_left, 0x07e0 );
-        MaskToShift( &i_blue_right,  &i_blue_left,  0x001f );
-        break;        
-    case 24:
-    case 32:        
-        MaskToShift( &i_red_right,   &i_red_left,   0x00ff0000 );
-        MaskToShift( &i_green_right, &i_green_left, 0x0000ff00 );
-        MaskToShift( &i_blue_right,  &i_blue_left,  0x000000ff );
-        break;
-#ifdef DEBUG
-    default:
-        intf_DbgMsg("error: invalid screen depth %d\n", p_vout->i_screen_depth );
-        break;        
-#endif      
-    }
 
     /*
      * Set pointers and build YUV tables
-     */        
+     */
     if( p_vout->b_grayscale )
     {
         /* Grayscale: build gray table */
-        switch( p_vout->i_screen_depth )
+        switch( p_vout->i_bytes_per_pixel )
         {
-        case 8:
-            p_vout->yuv.yuv.p_gray8 =  (u8 *)p_vout->yuv.p_base + GRAY_MARGIN;
-            break;        
-        case 15:
-        case 16:         
+        case 1:
+            {
+                u16 bright[256], transp[256];
+
+                p_vout->yuv.yuv.p_gray8 =  (u8 *)p_vout->yuv.p_base + GRAY_MARGIN;
+                for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
+                {
+                    p_vout->yuv.yuv.p_gray8[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
+                    p_vout->yuv.yuv.p_gray8[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
+                }
+                for( i_index = 0; i_index < 256; i_index++)
+                {
+                    p_vout->yuv.yuv.p_gray8[ i_index ] = pi_gamma[ i_index ];
+                    bright[ i_index ] = i_index << 8;
+                    transp[ i_index ] = 0;
+                }
+                /* the colors have been allocated, we can set the palette */
+                p_vout->p_set_palette( p_vout, bright, bright, bright, transp );
+                p_vout->i_white_pixel = 0xff;
+                p_vout->i_black_pixel = 0x00;
+                p_vout->i_gray_pixel = 0x44;
+                p_vout->i_blue_pixel = 0x3b;
+
+                break;
+            }
+        case 2:
             p_vout->yuv.yuv.p_gray16 =  (u16 *)p_vout->yuv.p_base + GRAY_MARGIN;
             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_gray16[ -i_index ] = 
-                    ((pi_gamma[ 0 ] >> i_red_right)   << i_red_left)   |
-                    ((pi_gamma[ 0 ] >> i_green_right) << i_green_left) |
-                    ((pi_gamma[ 0 ] >> i_blue_right)  << i_blue_left);
-                p_vout->yuv.yuv.p_gray16[ 256 + i_index ] = 
-                    ((pi_gamma[ 255 ] >> i_red_right)   << i_red_left)   |
-                    ((pi_gamma[ 255 ] >> i_green_right) << i_green_left) |
-                    ((pi_gamma[ 255 ] >> i_blue_right)  << i_blue_left);
-            }            
-            for( i_index = 0; i_index < 256; i_index++) 
+                p_vout->yuv.yuv.p_gray16[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
+                p_vout->yuv.yuv.p_gray16[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
+            }
+            for( i_index = 0; i_index < 256; i_index++)
             {
-                p_vout->yuv.yuv.p_gray16[ i_index ] = 
-                    ((pi_gamma[ i_index ] >> i_red_right)   << i_red_left)   |
-                    ((pi_gamma[ i_index ] >> i_green_right) << i_green_left) |
-                    ((pi_gamma[ i_index ] >> i_blue_right)  << i_blue_left);
+                p_vout->yuv.yuv.p_gray16[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
             }
-            break;        
-        case 24:
-        case 32:        
+            break;
+        case 3:
+        case 4:
             p_vout->yuv.yuv.p_gray32 =  (u32 *)p_vout->yuv.p_base + GRAY_MARGIN;
             for( i_index = 0; i_index < GRAY_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_gray32[ -i_index ] = 
-                    ((pi_gamma[ 0 ] >> i_red_right)   << i_red_left)   |
-                    ((pi_gamma[ 0 ] >> i_green_right) << i_green_left) |
-                    ((pi_gamma[ 0 ] >> i_blue_right)  << i_blue_left);
-                p_vout->yuv.yuv.p_gray32[ 256 + i_index ] = 
-                    ((pi_gamma[ 255 ] >> i_red_right)   << i_red_left)   |
-                    ((pi_gamma[ 255 ] >> i_green_right) << i_green_left) |
-                    ((pi_gamma[ 255 ] >> i_blue_right)  << i_blue_left);
-            }            
-            for( i_index = 0; i_index < 256; i_index++) 
+                p_vout->yuv.yuv.p_gray32[ -i_index ] =      RGB2PIXEL( p_vout, pi_gamma[0], pi_gamma[0], pi_gamma[0] );
+                p_vout->yuv.yuv.p_gray32[ 256 + i_index ] = RGB2PIXEL( p_vout, pi_gamma[255], pi_gamma[255], pi_gamma[255] );
+            }
+            for( i_index = 0; i_index < 256; i_index++)
             {
-                p_vout->yuv.yuv.p_gray32[ i_index ] = 
-                    ((pi_gamma[ i_index ] >> i_red_right)   << i_red_left)   |
-                    ((pi_gamma[ i_index ] >> i_green_right) << i_green_left) |
-                    ((pi_gamma[ i_index ] >> i_blue_right)  << i_blue_left);
+                p_vout->yuv.yuv.p_gray32[ i_index ] = RGB2PIXEL( p_vout, pi_gamma[i_index], pi_gamma[i_index], pi_gamma[i_index] );
             }
-            break;        
+            break;
          }
     }
     else
     {
         /* Color: build red, green and blue tables */
-        switch( p_vout->i_screen_depth )
+        switch( p_vout->i_bytes_per_pixel )
         {
-        case 8:
-            p_vout->yuv.yuv.p_rgb8 = (u8 *)p_vout->yuv.p_base;
-            break;        
-        case 15:
-        case 16:            
+        case 1:
+            {
+                #define RGB_MIN 0
+                #define RGB_MAX 255
+                #define CLIP( x ) ( ((x < 0) ? 0 : (x > 255) ? 255 : x) << 8 )
+
+                int y,u,v;
+                int r,g,b;
+                int uvr, uvg, uvb;
+                int i = 0, j = 0;
+                u16 red[256], green[256], blue[256], transp[256];
+                unsigned char lookup[PALETTE_TABLE_SIZE];
+
+                p_vout->yuv.yuv.p_rgb8 = (u8 *)p_vout->yuv.p_base;
+
+                /* this loop calculates the intersection of a YUV box
+                 * and the RGB cube. */
+                for ( y = 0; y <= 256; y += 16 )
+                {
+                    for ( u = 0; u <= 256; u += 32 )
+                    for ( v = 0; v <= 256; v += 32 )
+                    {
+                        uvr = (V_RED_COEF*(v-128)) >> SHIFT;
+                        uvg = (U_GREEN_COEF*(u-128) + V_GREEN_COEF*(v-128)) >> SHIFT;
+                        uvb = (U_BLUE_COEF*(u-128)) >> SHIFT;
+                        r = y + uvr;
+                        g = y + uvg;
+                        b = y + uvb;
+
+                        if( r >= RGB_MIN && g >= RGB_MIN && b >= RGB_MIN
+                                && r <= RGB_MAX && g <= RGB_MAX && b <= RGB_MAX )
+                        {
+                            /* this should never happen: all 256 palette entries are already in use */
+                            if(j == 256) { intf_ErrMsg( "vout error: no colors left to build palette\n" ); break; }
+
+                            /* clip the colors */
+                            red[j] = CLIP( r );
+                            green[j] = CLIP( g );
+                            blue[j] = CLIP( b );
+                            transp[j] = 0;
+
+                            /* allocate color */
+                            lookup[i] = 1;
+                            p_vout->yuv.yuv.p_rgb8[i++] = j;
+                            j++;
+                        }
+                        else
+                        {
+                            lookup[i] = 0;
+                            p_vout->yuv.yuv.p_rgb8[i++] = 0;
+                        }
+                    }
+                    i += 128-81;
+                }
+
+                /* the colors have been allocated, we can set the palette */
+                /* there will eventually be a way to know which colors
+                 * couldn't be allocated and try to find a replacement */
+                p_vout->p_set_palette( p_vout, red, green, blue, transp );
+
+                p_vout->i_white_pixel = 0xff;
+                p_vout->i_black_pixel = 0x00;
+                p_vout->i_gray_pixel = 0x44;
+                p_vout->i_blue_pixel = 0x3b;
+
+                i = 0;
+                /* this loop maps each color that fell outside the RGB cube
+                 * to the nearest palette entry allocated above */
+                for ( y = 0; y <= 256; y += 16 )
+                {
+                    for ( u = 0; u <= 256; u += 32 )
+                    for ( v = 0; v <= 256; v += 32 )
+                    {
+                        int u2, v2;
+                        int dist, mindist = 100000000;
+
+                        if( lookup[i] || y==0)
+                        {
+                            i++;
+                            continue;
+                        }
+
+                        /* brute-force search over every (u2,v2) entry at this y level and the one below */
+                        for( u2 = 0; u2 <= 256; u2 += 32 )
+                        for( v2 = 0; v2 <= 256; v2 += 32 )
+                        {
+                            j = ((y>>4)<<7) + (u2>>5)*9 + (v2>>5);
+                            dist = (u-u2)*(u-u2) + (v-v2)*(v-v2);
+                            if( lookup[j] )
+                            /* find the nearest color */
+                            if( dist < mindist )
+                            {
+                                p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
+                                mindist = dist;
+                            }
+                            j -= 128;
+                            if( lookup[j] )
+                            /* find the nearest color */
+                            if( dist + 128 < mindist )
+                            {
+                                p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
+                                mindist = dist + 128;
+                            }
+                        }
+                        i++;
+                    }
+                    i += 128-81;
+                }
+
+                break;
+            }
+        case 2:
             p_vout->yuv.yuv.p_rgb16 = (u16 *)p_vout->yuv.p_base;
             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb16[RED_OFFSET - RED_MARGIN + i_index] = (pi_gamma[0]>>i_red_right)<<i_red_left;
-                p_vout->yuv.yuv.p_rgb16[RED_OFFSET + 256 + i_index] = (pi_gamma[255]>>i_red_right)<<i_red_left;                
+                p_vout->yuv.yuv.p_rgb16[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
+                p_vout->yuv.yuv.p_rgb16[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
             }
             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET - GREEN_MARGIN + i_index] = (pi_gamma[0]>>i_green_right) <<i_green_left;
-                p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + 256 + i_index] = (pi_gamma[255]>>i_green_right)<<i_green_left;
+                p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
+                p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
             }
             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET - BLUE_MARGIN + i_index] = (pi_gamma[0]>>i_blue_right)<<i_blue_left;
-                p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + BLUE_MARGIN + i_index] = (pi_gamma[255]>>i_blue_right)<<i_blue_left;                
+                p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
+                p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
             }
             for( i_index = 0; i_index < 256; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb16[RED_OFFSET + i_index] = (pi_gamma[i_index]>>i_red_right)<<i_red_left;
-                p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + i_index] = (pi_gamma[i_index]>>i_green_right)<<i_green_left;
-                p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + i_index] = (pi_gamma[i_index]>>i_blue_right)<<i_blue_left;
-            }            
-            break;        
-        case 24:
-        case 32:
+                p_vout->yuv.yuv.p_rgb16[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
+                p_vout->yuv.yuv.p_rgb16[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
+                p_vout->yuv.yuv.p_rgb16[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
+            }
+            break;
+        case 3:
+        case 4:
             p_vout->yuv.yuv.p_rgb32 = (u32 *)p_vout->yuv.p_base;
             for( i_index = 0; i_index < RED_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb32[RED_OFFSET - RED_MARGIN + i_index] = (pi_gamma[0]>>i_red_right)<<i_red_left;
-                p_vout->yuv.yuv.p_rgb32[RED_OFFSET + 256 + i_index] = (pi_gamma[255]>>i_red_right)<<i_red_left;                
+                p_vout->yuv.yuv.p_rgb32[RED_OFFSET - RED_MARGIN + i_index] = RGB2PIXEL( p_vout, pi_gamma[0], 0, 0 );
+                p_vout->yuv.yuv.p_rgb32[RED_OFFSET + 256 + i_index] =        RGB2PIXEL( p_vout, pi_gamma[255], 0, 0 );
             }
             for( i_index = 0; i_index < GREEN_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET - GREEN_MARGIN + i_index] = (pi_gamma[0]>>i_green_right)<<i_green_left;
-                p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + 256 + i_index] = (pi_gamma[255]>>i_green_right)<<i_green_left;
+                p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET - GREEN_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[0], 0 );
+                p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + 256 + i_index] =          RGB2PIXEL( p_vout, 0, pi_gamma[255], 0 );
             }
             for( i_index = 0; i_index < BLUE_MARGIN; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET - BLUE_MARGIN + i_index] = (pi_gamma[0]>>i_blue_right)<<i_blue_left;
-                p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + BLUE_MARGIN + i_index] = (pi_gamma[255]>>i_blue_right)<<i_blue_left;                
+                p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET - BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[0] );
+                p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + BLUE_MARGIN + i_index] = RGB2PIXEL( p_vout, 0, 0, pi_gamma[255] );
             }
             for( i_index = 0; i_index < 256; i_index++ )
             {
-                p_vout->yuv.yuv.p_rgb32[RED_OFFSET + i_index] = (pi_gamma[i_index]>>i_red_right)<<i_red_left;
-                p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + i_index] = (pi_gamma[i_index]>>i_green_right)<<i_green_left;
-                p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + i_index] = (pi_gamma[i_index]>>i_blue_right)<<i_blue_left;
-            }            
-            break;        
+                p_vout->yuv.yuv.p_rgb32[RED_OFFSET + i_index] =   RGB2PIXEL( p_vout, pi_gamma[ i_index ], 0, 0 );
+                p_vout->yuv.yuv.p_rgb32[GREEN_OFFSET + i_index] = RGB2PIXEL( p_vout, 0, pi_gamma[ i_index ], 0 );
+                p_vout->yuv.yuv.p_rgb32[BLUE_OFFSET + i_index] =  RGB2PIXEL( p_vout, 0, 0, pi_gamma[ i_index ] );
+            }
+            break;
         }
-    }    
+    }
 
     /*
      * Set functions pointers
@@ -588,90 +764,88 @@ static void SetYUV( vout_thread_t *p_vout )
     if( p_vout->b_grayscale )
     {
         /* Grayscale */
-        switch( p_vout->i_screen_depth )
+        switch( p_vout->i_bytes_per_pixel )
         {
-        case 8:
-            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray8;        
-            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray8;        
-            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray8;        
-            break;        
-        case 15:
-        case 16:  
-            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray16;        
-            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray16;        
-            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray16;        
-            break;        
-        case 24:
-            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray24;        
-            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray24;        
-            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray24;        
-            break;        
-        case 32:        
-            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray32;        
-            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray32;        
-            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray32;        
-            break;        
-        }        
+        case 1:
+            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray8;
+            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray8;
+            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray8;
+            break;
+        case 2:
+            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray16;
+            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray16;
+            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray16;
+            break;
+        case 3:
+            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray24;
+            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray24;
+            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray24;
+            break;
+        case 4:
+            p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertY4Gray32;
+            p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertY4Gray32;
+            p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertY4Gray32;
+            break;
+        }
     }
     else
     {
         /* Color */
-        switch( p_vout->i_screen_depth )
+        switch( p_vout->i_bytes_per_pixel )
         {
-        case 8:
+        case 1:
             p_vout->yuv.p_Convert420 = (vout_yuv_convert_t *) ConvertYUV420RGB8;
             p_vout->yuv.p_Convert422 = (vout_yuv_convert_t *) ConvertYUV422RGB8;
             p_vout->yuv.p_Convert444 = (vout_yuv_convert_t *) ConvertYUV444RGB8;
-            break;        
-        case 15:
-        case 16:  
-            p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB16;        
-            p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB16;        
-            p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB16;        
-            break;        
-        case 24:
-            p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB24;        
-            p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB24;        
-            p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB24;        
-            break;        
-        case 32:        
-            p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB32;        
-            p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB32;        
-            p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB32;        
-            break;        
+            break;
+        case 2:
+            p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB16;
+            p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB16;
+            p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB16;
+            break;
+        case 3:
+            p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB24;
+            p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB24;
+            p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB24;
+            break;
+        case 4:
+            p_vout->yuv.p_Convert420 =   (vout_yuv_convert_t *) ConvertYUV420RGB32;
+            p_vout->yuv.p_Convert422 =   (vout_yuv_convert_t *) ConvertYUV422RGB32;
+            p_vout->yuv.p_Convert444 =   (vout_yuv_convert_t *) ConvertYUV444RGB32;
+            break;
         }
-    }        
+    }
 }
 
 /*****************************************************************************
- * SetOffset: build offset array for convertion functions
+ * SetOffset: build offset array for conversion functions
  *****************************************************************************
- * This function will build an offset array used in later convertion functions.
+ * This function will build an offset array used in later conversion functions.
  * It will also set horizontal and vertical scaling indicators.
  *****************************************************************************/
-static void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height, 
+static void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height,
                        boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset )
-{    
+{
     int i_x;                                    /* x position in destination */
     int i_scale_count;                                     /* modulo counter */
 
     /*
      * Prepare horizontal offset array
-     */      
+     */
     if( i_pic_width - i_width > 0 )
     {
         /* Prepare scaling array for horizontal extension */
-        *pb_h_scaling =  1;   
+        *pb_h_scaling =  1;
         i_scale_count =         i_pic_width;
         for( i_x = i_width; i_x--; )
         {
             while( (i_scale_count -= i_width) > 0 )
             {
-                *p_offset++ = 0;                
+                *p_offset++ = 0;
             }
-            *p_offset++ = 1;            
-            i_scale_count += i_pic_width;            
-        }        
+            *p_offset++ = 1;
+            i_scale_count += i_pic_width;
+        }
     }
     else if( i_pic_width - i_width < 0 )
     {
@@ -680,19 +854,19 @@ static void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_hei
         i_scale_count =         i_pic_width;
         for( i_x = i_pic_width; i_x--; )
         {
-            *p_offset = 1;            
+            *p_offset = 1;
             while( (i_scale_count -= i_pic_width) >= 0 )
-            {                
-                *p_offset += 1;                
+            {
+                *p_offset += 1;
             }
             p_offset++;
             i_scale_count += i_width;
-        }        
+        }
     }
     else
     {
-        /* No horizontal scaling: YUV convertion is done directly to picture */          
-        *pb_h_scaling = 0;        
+        /* No horizontal scaling: YUV conversion is done directly to picture */
+        *pb_h_scaling = 0;
     }
 
     /*
@@ -700,15 +874,15 @@ static void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_hei
      */
     if( i_pic_height - i_height > 0 )
     {
-        *pi_v_scaling = 1;        
+        *pi_v_scaling = 1;
     }
     else if( i_pic_height - i_height < 0 )
     {
-        *pi_v_scaling = -1;        
+        *pi_v_scaling = -1;
     }
     else
     {
-        *pi_v_scaling = 0;        
+        *pi_v_scaling = 0;
     }
 }
 
@@ -725,25 +899,25 @@ static void ConvertY4Gray8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y,
     int         i_x, i_y;                 /* horizontal and vertical indexes */
     int         i_scale_count;                       /* scale modulo counter */
     int         i_chroma_width;                    /* chroma width, not used */
-    u8 *        p_gray;                             /* base convertion table */
+    u8 *        p_gray;                             /* base conversion table */
     u8 *        p_pic_start;       /* beginning of the current line for copy */
-    u8 *        p_buffer_start;                   /* convertion buffer start */
-    u8 *        p_buffer;                       /* convertion buffer pointer */
+    u8 *        p_buffer_start;                   /* conversion buffer start */
+    u8 *        p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
-    i_pic_line_width -= i_pic_width;                                            
-    p_gray =            p_vout->yuv.yuv.p_gray8;    
-    p_buffer_start =    p_vout->yuv.p_buffer;                                   
-    p_offset_start =    p_vout->yuv.p_offset;                                   
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    i_pic_line_width -= i_pic_width;
+    p_gray =            p_vout->yuv.yuv.p_gray8;
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -751,38 +925,30 @@ static void ConvertY4Gray8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y,
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-        }             
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(400);        
+        SCALE_HEIGHT(400, 1);
     }
 }
 
 /*****************************************************************************
- * ConvertY4Gray16: grayscale YUV 4:x:x to RGB 15 or 16 bpp
+ * ConvertY4Gray16: grayscale YUV 4:x:x to RGB 2 Bpp
  *****************************************************************************/
 static void ConvertY4Gray16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
@@ -793,25 +959,25 @@ static void ConvertY4Gray16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y
     int         i_x, i_y;                 /* horizontal and vertical indexes */
     int         i_scale_count;                       /* scale modulo counter */
     int         i_chroma_width;                    /* chroma width, not used */
-    u16 *       p_gray;                             /* base convertion table */
+    u16 *       p_gray;                             /* base conversion table */
     u16 *       p_pic_start;       /* beginning of the current line for copy */
-    u16 *       p_buffer_start;                   /* convertion buffer start */
-    u16 *       p_buffer;                       /* convertion buffer pointer */
+    u16 *       p_buffer_start;                   /* conversion buffer start */
+    u16 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
-    i_pic_line_width -= i_pic_width;                                            
-    p_gray =            p_vout->yuv.yuv.p_gray16;    
-    p_buffer_start =    p_vout->yuv.p_buffer;                                   
-    p_offset_start =    p_vout->yuv.p_offset;                                   
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    i_pic_line_width -= i_pic_width;
+    p_gray =            p_vout->yuv.yuv.p_gray16;
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -819,48 +985,40 @@ static void ConvertY4Gray16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-        }             
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(400);        
+        SCALE_HEIGHT(400, 2);
     }
 }
 
 /*****************************************************************************
- * ConvertY4Gray24: grayscale YUV 4:x:x to RGB 24 bpp
+ * ConvertY4Gray24: grayscale YUV 4:x:x to RGB 3 Bpp
  *****************************************************************************/
 static void ConvertY4Gray24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
                              int i_matrix_coefficients )
 {
-    //??
+    /* XXX?? not implemented: empty stub, no parameter is read and nothing is written to p_pic */
 }
 
 /*****************************************************************************
- * ConvertY4Gray32: grayscale YUV 4:x:x to RGB 32 bpp
+ * ConvertY4Gray32: grayscale YUV 4:x:x to RGB 4 Bpp
  *****************************************************************************/
 static void ConvertY4Gray32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                              int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
@@ -871,25 +1029,25 @@ static void ConvertY4Gray32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y
     int         i_x, i_y;                 /* horizontal and vertical indexes */
     int         i_scale_count;                       /* scale modulo counter */
     int         i_chroma_width;                    /* chroma width, not used */
-    u32 *       p_gray;                             /* base convertion table */
+    u32 *       p_gray;                             /* base conversion table */
     u32 *       p_pic_start;       /* beginning of the current line for copy */
-    u32 *       p_buffer_start;                   /* convertion buffer start */
-    u32 *       p_buffer;                       /* convertion buffer pointer */
+    u32 *       p_buffer_start;                   /* conversion buffer start */
+    u32 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
-    i_pic_line_width -= i_pic_width;                                            
-    p_gray =            p_vout->yuv.yuv.p_gray32;    
-    p_buffer_start =    p_vout->yuv.p_buffer;                                   
-    p_offset_start =    p_vout->yuv.p_offset;                                   
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    i_pic_line_width -= i_pic_width;
+    p_gray =            p_vout->yuv.yuv.p_gray32;
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -897,33 +1055,25 @@ static void ConvertY4Gray32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-            *p_buffer++ = p_gray[ *p_y++ ];
-        }             
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+            *p_buffer++ = p_gray[ *p_y++ ]; *p_buffer++ = p_gray[ *p_y++ ];
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(400);        
+        SCALE_HEIGHT(400, 4);
     }
 }
 
@@ -938,56 +1088,66 @@ static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
     int         i_vertical_scaling;                 /* vertical scaling type */
     int         i_x, i_y;                 /* horizontal and vertical indexes */
     int         i_scale_count;                       /* scale modulo counter */
-    int         i_uval, i_vval;                           /* U and V samples */
-    int         i_red, i_green, i_blue;          /* U and V modified samples */
+    int         b_jump_uv;                       /* should we jump u and v ? */
+    int         i_real_y;                                           /* y % 4 */
+    u8 *        p_lookup;                                    /* lookup table */
     int         i_chroma_width;                              /* chroma width */
-    u8 *        p_yuv;                              /* base convertion table */
-    u8 *        p_ybase;                     /* Y dependant convertion table */
-    u8 *        p_pic_start;       /* beginning of the current line for copy */
-    u8 *        p_buffer_start;                   /* convertion buffer start */
-    u8 *        p_buffer;                       /* convertion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+#ifdef NODITHER
+    int dither10[4] = {  0x7,  0x8,  0x7,  0x8 };
+    int dither11[4] = {  0x8,  0x7,  0x8,  0x7 };
+    int dither12[4] = {  0x7,  0x8,  0x7,  0x8 };
+    int dither13[4] = {  0x8,  0x7,  0x8,  0x7 };
+
+    int dither20[4] = {  0xf, 0x10,  0xf, 0x10 };
+    int dither21[4] = { 0x10,  0xf, 0x10,  0xf };
+    int dither22[4] = {  0xf, 0x10,  0xf, 0x10 };
+    int dither23[4] = { 0x10,  0xf, 0x10,  0xf };
+#else
+    int dither10[4] = {  0x0,  0x8,  0x2,  0xa };
+    int dither11[4] = {  0xc,  0x4,  0xe,  0x6 };
+    int dither12[4] = {  0x3,  0xb,  0x1,  0x9 };
+    int dither13[4] = {  0xf,  0x7,  0xd,  0x5 };
+
+    int dither20[4] = {  0x0, 0x10,  0x4, 0x14 };
+    int dither21[4] = { 0x18,  0x8, 0x1c,  0xc };
+    int dither22[4] = {  0x6, 0x16,  0x2, 0x12 };
+    int dither23[4] = { 0x1e,  0xe, 0x1a,  0xa };
+#endif
+
+    /* some other matrices that can be interesting, either for debugging
+     *  or for effects :
+     *  
+     * { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} }
+     * { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } }
+     * { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } }
+     * { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } }
+     * { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } }
+     * { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } }
+     */
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     i_chroma_width =    i_width / 2;
-    p_yuv =             p_vout->yuv.yuv.p_rgb8;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_offset_start =    p_vout->yuv.p_offset;
+    p_lookup =          p_vout->yuv.p_base;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
+    i_real_y = 0;
     for( i_y = 0; i_y < i_height; i_y++ )
     {
-        /* Mark beginnning of line for possible later line copy, and initialize
-         * buffer */
-        p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
-
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
-         * pixels wide blocks */
-        for( i_x = i_width / 16; i_x--;  )
-        {
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-        }             
-
         /* Do horizontal and vertical scaling */
-        SCALE_WIDTH;
-        SCALE_HEIGHT(420);        
+        SCALE_WIDTH_DITHER( 420 );
+        SCALE_HEIGHT_DITHER( 420 );
     }
 }
 
@@ -1005,27 +1165,27 @@ static void ConvertYUV422RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                              /* chroma width */
-    u8 *        p_yuv;                              /* base convertion table */
-    u8 *        p_ybase;                     /* Y dependant convertion table */
+    u8 *        p_yuv;                              /* base conversion table */
+    u8 *        p_ybase;                     /* Y dependant conversion table */
     u8 *        p_pic_start;       /* beginning of the current line for copy */
-    u8 *        p_buffer_start;                   /* convertion buffer start */
-    u8 *        p_buffer;                       /* convertion buffer pointer */
+    u8 *        p_buffer_start;                   /* conversion buffer start */
+    u8 *        p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     i_chroma_width =    i_width / 2;
     p_yuv =             p_vout->yuv.yuv.p_rgb8;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1033,25 +1193,25 @@ static void ConvertYUV422RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_Y_PIXEL(1);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(422);        
+        SCALE_HEIGHT(422, 1);
     }
 }
 
@@ -1069,26 +1229,26 @@ static void ConvertYUV444RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                    /* chroma width, not used */
-    u8 *        p_yuv;                              /* base convertion table */
-    u8 *        p_ybase;                     /* Y dependant convertion table */
+    u8 *        p_yuv;                              /* base conversion table */
+    u8 *        p_ybase;                     /* Y dependant conversion table */
     u8 *        p_pic_start;       /* beginning of the current line for copy */
-    u8 *        p_buffer_start;                   /* convertion buffer start */
-    u8 *        p_buffer;                       /* convertion buffer pointer */
+    u8 *        p_buffer_start;                   /* conversion buffer start */
+    u8 *        p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     p_yuv =             p_vout->yuv.yuv.p_rgb8;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1096,46 +1256,47 @@ static void ConvertYUV444RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+            CONVERT_YUV_PIXEL(1);  CONVERT_YUV_PIXEL(1);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(444);        
+        SCALE_HEIGHT(444, 1);
     }
 }
 
 /*****************************************************************************
- * ConvertYUV420RGB16: color YUV 4:2:0 to RGB 15 or 16 bpp
+ * ConvertYUV420RGB16: color YUV 4:2:0 to RGB 2 Bpp
  *****************************************************************************/
 static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
                                 int i_matrix_coefficients )
 {
-/* MMX version */
-  //  int                 i_chroma_width, i_chroma_skip;      /* width and eol for chroma */
-/*
+#if 0
+    /* MMX version */
+    int                 i_chroma_width, i_chroma_skip;      /* width and eol for chroma */
+
     i_chroma_width =    i_width / 2;
     i_chroma_skip =     i_skip / 2;
-    ConvertYUV420RGB16MMX( p_y, p_u, p_v, i_width, i_height, 
-                           (i_width + i_skip) * sizeof( yuv_data_t ), 
+    ConvertYUV420RGB16MMX( p_y, p_u, p_v, i_width, i_height,
+                           (i_width + i_skip) * sizeof( yuv_data_t ),
                            (i_chroma_width + i_chroma_skip) * sizeof( yuv_data_t),
                            i_scale, (u8 *)p_pic, 0, 0, (i_width + i_pic_eol) * sizeof( u16 ),
-                           p_vout->i_screen_depth == 15 );    
-*/
+                           p_vout->i_screen_depth == 15 );
+#endif
     boolean_t   b_horizontal_scaling;             /* horizontal scaling type */
     int         i_vertical_scaling;                 /* vertical scaling type */
     int         i_x, i_y;                 /* horizontal and vertical indexes */
@@ -1143,27 +1304,27 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                              /* chroma width */
-    u16 *       p_yuv;                              /* base convertion table */
-    u16 *       p_ybase;                     /* Y dependant convertion table */
+    u16 *       p_yuv;                              /* base conversion table */
+    u16 *       p_ybase;                     /* Y dependant conversion table */
     u16 *       p_pic_start;       /* beginning of the current line for copy */
-    u16 *       p_buffer_start;                   /* convertion buffer start */
-    u16 *       p_buffer;                       /* convertion buffer pointer */
+    u16 *       p_buffer_start;                   /* conversion buffer start */
+    u16 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     i_chroma_width =    i_width / 2;
     p_yuv =             p_vout->yuv.yuv.p_rgb16;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1171,30 +1332,30 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(420);        
+        SCALE_HEIGHT(420, 2);
     }
 }
 
 /*****************************************************************************
- * ConvertYUV422RGB16: color YUV 4:2:2 to RGB 15 or 16 bpp
+ * ConvertYUV422RGB16: color YUV 4:2:2 to RGB 2 Bpp
  *****************************************************************************/
 static void ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
@@ -1207,27 +1368,27 @@ static void ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                              /* chroma width */
-    u16 *       p_yuv;                              /* base convertion table */
-    u16 *       p_ybase;                     /* Y dependant convertion table */
+    u16 *       p_yuv;                              /* base conversion table */
+    u16 *       p_ybase;                     /* Y dependant conversion table */
     u16 *       p_pic_start;       /* beginning of the current line for copy */
-    u16 *       p_buffer_start;                   /* convertion buffer start */
-    u16 *       p_buffer;                       /* convertion buffer pointer */
+    u16 *       p_buffer_start;                   /* conversion buffer start */
+    u16 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     i_chroma_width =    i_width / 2;
     p_yuv =             p_vout->yuv.yuv.p_rgb16;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1235,30 +1396,30 @@ static void ConvertYUV422RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(422);        
+        SCALE_HEIGHT(422, 2);
     }
 }
 
 /*****************************************************************************
- * ConvertYUV444RGB16: color YUV 4:4:4 to RGB 15 or 16 bpp
+ * ConvertYUV444RGB16: color YUV 4:4:4 to RGB 2 Bpp
  *****************************************************************************/
 static void ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
@@ -1271,26 +1432,26 @@ static void ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                    /* chroma width, not used */
-    u16 *       p_yuv;                              /* base convertion table */
-    u16 *       p_ybase;                     /* Y dependant convertion table */
+    u16 *       p_yuv;                              /* base conversion table */
+    u16 *       p_ybase;                     /* Y dependant conversion table */
     u16 *       p_pic_start;       /* beginning of the current line for copy */
-    u16 *       p_buffer_start;                   /* convertion buffer start */
-    u16 *       p_buffer;                       /* convertion buffer pointer */
+    u16 *       p_buffer_start;                   /* conversion buffer start */
+    u16 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     p_yuv =             p_vout->yuv.yuv.p_rgb16;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1298,60 +1459,60 @@ static void ConvertYUV444RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+            CONVERT_YUV_PIXEL(2);  CONVERT_YUV_PIXEL(2);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(444);        
+        SCALE_HEIGHT(444, 2);
     }
 }
 
 /*****************************************************************************
- * ConvertYUV420RGB24: color YUV 4:2:0 to RGB 24 bpp
+ * ConvertYUV420RGB24: color YUV 4:2:0 to RGB 3 Bpp (24 bpp), not implemented
  *****************************************************************************/
 static void ConvertYUV420RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
                                 int i_matrix_coefficients )
 {
-    //???
+    /* XXX?? Not implemented: the body is empty, so p_pic is left untouched */
 }
 
 /*****************************************************************************
- * ConvertYUV422RGB24: color YUV 4:2:2 to RGB 24 bpp
+ * ConvertYUV422RGB24: color YUV 4:2:2 to RGB 3 Bpp (24 bpp), not implemented
  *****************************************************************************/
 static void ConvertYUV422RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
                                 int i_matrix_coefficients )
 {
-    //???
+    /* XXX?? Not implemented: the body is empty, so p_pic is left untouched */
 }
 
 /*****************************************************************************
- * ConvertYUV444RGB24: color YUV 4:4:4 to RGB 24 bpp
+ * ConvertYUV444RGB24: color YUV 4:4:4 to RGB 3 Bpp (24 bpp), not implemented
  *****************************************************************************/
 static void ConvertYUV444RGB24( p_vout_thread_t p_vout, void *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
                                 int i_matrix_coefficients )
-{    
-    //???
+{
+    /* XXX?? Not implemented: the body is empty, so p_pic is left untouched */
 }
 
 /*****************************************************************************
- * ConvertYUV420RGB32: color YUV 4:2:0 to RGB 32 bpp
+ * ConvertYUV420RGB32: color YUV 4:2:0 to RGB 4 Bpp
  *****************************************************************************/
 static void ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
@@ -1364,27 +1525,27 @@ static void ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                              /* chroma width */
-    u32 *       p_yuv;                              /* base convertion table */
-    u32 *       p_ybase;                     /* Y dependant convertion table */
+    u32 *       p_yuv;                              /* base conversion table */
+    u32 *       p_ybase;                     /* Y dependent conversion table */
     u32 *       p_pic_start;       /* beginning of the current line for copy */
-    u32 *       p_buffer_start;                   /* convertion buffer start */
-    u32 *       p_buffer;                       /* convertion buffer pointer */
+    u32 *       p_buffer_start;                   /* conversion buffer start */
+    u32 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     i_chroma_width =    i_width / 2;
     p_yuv =             p_vout->yuv.yuv.p_rgb32;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1392,30 +1553,30 @@ static void ConvertYUV420RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(420);        
+        SCALE_HEIGHT(420, 4);
     }
 }
 
 /*****************************************************************************
- * ConvertYUV422RGB32: color YUV 4:2:2 to RGB 32 bpp
+ * ConvertYUV422RGB32: color YUV 4:2:2 to RGB 4 Bpp
  *****************************************************************************/
 static void ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
@@ -1428,27 +1589,27 @@ static void ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                              /* chroma width */
-    u32 *       p_yuv;                              /* base convertion table */
-    u32 *       p_ybase;                     /* Y dependant convertion table */
+    u32 *       p_yuv;                              /* base conversion table */
+    u32 *       p_ybase;                     /* Y dependent conversion table */
     u32 *       p_pic_start;       /* beginning of the current line for copy */
-    u32 *       p_buffer_start;                   /* convertion buffer start */
-    u32 *       p_buffer;                       /* convertion buffer pointer */
+    u32 *       p_buffer_start;                   /* conversion buffer start */
+    u32 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     i_chroma_width =    i_width / 2;
     p_yuv =             p_vout->yuv.yuv.p_rgb32;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1456,30 +1617,30 @@ static void ConvertYUV422RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_Y_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(422);        
+        SCALE_HEIGHT(422, 4);
     }
 }
 
 /*****************************************************************************
- * ConvertYUV444RGB32: color YUV 4:4:4 to RGB 32 bpp
+ * ConvertYUV444RGB32: color YUV 4:4:4 to RGB 4 Bpp
  *****************************************************************************/
 static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v,
                                 int i_width, int i_height, int i_pic_width, int i_pic_height, int i_pic_line_width,
@@ -1492,26 +1653,26 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *
     int         i_uval, i_vval;                           /* U and V samples */
     int         i_red, i_green, i_blue;          /* U and V modified samples */
     int         i_chroma_width;                    /* chroma width, not used */
-    u32 *       p_yuv;                              /* base convertion table */
-    u32 *       p_ybase;                     /* Y dependant convertion table */
+    u32 *       p_yuv;                              /* base conversion table */
+    u32 *       p_ybase;                     /* Y dependent conversion table */
     u32 *       p_pic_start;       /* beginning of the current line for copy */
-    u32 *       p_buffer_start;                   /* convertion buffer start */
-    u32 *       p_buffer;                       /* convertion buffer pointer */
+    u32 *       p_buffer_start;                   /* conversion buffer start */
+    u32 *       p_buffer;                       /* conversion buffer pointer */
     int *       p_offset_start;                        /* offset array start */
     int *       p_offset;                            /* offset array pointer */
-    
-    /* 
-     * Initialize some values  - i_pic_line_width will store the line skip 
+
+    /*
+     * Initialize some values  - i_pic_line_width will store the line skip
      */
     i_pic_line_width -= i_pic_width;
     p_yuv =             p_vout->yuv.yuv.p_rgb32;
-    p_buffer_start =    p_vout->yuv.p_buffer;        
-    p_offset_start =    p_vout->yuv.p_offset;                    
-    SetOffset( i_width, i_height, i_pic_width, i_pic_height, 
+    p_buffer_start =    p_vout->yuv.p_buffer;
+    p_offset_start =    p_vout->yuv.p_offset;
+    SetOffset( i_width, i_height, i_pic_width, i_pic_height,
                &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
 
     /*
-     * Perform convertion
+     * Perform conversion
      */
     i_scale_count = i_pic_height;
     for( i_y = 0; i_y < i_height; i_y++ )
@@ -1519,29 +1680,29 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *
         /* Mark beginnning of line for possible later line copy, and initialize
          * buffer */
         p_pic_start =   p_pic;
-        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;        
+        p_buffer =      b_horizontal_scaling ? p_buffer_start : p_pic;
 
-        /* Do YUV convertion to buffer - YUV picture is always formed of 16
+        /* Do YUV conversion to buffer - YUV picture is always formed of 16
          * pixels wide blocks */
         for( i_x = i_width / 16; i_x--;  )
         {
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-            CONVERT_YUV_PIXEL;  CONVERT_YUV_PIXEL;
-        }             
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+            CONVERT_YUV_PIXEL(4);  CONVERT_YUV_PIXEL(4);
+        }
 
         /* Do horizontal and vertical scaling */
         SCALE_WIDTH;
-        SCALE_HEIGHT(444);        
+        SCALE_HEIGHT(444, 4);
     }
 }
 
-//-------------------- walken code follows ------------------------------------
+/*-------------------- walken code follows ----------------------------------*/
 
 /*
  * YUV to RGB routines.
@@ -1567,9 +1728,10 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *
  */
 
 #if 0
+/* XXX?? yuvToRgb24 is compiled out by the enclosing #if 0 / #endif */
 static void yuvToRgb24 (unsigned char * Y,
-                       unsigned char * U, unsigned char * V,
-                       char * dest, int table[1935], int width)
+                        unsigned char * U, unsigned char * V,
+                        char * dest, int table[1935], int width)
 {
     int i;
     int u;
@@ -1582,145 +1744,145 @@ static void yuvToRgb24 (unsigned char * Y,
 
     i = width >> 3;
     while (i--) {
-       u = *(U++);
-       v = *(V++);
-       uvRed = (V_RED_COEF*v) >> SHIFT;
-       uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
-       uvBlue = (U_BLUE_COEF*u) >> SHIFT;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       u = *(U++);
-       v = *(V++);
-       uvRed = (V_RED_COEF*v) >> SHIFT;
-       uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
-       uvBlue = (U_BLUE_COEF*u) >> SHIFT;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       u = *(U++);
-       v = *(V++);
-       uvRed = (V_RED_COEF*v) >> SHIFT;
-       uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
-       uvBlue = (U_BLUE_COEF*u) >> SHIFT;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       u = *(U++);
-       v = *(V++);
-       uvRed = (V_RED_COEF*v) >> SHIFT;
-       uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
-       uvBlue = (U_BLUE_COEF*u) >> SHIFT;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
+        u = *(U++);
+        v = *(V++);
+        uvRed = (V_RED_COEF*v) >> SHIFT;
+        uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
+        uvBlue = (U_BLUE_COEF*u) >> SHIFT;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        u = *(U++);
+        v = *(V++);
+        uvRed = (V_RED_COEF*v) >> SHIFT;
+        uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
+        uvBlue = (U_BLUE_COEF*u) >> SHIFT;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        u = *(U++);
+        v = *(V++);
+        uvRed = (V_RED_COEF*v) >> SHIFT;
+        uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
+        uvBlue = (U_BLUE_COEF*u) >> SHIFT;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        u = *(U++);
+        v = *(V++);
+        uvRed = (V_RED_COEF*v) >> SHIFT;
+        uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
+        uvBlue = (U_BLUE_COEF*u) >> SHIFT;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
     }
 
     i = (width & 7) >> 1;
     while (i--) {
-       u = *(U++);
-       v = *(V++);
-       uvRed = (V_RED_COEF*v) >> SHIFT;
-       uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
-       uvBlue = (U_BLUE_COEF*u) >> SHIFT;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
+        u = *(U++);
+        v = *(V++);
+        uvRed = (V_RED_COEF*v) >> SHIFT;
+        uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
+        uvBlue = (U_BLUE_COEF*u) >> SHIFT;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
     }
 
     if (width & 1) {
-       u = *(U++);
-       v = *(V++);
-       uvRed = (V_RED_COEF*v) >> SHIFT;
-       uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
-       uvBlue = (U_BLUE_COEF*u) >> SHIFT;
-
-       tableY = table + *(Y++);
-       tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
-                tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
-                       uvGreen] |
-                tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
-       *(dest++) = tmp24;
-       *(dest++) = tmp24 >> 8;
-       *(dest++) = tmp24 >> 16;
+        u = *(U++);
+        v = *(V++);
+        uvRed = (V_RED_COEF*v) >> SHIFT;
+        uvGreen = (U_GREEN_COEF*u + V_GREEN_COEF*v) >> SHIFT;
+        uvBlue = (U_BLUE_COEF*u) >> SHIFT;
+
+        tableY = table + *(Y++);
+        tmp24 = (tableY [1501 - ((V_RED_COEF*128)>>SHIFT) + uvRed] |
+                 tableY [135 - (((U_GREEN_COEF+V_GREEN_COEF)*128)>>SHIFT) +
+                        uvGreen] |
+                 tableY [818 - ((U_BLUE_COEF*128)>>SHIFT) + uvBlue]);
+        *(dest++) = tmp24;
+        *(dest++) = tmp24 >> 8;
+        *(dest++) = tmp24 >> 16;
     }
 }
 #endif