]> git.sesse.net Git - vlc/blobdiff - modules/codec/ogt/render.c
RV32 added, but not tested. All chromas now handled.
[vlc] / modules / codec / ogt / render.c
index 37d8ac715d8f49f6ac5e6d6ec98325b9cdfe36c2..0295e25d3b986729e923c90ab4f8b11b7647bb25 100644 (file)
@@ -1,10 +1,10 @@
 /*****************************************************************************
- * render.c : Philips OGT and CVD (VCD Subtitle) renderer
+ * render.c : Philips OGT and CVD (VCD Subtitle) blending routines
  *****************************************************************************
  * Copyright (C) 2003, 2004 VideoLAN
- * $Id: render.c,v 1.16 2004/01/20 13:31:15 rocky Exp $
+ * $Id: render.c,v 1.22 2004/01/23 11:03:06 rocky Exp $
  *
- * Author: Rocky Bernstein 
+ * Author: Rocky Bernstein <rocky@panix.com>
  *   based on code from: 
  *          Sam Hocevar <sam@zoy.org>
  *          Rudolf Cornelissen <rag.cornelissen@inter.nl.net>
 #define MAX_ALPHA  ((1<<ALPHA_BITS) - 1) 
 #define ALPHA_SCALEDOWN (8-ALPHA_BITS)
 
+/* We use fixed-point arithmetic for the scaling ratios so that we
+   can use integer arithmetic and still get fairly precise
+   results. ASCALE is a left shift amount. 
+*/
+#define ASCALE 6  /* 2^6 = 64 */
+
 /* Horrible hack to get dbg_print to do the right thing */
 #define p_dec p_vout
 
 /*****************************************************************************
  * Local prototypes
  *****************************************************************************/
-static void RenderI420( vout_thread_t *, picture_t *, const subpicture_t *,
+static void BlendI420( vout_thread_t *, picture_t *, const subpicture_t *,
                         vlc_bool_t );
-static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop );
-static void RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendRV16( vout_thread_t *p_vout, picture_t *p_pic,
+                       const subpicture_t *p_spu, vlc_bool_t b_crop,
+                       vlc_bool_t b_15bpp );
+static void BlendRV24( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop );
-static void RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendRV32( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop );
-static void RenderRGB2( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendRGB2( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop );
 
 /*****************************************************************************
- * RenderSPU: draw an SPU on a picture
+ * BlendSPU: blend a subtitle into a picture
  *****************************************************************************
  
-  This is a fast implementation of the subpicture drawing code. The
-  data has been preprocessed. Each byte has a run-length 1 in the upper
-  nibble and a color in the lower nibble. The interleaving of rows has
-  been done. Most sanity checks are already done so that this
-  routine can be as fast as possible.
+  This blends subtitles (a subpicture) into the underlying
+  picture. Subtitle data has been preprocessed as YUV + transparency
+  or 4 bytes per pixel with interleaving of rows in the subtitle
+  removed. 
 
  *****************************************************************************/
-void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
+void VCDSubBlend( vout_thread_t *p_vout, picture_t *p_pic,
                   const subpicture_t *p_spu )
 {
     struct subpicture_sys_t *p_sys = p_spu->p_sys;
@@ -86,23 +94,33 @@ void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
         case VLC_FOURCC('I','4','2','0'):
         case VLC_FOURCC('I','Y','U','V'):
         case VLC_FOURCC('Y','V','1','2'):
-            RenderI420( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendI420( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
             break;
 
         /* RGB 555 - scaled */
+        case VLC_FOURCC('R','V','1','5'):
+            BlendRV16( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop, 
+                       VLC_TRUE );
+            break;
+          
         case VLC_FOURCC('R','V','1','6'):
-            RenderRV16( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendRV16( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop,
+                       VLC_FALSE );
            break;
 
-        /* RV32 target, scaling */
+        /* RV24 target, scaling */
         case VLC_FOURCC('R','V','2','4'):
+            BlendRV24( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+           break;
+
+        /* RV32 target, scaling */
         case VLC_FOURCC('R','V','3','2'):
-            RenderRV32( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendRV32( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
            break;
 
         /* NVidia overlay, no scaling */
         case VLC_FOURCC('Y','U','Y','2'):
-            RenderYUY2( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendYUY2( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
            break;
 
         /* Palettized 8 bits per pixel (256 colors). Each
@@ -110,9 +128,8 @@ void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
            Used in ASCII Art. 
         */
         case VLC_FOURCC('R','G','B','2'):
-            RenderRGB2( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendRGB2( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
           
-          /*msg_Err( p_vout, "RGB2 not implemented yet" );*/
            break;
 
         default:
@@ -133,12 +150,12 @@ void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
   all Cb (=V) samples in a similar fashion.
 */
 
-static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendI420( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop )
 {
   /* Common variables */
   uint8_t *p_pixel_base_Y, *p_pixel_base_V, *p_pixel_base_U;
-  ogt_yuvt_t *p_source;
+  ogt_yuvt_t *p_source; /* This is where the subtitle pixels come from */
 
   int i_x, i_y;
   vlc_bool_t even_scanline = VLC_FALSE;
@@ -211,7 +228,7 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
          
          switch( p_source->s.t )
            {
-           case 0x00
+           case 0: 
              /* Completely transparent. Don't change pixel. */
              break;
              
@@ -239,7 +256,7 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
              {
                /* Blend in underlying subtitle pixel. */
                
-               /* This is the location that's going to get changed.*/
+               /* This is the location that's going to get changed. */
                uint8_t *p_pixel_Y = p_pixel_base_Y_y + i_x;
 
 
@@ -269,7 +286,7 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
                   transparent and all opaque) aren't handled properly.
                   But we deal with them in special cases above. */
 
-               *p_pixel_Y = ( i_sub_color_Y + i_pixel_color_Y ) >> 4;
+               *p_pixel_Y = ( i_sub_color_Y + i_pixel_color_Y ) >> ALPHA_BITS;
 
                if ( even_scanline && i_x % 2 == 0 ) {
                  uint8_t *p_pixel_U = p_pixel_base_U_y + i_x/2;
@@ -287,8 +304,8 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
                  uint16_t i_pixel_color_V = 
                    (uint16_t) ( *p_pixel_V * 
                                 (uint16_t) (MAX_ALPHA - p_source->s.t) ) ;
-                 *p_pixel_U = ( i_sub_color_U + i_pixel_color_U ) >> 4;
-                 *p_pixel_V = ( i_sub_color_V + i_pixel_color_V ) >> 4;
+                 *p_pixel_U = ( i_sub_color_U + i_pixel_color_U )>>ALPHA_BITS;
+                 *p_pixel_V = ( i_sub_color_V + i_pixel_color_V )>>ALPHA_BITS;
                }
                break;
              }
@@ -311,20 +328,18 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
   spans the two pixels.
 */
 
-/* 
-   Should be same as p_pic->p_format.i_bits_per_pixel / 8. But since
-   we know it here, why try to compute it?
-*/
 #define BYTES_PER_PIXEL 4
 
-static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop )
 {
   /* Common variables */
   uint8_t *p_pixel_base;
+
+  /* This is where the subtitle pixels come from */
   ogt_yuvt_t *p_source = (ogt_yuvt_t *) p_spu->p_sys->p_data;;
 
-#ifdef EXTRA_CHECKING
+#if 1
   ogt_yuvt_t *p_source_end = (ogt_yuvt_t *)p_spu->p_sys->p_data + 
     (p_spu->i_width * p_spu->i_height);
 #endif
@@ -379,7 +394,7 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
        {
           uint16_t i_avg_tr; /* transparancy sort of averaged over 2 pixels*/
 
-#if EXTRA_CHECKING
+#if 1
           if (p_source > p_source_end-1) {
             msg_Err( p_vout, "Trying to access beyond subtitle x: %d y: %d",
                      i_x, i_y);
@@ -411,7 +426,7 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
 
          switch( i_avg_tr )
            {
-           case 0x00
+           case 0: 
              /* Completely transparent. Don't change pixel. */
              break;
              
@@ -420,7 +435,7 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                /* Completely opaque. Completely overwrite underlying
                   pixel with subtitle pixel. */
                
-               /* This is the location that's going to get changed.*/
+               /* This is the location that's going to get changed. */
                uint8_t *p_pixel = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
                uint8_t i_avg_u;
                uint8_t i_avg_v;
@@ -452,7 +467,7 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
              {
                /* Blend in underlying subtitle pixels. */
                
-               /* This is the location that's going to get changed.*/
+               /* This is the location that's going to get changed. */
                uint8_t *p_pixel = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
                uint8_t i_avg_u = ( p_source->plane[U_PLANE] 
                                     + (p_source+1)->plane[U_PLANE] ) / 2;
@@ -506,10 +521,10 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                   transparent and all opaque) aren't handled properly.
                   But we deal with them in special cases above. */
 
-               *p_pixel++ = ( i_sub_color_Y1 + i_pixel_color_Y1 ) >> 4;
-               *p_pixel++ = ( i_sub_color_V + i_pixel_color_V ) >> 4;
-               *p_pixel++ = ( i_sub_color_Y2 + i_pixel_color_Y2 ) >> 4;
-               *p_pixel++ = ( i_sub_color_U + i_pixel_color_U ) >> 4;
+               *p_pixel++ = ( i_sub_color_Y1 + i_pixel_color_Y1 )>>ALPHA_BITS;
+               *p_pixel++ = ( i_sub_color_V + i_pixel_color_V )  >>ALPHA_BITS;
+               *p_pixel++ = ( i_sub_color_Y2 + i_pixel_color_Y2 )>>ALPHA_BITS;
+               *p_pixel++ = ( i_sub_color_U + i_pixel_color_U )  >>ALPHA_BITS;
                break;
              }
            }
@@ -558,21 +573,51 @@ yuv2rgb555(ogt_yuvt_t *p_yuv, uint8_t *p_rgb1, uint8_t *p_rgb2 )
          *p_rgb1, *p_rgb2);
 #endif
 
-#undef RED_PIXEL   
-#undef GREEN_PIXEL 
-#undef BLUE_PIXEL  
 }
 
-/* 
-   Should be same as p_pic->p_format.i_bits_per_pixel / 8. But since
-   we know it here, why try to compute it?
-*/
+/**
+   Convert a YUV pixel into a 16-bit RGB 5-6-5 pixel.
+
+   A RGB 5-6-5 pixel looks like this:
+   RGB 5-6-5   bit  (MSB) 7  6   5  4  3  2  1  0 (LSB)
+                 p      B4 B3  B2 B1 B0 R5 R4  R3
+                 q      R2 R1  R0 G4 G3 G2 G1  G0
+
+**/
+
+static inline void
+yuv2rgb565(ogt_yuvt_t *p_yuv, uint8_t *p_rgb1, uint8_t *p_rgb2 )
+{
+
+  uint8_t rgb[3];
+
+  /* Presumably fills rgb[] with 8-bit R, G and B samples for *p_yuv
+     (yuv2rgb is defined elsewhere; verify against its definition). */
+  yuv2rgb(p_yuv, rgb);
+  
+  /* Scale RGB from 8 bits down to 5 or 6 bits: red keeps 6 bits,
+     green and blue keep 5 bits each.
+     NOTE(review): 5-6-5 layouts usually give the 6-bit field to
+     green rather than red -- confirm this is intended. */
+  rgb[RED_PIXEL]   >>= (8-6);
+  rgb[GREEN_PIXEL] >>= (8-5);
+  rgb[BLUE_PIXEL]  >>= (8-5);
+  
+  /* First output byte: blue in the top 5 bits, the upper 3 of the 6
+     red bits in the low 3 bits.  Second output byte: the lower 3 red
+     bits in the top 3 bits, green in the low 5 bits. */
+  *p_rgb1 = ( (rgb[BLUE_PIXEL] << 3)&0xF8 ) | ( (rgb[RED_PIXEL]>>3) & 0x07 );
+  *p_rgb2 = ( (rgb[RED_PIXEL]  << 5)&0xe0 ) | ( rgb[GREEN_PIXEL]&0x1f );
+
+  /* Disabled debugging aid: dumps the input pixel, the scaled
+     components and the two packed output bytes. */
+#if 0
+  printf("Y,Cb,Cr,T=(%02x,%02x,%02x,%02x), r,g,b=(%d,%d,%d), "
+         "rgb1: %02x, rgb2 %02x\n",
+         p_yuv->s.y, p_yuv->s.u, p_yuv->s.v, p_yuv->s.t,
+         rgb[RED_PIXEL], rgb[GREEN_PIXEL], rgb[BLUE_PIXEL],
+         *p_rgb1, *p_rgb2);
+#endif
+
+}
+
 #undef BYTES_PER_PIXEL
 #define BYTES_PER_PIXEL 2
 
 static void 
-RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
-            const subpicture_t *p_spu, vlc_bool_t b_crop )
+BlendRV16( vout_thread_t *p_vout, picture_t *p_pic,
+           const subpicture_t *p_spu, vlc_bool_t b_crop,
+           vlc_bool_t b_15bpp )
 {
     /* Common variables */
     uint8_t *p_pixel_base;
@@ -583,16 +628,21 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
     int i_x, i_y;
     int i_y_src;
 
-    /* RGB-specific */
-    int i_xscale, i_yscale, i_width, i_height, i_ytmp, i_ynext;
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction,
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp, i_ynext;
 
     /* Crop-specific */
     int i_x_start, i_y_start, i_x_end, i_y_end;
 
     struct subpicture_sys_t *p_sys = p_spu->p_sys;
 
-    i_xscale = ( p_vout->output.i_width << 6 ) / p_vout->render.i_width;
-    i_yscale = ( p_vout->output.i_height << 6 ) / p_vout->render.i_height;
+    i_xscale = ( p_vout->output.i_width << ASCALE ) / p_vout->render.i_width;
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
 
     dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER), 
               "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d", 
@@ -609,8 +659,8 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
        the picture coordinates subtitle offsets
     */
     p_pixel_base = p_pic->p->p_pixels 
-              + ( (p_spu->i_x * i_xscale) >> 6 ) * BYTES_PER_PIXEL
-              + ( (p_spu->i_y * i_yscale) >> 6 ) * p_pic->p->i_pitch;
+              + ( (p_spu->i_x * i_xscale) >> ASCALE ) * BYTES_PER_PIXEL
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
 
     i_x_start = p_sys->i_x_start;
     i_y_start = i_yscale * p_sys->i_y_start;
@@ -625,7 +675,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
          i_y_src += p_spu->i_width )
     {
        uint8_t *p_pixel_base_y;
-        i_ytmp = i_y >> 6;
+        i_ytmp = i_y >> ASCALE;
         i_y += i_yscale;
        p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
        i_x = 0;
@@ -639,7 +689,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
         }
 
         /* Check whether we need to draw one line or more than one */
-        if( i_ytmp + 1 >= ( i_y >> 6 ) )
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
         {
           /* Draw until we reach the end of the line */
           for( ; i_x < p_spu->i_width;  i_x++, p_source++ )
@@ -672,7 +722,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                  /* Completely transparent. Don't change pixel. */
                  break;
                  
@@ -682,14 +732,27 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
                
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
                     uint8_t i_rgb1;
                     uint8_t i_rgb2;
-                    yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
-                    *p_dest++ = i_rgb1;
-                    *p_dest++ = i_rgb2;
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+
+                    if (b_15bpp) 
+                      yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                    else 
+                      yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
+
+                    for ( len = i_xlast - i_xdest; len ; len--) {
+                      *p_dest++ = i_rgb1;
+                      *p_dest++ = i_rgb2;
+                    }
                    break;
                  }
 
@@ -698,16 +761,28 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                  {
                    /* Blend in underlying pixel subtitle pixel. */
                    
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+
                    /* To be able to scale correctly for full opaqueness, we
                       add 1 to the alpha.  This means alpha value 0 won't
                       be completely transparent and is not correct, but
                       that's handled in a special case above anyway. */
                
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
                    uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
-                    uint8_t rgb[3];
+                    uint8_t i_rgb1;
+                    uint8_t i_rgb2;
 
-                    yuv2rgb(p_source, rgb);
+                    if (b_15bpp) 
+                      yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                    else 
+                      yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
                     rv16_pack_blend(p_dest, rgb, dest_alpha, ALPHA_SCALEDOWN);
                    break;
                  }
@@ -717,7 +792,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
         }
         else
         {
-            i_ynext = p_pic->p->i_pitch * i_y >> 6;
+            i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
 
 
             /* Draw until we reach the end of the line */
@@ -745,7 +820,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                    /* Completely transparent. Don't change pixel. */
                     break;
 
@@ -755,20 +830,29 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
 
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_pixel_base_x = p_pixel_base 
-                                            + i_x * BYTES_PER_PIXEL;
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                   uint8_t *p_pixel_base_x = p_pixel_base + i_xdest;
 
                     for(  ; i_ytmp < i_ynext ; i_ytmp += p_pic->p->i_pitch )
                     {
-                     /* This is the location that's going to get changed.  */
+                     /* This is the location that's going to get changed. */
                      uint8_t *p_dest = p_pixel_base_x + i_ytmp;
                       uint8_t i_rgb1;
                       uint8_t i_rgb2;
-                      yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
-                      *p_dest++ = i_rgb1;
-                      *p_dest++ = i_rgb2;
+                      if (b_15bpp) 
+                        yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                      else 
+                        yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
+
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = i_rgb1;
+                        *p_dest++ = i_rgb2;
+                      }
                     }
                     break;
                  }
@@ -785,9 +869,13 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                      
                      uint8_t *p_dest = p_pixel_base + i_ytmp;
                       uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
-                      uint8_t rgb[3];
+                      uint8_t i_rgb1;
+                      uint8_t i_rgb2;
 
-                      yuv2rgb(p_source, rgb);
+                      if (b_15bpp) 
+                        yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                      else 
+                        yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
                       rv16_pack_blend(p_dest, rgb, dest_alpha,ALPHA_SCALEDOWN);
                     }
                     break;
@@ -798,36 +886,55 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
     }
 }
 
-/* 
-   Should be Same as p_pic->p_format.i_bits_per_pixel / 8. But since
-   we know it here, why try to compute it?
-*/
 #undef  BYTES_PER_PIXEL
 #define BYTES_PER_PIXEL 4
 
+/* 
+  RV24 format??? Is this just for X11? Or just not for Win32? Is this
+  the same as RV32?
+
+  A pixel is represented by 3 bytes containing a red,
+  blue and green sample with blue stored at the lowest address, green
+  next then red. One padding byte is added between pixels. Although
+  this may not be part of a spec, images should be stored with each
+  line padded to a u_int32 boundary. 
+*/
 static void 
-RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
+BlendRV24( vout_thread_t *p_vout, picture_t *p_pic,
             const subpicture_t *p_spu, vlc_bool_t b_crop )
 {
     /* Common variables */
     uint8_t *p_pixel_base;
     ogt_yuvt_t *p_src_start = (ogt_yuvt_t *)p_spu->p_sys->p_data;
     ogt_yuvt_t *p_src_end   = &p_src_start[p_spu->i_height * p_spu->i_width];
-    ogt_yuvt_t *p_source;
+    ogt_yuvt_t *p_source; /* This is where the subtitle pixels come from */
 
     int i_x, i_y;
     int i_y_src;
 
-    /* RGB-specific */
-    int i_xscale, i_yscale, i_width, i_height, i_ytmp, i_ynext;
+    /* Make sure we start on a word (4-byte) boundary. */
+    uint32_t i_spu_x;
+
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction,
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp, i_ynext;
 
     /* Crop-specific */
-    int i_x_start, i_y_start, i_x_end, i_y_end;
+    int32_t i_x_start, i_y_start, i_x_end, i_y_end;
 
     struct subpicture_sys_t *p_sys = p_spu->p_sys;
+    unsigned int i_aspect_x, i_aspect_y;
+
+    vout_AspectRatio( p_vout->render.i_aspect, &i_aspect_y, 
+                      &i_aspect_x );
 
-    i_xscale = ( p_vout->output.i_width << 6 ) / p_vout->render.i_width;
-    i_yscale = ( p_vout->output.i_height << 6 ) / p_vout->render.i_height;
+    i_xscale = (( p_vout->output.i_width << ASCALE ) * i_aspect_x)
+      / (i_aspect_y * p_vout->render.i_width);
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
 
     dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER), 
               "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d", 
@@ -841,11 +948,12 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
     i_height = p_spu->i_height * i_yscale;
 
     /* Set where we will start blending subtitle from using
-       the picture coordinates subtitle offsets
+       the picture coordinates subtitle offsets.
     */
-    p_pixel_base = p_pic->p->p_pixels 
-              + ( (p_spu->i_x * i_xscale) >> 6 ) * BYTES_PER_PIXEL
-              + ( (p_spu->i_y * i_yscale) >> 6 ) * p_pic->p->i_pitch;
+    i_spu_x = ((p_spu->i_x * i_xscale) >> ASCALE) * BYTES_PER_PIXEL;
+
+    p_pixel_base = p_pic->p->p_pixels + i_spu_x
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
 
     i_x_start = p_sys->i_x_start;
     i_y_start = i_yscale * p_sys->i_y_start;
@@ -860,7 +968,7 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
          i_y_src += p_spu->i_width )
     {
        uint8_t *p_pixel_base_y;
-        i_ytmp = i_y >> 6;
+        i_ytmp = i_y >> ASCALE;
         i_y += i_yscale;
        p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
        i_x = 0;
@@ -874,18 +982,12 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
         }
 
         /* Check whether we need to draw one line or more than one */
-        if( i_ytmp + 1 >= ( i_y >> 6 ) )
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
         {
           /* Draw until we reach the end of the line */
           for( ; i_x < p_spu->i_width;  i_x++, p_source++ )
             {
 
-#if 0              
-              uint8_t *p=(uint8_t *) p_source;
-              printf("+++ %02x %02x %02x %02x\n", 
-                     p[0], p[1], p[2], p[3]);
-#endif
-    
               if( b_crop ) {
                 
                 /* FIXME: y cropping should be dealt with outside of this 
@@ -907,7 +1009,7 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                  /* Completely transparent. Don't change pixel. */
                  break;
                  
@@ -916,44 +1018,367 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
                  {
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
-               
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
-                    uint8_t rgb[4];
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                    uint8_t rgb[3];
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_xdest;
 
                     yuv2rgb(p_source, rgb);
-                    *p_dest++ = rgb[2];
-                    *p_dest++ = rgb[1];
-                    *p_dest++ = rgb[0];
-                   break;
-                 }
+
+                    for ( len = i_xlast - i_xdest; len ; len--) {
+                      *p_dest++ = rgb[BLUE_PIXEL];
+                      *p_dest++ = rgb[GREEN_PIXEL];
+                      *p_dest++ = rgb[RED_PIXEL];
+                      *p_dest++;
+                    }
 
 #ifdef TRANSPARENCY_FINISHED
+                  default:
+                    {
+                      /* Blend in underlying pixel subtitle pixel. */
+                      
+                      uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                           * BYTES_PER_PIXEL );
+                      uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                           * BYTES_PER_PIXEL );
+                      uint32_t len     = i_xlast - i_xdest;
+
+                      /* To be able to scale correctly for full opaqueness, we
+                         add 1 to the alpha.  This means alpha value 0 won't
+                         be completely transparent and is not correct, but
+                         that's handled in a special case above anyway. */
+                      
+                      uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                      uint8_t rgb[3];
+
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base_y + i_xdest;
+                      
+                      yuv2rgb(p_source, rgb);
+                      rv32_pack_blend(p_dest, rgb, dest_alpha, 
+                                      ALPHA_SCALEDOWN);
+
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
+                      break;
+                    }
+#endif /*TRANSPARENCY_FINISHED*/
+                  }
+                }
+            }
+        } 
+        else
+        {
+            i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
+
+
+            /* Draw until we reach the end of the line */
+            for( ; i_x < p_spu->i_width; i_x++, p_source++ )
+            {
+
+              if( b_crop ) {
+                
+                /* FIXME: y cropping should be dealt with outside of this 
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+                
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
+             
+             if (p_source >= p_src_end) {
+               msg_Err( p_vout, "Trying to access beyond subtitle %dx%d %d",
+                        i_x, i_y / i_yscale, i_height);
+               return;
+             }
+             
+             switch( p_source->s.t )
+                {
+                case 0:
+                   /* Completely transparent. Don't change pixel. */
+                    break;
+
                 default:
+                case MAX_ALPHA: 
                  {
-                   /* Blend in underlying pixel subtitle pixel. */
-                   
-                   /* To be able to scale correctly for full opaqueness, we
-                      add 1 to the alpha.  This means alpha value 0 won't
-                      be completely transparent and is not correct, but
-                      that's handled in a special case above anyway. */
-               
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
-                   uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                   /* Completely opaque. Completely overwrite underlying
+                      pixel with subtitle pixel. */
+
+                   /* This is the location that's going to get changed. */
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
                     uint8_t rgb[3];
 
-                    yuv2rgb(p_source, rgb);
-                    rv32_pack_blend(p_dest, rgb, dest_alpha, ALPHA_SCALEDOWN);
-                   break;
+                    yuv2rgb(p_source, rgb); 
+
+                    for(  ; i_ytmp < i_ynext ; i_ytmp += p_pic->p->i_pitch )
+                    {
+                      /* Completely opaque. Completely overwrite underlying
+                         pixel with subtitle pixel. */
+                      
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+                      
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
+                    }
+                    break;
                  }
+#ifdef TRANSPARENCY_FINISHED
+                default: 
+                  {
+                    
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+                    uint8_t rgb[3];
+
+                    yuv2rgb(p_source, rgb);
+
+                    for(  ; i_ytmp < i_ynext ; y_ytmp += p_pic->p->i_pitch )
+                    {
+                     /* Blend in underlying pixel subtitle pixel. */
+                     
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+
+                     /* To be able to scale correctly for full opaqueness, we
+                        add 1 to the alpha.  This means alpha value 0 won't
+                        be completely transparent and is not correct, but
+                        that's handled in a special case above anyway. */
+
+                      uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                      rv32_pack_blend(p_dest, rgb, dest_alpha,
+                                      ALPHA_SCALEDOWN);
+                    }
+                    break;
 #endif /*TRANSPARENCY_FINISHED*/
+               }
+           }
+       }
+    }
+}
+
+#undef  BYTES_PER_PIXEL
+#define BYTES_PER_PIXEL 4
+
+/* 
+  RV32 format??? Is this just for X11? Or just not for Win32? Is this
+  the same as RV24?
+
+  RV32 format: a pixel is represented by 4 bytes containing a red,
+  green and blue sample, with blue stored at the lowest address, green
+  next, then red. One padding byte is added between pixels. Although
+  this may not be part of a spec, images should be stored with each
+  line padded to a 32-bit (uint32_t) boundary.
+*/
+static void 
+BlendRV32( vout_thread_t *p_vout, picture_t *p_pic,
+            const subpicture_t *p_spu, vlc_bool_t b_crop )
+{
+    /* Common variables */
+    uint8_t *p_pixel_base;
+    ogt_yuvt_t *p_src_start = (ogt_yuvt_t *)p_spu->p_sys->p_data;
+    ogt_yuvt_t *p_src_end   = &p_src_start[p_spu->i_height * p_spu->i_width];
+    ogt_yuvt_t *p_source; /* This is the where the subtitle pixels come from */
+
+    int i_x, i_y;
+    int i_y_src;
+
+    /* Make sure we start on a word (4-byte) boundary. */
+    uint32_t i_spu_x;
+
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction.
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp, i_ynext;
+
+    /* Crop-specific */
+    int32_t i_x_start, i_y_start, i_x_end, i_y_end;
+
+    struct subpicture_sys_t *p_sys = p_spu->p_sys;
+    unsigned int i_aspect_x, i_aspect_y;
+
+    vout_AspectRatio( p_vout->render.i_aspect, &i_aspect_y, 
+                      &i_aspect_x );
+
+    i_xscale = (( p_vout->output.i_width << ASCALE ) * i_aspect_x)
+      / (i_aspect_y * p_vout->render.i_width);
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
+
+    dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER), 
+              "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d", 
+              p_spu->i_width,  p_spu->i_height, 
+              p_vout->output.i_width, p_vout->output.i_height,
+              p_vout->render.i_width, p_vout->render.i_height,
+              i_xscale, i_yscale
+              );
+
+    i_width  = p_spu->i_width  * i_xscale;
+    i_height = p_spu->i_height * i_yscale;
+
+    /* Set where we start blending the subtitle: the subtitle's
+       x/y offsets scaled into picture coordinates.
+    */
+    i_spu_x = ((p_spu->i_x * i_xscale) >> ASCALE) * BYTES_PER_PIXEL; 
+
+    p_pixel_base = p_pic->p->p_pixels + i_spu_x
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
+
+    i_x_start = p_sys->i_x_start;
+    i_y_start = i_yscale * p_sys->i_y_start;
+    i_x_end   = p_sys->i_x_end;
+    i_y_end   = i_yscale * p_sys->i_y_end;
+
+    p_source = (ogt_yuvt_t *)p_sys->p_data;
+  
+    /* Draw until we reach the bottom of the subtitle */
+    i_y = 0;
+    for( i_y_src = 0 ; i_y_src < p_spu->i_height * p_spu->i_width; 
+         i_y_src += p_spu->i_width )
+    {
+       uint8_t *p_pixel_base_y;
+        i_ytmp = i_y >> ASCALE;
+        i_y += i_yscale;
+       p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
+       i_x = 0;
+
+        if ( b_crop ) {
+          if ( i_y > i_y_end ) break;
+          if (i_x_start) {
+            i_x = i_x_start;
+            p_source += i_x_start;
+          }
+        }
+
+        /* Check whether we need to draw one line or more than one */
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
+        {
+          /* Draw until we reach the end of the line */
+          for( ; i_x < p_spu->i_width;  i_x++, p_source++ )
+            {
+
+              if( b_crop ) {
+                
+                /* FIXME: y cropping should be dealt with outside of this 
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+                
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
+
+             if (p_source >= p_src_end) {
+               msg_Err( p_vout, "Trying to access beyond subtitle %dx%d %d",
+                        i_x, i_y / i_yscale, i_height);
+               return;
+             }
+             
+             switch( p_source->s.t )
+                {
+                case 0:
+                 /* Completely transparent. Don't change pixel. */
+                 break;
+                 
+                default:
+                case MAX_ALPHA:
+                 {
+                   /* Completely opaque. Completely overwrite underlying
+                      pixel with subtitle pixel. */
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                    uint8_t rgb[3];
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_xdest;
+
+                    yuv2rgb(p_source, rgb);
+
+                    for ( len = i_xlast - i_xdest; len ; len--) {
+                      *p_dest++ = rgb[BLUE_PIXEL];
+                      *p_dest++ = rgb[GREEN_PIXEL];
+                      *p_dest++ = rgb[RED_PIXEL];
+                      *p_dest++;
+                    }
+
+#ifdef TRANSPARENCY_FINISHED
+                  default:
+                    {
+                      /* Blend in underlying pixel subtitle pixel. */
+                      
+                      uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                           * BYTES_PER_PIXEL );
+                      uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                           * BYTES_PER_PIXEL );
+                      uint32_t len     = i_xlast - i_xdest;
+
+                      /* To be able to scale correctly for full opaqueness, we
+                         add 1 to the alpha.  This means alpha value 0 won't
+                         be completely transparent and is not correct, but
+                         that's handled in a special case above anyway. */
+                      
+                      uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                      uint8_t rgb[3];
+
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base_y + i_xdest;
+                      
+                      yuv2rgb(p_source, rgb);
+                      rv32_pack_blend(p_dest, rgb, dest_alpha, 
+                                      ALPHA_SCALEDOWN);
+
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
+                      break;
+                    }
+#endif /*TRANSPARENCY_FINISHED*/
+                  }
                 }
             }
-        }
+        } 
         else
         {
-            i_ynext = p_pic->p->i_pitch * i_y >> 6;
+            i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
 
 
             /* Draw until we reach the end of the line */
@@ -981,7 +1406,7 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                    /* Completely transparent. Don't change pixel. */
                     break;
 
@@ -991,18 +1416,31 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
 
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_pixel_base_x = p_pixel_base 
-                                            + i_x * BYTES_PER_PIXEL;
-                    uint8_t rgb[4];
+                   /* This is the location that's going to get changed. */
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                    uint8_t rgb[3];
+
                     yuv2rgb(p_source, rgb); 
 
                     for(  ; i_ytmp < i_ynext ; i_ytmp += p_pic->p->i_pitch )
                     {
-                     /* This is the location that's going to get changed.  */
-                     uint8_t *p_dest = p_pixel_base_x + i_ytmp;
-                      memcpy(p_dest, rgb, 4);
+                      /* Completely opaque. Completely overwrite underlying
+                         pixel with subtitle pixel. */
+                      
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+                      
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
                     }
                     break;
                  }
@@ -1010,20 +1448,31 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
                 default: 
                   {
                     
-                    uint8_t rgb[4];
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+                    uint8_t rgb[3];
+
                     yuv2rgb(p_source, rgb);
 
                     for(  ; i_ytmp < i_ynext ; y_ytmp += p_pic->p->i_pitch )
                     {
                      /* Blend in underlying pixel subtitle pixel. */
                      
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+
                      /* To be able to scale correctly for full opaqueness, we
                         add 1 to the alpha.  This means alpha value 0 won't
                         be completely transparent and is not correct, but
                         that's handled in a special case above anyway. */
-                     uint8_t *p_dest = p_pixel_base + i_ytmp;
+
                       uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
-                      rv32_pack_blend(p_dest, rgb, dest_alpha,ALPHA_SCALEDOWN);
+                      rv32_pack_blend(p_dest, rgb, dest_alpha,
+                                      ALPHA_SCALEDOWN);
                     }
                     break;
 #endif /*TRANSPARENCY_FINISHED*/
@@ -1033,36 +1482,64 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
     }
 }
 
-/* 
-   Should be same as p_pic->p_format.i_bits_per_pixel / 8. But since
-   we know it here, why try to compute it?
-*/
 #undef  BYTES_PER_PIXEL
 #define BYTES_PER_PIXEL 1
 
 static void 
-RenderRGB2( vout_thread_t *p_vout, picture_t *p_pic,
+BlendRGB2( vout_thread_t *p_vout, picture_t *p_pic,
             const subpicture_t *p_spu, vlc_bool_t b_crop )
 {
     /* Common variables */
     uint8_t *p_pixel_base;
-    ogt_yuvt_t *p_src_start = (ogt_yuvt_t *)p_spu->p_sys->p_data;
-    ogt_yuvt_t *p_src_end   = &p_src_start[p_spu->i_height * p_spu->i_width];
-    ogt_yuvt_t *p_source;
+    uint8_t *p_src_start = (uint8_t *)p_spu->p_sys->p_data;
+    uint8_t *p_src_end   = &p_src_start[p_spu->i_height * p_spu->i_width];
+    uint8_t *p_source; /* This is the where the subtitle pixels come from */
 
     int i_x, i_y;
     int i_y_src;
 
-    /* RGB-specific */
-    int i_xscale, i_yscale, i_width, i_height, i_ytmp;
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction.
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp;
 
     /* Crop-specific */
     int i_x_start, i_y_start, i_x_end, i_y_end;
 
+    /* 4 entry colormap */
+    uint8_t cmap[NUM_SUBTITLE_COLORS];
+    int i_cmap;
+
     struct subpicture_sys_t *p_sys = p_spu->p_sys;
+    unsigned int i_aspect_x, i_aspect_y;
+
+    vout_AspectRatio( p_vout->render.i_aspect, &i_aspect_y, 
+                      &i_aspect_x );
+    
+    /* Find corresponding colormap entries for our palette entries. */
+    for( i_cmap = 0; i_cmap < NUM_SUBTITLE_COLORS; i_cmap++ )
+    {
+      uint8_t Y = p_sys->p_palette[i_cmap].s.y;
+
+      /* FIXME: when we have a way to look at colormap entries we can
+         do better.  For now we have to use 0xff for white, 0x00 for
+         black, and 0x44 for something in between. To do this we use
+         only the Y component.
+      */
+      if (Y > 0x70) 
+        cmap[i_cmap] = 0xff; /* Use white. */
+      else if (Y < 0x10) 
+        cmap[i_cmap] = 0x00; /* Use black. */
+      else 
+        cmap[i_cmap] = 0x44; /* Use something else. */
+    }
 
-    i_xscale = ( p_vout->output.i_width << 6 ) / p_vout->render.i_width;
-    i_yscale = ( p_vout->output.i_height << 6 ) / p_vout->render.i_height;
+    i_xscale = (( p_vout->output.i_width << ASCALE ) * i_aspect_x)
+      / (i_aspect_y * p_vout->render.i_width);
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
 
     dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER), 
               "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d", 
@@ -1079,15 +1556,15 @@ RenderRGB2( vout_thread_t *p_vout, picture_t *p_pic,
        the picture coordinates subtitle offsets
     */
     p_pixel_base = p_pic->p->p_pixels 
-              + ( (p_spu->i_x * i_xscale) >> 6 ) * BYTES_PER_PIXEL
-              + ( (p_spu->i_y * i_yscale) >> 6 ) * p_pic->p->i_pitch;
+              + ( (p_spu->i_x * i_xscale) >> ASCALE ) * BYTES_PER_PIXEL
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
 
     i_x_start = p_sys->i_x_start;
     i_y_start = i_yscale * p_sys->i_y_start;
     i_x_end   = p_sys->i_x_end;
     i_y_end   = i_yscale * p_sys->i_y_end;
 
-    p_source = (ogt_yuvt_t *)p_sys->p_data;
+    p_source = (uint8_t *)p_sys->p_data;
   
     /* Draw until we reach the bottom of the subtitle */
     i_y = 0;
@@ -1095,11 +1572,11 @@ RenderRGB2( vout_thread_t *p_vout, picture_t *p_pic,
          i_y_src += p_spu->i_width )
       {
        uint8_t *p_pixel_base_y;
-        i_ytmp = i_y >> 6;
+        i_ytmp = i_y >> ASCALE;
         i_y += i_yscale;
        p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
        i_x = 0;
-        
+
         if ( b_crop ) {
           if ( i_y > i_y_end ) break;
           if (i_x_start) {
@@ -1108,79 +1585,112 @@ RenderRGB2( vout_thread_t *p_vout, picture_t *p_pic,
           }
         }
         
-        /* Draw until we reach the end of the line */
-        for( ; i_x < p_spu->i_width; i_x ++, p_source++ )
-          {
-            
-#if 0              
-            uint8_t *p=(uint8_t *) p_source;
-            printf("+++ %02x %02x %02x %02x\n", 
-                   p[0], p[1], p[2], p[3]);
-#endif
-            
-            if( b_crop ) {
+        /* Check whether we need to draw one line or more than one */
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
+        {
+
+          /* Draw until we reach the end of the line */
+          for( ; i_x < p_spu->i_width; i_x ++, p_source++ )
+            {
+              ogt_yuvt_t p_yuvt;
+
+              if( b_crop ) {
+                
+                /* FIXME: y cropping should be dealt with outside of this 
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+                
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
               
-              /* FIXME: y cropping should be dealt with outside of this 
-                 loop.*/
-              if ( i_y < i_y_start) continue;
+              if (p_source >= p_src_end) {
+                msg_Err( p_vout, "Trying to access beyond subtitle %dx%d %d",
+                         i_x, i_y / i_yscale, i_height);
+                return;
+              }
+              
+              p_yuvt = p_sys->p_palette[*p_source & 0x3];
+              if ( (p_yuvt.s.t) < (MAX_ALPHA) / 2 ) {
+                /* Completely or relatively transparent. Don't change pixel. */
+                ;
+#if 0
+                printf(" "); /*++++*/
+#endif
+              } else {
+                uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                     * BYTES_PER_PIXEL );
+                uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                     * BYTES_PER_PIXEL );
+                /* This is the pixel that's going to change. */
+                uint8_t *p_dest = p_pixel_base_y + i_xdest;
+                memset( p_dest, cmap[*p_source & 0x3], i_xlast - i_xdest );
+#if 0
+                printf("%1d", *p_source); /*++++*/
+#endif
+              }
               
-              if ( i_x > i_x_end )
-                {
-                  p_source += p_spu->i_width - i_x;
-                  break;
-                }
-            }
-            
-            if (p_source >= p_src_end) {
-              msg_Err( p_vout, "Trying to access beyond subtitle %dx%d %d",
-                       i_x, i_y / i_yscale, i_height);
-              return;
             }
-            
-            switch( p_source->s.t )
-              {
-              case 0x00:
-                /* Completely transparent. Don't change pixel. */
-                break;
-               
-              default:
-              case MAX_ALPHA:
-                {
-                  /* Completely opaque. Completely overwrite underlying
-                     pixel with subtitle pixel. */
-                  
-                  /* This is the location that's going to get changed.
-                   */
-                  uint8_t *p_dest = p_pixel_base_y + i_x;
-                  uint8_t rgb[4];
-                  
-                  yuv2rgb(p_source, rgb);
-                  *p_dest++ = 0xff;
-                  break;
-                }
+#if 0
+          printf("\n"); /*++++*/
+#endif
+        } else {
+          /* Have to scale over many lines. */
+          int i_yreal = p_pic->p->i_pitch * i_ytmp;
+          int i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
+
+           /* Draw until we reach the end of the line */
+           for( ; i_x < p_spu->i_width; i_x ++, p_source++ )
+             {
+              ogt_yuvt_t p_yuvt = p_sys->p_palette[*p_source & 0x3];
+
+              if( b_crop ) {
                 
-#ifdef TRANSPARENCY_FINISHED
-              default:
-                {
-                  /* Blend in underlying pixel subtitle pixel. */
-                 
-                  /* To be able to scale correctly for full opaqueness, we
-                     add 1 to the alpha.  This means alpha value 0 won't
-                     be completely transparent and is not correct, but
-                     that's handled in a special case above anyway. */
-                  
-                  uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
-                  uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
-                  uint8_t rgb[3];
-                  
-                  yuv2rgb(p_source, rgb);
-                  rv32_pack_blend(p_dest, rgb, dest_alpha, ALPHA_SCALEDOWN);
-                  break;
+                /* FIXME: y cropping should be dealt with outside of this 
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+                
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
+              
+              if (p_source >= p_src_end) {
+                msg_Err( p_vout, "Trying to access beyond subtitle %dx%d %d",
+                         i_x, i_y / i_yscale, i_height);
+                return;
+              }
+              
+              if ( (p_yuvt.s.t) < (MAX_ALPHA) / 2 ) {
+                /* Completely or relatively transparent. Don't change pixel. */
+                ;
+#if 0
+                printf(" "); /*++++*/
+#endif
+              } else {
+                uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                     * BYTES_PER_PIXEL );
+                uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                     * BYTES_PER_PIXEL );
+                uint32_t len     = i_xlast - i_xdest;
+#if 0
+                printf("%1d", *p_source); /*++++*/
+#endif
+                for( i_ytmp = i_yreal ; i_ytmp < i_ynext ;
+                     i_ytmp += p_pic->p->i_pitch ) {
+                  uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+                  memset( p_dest, cmap[*p_source & 0x3], len );
                 }
-#endif /*TRANSPARENCY_FINISHED*/
               }
-          }
-    }
+            }
+
+        }
+      }
 }
 
 \f