]> git.sesse.net Git - vlc/blobdiff - modules/codec/ogt/render.c
string review
[vlc] / modules / codec / ogt / render.c
index cec23b6811de7876c867097891e171f981332c75..f2d81ad829f580ac26fd4ad2524ed4fbe78f9e11 100644 (file)
@@ -1,10 +1,10 @@
 /*****************************************************************************
- * render.c : Philips OGT (SVCD Subtitle) renderer
+ * render.c : Philips OGT and CVD (VCD Subtitle) blending routines
  *****************************************************************************
  * Copyright (C) 2003, 2004 VideoLAN
- * $Id: render.c,v 1.15 2004/01/16 13:32:37 rocky Exp $
+ * $Id: render.c,v 1.23 2004/01/25 18:20:12 bigben Exp $
  *
- * Author: Rocky Bernstein 
+ * Author: Rocky Bernstein <rocky@panix.com>
  *   based on code from: 
  *          Sam Hocevar <sam@zoy.org>
  *          Rudolf Cornelissen <rag.cornelissen@inter.nl.net>
 #define MAX_ALPHA  ((1<<ALPHA_BITS) - 1) 
 #define ALPHA_SCALEDOWN (8-ALPHA_BITS)
 
+/* We use fixed-point arithmetic for scaling ratios so that we
+   can use integer arithmetic and still get fairly precise
+   results. ASCALE is a left shift amount. 
+*/
+#define ASCALE 6  /* 2^6 = 64 */
+
 /* Horrible hack to get dbg_print to do the right thing */
 #define p_dec p_vout
 
 /*****************************************************************************
  * Local prototypes
  *****************************************************************************/
-static void RenderI420( vout_thread_t *, picture_t *, const subpicture_t *,
+static void BlendI420( vout_thread_t *, picture_t *, const subpicture_t *,
                         vlc_bool_t );
-static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop );
-static void RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendRV16( vout_thread_t *p_vout, picture_t *p_pic,
+                       const subpicture_t *p_spu, vlc_bool_t b_crop,
+                       vlc_bool_t b_15bpp );
+static void BlendRV24( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop );
-static void RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendRV32( vout_thread_t *p_vout, picture_t *p_pic,
+                        const subpicture_t *p_spu, vlc_bool_t b_crop );
+static void BlendRGB2( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop );
 
 /*****************************************************************************
- * RenderSPU: draw an SPU on a picture
+ * BlendSPU: blend a subtitle into a picture
  *****************************************************************************
  
-  This is a fast implementation of the subpicture drawing code. The
-  data has been preprocessed. Each byte has a run-length 1 in the upper
-  nibble and a color in the lower nibble. The interleaving of rows has
-  been done. Most sanity checks are already done so that this
-  routine can be as fast as possible.
+  This blends subtitles (a subpicture) into the underlying
+  picture. Subtitle data has been preprocessed as YUV + transparency
+  or 4 bytes per pixel with interleaving of rows in the subtitle
+  removed. 
 
  *****************************************************************************/
-void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
+void VCDSubBlend( vout_thread_t *p_vout, picture_t *p_pic,
                   const subpicture_t *p_spu )
 {
     struct subpicture_sys_t *p_sys = p_spu->p_sys;
@@ -84,23 +94,33 @@ void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
         case VLC_FOURCC('I','4','2','0'):
         case VLC_FOURCC('I','Y','U','V'):
         case VLC_FOURCC('Y','V','1','2'):
-            RenderI420( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendI420( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
             break;
 
         /* RGB 555 - scaled */
+        case VLC_FOURCC('R','V','1','5'):
+            BlendRV16( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop, 
+                       VLC_TRUE );
+            break;
+          
         case VLC_FOURCC('R','V','1','6'):
-            RenderRV16( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendRV16( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop,
+                       VLC_FALSE );
            break;
 
-        /* RV32 target, scaling */
+        /* RV24 target, scaling */
         case VLC_FOURCC('R','V','2','4'):
+            BlendRV24( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+           break;
+
+        /* RV32 target, scaling */
         case VLC_FOURCC('R','V','3','2'):
-            RenderRV32( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendRV32( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
            break;
 
         /* NVidia overlay, no scaling */
         case VLC_FOURCC('Y','U','Y','2'):
-            RenderYUY2( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+            BlendYUY2( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
            break;
 
         /* Palettized 8 bits per pixel (256 colors). Each
@@ -108,7 +128,8 @@ void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
            Used in ASCII Art. 
         */
         case VLC_FOURCC('R','G','B','2'):
-            msg_Err( p_vout, "RGB2 not implemented yet" );
+            BlendRGB2( p_vout, p_pic, p_spu, p_spu->p_sys->b_crop );
+          
            break;
 
         default:
@@ -129,12 +150,12 @@ void VCDSubRender( vout_thread_t *p_vout, picture_t *p_pic,
   all Cb (=V) samples in a similar fashion.
 */
 
-static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
+static void BlendI420( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop )
 {
   /* Common variables */
   uint8_t *p_pixel_base_Y, *p_pixel_base_V, *p_pixel_base_U;
-  ogt_yuvt_t *p_source;
+  ogt_yuvt_t *p_source; /* This is the where the subtitle pixels come from */
 
   int i_x, i_y;
   vlc_bool_t even_scanline = VLC_FALSE;
@@ -207,7 +228,7 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
          
          switch( p_source->s.t )
            {
-           case 0x00
+           case 0: 
              /* Completely transparent. Don't change pixel. */
              break;
              
@@ -235,7 +256,7 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
              {
                /* Blend in underlying subtitle pixel. */
                
-               /* This is the location that's going to get changed.*/
+               /* This is the location that's going to get changed. */
                uint8_t *p_pixel_Y = p_pixel_base_Y_y + i_x;
 
 
@@ -265,7 +286,7 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
                   transparent and all opaque) aren't handled properly.
                   But we deal with them in special cases above. */
 
-               *p_pixel_Y = ( i_sub_color_Y + i_pixel_color_Y ) >> 4;
+               *p_pixel_Y = ( i_sub_color_Y + i_pixel_color_Y ) >> ALPHA_BITS;
 
                if ( even_scanline && i_x % 2 == 0 ) {
                  uint8_t *p_pixel_U = p_pixel_base_U_y + i_x/2;
@@ -283,8 +304,8 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
                  uint16_t i_pixel_color_V = 
                    (uint16_t) ( *p_pixel_V * 
                                 (uint16_t) (MAX_ALPHA - p_source->s.t) ) ;
-                 *p_pixel_U = ( i_sub_color_U + i_pixel_color_U ) >> 4;
-                 *p_pixel_V = ( i_sub_color_V + i_pixel_color_V ) >> 4;
+                 *p_pixel_U = ( i_sub_color_U + i_pixel_color_U )>>ALPHA_BITS;
+                 *p_pixel_V = ( i_sub_color_V + i_pixel_color_V )>>ALPHA_BITS;
                }
                break;
              }
@@ -307,18 +328,29 @@ static void RenderI420( vout_thread_t *p_vout, picture_t *p_pic,
   spans the two pixels.
 */
 
-static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
+#define BYTES_PER_PIXEL 4
+
+static void BlendYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                         const subpicture_t *p_spu, vlc_bool_t b_crop )
 {
   /* Common variables */
   uint8_t *p_pixel_base;
-  ogt_yuvt_t *p_source;
 
-  int i_x, i_y;
+  /* This is where the subtitle pixels come from */
+  ogt_yuvt_t *p_source = (ogt_yuvt_t *) p_spu->p_sys->p_data;;
+
+#if 1
+  ogt_yuvt_t *p_source_end = (ogt_yuvt_t *)p_spu->p_sys->p_data + 
+    (p_spu->i_width * p_spu->i_height);
+#endif
+
+  uint16_t i_x, i_y;
+
+  /* Make sure we start on a word (4-byte) boundary. */
+  uint16_t i_spu_x = (p_spu->i_x & 0xFFFE) * 2;
 
   /* Crop-specific */
   int i_x_start, i_y_start, i_x_end, i_y_end;
-  /* int i=0; */
 
   const struct subpicture_sys_t *p_sys = p_spu->p_sys;
   
@@ -329,16 +361,14 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
 
   
   p_pixel_base = p_pic->p->p_pixels + 
-               + ( p_spu->i_y * p_pic->p->i_pitch ) + p_spu->i_x * 2;
-  
+    + ( p_spu->i_y * p_pic->p->i_pitch ) + i_spu_x;
+
   i_x_start = p_sys->i_x_start;
   i_y_start = p_sys->i_y_start * p_pic->p->i_pitch;
   
   i_x_end   = p_sys->i_x_end;
   i_y_end   = p_sys->i_y_end   * p_pic->p->i_pitch;
 
-  p_source = (ogt_yuvt_t *)p_sys->p_data;
-  
   /* Draw until we reach the bottom of the subtitle */
   for( i_y = 0; 
        i_y < p_spu->i_height * p_pic->p[Y_PLANE].i_pitch ;
@@ -352,14 +382,25 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
         if ( i_y > i_y_end ) break;
         if (i_x_start) {
           i_x = i_x_start;
-          p_source += i_x_start;
+          p_source += (i_x_start*2);
         }
       }
   
 
-      /* Draw until we reach the end of the line */
-      for( ;  i_x < p_spu->i_width; i_x++, p_source++ )
+      /* Draw until we reach the end of the line. Each output pixel
+         is a combination of two source pixels. 
+       */
+      for( i_x = 0;  i_x < p_spu->i_width / 2; i_x++, p_source +=2 )
        {
+          uint16_t i_avg_tr; /* transparancy sort of averaged over 2 pixels*/
+
+#if 1
+          if (p_source > p_source_end-1) {
+            msg_Err( p_vout, "Trying to access beyond subtitle x: %d y: %d",
+                     i_x, i_y);
+            return;
+          }
+#endif
 
          if( b_crop ) {
 
@@ -368,15 +409,24 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
 
             if ( i_x > i_x_end )
            {
-             p_source += p_spu->i_width - i_x;
+             p_source += p_spu->i_width - (i_x*2);
               break;
            }
           }
   
          
-         switch( p_source->s.t )
+          /* Favor opaque subtitle pixels. */
+         if ( (p_source->s.t == 0) && (p_source+1)->s.t == MAX_ALPHA )
+            i_avg_tr = (p_source+1)->s.t;
+          else if ( (p_source->s.t == MAX_ALPHA) && (p_source+1)->s.t == 0 )
+            i_avg_tr = p_source->s.t;
+          else 
+            i_avg_tr = ( p_source->s.t + (p_source+1)->s.t ) / 2;
+          
+
+         switch( i_avg_tr )
            {
-           case 0x00
+           case 0: 
              /* Completely transparent. Don't change pixel. */
              break;
              
@@ -385,14 +435,30 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                /* Completely opaque. Completely overwrite underlying
                   pixel with subtitle pixel. */
                
-               /* This is the location that's going to get changed.*/
-               uint8_t *p_pixel = p_pixel_base_y + i_x * 2;
-               
+               /* This is the location that's going to get changed. */
+               uint8_t *p_pixel = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+               uint8_t i_avg_u;
+               uint8_t i_avg_v;
+
+                /* Favor opaque subtitle pixel. */
+                if (p_source->s.t == MAX_ALPHA ) {
+                  i_avg_u = p_source->plane[U_PLANE] ;
+                  i_avg_v = p_source->plane[V_PLANE] ;
+                } else if ( (p_source+1)->s.t == MAX_ALPHA ) {
+                  i_avg_u = (p_source+1)->plane[U_PLANE] ;
+                  i_avg_v = (p_source+1)->plane[V_PLANE] ;
+                } else {
+                  i_avg_u = ( p_source->plane[U_PLANE] 
+                              + (p_source+1)->plane[U_PLANE] ) / 2;
+                  i_avg_v = ( p_source->plane[V_PLANE] 
+                              + (p_source+1)->plane[V_PLANE] ) / 2;
+                }
+
                /* draw a two contiguous pixels: 2 Y values, 1 U, and 1 V. */
                *p_pixel++ = p_source->plane[Y_PLANE] ;
-                *p_pixel++ = p_source->plane[V_PLANE] ;
-               *p_pixel++ = p_source->plane[Y_PLANE] ;
-                *p_pixel++ = p_source->plane[U_PLANE] ;
+                *p_pixel++ = i_avg_u;
+               *p_pixel++ = (p_source+1)->plane[Y_PLANE] ;
+                *p_pixel++ = i_avg_v;
                 
                break;
              }
@@ -401,9 +467,12 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
              {
                /* Blend in underlying subtitle pixels. */
                
-               /* This is the location that's going to get changed.*/
-               uint8_t *p_pixel = p_pixel_base_y + i_x * 2;
-
+               /* This is the location that's going to get changed. */
+               uint8_t *p_pixel = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+               uint8_t i_avg_u = ( p_source->plane[U_PLANE] 
+                                    + (p_source+1)->plane[U_PLANE] ) / 2;
+               uint8_t i_avg_v = ( p_source->plane[V_PLANE] 
+                                    + (p_source+1)->plane[V_PLANE] ) / 2;
 
                /* This is the weighted part of the two subtitle
                   pixels. The color plane is 8 bits and transparancy
@@ -413,6 +482,10 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                  (uint16_t) ( p_source->plane[Y_PLANE] *
                               (uint16_t) (p_source->s.t) );
 
+               uint16_t i_sub_color_Y2 = 
+                 (uint16_t) ( (p_source+1)->plane[Y_PLANE] *
+                              (uint16_t) ((p_source+1)->s.t) );
+
                /* This is the weighted part of the underlying pixels.
                   For the same reasons above, the result is up to 12
                   bits.  However since the transparancies are
@@ -420,22 +493,23 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                   will not exceed 12 bits.
                */
                uint16_t i_sub_color_U = 
-                 (uint16_t) ( p_source->plane[U_PLANE] *
-                              (uint16_t) (p_source->s.t) );
+                 (uint16_t) ( i_avg_u * (uint16_t) i_avg_tr );
                
                uint16_t i_sub_color_V = 
-                 (uint16_t) ( p_source->plane[V_PLANE] *
-                              (uint16_t) (p_source->s.t) );
+                 (uint16_t) ( i_avg_v * (uint16_t) i_avg_tr );
 
                uint16_t i_pixel_color_Y1 = 
                  (uint16_t) ( *(p_pixel) * 
                               (uint16_t) (MAX_ALPHA - p_source->s.t) ) ;
+               uint16_t i_pixel_color_Y2 = 
+                 (uint16_t) ( *(p_pixel) * 
+                              (uint16_t) (MAX_ALPHA - (p_source+1)->s.t) ) ;
                uint16_t i_pixel_color_U = 
                  (uint16_t) ( *(p_pixel+1) * 
-                              (uint16_t) (MAX_ALPHA - p_source->s.t) ) ;
+                              (uint16_t) (MAX_ALPHA - i_avg_tr) ) ;
                uint16_t i_pixel_color_V = 
                  (uint16_t) ( *(p_pixel+3) * 
-                              (uint16_t) (MAX_ALPHA - p_source->s.t) ) ;
+                              (uint16_t) (MAX_ALPHA - i_avg_tr) ) ;
 
                /* draw a two contiguous pixels: 2 Y values, 1 U, and 1 V. */
 
@@ -447,14 +521,17 @@ static void RenderYUY2( vout_thread_t *p_vout, picture_t *p_pic,
                   transparent and all opaque) aren't handled properly.
                   But we deal with them in special cases above. */
 
-               *p_pixel++ = ( i_sub_color_Y1 + i_pixel_color_Y1 ) >> 4;
-               *p_pixel++ = ( i_sub_color_V + i_pixel_color_V ) >> 4;
-               *p_pixel++ = ( i_sub_color_Y1 + i_pixel_color_Y1 ) >> 4;
-               *p_pixel++ = ( i_sub_color_U + i_pixel_color_U ) >> 4;
+               *p_pixel++ = ( i_sub_color_Y1 + i_pixel_color_Y1 )>>ALPHA_BITS;
+               *p_pixel++ = ( i_sub_color_V + i_pixel_color_V )  >>ALPHA_BITS;
+               *p_pixel++ = ( i_sub_color_Y2 + i_pixel_color_Y2 )>>ALPHA_BITS;
+               *p_pixel++ = ( i_sub_color_U + i_pixel_color_U )  >>ALPHA_BITS;
                break;
              }
            }
        }
+
+      /* For an odd width source, we'll just have to drop off a pixel. */
+      if (p_spu->i_width % 2) p_source++;
     }
 }
 
@@ -496,20 +573,51 @@ yuv2rgb555(ogt_yuvt_t *p_yuv, uint8_t *p_rgb1, uint8_t *p_rgb2 )
          *p_rgb1, *p_rgb2);
 #endif
 
-#undef RED_PIXEL   
-#undef GREEN_PIXEL 
-#undef BLUE_PIXEL  
 }
 
-/* 
-   Should be Same as p_pic->p_format.i_bits_per_pixel / 8. But since
-   we know it here, why try to compute it?
-*/
+/**
+   Convert a YUV pixel into a 16-bit RGB 5-6-5 pixel.
+
+   A RGB 5-6-5 pixel looks like this:
+   RGB 5-6-5   bit  (MSB) 7  6   5  4  3  2  1  0 (LSB)
+                 p      B4 B3  B2 B1 B0 R5 R4  R3
+                 q      R2 R1  R0 G4 G3 G2 G1  G0
+
+**/
+
+static inline void
+yuv2rgb565(ogt_yuvt_t *p_yuv, uint8_t *p_rgb1, uint8_t *p_rgb2 )
+{
+
+  uint8_t rgb[3];
+
+  yuv2rgb(p_yuv, rgb);
+  
+  /* Scale RGB from 8 bits down to 5 or 6 bits. */
+  rgb[RED_PIXEL]   >>= (8-6);
+  rgb[GREEN_PIXEL] >>= (8-5);
+  rgb[BLUE_PIXEL]  >>= (8-5);
+  
+  *p_rgb1 = ( (rgb[BLUE_PIXEL] << 3)&0xF8 ) | ( (rgb[RED_PIXEL]>>3) & 0x07 );
+  *p_rgb2 = ( (rgb[RED_PIXEL]  << 5)&0xe0 ) | ( rgb[GREEN_PIXEL]&0x1f );
+
+#if 0
+  printf("Y,Cb,Cr,T=(%02x,%02x,%02x,%02x), r,g,b=(%d,%d,%d), "
+         "rgb1: %02x, rgb2 %02x\n",
+         p_yuv->s.y, p_yuv->s.u, p_yuv->s.v, p_yuv->s.t,
+         rgb[RED_PIXEL], rgb[GREEN_PIXEL], rgb[BLUE_PIXEL],
+         *p_rgb1, *p_rgb2);
+#endif
+
+}
+
+#undef BYTES_PER_PIXEL
 #define BYTES_PER_PIXEL 2
 
 static void 
-RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
-            const subpicture_t *p_spu, vlc_bool_t b_crop )
+BlendRV16( vout_thread_t *p_vout, picture_t *p_pic,
+           const subpicture_t *p_spu, vlc_bool_t b_crop,
+           vlc_bool_t b_15bpp )
 {
     /* Common variables */
     uint8_t *p_pixel_base;
@@ -520,16 +628,21 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
     int i_x, i_y;
     int i_y_src;
 
-    /* RGB-specific */
-    int i_xscale, i_yscale, i_width, i_height, i_ytmp, i_ynext;
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction.
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp, i_ynext;
 
     /* Crop-specific */
     int i_x_start, i_y_start, i_x_end, i_y_end;
 
     struct subpicture_sys_t *p_sys = p_spu->p_sys;
 
-    i_xscale = ( p_vout->output.i_width << 6 ) / p_vout->render.i_width;
-    i_yscale = ( p_vout->output.i_height << 6 ) / p_vout->render.i_height;
+    i_xscale = ( p_vout->output.i_width << ASCALE ) / p_vout->render.i_width;
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
 
     dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER), 
               "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d", 
@@ -546,8 +659,8 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
        the picture coordinates subtitle offsets
     */
     p_pixel_base = p_pic->p->p_pixels 
-              + ( (p_spu->i_x * i_xscale) >> 6 ) * BYTES_PER_PIXEL
-              + ( (p_spu->i_y * i_yscale) >> 6 ) * p_pic->p->i_pitch;
+              + ( (p_spu->i_x * i_xscale) >> ASCALE ) * BYTES_PER_PIXEL
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
 
     i_x_start = p_sys->i_x_start;
     i_y_start = i_yscale * p_sys->i_y_start;
@@ -562,7 +675,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
          i_y_src += p_spu->i_width )
     {
        uint8_t *p_pixel_base_y;
-        i_ytmp = i_y >> 6;
+        i_ytmp = i_y >> ASCALE;
         i_y += i_yscale;
        p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
        i_x = 0;
@@ -576,7 +689,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
         }
 
         /* Check whether we need to draw one line or more than one */
-        if( i_ytmp + 1 >= ( i_y >> 6 ) )
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
         {
           /* Draw until we reach the end of the line */
           for( ; i_x < p_spu->i_width;  i_x++, p_source++ )
@@ -609,7 +722,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                  /* Completely transparent. Don't change pixel. */
                  break;
                  
@@ -619,14 +732,27 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
                
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
                     uint8_t i_rgb1;
                     uint8_t i_rgb2;
-                    yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
-                    *p_dest++ = i_rgb1;
-                    *p_dest++ = i_rgb2;
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+
+                    if (b_15bpp) 
+                      yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                    else 
+                      yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
+
+                    for ( len = i_xlast - i_xdest; len ; len--) {
+                      *p_dest++ = i_rgb1;
+                      *p_dest++ = i_rgb2;
+                    }
                    break;
                  }
 
@@ -635,16 +761,28 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                  {
                    /* Blend in underlying pixel subtitle pixel. */
                    
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
+
                    /* To be able to scale correctly for full opaqueness, we
                       add 1 to the alpha.  This means alpha value 0 won't
                       be completely transparent and is not correct, but
                       that's handled in a special case above anyway. */
                
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
                    uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
-                    uint8_t rgb[3];
+                    uint8_t i_rgb1;
+                    uint8_t i_rgb2;
 
-                    yuv2rgb(p_source, rgb);
+                    if (b_15bpp) 
+                      yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                    else 
+                      yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
                     rv16_pack_blend(p_dest, rgb, dest_alpha, ALPHA_SCALEDOWN);
                    break;
                  }
@@ -654,7 +792,7 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
         }
         else
         {
-            i_ynext = p_pic->p->i_pitch * i_y >> 6;
+            i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
 
 
             /* Draw until we reach the end of the line */
@@ -675,14 +813,14 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
               }
              
              if (p_source >= p_src_end) {
-               msg_Err( p_vout, "Trying to access beyond subtitle %dx%d %d",
+               msg_Err( p_vout, "trying to access beyond subtitle %dx%d %d",
                         i_x, i_y / i_yscale, i_height);
                return;
              }
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                    /* Completely transparent. Don't change pixel. */
                     break;
 
@@ -692,20 +830,29 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
 
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_pixel_base_x = p_pixel_base 
-                                            + i_x * BYTES_PER_PIXEL;
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                   uint8_t *p_pixel_base_x = p_pixel_base + i_xdest;
 
                     for(  ; i_ytmp < i_ynext ; i_ytmp += p_pic->p->i_pitch )
                     {
-                     /* This is the location that's going to get changed.  */
+                     /* This is the location that's going to get changed. */
                      uint8_t *p_dest = p_pixel_base_x + i_ytmp;
                       uint8_t i_rgb1;
                       uint8_t i_rgb2;
-                      yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
-                      *p_dest++ = i_rgb1;
-                      *p_dest++ = i_rgb2;
+                      if (b_15bpp) 
+                        yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                      else 
+                        yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
+
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = i_rgb1;
+                        *p_dest++ = i_rgb2;
+                      }
                     }
                     break;
                  }
@@ -722,9 +869,13 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
                      
                      uint8_t *p_dest = p_pixel_base + i_ytmp;
                       uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
-                      uint8_t rgb[3];
+                      uint8_t i_rgb1;
+                      uint8_t i_rgb2;
 
-                      yuv2rgb(p_source, rgb);
+                      if (b_15bpp) 
+                        yuv2rgb555(p_source, &i_rgb1, &i_rgb2);
+                      else 
+                        yuv2rgb565(p_source, &i_rgb1, &i_rgb2);
                       rv16_pack_blend(p_dest, rgb, dest_alpha,ALPHA_SCALEDOWN);
                     }
                     break;
@@ -735,36 +886,55 @@ RenderRV16( vout_thread_t *p_vout, picture_t *p_pic,
     }
 }
 
-/* 
-   Should be Same as p_pic->p_format.i_bits_per_pixel / 8. But since
-   we know it here, why try to compute it?
-*/
 #undef  BYTES_PER_PIXEL
 #define BYTES_PER_PIXEL 4
 
+/* 
+  RV24 format??? Is this just for X11? Or just not for Win32? Is this
+  the same as RV32?
+
+  a pixel is represented by 3 bytes containing a red,
+  blue and green sample with blue stored at the lowest address, green
+  next then red. One padding byte is added between pixels. Although
+  this may not be part of a spec, images should be stored with each
+  line padded to a u_int32 boundary. 
+*/
 static void 
-RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
+BlendRV24( vout_thread_t *p_vout, picture_t *p_pic,
             const subpicture_t *p_spu, vlc_bool_t b_crop )
 {
     /* Common variables */
     uint8_t *p_pixel_base;
     ogt_yuvt_t *p_src_start = (ogt_yuvt_t *)p_spu->p_sys->p_data;
     ogt_yuvt_t *p_src_end   = &p_src_start[p_spu->i_height * p_spu->i_width];
-    ogt_yuvt_t *p_source;
+    ogt_yuvt_t *p_source; /* This is the where the subtitle pixels come from */
 
     int i_x, i_y;
     int i_y_src;
 
-    /* RGB-specific */
-    int i_xscale, i_yscale, i_width, i_height, i_ytmp, i_ynext;
+    /* Make sure we start on a word (4-byte) boundary. */
+    uint32_t i_spu_x;
+
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction.
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp, i_ynext;
 
     /* Crop-specific */
-    int i_x_start, i_y_start, i_x_end, i_y_end;
+    int32_t i_x_start, i_y_start, i_x_end, i_y_end;
 
     struct subpicture_sys_t *p_sys = p_spu->p_sys;
+    unsigned int i_aspect_x, i_aspect_y;
 
-    i_xscale = ( p_vout->output.i_width << 6 ) / p_vout->render.i_width;
-    i_yscale = ( p_vout->output.i_height << 6 ) / p_vout->render.i_height;
+    vout_AspectRatio( p_vout->render.i_aspect, &i_aspect_y, 
+                      &i_aspect_x );
+
+    i_xscale = (( p_vout->output.i_width << ASCALE ) * i_aspect_x)
+      / (i_aspect_y * p_vout->render.i_width);
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
 
     dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER), 
               "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d", 
@@ -778,11 +948,12 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
     i_height = p_spu->i_height * i_yscale;
 
     /* Set where we will start blending subtitle from using
-       the picture coordinates subtitle offsets
+       the picture coordinates subtitle offsets.
     */
-    p_pixel_base = p_pic->p->p_pixels 
-              + ( (p_spu->i_x * i_xscale) >> 6 ) * BYTES_PER_PIXEL
-              + ( (p_spu->i_y * i_yscale) >> 6 ) * p_pic->p->i_pitch;
+    i_spu_x = ((p_spu->i_x * i_xscale) >> ASCALE) * BYTES_PER_PIXEL;
+
+    p_pixel_base = p_pic->p->p_pixels + i_spu_x
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
 
     i_x_start = p_sys->i_x_start;
     i_y_start = i_yscale * p_sys->i_y_start;
@@ -797,7 +968,7 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
          i_y_src += p_spu->i_width )
     {
        uint8_t *p_pixel_base_y;
-        i_ytmp = i_y >> 6;
+        i_ytmp = i_y >> ASCALE;
         i_y += i_yscale;
        p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
        i_x = 0;
@@ -811,18 +982,12 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
         }
 
         /* Check whether we need to draw one line or more than one */
-        if( i_ytmp + 1 >= ( i_y >> 6 ) )
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
         {
           /* Draw until we reach the end of the line */
           for( ; i_x < p_spu->i_width;  i_x++, p_source++ )
             {
 
-#if 0              
-              uint8_t *p=(uint8_t *) p_source;
-              printf("+++ %02x %02x %02x %02x\n", 
-                     p[0], p[1], p[2], p[3]);
-#endif
-    
               if( b_crop ) {
                 
                 /* FIXME: y cropping should be dealt with outside of this 
@@ -837,14 +1002,14 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
               }
 
              if (p_source >= p_src_end) {
-               msg_Err( p_vout, "Trying to access beyond subtitle %dx%d %d",
+               msg_Err( p_vout, "trying to access beyond subtitle %dx%d %d",
                         i_x, i_y / i_yscale, i_height);
                return;
              }
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                  /* Completely transparent. Don't change pixel. */
                  break;
                  
@@ -853,44 +1018,367 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
                  {
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
-               
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
-                    uint8_t rgb[4];
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                    uint8_t rgb[3];
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_xdest;
 
                     yuv2rgb(p_source, rgb);
-                    *p_dest++ = rgb[2];
-                    *p_dest++ = rgb[1];
-                    *p_dest++ = rgb[0];
-                   break;
-                 }
+
+                    for ( len = i_xlast - i_xdest; len ; len--) {
+                      *p_dest++ = rgb[BLUE_PIXEL];
+                      *p_dest++ = rgb[GREEN_PIXEL];
+                      *p_dest++ = rgb[RED_PIXEL];
+                      *p_dest++;
+                    }
 
 #ifdef TRANSPARENCY_FINISHED
+                  default:
+                    {
+                      /* Blend in underlying pixel subtitle pixel. */
+                      
+                      uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                           * BYTES_PER_PIXEL );
+                      uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                           * BYTES_PER_PIXEL );
+                      uint32_t len     = i_xlast - i_xdest;
+
+                      /* To be able to scale correctly for full opaqueness, we
+                         add 1 to the alpha.  This means alpha value 0 won't
+                         be completely transparent and is not correct, but
+                         that's handled in a special case above anyway. */
+                      
+                      uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                      uint8_t rgb[3];
+
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base_y + i_xdest;
+                      
+                      yuv2rgb(p_source, rgb);
+                      rv32_pack_blend(p_dest, rgb, dest_alpha, 
+                                      ALPHA_SCALEDOWN);
+
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
+                      break;
+                    }
+#endif /*TRANSPARENCY_FINISHED*/
+                  }
+                }
+            }
+        } 
+        else
+        {
+            i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
+
+
+            /* Draw until we reach the end of the line */
+            for( ; i_x < p_spu->i_width; i_x++, p_source++ )
+            {
+
+              if( b_crop ) {
+                
+                /* FIXME: y cropping should be dealt with outside of this 
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+                
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
+             
+             if (p_source >= p_src_end) {
+               msg_Err( p_vout, "trying to access beyond subtitle %dx%d %d",
+                        i_x, i_y / i_yscale, i_height);
+               return;
+             }
+             
+             switch( p_source->s.t )
+                {
+                case 0:
+                   /* Completely transparent. Don't change pixel. */
+                    break;
+
                 default:
+                case MAX_ALPHA: 
                  {
-                   /* Blend in underlying pixel subtitle pixel. */
-                   
-                   /* To be able to scale correctly for full opaqueness, we
-                      add 1 to the alpha.  This means alpha value 0 won't
-                      be completely transparent and is not correct, but
-                      that's handled in a special case above anyway. */
-               
-                   uint8_t *p_dest = p_pixel_base_y + i_x * BYTES_PER_PIXEL;
-                   uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                   /* Completely opaque. Completely overwrite underlying
+                      pixel with subtitle pixel. */
+
+                   /* This is the location that's going to get changed. */
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
                     uint8_t rgb[3];
 
-                    yuv2rgb(p_source, rgb);
-                    rv32_pack_blend(p_dest, rgb, dest_alpha, ALPHA_SCALEDOWN);
-                   break;
+                    yuv2rgb(p_source, rgb); 
+
+                    for(  ; i_ytmp < i_ynext ; i_ytmp += p_pic->p->i_pitch )
+                    {
+                      /* Completely opaque. Completely overwrite underlying
+                         pixel with subtitle pixel. */
+                      
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+                      
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
+                    }
+                    break;
                  }
+#ifdef TRANSPARENCY_FINISHED
+                default: 
+                  {
+                    
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+                    uint8_t rgb[3];
+
+                    yuv2rgb(p_source, rgb);
+
+                    for(  ; i_ytmp < i_ynext ; y_ytmp += p_pic->p->i_pitch )
+                    {
+                     /* Blend in underlying pixel subtitle pixel. */
+                     
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+
+                     /* To be able to scale correctly for full opaqueness, we
+                        add 1 to the alpha.  This means alpha value 0 won't
+                        be completely transparent and is not correct, but
+                        that's handled in a special case above anyway. */
+
+                      uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                      rv32_pack_blend(p_dest, rgb, dest_alpha,
+                                      ALPHA_SCALEDOWN);
+                    }
+                    break;
+#endif /*TRANSPARENCY_FINISHED*/
+               }
+           }
+       }
+    }
+}
+
+#undef  BYTES_PER_PIXEL
+#define BYTES_PER_PIXEL 4
+
+/* 
+  RV32 format??? Is this just for X11? Or just not for Win32? Is this
+  the same as RV24?
+
+  RV32 format: a pixel is represented by 4 bytes containing a red,
+  green and blue sample, with blue stored at the lowest address, green
+  next, then red. One padding byte follows the three samples. Although
+  this may not be part of a spec, images should be stored with each
+  line padded to a 32-bit (4-byte) boundary.
+*/
+static void 
+BlendRV32( vout_thread_t *p_vout, picture_t *p_pic,
+            const subpicture_t *p_spu, vlc_bool_t b_crop )
+{
+    /* Common variables */
+    uint8_t *p_pixel_base;
+    ogt_yuvt_t *p_src_start = (ogt_yuvt_t *)p_spu->p_sys->p_data;
+    ogt_yuvt_t *p_src_end   = &p_src_start[p_spu->i_height * p_spu->i_width];
+    ogt_yuvt_t *p_source; /* This is the where the subtitle pixels come from */
+
+    int i_x, i_y;
+    int i_y_src;
+
+    /* Make sure we start on a word (4-byte) boundary. */
+    uint32_t i_spu_x;
+
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction.
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp, i_ynext;
+
+    /* Crop-specific */
+    int32_t i_x_start, i_y_start, i_x_end, i_y_end;
+
+    struct subpicture_sys_t *p_sys = p_spu->p_sys;
+    unsigned int i_aspect_x, i_aspect_y;
+
+    vout_AspectRatio( p_vout->render.i_aspect, &i_aspect_y, 
+                      &i_aspect_x );
+
+    i_xscale = (( p_vout->output.i_width << ASCALE ) * i_aspect_x)
+      / (i_aspect_y * p_vout->render.i_width);
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
+
+    dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER), 
+              "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d", 
+              p_spu->i_width,  p_spu->i_height, 
+              p_vout->output.i_width, p_vout->output.i_height,
+              p_vout->render.i_width, p_vout->render.i_height,
+              i_xscale, i_yscale
+              );
+
+    i_width  = p_spu->i_width  * i_xscale;
+    i_height = p_spu->i_height * i_yscale;
+
+    /* Set where we will start blending subtitle from using
+       the picture coordinates subtitle offsets.
+    */
+    i_spu_x = ((p_spu->i_x * i_xscale) >> ASCALE) * BYTES_PER_PIXEL; 
+
+    p_pixel_base = p_pic->p->p_pixels + i_spu_x
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
+
+    i_x_start = p_sys->i_x_start;
+    i_y_start = i_yscale * p_sys->i_y_start;
+    i_x_end   = p_sys->i_x_end;
+    i_y_end   = i_yscale * p_sys->i_y_end;
+
+    p_source = (ogt_yuvt_t *)p_sys->p_data;
+  
+    /* Draw until we reach the bottom of the subtitle */
+    i_y = 0;
+    for( i_y_src = 0 ; i_y_src < p_spu->i_height * p_spu->i_width; 
+         i_y_src += p_spu->i_width )
+    {
+       uint8_t *p_pixel_base_y;
+        i_ytmp = i_y >> ASCALE;
+        i_y += i_yscale;
+       p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
+       i_x = 0;
+
+        if ( b_crop ) {
+          if ( i_y > i_y_end ) break;
+          if (i_x_start) {
+            i_x = i_x_start;
+            p_source += i_x_start;
+          }
+        }
+
+        /* Check whether we need to draw one line or more than one */
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
+        {
+          /* Draw until we reach the end of the line */
+          for( ; i_x < p_spu->i_width;  i_x++, p_source++ )
+            {
+
+              if( b_crop ) {
+                
+                /* FIXME: y cropping should be dealt with outside of this 
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+                
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
+
+             if (p_source >= p_src_end) {
+               msg_Err( p_vout, "trying to access beyond subtitle %dx%d %d",
+                        i_x, i_y / i_yscale, i_height);
+               return;
+             }
+             
+             switch( p_source->s.t )
+                {
+                case 0:
+                 /* Completely transparent. Don't change pixel. */
+                 break;
+                 
+                default:
+                case MAX_ALPHA:
+                 {
+                   /* Completely opaque. Completely overwrite underlying
+                      pixel with subtitle pixel. */
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                    uint8_t rgb[3];
+
+                   /* This is the location that's going to get changed. */
+                   uint8_t *p_dest = p_pixel_base_y + i_xdest;
+
+                    yuv2rgb(p_source, rgb);
+
+                    for ( len = i_xlast - i_xdest; len ; len--) {
+                      *p_dest++ = rgb[BLUE_PIXEL];
+                      *p_dest++ = rgb[GREEN_PIXEL];
+                      *p_dest++ = rgb[RED_PIXEL];
+                      *p_dest++;
+                    }
+
+#ifdef TRANSPARENCY_FINISHED
+                  default:
+                    {
+                      /* Blend in underlying pixel subtitle pixel. */
+                      
+                      uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                           * BYTES_PER_PIXEL );
+                      uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                           * BYTES_PER_PIXEL );
+                      uint32_t len     = i_xlast - i_xdest;
+
+                      /* To be able to scale correctly for full opaqueness, we
+                         add 1 to the alpha.  This means alpha value 0 won't
+                         be completely transparent and is not correct, but
+                         that's handled in a special case above anyway. */
+                      
+                      uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
+                      uint8_t rgb[3];
+
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base_y + i_xdest;
+                      
+                      yuv2rgb(p_source, rgb);
+                      rv32_pack_blend(p_dest, rgb, dest_alpha, 
+                                      ALPHA_SCALEDOWN);
+
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
+                      break;
+                    }
 #endif /*TRANSPARENCY_FINISHED*/
+                  }
                 }
             }
-        }
+        } 
         else
         {
-            i_ynext = p_pic->p->i_pitch * i_y >> 6;
+            i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
 
 
             /* Draw until we reach the end of the line */
@@ -918,7 +1406,7 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
              
              switch( p_source->s.t )
                 {
-                case 0x00:
+                case 0:
                    /* Completely transparent. Don't change pixel. */
                     break;
 
@@ -928,18 +1416,31 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
                    /* Completely opaque. Completely overwrite underlying
                       pixel with subtitle pixel. */
 
-                   /* This is the location that's going to get changed.
-                    */
-                   uint8_t *p_pixel_base_x = p_pixel_base 
-                                            + i_x * BYTES_PER_PIXEL;
-                    uint8_t rgb[4];
+                   /* This is the location that's going to get changed. */
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+
+                    uint8_t rgb[3];
+
                     yuv2rgb(p_source, rgb); 
 
                     for(  ; i_ytmp < i_ynext ; i_ytmp += p_pic->p->i_pitch )
                     {
-                     /* This is the location that's going to get changed.  */
-                     uint8_t *p_dest = p_pixel_base_x + i_ytmp;
-                      memcpy(p_dest, rgb, 4);
+                      /* Completely opaque. Completely overwrite underlying
+                         pixel with subtitle pixel. */
+                      
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+                      
+                      for ( len = i_xlast - i_xdest; len ; len--) {
+                        *p_dest++ = rgb[BLUE_PIXEL];
+                        *p_dest++ = rgb[GREEN_PIXEL];
+                        *p_dest++ = rgb[RED_PIXEL];
+                        *p_dest++;
+                      }
                     }
                     break;
                  }
@@ -947,20 +1448,31 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
                 default: 
                   {
                     
-                    uint8_t rgb[4];
+
+                    uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE) 
+                                         * BYTES_PER_PIXEL );
+                    uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                         * BYTES_PER_PIXEL );
+                    uint32_t len     = i_xlast - i_xdest;
+                    uint8_t rgb[3];
+
                     yuv2rgb(p_source, rgb);
 
                     for(  ; i_ytmp < i_ynext ; y_ytmp += p_pic->p->i_pitch )
                     {
                      /* Blend in underlying pixel subtitle pixel. */
                      
+                      /* This is the location that's going to get changed. */
+                      uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+
                      /* To be able to scale correctly for full opaqueness, we
                         add 1 to the alpha.  This means alpha value 0 won't
                         be completely transparent and is not correct, but
                         that's handled in a special case above anyway. */
-                     uint8_t *p_dest = p_pixel_base + i_ytmp;
+
                       uint8_t i_destalpha = MAX_ALPHA - p_source->s.t;
-                      rv32_pack_blend(p_dest, rgb, dest_alpha,ALPHA_SCALEDOWN);
+                      rv32_pack_blend(p_dest, rgb, dest_alpha,
+                                      ALPHA_SCALEDOWN);
                     }
                     break;
 #endif /*TRANSPARENCY_FINISHED*/
@@ -970,6 +1482,217 @@ RenderRV32( vout_thread_t *p_vout, picture_t *p_pic,
     }
 }
 
+#undef  BYTES_PER_PIXEL
+#define BYTES_PER_PIXEL 1
+
+/*
+  BlendRGB2: draw a palettized subtitle onto a picture that uses one
+  byte per pixel (BYTES_PER_PIXEL is 1 here).
+
+  The low 2 bits of each source byte select an entry of
+  p_sys->p_palette.  Entries whose s.t value is below MAX_ALPHA/2 are
+  left undrawn; all others are written as one colormap byte picked
+  from the entry's Y component only.  Positions are scaled by
+  i_xscale/i_yscale, fixed-point ratios multiplied by 2**ASCALE.  If
+  b_crop is set, drawing is limited by the p_sys crop start/end values.
+*/
+static void
+BlendRGB2( vout_thread_t *p_vout, picture_t *p_pic,
+            const subpicture_t *p_spu, vlc_bool_t b_crop )
+{
+    /* Common variables */
+    uint8_t *p_pixel_base;
+    uint8_t *p_src_start = (uint8_t *)p_spu->p_sys->p_data;
+    uint8_t *p_src_end   = &p_src_start[p_spu->i_height * p_spu->i_width];
+    uint8_t *p_source; /* This is where the subtitle pixels come from */
+
+    int i_x, i_y;
+    int i_y_src;
+
+    /* Chroma specific */
+    uint32_t i_xscale;   /* Amount we scale subtitle in the x direction,
+                            multiplied by 2**ASCALE. */
+    uint32_t i_yscale;   /* Amount we scale subtitle in the y direction.
+                            multiplied by 2**ASCALE. */
+
+    int i_width, i_height, i_ytmp;
+
+    /* Crop-specific */
+    int i_x_start, i_y_start, i_x_end, i_y_end;
+
+    /* 4 entry colormap */
+    uint8_t cmap[NUM_SUBTITLE_COLORS];
+    int i_cmap;
+
+    struct subpicture_sys_t *p_sys = p_spu->p_sys;
+    unsigned int i_aspect_x, i_aspect_y;
+
+    vout_AspectRatio( p_vout->render.i_aspect, &i_aspect_y,
+                      &i_aspect_x );
+
+    /* Find a corresponding colormap entries for our palette entries. */
+    for( i_cmap = 0; i_cmap < NUM_SUBTITLE_COLORS; i_cmap++ )
+    {
+      uint8_t Y = p_sys->p_palette[i_cmap].s.y;
+
+      /* FIXME: when we have a way to look at colormap entries we can
+         do better.  For now we have to use 0xff for white 0x00 for
+         black and 0x44 for something in between. To do this we use
+         only the Y component.
+      */
+      if (Y > 0x70)
+        cmap[i_cmap] = 0xff; /* Use white. */
+      else if (Y < 0x10)
+        cmap[i_cmap] = 0x00; /* Use black. */
+      else
+        cmap[i_cmap] = 0x44; /* Use something else. */
+    }
+
+    i_xscale = (( p_vout->output.i_width << ASCALE ) * i_aspect_x)
+      / (i_aspect_y * p_vout->render.i_width);
+    i_yscale = ( p_vout->output.i_height << ASCALE ) / p_vout->render.i_height;
+
+    dbg_print( (DECODE_DBG_CALL|DECODE_DBG_RENDER),
+              "spu: %dx%d, scaled: %dx%d, vout render: %dx%d, scale %dx%d",
+              p_spu->i_width,  p_spu->i_height,
+              p_vout->output.i_width, p_vout->output.i_height,
+              p_vout->render.i_width, p_vout->render.i_height,
+              i_xscale, i_yscale
+              );
+
+    i_width  = p_spu->i_width  * i_xscale;
+    i_height = p_spu->i_height * i_yscale;
+
+    /* Set where we will start blending subtitle from using
+       the picture coordinates subtitle offsets
+    */
+    p_pixel_base = p_pic->p->p_pixels
+              + ( (p_spu->i_x * i_xscale) >> ASCALE ) * BYTES_PER_PIXEL
+              + ( (p_spu->i_y * i_yscale) >> ASCALE ) * p_pic->p->i_pitch;
+
+    i_x_start = p_sys->i_x_start;
+    i_y_start = i_yscale * p_sys->i_y_start;
+    i_x_end   = p_sys->i_x_end;
+    i_y_end   = i_yscale * p_sys->i_y_end;
+
+    p_source = (uint8_t *)p_sys->p_data;
+
+    /* Draw until we reach the bottom of the subtitle */
+    i_y = 0;
+    for( i_y_src = 0 ; i_y_src < p_spu->i_height * p_spu->i_width;
+         i_y_src += p_spu->i_width )
+      {
+        uint8_t *p_pixel_base_y;
+        i_ytmp = i_y >> ASCALE;
+        i_y += i_yscale;
+        p_pixel_base_y = p_pixel_base + (i_ytmp * p_pic->p->i_pitch);
+        i_x = 0;
+
+        if ( b_crop ) {
+          if ( i_y > i_y_end ) break;
+          if (i_x_start) {
+            i_x = i_x_start;
+            p_source += i_x_start;
+          }
+        }
+
+        /* Check whether we need to draw one line or more than one */
+        if( i_ytmp + 1 >= ( i_y >> ASCALE ) )
+        {
+
+          /* Draw until we reach the end of the line */
+          for( ; i_x < p_spu->i_width; i_x ++, p_source++ )
+            {
+              ogt_yuvt_t p_yuvt;
+
+              if( b_crop ) {
+
+                /* FIXME: y cropping should be dealt with outside of this
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
+
+              if (p_source >= p_src_end) {
+                msg_Err( p_vout, "trying to access beyond subtitle %dx%d %d",
+                         i_x, i_y / i_yscale, i_height);
+                return;
+              }
+
+              p_yuvt = p_sys->p_palette[*p_source & 0x3];
+              if ( (p_yuvt.s.t) < (MAX_ALPHA) / 2 ) {
+                /* Completely or relatively transparent. Don't change pixel. */
+                ;
+              } else {
+                uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE)
+                                     * BYTES_PER_PIXEL );
+                uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                     * BYTES_PER_PIXEL );
+                /* This is the pixel that's going to change;*/
+                uint8_t *p_dest = p_pixel_base_y + i_xdest;
+                memset( p_dest, cmap[*p_source & 0x3], i_xlast - i_xdest );
+              }
+
+            }
+        } else {
+          /* Have to scale over many lines. */
+          int i_yreal = p_pic->p->i_pitch * i_ytmp;
+          /* NOTE(review): this shifts (pitch * i_y) as a whole, so i_ynext
+             need not be a multiple of the pitch -- confirm intended. */
+          int i_ynext = p_pic->p->i_pitch * i_y >> ASCALE;
+
+           /* Draw until we reach the end of the line */
+           for( ; i_x < p_spu->i_width; i_x ++, p_source++ )
+             {
+              ogt_yuvt_t p_yuvt;
+
+              if( b_crop ) {
+
+                /* FIXME: y cropping should be dealt with outside of this
+                   loop.*/
+                if ( i_y < i_y_start) continue;
+
+                if ( i_x > i_x_end )
+                  {
+                    p_source += p_spu->i_width - i_x;
+                    break;
+                  }
+              }
+
+              if (p_source >= p_src_end) {
+                msg_Err( p_vout, "trying to access beyond subtitle %dx%d %d",
+                         i_x, i_y / i_yscale, i_height);
+                return;
+              }
+
+              /* Only read *p_source now that it is known in bounds. */
+              p_yuvt = p_sys->p_palette[*p_source & 0x3];
+              if ( (p_yuvt.s.t) < (MAX_ALPHA) / 2 ) {
+                /* Completely or relatively transparent. Don't change pixel. */
+                ;
+              } else {
+                uint32_t i_xdest = ( ((i_x*i_xscale) >> ASCALE)
+                                     * BYTES_PER_PIXEL );
+                uint32_t i_xlast = ( (((i_x+1)*i_xscale) >> ASCALE)
+                                     * BYTES_PER_PIXEL );
+                uint32_t len     = i_xlast - i_xdest;
+                for( i_ytmp = i_yreal ; i_ytmp < i_ynext ;
+                     i_ytmp += p_pic->p->i_pitch ) {
+                  uint8_t *p_dest = p_pixel_base + i_ytmp + i_xdest;
+                  memset( p_dest, cmap[*p_source & 0x3], len );
+                }
+              }
+            }
+
+        }
+      }
+}
+
 \f
 /* 
  * Local variables: