git.sesse.net Git - vlc/blobdiff - modules/video_filter/blend.c
Copy clip_uint8() function from ffmpeg and replace where applicable for video filters.
[vlc] / modules / video_filter / blend.c
index cafcd32c163466ca81a035eb0d6ec48d0da08984..f372247c63d5ccf164a749693292369f08ae147d 100644 (file)
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * blend.c: alpha blend 2 pictures together
  *****************************************************************************
- * Copyright (C) 2003 VideoLAN
+ * Copyright (C) 2003 the VideoLAN team
  * $Id$
  *
  * Author: Gildas Bazin <gbazin@videolan.org>
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
 /*****************************************************************************
  * Preamble
  *****************************************************************************/
 #include <vlc/vlc.h>
-#include <vlc/decoder.h>
+#include <vlc_vout.h>
 #include "vlc_filter.h"
 
 /*****************************************************************************
@@ -33,6 +33,7 @@
  *****************************************************************************/
 struct filter_sys_t
 {
+    int i_dummy;
 };
 
 /****************************************************************************
@@ -41,20 +42,23 @@ struct filter_sys_t
 static int  OpenFilter ( vlc_object_t * );
 static void CloseFilter( vlc_object_t * );
 
+/* TODO i_alpha support for BlendR16 */
 static void Blend( filter_t *, picture_t *, picture_t *, picture_t *,
-                   int, int );
+                   int, int, int );
 static void BlendI420( filter_t *, picture_t *, picture_t *, picture_t *,
-                       int, int );
+                       int, int, int, int, int );
 static void BlendR16( filter_t *, picture_t *, picture_t *, picture_t *,
-                      int, int );
+                      int, int, int, int, int );
 static void BlendR24( filter_t *, picture_t *, picture_t *, picture_t *,
-                      int, int );
-static void BlendYUY2( filter_t *, picture_t *, picture_t *, picture_t *,
-                       int, int );
+                      int, int, int, int, int );
+static void BlendYUVPacked( filter_t *, picture_t *, picture_t *, picture_t *,
+                            int, int, int, int, int );
 static void BlendPalI420( filter_t *, picture_t *, picture_t *, picture_t *,
-                          int, int );
-static void BlendPalYUY2( filter_t *, picture_t *, picture_t *, picture_t *,
-                          int, int );
+                          int, int, int, int, int );
+static void BlendPalYUVPacked( filter_t *, picture_t *, picture_t *, picture_t *,
+                               int, int, int, int, int );
+static void BlendPalRV( filter_t *, picture_t *, picture_t *, picture_t *,
+                        int, int, int, int, int );
 
 /*****************************************************************************
  * Module descriptor
@@ -80,6 +84,8 @@ static int OpenFilter( vlc_object_t *p_this )
         ( p_filter->fmt_out.video.i_chroma != VLC_FOURCC('I','4','2','0') &&
           p_filter->fmt_out.video.i_chroma != VLC_FOURCC('Y','U','Y','2') &&
           p_filter->fmt_out.video.i_chroma != VLC_FOURCC('Y','V','1','2') &&
+          p_filter->fmt_out.video.i_chroma != VLC_FOURCC('U','Y','V','Y') &&
+          p_filter->fmt_out.video.i_chroma != VLC_FOURCC('Y','V','Y','U') &&
           p_filter->fmt_out.video.i_chroma != VLC_FOURCC('R','V','1','6') &&
           p_filter->fmt_out.video.i_chroma != VLC_FOURCC('R','V','2','4') &&
           p_filter->fmt_out.video.i_chroma != VLC_FOURCC('R','V','3','2') ) )
@@ -113,28 +119,40 @@ static int OpenFilter( vlc_object_t *p_this )
  ****************************************************************************/
 static void Blend( filter_t *p_filter, picture_t *p_dst,
                    picture_t *p_dst_orig, picture_t *p_src,
-                   int i_x_offset, int i_y_offset )
+                   int i_x_offset, int i_y_offset, int i_alpha )
 {
+    int i_width, i_height;
+
+    i_width = __MIN((int)p_filter->fmt_out.video.i_visible_width - i_x_offset,
+                    (int)p_filter->fmt_in.video.i_visible_width);
+
+    i_height = __MIN((int)p_filter->fmt_out.video.i_visible_height -i_y_offset,
+                     (int)p_filter->fmt_in.video.i_visible_height);
+
+    if( i_width <= 0 || i_height <= 0 ) return;
+
     if( p_filter->fmt_in.video.i_chroma == VLC_FOURCC('Y','U','V','A') &&
         ( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('I','4','2','0') ||
           p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','1','2') ) )
     {
         BlendI420( p_filter, p_dst, p_dst_orig, p_src,
-                   i_x_offset, i_y_offset );
+                   i_x_offset, i_y_offset, i_width, i_height, i_alpha );
         return;
     }
     if( p_filter->fmt_in.video.i_chroma == VLC_FOURCC('Y','U','V','A') &&
-        p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
+        ( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') ||
+          p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') ||
+          p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') ) )
     {
-        BlendYUY2( p_filter, p_dst, p_dst_orig, p_src,
-                   i_x_offset, i_y_offset );
+        BlendYUVPacked( p_filter, p_dst, p_dst_orig, p_src,
+                   i_x_offset, i_y_offset, i_width, i_height, i_alpha );
         return;
     }
     if( p_filter->fmt_in.video.i_chroma == VLC_FOURCC('Y','U','V','A') &&
         p_filter->fmt_out.video.i_chroma == VLC_FOURCC('R','V','1','6') )
     {
         BlendR16( p_filter, p_dst, p_dst_orig, p_src,
-                  i_x_offset, i_y_offset );
+                  i_x_offset, i_y_offset, i_width, i_height, i_alpha );
         return;
     }
     if( p_filter->fmt_in.video.i_chroma == VLC_FOURCC('Y','U','V','A') &&
@@ -142,7 +160,7 @@ static void Blend( filter_t *p_filter, picture_t *p_dst,
           p_filter->fmt_out.video.i_chroma == VLC_FOURCC('R','V','3','2') ) )
     {
         BlendR24( p_filter, p_dst, p_dst_orig, p_src,
-                  i_x_offset, i_y_offset );
+                  i_x_offset, i_y_offset, i_width, i_height, i_alpha );
         return;
     }
     if( p_filter->fmt_in.video.i_chroma == VLC_FOURCC('Y','U','V','P') &&
@@ -150,14 +168,25 @@ static void Blend( filter_t *p_filter, picture_t *p_dst,
           p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','1','2') ) )
     {
         BlendPalI420( p_filter, p_dst, p_dst_orig, p_src,
-                      i_x_offset, i_y_offset );
+                      i_x_offset, i_y_offset, i_width, i_height, i_alpha );
+        return;
+    }
+    if( p_filter->fmt_in.video.i_chroma == VLC_FOURCC('Y','U','V','P') &&
+        ( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') ||
+          p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') ||
+          p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') ) )
+    {
+        BlendPalYUVPacked( p_filter, p_dst, p_dst_orig, p_src,
+                      i_x_offset, i_y_offset, i_width, i_height, i_alpha );
         return;
     }
     if( p_filter->fmt_in.video.i_chroma == VLC_FOURCC('Y','U','V','P') &&
-        p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
+        ( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('R','V','1','6') ||
+          p_filter->fmt_out.video.i_chroma == VLC_FOURCC('R','V','2','4') ||
+          p_filter->fmt_out.video.i_chroma == VLC_FOURCC('R','V','3','2') ) )
     {
-        BlendPalYUY2( p_filter, p_dst, p_dst_orig, p_src,
-                      i_x_offset, i_y_offset );
+        BlendPalRV( p_filter, p_dst, p_dst_orig, p_src,
+                    i_x_offset, i_y_offset, i_width, i_height, i_alpha );
         return;
     }
 
@@ -166,15 +195,15 @@ static void Blend( filter_t *p_filter, picture_t *p_dst,
 
 static void BlendI420( filter_t *p_filter, picture_t *p_dst,
                        picture_t *p_dst_orig, picture_t *p_src,
-                       int i_x_offset, int i_y_offset )
+                       int i_x_offset, int i_y_offset,
+                       int i_width, int i_height, int i_alpha )
 {
-    filter_sys_t *p_sys = p_filter->p_sys;
     int i_src1_pitch, i_src2_pitch, i_dst_pitch;
     uint8_t *p_src1_y, *p_src2_y, *p_dst_y;
     uint8_t *p_src1_u, *p_src2_u, *p_dst_u;
     uint8_t *p_src1_v, *p_src2_v, *p_dst_v;
     uint8_t *p_trans;
-    int i_width, i_height, i_x, i_y;
+    int i_x, i_y, i_trans;
     vlc_bool_t b_even_scanline = i_y_offset % 2;
 
     i_dst_pitch = p_dst->p[Y_PLANE].i_pitch;
@@ -220,12 +249,6 @@ static void BlendI420( filter_t *p_filter, picture_t *p_dst,
               p_filter->fmt_in.video.i_x_offset +
               p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_width = __MIN( p_filter->fmt_out.video.i_visible_width - i_x_offset,
-                     p_filter->fmt_in.video.i_visible_width );
-
-    i_height = __MIN( p_filter->fmt_out.video.i_visible_height - i_y_offset,
-                      p_filter->fmt_in.video.i_visible_height );
-
 #define MAX_TRANS 255
 #define TRANS_BITS  8
 
@@ -245,12 +268,13 @@ static void BlendI420( filter_t *p_filter, picture_t *p_dst,
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            if( !p_trans[i_x] )
+            i_trans = ( p_trans[i_x] * i_alpha ) / 255;
+            if( !i_trans )
             {
                 /* Completely transparent. Don't change pixel */
                 continue;
             }
-            else if( p_trans[i_x] == MAX_TRANS )
+            else if( i_trans == MAX_TRANS )
             {
                 /* Completely opaque. Completely overwrite underlying pixel */
                 p_dst_y[i_x] = p_src2_y[i_x];
@@ -264,17 +288,17 @@ static void BlendI420( filter_t *p_filter, picture_t *p_dst,
             }
 
             /* Blending */
-            p_dst_y[i_x] = ( (uint16_t)p_src2_y[i_x] * p_trans[i_x] +
-                (uint16_t)p_src1_y[i_x] * (MAX_TRANS - p_trans[i_x]) )
+            p_dst_y[i_x] = ( (uint16_t)p_src2_y[i_x] * i_trans +
+                (uint16_t)p_src1_y[i_x] * (MAX_TRANS - i_trans) )
                 >> TRANS_BITS;
 
             if( b_even_scanline && i_x % 2 == 0 )
             {
-                p_dst_u[i_x/2] = ( (uint16_t)p_src2_u[i_x] * p_trans[i_x] +
-                (uint16_t)p_src1_u[i_x/2] * (MAX_TRANS - p_trans[i_x]) )
+                p_dst_u[i_x/2] = ( (uint16_t)p_src2_u[i_x] * i_trans +
+                (uint16_t)p_src1_u[i_x/2] * (MAX_TRANS - i_trans) )
                 >> TRANS_BITS;
-                p_dst_v[i_x/2] = ( (uint16_t)p_src2_v[i_x] * p_trans[i_x] +
-                (uint16_t)p_src1_v[i_x/2] * (MAX_TRANS - p_trans[i_x]) )
+                p_dst_v[i_x/2] = ( (uint16_t)p_src2_v[i_x] * i_trans +
+                (uint16_t)p_src1_v[i_x/2] * (MAX_TRANS - i_trans) )
                 >> TRANS_BITS;
             }
         }
@@ -311,14 +335,14 @@ static inline void yuv_to_rgb( int *r, int *g, int *b,
 
 static void BlendR16( filter_t *p_filter, picture_t *p_dst_pic,
                       picture_t *p_dst_orig, picture_t *p_src,
-                      int i_x_offset, int i_y_offset )
+                      int i_x_offset, int i_y_offset,
+                      int i_width, int i_height, int i_alpha )
 {
-    filter_sys_t *p_sys = p_filter->p_sys;
     int i_src1_pitch, i_src2_pitch, i_dst_pitch;
     uint8_t *p_dst, *p_src1, *p_src2_y;
     uint8_t *p_src2_u, *p_src2_v;
     uint8_t *p_trans;
-    int i_width, i_height, i_x, i_y, i_pix_pitch;
+    int i_x, i_y, i_pix_pitch;
     int r, g, b;
 
     i_pix_pitch = p_dst_pic->p->i_pixel_pitch;
@@ -349,12 +373,6 @@ static void BlendR16( filter_t *p_filter, picture_t *p_dst_pic,
               p_filter->fmt_in.video.i_x_offset +
               p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_width = __MIN( p_filter->fmt_out.video.i_visible_width - i_x_offset,
-                     p_filter->fmt_in.video.i_visible_width );
-
-    i_height = __MIN( p_filter->fmt_out.video.i_visible_height - i_y_offset,
-                      p_filter->fmt_in.video.i_visible_height );
-
 #define MAX_TRANS 255
 #define TRANS_BITS  8
 
@@ -398,14 +416,14 @@ static void BlendR16( filter_t *p_filter, picture_t *p_dst_pic,
 
 static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
                       picture_t *p_dst_orig, picture_t *p_src,
-                      int i_x_offset, int i_y_offset )
+                      int i_x_offset, int i_y_offset,
+                      int i_width, int i_height, int i_alpha )
 {
-    filter_sys_t *p_sys = p_filter->p_sys;
     int i_src1_pitch, i_src2_pitch, i_dst_pitch;
     uint8_t *p_dst, *p_src1, *p_src2_y;
     uint8_t *p_src2_u, *p_src2_v;
     uint8_t *p_trans;
-    int i_width, i_height, i_x, i_y, i_pix_pitch;
+    int i_x, i_y, i_pix_pitch, i_trans;
     int r, g, b;
 
     i_pix_pitch = p_dst_pic->p->i_pixel_pitch;
@@ -436,12 +454,6 @@ static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
               p_filter->fmt_in.video.i_x_offset +
               p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_width = __MIN( p_filter->fmt_out.video.i_visible_width - i_x_offset,
-                     p_filter->fmt_in.video.i_visible_width );
-
-    i_height = __MIN( p_filter->fmt_out.video.i_visible_height - i_y_offset,
-                      p_filter->fmt_in.video.i_visible_height );
-
 #define MAX_TRANS 255
 #define TRANS_BITS  8
 
@@ -454,12 +466,13 @@ static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            if( !p_trans[i_x] )
+            i_trans = ( p_trans[i_x] * i_alpha ) / 255;
+            if( !i_trans )
             {
                 /* Completely transparent. Don't change pixel */
                 continue;
             }
-            else if( p_trans[i_x] == MAX_TRANS )
+            else if( i_trans == MAX_TRANS )
             {
                 /* Completely opaque. Completely overwrite underlying pixel */
                 yuv_to_rgb( &r, &g, &b,
@@ -475,15 +488,15 @@ static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
             yuv_to_rgb( &r, &g, &b,
                         p_src2_y[i_x], p_src2_u[i_x], p_src2_v[i_x] );
 
-            p_dst[i_x * i_pix_pitch]     = ( r * p_trans[i_x] +
+            p_dst[i_x * i_pix_pitch]     = ( r * i_trans +
                 (uint16_t)p_src1[i_x * i_pix_pitch] *
-                (MAX_TRANS - p_trans[i_x]) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 1] = ( g * p_trans[i_x] +
+                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+            p_dst[i_x * i_pix_pitch + 1] = ( g * i_trans +
                 (uint16_t)p_src1[i_x * i_pix_pitch + 1] *
-                (MAX_TRANS - p_trans[i_x]) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 2] = ( b * p_trans[i_x] +
+                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+            p_dst[i_x * i_pix_pitch + 2] = ( b * i_trans +
                 (uint16_t)p_src1[i_x * i_pix_pitch + 2] *
-                (MAX_TRANS - p_trans[i_x]) ) >> TRANS_BITS;
+                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
         }
     }
 
@@ -493,16 +506,37 @@ static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
     return;
 }
 
-static void BlendYUY2( filter_t *p_filter, picture_t *p_dst_pic,
-                       picture_t *p_dst_orig, picture_t *p_src,
-                       int i_x_offset, int i_y_offset )
+static void BlendYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
+                            picture_t *p_dst_orig, picture_t *p_src,
+                            int i_x_offset, int i_y_offset,
+                            int i_width, int i_height, int i_alpha )
 {
-    filter_sys_t *p_sys = p_filter->p_sys;
     int i_src1_pitch, i_src2_pitch, i_dst_pitch;
     uint8_t *p_dst, *p_src1, *p_src2_y;
     uint8_t *p_src2_u, *p_src2_v;
     uint8_t *p_trans;
-    int i_width, i_height, i_x, i_y, i_pix_pitch;
+    int i_x, i_y, i_pix_pitch, i_trans;
+    vlc_bool_t b_even = !((i_x_offset + p_filter->fmt_out.video.i_x_offset)%2);
+    int i_l_offset = 0, i_u_offset = 0, i_v_offset = 0;
+
+    if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
+    {
+        i_l_offset = 0;
+        i_u_offset = 1;
+        i_v_offset = 3;
+    }
+    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') )
+    {
+        i_l_offset = 1;
+        i_u_offset = 0;
+        i_v_offset = 2;
+    }
+    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') )
+    {
+        i_l_offset = 0;
+        i_u_offset = 3;
+        i_v_offset = 1;
+    }
 
     i_pix_pitch = 2;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -532,11 +566,7 @@ static void BlendYUY2( filter_t *p_filter, picture_t *p_dst_pic,
               p_filter->fmt_in.video.i_x_offset +
               p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_width = __MIN( p_filter->fmt_out.video.i_visible_width - i_x_offset,
-                     p_filter->fmt_in.video.i_visible_width );
-
-    i_height = __MIN( p_filter->fmt_out.video.i_visible_height - i_y_offset,
-                      p_filter->fmt_in.video.i_visible_height );
+    i_width = (i_width >> 1) << 1; /* Needs to be a multiple of 2 */
 
 #define MAX_TRANS 255
 #define TRANS_BITS  8
@@ -548,48 +578,60 @@ static void BlendYUY2( filter_t *p_filter, picture_t *p_dst_pic,
          p_src2_v += i_src2_pitch )
     {
         /* Draw until we reach the end of the line */
-        for( i_x = 0; i_x < i_width; i_x += 2 )
+        for( i_x = 0; i_x < i_width; i_x++, b_even = !b_even )
         {
-            if( !p_trans[i_x] )
+            i_trans = ( p_trans[i_x] * i_alpha ) / 255;
+            if( !i_trans )
             {
                 /* Completely transparent. Don't change pixel */
             }
-            else if( p_trans[i_x] == MAX_TRANS )
+            else if( i_trans == MAX_TRANS )
             {
                 /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * 2]     = p_src2_y[i_x];
-                p_dst[i_x * 2 + 1] = p_src2_u[i_x];
-                p_dst[i_x * 2 + 3] = p_src2_v[i_x];
-            }
-            else
-            {
-                /* Blending */
-                p_dst[i_x * 2]     = ( (uint16_t)p_src2_y[i_x] * p_trans[i_x] +
-                    (uint16_t)p_src1[i_x * 2] *
-                    (MAX_TRANS - p_trans[i_x]) ) >> TRANS_BITS;
-                p_dst[i_x * 2 + 1] = ( (uint16_t)p_src2_u[i_x] * p_trans[i_x] +
-                    (uint16_t)p_src1[i_x * 2 + 1] *
-                    (MAX_TRANS - p_trans[i_x]) ) >> TRANS_BITS;
-                p_dst[i_x * 2 + 3] = ( (uint16_t)p_src2_v[i_x] * p_trans[i_x] +
-                    (uint16_t)p_src1[i_x * 2 + 3] *
-                    (MAX_TRANS - p_trans[i_x]) ) >> TRANS_BITS;
-            }
+                p_dst[i_x * 2 + i_l_offset]     = p_src2_y[i_x];
 
-            if( !p_trans[i_x+1] )
-            {
-                /* Completely transparent. Don't change pixel */
-            }
-            else if( p_trans[i_x+1] == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * 2 + 2] = p_src2_y[i_x + 1];
+                if( b_even )
+                {
+                    if( p_trans[i_x+1] > 0xaa )
+                    {
+                        p_dst[i_x * 2 + i_u_offset] = (p_src2_u[i_x]+p_src2_u[i_x+1])>>1;
+                        p_dst[i_x * 2 + i_v_offset] = (p_src2_v[i_x]+p_src2_v[i_x+1])>>1;
+                    }
+                    else
+                    {
+                        p_dst[i_x * 2 + i_u_offset] = p_src2_u[i_x];
+                        p_dst[i_x * 2 + i_v_offset] = p_src2_v[i_x];
+                    }
+                }
             }
             else
             {
                 /* Blending */
-                p_dst[i_x * 2 + 2] = ( (uint16_t)p_src2_y[i_x+1] *
-                    p_trans[i_x+1] + (uint16_t)p_src1[i_x * 2 + 2] *
-                    (MAX_TRANS - p_trans[i_x+1]) ) >> TRANS_BITS;
+                p_dst[i_x * 2 + i_l_offset]     = ( (uint16_t)p_src2_y[i_x] * i_trans +
+                    (uint16_t)p_src1[i_x * 2 + i_l_offset] * (MAX_TRANS - i_trans) )
+                    >> TRANS_BITS;
+
+                if( b_even )
+                {
+                    uint16_t i_u = 0;
+                    uint16_t i_v = 0;
+                    if( p_trans[i_x+1] > 0xaa )
+                    {
+                        i_u = (p_src2_u[i_x]+p_src2_u[i_x+1])>>1;
+                        i_v = (p_src2_v[i_x]+p_src2_v[i_x+1])>>1;
+                    }
+                    else 
+                    {
+                        i_u = p_src2_u[i_x];
+                        i_v = p_src2_v[i_x];
+                    }
+                    p_dst[i_x * 2 + i_u_offset] = ( (uint16_t)i_u * i_trans +
+                        (uint16_t)p_src1[i_x * 2 + i_u_offset] * (MAX_TRANS - i_trans) )
+                        >> TRANS_BITS;
+                    p_dst[i_x * 2 + i_v_offset] = ( (uint16_t)i_v * i_trans +
+                        (uint16_t)p_src1[i_x * 2 + i_v_offset] * (MAX_TRANS - i_trans) )
+                        >> TRANS_BITS;
+                }
             }
         }
     }
@@ -602,14 +644,14 @@ static void BlendYUY2( filter_t *p_filter, picture_t *p_dst_pic,
 
 static void BlendPalI420( filter_t *p_filter, picture_t *p_dst,
                           picture_t *p_dst_orig, picture_t *p_src,
-                          int i_x_offset, int i_y_offset )
+                          int i_x_offset, int i_y_offset,
+                          int i_width, int i_height, int i_alpha )
 {
-    filter_sys_t *p_sys = p_filter->p_sys;
     int i_src1_pitch, i_src2_pitch, i_dst_pitch;
     uint8_t *p_src1_y, *p_src2, *p_dst_y;
     uint8_t *p_src1_u, *p_dst_u;
     uint8_t *p_src1_v, *p_dst_v;
-    int i_width, i_height, i_x, i_y;
+    int i_x, i_y, i_trans;
     vlc_bool_t b_even_scanline = i_y_offset % 2;
 
     i_dst_pitch = p_dst->p[Y_PLANE].i_pitch;
@@ -644,12 +686,6 @@ static void BlendPalI420( filter_t *p_filter, picture_t *p_dst,
     p_src2 = p_src->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
              i_src2_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_width = __MIN( p_filter->fmt_out.video.i_visible_width - i_x_offset,
-                     p_filter->fmt_in.video.i_visible_width );
-
-    i_height = __MIN( p_filter->fmt_out.video.i_visible_height - i_y_offset,
-                      p_filter->fmt_in.video.i_visible_height );
-
 #define MAX_TRANS 255
 #define TRANS_BITS  8
 #define p_trans p_src2
@@ -669,12 +705,13 @@ static void BlendPalI420( filter_t *p_filter, picture_t *p_dst,
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            if( !p_pal[p_trans[i_x]][3] )
+            i_trans = ( p_pal[p_trans[i_x]][3] * i_alpha ) / 255;
+            if( !i_trans )
             {
                 /* Completely transparent. Don't change pixel */
                 continue;
             }
-            else if( p_pal[p_trans[i_x]][3] == MAX_TRANS )
+            else if( i_trans == MAX_TRANS )
             {
                 /* Completely opaque. Completely overwrite underlying pixel */
                 p_dst_y[i_x] = p_pal[p_src2[i_x]][0];
@@ -688,18 +725,18 @@ static void BlendPalI420( filter_t *p_filter, picture_t *p_dst,
             }
 
             /* Blending */
-            p_dst_y[i_x] = ( (uint16_t)p_pal[p_src2[i_x]][0] *
-                p_pal[p_trans[i_x]][3] + (uint16_t)p_src1_y[i_x] *
-                (MAX_TRANS - p_pal[p_trans[i_x]][3]) ) >> TRANS_BITS;
+            p_dst_y[i_x] = ( (uint16_t)p_pal[p_src2[i_x]][0] * i_trans +
+                (uint16_t)p_src1_y[i_x] * (MAX_TRANS - i_trans) )
+                >> TRANS_BITS;
 
             if( b_even_scanline && i_x % 2 == 0 )
             {
-                p_dst_u[i_x/2] = ( (uint16_t)p_pal[p_src2[i_x]][1] *
-                    p_pal[p_trans[i_x]][3] + (uint16_t)p_src1_u[i_x/2] *
-                    (MAX_TRANS - p_pal[p_trans[i_x]][3]) ) >> TRANS_BITS;
-                p_dst_v[i_x/2] = ( (uint16_t)p_pal[p_src2[i_x]][2] *
-                    p_pal[p_trans[i_x]][3] + (uint16_t)p_src1_v[i_x/2] *
-                    (MAX_TRANS - p_pal[p_trans[i_x]][3]) ) >> TRANS_BITS;
+                p_dst_u[i_x/2] = ( (uint16_t)p_pal[p_src2[i_x]][1] * i_trans +
+                    (uint16_t)p_src1_u[i_x/2] * (MAX_TRANS - i_trans) )
+                    >> TRANS_BITS;
+                p_dst_v[i_x/2] = ( (uint16_t)p_pal[p_src2[i_x]][2] * i_trans +
+                    (uint16_t)p_src1_v[i_x/2] * (MAX_TRANS - i_trans) )
+                    >> TRANS_BITS;
             }
         }
     }
@@ -712,14 +749,35 @@ static void BlendPalI420( filter_t *p_filter, picture_t *p_dst,
     return;
 }
 
-static void BlendPalYUY2( filter_t *p_filter, picture_t *p_dst_pic,
-                          picture_t *p_dst_orig, picture_t *p_src,
-                          int i_x_offset, int i_y_offset )
+static void BlendPalYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
+                               picture_t *p_dst_orig, picture_t *p_src,
+                               int i_x_offset, int i_y_offset,
+                               int i_width, int i_height, int i_alpha )
 {
-    filter_sys_t *p_sys = p_filter->p_sys;
     int i_src1_pitch, i_src2_pitch, i_dst_pitch;
     uint8_t *p_src1, *p_src2, *p_dst;
-    int i_width, i_height, i_x, i_y, i_pix_pitch;
+    int i_x, i_y, i_pix_pitch, i_trans;
+    vlc_bool_t b_even = !((i_x_offset + p_filter->fmt_out.video.i_x_offset)%2);
+    int i_l_offset = 0, i_u_offset = 0, i_v_offset = 0;
+
+    if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
+    {
+        i_l_offset = 0;
+        i_u_offset = 1;
+        i_v_offset = 3;
+    }
+    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') )
+    {
+        i_l_offset = 1;
+        i_u_offset = 0;
+        i_v_offset = 2;
+    }
+    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') )
+    {
+        i_l_offset = 0;
+        i_u_offset = 3;
+        i_v_offset = 1;
+    }
 
     i_pix_pitch = 2;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -736,11 +794,7 @@ static void BlendPalYUY2( filter_t *p_filter, picture_t *p_dst_pic,
     p_src2 = p_src->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
              i_src2_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_width = __MIN( p_filter->fmt_out.video.i_visible_width - i_x_offset,
-                     p_filter->fmt_in.video.i_visible_width );
-
-    i_height = __MIN( p_filter->fmt_out.video.i_visible_height - i_y_offset,
-                      p_filter->fmt_in.video.i_visible_height );
+    i_width = (i_width >> 1) << 1; /* Needs to be a multiple of 2 */
 
 #define MAX_TRANS 255
 #define TRANS_BITS  8
@@ -752,49 +806,158 @@ static void BlendPalYUY2( filter_t *p_filter, picture_t *p_dst_pic,
          p_dst += i_dst_pitch, p_src1 += i_src1_pitch, p_src2 += i_src2_pitch )
     {
         /* Draw until we reach the end of the line */
-        for( i_x = 0; i_x < i_width; i_x += 2 )
+        for( i_x = 0; i_x < i_width; i_x++, b_even = !b_even )
         {
-            if( !p_pal[p_trans[i_x]][3] )
+            i_trans = ( p_pal[p_trans[i_x]][3] * i_alpha ) / 255;
+            if( !i_trans )
             {
                 /* Completely transparent. Don't change pixel */
             }
-            else if( p_pal[p_trans[i_x]][3] == MAX_TRANS )
+            else if( i_trans == MAX_TRANS )
             {
                 /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * 2]     = p_pal[p_src2[i_x]][0];
-                p_dst[i_x * 2 + 1] = p_pal[p_src2[i_x]][1];
-                p_dst[i_x * 2 + 3] = p_pal[p_src2[i_x]][2];
+                p_dst[i_x * 2 + i_l_offset]     = p_pal[p_src2[i_x]][0];
+
+                if( b_even )
+                {
+                    if( p_trans[i_x+1] > 0xaa )
+                    {
+                        p_dst[i_x * 2 + i_u_offset] = (p_pal[p_src2[i_x]][1] + p_pal[p_src2[i_x+1]][1]) >> 1;
+                        p_dst[i_x * 2 + i_v_offset] = (p_pal[p_src2[i_x]][2] + p_pal[p_src2[i_x+1]][2]) >> 1;
+                    }
+                    else
+                    {
+                        p_dst[i_x * 2 + i_u_offset] = p_pal[p_src2[i_x]][1];
+                        p_dst[i_x * 2 + i_v_offset] = p_pal[p_src2[i_x]][2];
+                    }
+                }
             }
             else
             {
                 /* Blending */
-                p_dst[i_x * 2]     = ( (uint16_t)p_pal[p_src2[i_x]][0] *
-                    p_pal[p_trans[i_x]][3] + (uint16_t)p_src1[i_x * 2] *
-                    (MAX_TRANS - p_pal[p_trans[i_x]][3]) ) >> TRANS_BITS;
-                p_dst[i_x * 2 + 1] = ( (uint16_t)p_pal[p_src2[i_x]][1] *
-                    p_pal[p_trans[i_x]][3] + (uint16_t)p_src1[i_x * 2 + 1] *
-                    (MAX_TRANS - p_pal[p_trans[i_x]][3]) ) >> TRANS_BITS;
-                p_dst[i_x * 2 + 3] = ( (uint16_t)p_pal[p_src2[i_x]][2] *
-                    p_pal[p_trans[i_x]][3] + (uint16_t)p_src1[i_x * 2 + 3] *
-                    (MAX_TRANS - p_pal[p_trans[i_x]][3]) ) >> TRANS_BITS;
+                p_dst[i_x * 2 + i_l_offset]     = ( (uint16_t)p_pal[p_src2[i_x]][0] *
+                    i_trans + (uint16_t)p_src1[i_x * 2 + i_l_offset] *
+                    (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+
+                if( b_even )
+                {
+                    uint16_t i_u = 0;
+                    uint16_t i_v = 0;
+                    if( p_trans[i_x+1] > 0xaa )
+                    {
+                        i_u = (p_pal[p_src2[i_x]][1] + p_pal[p_src2[i_x+1]][1]) >> 1;
+                        i_v = (p_pal[p_src2[i_x]][2] + p_pal[p_src2[i_x+1]][2]) >> 1;
+                    }
+                    else 
+                    {
+                        i_u = p_pal[p_src2[i_x]][1];
+                        i_v = p_pal[p_src2[i_x]][2];
+                    }
+
+                    p_dst[i_x * 2 + i_u_offset] = ( (uint16_t)i_u *
+                        i_trans + (uint16_t)p_src1[i_x * 2 + i_u_offset] *
+                        (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+                    p_dst[i_x * 2 + i_v_offset] = ( (uint16_t)i_v *
+                        i_trans + (uint16_t)p_src1[i_x * 2 + i_v_offset] *
+                        (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+                }
             }
+        }
+    }
+
+#undef MAX_TRANS
+#undef TRANS_BITS
+#undef p_trans
+#undef p_pal
+
+    return;
+}
+
+static void BlendPalRV( filter_t *p_filter, picture_t *p_dst_pic,
+                        picture_t *p_dst_orig, picture_t *p_src,
+                        int i_x_offset, int i_y_offset,
+                        int i_width, int i_height, int i_alpha )
+{
+    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
+    uint8_t *p_src1, *p_src2, *p_dst;
+    int i_x, i_y, i_pix_pitch, i_trans;
+    int r, g, b;
+    video_palette_t rgbpalette;
+
+    i_pix_pitch = p_dst_pic->p->i_pixel_pitch;
+    i_dst_pitch = p_dst_pic->p->i_pitch;
+    p_dst = p_dst_pic->p->p_pixels + i_pix_pitch * (i_x_offset +
+            p_filter->fmt_out.video.i_x_offset) + p_dst_pic->p->i_pitch *
+            ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
+
+    i_src1_pitch = p_dst_orig->p->i_pitch;
+    p_src1 = p_dst_orig->p->p_pixels + i_pix_pitch * (i_x_offset +
+             p_filter->fmt_out.video.i_x_offset) + p_dst_orig->p->i_pitch *
+             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-            if( !p_pal[p_trans[i_x+1]][3] )
+    i_src2_pitch = p_src->p->i_pitch;
+    p_src2 = p_src->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
+             i_src2_pitch * p_filter->fmt_in.video.i_y_offset;
+
+#define MAX_TRANS 255
+#define TRANS_BITS  8
+#define p_trans p_src2
+#define p_pal p_filter->fmt_in.video.p_palette->palette
+#define rgbpal rgbpalette.palette
+
+    /* Convert palette first */
+    for( i_y = 0; i_y < p_filter->fmt_in.video.p_palette->i_entries &&
+         i_y < 256; i_y++ )
+    {
+        yuv_to_rgb( &r, &g, &b, p_pal[i_y][0], p_pal[i_y][1], p_pal[i_y][2] );
+
+        if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('R','V','1','6') )
+        {
+            *(uint16_t *)rgbpal[i_y] =
+                ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
+        }
+        else
+        {
+            rgbpal[i_y][0] = r; rgbpal[i_y][1] = g; rgbpal[i_y][2] = b;
+        }
+    }
+
+    /* Draw until we reach the bottom of the subtitle */
+    for( i_y = 0; i_y < i_height; i_y++,
+         p_dst += i_dst_pitch, p_src1 += i_src1_pitch, p_src2 += i_src2_pitch )
+    {
+        /* Draw until we reach the end of the line */
+        for( i_x = 0; i_x < i_width; i_x++ )
+        {
+            i_trans = ( p_pal[p_trans[i_x]][3] * i_alpha ) / 255;
+            if( !i_trans )
             {
                 /* Completely transparent. Don't change pixel */
+                continue;
             }
-            else if( p_pal[p_trans[i_x+1]][3] == MAX_TRANS )
+            else if( i_trans == MAX_TRANS ||
+                     p_filter->fmt_out.video.i_chroma ==
+                     VLC_FOURCC('R','V','1','6') )
             {
                 /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * 2 + 2] = p_pal[p_src2[i_x + 1]][0];
-            }
-            else
-            {
-                /* Blending */
-                p_dst[i_x * 2 + 2] = ( (uint16_t)p_pal[p_src2[i_x+1]][0] *
-                    p_pal[p_trans[i_x+1]][3] + (uint16_t)p_src1[i_x * 2 + 2] *
-                    (MAX_TRANS - p_pal[p_trans[i_x+1]][3]) ) >> TRANS_BITS;
+                p_dst[i_x * i_pix_pitch]     = rgbpal[p_src2[i_x]][0];
+                p_dst[i_x * i_pix_pitch + 1] = rgbpal[p_src2[i_x]][1];
+                if( p_filter->fmt_out.video.i_chroma !=
+                    VLC_FOURCC('R','V','1','6') )
+                p_dst[i_x * i_pix_pitch + 2] = rgbpal[p_src2[i_x]][2];
+                continue;
             }
+
+            /* Blending */
+            p_dst[i_x * i_pix_pitch]     = ( (uint16_t)rgbpal[p_src2[i_x]][0] *
+                i_trans + (uint16_t)p_src1[i_x * i_pix_pitch] *
+                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+            p_dst[i_x * i_pix_pitch + 1] = ( (uint16_t)rgbpal[p_src2[i_x]][1] *
+                i_trans + (uint16_t)p_src1[i_x * i_pix_pitch + 1] *
+                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+            p_dst[i_x * i_pix_pitch + 2] = ( (uint16_t)rgbpal[p_src2[i_x]][2] *
+                i_trans + (uint16_t)p_src1[i_x * i_pix_pitch + 2] *
+                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
         }
     }
 
@@ -802,6 +965,7 @@ static void BlendPalYUY2( filter_t *p_filter, picture_t *p_dst_pic,
 #undef TRANS_BITS
 #undef p_trans
 #undef p_pal
+#undef rgbpal
 
     return;
 }