]> git.sesse.net Git - vlc/blobdiff - modules/video_filter/blend.c
Use var_InheritString for --decklink-video-connection.
[vlc] / modules / video_filter / blend.c
index 89dfeba6b9e2bf187e9c18923a8854bdcfd9d310..530be44c88b59c7371f1ee41cc819346a1a585a5 100644 (file)
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * blend.c: alpha blend 2 pictures together
  *****************************************************************************
- * Copyright (C) 2003-2007 the VideoLAN team
+ * Copyright (C) 2003-2009 the VideoLAN team
  * $Id$
  *
  * Authors: Gildas Bazin <gbazin@videolan.org>
 # include "config.h"
 #endif
 
+#include <assert.h>
 #include <vlc_common.h>
 #include <vlc_plugin.h>
-#include <vlc_vout.h>
-#include "vlc_filter.h"
+#include <vlc_filter.h>
 
 /*****************************************************************************
- * filter_sys_t : filter descriptor
+ * Module descriptor
  *****************************************************************************/
-struct filter_sys_t
-{
-    int i_dummy;
-};
+/* Open/close entry points; declared ahead of the descriptor below, which
+ * hands them to set_callbacks(). */
+static int  OpenFilter ( vlc_object_t * );
+static void CloseFilter( vlc_object_t * );
+
+/* Registers this plugin under the "video blending" capability.
+ * NOTE(review): the second set_capability() argument (100) is presumably the
+ * module score/priority -- confirm against the plugin API. */
+vlc_module_begin ()
+    set_description( N_("Video pictures blending") )
+    set_capability( "video blending", 100 )
+    set_callbacks( OpenFilter, CloseFilter )
+vlc_module_end ()
 
 /****************************************************************************
  * Local prototypes
  ****************************************************************************/
-static int  OpenFilter ( vlc_object_t * );
-static void CloseFilter( vlc_object_t * );
-
-static void Blend( filter_t *, picture_t *, picture_t *, picture_t *,
+static void Blend( filter_t *, picture_t *, const picture_t *,
                    int, int, int );
 
-/* TODO i_alpha support for BlendR16 */
 /* YUVA */
-static void BlendI420( filter_t *, picture_t *, picture_t *, picture_t *,
-                       int, int, int, int, int );
-static void BlendR16( filter_t *, picture_t *, picture_t *, picture_t *,
-                      int, int, int, int, int );
-static void BlendR24( filter_t *, picture_t *, picture_t *, picture_t *,
-                      int, int, int, int, int );
-static void BlendYUVPacked( filter_t *, picture_t *, picture_t *, picture_t *,
-                            int, int, int, int, int );
+static void BlendYUVAI420( filter_t *, picture_t *, const picture_t *,
+                           int, int, int, int, int );
+static void BlendYUVARV16( filter_t *, picture_t *, const picture_t *,
+                           int, int, int, int, int );
+static void BlendYUVARV24( filter_t *, picture_t *, const picture_t *,
+                           int, int, int, int, int );
+static void BlendYUVAYUVPacked( filter_t *, picture_t *, const picture_t *,
+                                int, int, int, int, int );
 
 /* I420, YV12 */
-static void BlendI420I420( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendI420I420( filter_t *, picture_t *, const picture_t *,
                            int, int, int, int, int );
 static void BlendI420I420_no_alpha(
-                           filter_t *, picture_t *, picture_t *, picture_t *,
+                           filter_t *, picture_t *, const picture_t *,
                            int, int, int, int );
-static void BlendI420R16( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendI420R16( filter_t *, picture_t *, const picture_t *,
                            int, int, int, int, int );
-static void BlendI420R24( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendI420R24( filter_t *, picture_t *, const picture_t *,
                           int, int, int, int, int );
-static void BlendI420YUVPacked( filter_t *, picture_t *, picture_t *,
-                                picture_t *, int, int, int, int, int );
+static void BlendI420YUVPacked( filter_t *, picture_t *,
+                                const picture_t *, int, int, int, int, int );
 
 /* YUVP */
-static void BlendPalI420( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendPalI420( filter_t *, picture_t *, const picture_t *,
                           int, int, int, int, int );
-static void BlendPalYUVPacked( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendPalYUVPacked( filter_t *, picture_t *, const picture_t *,
                                int, int, int, int, int );
-static void BlendPalRV( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendPalRV( filter_t *, picture_t *, const picture_t *,
                         int, int, int, int, int );
 
 /* RGBA */
-static void BlendRGBAI420( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendRGBAI420( filter_t *, picture_t *, const picture_t *,
                            int, int, int, int, int );
-static void BlendRGBAYUVPacked( filter_t *, picture_t *, picture_t *,
-                                picture_t *, int, int, int, int, int );
-static void BlendRGBAR16( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendRGBAYUVPacked( filter_t *, picture_t *,
+                                const picture_t *, int, int, int, int, int );
+static void BlendRGBAR16( filter_t *, picture_t *, const picture_t *,
                           int, int, int, int, int );
-static void BlendRGBAR24( filter_t *, picture_t *, picture_t *, picture_t *,
+static void BlendRGBAR24( filter_t *, picture_t *, const picture_t *,
                           int, int, int, int, int );
 
-/*****************************************************************************
- * Module descriptor
- *****************************************************************************/
-vlc_module_begin();
-    set_description( N_("Video pictures blending") );
-    set_capability( "video blending", 100 );
-    set_callbacks( OpenFilter, CloseFilter );
-vlc_module_end();
+/* Private state of the blend filter; allocated by OpenFilter() and released
+ * by CloseFilter(). */
+struct filter_sys_t
+{
+    /* Index into p_blend_cfg[] of the routine chosen for the in/out chroma
+     * pair. OpenFilter() starts it at -1 and fails if it is still -1 after
+     * scanning the table; Blend() uses it to dispatch. */
+    int i_blendcfg;
+};
+
+/* Common signature of every blending routine stored in p_blend_cfg[]:
+ * the filter, the destination picture (modified in place), the read-only
+ * source picture, then five ints which Blend() passes as x offset, y offset,
+ * width, height and alpha, in that order. */
+typedef void (*BlendFunction)( filter_t *,
+                       picture_t *, const picture_t *,
+                       int , int , int , int , int );
+
+/* Zero-terminated lists of destination chromas, one list per family of
+ * blending routine. Each expands to a brace initializer for a p_dst[] array. */
+#define VLC_CODEC_PLANAR_420 { VLC_CODEC_I420, VLC_CODEC_J420, VLC_CODEC_YV12, 0 }
+#define VLC_CODEC_PACKED_422 { VLC_CODEC_YUYV, VLC_CODEC_UYVY, VLC_CODEC_YVYU, VLC_CODEC_VYUY, 0 }
+#define VLC_CODEC_RGB_16 { VLC_CODEC_RGB15, VLC_CODEC_RGB16, 0 }
+#define VLC_CODEC_RGB_24 { VLC_CODEC_RGB24, VLC_CODEC_RGB32, 0 }
+
+/* Expands to four consecutive table entries for one source chroma: one per
+ * destination family above, each bound to the routine given for that family.
+ * The expansion has no trailing comma; the caller supplies it. */
+#define BLEND_CFG( fccSrc, fctPlanar, fctPacked, fctRgb16, fctRgb24  ) \
+    { .src = fccSrc, .p_dst = VLC_CODEC_PLANAR_420, .pf_blend = fctPlanar }, \
+    { .src = fccSrc, .p_dst = VLC_CODEC_PACKED_422, .pf_blend = fctPacked }, \
+    { .src = fccSrc, .p_dst = VLC_CODEC_RGB_16,     .pf_blend = fctRgb16  }, \
+    { .src = fccSrc, .p_dst = VLC_CODEC_RGB_24,     .pf_blend = fctRgb24  }
+
+/* Dispatch table mapping (source chroma, destination chroma) to a blending
+ * routine. Each entry pairs one source chroma with a zero-terminated list of
+ * accepted destination chromas. The table itself ends with an entry whose
+ * src is 0, which is what the lookup loop in OpenFilter() stops on. */
+static const struct
+{
+    vlc_fourcc_t src;
+    vlc_fourcc_t p_dst[16];
+    BlendFunction pf_blend;
+} p_blend_cfg[] = {
+
+    BLEND_CFG( VLC_CODEC_YUVA, BlendYUVAI420, BlendYUVAYUVPacked, BlendYUVARV16, BlendYUVARV24 ),
+
+    /* Paletted sources use the same routine for 16-bit and 24/32-bit RGB. */
+    BLEND_CFG( VLC_CODEC_YUVP, BlendPalI420, BlendPalYUVPacked, BlendPalRV, BlendPalRV ),
+
+    BLEND_CFG( VLC_CODEC_RGBA, BlendRGBAI420, BlendRGBAYUVPacked, BlendRGBAR16, BlendRGBAR24 ),
+
+    /* I420 and YV12 sources are routed to the same set of routines. */
+    BLEND_CFG( VLC_CODEC_I420, BlendI420I420, BlendI420YUVPacked, BlendI420R16, BlendI420R24 ),
+
+    BLEND_CFG( VLC_CODEC_YV12, BlendI420I420, BlendI420YUVPacked, BlendI420R16, BlendI420R24 ),
+
+    /* Terminator. */
+    { 0, {0,}, NULL }
+};
 
 /*****************************************************************************
  * OpenFilter: probe the filter and return score
@@ -108,33 +139,52 @@ vlc_module_end();
 static int OpenFilter( vlc_object_t *p_this )
 {
     filter_t *p_filter = (filter_t*)p_this;
-    filter_sys_t *p_sys;
+    filter_sys_t *p_sys = (filter_sys_t *)malloc( sizeof( filter_sys_t ) );
+    if( !p_sys )
+        return VLC_ENOMEM;
+    p_filter->p_sys = p_sys;
+    p_filter->p_sys->i_blendcfg = -1;
 
     /* Check if we can handle that format.
      * We could try to use a chroma filter if we can't. */
     int in_chroma = p_filter->fmt_in.video.i_chroma;
     int out_chroma = p_filter->fmt_out.video.i_chroma;
-    if( ( in_chroma  != VLC_FOURCC('Y','U','V','A') &&
-          in_chroma  != VLC_FOURCC('I','4','2','0') &&
-          in_chroma  != VLC_FOURCC('Y','V','1','2') &&
-          in_chroma  != VLC_FOURCC('Y','U','V','P') &&
-          in_chroma  != VLC_FOURCC('R','G','B','A') ) ||
-        ( out_chroma != VLC_FOURCC('I','4','2','0') &&
-          out_chroma != VLC_FOURCC('Y','U','Y','2') &&
-          out_chroma != VLC_FOURCC('Y','V','1','2') &&
-          out_chroma != VLC_FOURCC('U','Y','V','Y') &&
-          out_chroma != VLC_FOURCC('Y','V','Y','U') &&
-          out_chroma != VLC_FOURCC('R','V','1','6') &&
-          out_chroma != VLC_FOURCC('R','V','2','4') &&
-          out_chroma != VLC_FOURCC('R','V','3','2') ) )
+
+    if( ( in_chroma  != VLC_CODEC_YUVA && in_chroma  != VLC_CODEC_I420 &&
+          in_chroma  != VLC_CODEC_YV12 && in_chroma  != VLC_CODEC_YUVP &&
+          in_chroma  != VLC_CODEC_RGBA ) ||
+        ( out_chroma != VLC_CODEC_I420 && out_chroma != VLC_CODEC_J420 &&
+          out_chroma != VLC_CODEC_YV12 &&
+          out_chroma != VLC_CODEC_YUYV && out_chroma != VLC_CODEC_YVYU &&
+          out_chroma != VLC_CODEC_UYVY && out_chroma != VLC_CODEC_VYUY &&
+          out_chroma != VLC_CODEC_RGB15 &&
+          out_chroma != VLC_CODEC_RGB16 &&
+          out_chroma != VLC_CODEC_RGB24 &&
+          out_chroma != VLC_CODEC_RGB32 ) )
     {
         return VLC_EGENERIC;
     }
+    for( int i = 0; p_blend_cfg[i].src != 0; i++ )
+    {
+        if( p_blend_cfg[i].src != p_filter->fmt_in.video.i_chroma )
+            continue;
+        for( int j = 0; p_blend_cfg[i].p_dst[j] != 0; j++ )
+        {
+            if( p_blend_cfg[i].p_dst[j] != p_filter->fmt_out.video.i_chroma )
+                continue;
+            p_sys->i_blendcfg = i;
+        }
+    }
 
-    /* Allocate the memory needed to store the decoder's structure */
-    if( ( p_filter->p_sys = p_sys =
-          (filter_sys_t *)malloc(sizeof(filter_sys_t)) ) == NULL )
-        return VLC_ENOMEM;
+    if( p_sys->i_blendcfg == -1 )
+    {
+       msg_Dbg( p_filter, "no matching alpha blending routine "
+             "(chroma: %4.4s -> %4.4s)",
+             (char *)&p_filter->fmt_in.video.i_chroma,
+             (char *)&p_filter->fmt_out.video.i_chroma );
+      free( p_sys );
+      return VLC_EGENERIC;
+   }
 
     /* Misc init */
     p_filter->pf_video_blend = Blend;
@@ -152,9 +202,7 @@ static int OpenFilter( vlc_object_t *p_this )
 static void CloseFilter( vlc_object_t *p_this )
 {
     filter_t *p_filter = (filter_t*)p_this;
-    filter_sys_t *p_sys = p_filter->p_sys;
-
-    free( p_sys );
+    free( p_filter->p_sys );
 }
 
 /****************************************************************************
@@ -162,13 +210,15 @@ static void CloseFilter( vlc_object_t *p_this )
  ****************************************************************************
  * This function is called just after the thread is launched.
  ****************************************************************************/
-static void Blend( filter_t *p_filter, picture_t *p_dst,
-                   picture_t *p_dst_orig, picture_t *p_src,
+
+static void Blend( filter_t *p_filter,
+                   picture_t *p_dst, const picture_t *p_src,
                    int i_x_offset, int i_y_offset, int i_alpha )
 {
     int i_width, i_height;
 
-    if( i_alpha == 0 ) return;
+    if( i_alpha == 0 )
+        return;
 
     i_width = __MIN((int)p_filter->fmt_out.video.i_visible_width - i_x_offset,
                     (int)p_filter->fmt_in.video.i_visible_width);
@@ -176,7 +226,11 @@ static void Blend( filter_t *p_filter, picture_t *p_dst,
     i_height = __MIN((int)p_filter->fmt_out.video.i_visible_height -i_y_offset,
                      (int)p_filter->fmt_in.video.i_visible_height);
 
-    if( i_width <= 0 || i_height <= 0 ) return;
+    if( i_width <= 0 || i_height <= 0 )
+        return;
+
+    video_format_FixRgb( &p_filter->fmt_out.video );
+    video_format_FixRgb( &p_filter->fmt_in.video );
 
 #if 0
     msg_Dbg( p_filter, "chroma: %4.4s -> %4.4s\n",
@@ -184,134 +238,45 @@ static void Blend( filter_t *p_filter, picture_t *p_dst,
              (char *)&p_filter->fmt_out.video.i_chroma );
 #endif
 
-    switch( p_filter->fmt_in.video.i_chroma )
-    {
-        case VLC_FOURCC('Y','U','V','A'):
-            switch( p_filter->fmt_out.video.i_chroma )
-            {
-                case VLC_FOURCC('I','4','2','0'):
-                case VLC_FOURCC('Y','V','1','2'):
-                    BlendI420( p_filter, p_dst, p_dst_orig, p_src,
-                               i_x_offset, i_y_offset,
-                               i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('Y','U','Y','2'):
-                case VLC_FOURCC('U','Y','V','Y'):
-                case VLC_FOURCC('Y','V','Y','U'):
-                    BlendYUVPacked( p_filter, p_dst, p_dst_orig, p_src,
-                                    i_x_offset, i_y_offset,
-                                    i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('R','V','1','6'):
-                    BlendR16( p_filter, p_dst, p_dst_orig, p_src,
-                              i_x_offset, i_y_offset,
-                              i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('R','V','2','4'):
-                case VLC_FOURCC('R','V','3','2'):
-                    BlendR24( p_filter, p_dst, p_dst_orig, p_src,
-                              i_x_offset, i_y_offset,
-                              i_width, i_height, i_alpha );
-                    return;
-            }
-        case VLC_FOURCC('Y','U','V','P'):
-            switch( p_filter->fmt_out.video.i_chroma )
-            {
-                case VLC_FOURCC('I','4','2','0'):
-                case VLC_FOURCC('Y','V','1','2'):
-                    BlendPalI420( p_filter, p_dst, p_dst_orig, p_src,
-                                  i_x_offset, i_y_offset,
-                                  i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('Y','U','Y','2'):
-                case VLC_FOURCC('U','Y','V','Y'):
-                case VLC_FOURCC('Y','V','Y','U'):
-                    BlendPalYUVPacked( p_filter, p_dst, p_dst_orig, p_src,
-                                       i_x_offset, i_y_offset,
-                                       i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('R','V','1','6'):
-                case VLC_FOURCC('R','V','2','4'):
-                case VLC_FOURCC('R','V','3','2'):
-                    BlendPalRV( p_filter, p_dst, p_dst_orig, p_src,
-                                i_x_offset, i_y_offset,
-                                i_width, i_height, i_alpha );
-                    return;
-            }
-        case VLC_FOURCC('Y','V','1','2'):
-        case VLC_FOURCC('I','4','2','0'):
-            switch( p_filter->fmt_out.video.i_chroma )
-            {
-                case VLC_FOURCC('I','4','2','0'):
-                case VLC_FOURCC('Y','V','1','2'):
-                    if( i_alpha == 0xff )
-                        BlendI420I420_no_alpha(
-                                   p_filter, p_dst, p_dst_orig, p_src,
-                                   i_x_offset, i_y_offset,
-                                   i_width, i_height );
-                    else
-                        BlendI420I420( p_filter, p_dst, p_dst_orig, p_src,
-                                       i_x_offset, i_y_offset,
-                                       i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('Y','U','Y','2'):
-                case VLC_FOURCC('U','Y','V','Y'):
-                case VLC_FOURCC('Y','V','Y','U'):
-                    BlendI420YUVPacked( p_filter, p_dst, p_dst_orig, p_src,
-                                        i_x_offset, i_y_offset,
-                                        i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('R','V','1','6'):
-                    BlendI420R16( p_filter, p_dst, p_dst_orig, p_src,
-                                  i_x_offset, i_y_offset,
-                                  i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('R','V','2','4'):
-                case VLC_FOURCC('R','V','3','2'):
-                    BlendI420R24( p_filter, p_dst, p_dst_orig, p_src,
-                                  i_x_offset, i_y_offset,
-                                  i_width, i_height, i_alpha );
-                    return;
-            }
-        case VLC_FOURCC('R','G','B','A'):
-            switch( p_filter->fmt_out.video.i_chroma )
-            {
-                case VLC_FOURCC('I','4','2','0'):
-                case VLC_FOURCC('Y','V','1','2'):
-                    BlendRGBAI420( p_filter, p_dst, p_dst_orig, p_src,
-                                   i_x_offset, i_y_offset,
-                                   i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('Y','U','Y','2'):
-                case VLC_FOURCC('U','Y','V','Y'):
-                case VLC_FOURCC('Y','V','Y','U'):
-                    BlendRGBAYUVPacked( p_filter, p_dst, p_dst_orig, p_src,
-                                        i_x_offset, i_y_offset,
-                                        i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('R','V','2','4'):
-                case VLC_FOURCC('R','V','3','2'):
-                    BlendRGBAR24( p_filter, p_dst, p_dst_orig, p_src,
-                                  i_x_offset, i_y_offset,
-                                  i_width, i_height, i_alpha );
-                    return;
-                case VLC_FOURCC('R','V','1','6'):
-                    BlendRGBAR16( p_filter, p_dst, p_dst_orig, p_src,
-                                  i_x_offset, i_y_offset,
-                                  i_width, i_height, i_alpha );
-                    return;
-            }
-    }
 
-    msg_Dbg( p_filter, "no matching alpha blending routine "
-             "(chroma: %4.4s -> %4.4s)",
-             (char *)&p_filter->fmt_in.video.i_chroma,
-             (char *)&p_filter->fmt_out.video.i_chroma );
+    p_blend_cfg[p_filter->p_sys->i_blendcfg].pf_blend( p_filter, p_dst, p_src,
+                            i_x_offset, i_y_offset,
+                            i_width, i_height, i_alpha );
+
 }
 
 /***********************************************************************
  * Utils
  ***********************************************************************/
+/* Saturate an int into the 0..255 range of an 8-bit sample: values below 0
+ * become 0, values above 255 become 255, everything else is returned as is. */
+static inline uint8_t vlc_uint8( int v )
+{
+    const int i_floored = ( v < 0 ) ? 0 : v;
+
+    return (uint8_t)( ( i_floored > 255 ) ? 255 : i_floored );
+}
+
+#define MAX_TRANS 255
+/* No longer used by vlc_blend(); kept in case code outside this block still
+ * refers to it. */
+#define TRANS_BITS  8
+
+/* Mix v1 over v2 with opacity a, where a runs from 0 (result is v2) to
+ * MAX_TRANS (result is v1).
+ *
+ * The two weights, a and MAX_TRANS - a, always add up to MAX_TRANS, so the
+ * weighted sum is normalised by dividing by MAX_TRANS. Shifting right by 8
+ * bits (dividing by 256) instead biases every result downwards: blending a
+ * value with itself would not give that value back (255 over 255 at a = 128
+ * would yield 254). The division truncates. */
+static inline int vlc_blend( int v1, int v2, int a )
+{
+    /* TODO bench if the tests really increase speed */
+    if( a == 0 )
+        return v2;
+    if( a == MAX_TRANS )
+        return v1;
+    return ( v1 * a + v2 * ( MAX_TRANS - a ) ) / MAX_TRANS;
+}
+
+/* Scale transparency value t by the global alpha a, both on a 0..255 scale.
+ * a == 255 leaves t untouched; otherwise the result is t * a / 255 with
+ * integer (truncating) division. */
+static inline int vlc_alpha( int t, int a )
+{
+    return ( a == 255 ) ? t : ( t * a ) / 255;
+}
+
 static inline void yuv_to_rgb( int *r, int *g, int *b,
                                uint8_t y1, uint8_t u1, uint8_t v1 )
 {
@@ -319,7 +284,6 @@ static inline void yuv_to_rgb( int *r, int *g, int *b,
 #   define SCALEBITS 10
 #   define ONE_HALF  (1 << (SCALEBITS - 1))
 #   define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
-#   define CLAMP( x ) (((x) > 255) ? 255 : ((x) < 0) ? 0 : (x));
 
     int y, cb, cr, r_add, g_add, b_add;
 
@@ -330,9 +294,12 @@ static inline void yuv_to_rgb( int *r, int *g, int *b,
             - FIX(0.71414*255.0/224.0) * cr + ONE_HALF;
     b_add = FIX(1.77200*255.0/224.0) * cb + ONE_HALF;
     y = (y1 - 16) * FIX(255.0/219.0);
-    *r = CLAMP((y + r_add) >> SCALEBITS);
-    *g = CLAMP((y + g_add) >> SCALEBITS);
-    *b = CLAMP((y + b_add) >> SCALEBITS);
+    *r = vlc_uint8( (y + r_add) >> SCALEBITS );
+    *g = vlc_uint8( (y + g_add) >> SCALEBITS );
+    *b = vlc_uint8( (y + b_add) >> SCALEBITS );
+#undef FIX
+#undef ONE_HALF
+#undef SCALEBITS
 }
 
 static inline void rgb_to_yuv( uint8_t *y, uint8_t *u, uint8_t *v,
@@ -343,78 +310,133 @@ static inline void rgb_to_yuv( uint8_t *y, uint8_t *u, uint8_t *v,
     *v =   ( ( 112 * r -  94 * g -  18 * b + 128 ) >> 8 ) + 128 ;
 }
 
+/* Return a pointer to the first pixel to process in plane i_plane of
+ * p_picture.
+ *
+ * The position is the caller's offset plus the offset stored in p_fmt, on
+ * each axis, divided by r (the callers in this file pass 1 for the Y and A
+ * planes and 2 for the U and V planes of an I420 destination). When pi_pitch
+ * is not NULL it receives the pitch of the selected plane. */
+static uint8_t *vlc_plane_start( int *pi_pitch,
+                                 const picture_t *p_picture,
+                                 int i_plane,
+                                 int i_x_offset, int i_y_offset,
+                                 const video_format_t *p_fmt,
+                                 int r )
+{
+    const int i_stride = p_picture->p[i_plane].i_pitch;
+    const int i_col = ( i_x_offset + p_fmt->i_x_offset ) / r;
+    const int i_row = ( i_y_offset + p_fmt->i_y_offset ) / r;
+
+    if( pi_pitch != NULL )
+        *pi_pitch = i_stride;
+
+    return p_picture->p[i_plane].p_pixels + ( i_row * i_stride + i_col );
+}
+
+/* Look up the Y, U and V index triple recorded for a packed YUV chroma and
+ * store it in *pi_y, *pi_u and *pi_v.
+ *
+ * A chroma that is not in the table falls through to the terminating entry,
+ * so all three outputs are then set to 0. */
+static void vlc_yuv_packed_index( int *pi_y, int *pi_u, int *pi_v, vlc_fourcc_t i_chroma )
+{
+    static const struct {
+        vlc_fourcc_t chroma;
+        int y, u ,v;
+    } p_layouts[] = {
+        { VLC_CODEC_YUYV, 0, 1, 3 },
+        { VLC_CODEC_UYVY, 1, 0, 2 },
+        { VLC_CODEC_YVYU, 0, 3, 1 },
+        { VLC_CODEC_VYUY, 1, 2, 0 },
+        { 0, 0, 0, 0 }
+    };
+    int i_entry = 0;
+
+    /* Stop on the matching chroma, or on the zero terminator. */
+    while( p_layouts[i_entry].chroma != 0 &&
+           p_layouts[i_entry].chroma != i_chroma )
+        i_entry++;
+
+    *pi_y = p_layouts[i_entry].y;
+    *pi_u = p_layouts[i_entry].u;
+    *pi_v = p_layouts[i_entry].v;
+}
+
+/* Blend up to three components into p_dst in place.
+ *
+ * c0 is always blended into p_dst[i_offset0]. c1 and c2 are blended into
+ * p_dst[i_offset1] and p_dst[i_offset2] only when b_do12 is true. Every
+ * component uses the same opacity i_alpha. */
+static void vlc_blend_packed( uint8_t *p_dst,
+                              int i_offset0, int i_offset1, int i_offset2,
+                              int c0, int c1, int c2, int i_alpha,
+                              bool b_do12 )
+{
+    uint8_t *p_first = &p_dst[i_offset0];
+
+    *p_first = vlc_blend( c0, *p_first, i_alpha );
+    if( !b_do12 )
+        return;
+
+    uint8_t *p_second = &p_dst[i_offset1];
+    *p_second = vlc_blend( c1, *p_second, i_alpha );
+
+    uint8_t *p_third = &p_dst[i_offset2];
+    *p_third = vlc_blend( c2, *p_third, i_alpha );
+}
+
+/* Blend the colour (R, G, B) into one 16-bit pixel, in place.
+ *
+ * The current pixel is split into components with the masks and left shifts
+ * of p_fmt. Each incoming component is first reduced with the matching right
+ * shift of p_fmt, blended over the existing one with opacity i_alpha, and the
+ * three results are shifted back into position and OR-ed together. */
+static void vlc_blend_rgb16( uint16_t *p_dst,
+                             int R, int G, int B, int i_alpha,
+                             const video_format_t *p_fmt )
+{
+    const int i_old = *p_dst;
+
+    /* Components currently stored in the pixel. */
+    const int i_old_r = ( i_old & p_fmt->i_rmask ) >> p_fmt->i_lrshift;
+    const int i_old_g = ( i_old & p_fmt->i_gmask ) >> p_fmt->i_lgshift;
+    const int i_old_b = ( i_old & p_fmt->i_bmask ) >> p_fmt->i_lbshift;
+
+    /* Blended components, still at reduced depth. */
+    const int i_new_r = vlc_blend( R >> p_fmt->i_rrshift, i_old_r, i_alpha );
+    const int i_new_g = vlc_blend( G >> p_fmt->i_rgshift, i_old_g, i_alpha );
+    const int i_new_b = vlc_blend( B >> p_fmt->i_rbshift, i_old_b, i_alpha );
+
+    *p_dst = ( i_new_r << p_fmt->i_lrshift ) |
+             ( i_new_g << p_fmt->i_lgshift ) |
+             ( i_new_b << p_fmt->i_lbshift );
+}
+
+/* Derive the R, G and B indexes of a 24- or 32-bit RGB format from the left
+ * shifts stored in p_fmt.
+ *
+ * For any chroma other than VLC_CODEC_RGB24 or VLC_CODEC_RGB32 the function
+ * returns without writing to the three outputs. */
+static void vlc_rgb_index( int *pi_rindex, int *pi_gindex, int *pi_bindex,
+                           const video_format_t *p_fmt )
+{
+    const bool b_rgb24 = ( p_fmt->i_chroma == VLC_CODEC_RGB24 );
+
+    if( !( b_rgb24 || p_fmt->i_chroma == VLC_CODEC_RGB32 ) )
+        return;
+
+    /* XXX this only works if the masks are aligned on 8-bit boundaries */
+#ifdef WORDS_BIGENDIAN
+    const int i_mask_bits = b_rgb24 ? 24 : 32;
+    *pi_rindex = ( i_mask_bits - p_fmt->i_lrshift ) / 8;
+    *pi_gindex = ( i_mask_bits - p_fmt->i_lgshift ) / 8;
+    *pi_bindex = ( i_mask_bits - p_fmt->i_lbshift ) / 8;
+#else
+    *pi_rindex = p_fmt->i_lrshift / 8;
+    *pi_gindex = p_fmt->i_lgshift / 8;
+    *pi_bindex = p_fmt->i_lbshift / 8;
+#endif
+}
+
 /***********************************************************************
  * YUVA
  ***********************************************************************/
-static void BlendI420( filter_t *p_filter, picture_t *p_dst,
-                       picture_t *p_dst_orig, picture_t *p_src,
-                       int i_x_offset, int i_y_offset,
-                       int i_width, int i_height, int i_alpha )
+static void BlendYUVAI420( filter_t *p_filter,
+                           picture_t *p_dst, const picture_t *p_src,
+                           int i_x_offset, int i_y_offset,
+                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_src1_y, *p_src2_y, *p_dst_y;
-    uint8_t *p_src1_u, *p_src2_u, *p_dst_u;
-    uint8_t *p_src1_v, *p_src2_v, *p_dst_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_src_y, *p_dst_y;
+    uint8_t *p_src_u, *p_dst_u;
+    uint8_t *p_src_v, *p_dst_v;
     uint8_t *p_trans;
     int i_x, i_y, i_trans = 0;
     bool b_even_scanline = i_y_offset % 2;
 
-    i_dst_pitch = p_dst->p[Y_PLANE].i_pitch;
-    p_dst_y = p_dst->p[Y_PLANE].p_pixels + i_x_offset +
-              p_filter->fmt_out.video.i_x_offset +
-              p_dst->p[Y_PLANE].i_pitch *
-              ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-    p_dst_u = p_dst->p[U_PLANE].p_pixels + i_x_offset/2 +
-              p_filter->fmt_out.video.i_x_offset/2 +
-              ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-              p_dst->p[U_PLANE].i_pitch;
-    p_dst_v = p_dst->p[V_PLANE].p_pixels + i_x_offset/2 +
-              p_filter->fmt_out.video.i_x_offset/2 +
-              ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-              p_dst->p[V_PLANE].i_pitch;
-
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1_y = p_dst_orig->p[Y_PLANE].p_pixels + i_x_offset +
-               p_filter->fmt_out.video.i_x_offset +
-               p_dst_orig->p[Y_PLANE].i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-    p_src1_u = p_dst_orig->p[U_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[U_PLANE].i_pitch;
-    p_src1_v = p_dst_orig->p[V_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[V_PLANE].i_pitch;
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-
-    p_trans = p_src->p[A_PLANE].p_pixels +
-              p_filter->fmt_in.video.i_x_offset +
-              p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    p_dst_y = vlc_plane_start( &i_dst_pitch, p_dst, Y_PLANE,
+                               i_x_offset, i_y_offset, &p_filter->fmt_out.video, 1 );
+    p_dst_u = vlc_plane_start( NULL, p_dst, U_PLANE,
+                               i_x_offset, i_y_offset, &p_filter->fmt_out.video, 2 );
+    p_dst_v = vlc_plane_start( NULL, p_dst, V_PLANE,
+                               i_x_offset, i_y_offset, &p_filter->fmt_out.video, 2 );
+
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_trans = vlc_plane_start( NULL, p_src, A_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
 
     /* Draw until we reach the bottom of the subtitle */
-    for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src2_pitch,
-         p_dst_y += i_dst_pitch, p_src1_y += i_src1_pitch,
-         p_src2_y += i_src2_pitch,
+    for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src_pitch,
+         p_dst_y += i_dst_pitch, p_src_y += i_src_pitch,
          p_dst_u += b_even_scanline ? i_dst_pitch/2 : 0,
-         p_src1_u += b_even_scanline ? i_src1_pitch/2 : 0,
-         p_src2_u += i_src2_pitch,
+         p_src_u += i_src_pitch,
          p_dst_v += b_even_scanline ? i_dst_pitch/2 : 0,
-         p_src1_v += b_even_scanline ? i_src1_pitch/2 : 0,
-         p_src2_v += i_src2_pitch )
+         p_src_v += i_src_pitch )
     {
         b_even_scanline = !b_even_scanline;
 
@@ -422,56 +444,30 @@ static void BlendI420( filter_t *p_filter, picture_t *p_dst,
         for( i_x = 0; i_x < i_width; i_x++ )
         {
             if( p_trans )
-                i_trans = ( p_trans[i_x] * i_alpha ) / 255;
-            if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
-                continue;
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst_y[i_x] = p_src2_y[i_x];
+                i_trans = vlc_alpha( p_trans[i_x], i_alpha );
 
-                if( b_even_scanline && i_x % 2 == 0 )
-                {
-                    p_dst_u[i_x/2] = p_src2_u[i_x];
-                    p_dst_v[i_x/2] = p_src2_v[i_x];
-                }
+            if( !i_trans )
                 continue;
-            }
 
             /* Blending */
-            p_dst_y[i_x] = ( (uint16_t)p_src2_y[i_x] * i_trans +
-                (uint16_t)p_src1_y[i_x] * (MAX_TRANS - i_trans) )
-                >> TRANS_BITS;
-
+            p_dst_y[i_x] = vlc_blend( p_src_y[i_x], p_dst_y[i_x], i_trans );
             if( b_even_scanline && i_x % 2 == 0 )
             {
-                p_dst_u[i_x/2] = ( (uint16_t)p_src2_u[i_x] * i_trans +
-                (uint16_t)p_src1_u[i_x/2] * (MAX_TRANS - i_trans) )
-                >> TRANS_BITS;
-                p_dst_v[i_x/2] = ( (uint16_t)p_src2_v[i_x] * i_trans +
-                (uint16_t)p_src1_v[i_x/2] * (MAX_TRANS - i_trans) )
-                >> TRANS_BITS;
+                p_dst_u[i_x/2] = vlc_blend( p_src_u[i_x], p_dst_u[i_x/2], i_trans );
+                p_dst_v[i_x/2] = vlc_blend( p_src_v[i_x], p_dst_v[i_x/2], i_trans );
             }
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendR16( filter_t *p_filter, picture_t *p_dst_pic,
-                      picture_t *p_dst_orig, picture_t *p_src,
-                      int i_x_offset, int i_y_offset,
-                      int i_width, int i_height, int i_alpha )
+static void BlendYUVARV16( filter_t *p_filter,
+                           picture_t *p_dst_pic, const picture_t *p_src,
+                           int i_x_offset, int i_y_offset,
+                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2_y;
-    uint8_t *p_src2_u, *p_src2_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src_y;
+    uint8_t *p_src_u, *p_src_v;
     uint8_t *p_trans;
     int i_x, i_y, i_pix_pitch, i_trans = 0;
     int r, g, b;
@@ -483,79 +479,47 @@ static void BlendR16( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-               p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-               p_dst_orig->p->i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-
-    p_trans = p_src->p[A_PLANE].p_pixels +
-              p_filter->fmt_in.video.i_x_offset +
-              p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_trans = vlc_plane_start( NULL, p_src, A_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
 
     /* Draw until we reach the bottom of the subtitle */
-    for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src2_pitch,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch,
-         p_src2_y += i_src2_pitch, p_src2_u += i_src2_pitch,
-         p_src2_v += i_src2_pitch )
+    for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src_pitch,
+         p_dst += i_dst_pitch,
+         p_src_y += i_src_pitch, p_src_u += i_src_pitch,
+         p_src_v += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
             if( p_trans )
-                i_trans = ( p_trans[i_x] * i_alpha ) / 255;
+                i_trans = vlc_alpha( p_trans[i_x], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
                 continue;
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                yuv_to_rgb( &r, &g, &b,
-                            p_src2_y[i_x], p_src2_u[i_x], p_src2_v[i_x] );
-
-    ((uint16_t *)(&p_dst[i_x * i_pix_pitch]))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
-                continue;
-            }
 
             /* Blending */
-            /* FIXME: do the blending */
             yuv_to_rgb( &r, &g, &b,
-                        p_src2_y[i_x], p_src2_u[i_x], p_src2_v[i_x] );
+                        p_src_y[i_x], p_src_u[i_x], p_src_v[i_x] );
 
-    ((uint16_t *)(&p_dst[i_x * i_pix_pitch]))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
+            vlc_blend_rgb16( (uint16_t*)&p_dst[i_x * i_pix_pitch],
+                             r, g, b, i_trans, &p_filter->fmt_out.video );
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
-                      picture_t *p_dst_orig, picture_t *p_src,
-                      int i_x_offset, int i_y_offset,
-                      int i_width, int i_height, int i_alpha )
+static void BlendYUVARV24( filter_t *p_filter,
+                           picture_t *p_dst_pic, const picture_t *p_src,
+                           int i_x_offset, int i_y_offset,
+                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2_y;
-    uint8_t *p_src2_u, *p_src2_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src_y;
+    uint8_t *p_src_u, *p_src_v;
     uint8_t *p_trans;
     int i_x, i_y, i_pix_pitch, i_trans = 0;
     int r, g, b;
@@ -567,32 +531,17 @@ static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p->i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-               p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-               p_dst_orig->p->i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-
-    p_trans = p_src->p[A_PLANE].p_pixels +
-              p_filter->fmt_in.video.i_x_offset +
-              p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_trans = vlc_plane_start( NULL, p_src, A_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
 
     if( (i_pix_pitch == 4)
-     && (((((intptr_t)p_dst)|((intptr_t)p_src1)|i_dst_pitch|i_src1_pitch)
+     && (((((intptr_t)p_dst)|i_dst_pitch) /* FIXME? */
           & 3) == 0) )
     {
         /*
@@ -601,226 +550,107 @@ static void BlendR24( filter_t *p_filter, picture_t *p_dst_pic,
         */
         uint32_t *p32_dst = (uint32_t *)p_dst;
         uint32_t i32_dst_pitch = (uint32_t)(i_dst_pitch>>2);
-        uint32_t *p32_src1 = (uint32_t *)p_src1;
-        uint32_t i32_src1_pitch = (uint32_t)(i_src1_pitch>>2);
 
         int i_rshift, i_gshift, i_bshift;
         uint32_t i_rmask, i_gmask, i_bmask;
 
-        if( p_dst_pic->p_heap )
-        {
-            i_rmask = p_dst_pic->p_heap->i_rmask;
-            i_gmask = p_dst_pic->p_heap->i_gmask;
-            i_bmask = p_dst_pic->p_heap->i_bmask;
-            i_rshift = p_dst_pic->p_heap->i_lrshift;
-            i_gshift = p_dst_pic->p_heap->i_lgshift;
-            i_bshift = p_dst_pic->p_heap->i_lbshift;
-        }
-        else
-        {
-            i_rmask = p_dst_pic->format.i_rmask;
-            i_gmask = p_dst_pic->format.i_gmask;
-            i_bmask = p_dst_pic->format.i_bmask;
+        i_rmask = p_filter->fmt_out.video.i_rmask;
+        i_gmask = p_filter->fmt_out.video.i_gmask;
+        i_bmask = p_filter->fmt_out.video.i_bmask;
+        i_rshift = p_filter->fmt_out.video.i_lrshift;
+        i_gshift = p_filter->fmt_out.video.i_lgshift;
+        i_bshift = p_filter->fmt_out.video.i_lbshift;
 
-            if( (i_rmask == 0x00FF0000)
-             && (i_gmask == 0x0000FF00)
-             && (i_bmask == 0x000000FF) )
-            {
-                /* X8R8G8B8 pixel layout */
-                i_rshift = 16;
-                i_bshift = 8;
-                i_gshift = 0;
-            }
-            else if( (i_rmask == 0xFF000000)
-                  && (i_gmask == 0x00FF0000)
-                  && (i_bmask == 0x0000FF00) )
-            {
-                /* R8G8B8X8 pixel layout */
-                i_rshift = 24;
-                i_bshift = 16;
-                i_gshift = 8;
-            }
-            else
-            {
-                goto slower;
-            }
-        }
         /* Draw until we reach the bottom of the subtitle */
-        for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src2_pitch,
-             p32_dst += i32_dst_pitch, p32_src1 += i32_src1_pitch,
-             p_src2_y += i_src2_pitch, p_src2_u += i_src2_pitch,
-             p_src2_v += i_src2_pitch )
+        for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src_pitch,
+             p32_dst += i32_dst_pitch,
+             p_src_y += i_src_pitch, p_src_u += i_src_pitch,
+             p_src_v += i_src_pitch )
         {
             /* Draw until we reach the end of the line */
             for( i_x = 0; i_x < i_width; i_x++ )
             {
                 if( p_trans )
-                    i_trans = ( p_trans[i_x] * i_alpha ) / 255;
+                    i_trans = vlc_alpha( p_trans[i_x], i_alpha );
                 if( !i_trans )
-                {
-                    /* Completely transparent. Don't change pixel */
                     continue;
-                }
-                else if( i_trans == MAX_TRANS )
+
+                if( i_trans == MAX_TRANS )
                 {
                     /* Completely opaque. Completely overwrite underlying pixel */
                     yuv_to_rgb( &r, &g, &b,
-                                p_src2_y[i_x], p_src2_u[i_x], p_src2_v[i_x] );
+                                p_src_y[i_x], p_src_u[i_x], p_src_v[i_x] );
 
-                    p32_dst[i_x] = (r<<i_rshift)
-                                 + (g<<i_gshift)
-                                 + (b<<i_bshift);
+                    p32_dst[i_x] = (r<<i_rshift) |
+                                   (g<<i_gshift) |
+                                   (b<<i_bshift);
                 }
                 else
                 {
                     /* Blending */
-                    uint32_t i_pix_src1 = p32_src1[i_x];
+                    uint32_t i_pix_dst = p32_dst[i_x];
                     yuv_to_rgb( &r, &g, &b,
-                                p_src2_y[i_x], p_src2_u[i_x], p_src2_v[i_x] );
-
-                    p32_dst[i_x] = ( ( r * i_trans +
-                                     (uint16_t)((i_pix_src1 & i_rmask)>>i_rshift) *
-                                     (MAX_TRANS - i_trans) ) >> TRANS_BITS) << i_rshift
-                                 | ( ( g * i_trans +
-                                     (uint16_t)((i_pix_src1 & i_gmask)>>i_gshift) *
-                                     (MAX_TRANS - i_trans) ) >> TRANS_BITS) << i_gshift
-                                 | ( ( b * i_trans +
-                                     (uint16_t)((i_pix_src1 & i_bmask)>>i_bshift) *
-                                     (MAX_TRANS - i_trans) ) >> TRANS_BITS) << i_bshift;
+                                p_src_y[i_x], p_src_u[i_x], p_src_v[i_x] );
+
+                    p32_dst[i_x] = ( vlc_blend( r, (i_pix_dst & i_rmask)>>i_rshift, i_trans ) << i_rshift ) |
+                                   ( vlc_blend( g, (i_pix_dst & i_gmask)>>i_gshift, i_trans ) << i_gshift ) |
+                                   ( vlc_blend( b, (i_pix_dst & i_bmask)>>i_bshift, i_trans ) << i_bshift );
                 }
             }
         }
     }
     else
     {
-        int i_rindex, i_bindex, i_gindex;
+        int i_rindex, i_gindex, i_bindex;
         uint32_t i_rmask, i_gmask, i_bmask;
 
-        slower:
-
-        i_rmask = p_dst_pic->format.i_rmask;
-        i_gmask = p_dst_pic->format.i_gmask;
-        i_bmask = p_dst_pic->format.i_bmask;
+        i_rmask = p_filter->fmt_out.video.i_rmask;
+        i_gmask = p_filter->fmt_out.video.i_gmask;
+        i_bmask = p_filter->fmt_out.video.i_bmask;
 
-        /*
-        ** quick and dirty way to get byte index from mask
-        ** will only work correctly if mask are 8 bit aligned
-        ** and are 8 bit long
-        */
-#ifdef WORDS_BIGENDIAN
-        i_rindex = ((i_rmask>>16) & 1)
-                 | ((i_rmask>>8) & 2)
-                 | ((i_rmask) & 3);
-        i_gindex = ((i_gmask>>16) & 1)
-                 | ((i_gmask>>8) & 2)
-                 | ((i_gmask) & 3);
-        i_bindex = ((i_bmask>>16) & 1)
-                 | ((i_bmask>>8) & 2)
-                 | ((i_bmask) & 3);
-#else
-        i_rindex = ((i_rmask>>24) & 3)
-                 | ((i_rmask>>16) & 2)
-                 | ((i_rmask>>8) & 1);
-        i_gindex = ((i_gmask>>24) & 3)
-                 | ((i_gmask>>16) & 2)
-                 | ((i_gmask>>8) & 1);
-        i_bindex = ((i_bmask>>24) & 3)
-                 | ((i_bmask>>16) & 2)
-                 | ((i_bmask>>8) & 1);
-#endif
+        vlc_rgb_index( &i_rindex, &i_gindex, &i_bindex, &p_filter->fmt_out.video );
 
         /* Draw until we reach the bottom of the subtitle */
-        for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src2_pitch,
-             p_dst += i_dst_pitch, p_src1 += i_src1_pitch,
-             p_src2_y += i_src2_pitch, p_src2_u += i_src2_pitch,
-             p_src2_v += i_src2_pitch )
+        for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src_pitch,
+             p_dst += i_dst_pitch,
+             p_src_y += i_src_pitch, p_src_u += i_src_pitch,
+             p_src_v += i_src_pitch )
         {
             /* Draw until we reach the end of the line */
             for( i_x = 0; i_x < i_width; i_x++ )
             {
                 if( p_trans )
-                    i_trans = ( p_trans[i_x] * i_alpha ) / 255;
+                    i_trans = vlc_alpha( p_trans[i_x], i_alpha );
                 if( !i_trans )
-                {
-                    /* Completely transparent. Don't change pixel */
                     continue;
-                }
-                else
-                {
-                    int i_pos = i_x * i_pix_pitch;
-                    if( i_trans == MAX_TRANS )
-                    {
-
-                        /* Completely opaque. Completely overwrite underlying pixel */
-                        yuv_to_rgb( &r, &g, &b,
-                                    p_src2_y[i_x], p_src2_u[i_x], p_src2_v[i_x] );
-
-                        p_dst[i_pos + i_rindex ] = r;
-                        p_dst[i_pos + i_gindex ] = g;
-                        p_dst[i_pos + i_bindex ] = b;
-                    }
-                    else
-                    {
-                        int i_rpos = i_pos + i_rindex;
-                        int i_gpos = i_pos + i_gindex;
-                        int i_bpos = i_pos + i_bindex;
-
-                        /* Blending */
-                        yuv_to_rgb( &r, &g, &b,
-                                    p_src2_y[i_x], p_src2_u[i_x], p_src2_v[i_x] );
-
-                        p_dst[i_rpos] = ( r * i_trans +
-                                        (uint16_t)p_src1[i_rpos] *
-                                        (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-                        p_dst[i_gpos] = ( r * i_trans +
-                                        (uint16_t)p_src1[i_gpos] *
-                                        (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-                        p_dst[i_bpos] = ( r * i_trans +
-                                        (uint16_t)p_src1[i_gpos] *
-                                        (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-                    }
-                }
+
+                /* Blending: weight by the per-pixel alpha (i_trans), not the global i_alpha */
+                yuv_to_rgb( &r, &g, &b,
+                            p_src_y[i_x], p_src_u[i_x], p_src_v[i_x] );
+
+                vlc_blend_packed( &p_dst[ i_x * i_pix_pitch],
+                                  i_rindex, i_gindex, i_bindex,
+                                  r, g, b, i_trans, true );
             }
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
-                            picture_t *p_dst_orig, picture_t *p_src,
-                            int i_x_offset, int i_y_offset,
-                            int i_width, int i_height, int i_alpha )
+static void BlendYUVAYUVPacked( filter_t *p_filter,
+                                picture_t *p_dst_pic, const picture_t *p_src,
+                                int i_x_offset, int i_y_offset,
+                                int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2_y;
-    uint8_t *p_src2_u, *p_src2_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src_y;
+    uint8_t *p_src_u, *p_src_v;
     uint8_t *p_trans;
     int i_x, i_y, i_pix_pitch, i_trans = 0;
     bool b_even = !((i_x_offset + p_filter->fmt_out.video.i_x_offset)%2);
-    int i_l_offset = 0, i_u_offset = 0, i_v_offset = 0;
+    int i_l_offset, i_u_offset, i_v_offset;
 
-    if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 1;
-        i_v_offset = 3;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') )
-    {
-        i_l_offset = 1;
-        i_u_offset = 0;
-        i_v_offset = 2;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 3;
-        i_v_offset = 1;
-    }
+    vlc_yuv_packed_index( &i_l_offset, &i_u_offset, &i_v_offset,
+                          p_filter->fmt_out.video.i_chroma );
 
     i_pix_pitch = 2;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -829,117 +659,81 @@ static void BlendYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-               p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-               p_dst_orig->p->i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-
-    p_trans = p_src->p[A_PLANE].p_pixels +
-              p_filter->fmt_in.video.i_x_offset +
-              p_src->p[A_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-
-    i_width = (i_width >> 1) << 1; /* Needs to be a multiple of 2 */
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_trans = vlc_plane_start( NULL, p_src, A_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
 
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    i_width &= ~1; /* Needs to be a multiple of 2 */
 
     /* Draw until we reach the bottom of the subtitle */
-    for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src2_pitch,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch,
-         p_src2_y += i_src2_pitch, p_src2_u += i_src2_pitch,
-         p_src2_v += i_src2_pitch )
+    for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src_pitch,
+         p_dst += i_dst_pitch,
+         p_src_y += i_src_pitch, p_src_u += i_src_pitch,
+         p_src_v += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++, b_even = !b_even )
         {
-            i_trans = ( p_trans[i_x] * i_alpha ) / 255;
+            i_trans = vlc_alpha( p_trans[i_x], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * 2 + i_l_offset]     = p_src2_y[i_x];
+                continue;
 
-                if( b_even )
+            /* Blending */
+            if( b_even )
+            {
+                int i_u;
+                int i_v;
+                /* Average with the next pixel only if it is inside the blended width (FIXME what's with 0xaa ?) */
+                if( i_x + 1 < i_width && p_trans[i_x+1] > 0xaa )
+                {
+                    i_u = (p_src_u[i_x]+p_src_u[i_x+1])>>1;
+                    i_v = (p_src_v[i_x]+p_src_v[i_x+1])>>1;
+                }
+                else
                 {
-                    if( p_trans[i_x+1] > 0xaa )
-                    {
-                        p_dst[i_x * 2 + i_u_offset] = (p_src2_u[i_x]+p_src2_u[i_x+1])>>1;
-                        p_dst[i_x * 2 + i_v_offset] = (p_src2_v[i_x]+p_src2_v[i_x+1])>>1;
-                    }
-                    else
-                    {
-                        p_dst[i_x * 2 + i_u_offset] = p_src2_u[i_x];
-                        p_dst[i_x * 2 + i_v_offset] = p_src2_v[i_x];
-                    }
+                    i_u = p_src_u[i_x];
+                    i_v = p_src_v[i_x];
                 }
+
+                vlc_blend_packed( &p_dst[i_x * 2],
+                                  i_l_offset, i_u_offset, i_v_offset,
+                                  p_src_y[i_x], i_u, i_v, i_trans, true );
             }
             else
             {
-                /* Blending */
-                p_dst[i_x * 2 + i_l_offset]     = ( (uint16_t)p_src2_y[i_x] * i_trans +
-                    (uint16_t)p_src1[i_x * 2 + i_l_offset] * (MAX_TRANS - i_trans) )
-                    >> TRANS_BITS;
-
-                if( b_even )
-                {
-                    uint16_t i_u = 0;
-                    uint16_t i_v = 0;
-                    if( p_trans[i_x+1] > 0xaa )
-                    {
-                        i_u = (p_src2_u[i_x]+p_src2_u[i_x+1])>>1;
-                        i_v = (p_src2_v[i_x]+p_src2_v[i_x+1])>>1;
-                    }
-                    else
-                    {
-                        i_u = p_src2_u[i_x];
-                        i_v = p_src2_v[i_x];
-                    }
-                    p_dst[i_x * 2 + i_u_offset] = ( (uint16_t)i_u * i_trans +
-                        (uint16_t)p_src1[i_x * 2 + i_u_offset] * (MAX_TRANS - i_trans) )
-                        >> TRANS_BITS;
-                    p_dst[i_x * 2 + i_v_offset] = ( (uint16_t)i_v * i_trans +
-                        (uint16_t)p_src1[i_x * 2 + i_v_offset] * (MAX_TRANS - i_trans) )
-                        >> TRANS_BITS;
-                }
+                p_dst[i_x * 2 + i_l_offset] = vlc_blend( p_src_y[i_x], p_dst[i_x * 2 + i_l_offset], i_trans );
             }
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 /***********************************************************************
  * I420, YV12
  ***********************************************************************/
-static void BlendI420I420( filter_t *p_filter, picture_t *p_dst,
-                           picture_t *p_dst_orig, picture_t *p_src,
+static void BlendI420I420( filter_t *p_filter,
+                           picture_t *p_dst, const picture_t *p_src,
                            int i_x_offset, int i_y_offset,
                            int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_src1_y, *p_src2_y, *p_dst_y;
-    uint8_t *p_src1_u, *p_src2_u, *p_dst_u;
-    uint8_t *p_src1_v, *p_src2_v, *p_dst_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_src_y, *p_dst_y;
+    uint8_t *p_src_u, *p_dst_u;
+    uint8_t *p_src_v, *p_dst_v;
     int i_x, i_y;
     bool b_even_scanline = i_y_offset % 2;
 
+    if( i_alpha == 0xff )
+    {
+        BlendI420I420_no_alpha( p_filter, p_dst, p_src,
+                                i_x_offset, i_y_offset, i_width, i_height );
+        return;
+    }
+
+
     i_dst_pitch = p_dst->p[Y_PLANE].i_pitch;
     p_dst_y = p_dst->p[Y_PLANE].p_pixels + i_x_offset +
               p_filter->fmt_out.video.i_x_offset +
@@ -954,100 +748,56 @@ static void BlendI420I420( filter_t *p_filter, picture_t *p_dst,
               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
               p_dst->p[V_PLANE].i_pitch;
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1_y = p_dst_orig->p[Y_PLANE].p_pixels + i_x_offset +
-               p_filter->fmt_out.video.i_x_offset +
-               p_dst_orig->p[Y_PLANE].i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-    p_src1_u = p_dst_orig->p[U_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[U_PLANE].i_pitch;
-    p_src1_v = p_dst_orig->p[V_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[V_PLANE].i_pitch;
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    i_width &= ~1;
 
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst_y += i_dst_pitch, p_src1_y += i_src1_pitch,
-         p_src2_y += i_src2_pitch )
+         p_dst_y += i_dst_pitch,
+         p_src_y += i_src_pitch )
     {
         if( b_even_scanline )
         {
             p_dst_u  += i_dst_pitch/2;
             p_dst_v  += i_dst_pitch/2;
-            p_src1_u += i_src1_pitch/2;
-            p_src1_v += i_src1_pitch/2;
         }
         b_even_scanline = !b_even_scanline;
 
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            if( i_alpha == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst_y[i_x] = p_src2_y[i_x];
-
-                if( b_even_scanline && i_x % 2 == 0 )
-                {
-                    p_dst_u[i_x/2] = p_src2_u[i_x/2];
-                    p_dst_v[i_x/2] = p_src2_v[i_x/2];
-                }
+            if( !i_alpha )
                 continue;
-            }
 
             /* Blending */
-            p_dst_y[i_x] = ( (uint16_t)p_src2_y[i_x] * i_alpha +
-                (uint16_t)p_src1_y[i_x] * (MAX_TRANS - i_alpha) )
-                >> TRANS_BITS;
-
+            p_dst_y[i_x] = vlc_blend( p_src_y[i_x], p_dst_y[i_x], i_alpha );
             if( b_even_scanline && i_x % 2 == 0 )
             {
-                p_dst_u[i_x/2] = ( (uint16_t)p_src2_u[i_x/2] * i_alpha +
-                (uint16_t)p_src1_u[i_x/2] * (MAX_TRANS - i_alpha) )
-                >> TRANS_BITS;
-                p_dst_v[i_x/2] = ( (uint16_t)p_src2_v[i_x/2] * i_alpha +
-                (uint16_t)p_src1_v[i_x/2] * (MAX_TRANS - i_alpha) )
-                >> TRANS_BITS;
+                p_dst_u[i_x/2] = vlc_blend( p_src_u[i_x/2], p_dst_u[i_x/2], i_alpha );
+                p_dst_v[i_x/2] = vlc_blend( p_src_v[i_x/2], p_dst_v[i_x/2], i_alpha );
             }
         }
         if( i_y%2 == 1 )
         {
-            p_src2_u += i_src2_pitch/2;
-            p_src2_v += i_src2_pitch/2;
+            p_src_u += i_src_pitch/2;
+            p_src_v += i_src_pitch/2;
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
-static void BlendI420I420_no_alpha( filter_t *p_filter, picture_t *p_dst,
-                                    picture_t *p_dst_orig, picture_t *p_src,
+static void BlendI420I420_no_alpha( filter_t *p_filter,
+                                    picture_t *p_dst, const picture_t *p_src,
                                     int i_x_offset, int i_y_offset,
                                     int i_width, int i_height )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_src1_y, *p_src2_y, *p_dst_y;
-    uint8_t *p_src1_u, *p_src2_u, *p_dst_u;
-    uint8_t *p_src1_v, *p_src2_v, *p_dst_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_src_y, *p_dst_y;
+    uint8_t *p_src_u, *p_dst_u;
+    uint8_t *p_src_v, *p_dst_v;
     int i_y;
     bool b_even_scanline = i_y_offset % 2;
 
@@ -1065,71 +815,48 @@ static void BlendI420I420_no_alpha( filter_t *p_filter, picture_t *p_dst,
               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
               p_dst->p[V_PLANE].i_pitch;
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1_y = p_dst_orig->p[Y_PLANE].p_pixels + i_x_offset +
-               p_filter->fmt_out.video.i_x_offset +
-               p_dst_orig->p[Y_PLANE].i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-    p_src1_u = p_dst_orig->p[U_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[U_PLANE].i_pitch;
-    p_src1_v = p_dst_orig->p[V_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[V_PLANE].i_pitch;
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
 
     i_width &= ~1;
 
     /* Draw until we reach the bottom of the subtitle */
-    for( i_y = 0; i_y < i_height; i_y++,
-         p_dst_y += i_dst_pitch, p_src1_y += i_src1_pitch,
-         p_src2_y += i_src2_pitch )
+    for( i_y = 0; i_y < i_height;
+            i_y++, p_dst_y += i_dst_pitch, p_src_y += i_src_pitch )
     {
         /* Completely opaque. Completely overwrite underlying pixel */
-        vlc_memcpy( p_dst_y, p_src2_y, i_width );
+        vlc_memcpy( p_dst_y, p_src_y, i_width );
         if( b_even_scanline )
         {
             p_dst_u  += i_dst_pitch/2;
             p_dst_v  += i_dst_pitch/2;
-            p_src1_u += i_src1_pitch/2;
-            p_src1_v += i_src1_pitch/2;
         }
         else
         {
-            vlc_memcpy( p_dst_u, p_src2_u, i_width/2 );
-            vlc_memcpy( p_dst_v, p_src2_v, i_width/2 );
+            vlc_memcpy( p_dst_u, p_src_u, i_width/2 );
+            vlc_memcpy( p_dst_v, p_src_v, i_width/2 );
         }
         b_even_scanline = !b_even_scanline;
         if( i_y%2 == 1 )
         {
-            p_src2_u += i_src2_pitch/2;
-            p_src2_v += i_src2_pitch/2;
+            p_src_u += i_src_pitch/2;
+            p_src_v += i_src_pitch/2;
         }
     }
-
-    return;
 }
 
-static void BlendI420R16( filter_t *p_filter, picture_t *p_dst_pic,
-                          picture_t *p_dst_orig, picture_t *p_src,
+static void BlendI420R16( filter_t *p_filter,
+                          picture_t *p_dst_pic, const picture_t *p_src,
                           int i_x_offset, int i_y_offset,
                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2_y;
-    uint8_t *p_src2_u, *p_src2_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src_y;
+    uint8_t *p_src_u, *p_src_v;
     int i_x, i_y, i_pix_pitch;
     int r, g, b;
 
@@ -1140,73 +867,46 @@ static void BlendI420R16( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-               p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-               p_dst_orig->p->i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                                0, 0, &p_filter->fmt_in.video, 2 );
 
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch,
-         p_src2_y += i_src2_pitch )
+         p_dst += i_dst_pitch,
+         p_src_y += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            if( i_alpha == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                yuv_to_rgb( &r, &g, &b,
-                            p_src2_y[i_x], p_src2_u[i_x/2], p_src2_v[i_x/2] );
-
-    ((uint16_t *)(&p_dst[i_x * i_pix_pitch]))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
-                continue;
-            }
-
             /* Blending */
-            /* FIXME: do the blending */
             yuv_to_rgb( &r, &g, &b,
-                        p_src2_y[i_x], p_src2_u[i_x/2], p_src2_v[i_x/2] );
+                        p_src_y[i_x], p_src_u[i_x/2], p_src_v[i_x/2] );
 
-    ((uint16_t *)(&p_dst[i_x * i_pix_pitch]))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
+            vlc_blend_rgb16( (uint16_t*)&p_dst[i_x * i_pix_pitch],
+                             r, g, b, i_alpha, &p_filter->fmt_out.video );
         }
         if( i_y%2 == 1 )
         {
-            p_src2_u += i_src2_pitch/2;
-            p_src2_v += i_src2_pitch/2;
+            p_src_u += i_src_pitch/2;
+            p_src_v += i_src_pitch/2;
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendI420R24( filter_t *p_filter, picture_t *p_dst_pic,
-                          picture_t *p_dst_orig, picture_t *p_src,
+static void BlendI420R24( filter_t *p_filter,
+                          picture_t *p_dst_pic, const picture_t *p_src,
                           int i_x_offset, int i_y_offset,
                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2_y;
-    uint8_t *p_src2_u, *p_src2_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src_y;
+    uint8_t *p_src_u, *p_src_v;
     int i_x, i_y, i_pix_pitch;
+    int i_rindex, i_gindex, i_bindex;
     int r, g, b;
 
     i_pix_pitch = p_dst_pic->p->i_pixel_pitch;
@@ -1216,104 +916,56 @@ static void BlendI420R24( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-               p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-               p_dst_orig->p->i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
 
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    vlc_rgb_index( &i_rindex, &i_gindex, &i_bindex, &p_filter->fmt_out.video );
 
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch,
-         p_src2_y += i_src2_pitch, p_src2_u += i_src2_pitch,
-         p_src2_v += i_src2_pitch )
+         p_dst += i_dst_pitch,
+         p_src_y += i_src_pitch, p_src_u += i_src_pitch,
+         p_src_v += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            if( i_alpha == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                yuv_to_rgb( &r, &g, &b,
-                            p_src2_y[i_x], p_src2_u[i_x/2], p_src2_v[i_x/2] );
-
-                p_dst[i_x * i_pix_pitch]     = r;
-                p_dst[i_x * i_pix_pitch + 1] = g;
-                p_dst[i_x * i_pix_pitch + 2] = b;
+            if( !i_alpha )
                 continue;
-            }
 
             /* Blending */
             yuv_to_rgb( &r, &g, &b,
-                        p_src2_y[i_x], p_src2_u[i_x/2], p_src2_v[i_x/2] );
-
-            p_dst[i_x * i_pix_pitch]     = ( r * i_alpha +
-                (uint16_t)p_src1[i_x * i_pix_pitch] *
-                (MAX_TRANS - i_alpha) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 1] = ( g * i_alpha +
-                (uint16_t)p_src1[i_x * i_pix_pitch + 1] *
-                (MAX_TRANS - i_alpha) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 2] = ( b * i_alpha +
-                (uint16_t)p_src1[i_x * i_pix_pitch + 2] *
-                (MAX_TRANS - i_alpha) ) >> TRANS_BITS;
+                        p_src_y[i_x], p_src_u[i_x/2], p_src_v[i_x/2] );
+
+            vlc_blend_packed( &p_dst[i_x * i_pix_pitch],
+                              i_rindex, i_gindex, i_bindex, r, g, b, i_alpha, true );
         }
         if( i_y%2 == 1 )
         {
-            p_src2_u += i_src2_pitch/2;
-            p_src2_v += i_src2_pitch/2;
+            p_src_u += i_src_pitch/2;
+            p_src_v += i_src_pitch/2;
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendI420YUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
-                                picture_t *p_dst_orig, picture_t *p_src,
+static void BlendI420YUVPacked( filter_t *p_filter,
+                                picture_t *p_dst_pic, const picture_t *p_src,
                                 int i_x_offset, int i_y_offset,
                                 int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2_y;
-    uint8_t *p_src2_u, *p_src2_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src_y;
+    uint8_t *p_src_u, *p_src_v;
     int i_x, i_y, i_pix_pitch;
     bool b_even = !((i_x_offset + p_filter->fmt_out.video.i_x_offset)%2);
-    int i_l_offset = 0, i_u_offset = 0, i_v_offset = 0;
+    int i_l_offset, i_u_offset, i_v_offset;
 
-    if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 1;
-        i_v_offset = 3;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') )
-    {
-        i_l_offset = 1;
-        i_u_offset = 0;
-        i_v_offset = 2;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 3;
-        i_v_offset = 1;
-    }
+    vlc_yuv_packed_index( &i_l_offset, &i_u_offset, &i_v_offset,
+                          p_filter->fmt_out.video.i_chroma );
 
     i_pix_pitch = 2;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -1322,93 +974,52 @@ static void BlendI420YUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-               p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-               p_dst_orig->p->i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src2_pitch = p_src->p[Y_PLANE].i_pitch;
-    p_src2_y = p_src->p[Y_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset +
-               p_src->p[Y_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset;
-    p_src2_u = p_src->p[U_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[U_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
-    p_src2_v = p_src->p[V_PLANE].p_pixels +
-               p_filter->fmt_in.video.i_x_offset/2 +
-               p_src->p[V_PLANE].i_pitch * p_filter->fmt_in.video.i_y_offset/2;
+    p_src_y = vlc_plane_start( &i_src_pitch, p_src, Y_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 1 );
+    p_src_u = vlc_plane_start( NULL, p_src, U_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
+    p_src_v = vlc_plane_start( NULL, p_src, V_PLANE,
+                               0, 0, &p_filter->fmt_in.video, 2 );
 
     i_width &= ~1; /* Needs to be a multiple of 2 */
 
-#define MAX_TRANS 255
-#define TRANS_BITS  8
-
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch,
-         p_src2_y += i_src2_pitch, p_src2_u += i_src2_pitch,
-         p_src2_v += i_src2_pitch )
+         p_dst += i_dst_pitch,
+         p_src_y += i_src_pitch, p_src_u += i_src_pitch,
+         p_src_v += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++, b_even = !b_even )
         {
-            if( i_alpha == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * 2 + i_l_offset]     = p_src2_y[i_x];
-
-                if( b_even )
-                {
-                    p_dst[i_x * 2 + i_u_offset] = p_src2_u[i_x/2];
-                    p_dst[i_x * 2 + i_v_offset] = p_src2_v[i_x/2];
-                }
-            }
-            else
-            {
-                /* Blending */
-                p_dst[i_x * 2 + i_l_offset]     = ( (uint16_t)p_src2_y[i_x] * i_alpha +
-                    (uint16_t)p_src1[i_x * 2 + i_l_offset] * (MAX_TRANS - i_alpha) )
-                    >> TRANS_BITS;
+            if( !i_alpha )
+                continue;
 
-                if( b_even )
-                {
-                    uint16_t i_u = p_src2_u[i_x/2];
-                    uint16_t i_v = p_src2_v[i_x/2];
-                    p_dst[i_x * 2 + i_u_offset] = ( (uint16_t)i_u * i_alpha +
-                        (uint16_t)p_src1[i_x * 2 + i_u_offset] * (MAX_TRANS - i_alpha) )
-                        >> TRANS_BITS;
-                    p_dst[i_x * 2 + i_v_offset] = ( (uint16_t)i_v * i_alpha +
-                        (uint16_t)p_src1[i_x * 2 + i_v_offset] * (MAX_TRANS - i_alpha) )
-                        >> TRANS_BITS;
-                }
-            }
+            /* Blending */
+            vlc_blend_packed( &p_dst[i_x * 2],
+                              i_l_offset, i_u_offset, i_v_offset,
+                              p_src_y[i_x], p_src_u[i_x/2], p_src_v[i_x/2], i_alpha, b_even );
         }
         if( i_y%2 == 1 )
         {
-            p_src2_u += i_src2_pitch/2;
-            p_src2_v += i_src2_pitch/2;
+            p_src_u += i_src_pitch/2;
+            p_src_v += i_src_pitch/2;
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
 /***********************************************************************
  * YUVP
  ***********************************************************************/
-static void BlendPalI420( filter_t *p_filter, picture_t *p_dst,
-                          picture_t *p_dst_orig, picture_t *p_src,
+static void BlendPalI420( filter_t *p_filter,
+                          picture_t *p_dst, const picture_t *p_src_pic,
                           int i_x_offset, int i_y_offset,
                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_src1_y, *p_src2, *p_dst_y;
-    uint8_t *p_src1_u, *p_dst_u;
-    uint8_t *p_src1_v, *p_dst_v;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_src, *p_dst_y;
+    uint8_t *p_dst_u;
+    uint8_t *p_dst_v;
     int i_x, i_y, i_trans;
     bool b_even_scanline = i_y_offset % 2;
 
@@ -1426,116 +1037,54 @@ static void BlendPalI420( filter_t *p_filter, picture_t *p_dst,
               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
               p_dst->p[V_PLANE].i_pitch;
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1_y = p_dst_orig->p[Y_PLANE].p_pixels + i_x_offset +
-               p_filter->fmt_out.video.i_x_offset +
-               p_dst_orig->p[Y_PLANE].i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-    p_src1_u = p_dst_orig->p[U_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[U_PLANE].i_pitch;
-    p_src1_v = p_dst_orig->p[V_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[V_PLANE].i_pitch;
-
-    i_src2_pitch = p_src->p->i_pitch;
-    p_src2 = p_src->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
-             i_src2_pitch * p_filter->fmt_in.video.i_y_offset;
+    i_src_pitch = p_src_pic->p->i_pitch;
+    p_src = p_src_pic->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
+            i_src_pitch * p_filter->fmt_in.video.i_y_offset;
 
-#define MAX_TRANS 255
-#define TRANS_BITS  8
-#define p_trans p_src2
 #define p_pal p_filter->fmt_in.video.p_palette->palette
 
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst_y += i_dst_pitch, p_src1_y += i_src1_pitch,
-         p_src2 += i_src2_pitch,
+         p_dst_y += i_dst_pitch,
+         p_src += i_src_pitch,
          p_dst_u += b_even_scanline ? i_dst_pitch/2 : 0,
-         p_src1_u += b_even_scanline ? i_src1_pitch/2 : 0,
-         p_dst_v += b_even_scanline ? i_dst_pitch/2 : 0,
-         p_src1_v += b_even_scanline ? i_src1_pitch/2 : 0 )
+         p_dst_v += b_even_scanline ? i_dst_pitch/2 : 0 )
     {
+        const uint8_t *p_trans = p_src;
         b_even_scanline = !b_even_scanline;
 
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            i_trans = ( p_pal[p_trans[i_x]][3] * i_alpha ) / 255;
+            i_trans = vlc_alpha( p_pal[p_trans[i_x]][3], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
                 continue;
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst_y[i_x] = p_pal[p_src2[i_x]][0];
-
-                if( b_even_scanline && ((i_x % 2) == 0) )
-                {
-                    p_dst_u[i_x/2] = p_pal[p_src2[i_x]][1];
-                    p_dst_v[i_x/2] = p_pal[p_src2[i_x]][2];
-                }
-                continue;
-            }
 
             /* Blending */
-            p_dst_y[i_x] = ( (uint16_t)p_pal[p_src2[i_x]][0] * i_trans +
-                (uint16_t)p_src1_y[i_x] * (MAX_TRANS - i_trans) )
-                >> TRANS_BITS;
-
+            p_dst_y[i_x] = vlc_blend( p_pal[p_src[i_x]][0], p_dst_y[i_x], i_trans );
             if( b_even_scanline && ((i_x % 2) == 0) )
             {
-                p_dst_u[i_x/2] = ( (uint16_t)p_pal[p_src2[i_x]][1] * i_trans +
-                    (uint16_t)p_src1_u[i_x/2] * (MAX_TRANS - i_trans) )
-                    >> TRANS_BITS;
-                p_dst_v[i_x/2] = ( (uint16_t)p_pal[p_src2[i_x]][2] * i_trans +
-                    (uint16_t)p_src1_v[i_x/2] * (MAX_TRANS - i_trans) )
-                    >> TRANS_BITS;
+                p_dst_u[i_x/2] = vlc_blend( p_pal[p_src[i_x]][1], p_dst_u[i_x/2], i_trans );
+                p_dst_v[i_x/2] = vlc_blend( p_pal[p_src[i_x]][2], p_dst_v[i_x/2], i_trans );
             }
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-#undef p_trans
 #undef p_pal
-
-    return;
 }
 
-static void BlendPalYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
-                               picture_t *p_dst_orig, picture_t *p_src,
+static void BlendPalYUVPacked( filter_t *p_filter,
+                               picture_t *p_dst_pic, const picture_t *p_src_pic,
                                int i_x_offset, int i_y_offset,
                                int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_src1, *p_src2, *p_dst;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_src, *p_dst;
     int i_x, i_y, i_pix_pitch, i_trans;
     bool b_even = !((i_x_offset + p_filter->fmt_out.video.i_x_offset)%2);
-    int i_l_offset = 0, i_u_offset = 0, i_v_offset = 0;
+    int i_l_offset, i_u_offset, i_v_offset;
 
-    if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 1;
-        i_v_offset = 3;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') )
-    {
-        i_l_offset = 1;
-        i_u_offset = 0;
-        i_v_offset = 2;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 3;
-        i_v_offset = 1;
-    }
+    vlc_yuv_packed_index( &i_l_offset, &i_u_offset, &i_v_offset,
+                          p_filter->fmt_out.video.i_chroma );
 
     i_pix_pitch = 2;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -1543,104 +1092,65 @@ static void BlendPalYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
             p_filter->fmt_out.video.i_x_offset) + p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p->i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_pix_pitch * (i_x_offset +
-             p_filter->fmt_out.video.i_x_offset) + p_dst_orig->p->i_pitch *
-             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src2_pitch = p_src->p->i_pitch;
-    p_src2 = p_src->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
-             i_src2_pitch * p_filter->fmt_in.video.i_y_offset;
+    i_src_pitch = p_src_pic->p->i_pitch;
+    p_src = p_src_pic->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
+            i_src_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_width = (i_width >> 1) << 1; /* Needs to be a multiple of 2 */
+    i_width &= ~1; /* Needs to be a multiple of 2 */
 
-#define MAX_TRANS 255
-#define TRANS_BITS  8
-#define p_trans p_src2
 #define p_pal p_filter->fmt_in.video.p_palette->palette
 
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch, p_src2 += i_src2_pitch )
+         p_dst += i_dst_pitch, p_src += i_src_pitch )
     {
+        const uint8_t *p_trans = p_src;
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++, b_even = !b_even )
         {
-            i_trans = ( p_pal[p_trans[i_x]][3] * i_alpha ) / 255;
+            i_trans = vlc_alpha( p_pal[p_trans[i_x]][3], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * 2 + i_l_offset]     = p_pal[p_src2[i_x]][0];
+                continue;
 
-                if( b_even )
+            /* Blending */
+            if( b_even )
+            {
+                uint16_t i_u;
+                uint16_t i_v;
+                if( p_trans[i_x+1] > 0xaa )
                 {
-                    if( p_trans[i_x+1] > 0xaa )
-                    {
-                        p_dst[i_x * 2 + i_u_offset] = (p_pal[p_src2[i_x]][1] + p_pal[p_src2[i_x+1]][1]) >> 1;
-                        p_dst[i_x * 2 + i_v_offset] = (p_pal[p_src2[i_x]][2] + p_pal[p_src2[i_x+1]][2]) >> 1;
-                    }
-                    else
-                    {
-                        p_dst[i_x * 2 + i_u_offset] = p_pal[p_src2[i_x]][1];
-                        p_dst[i_x * 2 + i_v_offset] = p_pal[p_src2[i_x]][2];
-                    }
+                    i_u = (p_pal[p_src[i_x]][1] + p_pal[p_src[i_x+1]][1]) >> 1;
+                    i_v = (p_pal[p_src[i_x]][2] + p_pal[p_src[i_x+1]][2]) >> 1;
                 }
+                else
+                {
+                    i_u = p_pal[p_src[i_x]][1];
+                    i_v = p_pal[p_src[i_x]][2];
+                }
+
+                vlc_blend_packed( &p_dst[i_x * 2],
+                                  i_l_offset, i_u_offset, i_v_offset,
+                                  p_pal[p_src[i_x]][0], i_u, i_v, i_trans, true );
             }
             else
             {
-                /* Blending */
-                p_dst[i_x * 2 + i_l_offset]     = ( (uint16_t)p_pal[p_src2[i_x]][0] *
-                    i_trans + (uint16_t)p_src1[i_x * 2 + i_l_offset] *
-                    (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-
-                if( b_even )
-                {
-                    uint16_t i_u = 0;
-                    uint16_t i_v = 0;
-                    if( p_trans[i_x+1] > 0xaa )
-                    {
-                        i_u = (p_pal[p_src2[i_x]][1] + p_pal[p_src2[i_x+1]][1]) >> 1;
-                        i_v = (p_pal[p_src2[i_x]][2] + p_pal[p_src2[i_x+1]][2]) >> 1;
-                    }
-                    else
-                    {
-                        i_u = p_pal[p_src2[i_x]][1];
-                        i_v = p_pal[p_src2[i_x]][2];
-                    }
-
-                    p_dst[i_x * 2 + i_u_offset] = ( (uint16_t)i_u *
-                        i_trans + (uint16_t)p_src1[i_x * 2 + i_u_offset] *
-                        (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-                    p_dst[i_x * 2 + i_v_offset] = ( (uint16_t)i_v *
-                        i_trans + (uint16_t)p_src1[i_x * 2 + i_v_offset] *
-                        (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-                }
+                p_dst[i_x * 2 + i_l_offset] = vlc_blend( p_pal[p_src[i_x]][0], p_dst[i_x * 2 + i_l_offset], i_trans );
             }
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-#undef p_trans
 #undef p_pal
-
-    return;
 }
 
-static void BlendPalRV( filter_t *p_filter, picture_t *p_dst_pic,
-                        picture_t *p_dst_orig, picture_t *p_src,
+static void BlendPalRV( filter_t *p_filter,
+                        picture_t *p_dst_pic, const picture_t *p_src_pic,
                         int i_x_offset, int i_y_offset,
                         int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_src1, *p_src2, *p_dst;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_src, *p_dst;
     int i_x, i_y, i_pix_pitch, i_trans;
-    int r, g, b;
     video_palette_t rgbpalette;
+    int i_rindex, i_gindex, i_bindex;
 
     i_pix_pitch = p_dst_pic->p->i_pixel_pitch;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -1648,99 +1158,70 @@ static void BlendPalRV( filter_t *p_filter, picture_t *p_dst_pic,
             p_filter->fmt_out.video.i_x_offset) + p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p->i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_pix_pitch * (i_x_offset +
-             p_filter->fmt_out.video.i_x_offset) + p_dst_orig->p->i_pitch *
-             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
+    i_src_pitch = p_src_pic->p->i_pitch;
+    p_src = p_src_pic->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
+            i_src_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_src2_pitch = p_src->p->i_pitch;
-    p_src2 = p_src->p->p_pixels + p_filter->fmt_in.video.i_x_offset +
-             i_src2_pitch * p_filter->fmt_in.video.i_y_offset;
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
-#define p_trans p_src2
 #define p_pal p_filter->fmt_in.video.p_palette->palette
 #define rgbpal rgbpalette.palette
 
     /* Convert palette first */
-    for( i_y = 0; i_y < p_filter->fmt_in.video.p_palette->i_entries &&
-         i_y < 256; i_y++ )
+    for( i_y = 0; i_y < p_filter->fmt_in.video.p_palette->i_entries && i_y < 256; i_y++ )
     {
-        yuv_to_rgb( &r, &g, &b, p_pal[i_y][0], p_pal[i_y][1], p_pal[i_y][2] );
+        int r, g, b;
 
-        if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('R','V','1','6') )
-        {
-            *(uint16_t *)rgbpal[i_y] =
-                ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
-        }
-        else
-        {
-            rgbpal[i_y][0] = r; rgbpal[i_y][1] = g; rgbpal[i_y][2] = b;
-        }
+        yuv_to_rgb( &r, &g, &b, p_pal[i_y][0], p_pal[i_y][1], p_pal[i_y][2] );
+        rgbpal[i_y][0] = r;
+        rgbpal[i_y][1] = g;
+        rgbpal[i_y][2] = b;
     }
 
+    /* Get the R/G/B indices passed to vlc_blend_packed() for the output format */
+    vlc_rgb_index( &i_rindex, &i_gindex, &i_bindex, &p_filter->fmt_out.video );
+
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch, p_src2 += i_src2_pitch )
+         p_dst += i_dst_pitch, p_src += i_src_pitch )
     {
+        const uint8_t *p_trans = p_src;
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-            i_trans = ( p_pal[p_trans[i_x]][3] * i_alpha ) / 255;
+            i_trans = vlc_alpha( p_pal[p_trans[i_x]][3], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
                 continue;
-            }
-            else if( i_trans == MAX_TRANS ||
-                     p_filter->fmt_out.video.i_chroma ==
-                     VLC_FOURCC('R','V','1','6') )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * i_pix_pitch]     = rgbpal[p_src2[i_x]][0];
-                p_dst[i_x * i_pix_pitch + 1] = rgbpal[p_src2[i_x]][1];
-                if( p_filter->fmt_out.video.i_chroma !=
-                    VLC_FOURCC('R','V','1','6') )
-                p_dst[i_x * i_pix_pitch + 2] = rgbpal[p_src2[i_x]][2];
-                continue;
-            }
 
             /* Blending */
-            p_dst[i_x * i_pix_pitch]     = ( (uint16_t)rgbpal[p_src2[i_x]][0] *
-                i_trans + (uint16_t)p_src1[i_x * i_pix_pitch] *
-                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 1] = ( (uint16_t)rgbpal[p_src2[i_x]][1] *
-                i_trans + (uint16_t)p_src1[i_x * i_pix_pitch + 1] *
-                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 2] = ( (uint16_t)rgbpal[p_src2[i_x]][2] *
-                i_trans + (uint16_t)p_src1[i_x * i_pix_pitch + 2] *
-                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
+            if( p_filter->fmt_out.video.i_chroma == VLC_CODEC_RGB15 || p_filter->fmt_out.video.i_chroma == VLC_CODEC_RGB16 )
+                vlc_blend_rgb16( (uint16_t*)&p_dst[i_x * i_pix_pitch],
+                                  rgbpal[p_src[i_x]][0], rgbpal[p_src[i_x]][1], rgbpal[p_src[i_x]][2],
+                                  i_trans,
+                                  &p_filter->fmt_out.video );
+            else
+                vlc_blend_packed( &p_dst[i_x * i_pix_pitch],
+                                  i_rindex, i_gindex, i_bindex,
+                                  rgbpal[p_src[i_x]][0], rgbpal[p_src[i_x]][1], rgbpal[p_src[i_x]][2],
+                                  i_trans, true );
         }
     }
 
-#undef MAX_TRANS
-#undef TRANS_BITS
-#undef p_trans
 #undef p_pal
 #undef rgbpal
-
-    return;
 }
 
 /***********************************************************************
  * RGBA
  ***********************************************************************/
-static void BlendRGBAI420( filter_t *p_filter, picture_t *p_dst,
-                           picture_t *p_dst_orig, picture_t *p_src,
+static void BlendRGBAI420( filter_t *p_filter,
+                           picture_t *p_dst, const picture_t *p_src_pic,
                            int i_x_offset, int i_y_offset,
                            int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch, i_src_pix_pitch;
-    uint8_t *p_src1_y, *p_dst_y;
-    uint8_t *p_src1_u, *p_dst_u;
-    uint8_t *p_src1_v, *p_dst_v;
-    uint8_t *p_src2;
+    int i_src_pitch, i_dst_pitch, i_src_pix_pitch;
+    uint8_t *p_dst_y;
+    uint8_t *p_dst_u;
+    uint8_t *p_dst_v;
+    uint8_t *p_src;
     int i_x, i_y, i_trans;
     uint8_t y, u, v;
 
@@ -1760,102 +1241,55 @@ static void BlendRGBAI420( filter_t *p_filter, picture_t *p_dst,
               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
               p_dst->p[V_PLANE].i_pitch;
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1_y = p_dst_orig->p[Y_PLANE].p_pixels + i_x_offset +
-               p_filter->fmt_out.video.i_x_offset +
-               p_dst_orig->p[Y_PLANE].i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-    p_src1_u = p_dst_orig->p[U_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[U_PLANE].i_pitch;
-    p_src1_v = p_dst_orig->p[V_PLANE].p_pixels + i_x_offset/2 +
-               p_filter->fmt_out.video.i_x_offset/2 +
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset ) / 2 *
-               p_dst_orig->p[V_PLANE].i_pitch;
-
-    i_src_pix_pitch = p_src->p->i_pixel_pitch;
-    i_src2_pitch = p_src->p->i_pitch;
-    p_src2 = p_src->p->p_pixels +
-             p_filter->fmt_in.video.i_x_offset * i_src2_pitch +
-             p_src->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
+    i_src_pix_pitch = p_src_pic->p->i_pixel_pitch;
+    i_src_pitch = p_src_pic->p->i_pitch;
+    p_src = p_src_pic->p->p_pixels +
+            p_filter->fmt_in.video.i_x_offset * i_src_pix_pitch +
+            p_src_pic->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
 
 
-#define MAX_TRANS 255
-#define TRANS_BITS  8
-
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst_y += i_dst_pitch, p_src1_y += i_src1_pitch,
+         p_dst_y += i_dst_pitch,
          p_dst_u += b_even_scanline ? i_dst_pitch/2 : 0,
-         p_src1_u += b_even_scanline ? i_src1_pitch/2 : 0,
          p_dst_v += b_even_scanline ? i_dst_pitch/2 : 0,
-         p_src1_v += b_even_scanline ? i_src1_pitch/2 : 0,
-         p_src2 += i_src2_pitch )
+         p_src += i_src_pitch )
     {
         b_even_scanline = !b_even_scanline;
 
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-#define     R         ( p_src2[i_x * i_src_pix_pitch + 0] )
-#define     G         ( p_src2[i_x * i_src_pix_pitch + 1] )
-#define     B         ( p_src2[i_x * i_src_pix_pitch + 2] )
-            i_trans = ( p_src2[i_x * i_src_pix_pitch + 3] * i_alpha ) / 255;
-            if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
-                continue;
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                rgb_to_yuv( &y, &u, &v, R, G, B );
-                p_dst_y[i_x] = y;
+            const int R = p_src[i_x * i_src_pix_pitch + 0];
+            const int G = p_src[i_x * i_src_pix_pitch + 1];
+            const int B = p_src[i_x * i_src_pix_pitch + 2];
 
-                if( b_even_scanline && i_x % 2 == 0 )
-                {
-                    p_dst_u[i_x/2] = u;
-                    p_dst_v[i_x/2] = v;
-                }
+            i_trans = vlc_alpha( p_src[i_x * i_src_pix_pitch + 3], i_alpha );
+            if( !i_trans )
                 continue;
-            }
 
             /* Blending */
             rgb_to_yuv( &y, &u, &v, R, G, B );
-            p_dst_y[i_x] = ( (uint16_t)y * i_trans +
-                (uint16_t)p_src1_y[i_x] * (MAX_TRANS - i_trans) )
-                >> TRANS_BITS;
 
+            p_dst_y[i_x] = vlc_blend( y, p_dst_y[i_x], i_trans );
             if( b_even_scanline && i_x % 2 == 0 )
             {
-                p_dst_u[i_x/2] = ( (uint16_t)u * i_trans +
-                (uint16_t)p_src1_u[i_x/2] * (MAX_TRANS - i_trans) )
-                >> TRANS_BITS;
-                p_dst_v[i_x/2] = ( (uint16_t)v * i_trans +
-                (uint16_t)p_src1_v[i_x/2] * (MAX_TRANS - i_trans) )
-                >> TRANS_BITS;
+                p_dst_u[i_x/2] = vlc_blend( u, p_dst_u[i_x/2], i_trans );
+                p_dst_v[i_x/2] = vlc_blend( v, p_dst_v[i_x/2], i_trans );
             }
-#undef      R
-#undef      G
-#undef      B
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendRGBAR24( filter_t *p_filter, picture_t *p_dst_pic,
-                          picture_t *p_dst_orig, picture_t *p_src,
+static void BlendRGBAR24( filter_t *p_filter,
+                          picture_t *p_dst_pic, const picture_t *p_src_pic,
                           int i_x_offset, int i_y_offset,
                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src;
     int i_x, i_y, i_pix_pitch, i_trans, i_src_pix_pitch;
+    int i_rindex, i_gindex, i_bindex;
 
     i_pix_pitch = p_dst_pic->p->i_pixel_pitch;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -1864,77 +1298,45 @@ static void BlendRGBAR24( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p->i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-             p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-             p_dst_orig->p->i_pitch *
-             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src_pix_pitch = p_src->p->i_pixel_pitch;
-    i_src2_pitch = p_src->p->i_pitch;
-    p_src2 = p_src->p->p_pixels +
-             p_filter->fmt_in.video.i_x_offset * i_pix_pitch +
-             p_src->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
+    i_src_pix_pitch = p_src_pic->p->i_pixel_pitch;
+    i_src_pitch = p_src_pic->p->i_pitch;
+    p_src = p_src_pic->p->p_pixels +
+            p_filter->fmt_in.video.i_x_offset * i_src_pix_pitch +
+            p_src_pic->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
 
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    vlc_rgb_index( &i_rindex, &i_gindex, &i_bindex, &p_filter->fmt_out.video );
 
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch, p_src2 += i_src2_pitch )
+         p_dst += i_dst_pitch, p_src += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-#define     R         ( p_src2[i_x * i_src_pix_pitch + 0] )
-#define     G         ( p_src2[i_x * i_src_pix_pitch + 1] )
-#define     B         ( p_src2[i_x * i_src_pix_pitch + 2] )
-            i_trans = ( p_src2[i_x * i_src_pix_pitch + 3] * i_alpha ) / 255;
+            const int R = p_src[i_x * i_src_pix_pitch + 0];
+            const int G = p_src[i_x * i_src_pix_pitch + 1];
+            const int B = p_src[i_x * i_src_pix_pitch + 2];
+
+            i_trans = vlc_alpha( p_src[i_x * i_src_pix_pitch + 3], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
                 continue;
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                p_dst[i_x * i_pix_pitch + 0] = R;
-                p_dst[i_x * i_pix_pitch + 1] = G;
-                p_dst[i_x * i_pix_pitch + 2] = B;
-                continue;
-            }
 
             /* Blending */
-            p_dst[i_x * i_pix_pitch + 0] = ( R * i_trans +
-                (uint16_t)p_src1[i_x * i_pix_pitch + 0] *
-                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 1] = ( G * i_trans +
-                (uint16_t)p_src1[i_x * i_pix_pitch + 1] *
-                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-            p_dst[i_x * i_pix_pitch + 2] = ( B * i_trans +
-                (uint16_t)p_src1[i_x * i_pix_pitch + 2] *
-                (MAX_TRANS - i_trans) ) >> TRANS_BITS;
-#undef      R
-#undef      G
-#undef      B
+            vlc_blend_packed( &p_dst[i_x * i_pix_pitch],
+                              i_rindex, i_gindex, i_bindex,
+                              R, G, B, i_trans, true );
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendRGBAR16( filter_t *p_filter, picture_t *p_dst_pic,
-                          picture_t *p_dst_orig, picture_t *p_src,
+static void BlendRGBAR16( filter_t *p_filter,
+                          picture_t *p_dst_pic, const picture_t *p_src_pic,
                           int i_x_offset, int i_y_offset,
                           int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2;
+    int i_src_pitch, i_dst_pitch;
+    uint8_t *p_dst, *p_src;
     int i_x, i_y, i_pix_pitch, i_trans, i_src_pix_pitch;
-    uint16_t i_pix;
 
     i_pix_pitch = p_dst_pic->p->i_pixel_pitch;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -1943,99 +1345,48 @@ static void BlendRGBAR16( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p->i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-             p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-             p_dst_orig->p->i_pitch *
-             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
-
-    i_src_pix_pitch = p_src->p->i_pixel_pitch;
-    i_src2_pitch = p_src->p->i_pitch;
-    p_src2 = p_src->p->p_pixels +
-             p_filter->fmt_in.video.i_x_offset * i_pix_pitch +
-             p_src->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    i_src_pix_pitch = p_src_pic->p->i_pixel_pitch;
+    i_src_pitch = p_src_pic->p->i_pitch;
+    p_src = p_src_pic->p->p_pixels +
+            p_filter->fmt_in.video.i_x_offset * i_src_pix_pitch +
+            p_src_pic->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
 
     /* Draw until we reach the bottom of the subtitle */
     for( i_y = 0; i_y < i_height; i_y++,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch, p_src2 += i_src2_pitch )
+         p_dst += i_dst_pitch, p_src += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++ )
         {
-#define     R         ( p_src2[i_x * i_src_pix_pitch + 0] )
-#define     G         ( p_src2[i_x * i_src_pix_pitch + 1] )
-#define     B         ( p_src2[i_x * i_src_pix_pitch + 2] )
-            i_trans = ( p_src2[i_x * i_src_pix_pitch + 3] * i_alpha ) / 255;
+            const int R = p_src[i_x * i_src_pix_pitch + 0];
+            const int G = p_src[i_x * i_src_pix_pitch + 1];
+            const int B = p_src[i_x * i_src_pix_pitch + 2];
+
+            i_trans = vlc_alpha( p_src[i_x * i_src_pix_pitch + 3], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
                 continue;
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                *((uint16_t *)(&p_dst[i_x * i_pix_pitch])) = ((R >> 3) << 11) | ((G >> 2) << 5) | (B >> 3);
-                continue;
-            }
 
             /* Blending */
-            i_pix = *((uint16_t *)(&p_dst[i_x * i_pix_pitch]));
-            *((uint16_t *)(&p_dst[i_x * i_pix_pitch])) =
-                ( ( ( (R >> 3)*i_trans
-                    + (i_pix >> 11) * (MAX_TRANS - i_trans) )
-                    >> TRANS_BITS ) << 11 )
-              | ( ( ( (G >> 2)*i_trans
-                    + ((i_pix & 0x07e0)>> 5) * (MAX_TRANS - i_trans) )
-                    >> TRANS_BITS ) << 5  )
-              | ( ( ( (B >> 3)*i_trans
-                    + (i_pix & 0x001f) * (MAX_TRANS - i_trans) )
-                    >> TRANS_BITS ) );
-#undef      R
-#undef      G
-#undef      B
+            vlc_blend_rgb16( (uint16_t*)&p_dst[i_x * i_pix_pitch],
+                             R, G, B, i_trans, &p_filter->fmt_out.video );
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }
 
-static void BlendRGBAYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
-                                picture_t *p_dst_orig, picture_t *p_src,
+static void BlendRGBAYUVPacked( filter_t *p_filter,
+                                picture_t *p_dst_pic, const picture_t *p_src_pic,
                                 int i_x_offset, int i_y_offset,
                                 int i_width, int i_height, int i_alpha )
 {
-    int i_src1_pitch, i_src2_pitch, i_dst_pitch, i_src_pix_pitch;
-    uint8_t *p_dst, *p_src1, *p_src2;
-    uint8_t *p_trans;
+    int i_src_pitch, i_dst_pitch, i_src_pix_pitch;
+    uint8_t *p_dst, *p_src;
     int i_x, i_y, i_pix_pitch, i_trans;
     bool b_even = !((i_x_offset + p_filter->fmt_out.video.i_x_offset)%2);
-    int i_l_offset = 0, i_u_offset = 0, i_v_offset = 0;
+    int i_l_offset, i_u_offset, i_v_offset;
     uint8_t y, u, v;
 
-    if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','U','Y','2') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 1;
-        i_v_offset = 3;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('U','Y','V','Y') )
-    {
-        i_l_offset = 1;
-        i_u_offset = 0;
-        i_v_offset = 2;
-    }
-    else if( p_filter->fmt_out.video.i_chroma == VLC_FOURCC('Y','V','Y','U') )
-    {
-        i_l_offset = 0;
-        i_u_offset = 3;
-        i_v_offset = 1;
-    }
+    vlc_yuv_packed_index( &i_l_offset, &i_u_offset, &i_v_offset,
+                          p_filter->fmt_out.video.i_chroma );
 
     i_pix_pitch = 2;
     i_dst_pitch = p_dst_pic->p->i_pitch;
@@ -2044,74 +1395,36 @@ static void BlendRGBAYUVPacked( filter_t *p_filter, picture_t *p_dst_pic,
             p_dst_pic->p->i_pitch *
             ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
 
-    i_src1_pitch = p_dst_orig->p[Y_PLANE].i_pitch;
-    p_src1 = p_dst_orig->p->p_pixels + i_x_offset * i_pix_pitch +
-               p_filter->fmt_out.video.i_x_offset * i_pix_pitch +
-               p_dst_orig->p->i_pitch *
-               ( i_y_offset + p_filter->fmt_out.video.i_y_offset );
+    i_src_pix_pitch = p_src_pic->p->i_pixel_pitch;
+    i_src_pitch = p_src_pic->p->i_pitch;
+    p_src = p_src_pic->p->p_pixels +   /* x offset is in pixels, y in lines */
+            p_filter->fmt_in.video.i_x_offset * i_src_pix_pitch +
+            p_src_pic->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
 
-    i_src_pix_pitch = p_src->p->i_pixel_pitch;
-    i_src2_pitch = p_src->p->i_pitch;
-    p_src2 = p_src->p->p_pixels +
-             p_filter->fmt_in.video.i_x_offset * i_src2_pitch +
-             p_src->p->i_pitch * p_filter->fmt_in.video.i_y_offset;
-
-    i_width = (i_width >> 1) << 1; /* Needs to be a multiple of 2 */
-
-#define MAX_TRANS 255
-#define TRANS_BITS  8
+    i_width &= ~1; /* Needs to be a multiple of 2 */
 
     /* Draw until we reach the bottom of the subtitle */
-    for( i_y = 0; i_y < i_height; i_y++, p_trans += i_src2_pitch,
-         p_dst += i_dst_pitch, p_src1 += i_src1_pitch,
-         p_src2 += i_src2_pitch )
+    for( i_y = 0; i_y < i_height; i_y++,
+         p_dst += i_dst_pitch,
+         p_src += i_src_pitch )
     {
         /* Draw until we reach the end of the line */
         for( i_x = 0; i_x < i_width; i_x++, b_even = !b_even )
         {
-#define     R         ( p_src2[i_x * i_src_pix_pitch + 0] )
-#define     G         ( p_src2[i_x * i_src_pix_pitch + 1] )
-#define     B         ( p_src2[i_x * i_src_pix_pitch + 2] )
-            i_trans = ( p_src2[i_x * i_src_pix_pitch + 3] * i_alpha ) / 255;
+            const int R = p_src[i_x * i_src_pix_pitch + 0];
+            const int G = p_src[i_x * i_src_pix_pitch + 1];
+            const int B = p_src[i_x * i_src_pix_pitch + 2];
+
+            i_trans = vlc_alpha( p_src[i_x * i_src_pix_pitch + 3], i_alpha );
             if( !i_trans )
-            {
-                /* Completely transparent. Don't change pixel */
-            }
-            else if( i_trans == MAX_TRANS )
-            {
-                /* Completely opaque. Completely overwrite underlying pixel */
-                rgb_to_yuv( &y, &u, &v, R, G, B );
-                p_dst[i_x * 2 + i_l_offset] = y;
+                continue;
 
-                if( b_even )
-                {
-                    p_dst[i_x * 2 + i_u_offset] = u;
-                    p_dst[i_x * 2 + i_v_offset] = v;
-                }
-            }
-            else
-            {
-                /* Blending */
-                rgb_to_yuv( &y, &u, &v, R, G, B );
-                p_dst[i_x * 2 + i_l_offset]     = ( (uint16_t)y * i_trans +
-                    (uint16_t)p_src1[i_x * 2 + i_l_offset] * (MAX_TRANS - i_trans) )
-                    >> TRANS_BITS;
+            /* Blending */
+            rgb_to_yuv( &y, &u, &v, R, G, B );
 
-                if( b_even )
-                {
-                    p_dst[i_x * 2 + i_u_offset] = ( (uint16_t)u * i_trans +
-                        (uint16_t)p_src1[i_x * 2 + i_u_offset] * (MAX_TRANS - i_trans) )
-                        >> TRANS_BITS;
-                    p_dst[i_x * 2 + i_v_offset] = ( (uint16_t)v * i_trans +
-                        (uint16_t)p_src1[i_x * 2 + i_v_offset] * (MAX_TRANS - i_trans) )
-                        >> TRANS_BITS;
-                }
-            }
+            vlc_blend_packed( &p_dst[i_x * 2],
+                              i_l_offset, i_u_offset, i_v_offset,
+                              y, u, v, i_trans, b_even );
         }
     }
-
-#undef MAX_TRANS
-#undef TRANS_BITS
-
-    return;
 }