git.sesse.net Git - vlc/blobdiff - plugins/spudec/spu_decoder.c
* ALL: new module API. Makes a few things a lot simpler, and we gain
[vlc] / plugins / spudec / spu_decoder.c
index f5af620803c5b27625d7477aa802e1ddd374b6ed..c23a6a363c8b820978ba05a204af41ededc9adb1 100644 (file)
@@ -2,9 +2,10 @@
  * spu_decoder.c : spu decoder thread
  *****************************************************************************
  * Copyright (C) 2000-2001 VideoLAN
- * $Id: spu_decoder.c,v 1.4 2001/12/27 01:49:34 massiot Exp $
+ * $Id: spu_decoder.c,v 1.31 2002/07/31 20:56:52 sam Exp $
  *
  * Authors: Samuel Hocevar <sam@zoy.org>
+ *          Rudolf Cornelissen <rag.cornelissen@inter.nl.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
-#define MODULE_NAME spudec
-#include "modules_inner.h"
-
 /*****************************************************************************
  * Preamble
  *****************************************************************************/
-#include "defs.h"
+#include <stdlib.h>                                      /* malloc(), free() */
+#include <string.h>                                    /* memcpy(), memset() */
+
+#include <vlc/vlc.h>
+#include <vlc/vout.h>
+#include <vlc/decoder.h>
 
 #ifdef HAVE_UNISTD_H
-#include <unistd.h>                                              /* getpid() */
+#   include <unistd.h>                                           /* getpid() */
 #endif
 
 #ifdef WIN32                   /* getpid() for win32 is located in process.h */
-#include <process.h>
+#   include <process.h>
 #endif
 
-#include <stdlib.h>                                      /* malloc(), free() */
-#include <string.h>                                    /* memcpy(), memset() */
-
-#include "common.h"
-#include "intf_msg.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "video.h"
-#include "video_output.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
 #include "spu_decoder.h"
 
-#include "modules.h"
-#include "modules_export.h"
-
 /*****************************************************************************
  * Local prototypes
  *****************************************************************************/
-static int  spudec_Probe         ( probedata_t * );
-static int  spudec_Run           ( decoder_config_t * );
-static int  spudec_Init          ( spudec_thread_t * );
-static void spudec_ErrorThread   ( spudec_thread_t * );
-static void spudec_EndThread     ( spudec_thread_t * );
+static int  OpenDecoder   ( vlc_object_t * );      
+static int  RunDecoder    ( decoder_fifo_t * );
+static int  InitThread    ( spudec_thread_t * );
+static void EndThread     ( spudec_thread_t * );
 
 static int  SyncPacket           ( spudec_thread_t * );
 static void ParsePacket          ( spudec_thread_t * );
 static int  ParseControlSequences( spudec_thread_t *, subpicture_t * );
 static int  ParseRLE             ( spudec_thread_t *, subpicture_t *, u8 * );
-
-/*****************************************************************************
- * Capabilities
- *****************************************************************************/
-void _M( spudec_getfunctions )( function_list_t * p_function_list )
-{
-    p_function_list->pf_probe = spudec_Probe;
-    p_function_list->functions.dec.pf_run = spudec_Run;
-}
-
+static void RenderSPU            ( vout_thread_t *, picture_t *,
+                                   const subpicture_t * );
 /*****************************************************************************
- * Build configuration tree.
+ * Module descriptor.
  *****************************************************************************/
-MODULE_CONFIG_START
-ADD_WINDOW( "Configuration for SPU decoder module" )
-    ADD_COMMENT( "Nothing to configure" )
-MODULE_CONFIG_STOP
-
-MODULE_INIT_START
-    p_module->i_capabilities = MODULE_CAPABILITY_DEC;
-    p_module->psz_longname = "subtitles decoder module";
-MODULE_INIT_STOP
-
-MODULE_ACTIVATE_START
-    _M( spudec_getfunctions )( &p_module->p_functions->dec );
-MODULE_ACTIVATE_STOP
-
-MODULE_DEACTIVATE_START
-MODULE_DEACTIVATE_STOP
+vlc_module_begin();
+    set_description( _("DVD subtitles decoder module") );
+    set_capability( "decoder", 50 );
+    set_callbacks( OpenDecoder, NULL );
+vlc_module_end();
 
 /*****************************************************************************
- * spudec_Probe: probe the decoder and return score
+ * OpenDecoder: probe the decoder and return score
  *****************************************************************************
  * Tries to launch a decoder and return score so that the interface is able 
  * to choose.
  *****************************************************************************/
-static int spudec_Probe( probedata_t *p_data )
+static int OpenDecoder( vlc_object_t *p_this )
 {
-    if( p_data->i_type == DVD_SPU_ES )
-        return( 50 );
-    else
-        return( 0 );
+    decoder_fifo_t *p_fifo = (decoder_fifo_t*) p_this;
+
+    if( p_fifo->i_fourcc == VLC_FOURCC('s','p','u',' ') )
+    {   
+        p_fifo->pf_run = RunDecoder;
+        return VLC_SUCCESS;
+    }
+    
+    return VLC_EGENERIC;
 }
 
 /*****************************************************************************
- * spudec_Run: this function is called just after the thread is created
+ * RunDecoder: this function is called just after the thread is created
  *****************************************************************************/
-static int spudec_Run( decoder_config_t * p_config )
+static int RunDecoder( decoder_fifo_t * p_fifo )
 {
     spudec_thread_t *     p_spudec;
    
-    intf_WarnMsg( 3, "spudec: thread launched. Initializing ..." );
-
     /* Allocate the memory needed to store the thread's structure */
     p_spudec = (spudec_thread_t *)malloc( sizeof(spudec_thread_t) );
 
     if ( p_spudec == NULL )
     {
-        intf_ErrMsg( "spudec error: not enough memory "
-                     "for spudec_CreateThread() to create the new thread" );
+        msg_Err( p_fifo, "out of memory" );
+        DecoderError( p_fifo );
         return( -1 );
     }
     
     /*
      * Initialize the thread properties
      */
-    p_spudec->p_config = p_config;
-
-    p_spudec->p_fifo = p_config->p_decoder_fifo;
+    p_spudec->p_vout = NULL;
+    p_spudec->p_fifo = p_fifo;
         
     /*
      * Initialize thread and free configuration
      */
-    p_spudec->p_fifo->b_error = spudec_Init( p_spudec );
+    p_spudec->p_fifo->b_error = InitThread( p_spudec );
 
     /*
      * Main loop - it is not executed if an error occurred during
@@ -161,102 +129,84 @@ static int spudec_Run( decoder_config_t * p_config )
      */
     if( p_spudec->p_fifo->b_error )
     {
-        spudec_ErrorThread( p_spudec );
-    }
-
-    /* End of thread */
-    spudec_EndThread( p_spudec );
+        DecoderError( p_spudec->p_fifo );
 
-    if( p_spudec->p_fifo->b_error )
-    {
-        return( -1 );
+        /* End of thread */
+        EndThread( p_spudec );
+        return -1;
     }
-   
-    return( 0 );
 
+    /* End of thread */
+    EndThread( p_spudec );
+    return 0;
 }
 
 /* following functions are local */
 
 /*****************************************************************************
- * spudec_Init: initialize spu decoder thread
+ * InitThread: initialize spu decoder thread
  *****************************************************************************
  * This function is called from RunDecoder and performs the second step of the
  * initialization. It returns 0 on success. Note that the thread's flags are not
  * modified inside this function.
  *****************************************************************************/
-static int spudec_Init( spudec_thread_t *p_spudec )
+static int InitThread( spudec_thread_t *p_spudec )
 {
-    int i_retry = 0;
-
-    /* Spawn a video output if there is none */
-    vlc_mutex_lock( &p_vout_bank->lock );
-
-    while( p_vout_bank->i_count == 0 )
+    /* Find an available video output */
+    do
     {
-        vlc_mutex_unlock( &p_vout_bank->lock );
-
-        if( i_retry++ > 10 )
+        if( p_spudec->p_fifo->b_die || p_spudec->p_fifo->b_error )
         {
-            intf_WarnMsg( 1, "spudec: waited too long for vout, aborting" );
-            free( p_spudec );
+            return -1;
+        }
+
+        p_spudec->p_vout = vlc_object_find( p_spudec->p_fifo, VLC_OBJECT_VOUT,
+                                                              FIND_ANYWHERE );
 
-            return( -1 );
+        if( p_spudec->p_vout )
+        {
+            break;
         }
 
         msleep( VOUT_OUTMEM_SLEEP );
-        vlc_mutex_lock( &p_vout_bank->lock );
     }
+    while( 1 );
 
-    /* Take the first video output FIXME: take the best one */
-    p_spudec->p_vout = p_vout_bank->pp_vout[ 0 ];
-    vlc_mutex_unlock( &p_vout_bank->lock );
-    p_spudec->p_config->pf_init_bit_stream(
-            &p_spudec->bit_stream,
-            p_spudec->p_config->p_decoder_fifo, NULL, NULL );
+    InitBitstream( &p_spudec->bit_stream, p_spudec->p_fifo, NULL, NULL );
 
     /* Mark thread as running and return */
-    return( 0 );
+    return 0;
 }
 
 /*****************************************************************************
- * spudec_ErrorThread: spudec_Run() error loop
+ * EndThread: thread destruction
  *****************************************************************************
- * This function is called when an error occured during thread main's loop. The
- * thread can still receive feed, but must be ready to terminate as soon as
- * possible.
+ * This function is called when the thread ends after a successful
+ * initialization.
  *****************************************************************************/
-static void spudec_ErrorThread( spudec_thread_t *p_spudec )
+static void EndThread( spudec_thread_t *p_spudec )
 {
-    /* We take the lock, because we are going to read/write the start/end
-     * indexes of the decoder fifo */
-    vlc_mutex_lock( &p_spudec->p_fifo->data_lock );
-
-    /* Wait until a `die' order is sent */
-    while( !p_spudec->p_fifo->b_die )
+    if( p_spudec->p_vout != NULL 
+     && p_spudec->p_vout->p_subpicture != NULL )
     {
-        /* Trash all received PES packets */
-        p_spudec->p_fifo->pf_delete_pes(
-                        p_spudec->p_fifo->p_packets_mgt,
-                        p_spudec->p_fifo->p_first );
-
-        /* Waiting for the input thread to put new PES packets in the fifo */
-        vlc_cond_wait( &p_spudec->p_fifo->data_wait,
-                       &p_spudec->p_fifo->data_lock );
-    }
+        subpicture_t *  p_subpic;
+        int             i_subpic;
+    
+        for( i_subpic = 0; i_subpic < VOUT_MAX_SUBPICTURES; i_subpic++ )
+        {
+            p_subpic = &p_spudec->p_vout->p_subpicture[i_subpic];
 
-    /* We can release the lock before leaving */
-    vlc_mutex_unlock( &p_spudec->p_fifo->data_lock );
-}
+            if( p_subpic != NULL &&
+              ( ( p_subpic->i_status == RESERVED_SUBPICTURE )
+             || ( p_subpic->i_status == READY_SUBPICTURE ) ) )
+            {
+                vout_DestroySubPicture( p_spudec->p_vout, p_subpic );
+            }
+        }
 
-/*****************************************************************************
- * spudec_EndThread: thread destruction
- *****************************************************************************
- * This function is called when the thread ends after a sucessful
- * initialization.
- *****************************************************************************/
-static void spudec_EndThread( spudec_thread_t *p_spudec )
-{
+        vlc_object_release( p_spudec->p_vout );
+    }
+    
     free( p_spudec );
 }
 
@@ -301,19 +251,20 @@ static void ParsePacket( spudec_thread_t *p_spudec )
     u8           * p_src;
     unsigned int   i_offset;
 
-    intf_WarnMsg( 3, "spudec: trying to gather a 0x%.2x long subtitle",
-                  p_spudec->i_spu_size );
+    msg_Dbg( p_spudec->p_fifo, "trying to gather a 0x%.2x long subtitle",
+                               p_spudec->i_spu_size );
 
     /* We cannot display a subpicture with no date */
     if( p_spudec->p_fifo->p_first->i_pts == 0 )
     {
-        intf_WarnMsg( 3, "spudec error: subtitle without a date" );
+        msg_Warn( p_spudec->p_fifo, "subtitle without a date" );
         return;
     }
 
     /* Allocate the subpicture internal data. */
-    p_spu = vout_CreateSubPicture( p_spudec->p_vout, DVD_SUBPICTURE,
-                                   p_spudec->i_rle_size * 4 );
+    p_spu = vout_CreateSubPicture( p_spudec->p_vout, MEMORY_SUBPICTURE,
+                                   sizeof( subpicture_sys_t )
+                                    + p_spudec->i_rle_size * 4 );
     /* Rationale for the "p_spudec->i_rle_size * 4": we are going to
      * expand the RLE stuff so that we won't need to read nibbles later
      * on. This will speed things up a lot. Plus, we'll only need to do
@@ -324,25 +275,31 @@ static void ParsePacket( spudec_thread_t *p_spudec )
         return;
     }
 
+    /* Fill the p_spu structure */
+    p_spu->pf_render = RenderSPU;
+    p_spu->p_sys->p_data = (u8*)p_spu->p_sys + sizeof( subpicture_sys_t );
+    p_spu->p_sys->b_palette = 0;
+
     /* Get display time now. If we do it later, we may miss the PTS. */
-    p_spudec->i_pts = p_spudec->p_fifo->p_first->i_pts;
+    p_spu->p_sys->i_pts = p_spudec->p_fifo->p_first->i_pts;
 
     /* Allocate the temporary buffer we will parse */
     p_src = malloc( p_spudec->i_rle_size );
 
     if( p_src == NULL )
     {
-        intf_ErrMsg( "spudec error: could not allocate p_src" );
+        msg_Err( p_spudec->p_fifo, "out of memory" );
         vout_DestroySubPicture( p_spudec->p_vout, p_spu );
         return;
     }
 
     /* Get RLE data */
-    for( i_offset = 0;
-         i_offset + SPU_CHUNK_SIZE < p_spudec->i_rle_size;
+    for( i_offset = 0; i_offset < p_spudec->i_rle_size;
          i_offset += SPU_CHUNK_SIZE )
     {
-        GetChunk( &p_spudec->bit_stream, p_src + i_offset, SPU_CHUNK_SIZE );
+        GetChunk( &p_spudec->bit_stream, p_src + i_offset,
+                  ( i_offset + SPU_CHUNK_SIZE < p_spudec->i_rle_size ) ?
+                  SPU_CHUNK_SIZE : p_spudec->i_rle_size - i_offset );
 
         /* Abort subtitle parsing if we were requested to stop */
         if( p_spudec->p_fifo->b_die )
@@ -353,12 +310,9 @@ static void ParsePacket( spudec_thread_t *p_spudec )
         }
     }
 
-    GetChunk( &p_spudec->bit_stream, p_src + i_offset,
-              p_spudec->i_rle_size - i_offset );
-
 #if 0
     /* Dump the subtitle info */
-    intf_WarnHexDump( 5, p_spu->p_data, p_spudec->i_rle_size );
+    intf_WarnHexDump( 5, p_spu->p_sys->p_data, p_spudec->i_rle_size );
 #endif
 
     /* Getting the control part */
@@ -382,9 +336,9 @@ static void ParsePacket( spudec_thread_t *p_spudec )
         return;
     }
 
-    intf_WarnMsg( 3, "spudec: total size: 0x%x, RLE offsets: 0x%x 0x%x",
-                  p_spudec->i_spu_size,
-                  p_spu->type.spu.i_offset[0], p_spu->type.spu.i_offset[1] );
+    msg_Dbg( p_spudec->p_fifo, "total size: 0x%x, RLE offsets: 0x%x 0x%x",
+             p_spudec->i_spu_size,
+             p_spu->p_sys->pi_offset[0], p_spu->p_sys->pi_offset[1] );
 
     /* SPU is finished - we can ask the video output to display it */
     vout_DisplaySubPicture( p_spudec->p_vout, p_spu );
@@ -413,8 +367,10 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
     u8  i_command;
     int i_date;
 
+    int i, pi_alpha[4];
+
     /* XXX: temporary variables */
-    boolean_t b_force_display = 0;
+    vlc_bool_t b_force_display = 0;
 
     /* Initialize the structure */
     p_spu->i_start = p_spu->i_stop = 0;
@@ -442,7 +398,7 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
                 case SPU_CMD_FORCE_DISPLAY:
 
                     /* 00 (force displaying) */
-                    p_spu->i_start = p_spudec->i_pts + ( i_date * 11000 );
+                    p_spu->i_start = p_spu->p_sys->i_pts + ( i_date * 11000 );
                     b_force_display = 1;
  
                     break;
@@ -451,29 +407,75 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
                 case SPU_CMD_START_DISPLAY:
  
                     /* 01 (start displaying) */
-                    p_spu->i_start = p_spudec->i_pts + ( i_date * 11000 );
+                    p_spu->i_start = p_spu->p_sys->i_pts + ( i_date * 11000 );
  
                     break;
  
                 case SPU_CMD_STOP_DISPLAY:
  
                     /* 02 (stop displaying) */
-                    p_spu->i_stop = p_spudec->i_pts + ( i_date * 11000 );
+                    p_spu->i_stop = p_spu->p_sys->i_pts + ( i_date * 11000 );
  
                     break;
  
                 case SPU_CMD_SET_PALETTE:
  
-                    /* 03xxxx (palette) - trashed */
-                    RemoveBits( &p_spudec->bit_stream, 16 );
+                    /* 03xxxx (palette) */
+                    if( p_spudec->p_fifo->p_demux_data &&
+                         *(int*)p_spudec->p_fifo->p_demux_data == 0xBeeF )
+                    {
+                        u32 i_color;
+
+                        p_spu->p_sys->b_palette = 1;
+                        for( i = 0; i < 4 ; i++ )
+                        {
+                            i_color = ((u32*)((char*)p_spudec->p_fifo->
+                                        p_demux_data + sizeof(int)))[
+                                          GetBits(&p_spudec->bit_stream, 4) ];
+
+                            /* FIXME: this job should be done sooner */
+#ifndef WORDS_BIGENDIAN
+                            p_spu->p_sys->pi_yuv[3-i][0] = (i_color>>16) & 0xff;
+                            p_spu->p_sys->pi_yuv[3-i][1] = (i_color>>0) & 0xff;
+                            p_spu->p_sys->pi_yuv[3-i][2] = (i_color>>8) & 0xff;
+#else
+                            p_spu->p_sys->pi_yuv[3-i][0] = (i_color>>8) & 0xff;
+                            p_spu->p_sys->pi_yuv[3-i][1] = (i_color>>24) & 0xff;
+                            p_spu->p_sys->pi_yuv[3-i][2] = (i_color>>16) & 0xff;
+#endif
+                        }
+                    }
+                    else
+                    {
+                        RemoveBits( &p_spudec->bit_stream, 16 );
+                    }
                     i_index += 2;
  
                     break;
  
                 case SPU_CMD_SET_ALPHACHANNEL:
  
-                    /* 04xxxx (alpha channel) - trashed */
-                    RemoveBits( &p_spudec->bit_stream, 16 );
+                    /* 04xxxx (alpha channel) */
+                    pi_alpha[3] = GetBits( &p_spudec->bit_stream, 4 );
+                    pi_alpha[2] = GetBits( &p_spudec->bit_stream, 4 );
+                    pi_alpha[1] = GetBits( &p_spudec->bit_stream, 4 );
+                    pi_alpha[0] = GetBits( &p_spudec->bit_stream, 4 );
+
+                    /* Ignore blank alpha palette. Sometimes spurious blank
+                     * alpha palettes are present - dunno why. */
+                    if( pi_alpha[0] | pi_alpha[1] | pi_alpha[2] | pi_alpha[3] )
+                    {
+                        p_spu->p_sys->pi_alpha[0] = pi_alpha[0];
+                        p_spu->p_sys->pi_alpha[1] = pi_alpha[1];
+                        p_spu->p_sys->pi_alpha[2] = pi_alpha[2];
+                        p_spu->p_sys->pi_alpha[3] = pi_alpha[3];
+                    }
+                    else
+                    {
+                        msg_Warn( p_spudec->p_fifo,
+                                  "ignoring blank alpha palette" );
+                    }
+
                     i_index += 2;
  
                     break;
@@ -496,10 +498,10 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
                 case SPU_CMD_SET_OFFSETS:
  
                     /* 06xxxxyyyy (byte offsets) */
-                    p_spu->type.spu.i_offset[0] =
+                    p_spu->p_sys->pi_offset[0] =
                         GetBits( &p_spudec->bit_stream, 16 ) - 4;
  
-                    p_spu->type.spu.i_offset[1] =
+                    p_spu->p_sys->pi_offset[1] =
                         GetBits( &p_spudec->bit_stream, 16 ) - 4;
  
                     i_index += 4;
@@ -514,8 +516,8 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
                 default:
  
                     /* xx (unknown command) */
-                    intf_ErrMsg( "spudec error: unknown command 0x%.2x",
-                                 i_command );
+                    msg_Err( p_spudec->p_fifo, "unknown command 0x%.2x",
+                                               i_command );
                     return( 1 );
             }
 
@@ -532,21 +534,21 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
     /* Check that the next sequence index matches the current one */
     if( i_next_seq != i_cur_seq )
     {
-        intf_ErrMsg( "spudec error: index mismatch (0x%.4x != 0x%.4x)",
-                     i_next_seq, i_cur_seq );
+        msg_Err( p_spudec->p_fifo, "index mismatch (0x%.4x != 0x%.4x)",
+                                   i_next_seq, i_cur_seq );
         return( 1 );
     }
 
     if( i_index > p_spudec->i_spu_size )
     {
-        intf_ErrMsg( "spudec error: uh-oh, we went too far (0x%.4x > 0x%.4x)",
-                     i_index, p_spudec->i_spu_size );
+        msg_Err( p_spudec->p_fifo, "uh-oh, we went too far (0x%.4x > 0x%.4x)",
+                                   i_index, p_spudec->i_spu_size );
         return( 1 );
     }
 
     if( !p_spu->i_start )
     {
-        intf_ErrMsg( "spudec error: no `start display' command" );
+        msg_Err( p_spudec->p_fifo, "no `start display' command" );
     }
 
     if( !p_spu->i_stop )
@@ -569,9 +571,9 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
         /* More than one padding byte - this is very strange, but
          * we can deal with it */
         default:
-            intf_WarnMsg( 2, "spudec warning: %i padding bytes, we usually "
-                             "get 0 or 1 of them",
-                          p_spudec->i_spu_size - i_index );
+            msg_Warn( p_spudec->p_fifo,
+                      "%i padding bytes, we usually get 0 or 1 of them",
+                      p_spudec->i_spu_size - i_index );
 
             while( i_index < p_spudec->i_spu_size )
             {
@@ -584,9 +586,9 @@ static int ParseControlSequences( spudec_thread_t *p_spudec,
 
     if( b_force_display )
     {
-        intf_ErrMsg( "spudec: \"force display\" command" );
-        intf_ErrMsg( "spudec: send mail to <sam@zoy.org> if you "
-                     "want to help debugging this" );
+        msg_Err( p_spudec->p_fifo, "\"force display\" command" );
+        msg_Err( p_spudec->p_fifo, "send mail to <sam@zoy.org> if you "
+                                   "want to help debugging this" );
     }
 
     /* Successfully parsed ! */
@@ -609,20 +611,24 @@ static int ParseRLE( spudec_thread_t *p_spudec,
     unsigned int i_height = p_spu->i_height;
     unsigned int i_x, i_y;
 
-    u16 *p_dest = (u16 *)p_spu->p_data;
+    u16 *p_dest = (u16 *)p_spu->p_sys->p_data;
 
     /* The subtitles are interlaced, we need two offsets */
     unsigned int  i_id = 0;                   /* Start on the even SPU layer */
     unsigned int  pi_table[ 2 ];
     unsigned int *pi_offset;
 
-    boolean_t b_empty_top = 1,
-              b_empty_bottom = 0;
+    vlc_bool_t b_empty_top = 1,
+               b_empty_bottom = 0;
     unsigned int i_skipped_top = 0,
                  i_skipped_bottom = 0;
 
-    pi_table[ 0 ] = p_spu->type.spu.i_offset[ 0 ] << 1;
-    pi_table[ 1 ] = p_spu->type.spu.i_offset[ 1 ] << 1;
+    /* Colormap statistics */
+    int i_border = -1;
+    int stats[4]; stats[0] = stats[1] = stats[2] = stats[3] = 0;
+
+    pi_table[ 0 ] = p_spu->p_sys->pi_offset[ 0 ] << 1;
+    pi_table[ 1 ] = p_spu->p_sys->pi_offset[ 1 ] << 1;
 
     for( i_y = 0 ; i_y < i_height ; i_y++ )
     {
@@ -655,8 +661,8 @@ static int ParseRLE( spudec_thread_t *p_spudec,
                             else
                             {
                                 /* We have a boo boo ! */
-                                intf_ErrMsg( "spudec error: unknown RLE code "
-                                             "0x%.4x", i_code );
+                                msg_Err( p_spudec->p_fifo, "unknown RLE code "
+                                         "0x%.4x", i_code );
                                 return( 1 );
                             }
                         }
@@ -666,13 +672,21 @@ static int ParseRLE( spudec_thread_t *p_spudec,
 
             if( ( (i_code >> 2) + i_x + i_y * i_width ) > i_height * i_width )
             {
-                intf_ErrMsg( "spudec error: out of bounds, %i at (%i,%i) is "
-                             "out of %ix%i",
-                             i_code >> 2, i_x, i_y, i_width, i_height );
+                msg_Err( p_spudec->p_fifo,
+                         "out of bounds, %i at (%i,%i) is out of %ix%i",
+                         i_code >> 2, i_x, i_y, i_width, i_height );
                 return( 1 );
             }
 
-            if( i_code == (i_width << 2) ) /* FIXME: we assume 0 is transp */
+            /* Try to find the border color */
+            if( p_spu->p_sys->pi_alpha[ i_code & 0x3 ] != 0x00 )
+            {
+                i_border = i_code & 0x3;
+                stats[i_border] += i_code >> 2;
+            }
+
+            if( (i_code >> 2) == i_width
+                 && p_spu->p_sys->pi_alpha[ i_code & 0x3 ] == 0x00 )
             {
                 if( b_empty_top )
                 {
@@ -704,8 +718,8 @@ static int ParseRLE( spudec_thread_t *p_spudec,
         /* Check that we didn't go too far */
         if( i_x > i_width )
         {
-            intf_ErrMsg( "spudec error: i_x overflowed, %i > %i",
-                         i_x, i_width );
+            msg_Err( p_spudec->p_fifo, "i_x overflowed, %i > %i",
+                                       i_x, i_width );
             return( 1 );
         }
 
@@ -722,22 +736,22 @@ static int ParseRLE( spudec_thread_t *p_spudec,
     /* We shouldn't get any padding bytes */
     if( i_y < i_height )
     {
-        intf_ErrMsg( "spudec: padding bytes found in RLE sequence" );
-        intf_ErrMsg( "spudec: send mail to <sam@zoy.org> if you "
-                     "want to help debugging this" );
+        msg_Err( p_spudec->p_fifo, "padding bytes found in RLE sequence" );
+        msg_Err( p_spudec->p_fifo, "send mail to <sam@zoy.org> if you "
+                                   "want to help debugging this" );
 
         /* Skip them just in case */
         while( i_y < i_height )
-       {
+        {
             *p_dest++ = i_width << 2;
             i_y++;
-       }
+        }
 
         return( 1 );
     }
 
-    intf_WarnMsg( 3, "spudec: valid subtitle, size: %ix%i, position: %i,%i",
-                  p_spu->i_width, p_spu->i_height, p_spu->i_x, p_spu->i_y );
+    msg_Dbg( p_spudec->p_fifo, "valid subtitle, size: %ix%i, position: %i,%i",
+             p_spu->i_width, p_spu->i_height, p_spu->i_x, p_spu->i_y );
 
     /* Crop if necessary */
     if( i_skipped_top || i_skipped_bottom )
@@ -745,10 +759,452 @@ static int ParseRLE( spudec_thread_t *p_spudec,
         p_spu->i_y += i_skipped_top;
         p_spu->i_height -= i_skipped_top + i_skipped_bottom;
 
-        intf_WarnMsg( 3, "spudec: cropped to: %ix%i, position: %i,%i",
-                      p_spu->i_width, p_spu->i_height, p_spu->i_x, p_spu->i_y );
+        msg_Dbg( p_spudec->p_fifo, "cropped to: %ix%i, position: %i,%i",
+                 p_spu->i_width, p_spu->i_height, p_spu->i_x, p_spu->i_y );
+    }
+
+    /* Handle color if no palette was found */
+    if( !p_spu->p_sys->b_palette )
+    {
+        int i, i_inner = -1, i_shade = -1;
+
+        /* Set the border color */
+        p_spu->p_sys->pi_yuv[i_border][0] = 0x00;
+        p_spu->p_sys->pi_yuv[i_border][1] = 0x80;
+        p_spu->p_sys->pi_yuv[i_border][2] = 0x80;
+        stats[i_border] = 0;
+
+        /* Find the inner colors */
+        for( i = 0 ; i < 4 && i_inner == -1 ; i++ )
+        {
+            if( stats[i] )
+            {
+                i_inner = i;
+            }
+        }
+
+        for(       ; i < 4 && i_shade == -1 ; i++ )
+        {
+            if( stats[i] )
+            {
+                if( stats[i] > stats[i_inner] )
+                {
+                    i_shade = i_inner;
+                    i_inner = i;
+                }
+                else
+                {
+                    i_shade = i;
+                }
+            }
+        }
+
+        /* Set the inner color */
+        if( i_inner != -1 )
+        {
+            p_spu->p_sys->pi_yuv[i_inner][0] = 0xff;
+            p_spu->p_sys->pi_yuv[i_inner][1] = 0x80;
+            p_spu->p_sys->pi_yuv[i_inner][2] = 0x80;
+        }
+
+        /* Set the anti-aliasing color */
+        if( i_shade != -1 )
+        {
+            p_spu->p_sys->pi_yuv[i_shade][0] = 0x80;
+            p_spu->p_sys->pi_yuv[i_shade][1] = 0x80;
+            p_spu->p_sys->pi_yuv[i_shade][2] = 0x80;
+        }
+
+        msg_Dbg( p_spudec->p_fifo,
+                 "using custom palette (border %i, inner %i, shade %i)",
+                 i_border, i_inner, i_shade );
     }
 
     return( 0 );
 }
 
+/*****************************************************************************
+ * RenderSPU: draw an SPU on a picture
+ *****************************************************************************
+ * Draws the preparsed RLE data of p_spu (u16 codes: run length << 2 | color
+ * index) into p_pic for the chroma given by p_vout->output.i_chroma. I420,
+ * IYUV, YV12 and YUY2 are drawn unscaled; RV16, RV24 and RV32 are scaled by
+ * output size / render size. No bounds checks are done here, for speed.
+ *****************************************************************************/
+static void RenderSPU( vout_thread_t *p_vout, picture_t *p_pic,
+                       const subpicture_t *p_spu )
+{
+    /* Common variables */
+    u16  p_clut16[4];
+    u32  p_clut32[4];
+    u8  *p_dest;
+    u8  *p_destptr;              /* set in the I420 blending loop before use */
+    u16 *p_source = (u16 *)p_spu->p_sys->p_data;
+
+    int i_x, i_y;
+    int i_len, i_color, i_colprecomp, i_destalpha;
+    int i_cnt;                      /* int: run lengths may exceed 255 pixels */
+
+    /* RGB-specific */
+    int i_xscale, i_yscale, i_width, i_height, i_ytmp, i_yreal, i_ynext;
+
+    switch( p_vout->output.i_chroma )
+    {
+    /* I420 target, no scaling. Only Y is drawn; p_dest = bottom-right corner */
+    case VLC_FOURCC('I','4','2','0'):
+    case VLC_FOURCC('I','Y','U','V'):
+    case VLC_FOURCC('Y','V','1','2'):
+
+    p_dest = p_pic->Y_PIXELS + p_spu->i_x + p_spu->i_width
+              + p_pic->Y_PITCH * ( p_spu->i_y + p_spu->i_height );
+
+    /* Draw until we reach the bottom of the subtitle */
+    for( i_y = p_spu->i_height * p_pic->Y_PITCH ;
+         i_y ;
+         i_y -= p_pic->Y_PITCH )
+    {
+        /* Draw until we reach the end of the line */
+        for( i_x = p_spu->i_width ; i_x ; )
+        {
+            /* Get the RLE part, then draw the line */
+            i_color = *p_source & 0x3;
+            i_len = *p_source++ >> 2;
+
+            switch( p_spu->p_sys->pi_alpha[ i_color ] )
+            {
+                case 0x00:
+                    break;
+
+                case 0x0f:
+                    memset( p_dest - i_x - i_y,
+                            p_spu->p_sys->pi_yuv[i_color][0], i_len );
+                    break;
+
+                default:
+                    /* To be able to divide by 16 (>>4) we add 1 to the alpha.
+                     * This means Alpha 0 won't be completely transparent, but
+                     * that's handled in a special case above anyway. */
+                    i_colprecomp = p_spu->p_sys->pi_yuv[i_color][0]
+                                    * (p_spu->p_sys->pi_alpha[ i_color ] + 1);
+                    i_destalpha = 15 - p_spu->p_sys->pi_alpha[ i_color ];
+
+                    for ( p_destptr = p_dest - i_x - i_y;
+                          p_destptr < p_dest - i_x - i_y + i_len;
+                          p_destptr++ )
+                    {
+                        *p_destptr = ( i_colprecomp +
+                                        *p_destptr * i_destalpha ) >> 4;
+                    }
+                    break;
+
+            }
+            i_x -= i_len;
+        }
+    }
+
+    break;
+
+    /* RV16 target, scaling (scale factors are fixed-point, 6 fraction bits) */
+    case VLC_FOURCC('R','V','1','6'):
+
+    /* XXX: COMPLETE HACK, memset only stores a byte: both clut bytes are equal */
+    /* FIXME: get this from the DVD */
+    for( i_color = 0; i_color < 4; i_color++ )
+    {
+        p_clut16[i_color] = 0x1111
+                             * ( (u16)p_spu->p_sys->pi_yuv[i_color][0] >> 4 );
+    }
+
+    i_xscale = ( p_vout->output.i_width << 6 ) / p_vout->render.i_width;
+    i_yscale = ( p_vout->output.i_height << 6 ) / p_vout->render.i_height;
+
+    i_width  = p_spu->i_width  * i_xscale;
+    i_height = p_spu->i_height * i_yscale;
+
+    p_dest = p_pic->p->p_pixels + ( i_width >> 6 ) * 2
+              /* Add the picture coordinates and the SPU coordinates */
+              + ( (p_spu->i_x * i_xscale) >> 6 ) * 2
+              + ( (p_spu->i_y * i_yscale) >> 6 ) * p_pic->p->i_pitch;
+
+    /* Draw until we reach the bottom of the subtitle */
+    for( i_y = 0 ; i_y < i_height ; )
+    {
+        i_ytmp = i_y >> 6;
+        i_y += i_yscale;
+
+        /* Check whether we need to draw one line or more than one */
+        if( i_ytmp + 1 >= ( i_y >> 6 ) )
+        {
+            /* Just one line : we precalculate i_y >> 6 */
+            i_yreal = p_pic->p->i_pitch * i_ytmp;
+
+            /* Draw until we reach the end of the line */
+            for( i_x = i_width ; i_x ; )
+            {
+                /* Get the RLE part, then draw the line */
+                i_color = *p_source & 0x3;
+
+                switch( p_spu->p_sys->pi_alpha[ i_color ] )
+                {
+                case 0x00:
+                    i_x -= i_xscale * ( *p_source++ >> 2 );
+                    break;
+
+                case 0x0f:
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    memset( p_dest - 2 * ( i_x >> 6 ) + i_yreal,
+                            p_clut16[ i_color ],
+                            2 * ( ( i_len >> 6 ) + 1 ) );
+                    i_x -= i_len;
+                    break;
+
+                default:
+                    /* FIXME: we should do transparency */
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    memset( p_dest - 2 * ( i_x >> 6 ) + i_yreal,
+                            p_clut16[ i_color ],
+                            2 * ( ( i_len >> 6 ) + 1 ) );
+                    i_x -= i_len;
+                    break;
+                }
+
+            }
+        }
+        else
+        {
+            i_yreal = p_pic->p->i_pitch * i_ytmp;
+            i_ynext = p_pic->p->i_pitch * i_y >> 6;
+
+            /* Draw until we reach the end of the line */
+            for( i_x = i_width ; i_x ; )
+            {
+                /* Get the RLE part, then draw as many lines as needed */
+                i_color = *p_source & 0x3;
+
+                switch( p_spu->p_sys->pi_alpha[ i_color ] )
+                {
+                case 0x00:
+                    i_x -= i_xscale * ( *p_source++ >> 2 );
+                    break;
+
+                case 0x0f:
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    for( i_ytmp = i_yreal ; i_ytmp < i_ynext ;
+                         i_ytmp += p_pic->p->i_pitch )
+                    {
+                        memset( p_dest - 2 * ( i_x >> 6 ) + i_ytmp,
+                                p_clut16[ i_color ],
+                                2 * ( ( i_len >> 6 ) + 1 ) );
+                    }
+                    i_x -= i_len;
+                    break;
+
+                default:
+                    /* FIXME: we should do transparency */
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    for( i_ytmp = i_yreal ; i_ytmp < i_ynext ;
+                         i_ytmp += p_pic->p->i_pitch )
+                    {
+                        memset( p_dest - 2 * ( i_x >> 6 ) + i_ytmp,
+                                p_clut16[ i_color ],
+                                2 * ( ( i_len >> 6 ) + 1 ) );
+                    }
+                    i_x -= i_len;
+                    break;
+                }
+            }
+        }
+    }
+
+    break;
+
+    /* RV32 target, scaling (scale factors are fixed-point, 6 fraction bits) */
+    case VLC_FOURCC('R','V','2','4'):
+    case VLC_FOURCC('R','V','3','2'):
+
+    /* XXX: COMPLETE HACK, memset only stores a byte: all clut bytes are equal */
+    /* FIXME: get this from the DVD */
+    for( i_color = 0; i_color < 4; i_color++ )
+    {
+        p_clut32[i_color] = 0x11111111
+                             * ( (u16)p_spu->p_sys->pi_yuv[i_color][0] >> 4 );
+    }
+
+    i_xscale = ( p_vout->output.i_width << 6 ) / p_vout->render.i_width;
+    i_yscale = ( p_vout->output.i_height << 6 ) / p_vout->render.i_height;
+
+    i_width  = p_spu->i_width  * i_xscale;
+    i_height = p_spu->i_height * i_yscale;
+
+    p_dest = p_pic->p->p_pixels + ( i_width >> 6 ) * 4
+              /* Add the picture coordinates and the SPU coordinates */
+              + ( (p_spu->i_x * i_xscale) >> 6 ) * 4
+              + ( (p_spu->i_y * i_yscale) >> 6 ) * p_pic->p->i_pitch;
+
+    /* Draw until we reach the bottom of the subtitle */
+    for( i_y = 0 ; i_y < i_height ; )
+    {
+        i_ytmp = i_y >> 6;
+        i_y += i_yscale;
+
+        /* Check whether we need to draw one line or more than one */
+        if( i_ytmp + 1 >= ( i_y >> 6 ) )
+        {
+            /* Just one line : we precalculate i_y >> 6 */
+            i_yreal = p_pic->p->i_pitch * i_ytmp;
+
+            /* Draw until we reach the end of the line */
+            for( i_x = i_width ; i_x ; )
+            {
+                /* Get the RLE part, then draw the line */
+                i_color = *p_source & 0x3;
+
+                switch( p_spu->p_sys->pi_alpha[ i_color ] )
+                {
+                case 0x00:
+                    i_x -= i_xscale * ( *p_source++ >> 2 );
+                    break;
+
+                case 0x0f:
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    memset( p_dest - 4 * ( i_x >> 6 ) + i_yreal,
+                            p_clut32[ i_color ], 4 * ( ( i_len >> 6 ) + 1 ) );
+                    i_x -= i_len;
+                    break;
+
+                default:
+                    /* FIXME: we should do transparency */
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    memset( p_dest - 4 * ( i_x >> 6 ) + i_yreal,
+                            p_clut32[ i_color ], 4 * ( ( i_len >> 6 ) + 1 ) );
+                    i_x -= i_len;
+                    break;
+                }
+
+            }
+        }
+        else
+        {
+            i_yreal = p_pic->p->i_pitch * i_ytmp;
+            i_ynext = p_pic->p->i_pitch * i_y >> 6;
+
+            /* Draw until we reach the end of the line */
+            for( i_x = i_width ; i_x ; )
+            {
+                /* Get the RLE part, then draw as many lines as needed */
+                i_color = *p_source & 0x3;
+
+                switch( p_spu->p_sys->pi_alpha[ i_color ] )
+                {
+                case 0x00:
+                    i_x -= i_xscale * ( *p_source++ >> 2 );
+                    break;
+
+                case 0x0f:
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    for( i_ytmp = i_yreal ; i_ytmp < i_ynext ;
+                         i_ytmp += p_pic->p->i_pitch )
+                    {
+                        memset( p_dest - 4 * ( i_x >> 6 ) + i_ytmp,
+                                p_clut32[ i_color ],
+                                4 * ( ( i_len >> 6 ) + 1 ) );
+                    }
+                    i_x -= i_len;
+                    break;
+
+                default:
+                    /* FIXME: we should do transparency */
+                    i_len = i_xscale * ( *p_source++ >> 2 );
+                    for( i_ytmp = i_yreal ; i_ytmp < i_ynext ;
+                         i_ytmp += p_pic->p->i_pitch )
+                    {
+                        memset( p_dest - 4 * ( i_x >> 6 ) + i_ytmp,
+                                p_clut32[ i_color ],
+                                4 * ( ( i_len >> 6 ) + 1 ) );
+                    }
+                    i_x -= i_len;
+                    break;
+                }
+            }
+        }
+    }
+
+    break;
+
+    /* NVidia overlay, no scaling */
+    case VLC_FOURCC('Y','U','Y','2'):
+
+    p_dest = p_pic->p->p_pixels +
+              (p_spu->i_x + p_spu->i_width +
+               p_vout->output.i_width * ( p_spu->i_y + p_spu->i_height )) * 2;
+    /* Draw until we reach the bottom of the subtitle */
+    for( i_y = p_spu->i_height * p_vout->output.i_width;
+         i_y ;
+         i_y -= p_vout->output.i_width )
+    {
+        /* Draw until we reach the end of the line */
+        for( i_x = p_spu->i_width ; i_x ; )
+        {
+            /* Get the RLE part, then draw the line */
+            i_color = *p_source & 0x3;
+
+            switch( p_spu->p_sys->pi_alpha[ i_color ] )
+            {
+            case 0x00:
+                i_x -= *p_source++ >> 2;
+                break;
+
+            case 0x0f:
+                i_len = *p_source++ >> 2;
+                for( i_cnt = 0; i_cnt < i_len; i_cnt++ )
+                {
+                    /* draw a pixel */
+                    /* Y */
+                    memset( p_dest - i_x * 2 - i_y * 2 + i_cnt * 2,
+                            p_spu->p_sys->pi_yuv[i_color][0], 1);
+
+                    if (!(i_cnt & 0x01))
+                    {
+                        /* U and V */
+                        memset( p_dest - i_x * 2 - i_y * 2 + i_cnt * 2 + 1,
+                                0x80, 1);
+                        memset( p_dest - i_x * 2 - i_y * 2 + i_cnt * 2 + 3,
+                                0x80, 1);
+                    }
+                }
+                i_x -= i_len;
+                break;
+
+            default:
+                /* FIXME: we should do transparency */
+                i_len = *p_source++ >> 2;
+                for( i_cnt = 0; i_cnt < i_len; i_cnt++ )
+                {
+                    /* draw a pixel */
+                    /* Y */
+                    memset( p_dest - i_x * 2 - i_y * 2 + i_cnt * 2,
+                            p_spu->p_sys->pi_yuv[i_color][0], 1);
+
+                    if (!(i_cnt & 0x01))
+                    {
+                        /* U and V */
+                        memset( p_dest - i_x * 2 - i_y * 2 + i_cnt * 2 + 1,
+                                0x80, 1);
+                        memset( p_dest - i_x * 2 - i_y * 2 + i_cnt * 2 + 3,
+                                0x80, 1);
+                    }
+                }
+                i_x -= i_len;
+                break;
+            }
+        }
+    }
+
+    break;
+
+
+    default:
+        msg_Err( p_vout, "unknown chroma, can't render SPU" );
+        break;
+    }
+}