]> git.sesse.net Git - vlc/blobdiff - modules/codec/subsdec.c
Use I420 image buffers instead of YUVA for mosaic bridge. Impact on CPU usage for...
[vlc] / modules / codec / subsdec.c
index 05fc3c1f927f1b4c50b7a4b8d2eb97c1367b35ae..e4d66b309b4da2f6a841093c2b7a0a4731e3c5e4 100644 (file)
@@ -7,6 +7,7 @@
  * Authors: Gildas Bazin <gbazin@videolan.org>
  *          Samuel Hocevar <sam@zoy.org>
  *          Derk-Jan Hartman <hartman at videolan dot org>
+ *          Bernie Purcell <b dot purcell at adbglobal dot com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * Preamble
  *****************************************************************************/
 #include <vlc/vlc.h>
-#include <vlc/vout.h>
-#include <vlc/decoder.h>
+#include <vlc_vout.h>
+#include <vlc_codec.h>
 
-#include "vlc_osd.h"
-#include "vlc_filter.h"
-#include "charset.h"
+#include <vlc_osd.h>
+#include <vlc_filter.h>
+#include <vlc_charset.h>
+#include <vlc_stream.h>
+#include <vlc_xml.h>
+#include <errno.h>
+#include <string.h>
 
 typedef struct
 {
@@ -68,9 +73,13 @@ static void CloseDecoder  ( vlc_object_t * );
 static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
 static subpicture_t *ParseText     ( decoder_t *, block_t * );
 static void         ParseSSAHeader ( decoder_t * );
+static void         ParseUSFHeader ( decoder_t * );
+static void         ParseUSFHeaderTags( decoder_sys_t *, xml_reader_t * );
 static void         ParseSSAString ( decoder_t *, char *, subpicture_t * );
+static void         ParseUSFString ( decoder_t *, char *, subpicture_t * );
 static void         ParseColor     ( decoder_t *, char *, int *, int * );
-static void         StripTags      ( char * );
+static char        *StripTags      ( char * );
+static char        *CreateHtmlSubtitle ( char * );
 
 #define DEFAULT_NAME "Default"
 #define MAX_LINE 8192
@@ -173,6 +182,7 @@ static int OpenDecoder( vlc_object_t *p_this )
     vlc_value_t    val;
 
     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
+        p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') &&
         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
     {
         return VLC_EGENERIC;
@@ -182,7 +192,7 @@ static int OpenDecoder( vlc_object_t *p_this )
 
     /* Allocate the memory needed to store the decoder's structure */
     if( ( p_dec->p_sys = p_sys =
-          (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
+          (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL )
     {
         msg_Err( p_dec, "out of memory" );
         return VLC_ENOMEM;
@@ -198,44 +208,65 @@ static int OpenDecoder( vlc_object_t *p_this )
     p_sys->pp_ssa_styles = NULL;
     p_sys->i_ssa_styles = 0;
 
+    char *psz_charset = NULL;
+    /* First try demux-specified encoding */
     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
     {
-        msg_Dbg( p_dec, "using demux suggested character encoding: %s",
-                 p_dec->fmt_in.subs.psz_encoding );
-        if( strcmp( p_dec->fmt_in.subs.psz_encoding, "UTF-8" ) )
-            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
+        psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
+        msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
+                 p_dec->fmt_in.subs.psz_encoding ?: "not specified");
     }
-    else
+
+    /* Second, try configured encoding */
+    if (psz_charset == NULL)
     {
-        var_Create( p_dec, "subsdec-encoding",
-                    VLC_VAR_STRING | VLC_VAR_DOINHERIT );
-        var_Get( p_dec, "subsdec-encoding", &val );
-        if( !strcmp( val.psz_string, DEFAULT_NAME ) )
+        psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
+        if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME))
         {
-            const char *psz_charset = GetFallbackEncoding();
+            free (psz_charset);
+            psz_charset = NULL;
+        }
 
-            p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec,
-                    "subsdec-autodetect-utf8" );
+        msg_Dbg (p_dec, "trying configured character encoding: %s",
+                 psz_charset ?: "not specified");
+    }
 
-            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
-            msg_Dbg( p_dec, "using fallback character encoding: %s", psz_charset );
-        }
-        else if( !strcmp( val.psz_string, "UTF-8" ) )
-        {
-            msg_Dbg( p_dec, "using enforced character encoding: UTF-8" );
-        }
-        else if( val.psz_string )
+    /* Third, try "local" encoding with optional UTF-8 autodetection */
+    if (psz_charset == NULL)
+    {
+        psz_charset = strdup (GetFallbackEncoding ());
+        msg_Dbg (p_dec, "trying default character encoding: %s",
+                 psz_charset ?: "not specified");
+
+        if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
         {
-            msg_Dbg( p_dec, "using enforced character encoding: %s", val.psz_string );
-            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
-            if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
-            {
-                msg_Warn( p_dec, "unable to do requested conversion" );
-            }
+            msg_Dbg (p_dec, "using automatic UTF-8 detection");
+            p_sys->b_autodetect_utf8 = VLC_TRUE;
         }
-        if( val.psz_string ) free( val.psz_string );
     }
 
+    if (psz_charset == NULL)
+    {
+        psz_charset = strdup ("UTF-8");
+        msg_Dbg (p_dec, "trying hard-coded character encoding: %s",
+                 psz_charset ?: "error");
+    }
+
+    if (psz_charset == NULL)
+    {
+        free (p_sys);
+        return VLC_ENOMEM;
+    }
+
+    if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8"))
+    {
+        p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
+        if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
+            msg_Err (p_dec, "cannot convert from %s: %s", psz_charset,
+                     strerror (errno));
+    }
+    free (psz_charset);
+
     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
     var_Get( p_dec, "subsdec-align", &val );
     p_sys->i_align = val.i_int;
@@ -245,6 +276,11 @@ static int OpenDecoder( vlc_object_t *p_this )
         if( p_dec->fmt_in.i_extra > 0 )
             ParseSSAHeader( p_dec );
     }
+    else if( p_dec->fmt_in.i_codec == VLC_FOURCC('u','s','f',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
+    {
+        if( p_dec->fmt_in.i_extra > 0 )
+            ParseUSFHeader( p_dec );
+    }
 
     return VLC_SUCCESS;
 }
@@ -331,7 +367,14 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
         return NULL;
 
     if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
-        EnsureUTF8( psz_subtitle );
+    {
+        if (EnsureUTF8( psz_subtitle ) == NULL)
+        {
+            msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
+                     "Try manually setting a character-encoding "
+                     "before you open the file.") );
+        }
+    }
     else
     {
 
@@ -401,7 +444,8 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
     }
 
     /* Decode and format the subpicture unit */
-    if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
+    if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') &&
+        p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') )
     {
         /* Normal text subs, easy markup */
         p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
@@ -409,9 +453,10 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
         p_spu->i_y = 10;
 
         /* Remove formatting from string */
-        StripTags( psz_subtitle );
 
-        p_spu->p_region->psz_text = psz_subtitle;
+        p_spu->p_region->psz_text = StripTags( psz_subtitle );
+        p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle );
+
         p_spu->i_start = p_block->i_pts;
         p_spu->i_stop = p_block->i_pts + p_block->i_length;
         p_spu->b_ephemer = (p_block->i_length == 0);
@@ -419,19 +464,128 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
     }
     else
     {
-        /* Decode SSA strings */
-        ParseSSAString( p_dec, psz_subtitle, p_spu );
+        /* Decode SSA/USF strings */
+        if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
+            ParseSSAString( p_dec, psz_subtitle, p_spu );
+        else
+            ParseUSFString( p_dec, psz_subtitle, p_spu );
+
         p_spu->i_start = p_block->i_pts;
         p_spu->i_stop = p_block->i_pts + p_block->i_length;
         p_spu->b_ephemer = (p_block->i_length == 0);
         p_spu->b_absolute = VLC_FALSE;
         p_spu->i_original_picture_width = p_sys->i_original_width;
         p_spu->i_original_picture_height = p_sys->i_original_height;
-        if( psz_subtitle ) free( psz_subtitle );
     }
+    if( psz_subtitle ) free( psz_subtitle );
+
     return p_spu;
 }
 
+/*****************************************************************************
+ * ParseUSFString: fill a subpicture region from one USF subtitle event
+ *****************************************************************************
+ * p_dec        decoder whose p_sys holds the styles read from the USF header
+ * psz_subtitle NUL-terminated USF markup for this event (not modified)
+ * p_spu_in     subpicture whose region and placement are filled in
+ *
+ * The markup is copied verbatim into the region's psz_html, and a plain-text
+ * rendition (tags removed, <br/> turned into a newline, &lt; &gt; &amp;
+ * decoded, runs of whitespace collapsed to one space) is stored in psz_text.
+ * The style named by the last <text style="..."> tag is applied when it is
+ * known, otherwise the style called "Default", otherwise the decoder-wide
+ * alignment. psz_text stays NULL if the scratch buffer cannot be allocated.
+ *****************************************************************************/
+static void ParseUSFString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
+{
+    decoder_sys_t   *p_sys = p_dec->p_sys;
+    subpicture_t    *p_spu = p_spu_in;
+    char            *psz_text;
+    char            *psz_text_start;
+    ssa_style_t     *p_style = NULL;
+    int              i;
+
+    /* Create a text only copy of the subtitle (for legacy implementations) and copy
+     * the rich html version across as is - for parsing by a rendering engine capable
+     * of understanding it.
+     */
+    p_spu->p_region->psz_text = NULL;
+    p_spu->p_region->psz_html = strdup( psz_subtitle );
+
+    /* Start from the "Default" style; a <text style="..."> tag may override it.
+     * A style parsed without a name attribute has a NULL psz_stylename. */
+    for( i = 0; i < p_sys->i_ssa_styles; i++ )
+    {
+        const char *psz_name = p_sys->pp_ssa_styles[i]->psz_stylename;
+
+        if( psz_name && !strcasecmp( psz_name, "Default" ) )
+            p_style = p_sys->pp_ssa_styles[i];
+    }
+
+    /* The StripTags() function doesn't do everything we need (eg. <br/> tag )
+     * so do it here ourselves. Every input character yields at most one output
+     * character, so strlen() + 1 (for the terminator) is always large enough.
+     */
+    psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
+
+    if( psz_text_start != NULL )
+    {
+        psz_text = psz_text_start;
+        while( *psz_subtitle )
+        {
+            if( *psz_subtitle == '<' )
+            {
+                char *psz_tag_end = strchr( psz_subtitle, '>' );
+
+                if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
+                    *psz_text++ = '\n';
+                else if( !strncasecmp( psz_subtitle, "<text ", 6 ))
+                {
+                    const char *psz_style = strcasestr( psz_subtitle, "style=\"" );
+
+                    /* Only accept an attribute that belongs to this very tag */
+                    if( psz_style && psz_tag_end && ( psz_style < psz_tag_end ) )
+                    {
+                        size_t i_len;
+
+                        /* Step over style=" itself to reach the value */
+                        psz_style += strlen( "style=\"" );
+                        i_len = strcspn( psz_style, "\"" );
+
+                        /* Compare by length instead of patching a NUL into the
+                         * input: with a missing closing quote the old restore
+                         * step overwrote the string terminator. */
+                        for( i = 0; i < p_sys->i_ssa_styles; i++ )
+                        {
+                            const char *psz_name = p_sys->pp_ssa_styles[i]->psz_stylename;
+
+                            if( psz_name && ( strlen( psz_name ) == i_len ) &&
+                                !strncmp( psz_name, psz_style, i_len ) )
+                                p_style = p_sys->pp_ssa_styles[i];
+                        }
+                    }
+                }
+
+                /* Unterminated tag: nothing left to copy, and stepping past
+                 * the terminator must be avoided. */
+                if( psz_tag_end == NULL )
+                    break;
+                psz_subtitle = psz_tag_end;
+            }
+            else if( *psz_subtitle == '&' )
+            {
+                if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
+                    *psz_text++ = '<';
+                else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
+                    *psz_text++ = '>';
+                else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
+                    *psz_text++ = '&';
+
+                /* Unknown entities are dropped up to their ';' */
+                psz_subtitle += strcspn( psz_subtitle, ";" );
+                if( *psz_subtitle == '\0' )
+                    break;
+            }
+            else if( ( *psz_subtitle == '\t' ) ||
+                     ( *psz_subtitle == '\r' ) ||
+                     ( *psz_subtitle == '\n' ) ||
+                     ( *psz_subtitle == ' ' ) )
+            {
+                /* Collapse whitespace runs and drop leading whitespace */
+                if( ( psz_text_start < psz_text ) &&
+                    ( *(psz_text-1) != ' ' ) )
+                {
+                    *psz_text++ = ' ';
+                }
+            }
+            else
+                *psz_text++ = *psz_subtitle;
+
+            psz_subtitle++;
+        }
+        *psz_text = '\0';
+
+        /* Hand the heap buffer over to the region instead of duplicating it */
+        p_spu->p_region->psz_text = psz_text_start;
+    }
+
+    if( p_style == NULL )
+    {
+        p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
+        p_spu->i_x = p_sys->i_align ? 20 : 0;
+        p_spu->i_y = 10;
+    }
+    else
+    {
+        msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename);
+        p_spu->p_region->p_style = &p_style->font_style;
+        p_spu->i_flags = p_style->i_align;
+    }
+}
+
+
 static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
 {
     /* We expect MKV formatted SSA:
@@ -450,6 +604,8 @@ static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *
 
     psz_buffer_sub = psz_subtitle;
 
+    p_spu->p_region->psz_html = NULL;
+
     i_comma = 0;
     while( i_comma < 8 && *psz_buffer_sub != '\0' )
     {
@@ -584,6 +740,275 @@ static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *p
         *pi_alpha = ( i_color & 0xFF000000 ) >> 24;
 }
 
+/*****************************************************************************
+ * ParseUSFHeader: Retrieve global formatting information etc
+ *****************************************************************************
+ * Wraps the codec extra data (p_dec->fmt_in.p_extra, i_extra bytes) in a
+ * memory stream and runs an XML reader over it. When the first node read is
+ * named "usfsubtitles" (case-insensitive), the reader is handed to
+ * ParseUSFHeaderTags() which fills the decoder state. If the stream, the XML
+ * object or the reader cannot be created, or the node has no name, the
+ * function returns without touching the decoder state.
+ *****************************************************************************/
+static void ParseUSFHeader( decoder_t *p_dec )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    stream_t      *p_sub = NULL;
+    xml_t         *p_xml = NULL;
+    xml_reader_t  *p_xml_reader = NULL;
+
+    p_sub = stream_MemoryNew( VLC_OBJECT(p_dec),
+                              p_dec->fmt_in.p_extra,
+                              p_dec->fmt_in.i_extra,
+                              VLC_TRUE );
+    if( p_sub == NULL )
+        return;
+
+    p_xml = xml_Create( p_dec );
+    if( p_xml != NULL )
+    {
+        p_xml_reader = xml_ReaderCreate( p_xml, p_sub );
+        if( p_xml_reader != NULL )
+        {
+            /* Look for Root Node */
+            if( xml_ReaderRead( p_xml_reader ) == 1 )
+            {
+                char *psz_node = xml_ReaderName( p_xml_reader );
+
+                /* The name may be missing (ParseUSFHeaderTags guards the same
+                 * call), and strcasecmp() must not be given NULL. */
+                if( psz_node != NULL )
+                {
+                    if( !strcasecmp( "usfsubtitles", psz_node ) )
+                        ParseUSFHeaderTags( p_sys, p_xml_reader );
+
+                    free( psz_node );
+                }
+            }
+
+            xml_ReaderDelete( p_xml, p_xml_reader );
+        }
+        xml_Delete( p_xml );
+    }
+    stream_Delete( p_sub );
+}
+
+/* Read a "#<hex>" colour attribute. Stores the raw numeric value in *pi_col
+ * and returns VLC_SUCCESS when psz_value starts with '#', VLC_EGENERIC
+ * otherwise. */
+static int ParseUSFColorValue( const char *psz_value, unsigned long *pi_col )
+{
+    if( *psz_value != '#' )
+        return VLC_EGENERIC;
+
+    /* strtoul rather than strtol: an 8 digit value with the top bit set does
+     * not fit a 32-bit signed long and would be clamped to LONG_MAX. */
+    *pi_col = strtoul( psz_value + 1, NULL, 16 );
+    return VLC_SUCCESS;
+}
+
+/* Apply a USF <position alignment="..."> value to p_style->i_align. Flags are
+ * OR-ed into the current value, except "MiddleCenter" which resets it to 0.
+ * Unknown values are ignored. */
+static void ApplyUSFAlignment( ssa_style_t *p_style, const char *psz_value )
+{
+    static const struct
+    {
+        const char *psz_name;
+        int         i_flags;
+    } p_alignments[] =
+    {
+        { "TopLeft",      SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT },
+        { "TopCenter",    SUBPICTURE_ALIGN_TOP },
+        { "TopRight",     SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT },
+        { "MiddleLeft",   SUBPICTURE_ALIGN_LEFT },
+        { "MiddleCenter", 0 },
+        { "MiddleRight",  SUBPICTURE_ALIGN_RIGHT },
+        { "BottomLeft",   SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT },
+        { "BottomCenter", SUBPICTURE_ALIGN_BOTTOM },
+        { "BottomRight",  SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT },
+    };
+    unsigned i;
+
+    for( i = 0; i < sizeof(p_alignments) / sizeof(p_alignments[0]); i++ )
+    {
+        if( strcasecmp( p_alignments[i].psz_name, psz_value ) )
+            continue;
+
+        if( p_alignments[i].i_flags == 0 )
+            p_style->i_align = 0;
+        else
+            p_style->i_align |= p_alignments[i].i_flags;
+        return;
+    }
+}
+
+/* <resolution x=".." y="..">: store the original picture size in p_sys. */
+static void ParseUSFResolutionAttrs( decoder_sys_t *p_sys, xml_reader_t *p_xml_reader )
+{
+    while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
+    {
+        char *psz_name = xml_ReaderName ( p_xml_reader );
+        char *psz_value = xml_ReaderValue ( p_xml_reader );
+
+        if( psz_name && psz_value )
+        {
+            if( !strcasecmp( "x", psz_name ) )
+                p_sys->i_original_width = atoi( psz_value );
+            else if( !strcasecmp( "y", psz_name ) )
+                p_sys->i_original_height = atoi( psz_value );
+        }
+        free( psz_name );
+        free( psz_value );
+    }
+}
+
+/* <style name="..">: record the style name on p_style. */
+static void ParseUSFStyleAttrs( ssa_style_t *p_style, xml_reader_t *p_xml_reader )
+{
+    while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
+    {
+        char *psz_name = xml_ReaderName ( p_xml_reader );
+        char *psz_value = xml_ReaderValue ( p_xml_reader );
+
+        if( psz_name && psz_value )
+        {
+            if( !strcasecmp( "name", psz_name ) )
+            {
+                /* Drop any earlier value so a repeat does not leak */
+                free( p_style->psz_stylename );
+                p_style->psz_stylename = strdup( psz_value);
+            }
+        }
+        free( psz_name );
+        free( psz_value );
+    }
+}
+
+/* <fontstyle ..>: face, size, italic/weight/underline flags and the
+ * text, outline and shadow colours of p_style->font_style. */
+static void ParseUSFFontStyleAttrs( ssa_style_t *p_style, xml_reader_t *p_xml_reader )
+{
+    while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
+    {
+        char *psz_name = xml_ReaderName ( p_xml_reader );
+        char *psz_value = xml_ReaderValue ( p_xml_reader );
+        unsigned long col;
+
+        if( psz_name && psz_value )
+        {
+            if( !strcasecmp( "face", psz_name ) )
+            {
+                /* A style may carry several <fontstyle> elements */
+                free( p_style->font_style.psz_fontname );
+                p_style->font_style.psz_fontname = strdup( psz_value);
+            }
+            else if( !strcasecmp( "size", psz_name ) )
+                p_style->font_style.i_font_size = atoi( psz_value);
+            else if( !strcasecmp( "italic", psz_name ) )
+            {
+                if( !strcasecmp( "yes", psz_value ))
+                    p_style->font_style.i_style_flags |= STYLE_ITALIC;
+            }
+            else if( !strcasecmp( "weight", psz_name ) )
+            {
+                if( !strcasecmp( "bold", psz_value ))
+                    p_style->font_style.i_style_flags |= STYLE_BOLD;
+            }
+            else if( !strcasecmp( "underline", psz_name ) )
+            {
+                if( !strcasecmp( "yes", psz_value ))
+                    p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
+            }
+            else if( !strcasecmp( "color", psz_name ) )
+            {
+                if( ParseUSFColorValue( psz_value, &col ) == VLC_SUCCESS )
+                {
+                    p_style->font_style.i_font_color = (col & 0x00ffffff);
+                    /* From DTD: <!-- alpha range = 0..100 --> */
+                    p_style->font_style.i_font_alpha = ((col >> 24) & 0xff) * 255 / 100;
+                }
+            }
+            else if( !strcasecmp( "outline-color", psz_name ) )
+            {
+                if( ParseUSFColorValue( psz_value, &col ) == VLC_SUCCESS )
+                {
+                    p_style->font_style.i_outline_color = (col & 0x00ffffff);
+                    /* From DTD: <!-- alpha range = 0..100 --> */
+                    p_style->font_style.i_outline_alpha = ((col >> 24) & 0xff) * 255 / 100;
+                }
+            }
+            else if( !strcasecmp( "shadow-color", psz_name ) )
+            {
+                if( ParseUSFColorValue( psz_value, &col ) == VLC_SUCCESS )
+                {
+                    p_style->font_style.i_shadow_color = (col & 0x00ffffff);
+                    /* From DTD: <!-- alpha range = 0..100 --> */
+                    p_style->font_style.i_shadow_alpha = ((col >> 24) & 0xff) * 255 / 100;
+                }
+            }
+        }
+        free( psz_name );
+        free( psz_value );
+    }
+}
+
+/* <position alignment="..">: update p_style->i_align. */
+static void ParseUSFPositionAttrs( ssa_style_t *p_style, xml_reader_t *p_xml_reader )
+{
+    while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
+    {
+        char *psz_name = xml_ReaderName ( p_xml_reader );
+        char *psz_value = xml_ReaderValue ( p_xml_reader );
+
+        if( psz_name && psz_value )
+        {
+            if( !strcasecmp( "alignment", psz_name ) )
+                ApplyUSFAlignment( p_style, psz_value );
+        }
+        free( psz_name );
+        free( psz_value );
+    }
+}
+
+/*****************************************************************************
+ * ParseUSFHeaderTags: walk the USF header below the root element
+ *****************************************************************************
+ * Reads nodes from p_xml_reader until xml_ReaderRead() stops returning 1.
+ *  - <metadata><resolution x y> sets p_sys->i_original_width / _height
+ *  - each <styles><style name> ... </style> builds one ssa_style_t from its
+ *    <fontstyle> and <position> children and appends it to
+ *    p_sys->pp_ssa_styles when the closing </style> is seen
+ * i_style_level tracks the nesting (0: outside <styles>, 1: inside <styles>,
+ * 2: inside a <style>); i_metadata_level counts open <metadata> elements.
+ * A style still under construction when the input ends is discarded.
+ *****************************************************************************/
+static void ParseUSFHeaderTags( decoder_sys_t *p_sys, xml_reader_t *p_xml_reader )
+{
+    char *psz_node;
+    ssa_style_t *p_style = NULL;
+    int i_style_level = 0;
+    int i_metadata_level = 0;
+
+    while ( xml_ReaderRead( p_xml_reader ) == 1 )
+    {
+        switch ( xml_ReaderNodeType( p_xml_reader ) )
+        {
+            case XML_READER_TEXT:
+            case XML_READER_NONE:
+                break;
+            case XML_READER_ENDELEM:
+                psz_node = xml_ReaderName( p_xml_reader );
+                if( psz_node == NULL )
+                    break;
+
+                switch (i_style_level)
+                {
+                    case 0:
+                        if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) )
+                            i_metadata_level--;
+                        break;
+                    case 1:
+                        if( !strcasecmp( "styles", psz_node ) )
+                            i_style_level--;
+                        break;
+                    case 2:
+                        /* Level 2 is only entered with an allocated p_style */
+                        if( !strcasecmp( "style", psz_node ) )
+                        {
+                            p_style->font_style.i_text_align = p_style->i_align;
+
+                            TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
+
+                            /* Ownership moved to the styles table */
+                            p_style = NULL;
+                            i_style_level--;
+                        }
+                        break;
+                }
+
+                free( psz_node );
+                break;
+            case XML_READER_STARTELEM:
+                psz_node = xml_ReaderName( p_xml_reader );
+                if( psz_node == NULL )
+                    break;
+
+                if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) )
+                {
+                    i_metadata_level++;
+                }
+                else if( !strcasecmp( "resolution", psz_node ) && (i_metadata_level == 1) )
+                {
+                    ParseUSFResolutionAttrs( p_sys, p_xml_reader );
+                }
+                else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) )
+                {
+                    i_style_level++;
+                }
+                else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) )
+                {
+                    p_style = calloc( 1, sizeof(ssa_style_t) );
+
+                    /* On allocation failure stay at level 1, so the children
+                     * and the closing tag of this style are simply ignored. */
+                    if( p_style != NULL )
+                    {
+                        i_style_level++;
+                        ParseUSFStyleAttrs( p_style, p_xml_reader );
+                    }
+                }
+                else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) )
+                {
+                    ParseUSFFontStyleAttrs( p_style, p_xml_reader );
+                }
+                else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) )
+                {
+                    ParseUSFPositionAttrs( p_style, p_xml_reader );
+                }
+
+                free( psz_node );
+                break;
+            default:
+                break;
+        }
+    }
+
+    /* Input ended inside a <style>: release it together with its strings */
+    if( p_style != NULL )
+    {
+        free( p_style->psz_stylename );
+        free( p_style->font_style.psz_fontname );
+        free( p_style );
+    }
+}
 /*****************************************************************************
  * ParseSSAHeader: Retrieve global formatting information etc
  *****************************************************************************/
@@ -743,45 +1168,293 @@ eof:
     return;
 }
 
-static void StripTags( char *psz_text )
+/*****************************************************************************
+ * StripTags: build a plain-text copy of a subtitle string
+ *****************************************************************************
+ * Everything from a '<' up to and including the next '>' is dropped, so
+ * tags that carry attribute values are handled too. The entities &lt;,
+ * &gt; and &amp; (matched case-insensitively) are decoded to '<', '>' and
+ * '&'; any other '&' is copied through unchanged. A '<' with no matching
+ * '>' discards the remainder of the string.
+ *
+ * The input is not modified, so the original text can be reused.
+ *
+ * Returns a malloc()ed, NUL-terminated string that the caller must free(),
+ * or NULL if the allocation failed.
+ *****************************************************************************/
+static char *StripTags( char *psz_subtitle )
 {
-    int i_left_moves = 0;
-    vlc_bool_t b_inside_tag = VLC_FALSE;
-    int i = 0;
-    int i_tag_start = -1;
-    while( psz_text[ i ] )
+    char *psz_text_start;
+
+    /* The output is never longer than the input */
+    psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
+
+    if( psz_text_start != NULL )
     {
-        if( !b_inside_tag )
+        char *psz_text = psz_text_start;
+        char *psz_shrunk;
+
+        while( *psz_subtitle )
         {
-            if( psz_text[ i ] == '<' )
+            if( *psz_subtitle == '<' )
+            {
+                psz_subtitle += strcspn( psz_subtitle, ">" );
+
+                /* Unterminated tag: we are sitting on the NUL, and the
+                 * increment below would step past the end of the string */
+                if( *psz_subtitle == '\0' )
+                    break;
+            }
+            else if( *psz_subtitle == '&' )
+            {
+                /* For the three known entities the ';' is part of the
+                 * matched prefix, so strcspn() always stops inside it */
+                if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
+                {
+                    *psz_text++ = '<';
+                    psz_subtitle += strcspn( psz_subtitle, ";" );
+                }
+                else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
+                {
+                    *psz_text++ = '>';
+                    psz_subtitle += strcspn( psz_subtitle, ";" );
+                }
+                else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
+                {
+                    *psz_text++ = '&';
+                    psz_subtitle += strcspn( psz_subtitle, ";" );
+                }
+                else
+                {
+                    /* Assume it is just a normal ampersand */
+                    *psz_text++ = '&';
+                }
+            }
+            else
             {
-                b_inside_tag = VLC_TRUE;
-                i_tag_start = i;
+                *psz_text++ = *psz_subtitle;
             }
-            psz_text[ i - i_left_moves ] = psz_text[ i ];
+
+            /* Step over the '>' / ';' / plain character just handled */
+            psz_subtitle++;
         }
-        else
+        *psz_text = '\0';
+
+        /* Give back the unused tail. If the shrink fails the original
+         * buffer is still valid, so keep it instead of losing the text */
+        psz_shrunk = realloc( psz_text_start, psz_text - psz_text_start + 1 );
+        if( psz_shrunk != NULL )
+            psz_text_start = psz_shrunk;
+    }
+    return psz_text_start;
+}
+
+/* Try to respect any style tags present in the subtitle string. The main
+ * problem here is a lack of adequate specs for the subtitle formats.
+ * SSA/ASS and USF are both specified in detail -- but they are handled
+ * elsewhere.
+ * SAMI has a detailed spec, but extensive rework is needed in the demux
+ * code to prevent all this style information being excised, as it presently
+ * is.
+ * That leaves the others - none of which were (I guess) originally intended
+ * to be carrying style information. Over time people have used them that way.
+ * In the absence of specifications from which to work, the tags supported
+ * have been restricted to the simple set permitted by the USF DTD, ie. :
+ *  Basic: <br/>, <i>, <b>, <u>
+ *  Extended: <font>
+ *    Attributes: face
+ *                family
+ *                size
+ *                color
+ *                outline-color
+ *                shadow-color
+ *                outline-level
+ *                shadow-level
+ *                back-color
+ *                alpha
+ * Newline characters are converted to <br/>, a bare '&' is escaped as
+ * &amp;, and any other tag or <font> attribute is dropped from the output.
+ * There is also the further restriction that the subtitle be well-formed
+ * as an XML entity, ie. the HTML sentence:
+ *        <b><i>Bold and Italics</b></i>
+ * doesn't qualify because the tags aren't nested one inside the other.
+ * <text> tags are automatically added to the output to ensure
+ * well-formedness.
+ * If the text doesn't qualify for any reason (mis-nested or unclosed tags,
+ * an unterminated <font> tag, nesting deeper than the tag stack can record,
+ * or an allocation failure), a NULL string is returned, and the rendering
+ * engine will fall back to the plain text version of the subtitle.
+ * Otherwise the result is a malloc()ed, NUL-terminated string which the
+ * caller must free(). The input string is not modified.
+ */
+static char *CreateHtmlSubtitle( char *psz_subtitle )
+{
+    char    psz_tagStack[ 100 ];
+    size_t  i_src_len = strlen( psz_subtitle );
+    size_t  i_buf_size;
+    char   *psz_html_start;
+
+    psz_tagStack[ 0 ] = '\0';
+
+    /* Every input byte yields at most 5 output bytes ('\n' -> "<br/>",
+     * bare '&' -> "&amp;"); add "<text>" (6), "</text>" (7) and the NUL.
+     * Sizing for the worst case up front means none of the writes below
+     * can run off the end of the buffer. */
+    if( i_src_len > ( (size_t)-1 - 14 ) / 5 )
+        return NULL;
+    i_buf_size = i_src_len * 5 + 14;
+    psz_html_start = malloc( i_buf_size );
+
+    if( psz_html_start != NULL )
+    {
+        char *psz_html = psz_html_start;
+
+        strcpy( psz_html, "<text>" );
+        psz_html += 6;
+
+        /* Any "break" out of this loop happens with psz_html_start already
+         * freed and set to NULL, ie. the subtitle was rejected */
+        while( *psz_subtitle )
         {
-            if( ( psz_text[ i ] == ' ' ) ||
-                ( psz_text[ i ] == '\t' ) ||
-                ( psz_text[ i ] == '\n' ) ||
-                ( psz_text[ i ] == '\r' ) )
+            if( *psz_subtitle == '\n' )
             {
-                b_inside_tag = VLC_FALSE;
-                i_tag_start = -1;
+                strcpy( psz_html, "<br/>" );
+                psz_html += 5;
+                psz_subtitle++;
             }
-            else if( psz_text[ i ] == '>' )
+            else if( *psz_subtitle == '<' )
             {
-                i_left_moves += i - i_tag_start + 1;
-                i_tag_start = -1;
-                b_inside_tag = VLC_FALSE;
+                /* Each supported opening tag pushes one character on
+                 * psz_tagStack. Reject (conservatively, for any non-closing
+                 * tag) input nested deeper than the stack can record
+                 * rather than overflowing it. */
+                if( psz_subtitle[ 1 ] != '/' &&
+                    strlen( psz_tagStack ) >= sizeof( psz_tagStack ) - 1 )
+                {
+                    free( psz_html_start );
+                    psz_html_start = NULL;
+                    break;
+                }
+
+                if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
+                {
+                    strcpy( psz_html, "<br/>" );
+                    psz_html += 5;
+                    psz_subtitle += 5;
+                }
+                else if( !strncasecmp( psz_subtitle, "<b>", 3 ) )
+                {
+                    strcpy( psz_html, "<b>" );
+                    strcat( psz_tagStack, "b" );
+                    psz_html += 3;
+                    psz_subtitle += 3;
+                }
+                else if( !strncasecmp( psz_subtitle, "<i>", 3 ) )
+                {
+                    strcpy( psz_html, "<i>" );
+                    strcat( psz_tagStack, "i" );
+                    psz_html += 3;
+                    psz_subtitle += 3;
+                }
+                else if( !strncasecmp( psz_subtitle, "<u>", 3 ) )
+                {
+                    strcpy( psz_html, "<u>" );
+                    strcat( psz_tagStack, "u" );
+                    psz_html += 3;
+                    psz_subtitle += 3;
+                }
+                else if( !strncasecmp( psz_subtitle, "<font ", 6 ))
+                {
+                    const char *psz_attribs[] = { "face=\"", "family=\"", "size=\"",
+                            "color=\"", "outline-color=\"", "shadow-color=\"",
+                            "outline-level=\"", "shadow-level=\"", "back-color=\"",
+                            "alpha=\"", NULL };
+
+                    strcpy( psz_html, "<font " );
+                    strcat( psz_tagStack, "f" );
+                    psz_html += 6;
+                    psz_subtitle += 6;
+
+                    /* Walk the attribute list. Every path either advances
+                     * psz_subtitle or leaves it on '>' / NUL, and none of
+                     * them ever steps past the terminating NUL. */
+                    while( *psz_subtitle != '\0' && *psz_subtitle != '>' )
+                    {
+                        int  k;
+
+                        for( k=0; psz_attribs[ k ]; k++ )
+                        {
+                            size_t i_len = strlen( psz_attribs[ k ] );
+
+                            if( !strncasecmp( psz_subtitle, psz_attribs[ k ], i_len ))
+                            {
+                                /* Extend over the value, up to its closing quote */
+                                i_len += strcspn( psz_subtitle + i_len, "\"" );
+
+                                if( psz_subtitle[ i_len ] == '"' )
+                                {
+                                    i_len++;
+                                    memcpy( psz_html, psz_subtitle, i_len );
+                                    psz_html += i_len;
+                                }
+                                /* else: the value is unterminated; we land on
+                                 * the NUL and the tag is rejected below */
+                                psz_subtitle += i_len;
+                                break;
+                            }
+                        }
+                        if( psz_attribs[ k ] == NULL )
+                        {
+                            /* Jump over unrecognised attribute, but never
+                             * beyond the end of this tag */
+                            size_t i_len = strcspn( psz_subtitle, "\">" );
+
+                            if( psz_subtitle[ i_len ] == '"' )
+                            {
+                                /* Quoted value: skip through the closing quote */
+                                i_len++;
+                                i_len += strcspn( psz_subtitle + i_len, "\"" );
+                                if( psz_subtitle[ i_len ] == '"' )
+                                    i_len++;
+                            }
+                            psz_subtitle += i_len;
+                        }
+                        while (*psz_subtitle == ' ')
+                            *psz_html++ = *psz_subtitle++;
+                    }
+                    if( *psz_subtitle != '>' )
+                    {
+                        /* Tag never closed -- kill everything */
+                        free( psz_html_start );
+                        psz_html_start = NULL;
+                        break;
+                    }
+                    *psz_html++ = *psz_subtitle++;
+                }
+                else if( !strncmp( psz_subtitle, "</", 2 ))
+                {
+                    vlc_bool_t  b_match     = VLC_FALSE;
+                    int         i_len       = strlen( psz_tagStack ) - 1;
+                    char       *psz_lastTag = NULL;
+
+                    /* A closing tag is only accepted if it matches the
+                     * most recently opened tag (top of the stack) */
+                    if( i_len >= 0 )
+                    {
+                        psz_lastTag = psz_tagStack + i_len;
+                        i_len = 0;
+
+                        switch( *psz_lastTag )
+                        {
+                            case 'b':
+                                b_match = !strncasecmp( psz_subtitle, "</b>", 4 );
+                                i_len   = 4;
+                                break;
+                            case 'i':
+                                b_match = !strncasecmp( psz_subtitle, "</i>", 4 );
+                                i_len   = 4;
+                                break;
+                            case 'u':
+                                b_match = !strncasecmp( psz_subtitle, "</u>", 4 );
+                                i_len   = 4;
+                                break;
+                            case 'f':
+                                b_match = !strncasecmp( psz_subtitle, "</font>", 7 );
+                                i_len   = 7;
+                                break;
+                        }
+                    }
+                    if( ! b_match )
+                    {
+                        /* Not well formed -- kill everything */
+                        free( psz_html_start );
+                        psz_html_start = NULL;
+                        break;
+                    }
+                    /* Pop the tag and copy the closing tag through */
+                    *psz_lastTag = '\0';
+                    memcpy( psz_html, psz_subtitle, i_len );
+                    psz_html += i_len;
+                    psz_subtitle += i_len;
+                }
+                else
+                {
+                    /* Unsupported tag: drop it, including its closing '>'
+                     * so that no stray '>' ends up in the output */
+                    psz_subtitle += strcspn( psz_subtitle, ">" );
+                    if( *psz_subtitle == '>' )
+                        psz_subtitle++;
+                }
+            }
+            else if( *psz_subtitle == '&' )
+            {
+                if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
+                {
+                    strcpy( psz_html, "&lt;" );
+                    psz_html += 4;
+                    psz_subtitle += 4;
+                }
+                else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
+                {
+                    strcpy( psz_html, "&gt;" );
+                    psz_html += 4;
+                    psz_subtitle += 4;
+                }
+                else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
+                {
+                    strcpy( psz_html, "&amp;" );
+                    psz_html += 5;
+                    psz_subtitle += 5;
+                }
+                else
+                {
+                    /* Bare ampersand: escape it to keep the output valid */
+                    strcpy( psz_html, "&amp;" );
+                    psz_html += 5;
+                    psz_subtitle++;
+                }
             }
             else
             {
-                psz_text[ i - i_left_moves ] = psz_text[ i ];
+                *psz_html++ = *psz_subtitle++;
             }
         }
-        i++;
+
+        if( psz_html_start != NULL && psz_tagStack[ 0 ] != '\0' )
+        {
+            /* Tags left open: not well formed -- kill everything */
+            free( psz_html_start );
+            psz_html_start = NULL;
+        }
+        if( psz_html_start != NULL )
+        {
+            char *psz_shrunk;
+
+            /* Only touch the buffer once we know it has not been freed */
+            strcpy( psz_html, "</text>" );
+            psz_html += 7;
+
+            /* Shrink the memory requirements; if that fails the larger
+             * buffer is still valid, so keep it */
+            psz_shrunk = realloc( psz_html_start, psz_html - psz_html_start + 1 );
+            if( psz_shrunk != NULL )
+                psz_html_start = psz_shrunk;
+        }
     }
-    psz_text[ i - i_left_moves ] = '\0';
+    return psz_html_start;
 }