]> git.sesse.net Git - vlc/blob - modules/codec/subsdec.c
Patch by Bernie Purcell :
[vlc] / modules / codec / subsdec.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2006 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@videolan.org>
8  *          Samuel Hocevar <sam@zoy.org>
9  *          Derk-Jan Hartman <hartman at videolan dot org>
10  *          Bernie Purcell <b dot purcell at adbglobal dot com>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include <vlc/vlc.h>
31 #include <vlc_vout.h>
32 #include <vlc_codec.h>
33
34 #include <vlc_osd.h>
35 #include <vlc_filter.h>
36 #include <vlc_charset.h>
37 #include <vlc_stream.h>
38 #include <vlc_xml.h>
39 #include <errno.h>
40 #include <string.h>
41
/* One named style taken from an SSA/USF header. Subtitle lines select a
 * style by name; the matching entry supplies font, alignment and margins. */
42 typedef struct
43 {
44     char *          psz_stylename; /* Name of the style; commas are not allowed */
45     text_style_t    font_style;    /* Handed by address to the region as p_style */
46     int             i_align;       /* Copied to the region's i_align */
47     int             i_margin_h;    /* x offset used when an SSA line gives no L/R margin */
48     int             i_margin_v;    /* y offset used when an SSA line gives no V margin */
49 }  ssa_style_t;
50
51 /*****************************************************************************
52  * decoder_sys_t : decoder descriptor
53  *****************************************************************************/
/* Private decoder state, allocated in OpenDecoder and freed in CloseDecoder. */
54 struct decoder_sys_t
55 {
56     vlc_bool_t          b_ass;                           /* The subs are ASS */
57     int                 i_original_height; /* -1 until set; copied to the SPU for SSA/USF */
58     int                 i_original_width;  /* -1 until set; copied to the SPU for SSA/USF */
59     int                 i_align;          /* Subtitles alignment on the vout */
60     vlc_iconv_t         iconv_handle;     /* (vlc_iconv_t)-1 when no conversion is set up */
61     vlc_bool_t          b_autodetect_utf8; /* cleared once a non UTF-8 packet is seen */
62
63     ssa_style_t         **pp_ssa_styles;  /* array of i_ssa_styles owned style pointers */
64     int                 i_ssa_styles;
65 };
66
67 /*****************************************************************************
68  * Local prototypes
69  *****************************************************************************/
/* Module entry points registered through set_callbacks(). */
70 static int  OpenDecoder   ( vlc_object_t * );
71 static void CloseDecoder  ( vlc_object_t * );
72
/* Per-packet decoding and header/line parsing helpers. */
73 static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
74 static subpicture_t *ParseText     ( decoder_t *, block_t * );
75 static void         ParseSSAHeader ( decoder_t * );
76 static void         ParseUSFHeader ( decoder_t * );
77 static void         ParseUSFHeaderTags( decoder_sys_t *, xml_reader_t * );
78 static void         ParseSSAString ( decoder_t *, char *, subpicture_t * );
79 static void         ParseUSFString ( decoder_t *, char *, subpicture_t * );
80 static void         ParseColor     ( decoder_t *, char *, int *, int * );
81 static char        *StripTags      ( char * );
82 static char        *CreateHtmlSubtitle ( char * );
83
/* DEFAULT_NAME is both the first entry of the encoding list and the default
 * value of "subsdec-encoding"; OpenDecoder treats it as "no explicit choice".
 * MAX_LINE is not referenced in the part of the file visible here. */
84 #define DEFAULT_NAME "Default"
85 #define MAX_LINE 8192
86
87 /*****************************************************************************
88  * Module descriptor.
89  *****************************************************************************/
/* Values offered for the "subsdec-encoding" option (see change_string_list in
 * the module descriptor). The first entry is DEFAULT_NAME. The empty strings
 * sit between groups of related encodings - presumably shown as separators by
 * the preferences UI; TODO confirm. */
90 static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
91     "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
92     "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
93     "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
94     "ISO-8859-6", "CP1256", "MacArabic", "",
95     "ISO-8859-7", "CP1253", "MacGreek", "",
96     "ISO-8859-8", "CP1255", "MacHebrew", "",
97     "ISO-8859-9", "CP1254", "MacTurkish", "",
98     "ISO-8859-13", "CP1257", "",
99     "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
100     "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
101     "ISO-2022-KR", "EUC-KR", "",
102     "MacThai", "KOI8-T", "",
103     "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
104     "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
105     "Macintosh", "",
106     "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
107     "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
108     "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
109     "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
110     "HPROMAN8", "NEXTSTEP" };
111 /*
112 SSA supports charset selection.
113 The following known charsets are used:
114
115 0 = Ansi - Western European
116 1 = default
117 2 = symbol
118 3 = invalid
119 77 = Mac
120 128 = Japanese (Shift JIS)
121 129 = Hangul
122 130 = Johab
123 134 = GB2312 Simplified Chinese
124 136 = Big5 Traditional Chinese
125 161 = Greek
126 162 = Turkish
127 163 = Vietnamese
128 177 = Hebrew
129 178 = Arabic
130 186 = Baltic
131 204 = Russian (Cyrillic)
132 222 = Thai
133 238 = Eastern European
134 254 = PC 437
135 */
136
/* Values and matching labels for the "subsdec-align" option. The chosen value
 * is stored in decoder_sys_t.i_align and OR-ed with SUBPICTURE_ALIGN_BOTTOM
 * when a region is positioned. */
137 static int  pi_justification[] = { 0, 1, 2 };
138 static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
139
/* Short and long help texts of the options declared in the module descriptor. */
140 #define ENCODING_TEXT N_("Subtitles text encoding")
141 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
142 #define ALIGN_TEXT N_("Subtitles justification")
143 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
144 #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
145 #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
146             "UTF-8 encoding within subtitles files.")
147 #define FORMAT_TEXT N_("Formatted Subtitles")
148 #define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
149  "VLC partly implements this, but you can choose to disable all formatting.")
150
151
/* Module descriptor: declares a "decoder" capability with OpenDecoder and
 * CloseDecoder as callbacks, plus four options:
 *   subsdec-align            integer, default 0, restricted to pi_justification
 *   subsdec-encoding         string, default DEFAULT_NAME, from ppsz_encodings
 *   subsdec-autodetect-utf8  boolean, default VLC_TRUE
 *   subsdec-formatted        boolean, default VLC_TRUE */
152 vlc_module_begin();
153     set_shortname( _("Subtitles"));
154     set_description( _("Text subtitles decoder") );
155     set_capability( "decoder", 50 );
156     set_callbacks( OpenDecoder, CloseDecoder );
157     set_category( CAT_INPUT );
158     set_subcategory( SUBCAT_INPUT_SCODEC );
159
160     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
161                  VLC_FALSE );
162         change_integer_list( pi_justification, ppsz_justification_text, 0 );
163     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
164                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
165         change_string_list( ppsz_encodings, 0, 0 );
166     add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
167               AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
168     add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
169                  VLC_FALSE );
170 vlc_module_end();
171
172 /*****************************************************************************
173  * OpenDecoder: probe the decoder and return score
174  *****************************************************************************
175  * Tries to launch a decoder and return score so that the interface is able
176  * to chose.
177  *****************************************************************************/
178 static int OpenDecoder( vlc_object_t *p_this )
179 {
180     decoder_t     *p_dec = (decoder_t*)p_this;
181     decoder_sys_t *p_sys;
182     vlc_value_t    val;
183
184     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
185         p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') &&
186         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
187     {
188         return VLC_EGENERIC;
189     }
190
191     p_dec->pf_decode_sub = DecodeBlock;
192
193     /* Allocate the memory needed to store the decoder's structure */
194     if( ( p_dec->p_sys = p_sys =
195           (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL )
196     {
197         msg_Err( p_dec, "out of memory" );
198         return VLC_ENOMEM;
199     }
200
201     /* init of p_sys */
202     p_sys->i_align = 0;
203     p_sys->iconv_handle = (vlc_iconv_t)-1;
204     p_sys->b_autodetect_utf8 = VLC_FALSE;
205     p_sys->b_ass = VLC_FALSE;
206     p_sys->i_original_height = -1;
207     p_sys->i_original_width = -1;
208     p_sys->pp_ssa_styles = NULL;
209     p_sys->i_ssa_styles = 0;
210
211     char *psz_charset = NULL;
212     /* First try demux-specified encoding */
213     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
214     {
215         psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
216         msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
217                  p_dec->fmt_in.subs.psz_encoding ?: "not specified");
218     }
219
220     /* Second, try configured encoding */
221     if (psz_charset == NULL)
222     {
223         psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
224         if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME))
225         {
226             free (psz_charset);
227             psz_charset = NULL;
228         }
229
230         msg_Dbg (p_dec, "trying configured character encoding: %s",
231                  psz_charset ?: "not specified");
232     }
233
234     /* Third, try "local" encoding with optional UTF-8 autodetection */
235     if (psz_charset == NULL)
236     {
237         psz_charset = strdup (GetFallbackEncoding ());
238         msg_Dbg (p_dec, "trying default character encoding: %s",
239                  psz_charset ?: "not specified");
240
241         if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
242         {
243             msg_Dbg (p_dec, "using automatic UTF-8 detection");
244             p_sys->b_autodetect_utf8 = VLC_TRUE;
245         }
246     }
247
248     if (psz_charset == NULL)
249     {
250         psz_charset = strdup ("UTF-8");
251         msg_Dbg (p_dec, "trying hard-coded character encoding: %s",
252                  psz_charset ?: "error");
253     }
254
255     if (psz_charset == NULL)
256     {
257         free (p_sys);
258         return VLC_ENOMEM;
259     }
260
261     if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8"))
262     {
263         p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
264         if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
265             msg_Err (p_dec, "cannot convert from %s: %s", psz_charset,
266                      strerror (errno));
267     }
268     free (psz_charset);
269
270     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
271     var_Get( p_dec, "subsdec-align", &val );
272     p_sys->i_align = val.i_int;
273
274     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
275     {
276         if( p_dec->fmt_in.i_extra > 0 )
277             ParseSSAHeader( p_dec );
278     }
279     else if( p_dec->fmt_in.i_codec == VLC_FOURCC('u','s','f',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
280     {
281         if( p_dec->fmt_in.i_extra > 0 )
282             ParseUSFHeader( p_dec );
283     }
284
285     return VLC_SUCCESS;
286 }
287
288 /****************************************************************************
289  * DecodeBlock: the whole thing
290  ****************************************************************************
291  * This function must be fed with complete subtitles units.
292  ****************************************************************************/
293 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
294 {
295     subpicture_t *p_spu = NULL;
296
297     if( !pp_block || *pp_block == NULL ) return NULL;
298
299     p_spu = ParseText( p_dec, *pp_block );
300
301     block_Release( *pp_block );
302     *pp_block = NULL;
303
304     return p_spu;
305 }
306
307 /*****************************************************************************
308  * CloseDecoder: clean up the decoder
309  *****************************************************************************/
310 static void CloseDecoder( vlc_object_t *p_this )
311 {
312     decoder_t *p_dec = (decoder_t *)p_this;
313     decoder_sys_t *p_sys = p_dec->p_sys;
314
315     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
316     {
317         vlc_iconv_close( p_sys->iconv_handle );
318     }
319
320     if( p_sys->pp_ssa_styles )
321     {
322         int i;
323         for( i = 0; i < p_sys->i_ssa_styles; i++ )
324         {
325             if( p_sys->pp_ssa_styles[i]->psz_stylename ) free( p_sys->pp_ssa_styles[i]->psz_stylename );
326             p_sys->pp_ssa_styles[i]->psz_stylename = NULL;
327             if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ) free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname );
328             p_sys->pp_ssa_styles[i]->font_style.psz_fontname = NULL;
329             if( p_sys->pp_ssa_styles[i] ) free( p_sys->pp_ssa_styles[i] ); p_sys->pp_ssa_styles[i] = NULL;
330         }
331         free( p_sys->pp_ssa_styles ); p_sys->pp_ssa_styles = NULL;
332     }
333
334     free( p_sys );
335 }
336
337 /*****************************************************************************
338  * ParseText: parse an text subtitle packet and send it to the video output
339  *****************************************************************************/
340 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
341 {
342     decoder_sys_t *p_sys = p_dec->p_sys;
343     subpicture_t *p_spu = NULL;
344     char *psz_subtitle = NULL;
345     video_format_t fmt;
346
347     /* We cannot display a subpicture with no date */
348     if( p_block->i_pts == 0 )
349     {
350         msg_Warn( p_dec, "subtitle without a date" );
351         return NULL;
352     }
353
354     /* Check validity of packet data */
355     /* An "empty" line containing only \0 can be used to force
356        and ephemer picture from the screen */
357     if( p_block->i_buffer < 1 )
358     {
359         msg_Warn( p_dec, "no subtitle data" );
360         return NULL;
361     }
362
363     /* Should be resiliant against bad subtitles */
364     psz_subtitle = strndup( (const char *)p_block->p_buffer,
365                             p_block->i_buffer );
366     if( psz_subtitle == NULL )
367         return NULL;
368
369     if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
370     {
371         if (EnsureUTF8( psz_subtitle ) == NULL)
372         {
373             msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
374                      "Try manually setting a character-encoding "
375                      "before you open the file.") );
376         }
377     }
378     else
379     {
380
381         if( p_sys->b_autodetect_utf8 )
382         {
383             if( IsUTF8( psz_subtitle ) == NULL )
384             {
385                 msg_Dbg( p_dec, "invalid UTF-8 sequence: "
386                          "disabling UTF-8 subtitles autodetection" );
387                 p_sys->b_autodetect_utf8 = VLC_FALSE;
388             }
389         }
390
391         if( !p_sys->b_autodetect_utf8 )
392         {
393             size_t inbytes_left = strlen( psz_subtitle );
394             size_t outbytes_left = 6 * inbytes_left;
395             char *psz_new_subtitle = malloc( outbytes_left + 1 );
396             char *psz_convert_buffer_out = psz_new_subtitle;
397             const char *psz_convert_buffer_in = psz_subtitle;
398
399             size_t ret = vlc_iconv( p_sys->iconv_handle,
400                                     &psz_convert_buffer_in, &inbytes_left,
401                                     &psz_convert_buffer_out, &outbytes_left );
402
403             *psz_convert_buffer_out++ = '\0';
404             free( psz_subtitle );
405
406             if( ( ret == (size_t)(-1) ) || inbytes_left )
407             {
408                 free( psz_new_subtitle );
409                 msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
410                         "Try manually setting a character-encoding "
411                                 "before you open the file.") );
412                 return NULL;
413             }
414
415             psz_subtitle = realloc( psz_new_subtitle,
416                                     psz_convert_buffer_out - psz_new_subtitle );
417         }
418     }
419
420     /* Create the subpicture unit */
421     p_spu = p_dec->pf_spu_buffer_new( p_dec );
422     if( !p_spu )
423     {
424         msg_Warn( p_dec, "can't get spu buffer" );
425         if( psz_subtitle ) free( psz_subtitle );
426         return NULL;
427     }
428
429     p_spu->b_pausable = VLC_TRUE;
430
431     /* Create a new subpicture region */
432     memset( &fmt, 0, sizeof(video_format_t) );
433     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
434     fmt.i_aspect = 0;
435     fmt.i_width = fmt.i_height = 0;
436     fmt.i_x_offset = fmt.i_y_offset = 0;
437     p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
438     if( !p_spu->p_region )
439     {
440         msg_Err( p_dec, "cannot allocate SPU region" );
441         if( psz_subtitle ) free( psz_subtitle );
442         p_dec->pf_spu_buffer_del( p_dec, p_spu );
443         return NULL;
444     }
445
446     /* Decode and format the subpicture unit */
447     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') &&
448         p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') )
449     {
450         /* Normal text subs, easy markup */
451         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
452         p_spu->i_x = p_sys->i_align ? 20 : 0;
453         p_spu->i_y = 10;
454
455         /* Remove formatting from string */
456
457         p_spu->p_region->psz_text = StripTags( psz_subtitle );
458         p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle );
459
460         p_spu->i_start = p_block->i_pts;
461         p_spu->i_stop = p_block->i_pts + p_block->i_length;
462         p_spu->b_ephemer = (p_block->i_length == 0);
463         p_spu->b_absolute = VLC_FALSE;
464     }
465     else
466     {
467         /* Decode SSA/USF strings */
468         if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
469             ParseSSAString( p_dec, psz_subtitle, p_spu );
470         else
471             ParseUSFString( p_dec, psz_subtitle, p_spu );
472
473         p_spu->i_start = p_block->i_pts;
474         p_spu->i_stop = p_block->i_pts + p_block->i_length;
475         p_spu->b_ephemer = (p_block->i_length == 0);
476         p_spu->b_absolute = VLC_FALSE;
477         p_spu->i_original_picture_width = p_sys->i_original_width;
478         p_spu->i_original_picture_height = p_sys->i_original_height;
479     }
480     if( psz_subtitle ) free( psz_subtitle );
481
482     return p_spu;
483 }
484
485 static void ParseUSFString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
486 {
487     decoder_sys_t   *p_sys = p_dec->p_sys;
488     subpicture_t    *p_spu = p_spu_in;
489     char            *psz_text;
490     char            *psz_text_start;
491     ssa_style_t     *p_style = NULL;
492     int              i;
493
494     /* Create a text only copy of the subtitle (for legacy implementations) and copy
495      * the rich html version across as is - for parsing by a rendering engine capable
496      * of understanding it.
497      */
498     p_spu->p_region->psz_text = NULL;
499     p_spu->p_region->psz_html = strdup( psz_subtitle );
500
501     for( i = 0; i < p_sys->i_ssa_styles; i++ )
502     {
503         if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
504             p_style = p_sys->pp_ssa_styles[i];
505     }
506
507     /* The StripTags() function doesn't do everything we need (eg. <br/> tag )
508      * so do it here ourselves.
509      */
510     psz_text_start = malloc( strlen( psz_subtitle ));
511
512     psz_text = psz_text_start;
513     while( *psz_subtitle )
514     {
515         if( *psz_subtitle == '<' )
516         {
517             if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
518                 *psz_text++ = '\n';
519             else if( !strncasecmp( psz_subtitle, "<text ", 6 ))
520             {
521                 char *psz_style = strcasestr( psz_subtitle, "style=\"" );
522
523                 if( psz_style && ( psz_style < strchr( psz_subtitle, '>' ) ))
524                 {
525                     int i_len;
526
527                     psz_style += strspn( psz_style, "\"" ) + 1;
528                     i_len = strcspn( psz_style, "\"" );
529
530                     psz_style[ i_len ] = '\0';
531
532                     for( i = 0; i < p_sys->i_ssa_styles; i++ )
533                     {
534                         if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
535                             p_style = p_sys->pp_ssa_styles[i];
536                     }
537
538                     psz_style[ i_len ] = '\"';
539                 }
540             }
541             
542             psz_subtitle += strcspn( psz_subtitle, ">" );
543         }
544         else if( *psz_subtitle == '&' )
545         {
546             if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
547                 *psz_text++ = '<';
548             else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
549                 *psz_text++ = '>';
550             else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
551                 *psz_text++ = '&';
552
553             psz_subtitle += strcspn( psz_subtitle, ";" );
554         }
555         else if( ( *psz_subtitle == '\t' ) ||
556                  ( *psz_subtitle == '\r' ) ||
557                  ( *psz_subtitle == '\n' ) ||
558                  ( *psz_subtitle == ' ' ) )
559         {
560             if( ( psz_text_start < psz_text ) &&
561                 ( *(psz_text-1) != ' ' ) )
562             {
563                 *psz_text++ = ' ';
564             }
565         }
566         else
567             *psz_text++ = *psz_subtitle;
568
569         psz_subtitle++;
570     }
571     *psz_text = '\0';
572     p_spu->p_region->psz_text = strdup( psz_text_start );
573     free( psz_text_start );
574
575     if( p_style == NULL )
576     {
577         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
578         p_spu->i_x = p_sys->i_align ? 20 : 0;
579         p_spu->i_y = 10;
580     }
581     else
582     {
583         msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename);
584         p_spu->p_region->p_style = &p_style->font_style;
585         p_spu->p_region->i_align = p_style->i_align;
586     }
587 }
588
589 static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
590 {
591     /* We expect MKV formatted SSA:
592      * ReadOrder, Layer, Style, CharacterName, MarginL, MarginR,
593      * MarginV, Effect, Text */
594     decoder_sys_t   *p_sys = p_dec->p_sys;
595     subpicture_t    *p_spu = p_spu_in;
596     ssa_style_t     *p_style = NULL;
597     char            *psz_new_subtitle = NULL;
598     char            *psz_buffer_sub = NULL;
599     char            *psz_style = NULL;
600     char            *psz_style_start = NULL;
601     char            *psz_style_end = NULL;
602     int             i_text = 0, i_comma = 0, i_strlen = 0, i;
603     int             i_margin_l = 0, i_margin_r = 0, i_margin_v = 0;
604
605     psz_buffer_sub = psz_subtitle;
606
607     p_spu->p_region->psz_html = NULL;
608
609     i_comma = 0;
610     while( i_comma < 8 && *psz_buffer_sub != '\0' )
611     {
612         if( *psz_buffer_sub == ',' )
613         {
614             i_comma++;
615             if( i_comma == 2 ) psz_style_start = &psz_buffer_sub[1];
616             if( i_comma == 3 ) psz_style_end = &psz_buffer_sub[0];
617             if( i_comma == 4 ) i_margin_l = (int)strtol( psz_buffer_sub+1, NULL, 10 );
618             if( i_comma == 5 ) i_margin_r = (int)strtol( psz_buffer_sub+1, NULL, 10 );
619             if( i_comma == 6 ) i_margin_v = (int)strtol( psz_buffer_sub+1, NULL, 10 );
620         }
621         psz_buffer_sub++;
622     }
623
624     if( *psz_buffer_sub == '\0' && i_comma == 8 )
625     {
626         msg_Dbg( p_dec, "couldn't find all fields in this SSA line" );
627         return;
628     }
629
630     psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
631     i_text = 0;
632     while( psz_buffer_sub[0] != '\0' )
633     {
634         if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'n' )
635         {
636             psz_new_subtitle[i_text] = ' ';
637             i_text++;
638             psz_buffer_sub += 2;
639         }
640         else if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'N' )
641         {
642             psz_new_subtitle[i_text] = '\n';
643             i_text++;
644             psz_buffer_sub += 2;
645         }
646         else if( psz_buffer_sub[0] == '{' &&
647                  psz_buffer_sub[1] == '\\' )
648         {
649             /* SSA control code */
650             while( psz_buffer_sub[0] != '\0' &&
651                    psz_buffer_sub[0] != '}' )
652             {
653                 psz_buffer_sub++;
654             }
655             psz_buffer_sub++;
656         }
657         else
658         {
659             psz_new_subtitle[i_text] = psz_buffer_sub[0];
660             i_text++;
661             psz_buffer_sub++;
662         }
663     }
664     psz_new_subtitle[i_text] = '\0';
665
666     i_strlen = __MAX( psz_style_end - psz_style_start, 0);
667     psz_style = (char *)malloc( i_strlen + 1);
668     psz_style = memcpy( psz_style, psz_style_start, i_strlen );
669     psz_style[i_strlen] = '\0';
670
671     for( i = 0; i < p_sys->i_ssa_styles; i++ )
672     {
673         if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
674             p_style = p_sys->pp_ssa_styles[i];
675     }
676     if( psz_style ) free( psz_style );
677
678     p_spu->p_region->psz_text = psz_new_subtitle;
679     if( p_style == NULL )
680     {
681         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
682         p_spu->i_x = p_sys->i_align ? 20 : 0;
683         p_spu->i_y = 10;
684     }
685     else
686     {
687         msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename);
688         p_spu->p_region->p_style = &p_style->font_style;
689         p_spu->p_region->i_align = p_style->i_align;
690         if( p_style->i_align & SUBPICTURE_ALIGN_LEFT )
691         {
692             p_spu->i_x = (i_margin_l) ? i_margin_l : p_style->i_margin_h;
693         }
694         else if( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) 
695         {
696             p_spu->i_x = (i_margin_r) ? i_margin_r : p_style->i_margin_h;
697         }
698         p_spu->i_y = (i_margin_v) ? i_margin_v : p_style->i_margin_v;
699     }
700 }
701
/*****************************************************************************
 * GotoNextLine: return a pointer to the start of the next line
 *****************************************************************************
 * Skips everything up to the first '\n' or '\r', then the whole run of
 * consecutive '\n' / '\r' characters that follows. If the text holds no line
 * terminator the returned pointer addresses the terminating NUL.
 *****************************************************************************/
static char* GotoNextLine( char *psz_text )
{
    char *p_cursor = psz_text;

    /* Rest of the current line */
    p_cursor += strcspn( p_cursor, "\r\n" );

    /* Line terminators, however many there are */
    p_cursor += strspn( p_cursor, "\r\n" );

    return p_cursor;
}
719
720 /*****************************************************************************
721  * ParseColor: SSA stores color in BBGGRR, in ASS it uses AABBGGRR
722  * The string value in the string can be a pure integer, or hexadecimal &HBBGGRR
723  *****************************************************************************/
724 static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *pi_alpha )
725 {
726     int i_color = 0;
727     if( !strncasecmp( psz_color, "&H", 2 ) )
728     {
729         /* textual HEX representation */
730         i_color = (int) strtol( psz_color+2, NULL, 16 );
731     }
732     else i_color = (int) strtol( psz_color, NULL, 0 );
733
734     *pi_color = 0;
735     *pi_color |= ( ( i_color & 0x000000FF ) << 16 ); /* Red */
736     *pi_color |= ( ( i_color & 0x0000FF00 ) );       /* Green */
737     *pi_color |= ( ( i_color & 0x00FF0000 ) >> 16 ); /* Blue */
738
739     if( pi_alpha != NULL )
740         *pi_alpha = ( i_color & 0xFF000000 ) >> 24;
741 }
742
743 /*****************************************************************************
744  * ParseUSFHeader: Retrieve global formatting information etc
745  *****************************************************************************/
746 static void ParseUSFHeader( decoder_t *p_dec )
747 {
748     decoder_sys_t *p_sys = p_dec->p_sys;
749     stream_t      *p_sub = NULL;
750     xml_t         *p_xml = NULL;
751     xml_reader_t  *p_xml_reader = NULL;
752
753     p_sub = stream_MemoryNew( VLC_OBJECT(p_dec),
754                               p_dec->fmt_in.p_extra,
755                               p_dec->fmt_in.i_extra,
756                               VLC_TRUE );
757     if( p_sub )
758     {
759         p_xml = xml_Create( p_dec );
760         if( p_xml )
761         {
762             p_xml_reader = xml_ReaderCreate( p_xml, p_sub );
763             if( p_xml_reader )
764             {
765                 /* Look for Root Node */
766                 if( xml_ReaderRead( p_xml_reader ) == 1 )
767                 {
768                     char *psz_node = xml_ReaderName( p_xml_reader );
769
770                     if( !strcasecmp( "usfsubtitles", psz_node ) )
771                         ParseUSFHeaderTags( p_sys, p_xml_reader );
772
773                     free( psz_node );
774                 }
775
776                 xml_ReaderDelete( p_xml, p_xml_reader );
777             }
778             xml_Delete( p_xml );
779         }
780         stream_Delete( p_sub );
781     }
782 }
783
784 static void ParseUSFHeaderTags( decoder_sys_t *p_sys, xml_reader_t *p_xml_reader )
785 {
786     char *psz_node;
787     ssa_style_t *p_style = NULL;
788     int i_style_level = 0;
789     int i_metadata_level = 0;
790
791     while ( xml_ReaderRead( p_xml_reader ) == 1 )
792     {
793         switch ( xml_ReaderNodeType( p_xml_reader ) )
794         {
795             case XML_READER_TEXT:
796             case XML_READER_NONE:
797                 break;
798             case XML_READER_ENDELEM:
799                 psz_node = xml_ReaderName( p_xml_reader );
800                 
801                 if( psz_node )
802                 {
803                     switch (i_style_level)
804                     {
805                         case 0:
806                             if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) )
807                             {
808                                 i_metadata_level--;
809                             }
810                             break;
811                         case 1:
812                             if( !strcasecmp( "styles", psz_node ) )
813                             {
814                                 i_style_level--;
815                             }
816                             break;
817                         case 2:
818                             if( !strcasecmp( "style", psz_node ) )
819                             {
820                                 p_style->font_style.i_text_align = p_style->i_align;
821
822                                 TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
823
824                                 p_style = NULL;
825                                 i_style_level--;
826                             }
827                             break;
828                     }
829                     
830                     free( psz_node );
831                 }
832                 break;
833             case XML_READER_STARTELEM:
834                 psz_node = xml_ReaderName( p_xml_reader );
835
836                 if( psz_node )
837                 {
838                     if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) )
839                     {
840                         i_metadata_level++;
841                     }
842                     else if( !strcasecmp( "resolution", psz_node ) && (i_metadata_level == 1) )
843                     {
844                         while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
845                         {
846                             char *psz_name = xml_ReaderName ( p_xml_reader );
847                             char *psz_value = xml_ReaderValue ( p_xml_reader );
848
849                             if( psz_name && psz_value )
850                             {
851                                 if( !strcasecmp( "x", psz_name ) )
852                                     p_sys->i_original_width = atoi( psz_value );
853                                 else if( !strcasecmp( "y", psz_name ) )
854                                     p_sys->i_original_height = atoi( psz_value );
855                             }
856                             if( psz_name )  free( psz_name );
857                             if( psz_value ) free( psz_value );
858                         }
859                     }
860                     else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) )
861                     {
862                         i_style_level++;
863                     }
864                     else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) )
865                     {
866                         i_style_level++;
867
868                         p_style = calloc( 1, sizeof(ssa_style_t) );
869
870                         while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
871                         {
872                             char *psz_name = xml_ReaderName ( p_xml_reader );
873                             char *psz_value = xml_ReaderValue ( p_xml_reader );
874
875                             if( psz_name && psz_value )
876                             {
877                                 if( !strcasecmp( "name", psz_name ) )
878                                     p_style->psz_stylename = strdup( psz_value);
879                             }
880                             if( psz_name )  free( psz_name );
881                             if( psz_value ) free( psz_value );
882                         }
883                     }
884                     else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) )
885                     {
886                         while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
887                         {
888                             char *psz_name = xml_ReaderName ( p_xml_reader );
889                             char *psz_value = xml_ReaderValue ( p_xml_reader );
890
891                             if( psz_name && psz_value )
892                             {
893                                 if( !strcasecmp( "face", psz_name ) )
894                                     p_style->font_style.psz_fontname = strdup( psz_value);
895                                 else if( !strcasecmp( "size", psz_name ) )
896                                     p_style->font_style.i_font_size = atoi( psz_value);
897                                 else if( !strcasecmp( "italic", psz_name ) )
898                                 {
899                                     if( !strcasecmp( "yes", psz_value ))
900                                         p_style->font_style.i_style_flags |= STYLE_ITALIC;
901                                 }
902                                 else if( !strcasecmp( "weight", psz_name ) )
903                                 {
904                                     if( !strcasecmp( "bold", psz_value ))
905                                         p_style->font_style.i_style_flags |= STYLE_BOLD;
906                                 }
907                                 else if( !strcasecmp( "underline", psz_name ) )
908                                 {
909                                     if( !strcasecmp( "yes", psz_value ))
910                                         p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
911                                 }
912                                 else if( !strcasecmp( "color", psz_name ) )
913                                 {
914                                     if( *psz_value == '#' )
915                                     {
916                                         unsigned long col = strtol(psz_value+1, NULL, 16);
917                                         p_style->font_style.i_font_color = (col & 0x00ffffff);
918                                         /* From DTD: <!-- alpha range = 0..100 --> */
919                                         p_style->font_style.i_font_alpha = ((col >> 24) & 0xff) * 255 / 100;
920                                     }
921                                 }
922                                 else if( !strcasecmp( "outline-color", psz_name ) )
923                                 {
924                                     if( *psz_value == '#' )
925                                     {
926                                         unsigned long col = strtol(psz_value+1, NULL, 16);
927                                         p_style->font_style.i_outline_color = (col & 0x00ffffff);
928                                         /* From DTD: <!-- alpha range = 0..100 --> */
929                                         p_style->font_style.i_outline_alpha = ((col >> 24) & 0xff) * 255 / 100;
930                                     }
931                                 } 
932                                 else if( !strcasecmp( "shadow-color", psz_name ) )
933                                 {
934                                     if( *psz_value == '#' )
935                                     {
936                                         unsigned long col = strtol(psz_value+1, NULL, 16);
937                                         p_style->font_style.i_shadow_color = (col & 0x00ffffff);
938                                         /* From DTD: <!-- alpha range = 0..100 --> */
939                                         p_style->font_style.i_shadow_alpha = ((col >> 24) & 0xff) * 255 / 100;
940                                     }
941                                 }
942                             }
943                             if( psz_name )  free( psz_name );
944                             if( psz_value ) free( psz_value );
945                         }
946                     }
947                     else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) )
948                     {
949                         while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
950                         {
951                             char *psz_name = xml_ReaderName ( p_xml_reader );
952                             char *psz_value = xml_ReaderValue ( p_xml_reader );
953
954                             if( psz_name && psz_value )
955                             {
956                                 if( !strcasecmp( "alignment", psz_name ) )
957                                 {
958                                     if( !strcasecmp( "TopLeft", psz_value ) )
959                                     {
960                                         p_style->i_align |= SUBPICTURE_ALIGN_TOP;
961                                         p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
962                                     }
963                                     else if( !strcasecmp( "TopCenter", psz_value ) )
964                                     {
965                                         p_style->i_align |= SUBPICTURE_ALIGN_TOP;
966                                     }
967                                     else if( !strcasecmp( "TopRight", psz_value ) )
968                                     {
969                                         p_style->i_align |= SUBPICTURE_ALIGN_TOP;
970                                         p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
971                                     }
972                                     else if( !strcasecmp( "MiddleLeft", psz_value ) )
973                                     {
974                                         p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
975                                     }
976                                     else if( !strcasecmp( "MiddleCenter", psz_value ) )
977                                     {
978                                         p_style->i_align = 0;
979                                     }
980                                     else if( !strcasecmp( "MiddleRight", psz_value ) )
981                                     {
982                                         p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
983                                     }
984                                     else if( !strcasecmp( "BottomLeft", psz_value ) )
985                                     {
986                                         p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
987                                         p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
988                                     }
989                                     else if( !strcasecmp( "BottomCenter", psz_value ) )
990                                     {
991                                         p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
992                                     }
993                                     else if( !strcasecmp( "BottomRight", psz_value ) )
994                                     {
995                                         p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
996                                         p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
997                                     }
998                                 }
999                             }
1000                             if( psz_name )  free( psz_name );
1001                             if( psz_value ) free( psz_value );
1002                         }
1003                     }
1004                     
1005                     free( psz_node );
1006                 }
1007                 break;
1008         }
1009     }
1010     if( p_style ) free( p_style );
1011 }
1012 /*****************************************************************************
1013  * ParseSSAHeader: Retrieve global formatting information etc
1014  *****************************************************************************/
1015 static void ParseSSAHeader( decoder_t *p_dec )
1016 {
1017     decoder_sys_t *p_sys = p_dec->p_sys;
1018     char *psz_parser = NULL;
1019     char *psz_header = malloc( p_dec->fmt_in.i_extra+1 );
1020     int i_section_type = 1;
1021
1022     memcpy( psz_header, p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra );
1023     psz_header[ p_dec->fmt_in.i_extra] = '\0';
1024
1025     /* Handle [Script Info] section */
1026     psz_parser = strcasestr( psz_header, "[Script Info]" );
1027     if( psz_parser == NULL ) goto eof;
1028
1029     psz_parser = GotoNextLine( psz_parser );
1030
1031     while( psz_parser[0] != '\0' )
1032     {
1033         int temp;
1034         char buffer_text[MAX_LINE + 1];
1035
1036         if( psz_parser[0] == '!' || psz_parser[0] == ';' ) /* comment */;
1037         else if( sscanf( psz_parser, "PlayResX: %d", &temp ) == 1 )
1038             p_sys->i_original_width = ( temp > 0 ) ? temp : -1;
1039         else if( sscanf( psz_parser, "PlayResY: %d", &temp ) == 1 )
1040             p_sys->i_original_height = ( temp > 0 ) ? temp : -1;
1041         else if( sscanf( psz_parser, "Script Type: %8192s", buffer_text ) == 1 )
1042         {
1043             if( !strcasecmp( buffer_text, "V4.00+" ) ) p_sys->b_ass = VLC_TRUE;
1044         }
1045         else if( !strncasecmp( psz_parser, "[V4 Styles]", 11 ) )
1046             i_section_type = 1;
1047         else if( !strncasecmp( psz_parser, "[V4+ Styles]", 12) )
1048         {
1049             i_section_type = 2;
1050             p_sys->b_ass = VLC_TRUE;
1051         }
1052         else if( !strncasecmp( psz_parser, "[Events]", 8 ) )
1053             i_section_type = 4;
1054         else if( !strncasecmp( psz_parser, "Style:", 6 ) )
1055         {
1056             int i_font_size, i_bold, i_italic, i_border, i_outline, i_shadow, i_underline,
1057                 i_strikeout, i_scale_x, i_scale_y, i_spacing, i_align, i_margin_l, i_margin_r, i_margin_v;
1058
1059             char psz_temp_stylename[MAX_LINE+1];
1060             char psz_temp_fontname[MAX_LINE+1];
1061             char psz_temp_color1[MAX_LINE+1];
1062             char psz_temp_color2[MAX_LINE+1];
1063             char psz_temp_color3[MAX_LINE+1];
1064             char psz_temp_color4[MAX_LINE+1];
1065
1066             if( i_section_type == 1 ) /* V4 */
1067             {
1068                 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1069                     psz_temp_stylename, psz_temp_fontname, &i_font_size,
1070                     psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1071                     &i_border, &i_outline, &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 16 )
1072                 {
1073                     ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1074
1075                     p_style->psz_stylename = strdup( psz_temp_stylename );
1076                     p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1077                     p_style->font_style.i_font_size = i_font_size;
1078
1079                     ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, NULL );
1080                     ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, NULL );
1081                     p_style->font_style.i_outline_color = p_style->font_style.i_shadow_color;
1082                     p_style->font_style.i_font_alpha = p_style->font_style.i_outline_alpha = p_style->font_style.i_shadow_alpha = 0x00;
1083                     p_style->font_style.i_style_flags = 0;
1084                     if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1085                     if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1086
1087                     if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1088                     else if( i_border == 3 )
1089                     {
1090                         p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1091                         p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1092                         p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1093                     }
1094                     p_style->font_style.i_shadow_width = i_shadow;
1095                     p_style->font_style.i_outline_width = i_outline;
1096
1097                     p_style->i_align = 0;
1098                     if( i_align == 1 || i_align == 5 || i_align == 9 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1099                     if( i_align == 3 || i_align == 7 || i_align == 11 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1100                     if( i_align < 4 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1101                     else if( i_align < 8 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP; 
1102
1103                     p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1104                     p_style->i_margin_v = i_margin_v;
1105
1106                     TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1107                 }
1108                 else msg_Warn( p_dec, "SSA v4 styleline parsing failed" );
1109             }
1110             else if( i_section_type == 2 ) /* V4+ */
1111             {
1112                 /* Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour,
1113                    Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline,
1114                    Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
1115                 */
1116                 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%*f,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1117                     psz_temp_stylename, psz_temp_fontname, &i_font_size,
1118                     psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1119                     &i_underline, &i_strikeout, &i_scale_x, &i_scale_y, &i_spacing, &i_border, &i_outline,
1120                     &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 21 )
1121                 {
1122                     ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1123
1124                     p_style->psz_stylename = strdup( psz_temp_stylename );
1125                     p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1126                     p_style->font_style.i_font_size = i_font_size;
1127                     msg_Dbg( p_dec, psz_temp_color1 );
1128                     ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, &p_style->font_style.i_font_alpha );
1129                     ParseColor( p_dec, psz_temp_color3, &p_style->font_style.i_outline_color, &p_style->font_style.i_outline_alpha );
1130                     ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, &p_style->font_style.i_shadow_alpha );
1131
1132                     p_style->font_style.i_style_flags = 0;
1133                     if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1134                     if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1135                     if( i_underline ) p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
1136                     if( i_strikeout ) p_style->font_style.i_style_flags |= STYLE_STRIKEOUT;
1137                     if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1138                     else if( i_border == 3 )
1139                     {
1140                         p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1141                         p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1142                         p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1143                     }
1144                     p_style->font_style.i_shadow_width  = ( i_border == 1 ) ? i_shadow : 0;
1145                     p_style->font_style.i_outline_width = ( i_border == 1 ) ? i_outline : 0;
1146                     p_style->font_style.i_spacing = i_spacing;
1147                     //p_style->font_style.f_angle = f_angle;
1148
1149                     p_style->i_align = 0;
1150                     if( i_align == 0x1 || i_align == 0x4 || i_align == 0x7 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1151                     if( i_align == 0x3 || i_align == 0x6 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1152                     if( i_align == 0x7 || i_align == 0x8 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP;
1153                     if( i_align == 0x1 || i_align == 0x2 || i_align == 0x3 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1154                     p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1155                     p_style->i_margin_v = i_margin_v;
1156
1157                     /*TODO: Ignored: angle i_scale_x|y (fontscaling), i_encoding */
1158                     TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1159                 }
1160                 else msg_Dbg( p_dec, "SSA V4+ styleline parsing failed" );
1161             }
1162         }
1163         psz_parser = GotoNextLine( psz_parser );
1164     }
1165
1166 eof:
1167     if( psz_header ) free( psz_header );
1168     return;
1169 }
1170
/* Build a plain-text version of a subtitle: markup tags (including tags
 * that carry attribute values) are removed and the entities &lt; &gt; and
 * &amp; are decoded. Any other '&' is kept as a normal ampersand.
 * The input is not modified, so that the original text can be reused.
 * A '<' without a matching '>' discards the rest of the string.
 * Returns a newly allocated string to be released with free(), or NULL if
 * the allocation failed.
 */
static char *StripTags( char *psz_subtitle )
{
    /* The output is never longer than the input: every character written
     * consumes at least one input character */
    char *psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
    char *psz_text = psz_text_start;
    char *psz_shrunk;

    if( psz_text_start == NULL )
        return NULL;

    while( *psz_subtitle )
    {
        if( *psz_subtitle == '<' )
        {
            /* Jump to the end of the tag; the '>' itself is skipped by the
             * increment at the bottom of the loop */
            psz_subtitle += strcspn( psz_subtitle, ">" );
            if( *psz_subtitle == '\0' )
                break; /* unterminated tag: do not step over the terminator */
        }
        else if( *psz_subtitle == '&' )
        {
            /* For entities, stop on the ';' which is skipped below */
            if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
            {
                *psz_text++ = '<';
                psz_subtitle += 3;
            }
            else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
            {
                *psz_text++ = '>';
                psz_subtitle += 3;
            }
            else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
            {
                *psz_text++ = '&';
                psz_subtitle += 4;
            }
            else
            {
                /* Assume it is just a normal ampersand */
                *psz_text++ = '&';
            }
        }
        else
        {
            *psz_text++ = *psz_subtitle;
        }

        psz_subtitle++;
    }
    *psz_text = '\0';

    /* Give back the unused tail; keep the larger buffer if that fails */
    psz_shrunk = realloc( psz_text_start, (size_t)( psz_text - psz_text_start ) + 1 );
    if( psz_shrunk != NULL )
        psz_text_start = psz_shrunk;

    return psz_text_start;
}
1226
/* Growable, always NUL-terminated output buffer used by CreateHtmlSubtitle */
typedef struct
{
    char   *psz_data;   /* allocated storage */
    size_t  i_len;      /* bytes in use, terminator not included */
    size_t  i_size;     /* bytes allocated, always greater than i_len */
} html_buffer_t;

/* Append i_src bytes of p_src to the buffer, enlarging it when needed.
 * Returns 0 on success, -1 if memory could not be obtained (the buffer is
 * then left unchanged and still owned by the caller).
 */
static int HtmlBufferAppend( html_buffer_t *p_buf, const char *p_src, size_t i_src )
{
    if( i_src >= p_buf->i_size - p_buf->i_len )
    {
        size_t  i_new = p_buf->i_len + i_src + 100;
        char   *p_new = realloc( p_buf->psz_data, i_new );

        if( p_new == NULL )
            return -1;
        p_buf->psz_data = p_new;
        p_buf->i_size   = i_new;
    }
    memcpy( p_buf->psz_data + p_buf->i_len, p_src, i_src );
    p_buf->i_len += i_src;
    p_buf->psz_data[ p_buf->i_len ] = '\0';
    return 0;
}

/* Copy the attributes of a <font ...> tag. On entry *ppsz_in points just
 * after "<font "; on success it points just after the closing '>', which has
 * been written to the output as well.
 * Supported attributes are copied as they are, anything else is dropped.
 * Returns -1 if the tag or one of its quoted values is not terminated, or if
 * memory runs out.
 */
static int HtmlCopyFontAttributes( html_buffer_t *p_buf, char **ppsz_in )
{
    static const char *const ppsz_attribs[] = { "face=\"", "family=\"", "size=\"",
            "color=\"", "outline-color=\"", "shadow-color=\"",
            "outline-level=\"", "shadow-level=\"", "back-color=\"",
            "alpha=\"", NULL };
    char *psz_in = *ppsz_in;

    for( ;; )
    {
        size_t i_len;
        int    k;

        /* Spaces between attributes are kept */
        i_len = strspn( psz_in, " " );
        if( HtmlBufferAppend( p_buf, psz_in, i_len ) )
            return -1;
        psz_in += i_len;

        if( *psz_in == '>' )
            break;
        if( *psz_in == '\0' )
            return -1;

        for( k = 0; ppsz_attribs[ k ]; k++ )
        {
            i_len = strlen( ppsz_attribs[ k ] );
            if( !strncasecmp( psz_in, ppsz_attribs[ k ], i_len ) )
                break;
        }

        if( ppsz_attribs[ k ] != NULL )
        {
            /* name=" is matched, now include the value and closing quote */
            i_len += strcspn( psz_in + i_len, "\"" );
            if( psz_in[ i_len ] == '\0' )
                return -1;
            i_len++;

            if( HtmlBufferAppend( p_buf, psz_in, i_len ) )
                return -1;
        }
        else
        {
            /* Jump over unrecognised attribute */
            i_len = strcspn( psz_in, "\">" );
            if( psz_in[ i_len ] == '\0' )
                return -1;
            if( psz_in[ i_len ] == '"' )
            {
                i_len++;
                i_len += strcspn( psz_in + i_len, "\"" );
                if( psz_in[ i_len ] == '\0' )
                    return -1;
                i_len++;
            }
            /* otherwise it has no value: stop on the '>' */
        }
        psz_in += i_len;
    }

    if( HtmlBufferAppend( p_buf, ">", 1 ) )
        return -1;
    *ppsz_in = psz_in + 1;
    return 0;
}

/* Try to respect any style tags present in the subtitle string. The main
 * problem here is a lack of adequate specs for the subtitle formats.
 * SSA/ASS and USF are both specified in detail -- but they are handled
 * elsewhere.
 * SAMI has a detailed spec, but extensive rework is needed in the demux
 * code to prevent all this style information being excised, as it presently
 * does.
 * That leaves the others - none of which were (I guess) originally intended
 * to be carrying style information. Over time people have used them that way.
 * In the absence of specifications from which to work, the tags supported
 * have been restricted to the simple set permitted by the USF DTD, ie. :
 *  Basic: <br/>, <i>, <b>, <u>
 *  Extended: <font>
 *    Attributes: face
 *                family
 *                size
 *                color
 *                outline-color
 *                shadow-color
 *                outline-level
 *                shadow-level
 *                back-color
 *                alpha
 * Any other tag is dropped, a newline becomes <br/> and a stray '&' becomes
 * &amp;. Tag names are written in lower case whatever the input used.
 * There is also the further restriction that the subtitle be well-formed
 * as an XML entity, ie. the HTML sentence:
 *        <b><i>Bold and Italics</b></i>
 * doesn't qualify because the tags aren't nested one inside the other.
 * <text> tags are automatically added to the output to ensure
 * well-formedness.
 * If the text doesn't qualify for any reason (this includes tags nested more
 * than 100 deep and running out of memory), a NULL string is
 * returned, and the rendering engine will fall back to the
 * plain text version of the subtitle.
 * Otherwise the result is a newly allocated string to be released with free().
 */
static char *CreateHtmlSubtitle( char *psz_subtitle )
{
    static const struct
    {
        const char *psz_open;
        const char *psz_close;
    } p_tags[] = {
        { "<b>",    "</b>"    },
        { "<i>",    "</i>"    },
        { "<u>",    "</u>"    },
        { "<font ", "</font>" },
    };
    enum { TAG_FONT = 3, TAG_COUNT = 4, TAG_STACK_MAX = 100 };

    unsigned char  p_tagStack[ TAG_STACK_MAX ]; /* indexes of the open tags */
    size_t         i_depth = 0;
    html_buffer_t  buf;
    char          *psz_shrunk;

    buf.i_len    = 0;
    buf.i_size   = strlen( psz_subtitle ) + 100;
    buf.psz_data = malloc( buf.i_size );
    if( buf.psz_data == NULL )
        return NULL;
    buf.psz_data[ 0 ] = '\0';

    if( HtmlBufferAppend( &buf, "<text>", 6 ) )
        goto error;

    while( *psz_subtitle )
    {
        if( *psz_subtitle == '\n' )
        {
            if( HtmlBufferAppend( &buf, "<br/>", 5 ) )
                goto error;
            psz_subtitle++;
        }
        else if( *psz_subtitle == '<' )
        {
            int i_tag;

            for( i_tag = 0; i_tag < TAG_COUNT; i_tag++ )
            {
                if( !strncasecmp( psz_subtitle, p_tags[ i_tag ].psz_open,
                                  strlen( p_tags[ i_tag ].psz_open ) ) )
                    break;
            }

            if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
            {
                if( HtmlBufferAppend( &buf, "<br/>", 5 ) )
                    goto error;
                psz_subtitle += 5;
            }
            else if( i_tag < TAG_COUNT )
            {
                size_t i_open = strlen( p_tags[ i_tag ].psz_open );

                /* Too deeply nested to keep track of */
                if( i_depth >= TAG_STACK_MAX )
                    goto error;
                if( HtmlBufferAppend( &buf, p_tags[ i_tag ].psz_open, i_open ) )
                    goto error;
                p_tagStack[ i_depth++ ] = (unsigned char)i_tag;
                psz_subtitle += i_open;

                if( i_tag == TAG_FONT &&
                    HtmlCopyFontAttributes( &buf, &psz_subtitle ) )
                    goto error;
            }
            else if( !strncmp( psz_subtitle, "</", 2 ))
            {
                const char *psz_close;
                size_t      i_close;

                /* A closing tag must match the innermost open tag */
                if( i_depth == 0 )
                    goto error;
                psz_close = p_tags[ p_tagStack[ i_depth - 1 ] ].psz_close;
                i_close   = strlen( psz_close );
                if( strncasecmp( psz_subtitle, psz_close, i_close ) )
                    goto error;

                i_depth--;
                if( HtmlBufferAppend( &buf, psz_close, i_close ) )
                    goto error;
                psz_subtitle += i_close;
            }
            else
            {
                /* Unsupported tag: drop it, closing '>' included */
                psz_subtitle += strcspn( psz_subtitle, ">" );
                if( *psz_subtitle == '>' )
                    psz_subtitle++;
            }
        }
        else if( *psz_subtitle == '&' )
        {
            if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
            {
                if( HtmlBufferAppend( &buf, "&lt;", 4 ) )
                    goto error;
                psz_subtitle += 4;
            }
            else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
            {
                if( HtmlBufferAppend( &buf, "&gt;", 4 ) )
                    goto error;
                psz_subtitle += 4;
            }
            else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
            {
                if( HtmlBufferAppend( &buf, "&amp;", 5 ) )
                    goto error;
                psz_subtitle += 5;
            }
            else
            {
                /* A bare ampersand has to be escaped */
                if( HtmlBufferAppend( &buf, "&amp;", 5 ) )
                    goto error;
                psz_subtitle++;
            }
        }
        else
        {
            /* Plain text up to the next character that needs attention */
            size_t i_run = strcspn( psz_subtitle, "\n<&" );

            if( HtmlBufferAppend( &buf, psz_subtitle, i_run ) )
                goto error;
            psz_subtitle += i_run;
        }
    }

    /* Tags left open: not well formed -- kill everything */
    if( i_depth != 0 )
        goto error;

    if( HtmlBufferAppend( &buf, "</text>", 7 ) )
        goto error;

    /* Shrink the memory requirements; keep the larger buffer on failure */
    psz_shrunk = realloc( buf.psz_data, buf.i_len + 1 );
    return psz_shrunk ? psz_shrunk : buf.psz_data;

error:
    free( buf.psz_data );
    return NULL;
}