]> git.sesse.net Git - vlc/blob - modules/codec/subsdec.c
all: Subtitle improvment patch by Bernie Purcell.
[vlc] / modules / codec / subsdec.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2006 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@videolan.org>
8  *          Samuel Hocevar <sam@zoy.org>
9  *          Derk-Jan Hartman <hartman at videolan dot org>
10  *          Bernie Purcell <b dot purcell at adbglobal dot com>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include <vlc/vlc.h>
31 #include <vlc_vout.h>
32 #include <vlc_codec.h>
33 #include <vlc_input.h>
34
35 #include <vlc_osd.h>
36 #include <vlc_filter.h>
37 #include <vlc_image.h>
38 #include <vlc_charset.h>
39 #include <vlc_stream.h>
40 #include <vlc_xml.h>
41 #include <errno.h>
42 #include <string.h>
43
44 #define NO_BREAKING_SPACE  "&#160;"
45
/* Bit flags returned by ParsePositionAttributeList() telling the caller which
 * positioning attributes were present on the parsed tag. Each flag occupies
 * its own bit so that several can be combined in one mask. */
enum
{
    ATTRIBUTE_ALIGNMENT = 0x01,
    ATTRIBUTE_X         = 0x02,
    ATTRIBUTE_X_PERCENT = 0x04,
    ATTRIBUTE_Y         = 0x08,
    ATTRIBUTE_Y_PERCENT = 0x10,
};
54
55 typedef struct
56 {
57     char       *psz_filename;
58     picture_t  *p_pic;
59 } image_attach_t;
60
61 typedef struct
62 {
63     char *          psz_stylename; /* The name of the style, no comma's allowed */
64     text_style_t    font_style;
65     int             i_align;
66     int             i_margin_h;
67     int             i_margin_v;
68     int             i_margin_percent_h;
69     int             i_margin_percent_v;
70 }  ssa_style_t;
71
72 /*****************************************************************************
73  * decoder_sys_t : decoder descriptor
74  *****************************************************************************/
75 struct decoder_sys_t
76 {
77     vlc_bool_t          b_ass;                           /* The subs are ASS */
78     int                 i_original_height;
79     int                 i_original_width;
80     int                 i_align;          /* Subtitles alignment on the vout */
81     vlc_iconv_t         iconv_handle;            /* handle to iconv instance */
82     vlc_bool_t          b_autodetect_utf8;
83
84     ssa_style_t         **pp_ssa_styles;
85     int                 i_ssa_styles;
86
87     image_attach_t      **pp_images;
88     int                 i_images;
89 };
90
91 /*****************************************************************************
92  * Local prototypes
93  *****************************************************************************/
94 static int  OpenDecoder   ( vlc_object_t * );
95 static void CloseDecoder  ( vlc_object_t * );
96
97 static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
98 static subpicture_t *ParseText     ( decoder_t *, block_t * );
99 static void         ParseSSAHeader ( decoder_t * );
100 static void         ParseUSFHeader ( decoder_t * );
101 static void         ParseUSFHeaderTags( decoder_t *, xml_reader_t * );
102 static void         ParseSSAString ( decoder_t *, char *, subpicture_t * );
103 static subpicture_region_t *ParseUSFString ( decoder_t *, char *, subpicture_t * );
104 static void         ParseColor     ( decoder_t *, char *, int *, int * );
105 static char        *StripTags      ( char * );
106 static char        *CreateHtmlSubtitle ( char * );
107 static char        *CreatePlainText( char * );
108 static int          ParseImageAttachments( decoder_t *p_dec );
109 static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec, subpicture_t *p_spu, const char *psz_filename, int i_transparent_color );
110
111 #define DEFAULT_NAME "Default"
112 #define MAX_LINE 8192
113
114 /*****************************************************************************
115  * Module descriptor.
116  *****************************************************************************/
117 static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
118     "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
119     "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
120     "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
121     "ISO-8859-6", "CP1256", "MacArabic", "",
122     "ISO-8859-7", "CP1253", "MacGreek", "",
123     "ISO-8859-8", "CP1255", "MacHebrew", "",
124     "ISO-8859-9", "CP1254", "MacTurkish", "",
125     "ISO-8859-13", "CP1257", "",
126     "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
127     "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
128     "ISO-2022-KR", "EUC-KR", "",
129     "MacThai", "KOI8-T", "",
130     "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
131     "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
132     "Macintosh", "",
133     "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
134     "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
135     "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
136     "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
137     "HPROMAN8", "NEXTSTEP" };
138 /*
139 SSA supports charset selection.
140 The following known charsets are used:
141
142 0 = Ansi - Western European
143 1 = default
144 2 = symbol
145 3 = invalid
146 77 = Mac
147 128 = Japanese (Shift JIS)
148 129 = Hangul
149 130 = Johab
150 134 = GB2312 Simplified Chinese
151 136 = Big5 Traditional Chinese
152 161 = Greek
153 162 = Turkish
154 163 = Vietnamese
155 177 = Hebrew
156 178 = Arabic
157 186 = Baltic
158 204 = Russian (Cyrillic)
159 222 = Thai
160 238 = Eastern European
161 254 = PC 437
162 */
163
164 static int  pi_justification[] = { 0, 1, 2 };
165 static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
166
167 #define ENCODING_TEXT N_("Subtitles text encoding")
168 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
169 #define ALIGN_TEXT N_("Subtitles justification")
170 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
171 #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
172 #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
173             "UTF-8 encoding within subtitles files.")
174 #define FORMAT_TEXT N_("Formatted Subtitles")
175 #define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
176  "VLC partly implements this, but you can choose to disable all formatting.")
177
178
179 vlc_module_begin();
180     set_shortname( _("Subtitles"));
181     set_description( _("Text subtitles decoder") );
182     set_capability( "decoder", 50 );
183     set_callbacks( OpenDecoder, CloseDecoder );
184     set_category( CAT_INPUT );
185     set_subcategory( SUBCAT_INPUT_SCODEC );
186
187     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
188                  VLC_FALSE );
189         change_integer_list( pi_justification, ppsz_justification_text, 0 );
190     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
191                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
192         change_string_list( ppsz_encodings, 0, 0 );
193     add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
194               AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
195     add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
196                  VLC_FALSE );
197 vlc_module_end();
198
199 /*****************************************************************************
200  * OpenDecoder: probe the decoder and return score
201  *****************************************************************************
202  * Tries to launch a decoder and return score so that the interface is able
203  * to chose.
204  *****************************************************************************/
205 static int OpenDecoder( vlc_object_t *p_this )
206 {
207     decoder_t     *p_dec = (decoder_t*)p_this;
208     decoder_sys_t *p_sys;
209     vlc_value_t    val;
210
211     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
212         p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') &&
213         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
214     {
215         return VLC_EGENERIC;
216     }
217
218     p_dec->pf_decode_sub = DecodeBlock;
219
220     /* Allocate the memory needed to store the decoder's structure */
221     if( ( p_dec->p_sys = p_sys =
222           (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL )
223     {
224         msg_Err( p_dec, "out of memory" );
225         return VLC_ENOMEM;
226     }
227
228     /* init of p_sys */
229     p_sys->i_align = 0;
230     p_sys->iconv_handle = (vlc_iconv_t)-1;
231     p_sys->b_autodetect_utf8 = VLC_FALSE;
232     p_sys->b_ass = VLC_FALSE;
233     p_sys->i_original_height = -1;
234     p_sys->i_original_width = -1;
235     TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
236     TAB_INIT( p_sys->i_images, p_sys->pp_images );
237
238     char *psz_charset = NULL;
239     /* First try demux-specified encoding */
240     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
241     {
242         psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
243         msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
244                  p_dec->fmt_in.subs.psz_encoding ?: "not specified");
245     }
246
247     /* Second, try configured encoding */
248     if (psz_charset == NULL)
249     {
250         psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
251         if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME))
252         {
253             free (psz_charset);
254             psz_charset = NULL;
255         }
256
257         msg_Dbg (p_dec, "trying configured character encoding: %s",
258                  psz_charset ?: "not specified");
259     }
260
261     /* Third, try "local" encoding with optional UTF-8 autodetection */
262     if (psz_charset == NULL)
263     {
264         psz_charset = strdup (GetFallbackEncoding ());
265         msg_Dbg (p_dec, "trying default character encoding: %s",
266                  psz_charset ?: "not specified");
267
268         if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
269         {
270             msg_Dbg (p_dec, "using automatic UTF-8 detection");
271             p_sys->b_autodetect_utf8 = VLC_TRUE;
272         }
273     }
274
275     if (psz_charset == NULL)
276     {
277         psz_charset = strdup ("UTF-8");
278         msg_Dbg (p_dec, "trying hard-coded character encoding: %s",
279                  psz_charset ?: "error");
280     }
281
282     if (psz_charset == NULL)
283     {
284         free (p_sys);
285         return VLC_ENOMEM;
286     }
287
288     if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8"))
289     {
290         p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
291         if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
292             msg_Err (p_dec, "cannot convert from %s: %s", psz_charset,
293                      strerror (errno));
294     }
295     free (psz_charset);
296
297     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
298     var_Get( p_dec, "subsdec-align", &val );
299     p_sys->i_align = val.i_int;
300
301     ParseImageAttachments( p_dec );
302
303     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
304     {
305         if( p_dec->fmt_in.i_extra > 0 )
306             ParseSSAHeader( p_dec );
307     }
308     else if( p_dec->fmt_in.i_codec == VLC_FOURCC('u','s','f',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
309     {
310         if( p_dec->fmt_in.i_extra > 0 )
311             ParseUSFHeader( p_dec );
312     }
313
314     return VLC_SUCCESS;
315 }
316
317 /****************************************************************************
318  * DecodeBlock: the whole thing
319  ****************************************************************************
320  * This function must be fed with complete subtitles units.
321  ****************************************************************************/
322 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
323 {
324     subpicture_t *p_spu = NULL;
325
326     if( !pp_block || *pp_block == NULL ) return NULL;
327
328     p_spu = ParseText( p_dec, *pp_block );
329
330     block_Release( *pp_block );
331     *pp_block = NULL;
332
333     return p_spu;
334 }
335
336 /*****************************************************************************
337  * CloseDecoder: clean up the decoder
338  *****************************************************************************/
339 static void CloseDecoder( vlc_object_t *p_this )
340 {
341     decoder_t *p_dec = (decoder_t *)p_this;
342     decoder_sys_t *p_sys = p_dec->p_sys;
343
344     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
345         vlc_iconv_close( p_sys->iconv_handle );
346
347     if( p_sys->pp_ssa_styles )
348     {
349         int i;
350         for( i = 0; i < p_sys->i_ssa_styles; i++ )
351         {
352             if( !p_sys->pp_ssa_styles[i] )
353                 continue;
354
355             if( p_sys->pp_ssa_styles[i]->psz_stylename )
356                 free( p_sys->pp_ssa_styles[i]->psz_stylename );
357             if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname )
358                 free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname );
359             if( p_sys->pp_ssa_styles[i] )
360                 free( p_sys->pp_ssa_styles[i] );
361         }
362         TAB_CLEAN( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
363     }
364     if( p_sys->pp_images )
365     {
366         int i;
367         for( i = 0; i < p_sys->i_images; i++ )
368         {
369             if( !p_sys->pp_images[i] )
370                 continue;
371
372             if( p_sys->pp_images[i]->p_pic )
373                 p_sys->pp_images[i]->p_pic->pf_release( p_sys->pp_images[i]->p_pic );
374             if( p_sys->pp_images[i]->psz_filename )
375                 free( p_sys->pp_images[i]->psz_filename );
376
377             free( p_sys->pp_images[i] );
378         }
379         TAB_CLEAN( p_sys->i_images, p_sys->pp_images );
380     }
381
382     free( p_sys );
383 }
384
385 /*****************************************************************************
386  * ParseText: parse an text subtitle packet and send it to the video output
387  *****************************************************************************/
388 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
389 {
390     decoder_sys_t *p_sys = p_dec->p_sys;
391     subpicture_t *p_spu = NULL;
392     char *psz_subtitle = NULL;
393     video_format_t fmt;
394
395     /* We cannot display a subpicture with no date */
396     if( p_block->i_pts == 0 )
397     {
398         msg_Warn( p_dec, "subtitle without a date" );
399         return NULL;
400     }
401
402     /* Check validity of packet data */
403     /* An "empty" line containing only \0 can be used to force
404        and ephemer picture from the screen */
405     if( p_block->i_buffer < 1 )
406     {
407         msg_Warn( p_dec, "no subtitle data" );
408         return NULL;
409     }
410
411     /* Should be resiliant against bad subtitles */
412     psz_subtitle = strndup( (const char *)p_block->p_buffer,
413                             p_block->i_buffer );
414     if( psz_subtitle == NULL )
415         return NULL;
416
417     if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
418     {
419         if (EnsureUTF8( psz_subtitle ) == NULL)
420         {
421             msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
422                      "Try manually setting a character-encoding "
423                      "before you open the file.") );
424         }
425     }
426     else
427     {
428
429         if( p_sys->b_autodetect_utf8 )
430         {
431             if( IsUTF8( psz_subtitle ) == NULL )
432             {
433                 msg_Dbg( p_dec, "invalid UTF-8 sequence: "
434                          "disabling UTF-8 subtitles autodetection" );
435                 p_sys->b_autodetect_utf8 = VLC_FALSE;
436             }
437         }
438
439         if( !p_sys->b_autodetect_utf8 )
440         {
441             size_t inbytes_left = strlen( psz_subtitle );
442             size_t outbytes_left = 6 * inbytes_left;
443             char *psz_new_subtitle = malloc( outbytes_left + 1 );
444             char *psz_convert_buffer_out = psz_new_subtitle;
445             const char *psz_convert_buffer_in = psz_subtitle;
446
447             size_t ret = vlc_iconv( p_sys->iconv_handle,
448                                     &psz_convert_buffer_in, &inbytes_left,
449                                     &psz_convert_buffer_out, &outbytes_left );
450
451             *psz_convert_buffer_out++ = '\0';
452             free( psz_subtitle );
453
454             if( ( ret == (size_t)(-1) ) || inbytes_left )
455             {
456                 free( psz_new_subtitle );
457                 msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
458                         "Try manually setting a character-encoding "
459                                 "before you open the file.") );
460                 return NULL;
461             }
462
463             psz_subtitle = realloc( psz_new_subtitle,
464                                     psz_convert_buffer_out - psz_new_subtitle );
465         }
466     }
467
468     /* Create the subpicture unit */
469     p_spu = p_dec->pf_spu_buffer_new( p_dec );
470     if( !p_spu )
471     {
472         msg_Warn( p_dec, "can't get spu buffer" );
473         if( psz_subtitle ) free( psz_subtitle );
474         return NULL;
475     }
476
477     p_spu->b_pausable = VLC_TRUE;
478
479     /* Create a new subpicture region */
480     memset( &fmt, 0, sizeof(video_format_t) );
481     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
482     fmt.i_aspect = 0;
483     fmt.i_width = fmt.i_height = 0;
484     fmt.i_x_offset = fmt.i_y_offset = 0;
485     p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
486     if( !p_spu->p_region )
487     {
488         msg_Err( p_dec, "cannot allocate SPU region" );
489         if( psz_subtitle ) free( psz_subtitle );
490         p_dec->pf_spu_buffer_del( p_dec, p_spu );
491         return NULL;
492     }
493
494     /* Decode and format the subpicture unit */
495     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') &&
496         p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') )
497     {
498         /* Normal text subs, easy markup */
499         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
500         p_spu->i_x = p_sys->i_align ? 20 : 0;
501         p_spu->i_y = 10;
502
503         /* Remove formatting from string */
504
505         p_spu->p_region->psz_text = StripTags( psz_subtitle );
506         p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle );
507
508         p_spu->i_start = p_block->i_pts;
509         p_spu->i_stop = p_block->i_pts + p_block->i_length;
510         p_spu->b_ephemer = (p_block->i_length == 0);
511         p_spu->b_absolute = VLC_FALSE;
512     }
513     else
514     {
515         /* Decode SSA/USF strings */
516         if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
517             ParseSSAString( p_dec, psz_subtitle, p_spu );
518         else
519         {
520             p_spu->pf_destroy_region( VLC_OBJECT(p_dec), p_spu->p_region );
521             p_spu->p_region = ParseUSFString( p_dec, psz_subtitle, p_spu );
522         }
523
524         p_spu->i_start = p_block->i_pts;
525         p_spu->i_stop = p_block->i_pts + p_block->i_length;
526         p_spu->b_ephemer = (p_block->i_length == 0);
527         p_spu->b_absolute = VLC_FALSE;
528         p_spu->i_original_picture_width = p_sys->i_original_width;
529         p_spu->i_original_picture_height = p_sys->i_original_height;
530     }
531     if( psz_subtitle ) free( psz_subtitle );
532
533     return p_spu;
534 }
535
/*
 * GrabAttributeValue: extract the value of attribute="value" from a tag.
 *
 * psz_attribute  attribute name, matched case-insensitively
 * psz_tag_start  text starting at (or inside) the tag; the tag is taken to
 *                end at the first '>' found from there
 *
 * Every occurrence of the name inside the tag is examined, not just the
 * first one. An occurrence counts only if it begins the text or follows
 * whitespace (so "alignment" does not match inside "valignment") and is
 * immediately followed by ="...
 *
 * Returns a newly allocated copy of the value (caller frees), running up to
 * the next double quote, or NULL when either argument is NULL, the name is
 * empty, the text has no '>', the attribute is absent, or allocation fails.
 */
static char *GrabAttributeValue( const char *psz_attribute,
                                 const char *psz_tag_start )
{
    const char *psz_tag_end;
    const char *psz_found;
    size_t      i_name_len;

    if( psz_attribute == NULL || psz_tag_start == NULL )
        return NULL;

    i_name_len = strlen( psz_attribute );
    if( i_name_len == 0 )
        return NULL;

    /* Without a closing '>' there is no tag to look into */
    psz_tag_end = strchr( psz_tag_start, '>' );
    if( psz_tag_end == NULL )
        return NULL;

    for( psz_found = strcasestr( psz_tag_start, psz_attribute );
         psz_found != NULL && psz_found < psz_tag_end;
         psz_found = strcasestr( psz_found + 1, psz_attribute ) )
    {
        const char *psz_value = psz_found + i_name_len;

        /* Reject a match that is only the tail of a longer name.
         * psz_found[-1] is never NUL here, so strchr cannot match the
         * terminator of the separator list. */
        if( psz_found != psz_tag_start &&
            strchr( " \t\r\n", psz_found[-1] ) == NULL )
            continue;

        /* The second test is only reached when the first char is '=',
         * hence not the terminator, so reading one further is safe. */
        if( psz_value[0] != '=' || psz_value[1] != '\"' )
            continue;

        psz_value += 2;
        if( psz_value >= psz_tag_end )
            return NULL;

        return strndup( psz_value, strcspn( psz_value, "\"" ) );
    }
    return NULL;
}
561
562 static ssa_style_t *ParseStyle( decoder_sys_t *p_sys, char *psz_subtitle )
563 {
564     ssa_style_t *p_style   = NULL;
565     char        *psz_style = GrabAttributeValue( "style", psz_subtitle );
566
567     if( psz_style )
568     {
569         int i;
570
571         for( i = 0; i < p_sys->i_ssa_styles; i++ )
572         {
573             if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
574                 p_style = p_sys->pp_ssa_styles[i];
575         }
576         free( psz_style );
577     }
578     return p_style;
579 }
580
581 static int ParsePositionAttributeList( char *psz_subtitle, int *i_align, int *i_x, int *i_y )
582 {
583     int   i_mask = 0;
584
585     char *psz_align    = GrabAttributeValue( "alignment", psz_subtitle );
586     char *psz_margin_x = GrabAttributeValue( "horizontal-margin", psz_subtitle );
587     char *psz_margin_y = GrabAttributeValue( "vertical-margin", psz_subtitle );
588     /* -- UNSUPPORTED
589     char *psz_relative = GrabAttributeValue( "relative-to", psz_subtitle );
590     char *psz_rotate_x = GrabAttributeValue( "rotate-x", psz_subtitle );
591     char *psz_rotate_y = GrabAttributeValue( "rotate-y", psz_subtitle );
592     char *psz_rotate_z = GrabAttributeValue( "rotate-z", psz_subtitle );
593     */
594
595     *i_align = SUBPICTURE_ALIGN_BOTTOM;
596     *i_x = 0;
597     *i_y = 0;
598
599     if( psz_align )
600     {
601         if( !strcasecmp( "TopLeft", psz_align ) )
602             *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
603         else if( !strcasecmp( "TopCenter", psz_align ) )
604             *i_align = SUBPICTURE_ALIGN_TOP;
605         else if( !strcasecmp( "TopRight", psz_align ) )
606             *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
607         else if( !strcasecmp( "MiddleLeft", psz_align ) )
608             *i_align = SUBPICTURE_ALIGN_LEFT;
609         else if( !strcasecmp( "MiddleCenter", psz_align ) )
610             *i_align = 0;
611         else if( !strcasecmp( "MiddleRight", psz_align ) )
612             *i_align = SUBPICTURE_ALIGN_RIGHT;
613         else if( !strcasecmp( "BottomLeft", psz_align ) )
614             *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
615         else if( !strcasecmp( "BottomCenter", psz_align ) )
616             *i_align = SUBPICTURE_ALIGN_BOTTOM;
617         else if( !strcasecmp( "BottomRight", psz_align ) )
618             *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
619
620         i_mask |= ATTRIBUTE_ALIGNMENT;
621         free( psz_align );
622     }
623     if( psz_margin_x )
624     {
625         *i_x = atoi( psz_margin_x );
626         if( strchr( psz_margin_x, '%' ) )
627             i_mask |= ATTRIBUTE_X_PERCENT;
628         else
629             i_mask |= ATTRIBUTE_X;
630
631         free( psz_margin_x );
632     }
633     if( psz_margin_y )
634     {
635         *i_y = atoi( psz_margin_y );
636         if( strchr( psz_margin_y, '%' ) )
637             i_mask |= ATTRIBUTE_Y_PERCENT;
638         else
639             i_mask |= ATTRIBUTE_Y;
640
641         free( psz_margin_y );
642     }
643     return i_mask;
644 }
645
646 static void SetupPositions( subpicture_region_t *p_region, char *psz_subtitle )
647 {
648     int           i_mask = 0;
649     int           i_align;
650     int           i_x, i_y;
651
652     i_mask = ParsePositionAttributeList( psz_subtitle, &i_align, &i_x, &i_y );
653
654     if( i_mask & ATTRIBUTE_ALIGNMENT )
655         p_region->i_align = i_align;
656     
657     /* TODO: Setup % based offsets properly, without adversely affecting
658      *       everything else in vlc. Will address with separate patch, to
659      *       prevent this one being any more complicated.
660      */
661     if( i_mask & ATTRIBUTE_X )
662         p_region->i_x = i_x;
663     else if( i_mask & ATTRIBUTE_X_PERCENT )
664         p_region->i_x = 0;
665
666     if( i_mask & ATTRIBUTE_Y )
667         p_region->i_y = i_y;
668     else if( i_mask & ATTRIBUTE_Y_PERCENT )
669         p_region->i_y = 0;
670 }
671
672 static subpicture_region_t *CreateTextRegion( decoder_t *p_dec,
673                                               subpicture_t *p_spu,
674                                               char *psz_subtitle,
675                                               int i_len,
676                                               int i_sys_align )
677 {
678     decoder_sys_t        *p_sys = p_dec->p_sys;
679     subpicture_region_t  *p_text_region;
680     video_format_t        fmt;
681
682     /* Create a new subpicture region */
683     memset( &fmt, 0, sizeof(video_format_t) );
684     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
685     fmt.i_aspect = 0;
686     fmt.i_width = fmt.i_height = 0;
687     fmt.i_x_offset = fmt.i_y_offset = 0;
688     p_text_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
689
690     if( p_text_region != NULL )
691     {
692         ssa_style_t  *p_style = NULL;
693
694         p_text_region->psz_text = NULL;
695         p_text_region->psz_html = strndup( psz_subtitle, i_len );
696         if( ! p_text_region->psz_html )
697         {
698             msg_Err( p_dec, "out of memory" );
699             p_spu->pf_destroy_region( VLC_OBJECT(p_dec), p_text_region );
700             return NULL;
701         }
702
703         p_style = ParseStyle( p_sys, p_text_region->psz_html );
704         if( !p_style )
705         {
706             int i;
707
708             for( i = 0; i < p_sys->i_ssa_styles; i++ )
709             {
710                 if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
711                     p_style = p_sys->pp_ssa_styles[i];
712             }
713         }
714
715         if( p_style )
716         {
717             msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename );
718
719             p_text_region->p_style = &p_style->font_style;
720             p_text_region->i_align = p_style->i_align;
721
722             /* TODO: Setup % based offsets properly, without adversely affecting
723              *       everything else in vlc. Will address with separate patch,
724              *       to prevent this one being any more complicated.
725
726                      * p_style->i_margin_percent_h;
727                      * p_style->i_margin_percent_v;
728              */
729             p_text_region->i_x         = p_style->i_margin_h;
730             p_text_region->i_y         = p_style->i_margin_v;
731             
732         }
733         else
734         {
735             p_text_region->i_align = SUBPICTURE_ALIGN_BOTTOM | i_sys_align;
736             p_text_region->i_x = i_sys_align ? 20 : 0;
737             p_text_region->i_y = 10;
738         }
739         /* Look for position arguments which may override the style-based
740          * defaults.
741          */
742         SetupPositions( p_text_region, psz_subtitle );
743         
744         p_text_region->p_next = NULL;
745     }
746     return p_text_region;
747 }
748
749 static subpicture_region_t *ParseUSFString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
750 {
751     decoder_sys_t        *p_sys = p_dec->p_sys;
752     subpicture_t         *p_spu = p_spu_in;
753     subpicture_region_t  *p_region_first = NULL;
754     subpicture_region_t  *p_region_upto  = p_region_first;
755
756     while( *psz_subtitle )
757     {
758         if( *psz_subtitle == '<' )
759         {
760             char *psz_end = NULL;
761
762             if(( !strncasecmp( psz_subtitle, "<text ", 6 )) ||
763                ( !strncasecmp( psz_subtitle, "<text>", 6 )))
764             {
765                 psz_end = strcasestr( psz_subtitle, "</text>" );
766                 
767                 if( psz_end )
768                 {
769                     subpicture_region_t  *p_text_region;
770
771                     psz_end += strcspn( psz_end, ">" ) + 1;
772
773                     p_text_region = CreateTextRegion( p_dec,
774                                                       p_spu,
775                                                       psz_subtitle,
776                                                       psz_end - psz_subtitle,
777                                                       p_sys->i_align );
778                 
779                     if( p_text_region )
780                         p_text_region->psz_text = CreatePlainText( p_text_region->psz_html );
781
782                     if( !p_region_first )
783                     {
784                         p_region_first = p_region_upto = p_text_region;
785                     }
786                     else if( p_text_region )
787                     {
788                         p_region_upto->p_next = p_text_region;
789                         p_region_upto = p_region_upto->p_next;
790                     }
791                 }
792             }
793             else if(( !strncasecmp( psz_subtitle, "<karaoke ", 9 )) ||
794                     ( !strncasecmp( psz_subtitle, "<karaoke>", 9 )))
795             {
796                 psz_end = strcasestr( psz_subtitle, "</karaoke>" );
797                 
798                 if( psz_end )
799                 {
800                     subpicture_region_t  *p_text_region;
801
802                     psz_end += strcspn( psz_end, ">" ) + 1;
803
804                     p_text_region = CreateTextRegion( p_dec,
805                                                       p_spu,
806                                                       psz_subtitle,
807                                                       psz_end - psz_subtitle,
808                                                       p_sys->i_align );
809                 
810                     if( !p_region_first )
811                     {
812                         p_region_first = p_region_upto = p_text_region;
813                     }
814                     else if( p_text_region )
815                     {
816                         p_region_upto->p_next = p_text_region;
817                         p_region_upto = p_region_upto->p_next;
818                     }
819                 }
820             }
821             else if(( !strncasecmp( psz_subtitle, "<image ", 7 )) ||
822                     ( !strncasecmp( psz_subtitle, "<image>", 7 )))
823             {
824                 subpicture_region_t *p_image_region = NULL;
825
826                 char *psz_end = strcasestr( psz_subtitle, "</image>" );
827                 char *psz_content = strchr( psz_subtitle, '>' );
828                 int   i_transparent = -1;
829                 
830                 /* If a colorkey parameter is specified, then we have to map
831                  * that index in the picture through as transparent (it is
832                  * required by the USF spec but is also recommended that if the
833                  * creator really wants a transparent colour that they use a
834                  * type like PNG that properly supports it; this goes doubly
835                  * for VLC because the pictures are stored internally in YUV
836                  * and the resulting colour-matching may not produce the
837                  * desired results.)
838                  */
839                 char *psz_tmp = GrabAttributeValue( "colorkey", psz_subtitle );
840                 if( psz_tmp )
841                 {
842                     if( *psz_tmp == '#' )
843                         i_transparent = strtol( psz_tmp + 1, NULL, 16 ) & 0x00ffffff;
844                     free( psz_tmp );
845                 }
846                 if( psz_content && ( psz_content < psz_end ) )
847                 {
848                     char *psz_filename = strndup( &psz_content[1], psz_end - &psz_content[1] );
849                     if( psz_filename )
850                     {
851                         p_image_region = LoadEmbeddedImage( p_dec, p_spu, psz_filename, i_transparent );
852                         free( psz_filename );
853                     }
854                 }
855
856                 if( psz_end ) psz_end += strcspn( psz_end, ">" ) + 1;
857
858                 if( p_image_region )
859                 {
860                     SetupPositions( p_image_region, psz_subtitle );
861
862                     p_image_region->p_next   = NULL;
863                     p_image_region->psz_text = NULL;
864                     p_image_region->psz_html = NULL;
865
866                 }
867                 if( !p_region_first )
868                 {
869                     p_region_first = p_region_upto = p_image_region;
870                 }
871                 else if( p_image_region )
872                 {
873                     p_region_upto->p_next = p_image_region;
874                     p_region_upto = p_region_upto->p_next;
875                 }
876             }
877             if( psz_end )
878                 psz_subtitle = psz_end - 1;
879             
880             psz_subtitle += strcspn( psz_subtitle, ">" );
881         }
882
883         psz_subtitle++;
884     }
885
886     return p_region_first;
887 }
888
889 static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
890 {
891     /* We expect MKV formatted SSA:
892      * ReadOrder, Layer, Style, CharacterName, MarginL, MarginR,
893      * MarginV, Effect, Text */
894     decoder_sys_t   *p_sys = p_dec->p_sys;
895     subpicture_t    *p_spu = p_spu_in;
896     ssa_style_t     *p_style = NULL;
897     char            *psz_new_subtitle = NULL;
898     char            *psz_buffer_sub = NULL;
899     char            *psz_style = NULL;
900     char            *psz_style_start = NULL;
901     char            *psz_style_end = NULL;
902     int             i_text = 0, i_comma = 0, i_strlen = 0, i;
903     int             i_margin_l = 0, i_margin_r = 0, i_margin_v = 0;
904
905     psz_buffer_sub = psz_subtitle;
906
907     p_spu->p_region->psz_html = NULL;
908
909     i_comma = 0;
910     while( i_comma < 8 && *psz_buffer_sub != '\0' )
911     {
912         if( *psz_buffer_sub == ',' )
913         {
914             i_comma++;
915             if( i_comma == 2 )
916                 psz_style_start = &psz_buffer_sub[1];
917             else if( i_comma == 3 )
918                 psz_style_end = &psz_buffer_sub[0];
919             else if( i_comma == 4 )
920                 i_margin_l = (int)strtol( &psz_buffer_sub[1], NULL, 10 );
921             else if( i_comma == 5 )
922                 i_margin_r = (int)strtol( &psz_buffer_sub[1], NULL, 10 );
923             else if( i_comma == 6 )
924                 i_margin_v = (int)strtol( &psz_buffer_sub[1], NULL, 10 );
925         }
926         psz_buffer_sub++;
927     }
928
929     if( *psz_buffer_sub == '\0' && i_comma == 8 )
930     {
931         msg_Dbg( p_dec, "couldn't find all fields in this SSA line" );
932         return;
933     }
934
935     psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
936     i_text = 0;
937     while( psz_buffer_sub[0] != '\0' )
938     {
939         if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'n' )
940         {
941             psz_new_subtitle[i_text] = ' ';
942             i_text++;
943             psz_buffer_sub += 2;
944         }
945         else if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'N' )
946         {
947             psz_new_subtitle[i_text] = '\n';
948             i_text++;
949             psz_buffer_sub += 2;
950         }
951         else if( psz_buffer_sub[0] == '{' &&
952                  psz_buffer_sub[1] == '\\' )
953         {
954             /* SSA control code */
955             while( psz_buffer_sub[0] != '\0' &&
956                    psz_buffer_sub[0] != '}' )
957             {
958                 psz_buffer_sub++;
959             }
960             psz_buffer_sub++;
961         }
962         else
963         {
964             psz_new_subtitle[i_text] = psz_buffer_sub[0];
965             i_text++;
966             psz_buffer_sub++;
967         }
968     }
969     psz_new_subtitle[i_text] = '\0';
970
971     i_strlen = __MAX( psz_style_end - psz_style_start, 0);
972     psz_style = strndup( psz_style_start, i_strlen );
973
974     for( i = 0; i < p_sys->i_ssa_styles; i++ )
975     {
976         if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
977             p_style = p_sys->pp_ssa_styles[i];
978     }
979     if( psz_style ) free( psz_style );
980
981     p_spu->p_region->psz_text = psz_new_subtitle;
982     if( p_style == NULL )
983     {
984         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
985         p_spu->i_x = p_sys->i_align ? 20 : 0;
986         p_spu->i_y = 10;
987     }
988     else
989     {
990         msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename);
991         p_spu->p_region->p_style = &p_style->font_style;
992         p_spu->p_region->i_align = p_style->i_align;
993         if( p_style->i_align & SUBPICTURE_ALIGN_LEFT )
994         {
995             p_spu->i_x = (i_margin_l) ? i_margin_l : p_style->i_margin_h;
996         }
997         else if( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) 
998         {
999             p_spu->i_x = (i_margin_r) ? i_margin_r : p_style->i_margin_h;
1000         }
1001         p_spu->i_y = (i_margin_v) ? i_margin_v : p_style->i_margin_v;
1002     }
1003 }
1004
/* Return a pointer to the start of the line following the one psz_text
 * points into.  The current line ends at the first CR or LF; the whole run of
 * consecutive CR/LF characters is skipped (so empty lines are skipped too).
 * When no line break is found the returned pointer addresses the
 * terminating NUL. */
static char* GotoNextLine( char *psz_text )
{
    /* Skip the remainder of the current line... */
    char *p_cursor = psz_text + strcspn( psz_text, "\r\n" );

    /* ...then every end-of-line character that follows it. */
    p_cursor += strspn( p_cursor, "\r\n" );

    return p_cursor;
}
1022
1023 /*****************************************************************************
1024  * ParseColor: SSA stores color in BBGGRR, in ASS it uses AABBGGRR
1025  * The string value in the string can be a pure integer, or hexadecimal &HBBGGRR
1026  *****************************************************************************/
1027 static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *pi_alpha )
1028 {
1029     int i_color = 0;
1030     if( !strncasecmp( psz_color, "&H", 2 ) )
1031     {
1032         /* textual HEX representation */
1033         i_color = (int) strtol( psz_color+2, NULL, 16 );
1034     }
1035     else i_color = (int) strtol( psz_color, NULL, 0 );
1036
1037     *pi_color = 0;
1038     *pi_color |= ( ( i_color & 0x000000FF ) << 16 ); /* Red */
1039     *pi_color |= ( ( i_color & 0x0000FF00 ) );       /* Green */
1040     *pi_color |= ( ( i_color & 0x00FF0000 ) >> 16 ); /* Blue */
1041
1042     if( pi_alpha != NULL )
1043         *pi_alpha = ( i_color & 0xFF000000 ) >> 24;
1044 }
1045
1046 static int ParseImageAttachments( decoder_t *p_dec )
1047 {
1048     decoder_sys_t        *p_sys = p_dec->p_sys;
1049     input_attachment_t  **pp_attachments;
1050     int                   i_attachments_cnt;
1051     int                   k = 0;
1052
1053     if( VLC_SUCCESS != decoder_GetInputAttachments( p_dec, &pp_attachments, &i_attachments_cnt ))
1054         return VLC_EGENERIC;
1055
1056     for( k = 0; k < i_attachments_cnt; k++ )
1057     {
1058         input_attachment_t *p_attach = pp_attachments[k];
1059
1060         vlc_fourcc_t  type  = 0;
1061
1062         if( ( !strcmp( p_attach->psz_mime, "image/bmp" ) )      || /* BMP */
1063             ( !strcmp( p_attach->psz_mime, "image/x-bmp" ) )    ||
1064             ( !strcmp( p_attach->psz_mime, "image/x-bitmap" ) ) ||
1065             ( !strcmp( p_attach->psz_mime, "image/x-ms-bmp" ) ) )
1066         {
1067              type = VLC_FOURCC('b','m','p',' ');
1068         }
1069         else if( ( !strcmp( p_attach->psz_mime, "image/x-portable-anymap" ) )  || /* PNM */
1070                  ( !strcmp( p_attach->psz_mime, "image/x-portable-bitmap" ) )  || /* PBM */
1071                  ( !strcmp( p_attach->psz_mime, "image/x-portable-graymap" ) ) || /* PGM */
1072                  ( !strcmp( p_attach->psz_mime, "image/x-portable-pixmap" ) ) )   /* PPM */
1073         {
1074             type = VLC_FOURCC('p','n','m',' ');
1075         }
1076         else if ( !strcmp( p_attach->psz_mime, "image/gif" ) )         /* GIF */
1077             type = VLC_FOURCC('g','i','f',' ');
1078         else if ( !strcmp( p_attach->psz_mime, "image/jpeg" ) )        /* JPG, JPEG */
1079             type = VLC_FOURCC('j','p','e','g');
1080         else if ( !strcmp( p_attach->psz_mime, "image/pcx" ) )         /* PCX */
1081             type = VLC_FOURCC('p','c','x',' ');
1082         else if ( !strcmp( p_attach->psz_mime, "image/png" ) )         /* PNG */
1083             type = VLC_FOURCC('p','n','g',' ');
1084         else if ( !strcmp( p_attach->psz_mime, "image/tiff" ) )        /* TIF, TIFF */
1085             type = VLC_FOURCC('t','i','f','f');
1086         else if ( !strcmp( p_attach->psz_mime, "image/x-tga" ) )       /* TGA */
1087             type = VLC_FOURCC('t','g','a',' ');
1088         else if ( !strcmp( p_attach->psz_mime, "image/x-xpixmap") )    /* XPM */
1089             type = VLC_FOURCC('x','p','m',' ');
1090
1091         if( ( type != 0 ) &&
1092             ( p_attach->i_data > 0 ) &&
1093             ( p_attach->p_data != NULL ) )
1094         {
1095             picture_t         *p_pic = NULL;
1096             image_handler_t   *p_image;
1097
1098             p_image = image_HandlerCreate( p_dec );
1099             if( p_image != NULL )
1100             {
1101                 block_t   *p_block;
1102                
1103                 p_block = block_New( p_image->p_parent, p_attach->i_data );
1104
1105                 if( p_block != NULL )
1106                 {
1107                     video_format_t     fmt_in;
1108                     video_format_t     fmt_out;
1109
1110                     memcpy( p_block->p_buffer, p_attach->p_data, p_attach->i_data );
1111
1112                     memset( &fmt_in,  0, sizeof( video_format_t));
1113                     memset( &fmt_out, 0, sizeof( video_format_t));
1114
1115                     fmt_in.i_chroma  = type;
1116                     fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A');
1117
1118                     /* Find a suitable decoder module */
1119                     if( module_Exists( p_dec, "SDL Image decoder" ) )
1120                     {
1121                         /* ffmpeg thinks it can handle bmp properly but it can't (at least
1122                          * not all of them), so use sdl_image if it is available */
1123
1124                         vlc_value_t val;
1125
1126                         var_Create( p_dec, "codec", VLC_VAR_MODULE | VLC_VAR_DOINHERIT );
1127                         val.psz_string = (char*) "sdl_image";
1128                         var_Set( p_dec, "codec", val );
1129                     }
1130
1131                     p_pic = image_Read( p_image, p_block, &fmt_in, &fmt_out );
1132                     var_Destroy( p_dec, "codec" );
1133                 }
1134
1135                 image_HandlerDelete( p_image );
1136             }
1137             if( p_pic )
1138             {
1139                 image_attach_t *p_picture = malloc( sizeof(image_attach_t) );
1140
1141                 if( p_picture )
1142                 {
1143                     p_picture->psz_filename = strdup( p_attach->psz_name );
1144                     p_picture->p_pic = p_pic;
1145
1146                     TAB_APPEND( p_sys->i_images, p_sys->pp_images, p_picture );
1147                 }
1148             }
1149         }
1150         vlc_input_attachment_Delete( pp_attachments[ k ] );
1151     }
1152     free( pp_attachments );        
1153
1154     return VLC_SUCCESS;
1155 }
1156
1157 /*****************************************************************************
1158  * ParseUSFHeader: Retrieve global formatting information etc
1159  *****************************************************************************/
1160 static void ParseUSFHeader( decoder_t *p_dec )
1161 {
1162     stream_t      *p_sub = NULL;
1163     xml_t         *p_xml = NULL;
1164     xml_reader_t  *p_xml_reader = NULL;
1165
1166     p_sub = stream_MemoryNew( VLC_OBJECT(p_dec),
1167                               p_dec->fmt_in.p_extra,
1168                               p_dec->fmt_in.i_extra,
1169                               VLC_TRUE );
1170     if( !p_sub )
1171         return;
1172
1173     p_xml = xml_Create( p_dec );
1174     if( p_xml )
1175     {
1176         p_xml_reader = xml_ReaderCreate( p_xml, p_sub );
1177         if( p_xml_reader )
1178         {
1179             /* Look for Root Node */
1180             if( xml_ReaderRead( p_xml_reader ) == 1 )
1181             {
1182                 char *psz_node = xml_ReaderName( p_xml_reader );
1183
1184                 if( !strcasecmp( "usfsubtitles", psz_node ) )
1185                     ParseUSFHeaderTags( p_dec, p_xml_reader );
1186
1187                 free( psz_node );
1188             }
1189
1190             xml_ReaderDelete( p_xml, p_xml_reader );
1191         }
1192         xml_Delete( p_xml );
1193     }
1194     stream_Delete( p_sub );
1195 }
1196
1197 static void ParseUSFHeaderTags( decoder_t *p_dec, xml_reader_t *p_xml_reader )
1198 {
1199     decoder_sys_t *p_sys = p_dec->p_sys;
1200     char *psz_node;
1201     ssa_style_t *p_style = NULL;
1202     int i_style_level = 0;
1203     int i_metadata_level = 0;
1204
1205     while ( xml_ReaderRead( p_xml_reader ) == 1 )
1206     {
1207         switch ( xml_ReaderNodeType( p_xml_reader ) )
1208         {
1209             case XML_READER_TEXT:
1210             case XML_READER_NONE:
1211                 break;
1212             case XML_READER_ENDELEM:
1213                 psz_node = xml_ReaderName( p_xml_reader );
1214                 
1215                 if( !psz_node )
1216                     break;
1217                 switch (i_style_level)
1218                 {
1219                     case 0:
1220                         if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) )
1221                         {
1222                             i_metadata_level--;
1223                         }
1224                         break;
1225                     case 1:
1226                         if( !strcasecmp( "styles", psz_node ) )
1227                         {
1228                             i_style_level--;
1229                         }
1230                         break;
1231                     case 2:
1232                         if( !strcasecmp( "style", psz_node ) )
1233                         {
1234                             TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1235
1236                             p_style = NULL;
1237                             i_style_level--;
1238                         }
1239                         break;
1240                 }
1241                 
1242                 free( psz_node );
1243                 break;
1244             case XML_READER_STARTELEM:
1245                 psz_node = xml_ReaderName( p_xml_reader );
1246
1247                 if( !psz_node )
1248                     break;
1249
1250                 if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) )
1251                 {
1252                     i_metadata_level++;
1253                 }
1254                 else if( !strcasecmp( "resolution", psz_node ) && (i_metadata_level == 1) )
1255                 {
1256                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1257                     {
1258                         char *psz_name = xml_ReaderName ( p_xml_reader );
1259                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1260
1261                         if( psz_name && psz_value )
1262                         {
1263                             if( !strcasecmp( "x", psz_name ) )
1264                                 p_sys->i_original_width = atoi( psz_value );
1265                             else if( !strcasecmp( "y", psz_name ) )
1266                                 p_sys->i_original_height = atoi( psz_value );
1267                         }
1268                         if( psz_name )  free( psz_name );
1269                         if( psz_value ) free( psz_value );
1270                     }
1271                 }
1272                 else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) )
1273                 {
1274                     i_style_level++;
1275                 }
1276                 else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) )
1277                 {
1278                     i_style_level++;
1279
1280                     p_style = calloc( 1, sizeof(ssa_style_t) );
1281                     if( ! p_style )
1282                     {
1283                         msg_Err( p_dec, "out of memory" );
1284                         free( psz_node );
1285                         break;
1286                     }
1287                     /* All styles are supposed to default to Default, and then
1288                      * one or more settings are over-ridden. 
1289                      * At the moment this only effects styles defined AFTER
1290                      * Default in the XML
1291                      */
1292                     int i;
1293                     for( i = 0; i < p_sys->i_ssa_styles; i++ )
1294                     {
1295                         if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
1296                         {
1297                             ssa_style_t *p_default_style = p_sys->pp_ssa_styles[i];
1298
1299                             memcpy( p_style, p_default_style, sizeof( ssa_style_t ) );
1300                             p_style->font_style.psz_fontname = strdup( p_style->font_style.psz_fontname );
1301                             p_style->psz_stylename = NULL;
1302                         }
1303                     }
1304
1305                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1306                     {
1307                         char *psz_name = xml_ReaderName ( p_xml_reader );
1308                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1309
1310                         if( psz_name && psz_value )
1311                         {
1312                             if( !strcasecmp( "name", psz_name ) )
1313                                 p_style->psz_stylename = strdup( psz_value);
1314                         }
1315                         if( psz_name )  free( psz_name );
1316                         if( psz_value ) free( psz_value );
1317                     }
1318                 }
1319                 else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) )
1320                 {
1321                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1322                     {
1323                         char *psz_name = xml_ReaderName ( p_xml_reader );
1324                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1325
1326                         if( psz_name && psz_value )
1327                         {
1328                             if( !strcasecmp( "face", psz_name ) )
1329                             {
1330                                 if( p_style->font_style.psz_fontname ) free( p_style->font_style.psz_fontname );
1331                                 p_style->font_style.psz_fontname = strdup( psz_value );
1332                             }
1333                             else if( !strcasecmp( "size", psz_name ) )
1334                             {
1335                                 if( ( *psz_value == '+' ) || ( *psz_value == '-' ) )
1336                                 {
1337                                     int i_value = atoi( psz_value );
1338
1339                                     if( ( i_value >= -5 ) && ( i_value <= 5 ) )
1340                                         p_style->font_style.i_font_size  += ( i_value * p_style->font_style.i_font_size ) / 10;
1341                                     else if( i_value < -5 )
1342                                         p_style->font_style.i_font_size  = - i_value;
1343                                     else if( i_value > 5 )
1344                                         p_style->font_style.i_font_size  = i_value;
1345                                 }
1346                                 else
1347                                     p_style->font_style.i_font_size  = atoi( psz_value );
1348                             }
1349                             else if( !strcasecmp( "italic", psz_name ) )
1350                             {
1351                                 if( !strcasecmp( "yes", psz_value ))
1352                                     p_style->font_style.i_style_flags |= STYLE_ITALIC;
1353                                 else
1354                                     p_style->font_style.i_style_flags &= ~STYLE_ITALIC;
1355                             }
1356                             else if( !strcasecmp( "weight", psz_name ) )
1357                             {
1358                                 if( !strcasecmp( "bold", psz_value ))
1359                                     p_style->font_style.i_style_flags |= STYLE_BOLD;
1360                                 else
1361                                     p_style->font_style.i_style_flags &= ~STYLE_BOLD;
1362                             }
1363                             else if( !strcasecmp( "underline", psz_name ) )
1364                             {
1365                                 if( !strcasecmp( "yes", psz_value ))
1366                                     p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
1367                                 else
1368                                     p_style->font_style.i_style_flags &= ~STYLE_UNDERLINE;
1369                             }
1370                             else if( !strcasecmp( "color", psz_name ) )
1371                             {
1372                                 if( *psz_value == '#' )
1373                                 {
1374                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1375                                     p_style->font_style.i_font_color = (col & 0x00ffffff);
1376                                     p_style->font_style.i_font_alpha = (col >> 24) & 0xff;
1377                                 }
1378                             }
1379                             else if( !strcasecmp( "outline-color", psz_name ) )
1380                             {
1381                                 if( *psz_value == '#' )
1382                                 {
1383                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1384                                     p_style->font_style.i_outline_color = (col & 0x00ffffff);
1385                                     p_style->font_style.i_outline_alpha = (col >> 24) & 0xff;
1386                                 }
1387                             } 
1388                             else if( !strcasecmp( "outline-level", psz_name ) )
1389                             {
1390                                 p_style->font_style.i_outline_width = atoi( psz_value );
1391                             } 
1392                             else if( !strcasecmp( "shadow-color", psz_name ) )
1393                             {
1394                                 if( *psz_value == '#' )
1395                                 {
1396                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1397                                     p_style->font_style.i_shadow_color = (col & 0x00ffffff);
1398                                     p_style->font_style.i_shadow_alpha = (col >> 24) & 0xff;
1399                                 }
1400                             }
1401                             else if( !strcasecmp( "shadow-level", psz_name ) )
1402                             {
1403                                 p_style->font_style.i_shadow_width = atoi( psz_value );
1404                             } 
1405                             else if( !strcasecmp( "back-color", psz_name ) )
1406                             {
1407                                 if( *psz_value == '#' )
1408                                 {
1409                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1410                                     p_style->font_style.i_karaoke_background_color = (col & 0x00ffffff);
1411                                     p_style->font_style.i_karaoke_background_alpha = (col >> 24) & 0xff;
1412                                 }
1413                             }
1414                             else if( !strcasecmp( "spacing", psz_name ) )
1415                             {
1416                                 p_style->font_style.i_spacing = atoi( psz_value );
1417                             } 
1418                         }
1419                         if( psz_name )  free( psz_name );
1420                         if( psz_value ) free( psz_value );
1421                     }
1422                 }
1423                 else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) )
1424                 {
1425                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1426                     {
1427                         char *psz_name = xml_ReaderName ( p_xml_reader );
1428                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1429
1430                         if( psz_name && psz_value )
1431                         {
1432                             if( !strcasecmp( "alignment", psz_name ) )
1433                             {
1434                                 if( !strcasecmp( "TopLeft", psz_value ) )
1435                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
1436                                 else if( !strcasecmp( "TopCenter", psz_value ) )
1437                                     p_style->i_align = SUBPICTURE_ALIGN_TOP;
1438                                 else if( !strcasecmp( "TopRight", psz_value ) )
1439                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
1440                                 else if( !strcasecmp( "MiddleLeft", psz_value ) )
1441                                     p_style->i_align = SUBPICTURE_ALIGN_LEFT;
1442                                 else if( !strcasecmp( "MiddleCenter", psz_value ) )
1443                                     p_style->i_align = 0;
1444                                 else if( !strcasecmp( "MiddleRight", psz_value ) )
1445                                     p_style->i_align = SUBPICTURE_ALIGN_RIGHT;
1446                                 else if( !strcasecmp( "BottomLeft", psz_value ) )
1447                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
1448                                 else if( !strcasecmp( "BottomCenter", psz_value ) )
1449                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM;
1450                                 else if( !strcasecmp( "BottomRight", psz_value ) )
1451                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
1452                             }
1453                             else if( !strcasecmp( "horizontal-margin", psz_name ) )
1454                             {
1455                                 if( strchr( psz_value, '%' ) )
1456                                 {
1457                                     p_style->i_margin_h = 0;
1458                                     p_style->i_margin_percent_h = atoi( psz_value );
1459                                 }
1460                                 else
1461                                 {
1462                                     p_style->i_margin_h = atoi( psz_value );
1463                                     p_style->i_margin_percent_h = 0;
1464                                 }
1465                             }
1466                             else if( !strcasecmp( "vertical-margin", psz_name ) )
1467                             {
1468                                 if( strchr( psz_value, '%' ) )
1469                                 {
1470                                     p_style->i_margin_v = 0;
1471                                     p_style->i_margin_percent_v = atoi( psz_value );
1472                                 }
1473                                 else
1474                                 {
1475                                     p_style->i_margin_v = atoi( psz_value );
1476                                     p_style->i_margin_percent_v = 0;
1477                                 }
1478                             }
1479                         }
1480                         if( psz_name )  free( psz_name );
1481                         if( psz_value ) free( psz_value );
1482                     }
1483                 }
1484                 
1485                 free( psz_node );
1486                 break;
1487         }
1488     }
1489     if( p_style ) free( p_style );
1490 }
1491 /*****************************************************************************
1492  * ParseSSAHeader: Retrieve global formatting information etc
1493  *****************************************************************************/
1494 static void ParseSSAHeader( decoder_t *p_dec )
1495 {
1496     decoder_sys_t *p_sys = p_dec->p_sys;
1497     char *psz_parser = NULL;
1498     char *psz_header = malloc( p_dec->fmt_in.i_extra+1 );
1499     int i_section_type = 1;
1500
1501     memcpy( psz_header, p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra );
1502     psz_header[ p_dec->fmt_in.i_extra] = '\0';
1503
1504     /* Handle [Script Info] section */
1505     psz_parser = strcasestr( psz_header, "[Script Info]" );
1506     if( psz_parser == NULL ) goto eof;
1507
1508     psz_parser = GotoNextLine( psz_parser );
1509
1510     while( psz_parser[0] != '\0' )
1511     {
1512         int temp;
1513         char buffer_text[MAX_LINE + 1];
1514
1515         if( psz_parser[0] == '!' || psz_parser[0] == ';' ) /* comment */;
1516         else if( sscanf( psz_parser, "PlayResX: %d", &temp ) == 1 )
1517             p_sys->i_original_width = ( temp > 0 ) ? temp : -1;
1518         else if( sscanf( psz_parser, "PlayResY: %d", &temp ) == 1 )
1519             p_sys->i_original_height = ( temp > 0 ) ? temp : -1;
1520         else if( sscanf( psz_parser, "Script Type: %8192s", buffer_text ) == 1 )
1521         {
1522             if( !strcasecmp( buffer_text, "V4.00+" ) ) p_sys->b_ass = VLC_TRUE;
1523         }
1524         else if( !strncasecmp( psz_parser, "[V4 Styles]", 11 ) )
1525             i_section_type = 1;
1526         else if( !strncasecmp( psz_parser, "[V4+ Styles]", 12) )
1527         {
1528             i_section_type = 2;
1529             p_sys->b_ass = VLC_TRUE;
1530         }
1531         else if( !strncasecmp( psz_parser, "[Events]", 8 ) )
1532             i_section_type = 4;
1533         else if( !strncasecmp( psz_parser, "Style:", 6 ) )
1534         {
1535             int i_font_size, i_bold, i_italic, i_border, i_outline, i_shadow, i_underline,
1536                 i_strikeout, i_scale_x, i_scale_y, i_spacing, i_align, i_margin_l, i_margin_r, i_margin_v;
1537
1538             char psz_temp_stylename[MAX_LINE+1];
1539             char psz_temp_fontname[MAX_LINE+1];
1540             char psz_temp_color1[MAX_LINE+1];
1541             char psz_temp_color2[MAX_LINE+1];
1542             char psz_temp_color3[MAX_LINE+1];
1543             char psz_temp_color4[MAX_LINE+1];
1544
1545             if( i_section_type == 1 ) /* V4 */
1546             {
1547                 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1548                     psz_temp_stylename, psz_temp_fontname, &i_font_size,
1549                     psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1550                     &i_border, &i_outline, &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 16 )
1551                 {
1552                     ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1553
1554                     p_style->psz_stylename = strdup( psz_temp_stylename );
1555                     p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1556                     p_style->font_style.i_font_size = i_font_size;
1557
1558                     ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, NULL );
1559                     ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, NULL );
1560                     p_style->font_style.i_outline_color = p_style->font_style.i_shadow_color;
1561                     p_style->font_style.i_font_alpha = p_style->font_style.i_outline_alpha = p_style->font_style.i_shadow_alpha = 0x00;
1562                     p_style->font_style.i_style_flags = 0;
1563                     if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1564                     if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1565
1566                     if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1567                     else if( i_border == 3 )
1568                     {
1569                         p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1570                         p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1571                         p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1572                     }
1573                     p_style->font_style.i_shadow_width = i_shadow;
1574                     p_style->font_style.i_outline_width = i_outline;
1575
1576                     p_style->i_align = 0;
1577                     if( i_align == 1 || i_align == 5 || i_align == 9 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1578                     if( i_align == 3 || i_align == 7 || i_align == 11 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1579                     if( i_align < 4 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1580                     else if( i_align < 8 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP; 
1581
1582                     p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1583                     p_style->i_margin_v = i_margin_v;
1584                     p_style->i_margin_percent_h = 0;
1585                     p_style->i_margin_percent_v = 0;
1586
1587                     p_style->font_style.i_karaoke_background_color = 0xffffff;
1588                     p_style->font_style.i_karaoke_background_alpha = 0xff;
1589
1590                     TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1591                 }
1592                 else msg_Warn( p_dec, "SSA v4 styleline parsing failed" );
1593             }
1594             else if( i_section_type == 2 ) /* V4+ */
1595             {
1596                 /* Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour,
1597                    Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline,
1598                    Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
1599                 */
1600                 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%*f,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1601                     psz_temp_stylename, psz_temp_fontname, &i_font_size,
1602                     psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1603                     &i_underline, &i_strikeout, &i_scale_x, &i_scale_y, &i_spacing, &i_border, &i_outline,
1604                     &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 21 )
1605                 {
1606                     ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1607
1608                     p_style->psz_stylename = strdup( psz_temp_stylename );
1609                     p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1610                     p_style->font_style.i_font_size = i_font_size;
1611                     msg_Dbg( p_dec, psz_temp_color1 );
1612                     ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, &p_style->font_style.i_font_alpha );
1613                     ParseColor( p_dec, psz_temp_color3, &p_style->font_style.i_outline_color, &p_style->font_style.i_outline_alpha );
1614                     ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, &p_style->font_style.i_shadow_alpha );
1615
1616                     p_style->font_style.i_style_flags = 0;
1617                     if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1618                     if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1619                     if( i_underline ) p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
1620                     if( i_strikeout ) p_style->font_style.i_style_flags |= STYLE_STRIKEOUT;
1621                     if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1622                     else if( i_border == 3 )
1623                     {
1624                         p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1625                         p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1626                         p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1627                     }
1628                     p_style->font_style.i_shadow_width  = ( i_border == 1 ) ? i_shadow : 0;
1629                     p_style->font_style.i_outline_width = ( i_border == 1 ) ? i_outline : 0;
1630                     p_style->font_style.i_spacing = i_spacing;
1631                     //p_style->font_style.f_angle = f_angle;
1632
1633                     p_style->i_align = 0;
1634                     if( i_align == 0x1 || i_align == 0x4 || i_align == 0x7 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1635                     if( i_align == 0x3 || i_align == 0x6 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1636                     if( i_align == 0x7 || i_align == 0x8 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP;
1637                     if( i_align == 0x1 || i_align == 0x2 || i_align == 0x3 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1638                     p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1639                     p_style->i_margin_v = i_margin_v;
1640                     p_style->i_margin_percent_h = 0;
1641                     p_style->i_margin_percent_v = 0;
1642
1643                     p_style->font_style.i_karaoke_background_color = 0xffffff;
1644                     p_style->font_style.i_karaoke_background_alpha = 0xff;
1645
1646                     /*TODO: Ignored: angle i_scale_x|y (fontscaling), i_encoding */
1647                     TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1648                 }
1649                 else msg_Dbg( p_dec, "SSA V4+ styleline parsing failed" );
1650             }
1651         }
1652         psz_parser = GotoNextLine( psz_parser );
1653     }
1654
1655 eof:
1656     if( psz_header ) free( psz_header );
1657     return;
1658 }
1659
/* Build a plain-text copy of psz_subtitle with all <...> tags removed.
 *  - "<br/>" is turned into a newline, every other tag is dropped;
 *    an unterminated tag swallows the rest of the string.
 *  - The entities &lt; &gt; &amp; and &quot; are decoded; any other '&'
 *    is kept as a literal ampersand.
 * The input string is not modified, so the original text can be reused.
 * Returns a newly allocated string the caller must free(), or NULL if the
 * allocation failed.
 */
static char *StripTags( char *psz_subtitle )
{
    char *psz_text_start;
    char *psz_text;
    char *psz_shrunk;

    /* The output can never be longer than the input */
    psz_text = psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
    if( !psz_text_start )
        return NULL;

    while( *psz_subtitle )
    {
        if( *psz_subtitle == '<' )
        {
            if( strncasecmp( psz_subtitle, "<br/>", 5 ) == 0 )
                *psz_text++ = '\n';

            psz_subtitle += strcspn( psz_subtitle, ">" );
            /* Unterminated tag: stop here rather than stepping past the
             * terminating NUL with the increment below */
            if( *psz_subtitle == '\0' )
                break;
        }
        else if( *psz_subtitle == '&' )
        {
            /* In the four entity cases the match guarantees a ';', so
             * strcspn() stops on it and the increment below consumes it */
            if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
            {
                *psz_text++ = '<';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
            {
                *psz_text++ = '>';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
            {
                *psz_text++ = '&';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&quot;", 6 ))
            {
                *psz_text++ = '\"';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else
            {
                /* Assume it is just a normal ampersand */
                *psz_text++ = '&';
            }
        }
        else
        {
            *psz_text++ = *psz_subtitle;
        }

        psz_subtitle++;
    }
    *psz_text = '\0';

    /* Give back the unused tail; if shrinking fails the original (larger)
     * buffer is still valid, so keep it */
    psz_shrunk = realloc( psz_text_start,
                          (size_t)( psz_text - psz_text_start ) + 1 );
    if( psz_shrunk != NULL )
        psz_text_start = psz_shrunk;

    return psz_text_start;
}
1722
1723 /* Try to respect any style tags present in the subtitle string. The main
1724  * problem here is a lack of adequate specs for the subtitle formats.
1725  * SSA/ASS and USF are both detail spec'ed -- but they are handled elsewhere.
1726  * SAMI has a detailed spec, but extensive rework is needed in the demux
1727  * code to prevent all this style information being excised, as it presently
1728  * does.
1729  * That leaves the others - none of which were (I guess) originally intended
1730  * to be carrying style information. Over time people have used them that way.
1731  * In the absence of specifications from which to work, the tags supported
1732  * have been restricted to the simple set permitted by the USF DTD, ie. :
1733  *  Basic: <br>, <i>, <b>, <u>
1734  *  Extended: <font>
1735  *    Attributes: face
1736  *                family
1737  *                size
1738  *                color
1739  *                outline-color
1740  *                shadow-color
1741  *                outline-level
1742  *                shadow-level
1743  *                back-color
1744  *                alpha
1745  * There is also the further restriction that the subtitle be well-formed
1746  * as an XML entity, ie. the HTML sentence:
1747  *        <b><i>Bold and Italics</b></i>
1748  * doesn't qualify because the tags aren't nested one inside the other.
1749  * <text> tags are automatically added to the output to ensure
1750  * well-formedness.
1751  * If the text doesn't qualify for any reason, a NULL string is
1752  * returned, and the rendering engine will fall back to the
1753  * plain text version of the subtitle.
1754  */
1755 static char *CreateHtmlSubtitle( char *psz_subtitle )
1756 {
1757     char    psz_tagStack[ 100 ];
1758     size_t  i_buf_size     = strlen( psz_subtitle ) + 100;
1759     char   *psz_html_start = malloc( i_buf_size );
1760
1761     psz_tagStack[ 0 ] = '\0';
1762
1763     if( psz_html_start != NULL )
1764     {
1765         char *psz_html = psz_html_start;
1766
1767         strcpy( psz_html, "<text>" );
1768         psz_html += 6;
1769
1770         while( *psz_subtitle )
1771         {
1772             if( *psz_subtitle == '\n' )
1773             {
1774                 strcpy( psz_html, "<br/>" );
1775                 psz_html += 5;
1776                 psz_subtitle++;
1777             }
1778             else if( *psz_subtitle == '<' )
1779             {
1780                 if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
1781                 {
1782                     strcpy( psz_html, "<br/>" );
1783                     psz_html += 5;
1784                     psz_subtitle += 5;
1785                 }
1786                 else if( !strncasecmp( psz_subtitle, "<b>", 3 ) )
1787                 {
1788                     strcpy( psz_html, "<b>" );
1789                     strcat( psz_tagStack, "b" );
1790                     psz_html += 3;
1791                     psz_subtitle += 3;
1792                 }
1793                 else if( !strncasecmp( psz_subtitle, "<i>", 3 ) )
1794                 {
1795                     strcpy( psz_html, "<i>" );
1796                     strcat( psz_tagStack, "i" );
1797                     psz_html += 3;
1798                     psz_subtitle += 3;
1799                 }
1800                 else if( !strncasecmp( psz_subtitle, "<u>", 3 ) )
1801                 {
1802                     strcpy( psz_html, "<u>" );
1803                     strcat( psz_tagStack, "u" );
1804                     psz_html += 3;
1805                     psz_subtitle += 3;
1806                 }
1807                 else if( !strncasecmp( psz_subtitle, "<font ", 6 ))
1808                 {
1809                     const char *psz_attribs[] = { "face=\"", "family=\"", "size=\"",
1810                             "color=\"", "outline-color=\"", "shadow-color=\"",
1811                             "outline-level=\"", "shadow-level=\"", "back-color=\"",
1812                             "alpha=\"", NULL };
1813
1814                     strcpy( psz_html, "<font " );
1815                     strcat( psz_tagStack, "f" );
1816                     psz_html += 6;
1817                     psz_subtitle += 6;
1818
1819                     while( *psz_subtitle != '>' )
1820                     {
1821                         int  k;
1822
1823                         for( k=0; psz_attribs[ k ]; k++ )
1824                         {
1825                             int i_len = strlen( psz_attribs[ k ] );
1826
1827                             if( !strncasecmp( psz_subtitle, psz_attribs[ k ], i_len )) 
1828                             {
1829                                 i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1;
1830
1831                                 strncpy( psz_html, psz_subtitle, i_len );
1832                                 psz_html += i_len;
1833                                 psz_subtitle += i_len;
1834                                 break;
1835                             }
1836                         }
1837                         if( psz_attribs[ k ] == NULL )
1838                         {
1839                             /* Jump over unrecognised tag */
1840                             int i_len = strcspn( psz_subtitle, "\"" ) + 1;
1841
1842                             i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1;
1843                             psz_subtitle += i_len;
1844                         }
1845                         while (*psz_subtitle == ' ')
1846                             *psz_html++ = *psz_subtitle++;
1847                     }
1848                     *psz_html++ = *psz_subtitle++;
1849                 }
1850                 else if( !strncmp( psz_subtitle, "</", 2 ))
1851                 {
1852                     vlc_bool_t  b_match     = VLC_FALSE;
1853                     int         i_len       = strlen( psz_tagStack ) - 1;
1854                     char       *psz_lastTag = NULL;
1855
1856                     if( i_len >= 0 )
1857                     {
1858                         psz_lastTag = psz_tagStack + i_len;
1859                         i_len = 0;
1860
1861                         switch( *psz_lastTag )
1862                         {
1863                             case 'b':
1864                                 b_match = !strncasecmp( psz_subtitle, "</b>", 4 );
1865                                 i_len   = 4;
1866                                 break;
1867                             case 'i':
1868                                 b_match = !strncasecmp( psz_subtitle, "</i>", 4 );
1869                                 i_len   = 4;
1870                                 break;
1871                             case 'u':
1872                                 b_match = !strncasecmp( psz_subtitle, "</u>", 4 );
1873                                 i_len   = 4;
1874                                 break;
1875                             case 'f':
1876                                 b_match = !strncasecmp( psz_subtitle, "</font>", 7 );
1877                                 i_len   = 7;
1878                                 break;
1879                         }
1880                     }
1881                     if( ! b_match )
1882                     {
1883                         /* Not well formed -- kill everything */
1884                         free( psz_html_start );
1885                         psz_html_start = NULL;
1886                         break;
1887                     }
1888                     *psz_lastTag = '\0';
1889                     strncpy( psz_html, psz_subtitle, i_len );
1890                     psz_html += i_len;
1891                     psz_subtitle += i_len;
1892                 }
1893                 else
1894                 {
1895                     psz_subtitle += strcspn( psz_subtitle, ">" );
1896                 }
1897             }
1898             else if( *psz_subtitle == '&' )
1899             {
1900                 if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
1901                 {
1902                     strcpy( psz_html, "&lt;" );
1903                     psz_html += 4;
1904                     psz_subtitle += 4;
1905                 }
1906                 else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
1907                 {
1908                     strcpy( psz_html, "&gt;" );
1909                     psz_html += 4;
1910                     psz_subtitle += 4;
1911                 }
1912                 else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
1913                 {
1914                     strcpy( psz_html, "&amp;" );
1915                     psz_html += 5;
1916                     psz_subtitle += 5;
1917                 }
1918                 else
1919                 {
1920                     strcpy( psz_html, "&amp;" );
1921                     psz_html += 5;
1922                     psz_subtitle++;
1923                 }
1924             }
1925             else
1926             {
1927                 *psz_html = *psz_subtitle;
1928                 if( psz_html > psz_html_start )
1929                 {
1930                     /* Check for double whitespace */
1931                     if((( *psz_html == ' ' ) ||
1932                         ( *psz_html == '\t' )) &&
1933                        (( *(psz_html-1) == ' ' ) ||
1934                         ( *(psz_html-1) == '\t' )))
1935                     {
1936                         strcpy( psz_html, NO_BREAKING_SPACE );
1937                         psz_html += strlen( NO_BREAKING_SPACE ) - 1;
1938                     }
1939                 }
1940                 psz_html++;
1941                 psz_subtitle++;
1942             }
1943
1944             if( ( size_t )( psz_html - psz_html_start ) > i_buf_size - 10 )
1945             {
1946                 int i_len = psz_html - psz_html_start;
1947
1948                 i_buf_size += 100;
1949                 psz_html_start = realloc( psz_html_start, i_buf_size );
1950                 psz_html = psz_html_start + i_len;
1951                 *psz_html = '\0';
1952             }
1953         }
1954         strcpy( psz_html, "</text>" );
1955         psz_html += 7;
1956
1957         if( psz_tagStack[ 0 ] != '\0' )
1958         {
1959             /* Not well formed -- kill everything */
1960             free( psz_html_start );
1961             psz_html_start = NULL;
1962         }
1963         else if( psz_html_start )
1964         {
1965             /* Shrink the memory requirements */
1966             psz_html_start = realloc( psz_html_start,  psz_html - psz_html_start + 1 );
1967         }
1968     }
1969     return psz_html_start;
1970 }
1971
/* The reverse of CreateHtmlSubtitle - given a HTML subtitle, turn it
 * into a plain-text version: tags are removed by StripTags() and every run
 * of whitespace (tab, CR, LF, space) is compacted into a single character.
 * Returns a newly allocated string, or NULL if StripTags() failed.
 */

static char *CreatePlainText( char *psz_subtitle )
{
    char       *psz_text = StripTags( psz_subtitle );
    const char *p_read;
    char       *p_write;

    if( psz_text == NULL )
        return NULL;

    /* Compact in place: the write position never overtakes the read one */
    p_read = p_write = psz_text;
    while( *p_read != '\0' )
    {
        const size_t i_run = strspn( p_read, "\t\r\n " );

        if( i_run == 0 )
        {
            /* Ordinary character, keep it */
            *p_write++ = *p_read++;
            continue;
        }

        /* Favour '\n' over other whitespaces - a run containing at least
         * one newline collapses to '\n', any other run to ' '
         */
        *p_write++ = memchr( p_read, '\n', i_run ) ? '\n' : ' ';
        p_read += i_run;
    }
    *p_write = '\0';

    return psz_text;
}
2010
2011 /****************************************************************************
2012  * download and resize image located at psz_url
2013  ***************************************************************************/
2014 static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec, subpicture_t *p_spu, const char *psz_filename, int i_transparent_color )
2015 {
2016     decoder_sys_t         *p_sys = p_dec->p_sys;
2017     subpicture_region_t   *p_region;
2018     video_format_t         fmt_out;
2019     int                    k;
2020     picture_t             *p_pic = NULL;
2021
2022     for( k = 0; k < p_sys->i_images; k++ )
2023     {
2024         if( p_sys->pp_images &&
2025             !strcmp( p_sys->pp_images[k]->psz_filename, psz_filename ) )
2026         {
2027             p_pic = p_sys->pp_images[k]->p_pic;
2028             break;
2029         }
2030     }
2031
2032     if( !p_pic )
2033     {
2034         msg_Err( p_dec, "Unable to read image %s", psz_filename );
2035         return NULL;
2036     }
2037     
2038     /* Display the feed's image */
2039     memset( &fmt_out, 0, sizeof( video_format_t));
2040
2041     fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A');
2042     fmt_out.i_aspect = VOUT_ASPECT_FACTOR;
2043     fmt_out.i_sar_num = fmt_out.i_sar_den = 1;
2044     fmt_out.i_width =
2045         fmt_out.i_visible_width = p_pic->p[Y_PLANE].i_visible_pitch;
2046     fmt_out.i_height =
2047         fmt_out.i_visible_height = p_pic->p[Y_PLANE].i_visible_lines;
2048
2049     p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt_out );
2050     if( !p_region )
2051     {
2052         msg_Err( p_dec, "cannot allocate SPU region" );
2053         return NULL;
2054     }
2055     vout_CopyPicture( p_dec, &p_region->picture, p_pic );
2056
2057     /* This isn't the best way to do this - if you really want transparency, then
2058      * you're much better off using an image type that supports it like PNG. The
2059      * spec requires this support though.
2060      */
2061     if( i_transparent_color > 0 )
2062     {
2063         uint8_t i_r = ( i_transparent_color >> 16 ) & 0xff;
2064         uint8_t i_g = ( i_transparent_color >>  8 ) & 0xff;
2065         uint8_t i_b = ( i_transparent_color       ) & 0xff;
2066         uint8_t i_y = ( ( (  66 * i_r + 129 * i_g +  25 * i_b + 128 ) >> 8 ) + 16 );
2067         uint8_t i_u =   ( ( -38 * i_r -  74 * i_g + 112 * i_b + 128 ) >> 8 ) + 128 ;
2068         uint8_t i_v =   ( ( 112 * i_r -  94 * i_g -  18 * i_b + 128 ) >> 8 ) + 128 ;
2069
2070         if( ( p_region->picture.Y_PITCH == p_region->picture.U_PITCH ) &&
2071             ( p_region->picture.Y_PITCH == p_region->picture.V_PITCH ) &&
2072             ( p_region->picture.Y_PITCH == p_region->picture.A_PITCH ) )
2073         {
2074             int i_lines = p_region->picture.p[ Y_PLANE ].i_lines;
2075             if( i_lines > p_region->picture.p[ U_PLANE ].i_lines )
2076                 i_lines = p_region->picture.p[ U_PLANE ].i_lines;
2077             if( i_lines > p_region->picture.p[ V_PLANE ].i_lines )
2078                 i_lines = p_region->picture.p[ V_PLANE ].i_lines;
2079             if( i_lines > p_region->picture.p[ A_PLANE ].i_lines )
2080                 i_lines = p_region->picture.p[ A_PLANE ].i_lines;
2081
2082             int   i;
2083
2084             for( i = 0; i < p_region->picture.A_PITCH * i_lines; i++ )
2085             {
2086                 if(( p_region->picture.Y_PIXELS[ i ] == i_y ) &&
2087                    ( p_region->picture.U_PIXELS[ i ] == i_u ) &&
2088                    ( p_region->picture.V_PIXELS[ i ] == i_v ) )
2089                 {
2090                     p_region->picture.A_PIXELS[ i ] = 1;
2091                 }
2092             }
2093         }
2094     }
2095     return p_region;
2096 }