]> git.sesse.net Git - vlc/blob - modules/codec/subtitles/subsusf.c
Subtitles: attempt to get the history back.
[vlc] / modules / codec / subtitles / subsusf.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2006 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@videolan.org>
8  *          Samuel Hocevar <sam@zoy.org>
9  *          Derk-Jan Hartman <hartman at videolan dot org>
10  *          Bernie Purcell <b dot purcell at adbglobal dot com>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include <vlc/vlc.h>
31 #include <vlc_vout.h>
32 #include <vlc_codec.h>
33 #include <vlc_input.h>
34
35 #include <vlc_osd.h>
36 #include <vlc_filter.h>
37 #include <vlc_image.h>
38 #include <vlc_charset.h>
39 #include <vlc_stream.h>
40 #include <vlc_xml.h>
41 #include <errno.h>
42 #include <string.h>
43
44 #define NO_BREAKING_SPACE  "&#160;"
45
/* Bit flags returned by ParsePositionAttributeList() to report which
 * positioning attributes were present on a tag. The pixel and percent
 * variants of a margin are reported separately so the caller can tell
 * which unit the parsed number is in. */
enum
{
    ATTRIBUTE_ALIGNMENT = (1 << 0), /* an "alignment" attribute was found */
    ATTRIBUTE_X         = (1 << 1), /* "horizontal-margin" without a '%' */
    ATTRIBUTE_X_PERCENT = (1 << 2), /* "horizontal-margin" containing '%' */
    ATTRIBUTE_Y         = (1 << 3), /* "vertical-margin" without a '%' */
    ATTRIBUTE_Y_PERCENT = (1 << 4), /* "vertical-margin" containing '%' */
};
54
/* One image kept in decoder_sys_t::pp_images. Both members are owned by
 * the entry: CloseDecoder() releases the picture and frees the name. */
typedef struct
{
    char       *psz_filename; /* heap string; presumably the attachment name - confirm */
    picture_t  *p_pic;        /* may be NULL; released through its pf_release */
} image_attach_t;
60
/* A named style kept in decoder_sys_t::pp_ssa_styles. Text regions look
 * styles up by name and take their font style, alignment and pixel margins
 * from the match. */
typedef struct
{
    char *          psz_stylename; /* The name of the style, no commas allowed */
    text_style_t    font_style;    /* regions point at this member directly */
    int             i_align;       /* copied to the region's i_align */
    int             i_margin_h;    /* copied to the region's i_x */
    int             i_margin_v;    /* copied to the region's i_y */
    int             i_margin_percent_h; /* not applied by CreateTextRegion (see its TODO) */
    int             i_margin_percent_v; /* not applied by CreateTextRegion (see its TODO) */
}  ssa_style_t;
71
72 /*****************************************************************************
73  * decoder_sys_t : decoder descriptor
74  *****************************************************************************/
struct decoder_sys_t
{
    vlc_bool_t          b_ass;                           /* The subs are ASS */
    int                 i_original_height; /* -1 until set; copied to the SPU */
    int                 i_original_width;  /* -1 until set; copied to the SPU */
    int                 i_align;          /* Subtitles alignment on the vout */
    vlc_iconv_t         iconv_handle;            /* handle to iconv instance */
                                      /* (vlc_iconv_t)-1 means no conversion */
    vlc_bool_t          b_autodetect_utf8; /* cleared on first non-UTF-8 text */

    /* Table of named styles, freed entry by entry in CloseDecoder() */
    ssa_style_t         **pp_ssa_styles;
    int                 i_ssa_styles;

    /* Table of attached images, freed entry by entry in CloseDecoder() */
    image_attach_t      **pp_images;
    int                 i_images;
};
90
91 /*****************************************************************************
92  * Local prototypes
93  *****************************************************************************/
/* Module entry points, registered through set_callbacks() */
static int  OpenDecoder   ( vlc_object_t * );
static void CloseDecoder  ( vlc_object_t * );

/* Decoding path: DecodeBlock() hands each block to ParseText(), which
 * dispatches on the input codec to the SSA or USF string parsers */
static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
static subpicture_t *ParseText     ( decoder_t *, block_t * );
static void         ParseSSAHeader ( decoder_t * );
static void         ParseUSFHeader ( decoder_t * );
static void         ParseUSFHeaderTags( decoder_t *, xml_reader_t * );
static void         ParseSSAString ( decoder_t *, char *, subpicture_t * );
static subpicture_region_t *ParseUSFString ( decoder_t *, char *, subpicture_t * );
static void         ParseColor     ( decoder_t *, char *, int *, int * );
static char        *StripTags      ( char * );
static char        *CreateHtmlSubtitle ( char * );
static char        *CreatePlainText( char * );
static int          ParseImageAttachments( decoder_t *p_dec );
static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec, subpicture_t *p_spu, const char *psz_filename, int i_transparent_color );
110
111 #define DEFAULT_NAME "Default"
112 #define MAX_LINE 8192
113
114 /*****************************************************************************
115  * Module descriptor.
116  *****************************************************************************/
/* Choices offered for the "subsdec-encoding" option (passed to
 * change_string_list() in the module descriptor). DEFAULT_NAME selects the
 * fallback path in OpenDecoder(). The empty strings presumably act as group
 * separators in the choice list - confirm against the config UI. */
static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
    "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
    "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
    "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
    "ISO-8859-6", "CP1256", "MacArabic", "",
    "ISO-8859-7", "CP1253", "MacGreek", "",
    "ISO-8859-8", "CP1255", "MacHebrew", "",
    "ISO-8859-9", "CP1254", "MacTurkish", "",
    "ISO-8859-13", "CP1257", "",
    "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
    "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
    "ISO-2022-KR", "EUC-KR", "",
    "MacThai", "KOI8-T", "",
    "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
    "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
    "Macintosh", "",
    "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
    "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
    "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
    "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
    "HPROMAN8", "NEXTSTEP" };
138 /*
139 SSA supports charset selection.
140 The following known charsets are used:
141
142 0 = Ansi - Western European
143 1 = default
144 2 = symbol
145 3 = invalid
146 77 = Mac
147 128 = Japanese (Shift JIS)
148 129 = Hangul
149 130 = Johab
150 134 = GB2312 Simplified Chinese
151 136 = Big5 Traditional Chinese
152 161 = Greek
153 162 = Turkish
154 163 = Vietnamese
155 177 = Hebrew
156 178 = Arabic
157 186 = Baltic
158 204 = Russian (Cyrillic)
159 222 = Thai
160 238 = Eastern European
161 254 = PC 437
162 */
163
/* Values and labels for the "subsdec-align" option, passed together to
 * change_integer_list(); presumably paired by index (0 = Center, 1 = Left,
 * 2 = Right) - confirm. */
static int  pi_justification[] = { 0, 1, 2 };
static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
166
167 #define ENCODING_TEXT N_("Subtitles text encoding")
168 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
169 #define ALIGN_TEXT N_("Subtitles justification")
170 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
171 #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
172 #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
173             "UTF-8 encoding within subtitles files.")
174 #define FORMAT_TEXT N_("Formatted Subtitles")
175 #define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
176  "VLC partly implements this, but you can choose to disable all formatting.")
177
178
/* Registers the module as a "decoder" (score 50) with OpenDecoder and
 * CloseDecoder as callbacks, and declares the four subsdec-* options that
 * OpenDecoder() and ParseText() read. */
vlc_module_begin();
    set_shortname( _("Subtitles"));
    set_description( _("Text subtitles decoder") );
    set_capability( "decoder", 50 );
    set_callbacks( OpenDecoder, CloseDecoder );
    set_category( CAT_INPUT );
    set_subcategory( SUBCAT_INPUT_SCODEC );

    /* Justification, defaults to 0 */
    add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
                 VLC_FALSE );
        change_integer_list( pi_justification, ppsz_justification_text, 0 );
    /* Source character set; DEFAULT_NAME means "not configured" */
    add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
                ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
        change_string_list( ppsz_encodings, 0, 0 );
    /* Only consulted when neither the demuxer nor the user gave a charset */
    add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
              AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
    /* When false, formatting information is not produced */
    add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
                 VLC_FALSE );
vlc_module_end();
198
199 /*****************************************************************************
200  * OpenDecoder: probe the decoder and return score
201  *****************************************************************************
202  * Tries to launch a decoder and return score so that the interface is able
 * to choose.
204  *****************************************************************************/
205 static int OpenDecoder( vlc_object_t *p_this )
206 {
207     decoder_t     *p_dec = (decoder_t*)p_this;
208     decoder_sys_t *p_sys;
209     vlc_value_t    val;
210
211     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
212         p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') &&
213         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
214     {
215         return VLC_EGENERIC;
216     }
217
218     p_dec->pf_decode_sub = DecodeBlock;
219
220     /* Allocate the memory needed to store the decoder's structure */
221     if( ( p_dec->p_sys = p_sys =
222           (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL )
223     {
224         msg_Err( p_dec, "out of memory" );
225         return VLC_ENOMEM;
226     }
227
228     /* init of p_sys */
229     p_sys->i_align = 0;
230     p_sys->iconv_handle = (vlc_iconv_t)-1;
231     p_sys->b_autodetect_utf8 = VLC_FALSE;
232     p_sys->b_ass = VLC_FALSE;
233     p_sys->i_original_height = -1;
234     p_sys->i_original_width = -1;
235     TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
236     TAB_INIT( p_sys->i_images, p_sys->pp_images );
237
238     char *psz_charset = NULL;
239     /* First try demux-specified encoding */
240     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
241     {
242         psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
243         msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
244                  p_dec->fmt_in.subs.psz_encoding ?: "not specified");
245     }
246
247     /* Second, try configured encoding */
248     if (psz_charset == NULL)
249     {
250         psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
251         if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME))
252         {
253             free (psz_charset);
254             psz_charset = NULL;
255         }
256
257         msg_Dbg (p_dec, "trying configured character encoding: %s",
258                  psz_charset ?: "not specified");
259     }
260
261     /* Third, try "local" encoding with optional UTF-8 autodetection */
262     if (psz_charset == NULL)
263     {
264         psz_charset = strdup (GetFallbackEncoding ());
265         msg_Dbg (p_dec, "trying default character encoding: %s",
266                  psz_charset ?: "not specified");
267
268         if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
269         {
270             msg_Dbg (p_dec, "using automatic UTF-8 detection");
271             p_sys->b_autodetect_utf8 = VLC_TRUE;
272         }
273     }
274
275     if (psz_charset == NULL)
276     {
277         psz_charset = strdup ("UTF-8");
278         msg_Dbg (p_dec, "trying hard-coded character encoding: %s",
279                  psz_charset ?: "error");
280     }
281
282     if (psz_charset == NULL)
283     {
284         free (p_sys);
285         return VLC_ENOMEM;
286     }
287
288     if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8"))
289     {
290         p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
291         if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
292             msg_Err (p_dec, "cannot convert from %s: %s", psz_charset,
293                      strerror (errno));
294     }
295     free (psz_charset);
296
297     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
298     var_Get( p_dec, "subsdec-align", &val );
299     p_sys->i_align = val.i_int;
300
301     ParseImageAttachments( p_dec );
302
303     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
304     {
305         if( p_dec->fmt_in.i_extra > 0 )
306             ParseSSAHeader( p_dec );
307     }
308     else if( p_dec->fmt_in.i_codec == VLC_FOURCC('u','s','f',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
309     {
310         if( p_dec->fmt_in.i_extra > 0 )
311             ParseUSFHeader( p_dec );
312     }
313
314     return VLC_SUCCESS;
315 }
316
317 /****************************************************************************
318  * DecodeBlock: the whole thing
319  ****************************************************************************
320  * This function must be fed with complete subtitles units.
321  ****************************************************************************/
322 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
323 {
324     subpicture_t *p_spu = NULL;
325
326     if( !pp_block || *pp_block == NULL ) return NULL;
327
328     p_spu = ParseText( p_dec, *pp_block );
329
330     block_Release( *pp_block );
331     *pp_block = NULL;
332
333     return p_spu;
334 }
335
336 /*****************************************************************************
337  * CloseDecoder: clean up the decoder
338  *****************************************************************************/
339 static void CloseDecoder( vlc_object_t *p_this )
340 {
341     decoder_t *p_dec = (decoder_t *)p_this;
342     decoder_sys_t *p_sys = p_dec->p_sys;
343
344     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
345         vlc_iconv_close( p_sys->iconv_handle );
346
347     if( p_sys->pp_ssa_styles )
348     {
349         int i;
350         for( i = 0; i < p_sys->i_ssa_styles; i++ )
351         {
352             if( !p_sys->pp_ssa_styles[i] )
353                 continue;
354
355             if( p_sys->pp_ssa_styles[i]->psz_stylename )
356                 free( p_sys->pp_ssa_styles[i]->psz_stylename );
357             if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname )
358                 free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname );
359             if( p_sys->pp_ssa_styles[i] )
360                 free( p_sys->pp_ssa_styles[i] );
361         }
362         TAB_CLEAN( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
363     }
364     if( p_sys->pp_images )
365     {
366         int i;
367         for( i = 0; i < p_sys->i_images; i++ )
368         {
369             if( !p_sys->pp_images[i] )
370                 continue;
371
372             if( p_sys->pp_images[i]->p_pic )
373                 p_sys->pp_images[i]->p_pic->pf_release( p_sys->pp_images[i]->p_pic );
374             if( p_sys->pp_images[i]->psz_filename )
375                 free( p_sys->pp_images[i]->psz_filename );
376
377             free( p_sys->pp_images[i] );
378         }
379         TAB_CLEAN( p_sys->i_images, p_sys->pp_images );
380     }
381
382     free( p_sys );
383 }
384
385 /*****************************************************************************
 * ParseText: parse a text subtitle packet and send it to the video output
387  *****************************************************************************/
388 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
389 {
390     decoder_sys_t *p_sys = p_dec->p_sys;
391     subpicture_t *p_spu = NULL;
392     char *psz_subtitle = NULL;
393     video_format_t fmt;
394
395     /* We cannot display a subpicture with no date */
396     if( p_block->i_pts == 0 )
397     {
398         msg_Warn( p_dec, "subtitle without a date" );
399         return NULL;
400     }
401
402     /* Check validity of packet data */
403     /* An "empty" line containing only \0 can be used to force
404        and ephemer picture from the screen */
405     if( p_block->i_buffer < 1 )
406     {
407         msg_Warn( p_dec, "no subtitle data" );
408         return NULL;
409     }
410
411     /* Should be resiliant against bad subtitles */
412     psz_subtitle = strndup( (const char *)p_block->p_buffer,
413                             p_block->i_buffer );
414     if( psz_subtitle == NULL )
415         return NULL;
416
417     if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
418     {
419         if (EnsureUTF8( psz_subtitle ) == NULL)
420         {
421             msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
422                      "Try manually setting a character-encoding "
423                      "before you open the file.") );
424         }
425     }
426     else
427     {
428
429         if( p_sys->b_autodetect_utf8 )
430         {
431             if( IsUTF8( psz_subtitle ) == NULL )
432             {
433                 msg_Dbg( p_dec, "invalid UTF-8 sequence: "
434                          "disabling UTF-8 subtitles autodetection" );
435                 p_sys->b_autodetect_utf8 = VLC_FALSE;
436             }
437         }
438
439         if( !p_sys->b_autodetect_utf8 )
440         {
441             size_t inbytes_left = strlen( psz_subtitle );
442             size_t outbytes_left = 6 * inbytes_left;
443             char *psz_new_subtitle = malloc( outbytes_left + 1 );
444             char *psz_convert_buffer_out = psz_new_subtitle;
445             const char *psz_convert_buffer_in = psz_subtitle;
446
447             size_t ret = vlc_iconv( p_sys->iconv_handle,
448                                     &psz_convert_buffer_in, &inbytes_left,
449                                     &psz_convert_buffer_out, &outbytes_left );
450
451             *psz_convert_buffer_out++ = '\0';
452             free( psz_subtitle );
453
454             if( ( ret == (size_t)(-1) ) || inbytes_left )
455             {
456                 free( psz_new_subtitle );
457                 msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
458                         "Try manually setting a character-encoding "
459                                 "before you open the file.") );
460                 return NULL;
461             }
462
463             psz_subtitle = realloc( psz_new_subtitle,
464                                     psz_convert_buffer_out - psz_new_subtitle );
465         }
466     }
467
468     /* Create the subpicture unit */
469     p_spu = p_dec->pf_spu_buffer_new( p_dec );
470     if( !p_spu )
471     {
472         msg_Warn( p_dec, "can't get spu buffer" );
473         if( psz_subtitle ) free( psz_subtitle );
474         return NULL;
475     }
476
477     p_spu->b_pausable = VLC_TRUE;
478
479     /* Create a new subpicture region */
480     memset( &fmt, 0, sizeof(video_format_t) );
481     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
482     fmt.i_aspect = 0;
483     fmt.i_width = fmt.i_height = 0;
484     fmt.i_x_offset = fmt.i_y_offset = 0;
485     p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
486     if( !p_spu->p_region )
487     {
488         msg_Err( p_dec, "cannot allocate SPU region" );
489         if( psz_subtitle ) free( psz_subtitle );
490         p_dec->pf_spu_buffer_del( p_dec, p_spu );
491         return NULL;
492     }
493
494     /* Decode and format the subpicture unit */
495     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') &&
496         p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') )
497     {
498         /* Normal text subs, easy markup */
499         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
500         p_spu->i_x = p_sys->i_align ? 20 : 0;
501         p_spu->i_y = 10;
502
503         /* Remove formatting from string */
504
505         p_spu->p_region->psz_text = StripTags( psz_subtitle );
506         if( var_CreateGetBool( p_dec, "subsdec-formatted" ) )
507         {
508             p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle );
509         }
510
511         p_spu->i_start = p_block->i_pts;
512         p_spu->i_stop = p_block->i_pts + p_block->i_length;
513         p_spu->b_ephemer = (p_block->i_length == 0);
514         p_spu->b_absolute = VLC_FALSE;
515     }
516     else
517     {
518         /* Decode SSA/USF strings */
519         if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
520             ParseSSAString( p_dec, psz_subtitle, p_spu );
521         else
522         {
523             p_spu->pf_destroy_region( VLC_OBJECT(p_dec), p_spu->p_region );
524             p_spu->p_region = ParseUSFString( p_dec, psz_subtitle, p_spu );
525         }
526
527         p_spu->i_start = p_block->i_pts;
528         p_spu->i_stop = p_block->i_pts + p_block->i_length;
529         p_spu->b_ephemer = (p_block->i_length == 0);
530         p_spu->b_absolute = VLC_FALSE;
531         p_spu->i_original_picture_width = p_sys->i_original_width;
532         p_spu->i_original_picture_height = p_sys->i_original_height;
533     }
534     if( psz_subtitle ) free( psz_subtitle );
535
536     return p_spu;
537 }
538
/* Returns a newly allocated copy of the value of attribute psz_attribute
 * inside the first tag of psz_tag_start, or NULL.
 *
 * The attribute is matched case-insensitively and must be written as
 * name="value". Only text before the first '>' is considered, so later
 * tags are never searched. A match only counts when the name starts the
 * string or follows whitespace, so looking up "color" does not pick up
 * "outline-color". Occurrences that are not a real attribute (for example
 * the name appearing inside another attribute's value) are skipped and the
 * search continues.
 *
 * NULL is returned when either argument is NULL, the name is empty, the
 * string contains no '>', the attribute is absent, or allocation fails.
 * The caller frees the result. */
static char *GrabAttributeValue( const char *psz_attribute,
                                 const char *psz_tag_start )
{
    if( !psz_attribute || !psz_tag_start )
        return NULL;

    const size_t i_name_len = strlen( psz_attribute );
    const char  *psz_tag_end = strchr( psz_tag_start, '>' );

    /* Without a closing '>' there is no tag to look inside */
    if( !psz_tag_end || i_name_len == 0 )
        return NULL;

    const char *psz_scan = psz_tag_start;
    const char *psz_found;

    while( ( psz_found = strcasestr( psz_scan, psz_attribute ) ) != NULL &&
           psz_found < psz_tag_end )
    {
        const char *psz_value = psz_found + i_name_len;
        int b_name_start = ( psz_found == psz_tag_start );

        if( !b_name_start )
        {
            const char c_prev = psz_found[-1];
            b_name_start = ( c_prev == ' '  || c_prev == '\t' ||
                             c_prev == '\n' || c_prev == '\r' );
        }

        /* psz_value[1] is only read when psz_value[0] is '=', so the
         * read never goes past the terminator */
        if( b_name_start && psz_value[0] == '=' && psz_value[1] == '\"' )
        {
            psz_value += 2;
            if( psz_value < psz_tag_end )
                return strndup( psz_value, strcspn( psz_value, "\"" ) );
            return NULL;
        }

        /* Not a usable occurrence: resume just after its first byte */
        psz_scan = psz_found + 1;
    }
    return NULL;
}
564
565 static ssa_style_t *ParseStyle( decoder_sys_t *p_sys, char *psz_subtitle )
566 {
567     ssa_style_t *p_style   = NULL;
568     char        *psz_style = GrabAttributeValue( "style", psz_subtitle );
569
570     if( psz_style )
571     {
572         int i;
573
574         for( i = 0; i < p_sys->i_ssa_styles; i++ )
575         {
576             if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
577                 p_style = p_sys->pp_ssa_styles[i];
578         }
579         free( psz_style );
580     }
581     return p_style;
582 }
583
584 static int ParsePositionAttributeList( char *psz_subtitle, int *i_align, int *i_x, int *i_y )
585 {
586     int   i_mask = 0;
587
588     char *psz_align    = GrabAttributeValue( "alignment", psz_subtitle );
589     char *psz_margin_x = GrabAttributeValue( "horizontal-margin", psz_subtitle );
590     char *psz_margin_y = GrabAttributeValue( "vertical-margin", psz_subtitle );
591     /* -- UNSUPPORTED
592     char *psz_relative = GrabAttributeValue( "relative-to", psz_subtitle );
593     char *psz_rotate_x = GrabAttributeValue( "rotate-x", psz_subtitle );
594     char *psz_rotate_y = GrabAttributeValue( "rotate-y", psz_subtitle );
595     char *psz_rotate_z = GrabAttributeValue( "rotate-z", psz_subtitle );
596     */
597
598     *i_align = SUBPICTURE_ALIGN_BOTTOM;
599     *i_x = 0;
600     *i_y = 0;
601
602     if( psz_align )
603     {
604         if( !strcasecmp( "TopLeft", psz_align ) )
605             *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
606         else if( !strcasecmp( "TopCenter", psz_align ) )
607             *i_align = SUBPICTURE_ALIGN_TOP;
608         else if( !strcasecmp( "TopRight", psz_align ) )
609             *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
610         else if( !strcasecmp( "MiddleLeft", psz_align ) )
611             *i_align = SUBPICTURE_ALIGN_LEFT;
612         else if( !strcasecmp( "MiddleCenter", psz_align ) )
613             *i_align = 0;
614         else if( !strcasecmp( "MiddleRight", psz_align ) )
615             *i_align = SUBPICTURE_ALIGN_RIGHT;
616         else if( !strcasecmp( "BottomLeft", psz_align ) )
617             *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
618         else if( !strcasecmp( "BottomCenter", psz_align ) )
619             *i_align = SUBPICTURE_ALIGN_BOTTOM;
620         else if( !strcasecmp( "BottomRight", psz_align ) )
621             *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
622
623         i_mask |= ATTRIBUTE_ALIGNMENT;
624         free( psz_align );
625     }
626     if( psz_margin_x )
627     {
628         *i_x = atoi( psz_margin_x );
629         if( strchr( psz_margin_x, '%' ) )
630             i_mask |= ATTRIBUTE_X_PERCENT;
631         else
632             i_mask |= ATTRIBUTE_X;
633
634         free( psz_margin_x );
635     }
636     if( psz_margin_y )
637     {
638         *i_y = atoi( psz_margin_y );
639         if( strchr( psz_margin_y, '%' ) )
640             i_mask |= ATTRIBUTE_Y_PERCENT;
641         else
642             i_mask |= ATTRIBUTE_Y;
643
644         free( psz_margin_y );
645     }
646     return i_mask;
647 }
648
649 static void SetupPositions( subpicture_region_t *p_region, char *psz_subtitle )
650 {
651     int           i_mask = 0;
652     int           i_align;
653     int           i_x, i_y;
654
655     i_mask = ParsePositionAttributeList( psz_subtitle, &i_align, &i_x, &i_y );
656
657     if( i_mask & ATTRIBUTE_ALIGNMENT )
658         p_region->i_align = i_align;
659
660     /* TODO: Setup % based offsets properly, without adversely affecting
661      *       everything else in vlc. Will address with separate patch, to
662      *       prevent this one being any more complicated.
663      */
664     if( i_mask & ATTRIBUTE_X )
665         p_region->i_x = i_x;
666     else if( i_mask & ATTRIBUTE_X_PERCENT )
667         p_region->i_x = 0;
668
669     if( i_mask & ATTRIBUTE_Y )
670         p_region->i_y = i_y;
671     else if( i_mask & ATTRIBUTE_Y_PERCENT )
672         p_region->i_y = 0;
673 }
674
675 static subpicture_region_t *CreateTextRegion( decoder_t *p_dec,
676                                               subpicture_t *p_spu,
677                                               char *psz_subtitle,
678                                               int i_len,
679                                               int i_sys_align )
680 {
681     decoder_sys_t        *p_sys = p_dec->p_sys;
682     subpicture_region_t  *p_text_region;
683     video_format_t        fmt;
684
685     /* Create a new subpicture region */
686     memset( &fmt, 0, sizeof(video_format_t) );
687     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
688     fmt.i_aspect = 0;
689     fmt.i_width = fmt.i_height = 0;
690     fmt.i_x_offset = fmt.i_y_offset = 0;
691     p_text_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
692
693     if( p_text_region != NULL )
694     {
695         ssa_style_t  *p_style = NULL;
696
697         p_text_region->psz_text = NULL;
698         p_text_region->psz_html = strndup( psz_subtitle, i_len );
699         if( ! p_text_region->psz_html )
700         {
701             msg_Err( p_dec, "out of memory" );
702             p_spu->pf_destroy_region( VLC_OBJECT(p_dec), p_text_region );
703             return NULL;
704         }
705
706         p_style = ParseStyle( p_sys, p_text_region->psz_html );
707         if( !p_style )
708         {
709             int i;
710
711             for( i = 0; i < p_sys->i_ssa_styles; i++ )
712             {
713                 if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
714                     p_style = p_sys->pp_ssa_styles[i];
715             }
716         }
717
718         if( p_style )
719         {
720             msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename );
721
722             p_text_region->p_style = &p_style->font_style;
723             p_text_region->i_align = p_style->i_align;
724
725             /* TODO: Setup % based offsets properly, without adversely affecting
726              *       everything else in vlc. Will address with separate patch,
727              *       to prevent this one being any more complicated.
728
729                      * p_style->i_margin_percent_h;
730                      * p_style->i_margin_percent_v;
731              */
732             p_text_region->i_x         = p_style->i_margin_h;
733             p_text_region->i_y         = p_style->i_margin_v;
734
735         }
736         else
737         {
738             p_text_region->i_align = SUBPICTURE_ALIGN_BOTTOM | i_sys_align;
739             p_text_region->i_x = i_sys_align ? 20 : 0;
740             p_text_region->i_y = 10;
741         }
742         /* Look for position arguments which may override the style-based
743          * defaults.
744          */
745         SetupPositions( p_text_region, psz_subtitle );
746
747         p_text_region->p_next = NULL;
748     }
749     return p_text_region;
750 }
751
752 static subpicture_region_t *ParseUSFString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
753 {
754     decoder_sys_t        *p_sys = p_dec->p_sys;
755     subpicture_t         *p_spu = p_spu_in;
756     subpicture_region_t  *p_region_first = NULL;
757     subpicture_region_t  *p_region_upto  = p_region_first;
758
759     while( *psz_subtitle )
760     {
761         if( *psz_subtitle == '<' )
762         {
763             char *psz_end = NULL;
764
765             if(( !strncasecmp( psz_subtitle, "<text ", 6 )) ||
766                ( !strncasecmp( psz_subtitle, "<text>", 6 )))
767             {
768                 psz_end = strcasestr( psz_subtitle, "</text>" );
769
770                 if( psz_end )
771                 {
772                     subpicture_region_t  *p_text_region;
773
774                     psz_end += strcspn( psz_end, ">" ) + 1;
775
776                     p_text_region = CreateTextRegion( p_dec,
777                                                       p_spu,
778                                                       psz_subtitle,
779                                                       psz_end - psz_subtitle,
780                                                       p_sys->i_align );
781
782                     if( p_text_region )
783                     {
784                         p_text_region->psz_text = CreatePlainText( p_text_region->psz_html );
785
786                         if( ! var_CreateGetBool( p_dec, "subsdec-formatted" ) )
787                         {
788                             free( p_text_region->psz_html );
789                             p_text_region->psz_html = NULL;
790                         }
791                     }
792
793                     if( !p_region_first )
794                     {
795                         p_region_first = p_region_upto = p_text_region;
796                     }
797                     else if( p_text_region )
798                     {
799                         p_region_upto->p_next = p_text_region;
800                         p_region_upto = p_region_upto->p_next;
801                     }
802                 }
803             }
804             else if(( !strncasecmp( psz_subtitle, "<karaoke ", 9 )) ||
805                     ( !strncasecmp( psz_subtitle, "<karaoke>", 9 )))
806             {
807                 psz_end = strcasestr( psz_subtitle, "</karaoke>" );
808
809                 if( psz_end )
810                 {
811                     subpicture_region_t  *p_text_region;
812
813                     psz_end += strcspn( psz_end, ">" ) + 1;
814
815                     p_text_region = CreateTextRegion( p_dec,
816                                                       p_spu,
817                                                       psz_subtitle,
818                                                       psz_end - psz_subtitle,
819                                                       p_sys->i_align );
820
821                     if( p_text_region )
822                     {
823                         if( ! var_CreateGetBool( p_dec, "subsdec-formatted" ) )
824                         {
825                             free( p_text_region->psz_html );
826                             p_text_region->psz_html = NULL;
827                         }
828                     }
829                     if( !p_region_first )
830                     {
831                         p_region_first = p_region_upto = p_text_region;
832                     }
833                     else if( p_text_region )
834                     {
835                         p_region_upto->p_next = p_text_region;
836                         p_region_upto = p_region_upto->p_next;
837                     }
838                 }
839             }
840             else if(( !strncasecmp( psz_subtitle, "<image ", 7 )) ||
841                     ( !strncasecmp( psz_subtitle, "<image>", 7 )))
842             {
843                 subpicture_region_t *p_image_region = NULL;
844
845                 char *psz_end = strcasestr( psz_subtitle, "</image>" );
846                 char *psz_content = strchr( psz_subtitle, '>' );
847                 int   i_transparent = -1;
848
849                 /* If a colorkey parameter is specified, then we have to map
850                  * that index in the picture through as transparent (it is
851                  * required by the USF spec but is also recommended that if the
852                  * creator really wants a transparent colour that they use a
853                  * type like PNG that properly supports it; this goes doubly
854                  * for VLC because the pictures are stored internally in YUV
855                  * and the resulting colour-matching may not produce the
856                  * desired results.)
857                  */
858                 char *psz_tmp = GrabAttributeValue( "colorkey", psz_subtitle );
859                 if( psz_tmp )
860                 {
861                     if( *psz_tmp == '#' )
862                         i_transparent = strtol( psz_tmp + 1, NULL, 16 ) & 0x00ffffff;
863                     free( psz_tmp );
864                 }
865                 if( psz_content && ( psz_content < psz_end ) )
866                 {
867                     char *psz_filename = strndup( &psz_content[1], psz_end - &psz_content[1] );
868                     if( psz_filename )
869                     {
870                         p_image_region = LoadEmbeddedImage( p_dec, p_spu, psz_filename, i_transparent );
871                         free( psz_filename );
872                     }
873                 }
874
875                 if( psz_end ) psz_end += strcspn( psz_end, ">" ) + 1;
876
877                 if( p_image_region )
878                 {
879                     SetupPositions( p_image_region, psz_subtitle );
880
881                     p_image_region->p_next   = NULL;
882                     p_image_region->psz_text = NULL;
883                     p_image_region->psz_html = NULL;
884
885                 }
886                 if( !p_region_first )
887                 {
888                     p_region_first = p_region_upto = p_image_region;
889                 }
890                 else if( p_image_region )
891                 {
892                     p_region_upto->p_next = p_image_region;
893                     p_region_upto = p_region_upto->p_next;
894                 }
895             }
896             if( psz_end )
897                 psz_subtitle = psz_end - 1;
898
899             psz_subtitle += strcspn( psz_subtitle, ">" );
900         }
901
902         psz_subtitle++;
903     }
904
905     return p_region_first;
906 }
907
908 static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
909 {
910     /* We expect MKV formatted SSA:
911      * ReadOrder, Layer, Style, CharacterName, MarginL, MarginR,
912      * MarginV, Effect, Text */
913     decoder_sys_t   *p_sys = p_dec->p_sys;
914     subpicture_t    *p_spu = p_spu_in;
915     ssa_style_t     *p_style = NULL;
916     char            *psz_new_subtitle = NULL;
917     char            *psz_buffer_sub = NULL;
918     char            *psz_style = NULL;
919     char            *psz_style_start = NULL;
920     char            *psz_style_end = NULL;
921     int             i_text = 0, i_comma = 0, i_strlen = 0, i;
922     int             i_margin_l = 0, i_margin_r = 0, i_margin_v = 0;
923
924     psz_buffer_sub = psz_subtitle;
925
926     p_spu->p_region->psz_html = NULL;
927
928     i_comma = 0;
929     while( i_comma < 8 && *psz_buffer_sub != '\0' )
930     {
931         if( *psz_buffer_sub == ',' )
932         {
933             i_comma++;
934             if( i_comma == 2 )
935                 psz_style_start = &psz_buffer_sub[1];
936             else if( i_comma == 3 )
937                 psz_style_end = &psz_buffer_sub[0];
938             else if( i_comma == 4 )
939                 i_margin_l = (int)strtol( &psz_buffer_sub[1], NULL, 10 );
940             else if( i_comma == 5 )
941                 i_margin_r = (int)strtol( &psz_buffer_sub[1], NULL, 10 );
942             else if( i_comma == 6 )
943                 i_margin_v = (int)strtol( &psz_buffer_sub[1], NULL, 10 );
944         }
945         psz_buffer_sub++;
946     }
947
948     if( *psz_buffer_sub == '\0' && i_comma == 8 )
949     {
950         msg_Dbg( p_dec, "couldn't find all fields in this SSA line" );
951         return;
952     }
953
954     psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
955     i_text = 0;
956     while( psz_buffer_sub[0] != '\0' )
957     {
958         if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'n' )
959         {
960             psz_new_subtitle[i_text] = ' ';
961             i_text++;
962             psz_buffer_sub += 2;
963         }
964         else if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'N' )
965         {
966             psz_new_subtitle[i_text] = '\n';
967             i_text++;
968             psz_buffer_sub += 2;
969         }
970         else if( psz_buffer_sub[0] == '{' &&
971                  psz_buffer_sub[1] == '\\' )
972         {
973             /* SSA control code */
974             while( psz_buffer_sub[0] != '\0' &&
975                    psz_buffer_sub[0] != '}' )
976             {
977                 psz_buffer_sub++;
978             }
979             psz_buffer_sub++;
980         }
981         else
982         {
983             psz_new_subtitle[i_text] = psz_buffer_sub[0];
984             i_text++;
985             psz_buffer_sub++;
986         }
987     }
988     psz_new_subtitle[i_text] = '\0';
989
990     i_strlen = __MAX( psz_style_end - psz_style_start, 0);
991     psz_style = strndup( psz_style_start, i_strlen );
992
993     for( i = 0; i < p_sys->i_ssa_styles; i++ )
994     {
995         if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
996             p_style = p_sys->pp_ssa_styles[i];
997     }
998     if( psz_style ) free( psz_style );
999
1000     p_spu->p_region->psz_text = psz_new_subtitle;
1001     if( p_style == NULL )
1002     {
1003         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
1004         p_spu->i_x = p_sys->i_align ? 20 : 0;
1005         p_spu->i_y = 10;
1006     }
1007     else
1008     {
1009         msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename);
1010         p_spu->p_region->p_style = &p_style->font_style;
1011         p_spu->p_region->i_align = p_style->i_align;
1012         if( p_style->i_align & SUBPICTURE_ALIGN_LEFT )
1013         {
1014             p_spu->i_x = (i_margin_l) ? i_margin_l : p_style->i_margin_h;
1015         }
1016         else if( p_style->i_align & SUBPICTURE_ALIGN_RIGHT )
1017         {
1018             p_spu->i_x = (i_margin_r) ? i_margin_r : p_style->i_margin_h;
1019         }
1020         p_spu->i_y = (i_margin_v) ? i_margin_v : p_style->i_margin_v;
1021     }
1022 }
1023
/*****************************************************************************
 * GotoNextLine: return a pointer to the start of the next line
 *****************************************************************************
 * Skips everything up to the first '\n' or '\r', then the whole run of
 * consecutive '\n' / '\r' characters.  If the text holds no line break the
 * returned pointer addresses its terminating NUL.
 *****************************************************************************/
static char* GotoNextLine( char *psz_text )
{
    /* Rest of the current line ... */
    char *p_break = psz_text + strcspn( psz_text, "\r\n" );

    /* ... followed by every line-break character that comes after it. */
    return p_break + strspn( p_break, "\r\n" );
}
1041
1042 /*****************************************************************************
1043  * ParseColor: SSA stores color in BBGGRR, in ASS it uses AABBGGRR
1044  * The string value in the string can be a pure integer, or hexadecimal &HBBGGRR
1045  *****************************************************************************/
1046 static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *pi_alpha )
1047 {
1048     int i_color = 0;
1049     if( !strncasecmp( psz_color, "&H", 2 ) )
1050     {
1051         /* textual HEX representation */
1052         i_color = (int) strtol( psz_color+2, NULL, 16 );
1053     }
1054     else i_color = (int) strtol( psz_color, NULL, 0 );
1055
1056     *pi_color = 0;
1057     *pi_color |= ( ( i_color & 0x000000FF ) << 16 ); /* Red */
1058     *pi_color |= ( ( i_color & 0x0000FF00 ) );       /* Green */
1059     *pi_color |= ( ( i_color & 0x00FF0000 ) >> 16 ); /* Blue */
1060
1061     if( pi_alpha != NULL )
1062         *pi_alpha = ( i_color & 0xFF000000 ) >> 24;
1063 }
1064
1065 static int ParseImageAttachments( decoder_t *p_dec )
1066 {
1067     decoder_sys_t        *p_sys = p_dec->p_sys;
1068     input_attachment_t  **pp_attachments;
1069     int                   i_attachments_cnt;
1070     int                   k = 0;
1071
1072     if( VLC_SUCCESS != decoder_GetInputAttachments( p_dec, &pp_attachments, &i_attachments_cnt ))
1073         return VLC_EGENERIC;
1074
1075     for( k = 0; k < i_attachments_cnt; k++ )
1076     {
1077         input_attachment_t *p_attach = pp_attachments[k];
1078
1079         vlc_fourcc_t  type  = 0;
1080
1081         if( ( !strcmp( p_attach->psz_mime, "image/bmp" ) )      || /* BMP */
1082             ( !strcmp( p_attach->psz_mime, "image/x-bmp" ) )    ||
1083             ( !strcmp( p_attach->psz_mime, "image/x-bitmap" ) ) ||
1084             ( !strcmp( p_attach->psz_mime, "image/x-ms-bmp" ) ) )
1085         {
1086              type = VLC_FOURCC('b','m','p',' ');
1087         }
1088         else if( ( !strcmp( p_attach->psz_mime, "image/x-portable-anymap" ) )  || /* PNM */
1089                  ( !strcmp( p_attach->psz_mime, "image/x-portable-bitmap" ) )  || /* PBM */
1090                  ( !strcmp( p_attach->psz_mime, "image/x-portable-graymap" ) ) || /* PGM */
1091                  ( !strcmp( p_attach->psz_mime, "image/x-portable-pixmap" ) ) )   /* PPM */
1092         {
1093             type = VLC_FOURCC('p','n','m',' ');
1094         }
1095         else if ( !strcmp( p_attach->psz_mime, "image/gif" ) )         /* GIF */
1096             type = VLC_FOURCC('g','i','f',' ');
1097         else if ( !strcmp( p_attach->psz_mime, "image/jpeg" ) )        /* JPG, JPEG */
1098             type = VLC_FOURCC('j','p','e','g');
1099         else if ( !strcmp( p_attach->psz_mime, "image/pcx" ) )         /* PCX */
1100             type = VLC_FOURCC('p','c','x',' ');
1101         else if ( !strcmp( p_attach->psz_mime, "image/png" ) )         /* PNG */
1102             type = VLC_FOURCC('p','n','g',' ');
1103         else if ( !strcmp( p_attach->psz_mime, "image/tiff" ) )        /* TIF, TIFF */
1104             type = VLC_FOURCC('t','i','f','f');
1105         else if ( !strcmp( p_attach->psz_mime, "image/x-tga" ) )       /* TGA */
1106             type = VLC_FOURCC('t','g','a',' ');
1107         else if ( !strcmp( p_attach->psz_mime, "image/x-xpixmap") )    /* XPM */
1108             type = VLC_FOURCC('x','p','m',' ');
1109
1110         if( ( type != 0 ) &&
1111             ( p_attach->i_data > 0 ) &&
1112             ( p_attach->p_data != NULL ) )
1113         {
1114             picture_t         *p_pic = NULL;
1115             image_handler_t   *p_image;
1116
1117             p_image = image_HandlerCreate( p_dec );
1118             if( p_image != NULL )
1119             {
1120                 block_t   *p_block;
1121
1122                 p_block = block_New( p_image->p_parent, p_attach->i_data );
1123
1124                 if( p_block != NULL )
1125                 {
1126                     video_format_t     fmt_in;
1127                     video_format_t     fmt_out;
1128
1129                     memcpy( p_block->p_buffer, p_attach->p_data, p_attach->i_data );
1130
1131                     memset( &fmt_in,  0, sizeof( video_format_t));
1132                     memset( &fmt_out, 0, sizeof( video_format_t));
1133
1134                     fmt_in.i_chroma  = type;
1135                     fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A');
1136
1137                     /* Find a suitable decoder module */
1138                     if( module_Exists( p_dec, "SDL Image decoder" ) )
1139                     {
1140                         /* ffmpeg thinks it can handle bmp properly but it can't (at least
1141                          * not all of them), so use sdl_image if it is available */
1142
1143                         vlc_value_t val;
1144
1145                         var_Create( p_dec, "codec", VLC_VAR_MODULE | VLC_VAR_DOINHERIT );
1146                         val.psz_string = (char*) "sdl_image";
1147                         var_Set( p_dec, "codec", val );
1148                     }
1149
1150                     p_pic = image_Read( p_image, p_block, &fmt_in, &fmt_out );
1151                     var_Destroy( p_dec, "codec" );
1152                 }
1153
1154                 image_HandlerDelete( p_image );
1155             }
1156             if( p_pic )
1157             {
1158                 image_attach_t *p_picture = malloc( sizeof(image_attach_t) );
1159
1160                 if( p_picture )
1161                 {
1162                     p_picture->psz_filename = strdup( p_attach->psz_name );
1163                     p_picture->p_pic = p_pic;
1164
1165                     TAB_APPEND( p_sys->i_images, p_sys->pp_images, p_picture );
1166                 }
1167             }
1168         }
1169         vlc_input_attachment_Delete( pp_attachments[ k ] );
1170     }
1171     free( pp_attachments );
1172
1173     return VLC_SUCCESS;
1174 }
1175
1176 /*****************************************************************************
1177  * ParseUSFHeader: Retrieve global formatting information etc
1178  *****************************************************************************/
1179 static void ParseUSFHeader( decoder_t *p_dec )
1180 {
1181     stream_t      *p_sub = NULL;
1182     xml_t         *p_xml = NULL;
1183     xml_reader_t  *p_xml_reader = NULL;
1184
1185     p_sub = stream_MemoryNew( VLC_OBJECT(p_dec),
1186                               p_dec->fmt_in.p_extra,
1187                               p_dec->fmt_in.i_extra,
1188                               VLC_TRUE );
1189     if( !p_sub )
1190         return;
1191
1192     p_xml = xml_Create( p_dec );
1193     if( p_xml )
1194     {
1195         p_xml_reader = xml_ReaderCreate( p_xml, p_sub );
1196         if( p_xml_reader )
1197         {
1198             /* Look for Root Node */
1199             if( xml_ReaderRead( p_xml_reader ) == 1 )
1200             {
1201                 char *psz_node = xml_ReaderName( p_xml_reader );
1202
1203                 if( !strcasecmp( "usfsubtitles", psz_node ) )
1204                     ParseUSFHeaderTags( p_dec, p_xml_reader );
1205
1206                 free( psz_node );
1207             }
1208
1209             xml_ReaderDelete( p_xml, p_xml_reader );
1210         }
1211         xml_Delete( p_xml );
1212     }
1213     stream_Delete( p_sub );
1214 }
1215
1216 static void ParseUSFHeaderTags( decoder_t *p_dec, xml_reader_t *p_xml_reader )
1217 {
1218     decoder_sys_t *p_sys = p_dec->p_sys;
1219     char *psz_node;
1220     ssa_style_t *p_style = NULL;
1221     int i_style_level = 0;
1222     int i_metadata_level = 0;
1223
1224     while ( xml_ReaderRead( p_xml_reader ) == 1 )
1225     {
1226         switch ( xml_ReaderNodeType( p_xml_reader ) )
1227         {
1228             case XML_READER_TEXT:
1229             case XML_READER_NONE:
1230                 break;
1231             case XML_READER_ENDELEM:
1232                 psz_node = xml_ReaderName( p_xml_reader );
1233
1234                 if( !psz_node )
1235                     break;
1236                 switch (i_style_level)
1237                 {
1238                     case 0:
1239                         if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) )
1240                         {
1241                             i_metadata_level--;
1242                         }
1243                         break;
1244                     case 1:
1245                         if( !strcasecmp( "styles", psz_node ) )
1246                         {
1247                             i_style_level--;
1248                         }
1249                         break;
1250                     case 2:
1251                         if( !strcasecmp( "style", psz_node ) )
1252                         {
1253                             TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1254
1255                             p_style = NULL;
1256                             i_style_level--;
1257                         }
1258                         break;
1259                 }
1260
1261                 free( psz_node );
1262                 break;
1263             case XML_READER_STARTELEM:
1264                 psz_node = xml_ReaderName( p_xml_reader );
1265
1266                 if( !psz_node )
1267                     break;
1268
1269                 if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) )
1270                 {
1271                     i_metadata_level++;
1272                 }
1273                 else if( !strcasecmp( "resolution", psz_node ) && (i_metadata_level == 1) )
1274                 {
1275                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1276                     {
1277                         char *psz_name = xml_ReaderName ( p_xml_reader );
1278                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1279
1280                         if( psz_name && psz_value )
1281                         {
1282                             if( !strcasecmp( "x", psz_name ) )
1283                                 p_sys->i_original_width = atoi( psz_value );
1284                             else if( !strcasecmp( "y", psz_name ) )
1285                                 p_sys->i_original_height = atoi( psz_value );
1286                         }
1287                         if( psz_name )  free( psz_name );
1288                         if( psz_value ) free( psz_value );
1289                     }
1290                 }
1291                 else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) )
1292                 {
1293                     i_style_level++;
1294                 }
1295                 else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) )
1296                 {
1297                     i_style_level++;
1298
1299                     p_style = calloc( 1, sizeof(ssa_style_t) );
1300                     if( ! p_style )
1301                     {
1302                         msg_Err( p_dec, "out of memory" );
1303                         free( psz_node );
1304                         break;
1305                     }
1306                     /* All styles are supposed to default to Default, and then
1307                      * one or more settings are over-ridden.
1308                      * At the moment this only effects styles defined AFTER
1309                      * Default in the XML
1310                      */
1311                     int i;
1312                     for( i = 0; i < p_sys->i_ssa_styles; i++ )
1313                     {
1314                         if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
1315                         {
1316                             ssa_style_t *p_default_style = p_sys->pp_ssa_styles[i];
1317
1318                             memcpy( p_style, p_default_style, sizeof( ssa_style_t ) );
1319                             p_style->font_style.psz_fontname = strdup( p_style->font_style.psz_fontname );
1320                             p_style->psz_stylename = NULL;
1321                         }
1322                     }
1323
1324                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1325                     {
1326                         char *psz_name = xml_ReaderName ( p_xml_reader );
1327                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1328
1329                         if( psz_name && psz_value )
1330                         {
1331                             if( !strcasecmp( "name", psz_name ) )
1332                                 p_style->psz_stylename = strdup( psz_value);
1333                         }
1334                         if( psz_name )  free( psz_name );
1335                         if( psz_value ) free( psz_value );
1336                     }
1337                 }
1338                 else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) )
1339                 {
1340                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1341                     {
1342                         char *psz_name = xml_ReaderName ( p_xml_reader );
1343                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1344
1345                         if( psz_name && psz_value )
1346                         {
1347                             if( !strcasecmp( "face", psz_name ) )
1348                             {
1349                                 if( p_style->font_style.psz_fontname ) free( p_style->font_style.psz_fontname );
1350                                 p_style->font_style.psz_fontname = strdup( psz_value );
1351                             }
1352                             else if( !strcasecmp( "size", psz_name ) )
1353                             {
1354                                 if( ( *psz_value == '+' ) || ( *psz_value == '-' ) )
1355                                 {
1356                                     int i_value = atoi( psz_value );
1357
1358                                     if( ( i_value >= -5 ) && ( i_value <= 5 ) )
1359                                         p_style->font_style.i_font_size  += ( i_value * p_style->font_style.i_font_size ) / 10;
1360                                     else if( i_value < -5 )
1361                                         p_style->font_style.i_font_size  = - i_value;
1362                                     else if( i_value > 5 )
1363                                         p_style->font_style.i_font_size  = i_value;
1364                                 }
1365                                 else
1366                                     p_style->font_style.i_font_size  = atoi( psz_value );
1367                             }
1368                             else if( !strcasecmp( "italic", psz_name ) )
1369                             {
1370                                 if( !strcasecmp( "yes", psz_value ))
1371                                     p_style->font_style.i_style_flags |= STYLE_ITALIC;
1372                                 else
1373                                     p_style->font_style.i_style_flags &= ~STYLE_ITALIC;
1374                             }
1375                             else if( !strcasecmp( "weight", psz_name ) )
1376                             {
1377                                 if( !strcasecmp( "bold", psz_value ))
1378                                     p_style->font_style.i_style_flags |= STYLE_BOLD;
1379                                 else
1380                                     p_style->font_style.i_style_flags &= ~STYLE_BOLD;
1381                             }
1382                             else if( !strcasecmp( "underline", psz_name ) )
1383                             {
1384                                 if( !strcasecmp( "yes", psz_value ))
1385                                     p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
1386                                 else
1387                                     p_style->font_style.i_style_flags &= ~STYLE_UNDERLINE;
1388                             }
1389                             else if( !strcasecmp( "color", psz_name ) )
1390                             {
1391                                 if( *psz_value == '#' )
1392                                 {
1393                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1394                                     p_style->font_style.i_font_color = (col & 0x00ffffff);
1395                                     p_style->font_style.i_font_alpha = (col >> 24) & 0xff;
1396                                 }
1397                             }
1398                             else if( !strcasecmp( "outline-color", psz_name ) )
1399                             {
1400                                 if( *psz_value == '#' )
1401                                 {
1402                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1403                                     p_style->font_style.i_outline_color = (col & 0x00ffffff);
1404                                     p_style->font_style.i_outline_alpha = (col >> 24) & 0xff;
1405                                 }
1406                             }
1407                             else if( !strcasecmp( "outline-level", psz_name ) )
1408                             {
1409                                 p_style->font_style.i_outline_width = atoi( psz_value );
1410                             }
1411                             else if( !strcasecmp( "shadow-color", psz_name ) )
1412                             {
1413                                 if( *psz_value == '#' )
1414                                 {
1415                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1416                                     p_style->font_style.i_shadow_color = (col & 0x00ffffff);
1417                                     p_style->font_style.i_shadow_alpha = (col >> 24) & 0xff;
1418                                 }
1419                             }
1420                             else if( !strcasecmp( "shadow-level", psz_name ) )
1421                             {
1422                                 p_style->font_style.i_shadow_width = atoi( psz_value );
1423                             }
1424                             else if( !strcasecmp( "back-color", psz_name ) )
1425                             {
1426                                 if( *psz_value == '#' )
1427                                 {
1428                                     unsigned long col = strtol(psz_value+1, NULL, 16);
1429                                     p_style->font_style.i_karaoke_background_color = (col & 0x00ffffff);
1430                                     p_style->font_style.i_karaoke_background_alpha = (col >> 24) & 0xff;
1431                                 }
1432                             }
1433                             else if( !strcasecmp( "spacing", psz_name ) )
1434                             {
1435                                 p_style->font_style.i_spacing = atoi( psz_value );
1436                             }
1437                         }
1438                         if( psz_name )  free( psz_name );
1439                         if( psz_value ) free( psz_value );
1440                     }
1441                 }
1442                 else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) )
1443                 {
1444                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
1445                     {
1446                         char *psz_name = xml_ReaderName ( p_xml_reader );
1447                         char *psz_value = xml_ReaderValue ( p_xml_reader );
1448
1449                         if( psz_name && psz_value )
1450                         {
1451                             if( !strcasecmp( "alignment", psz_name ) )
1452                             {
1453                                 if( !strcasecmp( "TopLeft", psz_value ) )
1454                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
1455                                 else if( !strcasecmp( "TopCenter", psz_value ) )
1456                                     p_style->i_align = SUBPICTURE_ALIGN_TOP;
1457                                 else if( !strcasecmp( "TopRight", psz_value ) )
1458                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
1459                                 else if( !strcasecmp( "MiddleLeft", psz_value ) )
1460                                     p_style->i_align = SUBPICTURE_ALIGN_LEFT;
1461                                 else if( !strcasecmp( "MiddleCenter", psz_value ) )
1462                                     p_style->i_align = 0;
1463                                 else if( !strcasecmp( "MiddleRight", psz_value ) )
1464                                     p_style->i_align = SUBPICTURE_ALIGN_RIGHT;
1465                                 else if( !strcasecmp( "BottomLeft", psz_value ) )
1466                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
1467                                 else if( !strcasecmp( "BottomCenter", psz_value ) )
1468                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM;
1469                                 else if( !strcasecmp( "BottomRight", psz_value ) )
1470                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
1471                             }
1472                             else if( !strcasecmp( "horizontal-margin", psz_name ) )
1473                             {
1474                                 if( strchr( psz_value, '%' ) )
1475                                 {
1476                                     p_style->i_margin_h = 0;
1477                                     p_style->i_margin_percent_h = atoi( psz_value );
1478                                 }
1479                                 else
1480                                 {
1481                                     p_style->i_margin_h = atoi( psz_value );
1482                                     p_style->i_margin_percent_h = 0;
1483                                 }
1484                             }
1485                             else if( !strcasecmp( "vertical-margin", psz_name ) )
1486                             {
1487                                 if( strchr( psz_value, '%' ) )
1488                                 {
1489                                     p_style->i_margin_v = 0;
1490                                     p_style->i_margin_percent_v = atoi( psz_value );
1491                                 }
1492                                 else
1493                                 {
1494                                     p_style->i_margin_v = atoi( psz_value );
1495                                     p_style->i_margin_percent_v = 0;
1496                                 }
1497                             }
1498                         }
1499                         if( psz_name )  free( psz_name );
1500                         if( psz_value ) free( psz_value );
1501                     }
1502                 }
1503
1504                 free( psz_node );
1505                 break;
1506         }
1507     }
1508     if( p_style ) free( p_style );
1509 }
1510 /*****************************************************************************
1511  * ParseSSAHeader: Retrieve global formatting information etc
1512  *****************************************************************************/
1513 static void ParseSSAHeader( decoder_t *p_dec )
1514 {
1515     decoder_sys_t *p_sys = p_dec->p_sys;
1516     char *psz_parser = NULL;
1517     char *psz_header = malloc( p_dec->fmt_in.i_extra+1 );
1518     int i_section_type = 1;
1519
1520     memcpy( psz_header, p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra );
1521     psz_header[ p_dec->fmt_in.i_extra] = '\0';
1522
1523     /* Handle [Script Info] section */
1524     psz_parser = strcasestr( psz_header, "[Script Info]" );
1525     if( psz_parser == NULL ) goto eof;
1526
1527     psz_parser = GotoNextLine( psz_parser );
1528
1529     while( psz_parser[0] != '\0' )
1530     {
1531         int temp;
1532         char buffer_text[MAX_LINE + 1];
1533
1534         if( psz_parser[0] == '!' || psz_parser[0] == ';' ) /* comment */;
1535         else if( sscanf( psz_parser, "PlayResX: %d", &temp ) == 1 )
1536             p_sys->i_original_width = ( temp > 0 ) ? temp : -1;
1537         else if( sscanf( psz_parser, "PlayResY: %d", &temp ) == 1 )
1538             p_sys->i_original_height = ( temp > 0 ) ? temp : -1;
1539         else if( sscanf( psz_parser, "Script Type: %8192s", buffer_text ) == 1 )
1540         {
1541             if( !strcasecmp( buffer_text, "V4.00+" ) ) p_sys->b_ass = VLC_TRUE;
1542         }
1543         else if( !strncasecmp( psz_parser, "[V4 Styles]", 11 ) )
1544             i_section_type = 1;
1545         else if( !strncasecmp( psz_parser, "[V4+ Styles]", 12) )
1546         {
1547             i_section_type = 2;
1548             p_sys->b_ass = VLC_TRUE;
1549         }
1550         else if( !strncasecmp( psz_parser, "[Events]", 8 ) )
1551             i_section_type = 4;
1552         else if( !strncasecmp( psz_parser, "Style:", 6 ) )
1553         {
1554             int i_font_size, i_bold, i_italic, i_border, i_outline, i_shadow, i_underline,
1555                 i_strikeout, i_scale_x, i_scale_y, i_spacing, i_align, i_margin_l, i_margin_r, i_margin_v;
1556
1557             char psz_temp_stylename[MAX_LINE+1];
1558             char psz_temp_fontname[MAX_LINE+1];
1559             char psz_temp_color1[MAX_LINE+1];
1560             char psz_temp_color2[MAX_LINE+1];
1561             char psz_temp_color3[MAX_LINE+1];
1562             char psz_temp_color4[MAX_LINE+1];
1563
1564             if( i_section_type == 1 ) /* V4 */
1565             {
1566                 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1567                     psz_temp_stylename, psz_temp_fontname, &i_font_size,
1568                     psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1569                     &i_border, &i_outline, &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 16 )
1570                 {
1571                     ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1572
1573                     p_style->psz_stylename = strdup( psz_temp_stylename );
1574                     p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1575                     p_style->font_style.i_font_size = i_font_size;
1576
1577                     ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, NULL );
1578                     ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, NULL );
1579                     p_style->font_style.i_outline_color = p_style->font_style.i_shadow_color;
1580                     p_style->font_style.i_font_alpha = p_style->font_style.i_outline_alpha = p_style->font_style.i_shadow_alpha = 0x00;
1581                     p_style->font_style.i_style_flags = 0;
1582                     if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1583                     if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1584
1585                     if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1586                     else if( i_border == 3 )
1587                     {
1588                         p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1589                         p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1590                         p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1591                     }
1592                     p_style->font_style.i_shadow_width = i_shadow;
1593                     p_style->font_style.i_outline_width = i_outline;
1594
1595                     p_style->i_align = 0;
1596                     if( i_align == 1 || i_align == 5 || i_align == 9 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1597                     if( i_align == 3 || i_align == 7 || i_align == 11 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1598                     if( i_align < 4 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1599                     else if( i_align < 8 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP;
1600
1601                     p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1602                     p_style->i_margin_v = i_margin_v;
1603                     p_style->i_margin_percent_h = 0;
1604                     p_style->i_margin_percent_v = 0;
1605
1606                     p_style->font_style.i_karaoke_background_color = 0xffffff;
1607                     p_style->font_style.i_karaoke_background_alpha = 0xff;
1608
1609                     TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1610                 }
1611                 else msg_Warn( p_dec, "SSA v4 styleline parsing failed" );
1612             }
1613             else if( i_section_type == 2 ) /* V4+ */
1614             {
1615                 /* Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour,
1616                    Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline,
1617                    Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
1618                 */
1619                 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%*f,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1620                     psz_temp_stylename, psz_temp_fontname, &i_font_size,
1621                     psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1622                     &i_underline, &i_strikeout, &i_scale_x, &i_scale_y, &i_spacing, &i_border, &i_outline,
1623                     &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 21 )
1624                 {
1625                     ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1626
1627                     p_style->psz_stylename = strdup( psz_temp_stylename );
1628                     p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1629                     p_style->font_style.i_font_size = i_font_size;
1630                     msg_Dbg( p_dec, psz_temp_color1 );
1631                     ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, &p_style->font_style.i_font_alpha );
1632                     ParseColor( p_dec, psz_temp_color3, &p_style->font_style.i_outline_color, &p_style->font_style.i_outline_alpha );
1633                     ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, &p_style->font_style.i_shadow_alpha );
1634
1635                     p_style->font_style.i_style_flags = 0;
1636                     if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1637                     if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1638                     if( i_underline ) p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
1639                     if( i_strikeout ) p_style->font_style.i_style_flags |= STYLE_STRIKEOUT;
1640                     if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1641                     else if( i_border == 3 )
1642                     {
1643                         p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1644                         p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1645                         p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1646                     }
1647                     p_style->font_style.i_shadow_width  = ( i_border == 1 ) ? i_shadow : 0;
1648                     p_style->font_style.i_outline_width = ( i_border == 1 ) ? i_outline : 0;
1649                     p_style->font_style.i_spacing = i_spacing;
1650                     //p_style->font_style.f_angle = f_angle;
1651
1652                     p_style->i_align = 0;
1653                     if( i_align == 0x1 || i_align == 0x4 || i_align == 0x7 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1654                     if( i_align == 0x3 || i_align == 0x6 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1655                     if( i_align == 0x7 || i_align == 0x8 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP;
1656                     if( i_align == 0x1 || i_align == 0x2 || i_align == 0x3 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1657                     p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1658                     p_style->i_margin_v = i_margin_v;
1659                     p_style->i_margin_percent_h = 0;
1660                     p_style->i_margin_percent_v = 0;
1661
1662                     p_style->font_style.i_karaoke_background_color = 0xffffff;
1663                     p_style->font_style.i_karaoke_background_alpha = 0xff;
1664
1665                     /*TODO: Ignored: angle i_scale_x|y (fontscaling), i_encoding */
1666                     TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1667                 }
1668                 else msg_Dbg( p_dec, "SSA V4+ styleline parsing failed" );
1669             }
1670         }
1671         psz_parser = GotoNextLine( psz_parser );
1672     }
1673
1674 eof:
1675     if( psz_header ) free( psz_header );
1676     return;
1677 }
1678
/* Return a newly allocated plain-text copy of psz_subtitle:
 *  - anything between '<' and the next '>' is dropped, except that "<br/>"
 *    (any case) is replaced by a newline; tags with attribute values are
 *    therefore handled too,
 *  - the entities &lt; &gt; &amp; and &quot; are decoded (any case), and any
 *    other '&' is copied through as a plain ampersand,
 *  - a '<' with no closing '>' ends the text at that point.
 * The input string is not modified, so the original text can be reused.
 * Returns NULL if memory cannot be allocated; the caller frees the result.
 */
static char *StripTags( char *psz_subtitle )
{
    char *psz_text_start;
    char *psz_text;
    char *psz_shrunk;

    /* The output is never longer than the input: every step below consumes
     * at least one input byte and emits at most one output byte */
    psz_text = psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
    if( !psz_text_start )
        return NULL;

    while( *psz_subtitle )
    {
        if( *psz_subtitle == '<' )
        {
            if( strncasecmp( psz_subtitle, "<br/>", 5 ) == 0 )
                *psz_text++ = '\n';

            psz_subtitle += strcspn( psz_subtitle, ">" );

            /* Unterminated tag: we are on the terminating NUL, and the
             * increment below must not step past it */
            if( *psz_subtitle == '\0' )
                break;
        }
        else if( *psz_subtitle == '&' )
        {
            /* Each matched entity includes its ';', so strcspn() always
             * stops on a real character here */
            if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
            {
                *psz_text++ = '<';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
            {
                *psz_text++ = '>';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
            {
                *psz_text++ = '&';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&quot;", 6 ))
            {
                *psz_text++ = '\"';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else
            {
                /* Assume it is just a normal ampersand */
                *psz_text++ = '&';
            }
        }
        else
        {
            *psz_text++ = *psz_subtitle;
        }

        psz_subtitle++;
    }
    *psz_text = '\0';

    /* Give back the unused tail; if that fails the larger buffer is still
     * valid, so return it rather than losing the text */
    psz_shrunk = realloc( psz_text_start, psz_text - psz_text_start + 1 );

    return psz_shrunk ? psz_shrunk : psz_text_start;
}
1741
1742 /* Try to respect any style tags present in the subtitle string. The main
1743  * problem here is a lack of adequate specs for the subtitle formats.
1744  * SSA/ASS and USF are both detail spec'ed -- but they are handled elsewhere.
1745  * SAMI has a detailed spec, but extensive rework is needed in the demux
1746  * code to prevent all this style information being excised, as it presently
1747  * does.
1748  * That leaves the others - none of which were (I guess) originally intended
1749  * to be carrying style information. Over time people have used them that way.
1750  * In the absence of specifications from which to work, the tags supported
1751  * have been restricted to the simple set permitted by the USF DTD, ie. :
1752  *  Basic: <br>, <i>, <b>, <u>
1753  *  Extended: <font>
1754  *    Attributes: face
1755  *                family
1756  *                size
1757  *                color
1758  *                outline-color
1759  *                shadow-color
1760  *                outline-level
1761  *                shadow-level
1762  *                back-color
1763  *                alpha
1764  * There is also the further restriction that the subtitle be well-formed
1765  * as an XML entity, ie. the HTML sentence:
1766  *        <b><i>Bold and Italics</b></i>
1767  * doesn't qualify because the tags aren't nested one inside the other.
1768  * <text> tags are automatically added to the output to ensure
1769  * well-formedness.
1770  * If the text doesn't qualify for any reason, a NULL string is
1771  * returned, and the rendering engine will fall back to the
1772  * plain text version of the subtitle.
1773  */
1774 static char *CreateHtmlSubtitle( char *psz_subtitle )
1775 {
1776     char    psz_tagStack[ 100 ];
1777     size_t  i_buf_size     = strlen( psz_subtitle ) + 100;
1778     char   *psz_html_start = malloc( i_buf_size );
1779
1780     psz_tagStack[ 0 ] = '\0';
1781
1782     if( psz_html_start != NULL )
1783     {
1784         char *psz_html = psz_html_start;
1785
1786         strcpy( psz_html, "<text>" );
1787         psz_html += 6;
1788
1789         while( *psz_subtitle )
1790         {
1791             if( *psz_subtitle == '\n' )
1792             {
1793                 strcpy( psz_html, "<br/>" );
1794                 psz_html += 5;
1795                 psz_subtitle++;
1796             }
1797             else if( *psz_subtitle == '<' )
1798             {
1799                 if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
1800                 {
1801                     strcpy( psz_html, "<br/>" );
1802                     psz_html += 5;
1803                     psz_subtitle += 5;
1804                 }
1805                 else if( !strncasecmp( psz_subtitle, "<b>", 3 ) )
1806                 {
1807                     strcpy( psz_html, "<b>" );
1808                     strcat( psz_tagStack, "b" );
1809                     psz_html += 3;
1810                     psz_subtitle += 3;
1811                 }
1812                 else if( !strncasecmp( psz_subtitle, "<i>", 3 ) )
1813                 {
1814                     strcpy( psz_html, "<i>" );
1815                     strcat( psz_tagStack, "i" );
1816                     psz_html += 3;
1817                     psz_subtitle += 3;
1818                 }
1819                 else if( !strncasecmp( psz_subtitle, "<u>", 3 ) )
1820                 {
1821                     strcpy( psz_html, "<u>" );
1822                     strcat( psz_tagStack, "u" );
1823                     psz_html += 3;
1824                     psz_subtitle += 3;
1825                 }
1826                 else if( !strncasecmp( psz_subtitle, "<font ", 6 ))
1827                 {
1828                     const char *psz_attribs[] = { "face=\"", "family=\"", "size=\"",
1829                             "color=\"", "outline-color=\"", "shadow-color=\"",
1830                             "outline-level=\"", "shadow-level=\"", "back-color=\"",
1831                             "alpha=\"", NULL };
1832
1833                     strcpy( psz_html, "<font " );
1834                     strcat( psz_tagStack, "f" );
1835                     psz_html += 6;
1836                     psz_subtitle += 6;
1837
1838                     while( *psz_subtitle != '>' )
1839                     {
1840                         int  k;
1841
1842                         for( k=0; psz_attribs[ k ]; k++ )
1843                         {
1844                             int i_len = strlen( psz_attribs[ k ] );
1845
1846                             if( !strncasecmp( psz_subtitle, psz_attribs[ k ], i_len ))
1847                             {
1848                                 i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1;
1849
1850                                 strncpy( psz_html, psz_subtitle, i_len );
1851                                 psz_html += i_len;
1852                                 psz_subtitle += i_len;
1853                                 break;
1854                             }
1855                         }
1856                         if( psz_attribs[ k ] == NULL )
1857                         {
1858                             /* Jump over unrecognised tag */
1859                             int i_len = strcspn( psz_subtitle, "\"" ) + 1;
1860
1861                             i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1;
1862                             psz_subtitle += i_len;
1863                         }
1864                         while (*psz_subtitle == ' ')
1865                             *psz_html++ = *psz_subtitle++;
1866                     }
1867                     *psz_html++ = *psz_subtitle++;
1868                 }
1869                 else if( !strncmp( psz_subtitle, "</", 2 ))
1870                 {
1871                     vlc_bool_t  b_match     = VLC_FALSE;
1872                     int         i_len       = strlen( psz_tagStack ) - 1;
1873                     char       *psz_lastTag = NULL;
1874
1875                     if( i_len >= 0 )
1876                     {
1877                         psz_lastTag = psz_tagStack + i_len;
1878                         i_len = 0;
1879
1880                         switch( *psz_lastTag )
1881                         {
1882                             case 'b':
1883                                 b_match = !strncasecmp( psz_subtitle, "</b>", 4 );
1884                                 i_len   = 4;
1885                                 break;
1886                             case 'i':
1887                                 b_match = !strncasecmp( psz_subtitle, "</i>", 4 );
1888                                 i_len   = 4;
1889                                 break;
1890                             case 'u':
1891                                 b_match = !strncasecmp( psz_subtitle, "</u>", 4 );
1892                                 i_len   = 4;
1893                                 break;
1894                             case 'f':
1895                                 b_match = !strncasecmp( psz_subtitle, "</font>", 7 );
1896                                 i_len   = 7;
1897                                 break;
1898                         }
1899                     }
1900                     if( ! b_match )
1901                     {
1902                         /* Not well formed -- kill everything */
1903                         free( psz_html_start );
1904                         psz_html_start = NULL;
1905                         break;
1906                     }
1907                     *psz_lastTag = '\0';
1908                     strncpy( psz_html, psz_subtitle, i_len );
1909                     psz_html += i_len;
1910                     psz_subtitle += i_len;
1911                 }
1912                 else
1913                 {
1914                     psz_subtitle += strcspn( psz_subtitle, ">" );
1915                 }
1916             }
1917             else if( *psz_subtitle == '&' )
1918             {
1919                 if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
1920                 {
1921                     strcpy( psz_html, "&lt;" );
1922                     psz_html += 4;
1923                     psz_subtitle += 4;
1924                 }
1925                 else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
1926                 {
1927                     strcpy( psz_html, "&gt;" );
1928                     psz_html += 4;
1929                     psz_subtitle += 4;
1930                 }
1931                 else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
1932                 {
1933                     strcpy( psz_html, "&amp;" );
1934                     psz_html += 5;
1935                     psz_subtitle += 5;
1936                 }
1937                 else
1938                 {
1939                     strcpy( psz_html, "&amp;" );
1940                     psz_html += 5;
1941                     psz_subtitle++;
1942                 }
1943             }
1944             else
1945             {
1946                 *psz_html = *psz_subtitle;
1947                 if( psz_html > psz_html_start )
1948                 {
1949                     /* Check for double whitespace */
1950                     if((( *psz_html == ' ' ) ||
1951                         ( *psz_html == '\t' )) &&
1952                        (( *(psz_html-1) == ' ' ) ||
1953                         ( *(psz_html-1) == '\t' )))
1954                     {
1955                         strcpy( psz_html, NO_BREAKING_SPACE );
1956                         psz_html += strlen( NO_BREAKING_SPACE ) - 1;
1957                     }
1958                 }
1959                 psz_html++;
1960                 psz_subtitle++;
1961             }
1962
1963             if( ( size_t )( psz_html - psz_html_start ) > i_buf_size - 10 )
1964             {
1965                 int i_len = psz_html - psz_html_start;
1966
1967                 i_buf_size += 100;
1968                 psz_html_start = realloc( psz_html_start, i_buf_size );
1969                 psz_html = psz_html_start + i_len;
1970                 *psz_html = '\0';
1971             }
1972         }
1973         strcpy( psz_html, "</text>" );
1974         psz_html += 7;
1975
1976         if( psz_tagStack[ 0 ] != '\0' )
1977         {
1978             /* Not well formed -- kill everything */
1979             free( psz_html_start );
1980             psz_html_start = NULL;
1981         }
1982         else if( psz_html_start )
1983         {
1984             /* Shrink the memory requirements */
1985             psz_html_start = realloc( psz_html_start,  psz_html - psz_html_start + 1 );
1986         }
1987     }
1988     return psz_html_start;
1989 }
1990
/* Given a subtitle that may contain HTML-like markup, produce a plain-text
 * version of it: the tags are removed by StripTags() and every run of
 * whitespace (tab, CR, LF, space) is then compacted to one character.
 * Returns NULL if StripTags() fails; otherwise the caller frees the result.
 */

static char *CreatePlainText( char *psz_subtitle )
{
    static const char psz_blanks[] = "\t\r\n ";
    char *psz_plain = StripTags( psz_subtitle );
    char *p_run;

    if( psz_plain == NULL )
        return NULL;

    for( p_run = strpbrk( psz_plain, psz_blanks );
         p_run != NULL;
         p_run = strpbrk( p_run, psz_blanks ) )
    {
        int  i_run  = strspn( p_run, psz_blanks );
        /* A run containing a line break collapses to '\n', any other run
         * collapses to a single space */
        char c_fill = memchr( p_run, '\n', i_run ) ? '\n' : ' ';

        /* Close the gap: pull the rest of the string (NUL included) up so
         * that it starts right after the one character we keep */
        if( i_run > 1 )
            memmove( p_run + 1, p_run + i_run, strlen( p_run + i_run ) + 1 );

        *p_run = c_fill;
        p_run++;
    }

    return psz_plain;
}
2029
2030 /****************************************************************************
2031  * download and resize image located at psz_url
2032  ***************************************************************************/
2033 static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec, subpicture_t *p_spu, const char *psz_filename, int i_transparent_color )
2034 {
2035     decoder_sys_t         *p_sys = p_dec->p_sys;
2036     subpicture_region_t   *p_region;
2037     video_format_t         fmt_out;
2038     int                    k;
2039     picture_t             *p_pic = NULL;
2040
2041     for( k = 0; k < p_sys->i_images; k++ )
2042     {
2043         if( p_sys->pp_images &&
2044             !strcmp( p_sys->pp_images[k]->psz_filename, psz_filename ) )
2045         {
2046             p_pic = p_sys->pp_images[k]->p_pic;
2047             break;
2048         }
2049     }
2050
2051     if( !p_pic )
2052     {
2053         msg_Err( p_dec, "Unable to read image %s", psz_filename );
2054         return NULL;
2055     }
2056
2057     /* Display the feed's image */
2058     memset( &fmt_out, 0, sizeof( video_format_t));
2059
2060     fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A');
2061     fmt_out.i_aspect = VOUT_ASPECT_FACTOR;
2062     fmt_out.i_sar_num = fmt_out.i_sar_den = 1;
2063     fmt_out.i_width =
2064         fmt_out.i_visible_width = p_pic->p[Y_PLANE].i_visible_pitch;
2065     fmt_out.i_height =
2066         fmt_out.i_visible_height = p_pic->p[Y_PLANE].i_visible_lines;
2067
2068     p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt_out );
2069     if( !p_region )
2070     {
2071         msg_Err( p_dec, "cannot allocate SPU region" );
2072         return NULL;
2073     }
2074     vout_CopyPicture( p_dec, &p_region->picture, p_pic );
2075
2076     /* This isn't the best way to do this - if you really want transparency, then
2077      * you're much better off using an image type that supports it like PNG. The
2078      * spec requires this support though.
2079      */
2080     if( i_transparent_color > 0 )
2081     {
2082         uint8_t i_r = ( i_transparent_color >> 16 ) & 0xff;
2083         uint8_t i_g = ( i_transparent_color >>  8 ) & 0xff;
2084         uint8_t i_b = ( i_transparent_color       ) & 0xff;
2085         uint8_t i_y = ( ( (  66 * i_r + 129 * i_g +  25 * i_b + 128 ) >> 8 ) + 16 );
2086         uint8_t i_u =   ( ( -38 * i_r -  74 * i_g + 112 * i_b + 128 ) >> 8 ) + 128 ;
2087         uint8_t i_v =   ( ( 112 * i_r -  94 * i_g -  18 * i_b + 128 ) >> 8 ) + 128 ;
2088
2089         if( ( p_region->picture.Y_PITCH == p_region->picture.U_PITCH ) &&
2090             ( p_region->picture.Y_PITCH == p_region->picture.V_PITCH ) &&
2091             ( p_region->picture.Y_PITCH == p_region->picture.A_PITCH ) )
2092         {
2093             int i_lines = p_region->picture.p[ Y_PLANE ].i_lines;
2094             if( i_lines > p_region->picture.p[ U_PLANE ].i_lines )
2095                 i_lines = p_region->picture.p[ U_PLANE ].i_lines;
2096             if( i_lines > p_region->picture.p[ V_PLANE ].i_lines )
2097                 i_lines = p_region->picture.p[ V_PLANE ].i_lines;
2098             if( i_lines > p_region->picture.p[ A_PLANE ].i_lines )
2099                 i_lines = p_region->picture.p[ A_PLANE ].i_lines;
2100
2101             int   i;
2102
2103             for( i = 0; i < p_region->picture.A_PITCH * i_lines; i++ )
2104             {
2105                 if(( p_region->picture.Y_PIXELS[ i ] == i_y ) &&
2106                    ( p_region->picture.U_PIXELS[ i ] == i_u ) &&
2107                    ( p_region->picture.V_PIXELS[ i ] == i_v ) )
2108                 {
2109                     p_region->picture.A_PIXELS[ i ] = 1;
2110                 }
2111             }
2112         }
2113     }
2114     return p_region;
2115 }