]> git.sesse.net Git - vlc/blob - modules/codec/subtitles/subsdec.c
Replace strerror() with %m (or Linux DVB: strerror_r) - refs #1297
[vlc] / modules / codec / subtitles / subsdec.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2006 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@videolan.org>
8  *          Samuel Hocevar <sam@zoy.org>
9  *          Derk-Jan Hartman <hartman at videolan dot org>
10  *          Bernie Purcell <bitmap@videolan.org>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30
31 #include "subsdec.h"
32
33 /*****************************************************************************
34  * Local prototypes
35  *****************************************************************************/
36 static int  OpenDecoder   ( vlc_object_t * );
37 static void CloseDecoder  ( vlc_object_t * );
38
39 static subpicture_t   *DecodeBlock   ( decoder_t *, block_t ** );
40 static subpicture_t   *ParseText     ( decoder_t *, block_t * );
41 static char           *StripTags      ( char * );
42 static char           *CreateHtmlSubtitle ( char * );
43
44
45 /*****************************************************************************
46  * Module descriptor.
47  *****************************************************************************/
/* Character set names offered as choices for the "subsdec-encoding" option
 * (handed to change_string_list() in the module descriptor).
 * The first entry, DEFAULT_NAME, is the option's default value; OpenDecoder()
 * treats it as "no encoding configured".
 * NOTE(review): the empty strings look like group separators for the
 * preferences list -- confirm against the configuration UI code. */
48 static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
49     "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
50     "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
51     "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
52     "ISO-8859-6", "CP1256", "MacArabic", "",
53     "ISO-8859-7", "CP1253", "MacGreek", "",
54     "ISO-8859-8", "CP1255", "MacHebrew", "",
55     "ISO-8859-9", "CP1254", "MacTurkish", "",
56     "ISO-8859-13", "CP1257", "",
57     "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
58     "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
59     "ISO-2022-KR", "EUC-KR", "",
60     "MacThai", "KOI8-T", "",
61     "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
62     "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
63     "Macintosh", "",
64     "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
65     "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
66     "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
67     "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
68     "HPROMAN8", "NEXTSTEP" };
69 /*
70 SSA supports charset selection.
71 The following known charsets are used:
72
73 0 = Ansi - Western European
74 1 = default
75 2 = symbol
76 3 = invalid
77 77 = Mac
78 128 = Japanese (Shift JIS)
79 129 = Hangul
80 130 = Johab
81 134 = GB2312 Simplified Chinese
82 136 = Big5 Traditional Chinese
83 161 = Greek
84 162 = Turkish
85 163 = Vietnamese
86 177 = Hebrew
87 178 = Arabic
88 186 = Baltic
89 204 = Russian (Cyrillic)
90 222 = Thai
91 238 = Eastern European
92 254 = PC 437
93 */
94
/* Values and matching labels for the "subsdec-align" choice list: the two
 * arrays are passed together to change_integer_list(), so entry i of one
 * array belongs with entry i of the other (0 = Center, 1 = Left, 2 = Right). */
95 static int  pi_justification[] = { 0, 1, 2 };
96 static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
97
/* Short and long help texts of the options declared in the module descriptor. */
98 #define ENCODING_TEXT N_("Subtitles text encoding")
99 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
100 #define ALIGN_TEXT N_("Subtitles justification")
101 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
102 #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
103 #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
104             "UTF-8 encoding within subtitles files.")
105 #define FORMAT_TEXT N_("Formatted Subtitles")
106 #define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
107  "VLC partly implements this, but you can choose to disable all formatting.")
108
109
/* Module descriptor: registers a "decoder" capability with score 50 whose
 * open/close callbacks are OpenDecoder() and CloseDecoder(), filed under the
 * input / subtitle-codec category, and declares four options:
 *   - "subsdec-align"            integer, default 0, choices from
 *                                pi_justification / ppsz_justification_text
 *   - "subsdec-encoding"         string, default DEFAULT_NAME, choices from
 *                                ppsz_encodings
 *   - "subsdec-autodetect-utf8"  boolean, default VLC_TRUE
 *   - "subsdec-formatted"        boolean, default VLC_TRUE
 * NOTE(review): the NULL and trailing VLC_FALSE arguments are presumably the
 * change callback and the "advanced option" flag -- confirm against the
 * add_integer()/add_string()/add_bool() macro definitions. */
110 vlc_module_begin();
111     set_shortname( _("Subtitles"));
112     set_description( _("Text subtitles decoder") );
113     set_capability( "decoder", 50 );
114     set_callbacks( OpenDecoder, CloseDecoder );
115     set_category( CAT_INPUT );
116     set_subcategory( SUBCAT_INPUT_SCODEC );
117
118     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
119                  VLC_FALSE );
120         change_integer_list( pi_justification, ppsz_justification_text, 0 );
121     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
122                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
123         change_string_list( ppsz_encodings, 0, 0 );
124     add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
125               AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
126     add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
127                  VLC_FALSE );
128 vlc_module_end();
129
130 /*****************************************************************************
131  * OpenDecoder: probe the decoder and return score
132  *****************************************************************************
133  * Tries to launch a decoder and return score so that the interface is able
134  * to chose.
135  *****************************************************************************/
136 static int OpenDecoder( vlc_object_t *p_this )
137 {
138     decoder_t     *p_dec = (decoder_t*)p_this;
139     decoder_sys_t *p_sys;
140     vlc_value_t    val;
141
142     switch( p_dec->fmt_in.i_codec )
143     {
144         case VLC_FOURCC('s','u','b','t'):
145         case VLC_FOURCC('s','s','a',' '):
146         case VLC_FOURCC('t','1','4','0'):
147             break;
148         default:
149             return VLC_EGENERIC;
150     }
151
152     p_dec->pf_decode_sub = DecodeBlock;
153
154     /* Allocate the memory needed to store the decoder's structure */
155     p_dec->p_sys = p_sys = malloc( sizeof( *p_sys ) );
156     if( p_sys == NULL )
157     {
158         msg_Err( p_dec, "out of memory" );
159         return VLC_ENOMEM;
160     }
161
162     /* init of p_sys */
163     memset( p_sys, 0, sizeof( *p_sys ) );
164     p_sys->i_align = 0;
165     p_sys->iconv_handle = (vlc_iconv_t)-1;
166     p_sys->b_autodetect_utf8 = VLC_FALSE;
167     p_sys->b_ass = VLC_FALSE;
168     p_sys->i_original_height = -1;
169     p_sys->i_original_width = -1;
170     TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
171     TAB_INIT( p_sys->i_images, p_sys->pp_images );
172
173     char *psz_charset = NULL;
174
175     if( p_dec->fmt_in.i_codec == VLC_FOURCC('t','1','4','0') )
176         psz_charset = strdup( "UTF-8" ); /* IUT T.140 is always using UTF-8 */
177     else
178     /* First try demux-specified encoding */
179     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
180     {
181         psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
182         msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
183                  p_dec->fmt_in.subs.psz_encoding ?: "not specified");
184     }
185
186     /* Second, try configured encoding */
187     if (psz_charset == NULL)
188     {
189         psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
190         if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME))
191         {
192             free (psz_charset);
193             psz_charset = NULL;
194         }
195
196         msg_Dbg (p_dec, "trying configured character encoding: %s",
197                  psz_charset ?: "not specified");
198     }
199
200     /* Third, try "local" encoding with optional UTF-8 autodetection */
201     if (psz_charset == NULL)
202     {
203         psz_charset = strdup (GetFallbackEncoding ());
204         msg_Dbg (p_dec, "trying default character encoding: %s",
205                  psz_charset ?: "not specified");
206
207         if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
208         {
209             msg_Dbg (p_dec, "using automatic UTF-8 detection");
210             p_sys->b_autodetect_utf8 = VLC_TRUE;
211         }
212     }
213
214     if (psz_charset == NULL)
215     {
216         psz_charset = strdup ("UTF-8");
217         msg_Dbg (p_dec, "trying hard-coded character encoding: %s",
218                  psz_charset ? psz_charset : "error");
219     }
220
221     if (psz_charset == NULL)
222     {
223         free (p_sys);
224         return VLC_ENOMEM;
225     }
226
227     if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8"))
228     {
229         p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
230         if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
231             msg_Err (p_dec, "cannot convert from %s: %m", psz_charset);
232     }
233     free (psz_charset);
234
235     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
236     var_Get( p_dec, "subsdec-align", &val );
237     p_sys->i_align = val.i_int;
238
239     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ')
240      && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
241     {
242         if( p_dec->fmt_in.i_extra > 0 )
243             ParseSSAHeader( p_dec );
244     }
245
246     return VLC_SUCCESS;
247 }
248
249 /****************************************************************************
250  * DecodeBlock: the whole thing
251  ****************************************************************************
252  * This function must be fed with complete subtitles units.
253  ****************************************************************************/
254 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
255 {
256     subpicture_t *p_spu = NULL;
257
258     if( !pp_block || *pp_block == NULL ) return NULL;
259
260     p_spu = ParseText( p_dec, *pp_block );
261
262     block_Release( *pp_block );
263     *pp_block = NULL;
264
265     return p_spu;
266 }
267
268 /*****************************************************************************
269  * CloseDecoder: clean up the decoder
270  *****************************************************************************/
271 static void CloseDecoder( vlc_object_t *p_this )
272 {
273     decoder_t *p_dec = (decoder_t *)p_this;
274     decoder_sys_t *p_sys = p_dec->p_sys;
275
276     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
277         vlc_iconv_close( p_sys->iconv_handle );
278
279     if( p_sys->pp_ssa_styles )
280     {
281         int i;
282         for( i = 0; i < p_sys->i_ssa_styles; i++ )
283         {
284             if( !p_sys->pp_ssa_styles[i] )
285                 continue;
286
287             if( p_sys->pp_ssa_styles[i]->psz_stylename )
288                 free( p_sys->pp_ssa_styles[i]->psz_stylename );
289             if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname )
290                 free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname );
291             if( p_sys->pp_ssa_styles[i] )
292                 free( p_sys->pp_ssa_styles[i] );
293         }
294         TAB_CLEAN( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
295     }
296     if( p_sys->pp_images )
297     {
298         int i;
299         for( i = 0; i < p_sys->i_images; i++ )
300         {
301             if( !p_sys->pp_images[i] )
302                 continue;
303
304             if( p_sys->pp_images[i]->p_pic )
305                 p_sys->pp_images[i]->p_pic->pf_release( p_sys->pp_images[i]->p_pic );
306             if( p_sys->pp_images[i]->psz_filename )
307                 free( p_sys->pp_images[i]->psz_filename );
308
309             free( p_sys->pp_images[i] );
310         }
311         TAB_CLEAN( p_sys->i_images, p_sys->pp_images );
312     }
313
314     free( p_sys );
315 }
316
317 /*****************************************************************************
318  * ParseText: parse an text subtitle packet and send it to the video output
319  *****************************************************************************/
320 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
321 {
322     decoder_sys_t *p_sys = p_dec->p_sys;
323     subpicture_t *p_spu = NULL;
324     char *psz_subtitle = NULL;
325     video_format_t fmt;
326
327     /* We cannot display a subpicture with no date */
328     if( p_block->i_pts == 0 )
329     {
330         msg_Warn( p_dec, "subtitle without a date" );
331         return NULL;
332     }
333
334     /* Check validity of packet data */
335     /* An "empty" line containing only \0 can be used to force
336        and ephemer picture from the screen */
337     if( p_block->i_buffer < 1 )
338     {
339         msg_Warn( p_dec, "no subtitle data" );
340         return NULL;
341     }
342
343     /* Should be resiliant against bad subtitles */
344     psz_subtitle = strndup( (const char *)p_block->p_buffer,
345                             p_block->i_buffer );
346     if( psz_subtitle == NULL )
347         return NULL;
348
349     if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
350     {
351         if (EnsureUTF8( psz_subtitle ) == NULL)
352         {
353             msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
354                      "Try manually setting a character-encoding "
355                      "before you open the file.") );
356         }
357     }
358     else
359     {
360
361         if( p_sys->b_autodetect_utf8 )
362         {
363             if( IsUTF8( psz_subtitle ) == NULL )
364             {
365                 msg_Dbg( p_dec, "invalid UTF-8 sequence: "
366                          "disabling UTF-8 subtitles autodetection" );
367                 p_sys->b_autodetect_utf8 = VLC_FALSE;
368             }
369         }
370
371         if( !p_sys->b_autodetect_utf8 )
372         {
373             size_t inbytes_left = strlen( psz_subtitle );
374             size_t outbytes_left = 6 * inbytes_left;
375             char *psz_new_subtitle = malloc( outbytes_left + 1 );
376             char *psz_convert_buffer_out = psz_new_subtitle;
377             const char *psz_convert_buffer_in = psz_subtitle;
378
379             size_t ret = vlc_iconv( p_sys->iconv_handle,
380                                     &psz_convert_buffer_in, &inbytes_left,
381                                     &psz_convert_buffer_out, &outbytes_left );
382
383             *psz_convert_buffer_out++ = '\0';
384             free( psz_subtitle );
385
386             if( ( ret == (size_t)(-1) ) || inbytes_left )
387             {
388                 free( psz_new_subtitle );
389                 msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
390                         "Try manually setting a character-encoding "
391                                 "before you open the file.") );
392                 return NULL;
393             }
394
395             psz_subtitle = realloc( psz_new_subtitle,
396                                     psz_convert_buffer_out - psz_new_subtitle );
397         }
398     }
399
400     /* Create the subpicture unit */
401     p_spu = p_dec->pf_spu_buffer_new( p_dec );
402     if( !p_spu )
403     {
404         msg_Warn( p_dec, "can't get spu buffer" );
405         if( psz_subtitle ) free( psz_subtitle );
406         return NULL;
407     }
408
409     p_spu->b_pausable = VLC_TRUE;
410
411     /* Create a new subpicture region */
412     memset( &fmt, 0, sizeof(video_format_t) );
413     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
414     fmt.i_aspect = 0;
415     fmt.i_width = fmt.i_height = 0;
416     fmt.i_x_offset = fmt.i_y_offset = 0;
417     p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
418     if( !p_spu->p_region )
419     {
420         msg_Err( p_dec, "cannot allocate SPU region" );
421         if( psz_subtitle ) free( psz_subtitle );
422         p_dec->pf_spu_buffer_del( p_dec, p_spu );
423         return NULL;
424     }
425
426     /* Decode and format the subpicture unit */
427     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
428     {
429         /* Normal text subs, easy markup */
430         p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
431         p_spu->i_x = p_sys->i_align ? 20 : 0;
432         p_spu->i_y = 10;
433
434         /* Remove formatting from string */
435
436         p_spu->p_region->psz_text = StripTags( psz_subtitle );
437         if( var_CreateGetBool( p_dec, "subsdec-formatted" ) )
438         {
439             p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle );
440         }
441
442         p_spu->i_start = p_block->i_pts;
443         p_spu->i_stop = p_block->i_pts + p_block->i_length;
444         p_spu->b_ephemer = (p_block->i_length == 0);
445         p_spu->b_absolute = VLC_FALSE;
446     }
447     else
448     {
449         /* Decode SSA/USF strings */
450         if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
451             ParseSSAString( p_dec, psz_subtitle, p_spu );
452
453         p_spu->i_start = p_block->i_pts;
454         p_spu->i_stop = p_block->i_pts + p_block->i_length;
455         p_spu->b_ephemer = (p_block->i_length == 0);
456         p_spu->b_absolute = VLC_FALSE;
457         p_spu->i_original_picture_width = p_sys->i_original_width;
458         p_spu->i_original_picture_height = p_sys->i_original_height;
459     }
460     if( psz_subtitle ) free( psz_subtitle );
461
462     return p_spu;
463 }
464
/* Return a pointer to the start of the line following the first line of
 * psz_text: the first run of '\n' / '\r' characters is skipped as a whole.
 * When the text contains no line break the returned pointer designates the
 * terminating NUL. */
char* GotoNextLine( char *psz_text )
{
    /* End of the current line (or of the string) ... */
    char *p_break = psz_text + strcspn( psz_text, "\r\n" );

    /* ... then past every consecutive line-break character */
    return p_break + strspn( p_break, "\r\n" );
}
482
/* StripTags: build a plain-text copy of a subtitle.
 *
 * Everything from a '<' up to the matching '>' is dropped ("<br/>" becomes a
 * newline), the entities &lt; &gt; &amp; and &quot; are decoded, and any
 * other '&' is kept as a normal ampersand. The input string is not modified,
 * so that the original text can be reused.
 *
 * A tag that is never closed swallows the rest of the string; the scan stops
 * at the terminator instead of stepping over it.
 *
 * Returns a newly allocated string the caller must free(), or NULL when the
 * allocation fails.
 */
static char *StripTags( char *psz_subtitle )
{
    /* Entities that get decoded, with the character each one stands for */
    static const struct
    {
        const char *psz_name;
        char        c_value;
    } p_entities[] = {
        { "&lt;",   '<'  },
        { "&gt;",   '>'  },
        { "&amp;",  '&'  },
        { "&quot;", '\"' },
    };
    const size_t i_entities = sizeof( p_entities ) / sizeof( p_entities[0] );

    /* The output is never longer than the input: every branch below writes
     * at most one byte per input byte consumed */
    char *psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
    char *psz_text = psz_text_start;
    char *psz_shrunk;

    if( psz_text_start == NULL )
        return NULL;

    while( *psz_subtitle != '\0' )
    {
        if( *psz_subtitle == '<' )
        {
            if( strncasecmp( psz_subtitle, "<br/>", 5 ) == 0 )
                *psz_text++ = '\n';

            psz_subtitle += strcspn( psz_subtitle, ">" );
            if( *psz_subtitle == '\0' )
                break;          /* unterminated tag: nothing left to copy */
            psz_subtitle++;     /* step over the '>' */
        }
        else if( *psz_subtitle == '&' )
        {
            /* Default: assume it is just a normal ampersand */
            char   c_out  = '&';
            size_t i_skip = 1;
            size_t i;

            for( i = 0; i < i_entities; i++ )
            {
                size_t i_name = strlen( p_entities[i].psz_name );

                if( strncasecmp( psz_subtitle, p_entities[i].psz_name,
                                 i_name ) == 0 )
                {
                    c_out  = p_entities[i].c_value;
                    i_skip = i_name;
                    break;
                }
            }
            *psz_text++ = c_out;
            psz_subtitle += i_skip;
        }
        else
        {
            *psz_text++ = *psz_subtitle++;
        }
    }
    *psz_text = '\0';

    /* Give back the unused tail; if shrinking fails the original buffer is
     * still valid and is returned as is */
    psz_shrunk = realloc( psz_text_start,
                          (size_t)( psz_text - psz_text_start ) + 1 );
    return psz_shrunk != NULL ? psz_shrunk : psz_text_start;
}
545
546 /* Try to respect any style tags present in the subtitle string. The main
547  * problem here is a lack of adequate specs for the subtitle formats.
548  * SSA/ASS and USF are both detail spec'ed -- but they are handled elsewhere.
549  * SAMI has a detailed spec, but extensive rework is needed in the demux
550  * code to prevent all this style information being excised, as it presently
551  * does.
552  * That leaves the others - none of which were (I guess) originally intended
553  * to be carrying style information. Over time people have used them that way.
554  * In the absence of specifications from which to work, the tags supported
555  * have been restricted to the simple set permitted by the USF DTD, ie. :
556  *  Basic: <br>, <i>, <b>, <u>
557  *  Extended: <font>
558  *    Attributes: face
559  *                family
560  *                size
561  *                color
562  *                outline-color
563  *                shadow-color
564  *                outline-level
565  *                shadow-level
566  *                back-color
567  *                alpha
568  * There is also the further restriction that the subtitle be well-formed
569  * as an XML entity, ie. the HTML sentence:
570  *        <b><i>Bold and Italics</b></i>
571  * doesn't qualify because the tags aren't nested one inside the other.
572  * <text> tags are automatically added to the output to ensure
573  * well-formedness.
574  * If the text doesn't qualify for any reason, a NULL string is
575  * returned, and the rendering engine will fall back to the
576  * plain text version of the subtitle.
577  */
578 static char *CreateHtmlSubtitle( char *psz_subtitle )
579 {
580     char    psz_tagStack[ 100 ];
581     size_t  i_buf_size     = strlen( psz_subtitle ) + 100;
582     char   *psz_html_start = malloc( i_buf_size );
583
584     psz_tagStack[ 0 ] = '\0';
585
586     if( psz_html_start != NULL )
587     {
588         char *psz_html = psz_html_start;
589
590         strcpy( psz_html, "<text>" );
591         psz_html += 6;
592
593         while( *psz_subtitle )
594         {
595             if( *psz_subtitle == '\n' )
596             {
597                 strcpy( psz_html, "<br/>" );
598                 psz_html += 5;
599                 psz_subtitle++;
600             }
601             else if( *psz_subtitle == '<' )
602             {
603                 if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
604                 {
605                     strcpy( psz_html, "<br/>" );
606                     psz_html += 5;
607                     psz_subtitle += 5;
608                 }
609                 else if( !strncasecmp( psz_subtitle, "<b>", 3 ) )
610                 {
611                     strcpy( psz_html, "<b>" );
612                     strcat( psz_tagStack, "b" );
613                     psz_html += 3;
614                     psz_subtitle += 3;
615                 }
616                 else if( !strncasecmp( psz_subtitle, "<i>", 3 ) )
617                 {
618                     strcpy( psz_html, "<i>" );
619                     strcat( psz_tagStack, "i" );
620                     psz_html += 3;
621                     psz_subtitle += 3;
622                 }
623                 else if( !strncasecmp( psz_subtitle, "<u>", 3 ) )
624                 {
625                     strcpy( psz_html, "<u>" );
626                     strcat( psz_tagStack, "u" );
627                     psz_html += 3;
628                     psz_subtitle += 3;
629                 }
630                 else if( !strncasecmp( psz_subtitle, "<font ", 6 ))
631                 {
632                     const char *psz_attribs[] = { "face=\"", "family=\"", "size=\"",
633                             "color=\"", "outline-color=\"", "shadow-color=\"",
634                             "outline-level=\"", "shadow-level=\"", "back-color=\"",
635                             "alpha=\"", NULL };
636
637                     strcpy( psz_html, "<font " );
638                     strcat( psz_tagStack, "f" );
639                     psz_html += 6;
640                     psz_subtitle += 6;
641
642                     while( *psz_subtitle != '>' )
643                     {
644                         int  k;
645
646                         for( k=0; psz_attribs[ k ]; k++ )
647                         {
648                             int i_len = strlen( psz_attribs[ k ] );
649
650                             if( !strncasecmp( psz_subtitle, psz_attribs[ k ], i_len ))
651                             {
652                                 i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1;
653
654                                 strncpy( psz_html, psz_subtitle, i_len );
655                                 psz_html += i_len;
656                                 psz_subtitle += i_len;
657                                 break;
658                             }
659                         }
660                         if( psz_attribs[ k ] == NULL )
661                         {
662                             /* Jump over unrecognised tag */
663                             int i_len = strcspn( psz_subtitle, "\"" ) + 1;
664
665                             i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1;
666                             psz_subtitle += i_len;
667                         }
668                         while (*psz_subtitle == ' ')
669                             *psz_html++ = *psz_subtitle++;
670                     }
671                     *psz_html++ = *psz_subtitle++;
672                 }
673                 else if( !strncmp( psz_subtitle, "</", 2 ))
674                 {
675                     vlc_bool_t  b_match     = VLC_FALSE;
676                     int         i_len       = strlen( psz_tagStack ) - 1;
677                     char       *psz_lastTag = NULL;
678
679                     if( i_len >= 0 )
680                     {
681                         psz_lastTag = psz_tagStack + i_len;
682                         i_len = 0;
683
684                         switch( *psz_lastTag )
685                         {
686                             case 'b':
687                                 b_match = !strncasecmp( psz_subtitle, "</b>", 4 );
688                                 i_len   = 4;
689                                 break;
690                             case 'i':
691                                 b_match = !strncasecmp( psz_subtitle, "</i>", 4 );
692                                 i_len   = 4;
693                                 break;
694                             case 'u':
695                                 b_match = !strncasecmp( psz_subtitle, "</u>", 4 );
696                                 i_len   = 4;
697                                 break;
698                             case 'f':
699                                 b_match = !strncasecmp( psz_subtitle, "</font>", 7 );
700                                 i_len   = 7;
701                                 break;
702                         }
703                     }
704                     if( ! b_match )
705                     {
706                         /* Not well formed -- kill everything */
707                         free( psz_html_start );
708                         psz_html_start = NULL;
709                         break;
710                     }
711                     *psz_lastTag = '\0';
712                     strncpy( psz_html, psz_subtitle, i_len );
713                     psz_html += i_len;
714                     psz_subtitle += i_len;
715                 }
716                 else
717                 {
718                     psz_subtitle += strcspn( psz_subtitle, ">" );
719                 }
720             }
721             else if( *psz_subtitle == '&' )
722             {
723                 if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
724                 {
725                     strcpy( psz_html, "&lt;" );
726                     psz_html += 4;
727                     psz_subtitle += 4;
728                 }
729                 else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
730                 {
731                     strcpy( psz_html, "&gt;" );
732                     psz_html += 4;
733                     psz_subtitle += 4;
734                 }
735                 else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
736                 {
737                     strcpy( psz_html, "&amp;" );
738                     psz_html += 5;
739                     psz_subtitle += 5;
740                 }
741                 else
742                 {
743                     strcpy( psz_html, "&amp;" );
744                     psz_html += 5;
745                     psz_subtitle++;
746                 }
747             }
748             else
749             {
750                 *psz_html = *psz_subtitle;
751                 if( psz_html > psz_html_start )
752                 {
753                     /* Check for double whitespace */
754                     if((( *psz_html == ' ' ) ||
755                         ( *psz_html == '\t' )) &&
756                        (( *(psz_html-1) == ' ' ) ||
757                         ( *(psz_html-1) == '\t' )))
758                     {
759                         strcpy( psz_html, NO_BREAKING_SPACE );
760                         psz_html += strlen( NO_BREAKING_SPACE ) - 1;
761                     }
762                 }
763                 psz_html++;
764                 psz_subtitle++;
765             }
766
767             if( ( size_t )( psz_html - psz_html_start ) > i_buf_size - 10 )
768             {
769                 int i_len = psz_html - psz_html_start;
770
771                 i_buf_size += 100;
772                 psz_html_start = realloc( psz_html_start, i_buf_size );
773                 psz_html = psz_html_start + i_len;
774                 *psz_html = '\0';
775             }
776         }
777         strcpy( psz_html, "</text>" );
778         psz_html += 7;
779
780         if( psz_tagStack[ 0 ] != '\0' )
781         {
782             /* Not well formed -- kill everything */
783             free( psz_html_start );
784             psz_html_start = NULL;
785         }
786         else if( psz_html_start )
787         {
788             /* Shrink the memory requirements */
789             psz_html_start = realloc( psz_html_start,  psz_html - psz_html_start + 1 );
790         }
791     }
792     return psz_html_start;
793 }