1 /*****************************************************************************
2 * subsdec.c : text subtitles decoder
3 *****************************************************************************
4 * Copyright (C) 2000-2006 the VideoLAN team
7 * Authors: Gildas Bazin <gbazin@videolan.org>
8 * Samuel Hocevar <sam@zoy.org>
9 * Derk-Jan Hartman <hartman at videolan dot org>
10 * Bernie Purcell <b dot purcell at adbglobal dot com>
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25 *****************************************************************************/
27 /*****************************************************************************
29 *****************************************************************************/
32 #include <vlc_codec.h>
35 #include <vlc_filter.h>
36 #include <vlc_charset.h>
37 #include <vlc_stream.h>
44 char * psz_stylename; /* The name of the style, no comma's allowed */
45 text_style_t font_style;
51 /*****************************************************************************
52 * decoder_sys_t : decoder descriptor
53 *****************************************************************************/
56 vlc_bool_t b_ass; /* The subs are ASS */
57 int i_original_height;
59 int i_align; /* Subtitles alignment on the vout */
60 vlc_iconv_t iconv_handle; /* handle to iconv instance */
61 vlc_bool_t b_autodetect_utf8;
63 ssa_style_t **pp_ssa_styles;
67 /*****************************************************************************
69 *****************************************************************************/
70 static int OpenDecoder ( vlc_object_t * );
71 static void CloseDecoder ( vlc_object_t * );
73 static subpicture_t *DecodeBlock ( decoder_t *, block_t ** );
74 static subpicture_t *ParseText ( decoder_t *, block_t * );
75 static void ParseSSAHeader ( decoder_t * );
76 static void ParseUSFHeader ( decoder_t * );
77 static void ParseUSFHeaderTags( decoder_sys_t *, xml_reader_t * );
78 static void ParseSSAString ( decoder_t *, char *, subpicture_t * );
79 static void ParseUSFString ( decoder_t *, char *, subpicture_t * );
80 static void ParseColor ( decoder_t *, char *, int *, int * );
81 static char *StripTags ( char * );
82 static char *CreateHtmlSubtitle ( char * );
84 #define DEFAULT_NAME "Default"
87 /*****************************************************************************
89 *****************************************************************************/
90 static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
91 "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
92 "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
93 "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
94 "ISO-8859-6", "CP1256", "MacArabic", "",
95 "ISO-8859-7", "CP1253", "MacGreek", "",
96 "ISO-8859-8", "CP1255", "MacHebrew", "",
97 "ISO-8859-9", "CP1254", "MacTurkish", "",
98 "ISO-8859-13", "CP1257", "",
99 "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
100 "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
101 "ISO-2022-KR", "EUC-KR", "",
102 "MacThai", "KOI8-T", "",
103 "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
104 "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
106 "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
107 "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
108 "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
109 "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
110 "HPROMAN8", "NEXTSTEP" };
112 SSA supports charset selection.
113 The following known charsets are used:
115 0 = Ansi - Western European
120 128 = Japanese (Shift JIS)
123 134 = GB2312 Simplified Chinese
124 136 = Big5 Traditional Chinese
131 204 = Russian (Cyrillic)
133 238 = Eastern European
137 static int pi_justification[] = { 0, 1, 2 };
138 static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
140 #define ENCODING_TEXT N_("Subtitles text encoding")
141 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
142 #define ALIGN_TEXT N_("Subtitles justification")
143 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
144 #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
145 #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
146 "UTF-8 encoding within subtitles files.")
147 #define FORMAT_TEXT N_("Formatted Subtitles")
148 #define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
149 "VLC partly implements this, but you can choose to disable all formatting.")
153 set_shortname( _("Subtitles"));
154 set_description( _("Text subtitles decoder") );
155 set_capability( "decoder", 50 );
156 set_callbacks( OpenDecoder, CloseDecoder );
157 set_category( CAT_INPUT );
158 set_subcategory( SUBCAT_INPUT_SCODEC );
160 add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
162 change_integer_list( pi_justification, ppsz_justification_text, 0 );
163 add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
164 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
165 change_string_list( ppsz_encodings, 0, 0 );
166 add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
167 AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
168 add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
172 /*****************************************************************************
173 * OpenDecoder: probe the decoder and return score
174 *****************************************************************************
175 * Tries to launch a decoder and return score so that the interface is able
177 *****************************************************************************/
178 static int OpenDecoder( vlc_object_t *p_this )
180 decoder_t *p_dec = (decoder_t*)p_this;
181 decoder_sys_t *p_sys;
184 if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
185 p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') &&
186 p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
191 p_dec->pf_decode_sub = DecodeBlock;
193 /* Allocate the memory needed to store the decoder's structure */
194 if( ( p_dec->p_sys = p_sys =
195 (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL )
197 msg_Err( p_dec, "out of memory" );
203 p_sys->iconv_handle = (vlc_iconv_t)-1;
204 p_sys->b_autodetect_utf8 = VLC_FALSE;
205 p_sys->b_ass = VLC_FALSE;
206 p_sys->i_original_height = -1;
207 p_sys->i_original_width = -1;
208 p_sys->pp_ssa_styles = NULL;
209 p_sys->i_ssa_styles = 0;
211 char *psz_charset = NULL;
212 /* First try demux-specified encoding */
213 if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
215 psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
216 msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
217 p_dec->fmt_in.subs.psz_encoding ?: "not specified");
220 /* Second, try configured encoding */
221 if (psz_charset == NULL)
223 psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
224 if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME))
230 msg_Dbg (p_dec, "trying configured character encoding: %s",
231 psz_charset ?: "not specified");
234 /* Third, try "local" encoding with optional UTF-8 autodetection */
235 if (psz_charset == NULL)
237 psz_charset = strdup (GetFallbackEncoding ());
238 msg_Dbg (p_dec, "trying default character encoding: %s",
239 psz_charset ?: "not specified");
241 if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
243 msg_Dbg (p_dec, "using automatic UTF-8 detection");
244 p_sys->b_autodetect_utf8 = VLC_TRUE;
248 if (psz_charset == NULL)
250 psz_charset = strdup ("UTF-8");
251 msg_Dbg (p_dec, "trying hard-coded character encoding: %s",
252 psz_charset ?: "error");
255 if (psz_charset == NULL)
261 if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8"))
263 p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
264 if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
265 msg_Err (p_dec, "cannot convert from %s: %s", psz_charset,
270 var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
271 var_Get( p_dec, "subsdec-align", &val );
272 p_sys->i_align = val.i_int;
274 if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
276 if( p_dec->fmt_in.i_extra > 0 )
277 ParseSSAHeader( p_dec );
279 else if( p_dec->fmt_in.i_codec == VLC_FOURCC('u','s','f',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
281 if( p_dec->fmt_in.i_extra > 0 )
282 ParseUSFHeader( p_dec );
288 /****************************************************************************
289 * DecodeBlock: the whole thing
290 ****************************************************************************
291 * This function must be fed with complete subtitles units.
292 ****************************************************************************/
293 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
295 subpicture_t *p_spu = NULL;
297 if( !pp_block || *pp_block == NULL ) return NULL;
299 p_spu = ParseText( p_dec, *pp_block );
301 block_Release( *pp_block );
307 /*****************************************************************************
308 * CloseDecoder: clean up the decoder
309 *****************************************************************************/
310 static void CloseDecoder( vlc_object_t *p_this )
312 decoder_t *p_dec = (decoder_t *)p_this;
313 decoder_sys_t *p_sys = p_dec->p_sys;
315 if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
317 vlc_iconv_close( p_sys->iconv_handle );
320 if( p_sys->pp_ssa_styles )
323 for( i = 0; i < p_sys->i_ssa_styles; i++ )
325 if( p_sys->pp_ssa_styles[i]->psz_stylename ) free( p_sys->pp_ssa_styles[i]->psz_stylename );
326 p_sys->pp_ssa_styles[i]->psz_stylename = NULL;
327 if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ) free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname );
328 p_sys->pp_ssa_styles[i]->font_style.psz_fontname = NULL;
329 if( p_sys->pp_ssa_styles[i] ) free( p_sys->pp_ssa_styles[i] ); p_sys->pp_ssa_styles[i] = NULL;
331 free( p_sys->pp_ssa_styles ); p_sys->pp_ssa_styles = NULL;
337 /*****************************************************************************
338 * ParseText: parse an text subtitle packet and send it to the video output
339 *****************************************************************************/
340 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
342 decoder_sys_t *p_sys = p_dec->p_sys;
343 subpicture_t *p_spu = NULL;
344 char *psz_subtitle = NULL;
347 /* We cannot display a subpicture with no date */
348 if( p_block->i_pts == 0 )
350 msg_Warn( p_dec, "subtitle without a date" );
354 /* Check validity of packet data */
355 /* An "empty" line containing only \0 can be used to force
356 and ephemer picture from the screen */
357 if( p_block->i_buffer < 1 )
359 msg_Warn( p_dec, "no subtitle data" );
363 /* Should be resiliant against bad subtitles */
364 psz_subtitle = strndup( (const char *)p_block->p_buffer,
366 if( psz_subtitle == NULL )
369 if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
371 if (EnsureUTF8( psz_subtitle ) == NULL)
373 msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
374 "Try manually setting a character-encoding "
375 "before you open the file.") );
381 if( p_sys->b_autodetect_utf8 )
383 if( IsUTF8( psz_subtitle ) == NULL )
385 msg_Dbg( p_dec, "invalid UTF-8 sequence: "
386 "disabling UTF-8 subtitles autodetection" );
387 p_sys->b_autodetect_utf8 = VLC_FALSE;
391 if( !p_sys->b_autodetect_utf8 )
393 size_t inbytes_left = strlen( psz_subtitle );
394 size_t outbytes_left = 6 * inbytes_left;
395 char *psz_new_subtitle = malloc( outbytes_left + 1 );
396 char *psz_convert_buffer_out = psz_new_subtitle;
397 const char *psz_convert_buffer_in = psz_subtitle;
399 size_t ret = vlc_iconv( p_sys->iconv_handle,
400 &psz_convert_buffer_in, &inbytes_left,
401 &psz_convert_buffer_out, &outbytes_left );
403 *psz_convert_buffer_out++ = '\0';
404 free( psz_subtitle );
406 if( ( ret == (size_t)(-1) ) || inbytes_left )
408 free( psz_new_subtitle );
409 msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
410 "Try manually setting a character-encoding "
411 "before you open the file.") );
415 psz_subtitle = realloc( psz_new_subtitle,
416 psz_convert_buffer_out - psz_new_subtitle );
420 /* Create the subpicture unit */
421 p_spu = p_dec->pf_spu_buffer_new( p_dec );
424 msg_Warn( p_dec, "can't get spu buffer" );
425 if( psz_subtitle ) free( psz_subtitle );
429 p_spu->b_pausable = VLC_TRUE;
431 /* Create a new subpicture region */
432 memset( &fmt, 0, sizeof(video_format_t) );
433 fmt.i_chroma = VLC_FOURCC('T','E','X','T');
435 fmt.i_width = fmt.i_height = 0;
436 fmt.i_x_offset = fmt.i_y_offset = 0;
437 p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
438 if( !p_spu->p_region )
440 msg_Err( p_dec, "cannot allocate SPU region" );
441 if( psz_subtitle ) free( psz_subtitle );
442 p_dec->pf_spu_buffer_del( p_dec, p_spu );
446 /* Decode and format the subpicture unit */
447 if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') &&
448 p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') )
450 /* Normal text subs, easy markup */
451 p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
452 p_spu->i_x = p_sys->i_align ? 20 : 0;
455 /* Remove formatting from string */
457 p_spu->p_region->psz_text = StripTags( psz_subtitle );
458 p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle );
460 p_spu->i_start = p_block->i_pts;
461 p_spu->i_stop = p_block->i_pts + p_block->i_length;
462 p_spu->b_ephemer = (p_block->i_length == 0);
463 p_spu->b_absolute = VLC_FALSE;
467 /* Decode SSA/USF strings */
468 if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
469 ParseSSAString( p_dec, psz_subtitle, p_spu );
471 ParseUSFString( p_dec, psz_subtitle, p_spu );
473 p_spu->i_start = p_block->i_pts;
474 p_spu->i_stop = p_block->i_pts + p_block->i_length;
475 p_spu->b_ephemer = (p_block->i_length == 0);
476 p_spu->b_absolute = VLC_FALSE;
477 p_spu->i_original_picture_width = p_sys->i_original_width;
478 p_spu->i_original_picture_height = p_sys->i_original_height;
480 if( psz_subtitle ) free( psz_subtitle );
485 static void ParseUSFString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
487 decoder_sys_t *p_sys = p_dec->p_sys;
488 subpicture_t *p_spu = p_spu_in;
490 char *psz_text_start;
491 ssa_style_t *p_style = NULL;
494 /* Create a text only copy of the subtitle (for legacy implementations) and copy
495 * the rich html version across as is - for parsing by a rendering engine capable
496 * of understanding it.
498 p_spu->p_region->psz_text = NULL;
499 p_spu->p_region->psz_html = strdup( psz_subtitle );
501 for( i = 0; i < p_sys->i_ssa_styles; i++ )
503 if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
504 p_style = p_sys->pp_ssa_styles[i];
507 /* The StripTags() function doesn't do everything we need (eg. <br/> tag )
508 * so do it here ourselves.
510 psz_text_start = malloc( strlen( psz_subtitle ));
512 psz_text = psz_text_start;
513 while( *psz_subtitle )
515 if( *psz_subtitle == '<' )
517 if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
519 else if( !strncasecmp( psz_subtitle, "<text ", 6 ))
521 char *psz_style = strcasestr( psz_subtitle, "style=\"" );
523 if( psz_style && ( psz_style < strchr( psz_subtitle, '>' ) ))
527 psz_style += strspn( psz_style, "\"" ) + 1;
528 i_len = strcspn( psz_style, "\"" );
530 psz_style[ i_len ] = '\0';
532 for( i = 0; i < p_sys->i_ssa_styles; i++ )
534 if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
535 p_style = p_sys->pp_ssa_styles[i];
538 psz_style[ i_len ] = '\"';
542 psz_subtitle += strcspn( psz_subtitle, ">" );
544 else if( *psz_subtitle == '&' )
546 if( !strncasecmp( psz_subtitle, "<", 4 ))
548 else if( !strncasecmp( psz_subtitle, ">", 4 ))
550 else if( !strncasecmp( psz_subtitle, "&", 5 ))
553 psz_subtitle += strcspn( psz_subtitle, ";" );
555 else if( ( *psz_subtitle == '\t' ) ||
556 ( *psz_subtitle == '\r' ) ||
557 ( *psz_subtitle == '\n' ) ||
558 ( *psz_subtitle == ' ' ) )
560 if( ( psz_text_start < psz_text ) &&
561 ( *(psz_text-1) != ' ' ) )
567 *psz_text++ = *psz_subtitle;
572 p_spu->p_region->psz_text = strdup( psz_text_start );
573 free( psz_text_start );
575 if( p_style == NULL )
577 p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
578 p_spu->i_x = p_sys->i_align ? 20 : 0;
583 msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename);
584 p_spu->p_region->p_style = &p_style->font_style;
585 p_spu->p_region->i_align = p_style->i_align;
589 static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in )
591 /* We expect MKV formatted SSA:
592 * ReadOrder, Layer, Style, CharacterName, MarginL, MarginR,
593 * MarginV, Effect, Text */
594 decoder_sys_t *p_sys = p_dec->p_sys;
595 subpicture_t *p_spu = p_spu_in;
596 ssa_style_t *p_style = NULL;
597 char *psz_new_subtitle = NULL;
598 char *psz_buffer_sub = NULL;
599 char *psz_style = NULL;
600 char *psz_style_start = NULL;
601 char *psz_style_end = NULL;
602 int i_text = 0, i_comma = 0, i_strlen = 0, i;
603 int i_margin_l = 0, i_margin_r = 0, i_margin_v = 0;
605 psz_buffer_sub = psz_subtitle;
607 p_spu->p_region->psz_html = NULL;
610 while( i_comma < 8 && *psz_buffer_sub != '\0' )
612 if( *psz_buffer_sub == ',' )
615 if( i_comma == 2 ) psz_style_start = &psz_buffer_sub[1];
616 if( i_comma == 3 ) psz_style_end = &psz_buffer_sub[0];
617 if( i_comma == 4 ) i_margin_l = (int)strtol( psz_buffer_sub+1, NULL, 10 );
618 if( i_comma == 5 ) i_margin_r = (int)strtol( psz_buffer_sub+1, NULL, 10 );
619 if( i_comma == 6 ) i_margin_v = (int)strtol( psz_buffer_sub+1, NULL, 10 );
624 if( *psz_buffer_sub == '\0' && i_comma == 8 )
626 msg_Dbg( p_dec, "couldn't find all fields in this SSA line" );
630 psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
632 while( psz_buffer_sub[0] != '\0' )
634 if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'n' )
636 psz_new_subtitle[i_text] = ' ';
640 else if( psz_buffer_sub[0] == '\\' && psz_buffer_sub[1] == 'N' )
642 psz_new_subtitle[i_text] = '\n';
646 else if( psz_buffer_sub[0] == '{' &&
647 psz_buffer_sub[1] == '\\' )
649 /* SSA control code */
650 while( psz_buffer_sub[0] != '\0' &&
651 psz_buffer_sub[0] != '}' )
659 psz_new_subtitle[i_text] = psz_buffer_sub[0];
664 psz_new_subtitle[i_text] = '\0';
666 i_strlen = __MAX( psz_style_end - psz_style_start, 0);
667 psz_style = (char *)malloc( i_strlen + 1);
668 psz_style = memcpy( psz_style, psz_style_start, i_strlen );
669 psz_style[i_strlen] = '\0';
671 for( i = 0; i < p_sys->i_ssa_styles; i++ )
673 if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
674 p_style = p_sys->pp_ssa_styles[i];
676 if( psz_style ) free( psz_style );
678 p_spu->p_region->psz_text = psz_new_subtitle;
679 if( p_style == NULL )
681 p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
682 p_spu->i_x = p_sys->i_align ? 20 : 0;
687 msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename);
688 p_spu->p_region->p_style = &p_style->font_style;
689 p_spu->p_region->i_align = p_style->i_align;
690 if( p_style->i_align & SUBPICTURE_ALIGN_LEFT )
692 p_spu->i_x = (i_margin_l) ? i_margin_l : p_style->i_margin_h;
694 else if( p_style->i_align & SUBPICTURE_ALIGN_RIGHT )
696 p_spu->i_x = (i_margin_r) ? i_margin_r : p_style->i_margin_h;
698 p_spu->i_y = (i_margin_v) ? i_margin_v : p_style->i_margin_v;
/* Return a pointer to the first character that follows the next run of line
 * terminators ('\n' or '\r') in psz_text. When the text holds no line
 * terminator, the returned pointer designates its terminating NUL. */
static char* GotoNextLine( char *psz_text )
{
    char *p_cursor = psz_text;

    /* Skip the rest of the current line... */
    p_cursor += strcspn( p_cursor, "\r\n" );
    /* ...then every consecutive CR/LF that ends it. */
    p_cursor += strspn( p_cursor, "\r\n" );

    return p_cursor;
}
720 /*****************************************************************************
721 * ParseColor: SSA stores color in BBGGRR, in ASS it uses AABBGGRR
722 * The string value in the string can be a pure integer, or hexadecimal &HBBGGRR
723 *****************************************************************************/
724 static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *pi_alpha )
727 if( !strncasecmp( psz_color, "&H", 2 ) )
729 /* textual HEX representation */
730 i_color = (int) strtol( psz_color+2, NULL, 16 );
732 else i_color = (int) strtol( psz_color, NULL, 0 );
735 *pi_color |= ( ( i_color & 0x000000FF ) << 16 ); /* Red */
736 *pi_color |= ( ( i_color & 0x0000FF00 ) ); /* Green */
737 *pi_color |= ( ( i_color & 0x00FF0000 ) >> 16 ); /* Blue */
739 if( pi_alpha != NULL )
740 *pi_alpha = ( i_color & 0xFF000000 ) >> 24;
743 /*****************************************************************************
744 * ParseUSFHeader: Retrieve global formatting information etc
745 *****************************************************************************/
746 static void ParseUSFHeader( decoder_t *p_dec )
748 decoder_sys_t *p_sys = p_dec->p_sys;
749 stream_t *p_sub = NULL;
751 xml_reader_t *p_xml_reader = NULL;
753 p_sub = stream_MemoryNew( VLC_OBJECT(p_dec),
754 p_dec->fmt_in.p_extra,
755 p_dec->fmt_in.i_extra,
759 p_xml = xml_Create( p_dec );
762 p_xml_reader = xml_ReaderCreate( p_xml, p_sub );
765 /* Look for Root Node */
766 if( xml_ReaderRead( p_xml_reader ) == 1 )
768 char *psz_node = xml_ReaderName( p_xml_reader );
770 if( !strcasecmp( "usfsubtitles", psz_node ) )
771 ParseUSFHeaderTags( p_sys, p_xml_reader );
776 xml_ReaderDelete( p_xml, p_xml_reader );
780 stream_Delete( p_sub );
784 static void ParseUSFHeaderTags( decoder_sys_t *p_sys, xml_reader_t *p_xml_reader )
787 ssa_style_t *p_style = NULL;
788 int i_style_level = 0;
789 int i_metadata_level = 0;
791 while ( xml_ReaderRead( p_xml_reader ) == 1 )
793 switch ( xml_ReaderNodeType( p_xml_reader ) )
795 case XML_READER_TEXT:
796 case XML_READER_NONE:
798 case XML_READER_ENDELEM:
799 psz_node = xml_ReaderName( p_xml_reader );
803 switch (i_style_level)
806 if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) )
812 if( !strcasecmp( "styles", psz_node ) )
818 if( !strcasecmp( "style", psz_node ) )
820 p_style->font_style.i_text_align = p_style->i_align;
822 TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
833 case XML_READER_STARTELEM:
834 psz_node = xml_ReaderName( p_xml_reader );
838 if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) )
842 else if( !strcasecmp( "resolution", psz_node ) && (i_metadata_level == 1) )
844 while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
846 char *psz_name = xml_ReaderName ( p_xml_reader );
847 char *psz_value = xml_ReaderValue ( p_xml_reader );
849 if( psz_name && psz_value )
851 if( !strcasecmp( "x", psz_name ) )
852 p_sys->i_original_width = atoi( psz_value );
853 else if( !strcasecmp( "y", psz_name ) )
854 p_sys->i_original_height = atoi( psz_value );
856 if( psz_name ) free( psz_name );
857 if( psz_value ) free( psz_value );
860 else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) )
864 else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) )
868 p_style = calloc( 1, sizeof(ssa_style_t) );
870 while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
872 char *psz_name = xml_ReaderName ( p_xml_reader );
873 char *psz_value = xml_ReaderValue ( p_xml_reader );
875 if( psz_name && psz_value )
877 if( !strcasecmp( "name", psz_name ) )
878 p_style->psz_stylename = strdup( psz_value);
880 if( psz_name ) free( psz_name );
881 if( psz_value ) free( psz_value );
884 else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) )
886 while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
888 char *psz_name = xml_ReaderName ( p_xml_reader );
889 char *psz_value = xml_ReaderValue ( p_xml_reader );
891 if( psz_name && psz_value )
893 if( !strcasecmp( "face", psz_name ) )
894 p_style->font_style.psz_fontname = strdup( psz_value);
895 else if( !strcasecmp( "size", psz_name ) )
896 p_style->font_style.i_font_size = atoi( psz_value);
897 else if( !strcasecmp( "italic", psz_name ) )
899 if( !strcasecmp( "yes", psz_value ))
900 p_style->font_style.i_style_flags |= STYLE_ITALIC;
902 else if( !strcasecmp( "weight", psz_name ) )
904 if( !strcasecmp( "bold", psz_value ))
905 p_style->font_style.i_style_flags |= STYLE_BOLD;
907 else if( !strcasecmp( "underline", psz_name ) )
909 if( !strcasecmp( "yes", psz_value ))
910 p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
912 else if( !strcasecmp( "color", psz_name ) )
914 if( *psz_value == '#' )
916 unsigned long col = strtol(psz_value+1, NULL, 16);
917 p_style->font_style.i_font_color = (col & 0x00ffffff);
918 /* From DTD: <!-- alpha range = 0..100 --> */
919 p_style->font_style.i_font_alpha = ((col >> 24) & 0xff) * 255 / 100;
922 else if( !strcasecmp( "outline-color", psz_name ) )
924 if( *psz_value == '#' )
926 unsigned long col = strtol(psz_value+1, NULL, 16);
927 p_style->font_style.i_outline_color = (col & 0x00ffffff);
928 /* From DTD: <!-- alpha range = 0..100 --> */
929 p_style->font_style.i_outline_alpha = ((col >> 24) & 0xff) * 255 / 100;
932 else if( !strcasecmp( "shadow-color", psz_name ) )
934 if( *psz_value == '#' )
936 unsigned long col = strtol(psz_value+1, NULL, 16);
937 p_style->font_style.i_shadow_color = (col & 0x00ffffff);
938 /* From DTD: <!-- alpha range = 0..100 --> */
939 p_style->font_style.i_shadow_alpha = ((col >> 24) & 0xff) * 255 / 100;
943 if( psz_name ) free( psz_name );
944 if( psz_value ) free( psz_value );
947 else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) )
949 while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
951 char *psz_name = xml_ReaderName ( p_xml_reader );
952 char *psz_value = xml_ReaderValue ( p_xml_reader );
954 if( psz_name && psz_value )
956 if( !strcasecmp( "alignment", psz_name ) )
958 if( !strcasecmp( "TopLeft", psz_value ) )
960 p_style->i_align |= SUBPICTURE_ALIGN_TOP;
961 p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
963 else if( !strcasecmp( "TopCenter", psz_value ) )
965 p_style->i_align |= SUBPICTURE_ALIGN_TOP;
967 else if( !strcasecmp( "TopRight", psz_value ) )
969 p_style->i_align |= SUBPICTURE_ALIGN_TOP;
970 p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
972 else if( !strcasecmp( "MiddleLeft", psz_value ) )
974 p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
976 else if( !strcasecmp( "MiddleCenter", psz_value ) )
978 p_style->i_align = 0;
980 else if( !strcasecmp( "MiddleRight", psz_value ) )
982 p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
984 else if( !strcasecmp( "BottomLeft", psz_value ) )
986 p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
987 p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
989 else if( !strcasecmp( "BottomCenter", psz_value ) )
991 p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
993 else if( !strcasecmp( "BottomRight", psz_value ) )
995 p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
996 p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1000 if( psz_name ) free( psz_name );
1001 if( psz_value ) free( psz_value );
1010 if( p_style ) free( p_style );
1012 /*****************************************************************************
1013 * ParseSSAHeader: Retrieve global formatting information etc
1014 *****************************************************************************/
1015 static void ParseSSAHeader( decoder_t *p_dec )
1017 decoder_sys_t *p_sys = p_dec->p_sys;
1018 char *psz_parser = NULL;
1019 char *psz_header = malloc( p_dec->fmt_in.i_extra+1 );
1020 int i_section_type = 1;
1022 memcpy( psz_header, p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra );
1023 psz_header[ p_dec->fmt_in.i_extra] = '\0';
1025 /* Handle [Script Info] section */
1026 psz_parser = strcasestr( psz_header, "[Script Info]" );
1027 if( psz_parser == NULL ) goto eof;
1029 psz_parser = GotoNextLine( psz_parser );
1031 while( psz_parser[0] != '\0' )
1034 char buffer_text[MAX_LINE + 1];
1036 if( psz_parser[0] == '!' || psz_parser[0] == ';' ) /* comment */;
1037 else if( sscanf( psz_parser, "PlayResX: %d", &temp ) == 1 )
1038 p_sys->i_original_width = ( temp > 0 ) ? temp : -1;
1039 else if( sscanf( psz_parser, "PlayResY: %d", &temp ) == 1 )
1040 p_sys->i_original_height = ( temp > 0 ) ? temp : -1;
1041 else if( sscanf( psz_parser, "Script Type: %8192s", buffer_text ) == 1 )
1043 if( !strcasecmp( buffer_text, "V4.00+" ) ) p_sys->b_ass = VLC_TRUE;
1045 else if( !strncasecmp( psz_parser, "[V4 Styles]", 11 ) )
1047 else if( !strncasecmp( psz_parser, "[V4+ Styles]", 12) )
1050 p_sys->b_ass = VLC_TRUE;
1052 else if( !strncasecmp( psz_parser, "[Events]", 8 ) )
1054 else if( !strncasecmp( psz_parser, "Style:", 6 ) )
1056 int i_font_size, i_bold, i_italic, i_border, i_outline, i_shadow, i_underline,
1057 i_strikeout, i_scale_x, i_scale_y, i_spacing, i_align, i_margin_l, i_margin_r, i_margin_v;
1059 char psz_temp_stylename[MAX_LINE+1];
1060 char psz_temp_fontname[MAX_LINE+1];
1061 char psz_temp_color1[MAX_LINE+1];
1062 char psz_temp_color2[MAX_LINE+1];
1063 char psz_temp_color3[MAX_LINE+1];
1064 char psz_temp_color4[MAX_LINE+1];
1066 if( i_section_type == 1 ) /* V4 */
1068 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1069 psz_temp_stylename, psz_temp_fontname, &i_font_size,
1070 psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1071 &i_border, &i_outline, &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 16 )
1073 ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1075 p_style->psz_stylename = strdup( psz_temp_stylename );
1076 p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1077 p_style->font_style.i_font_size = i_font_size;
1079 ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, NULL );
1080 ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, NULL );
1081 p_style->font_style.i_outline_color = p_style->font_style.i_shadow_color;
1082 p_style->font_style.i_font_alpha = p_style->font_style.i_outline_alpha = p_style->font_style.i_shadow_alpha = 0x00;
1083 p_style->font_style.i_style_flags = 0;
1084 if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1085 if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1087 if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1088 else if( i_border == 3 )
1090 p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1091 p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1092 p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1094 p_style->font_style.i_shadow_width = i_shadow;
1095 p_style->font_style.i_outline_width = i_outline;
1097 p_style->i_align = 0;
1098 if( i_align == 1 || i_align == 5 || i_align == 9 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1099 if( i_align == 3 || i_align == 7 || i_align == 11 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1100 if( i_align < 4 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1101 else if( i_align < 8 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP;
1103 p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1104 p_style->i_margin_v = i_margin_v;
1106 TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1108 else msg_Warn( p_dec, "SSA v4 styleline parsing failed" );
1110 else if( i_section_type == 2 ) /* V4+ */
1112 /* Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour,
1113 Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline,
1114 Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
1116 if( sscanf( psz_parser, "Style: %8192[^,],%8192[^,],%d,%8192[^,],%8192[^,],%8192[^,],%8192[^,],%d,%d,%d,%d,%d,%d,%d,%*f,%d,%d,%d,%d,%d,%d,%d%*[^\r\n]",
1117 psz_temp_stylename, psz_temp_fontname, &i_font_size,
1118 psz_temp_color1, psz_temp_color2, psz_temp_color3, psz_temp_color4, &i_bold, &i_italic,
1119 &i_underline, &i_strikeout, &i_scale_x, &i_scale_y, &i_spacing, &i_border, &i_outline,
1120 &i_shadow, &i_align, &i_margin_l, &i_margin_r, &i_margin_v ) == 21 )
1122 ssa_style_t *p_style = malloc( sizeof(ssa_style_t) );
1124 p_style->psz_stylename = strdup( psz_temp_stylename );
1125 p_style->font_style.psz_fontname = strdup( psz_temp_fontname );
1126 p_style->font_style.i_font_size = i_font_size;
1127 msg_Dbg( p_dec, psz_temp_color1 );
1128 ParseColor( p_dec, psz_temp_color1, &p_style->font_style.i_font_color, &p_style->font_style.i_font_alpha );
1129 ParseColor( p_dec, psz_temp_color3, &p_style->font_style.i_outline_color, &p_style->font_style.i_outline_alpha );
1130 ParseColor( p_dec, psz_temp_color4, &p_style->font_style.i_shadow_color, &p_style->font_style.i_shadow_alpha );
1132 p_style->font_style.i_style_flags = 0;
1133 if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
1134 if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
1135 if( i_underline ) p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
1136 if( i_strikeout ) p_style->font_style.i_style_flags |= STYLE_STRIKEOUT;
1137 if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
1138 else if( i_border == 3 )
1140 p_style->font_style.i_style_flags |= STYLE_BACKGROUND;
1141 p_style->font_style.i_background_color = p_style->font_style.i_shadow_color;
1142 p_style->font_style.i_background_alpha = p_style->font_style.i_shadow_alpha;
1144 p_style->font_style.i_shadow_width = ( i_border == 1 ) ? i_shadow : 0;
1145 p_style->font_style.i_outline_width = ( i_border == 1 ) ? i_outline : 0;
1146 p_style->font_style.i_spacing = i_spacing;
1147 //p_style->font_style.f_angle = f_angle;
1149 p_style->i_align = 0;
1150 if( i_align == 0x1 || i_align == 0x4 || i_align == 0x7 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
1151 if( i_align == 0x3 || i_align == 0x6 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
1152 if( i_align == 0x7 || i_align == 0x8 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP;
1153 if( i_align == 0x1 || i_align == 0x2 || i_align == 0x3 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
1154 p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
1155 p_style->i_margin_v = i_margin_v;
1157 /*TODO: Ignored: angle i_scale_x|y (fontscaling), i_encoding */
1158 TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
1160 else msg_Dbg( p_dec, "SSA V4+ styleline parsing failed" );
1163 psz_parser = GotoNextLine( psz_parser );
1167 if( psz_header ) free( psz_header );
/* Return a newly allocated plain-text copy of psz_subtitle with markup
 * removed.
 *
 * - Everything from a '<' up to and including the matching '>' is dropped,
 *   so tags carrying attribute values are handled too.
 * - The entities "&lt;", "&gt;" and "&amp;" (matched case-insensitively)
 *   are replaced by '<', '>' and '&'. Any other '&' is copied as-is.
 * - A '<' with no closing '>' discards the rest of the string.
 *
 * The input string is not modified, so the original text can be reused.
 * The caller owns the returned buffer and must free() it. NULL is returned
 * if the allocation fails.
 */
static char *StripTags( char *psz_subtitle )
{
    /* The output can never be longer than the input. */
    char *psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
    char *psz_text       = psz_text_start;
    char *psz_shrunk;

    if( psz_text_start == NULL )
        return NULL;

    while( *psz_subtitle )
    {
        if( *psz_subtitle == '<' )
        {
            psz_subtitle += strcspn( psz_subtitle, ">" );

            /* Unterminated tag: stop here instead of stepping over the
             * terminating NUL below. */
            if( *psz_subtitle == '\0' )
                break;
        }
        else if( *psz_subtitle == '&' )
        {
            if( !strncasecmp( psz_subtitle, "&lt;", 4 ) )
            {
                *psz_text++ = '<';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&gt;", 4 ) )
            {
                *psz_text++ = '>';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&amp;", 5 ) )
            {
                *psz_text++ = '&';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else
            {
                /* Assume it is just a normal ampersand */
                *psz_text++ = '&';
            }
        }
        else
        {
            *psz_text++ = *psz_subtitle;
        }

        /* Step over the '>' / ';' / plain character just handled. */
        psz_subtitle++;
    }
    *psz_text = '\0';

    /* Give back the unused tail; if shrinking fails the larger buffer is
     * still valid, so keep it rather than losing the result. */
    psz_shrunk = realloc( psz_text_start,
                          (size_t)( psz_text - psz_text_start ) + 1 );
    return psz_shrunk != NULL ? psz_shrunk : psz_text_start;
}
/* Try to respect any style tags present in the subtitle string. The main
 * problem here is a lack of adequate specs for the subtitle formats.
 * SSA/ASS and USF are both specified in detail -- but they are handled
 * elsewhere.
 * SAMI has a detailed spec, but extensive rework is needed in the demux
 * code to prevent all this style information being excised, as presently
 * happens.
 * That leaves the others - none of which were (I guess) originally intended
 * to be carrying style information. Over time people have used them that way.
 * In the absence of specifications from which to work, the tags supported
 * have been restricted to the simple set permitted by the USF DTD, ie. :
 *  Basic: <br/>, <i>, <b>, <u>
 *  Extended: <font>
 *    Attributes: face, family, size, color, outline-color, shadow-color,
 *                outline-level, shadow-level, back-color, alpha
 * There is also the further restriction that the subtitle be well-formed
 * as an XML entity, ie. the HTML sentence:
 *        <b><i>Bold and Italics</b></i>
 * doesn't qualify because the tags aren't nested one inside the other.
 * <text> tags are automatically added to the output to ensure
 * well-formedness.
 *
 * Conversion rules:
 *  - a newline becomes "<br/>";
 *  - <b>, <i>, <u> and <font ...> (any letter case) are emitted in lower
 *    case and pushed on a tag stack; the matching closing tag is emitted
 *    in lower case as well, so that it matches the opening tag;
 *  - inside <font>, recognised attributes are copied verbatim and
 *    unrecognised ones (name="value") are dropped;
 *  - any other opening tag is skipped up to its '>', which is then copied
 *    as ordinary text;
 *  - "&lt;", "&gt;" and "&amp;" are kept; any other '&' becomes "&amp;".
 *
 * If the text doesn't qualify for any reason (tags not properly nested,
 * unclosed tags, truncated <font> tag, nesting deeper than the tag stack,
 * allocation failure), NULL is returned, and the rendering engine will
 * fall back to the plain text version of the subtitle.
 *
 * The input string is not modified. The caller owns the returned buffer
 * and must free() it.
 */
static char *CreateHtmlSubtitle( char *psz_subtitle )
{
    static const char *const ppsz_attribs[] = {
        "face=\"", "family=\"", "size=\"",
        "color=\"", "outline-color=\"", "shadow-color=\"",
        "outline-level=\"", "shadow-level=\"", "back-color=\"",
        "alpha=\"", NULL };

    char    psz_tagStack[ 100 ];   /* one letter per currently open tag */
    size_t  i_depth  = 0;          /* number of entries in psz_tagStack */
    size_t  i_in_len = strlen( psz_subtitle );
    char   *psz_html_start;
    char   *psz_html;
    char   *psz_shrunk;

    /* Every input byte produces at most five output bytes ('\n' -> "<br/>",
     * '&' -> "&amp;"; everything else is copied 1:1 or dropped), so a single
     * worst-case allocation removes the need to grow the buffer while
     * writing. The extra 14 bytes hold "<text>", "</text>" and the NUL. */
    if( i_in_len > ( (size_t)-1 - 14 ) / 5 )
        return NULL;

    psz_html_start = malloc( i_in_len * 5 + 14 );
    if( psz_html_start == NULL )
        return NULL;

    psz_html = psz_html_start;
    memcpy( psz_html, "<text>", 6 );
    psz_html += 6;

    while( *psz_subtitle )
    {
        if( *psz_subtitle == '\n' )
        {
            memcpy( psz_html, "<br/>", 5 );
            psz_html += 5;
            psz_subtitle++;
        }
        else if( *psz_subtitle == '<' )
        {
            if( !strncasecmp( psz_subtitle, "<br/>", 5 ) )
            {
                memcpy( psz_html, "<br/>", 5 );
                psz_html += 5;
                psz_subtitle += 5;
            }
            else if( !strncasecmp( psz_subtitle, "<b>", 3 ) ||
                     !strncasecmp( psz_subtitle, "<i>", 3 ) ||
                     !strncasecmp( psz_subtitle, "<u>", 3 ) )
            {
                char c_tag;

                /* Emit the canonical lower-case spelling of the tag. */
                if( !strncasecmp( psz_subtitle, "<b>", 3 ) )
                    c_tag = 'b';
                else if( !strncasecmp( psz_subtitle, "<i>", 3 ) )
                    c_tag = 'i';
                else
                    c_tag = 'u';

                /* Nesting deeper than the stack is treated as malformed
                 * instead of writing past the end of psz_tagStack. */
                if( i_depth >= sizeof( psz_tagStack ) )
                    goto malformed;
                psz_tagStack[ i_depth++ ] = c_tag;

                *psz_html++ = '<';
                *psz_html++ = c_tag;
                *psz_html++ = '>';
                psz_subtitle += 3;
            }
            else if( !strncasecmp( psz_subtitle, "<font ", 6 ) )
            {
                if( i_depth >= sizeof( psz_tagStack ) )
                    goto malformed;
                psz_tagStack[ i_depth++ ] = 'f';

                memcpy( psz_html, "<font ", 6 );
                psz_html += 6;
                psz_subtitle += 6;

                while( *psz_subtitle != '>' )
                {
                    size_t i_len = 0;
                    int    k;

                    /* String ended inside the tag */
                    if( *psz_subtitle == '\0' )
                        goto malformed;

                    for( k = 0; ppsz_attribs[ k ] != NULL; k++ )
                    {
                        size_t i_name = strlen( ppsz_attribs[ k ] );

                        if( !strncasecmp( psz_subtitle, ppsz_attribs[ k ],
                                          i_name ) )
                        {
                            /* Copy name="value", closing quote included */
                            i_len = i_name +
                                    strcspn( psz_subtitle + i_name, "\"" );
                            if( psz_subtitle[ i_len ] == '\0' )
                                goto malformed;
                            i_len++;

                            memcpy( psz_html, psz_subtitle, i_len );
                            psz_html += i_len;
                            break;
                        }
                    }
                    if( ppsz_attribs[ k ] == NULL )
                    {
                        /* Jump over unrecognised attribute: everything up
                         * to and including its second double quote. */
                        i_len = strcspn( psz_subtitle, "\"" );
                        if( psz_subtitle[ i_len ] == '\0' )
                            goto malformed;
                        i_len++;

                        i_len += strcspn( psz_subtitle + i_len, "\"" );
                        if( psz_subtitle[ i_len ] == '\0' )
                            goto malformed;
                        i_len++;
                    }
                    psz_subtitle += i_len;

                    while( *psz_subtitle == ' ' )
                        *psz_html++ = *psz_subtitle++;
                }
                /* Copy the closing '>' of the font tag */
                *psz_html++ = *psz_subtitle++;
            }
            else if( !strncmp( psz_subtitle, "</", 2 ) )
            {
                const char *psz_close = NULL;
                size_t      i_close;

                /* Only the most recently opened tag may be closed. */
                if( i_depth > 0 )
                {
                    switch( psz_tagStack[ i_depth - 1 ] )
                    {
                        case 'b': psz_close = "</b>";    break;
                        case 'i': psz_close = "</i>";    break;
                        case 'u': psz_close = "</u>";    break;
                        case 'f': psz_close = "</font>"; break;
                        default:                         break;
                    }
                }
                if( psz_close == NULL )
                    goto malformed;

                i_close = strlen( psz_close );
                if( strncasecmp( psz_subtitle, psz_close, i_close ) )
                    goto malformed;   /* Not well formed */

                i_depth--;

                /* Write the canonical spelling, not the input text, so
                 * that "<B>x</B>" gives "<b>x</b>" and not "<b>x</B>". */
                memcpy( psz_html, psz_close, i_close );
                psz_html += i_close;
                psz_subtitle += i_close;
            }
            else
            {
                /* Unsupported tag: skip up to its '>' (or the end of the
                 * string). The '>' itself is handled by the next iteration
                 * as ordinary text. */
                psz_subtitle += strcspn( psz_subtitle, ">" );
            }
        }
        else if( *psz_subtitle == '&' )
        {
            if( !strncasecmp( psz_subtitle, "&lt;", 4 ) )
            {
                memcpy( psz_html, "&lt;", 4 );
                psz_html += 4;
                psz_subtitle += 4;
            }
            else if( !strncasecmp( psz_subtitle, "&gt;", 4 ) )
            {
                memcpy( psz_html, "&gt;", 4 );
                psz_html += 4;
                psz_subtitle += 4;
            }
            else if( !strncasecmp( psz_subtitle, "&amp;", 5 ) )
            {
                memcpy( psz_html, "&amp;", 5 );
                psz_html += 5;
                psz_subtitle += 5;
            }
            else
            {
                /* Bare ampersand: escape it */
                memcpy( psz_html, "&amp;", 5 );
                psz_html += 5;
                psz_subtitle++;
            }
        }
        else
        {
            *psz_html++ = *psz_subtitle++;
        }
    }

    /* Tags left open: not well formed */
    if( i_depth != 0 )
        goto malformed;

    memcpy( psz_html, "</text>", 8 );   /* includes the terminating NUL */
    psz_html += 7;

    /* Shrink the memory requirements; if that fails the larger buffer is
     * still valid, so keep it. */
    psz_shrunk = realloc( psz_html_start,
                          (size_t)( psz_html - psz_html_start ) + 1 );
    return psz_shrunk != NULL ? psz_shrunk : psz_html_start;

malformed:
    /* Not well formed -- kill everything */
    free( psz_html_start );
    return NULL;
}