X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=modules%2Fcodec%2Fsubsdec.c;h=e4d66b309b4da2f6a841093c2b7a0a4731e3c5e4;hb=ef6c490adcbcf6ad2cf3ac92b7a49016dce18042;hp=6ad5db0286cb651a0f50e5b9214d9c3f16b4beaf;hpb=d3fe7f28797d4dba65ffcdd60bf932e758a48a9e;p=vlc diff --git a/modules/codec/subsdec.c b/modules/codec/subsdec.c index 6ad5db0286..e4d66b309b 100644 --- a/modules/codec/subsdec.c +++ b/modules/codec/subsdec.c @@ -7,6 +7,7 @@ * Authors: Gildas Bazin * Samuel Hocevar * Derk-Jan Hartman + * Bernie Purcell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,6 +34,10 @@ #include #include #include +#include +#include +#include +#include typedef struct { @@ -68,9 +73,13 @@ static void CloseDecoder ( vlc_object_t * ); static subpicture_t *DecodeBlock ( decoder_t *, block_t ** ); static subpicture_t *ParseText ( decoder_t *, block_t * ); static void ParseSSAHeader ( decoder_t * ); +static void ParseUSFHeader ( decoder_t * ); +static void ParseUSFHeaderTags( decoder_sys_t *, xml_reader_t * ); static void ParseSSAString ( decoder_t *, char *, subpicture_t * ); +static void ParseUSFString ( decoder_t *, char *, subpicture_t * ); static void ParseColor ( decoder_t *, char *, int *, int * ); -static void StripTags ( char * ); +static char *StripTags ( char * ); +static char *CreateHtmlSubtitle ( char * ); #define DEFAULT_NAME "Default" #define MAX_LINE 8192 @@ -173,6 +182,7 @@ static int OpenDecoder( vlc_object_t *p_this ) vlc_value_t val; if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') && + p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') && p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') ) { return VLC_EGENERIC; @@ -182,7 +192,7 @@ static int OpenDecoder( vlc_object_t *p_this ) /* Allocate the memory needed to store the decoder's structure */ if( ( p_dec->p_sys = p_sys = - (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL ) + (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL ) { msg_Err( p_dec, "out of memory" ); return VLC_ENOMEM; @@ -198,44 +208,65 @@ static int OpenDecoder( vlc_object_t *p_this ) p_sys->pp_ssa_styles = NULL; p_sys->i_ssa_styles = 0; + char *psz_charset = NULL; + /* First try demux-specified encoding */ if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding ) { - msg_Dbg( p_dec, "using demux suggested character encoding: %s", - p_dec->fmt_in.subs.psz_encoding ); - if( strcmp( p_dec->fmt_in.subs.psz_encoding, "UTF-8" ) ) - p_sys->iconv_handle = vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding ); + psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding); + msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s", + p_dec->fmt_in.subs.psz_encoding ?: "not specified"); } - else + + /* Second, try configured encoding */ + if (psz_charset == NULL) { - var_Create( p_dec, "subsdec-encoding", - VLC_VAR_STRING | VLC_VAR_DOINHERIT ); - var_Get( p_dec, "subsdec-encoding", &val ); - if( !strcmp( val.psz_string, DEFAULT_NAME ) ) + psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding"); + if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME)) { - const char *psz_charset = GetFallbackEncoding(); + free (psz_charset); + psz_charset = NULL; + } - p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec, - "subsdec-autodetect-utf8" ); + msg_Dbg (p_dec, "trying configured character encoding: %s", + psz_charset ?: "not specified"); + } - p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset ); - msg_Dbg( p_dec, "using fallback character encoding: %s", psz_charset ); - } - else if( !strcmp( val.psz_string, "UTF-8" ) ) - { - msg_Dbg( p_dec, "using enforced character encoding: UTF-8" ); - } - else if( val.psz_string ) + /* Third, try "local" encoding with optional UTF-8 autodetection */ + if (psz_charset == NULL) + { + psz_charset = strdup (GetFallbackEncoding ()); + msg_Dbg (p_dec, "trying default character encoding: %s", + psz_charset ?: "not specified"); + + if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8")) { - msg_Dbg( p_dec, "using enforced character encoding: %s", val.psz_string ); - p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string ); - if( p_sys->iconv_handle == (vlc_iconv_t)-1 ) - { - msg_Warn( p_dec, "unable to do requested conversion" ); - } + msg_Dbg (p_dec, "using automatic UTF-8 detection"); + p_sys->b_autodetect_utf8 = VLC_TRUE; } - if( val.psz_string ) free( val.psz_string ); } + if (psz_charset == NULL) + { + psz_charset = strdup ("UTF-8"); + msg_Dbg (p_dec, "trying hard-coded character encoding: %s", + psz_charset ?: "error"); + } + + if (psz_charset == NULL) + { + free (p_sys); + return VLC_ENOMEM; + } + + if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8")) + { + p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset); + if (p_sys->iconv_handle == (vlc_iconv_t)(-1)) + msg_Err (p_dec, "cannot convert from %s: %s", psz_charset, + strerror (errno)); + } + free (psz_charset); + var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT ); var_Get( p_dec, "subsdec-align", &val ); p_sys->i_align = val.i_int; @@ -245,6 +276,11 @@ static int OpenDecoder( vlc_object_t *p_this ) if( p_dec->fmt_in.i_extra > 0 ) ParseSSAHeader( p_dec ); } + else if( p_dec->fmt_in.i_codec == VLC_FOURCC('u','s','f',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) ) + { + if( p_dec->fmt_in.i_extra > 0 ) + ParseUSFHeader( p_dec ); + } return VLC_SUCCESS; } @@ -331,7 +367,14 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) return NULL; if( p_sys->iconv_handle == (vlc_iconv_t)-1 ) - EnsureUTF8( psz_subtitle ); + { + if (EnsureUTF8( psz_subtitle ) == NULL) + { + msg_Err( p_dec, _("failed to convert subtitle encoding.\n" + "Try manually setting a character-encoding " + "before you open the file.") ); + } + } else { @@ -401,7 +444,8 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) } /* Decode and format the subpicture unit */ - if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') ) + if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') && + p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') ) { /* Normal text subs, easy markup */ p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; @@ -409,9 +453,10 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) p_spu->i_y = 10; /* Remove formatting from string */ - StripTags( psz_subtitle ); - p_spu->p_region->psz_text = psz_subtitle; + p_spu->p_region->psz_text = StripTags( psz_subtitle ); + p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle ); + p_spu->i_start = p_block->i_pts; p_spu->i_stop = p_block->i_pts + p_block->i_length; p_spu->b_ephemer = (p_block->i_length == 0); @@ -419,19 +464,128 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) } else { - /* Decode SSA strings */ - ParseSSAString( p_dec, psz_subtitle, p_spu ); + /* Decode SSA/USF strings */ + if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') ) + ParseSSAString( p_dec, psz_subtitle, p_spu ); + else + ParseUSFString( p_dec, psz_subtitle, p_spu ); + p_spu->i_start = p_block->i_pts; p_spu->i_stop = p_block->i_pts + p_block->i_length; p_spu->b_ephemer = (p_block->i_length == 0); p_spu->b_absolute = VLC_FALSE; p_spu->i_original_picture_width = p_sys->i_original_width; p_spu->i_original_picture_height = p_sys->i_original_height; - if( psz_subtitle ) free( psz_subtitle ); } + if( psz_subtitle ) free( psz_subtitle ); + return p_spu; } +static void ParseUSFString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + subpicture_t *p_spu = p_spu_in; + char *psz_text; + char *psz_text_start; + ssa_style_t *p_style = NULL; + int i; + + /* Create a text only copy of the subtitle (for legacy implementations) and copy + * the rich html version across as is - for parsing by a rendering engine capable + * of understanding it. + */ + p_spu->p_region->psz_text = NULL; + p_spu->p_region->psz_html = strdup( psz_subtitle ); + + for( i = 0; i < p_sys->i_ssa_styles; i++ ) + { + if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) ) + p_style = p_sys->pp_ssa_styles[i]; + } + + /* The StripTags() function doesn't do everything we need (eg.
tag ) + * so do it here ourselves. + */ + psz_text_start = malloc( strlen( psz_subtitle )); + + psz_text = psz_text_start; + while( *psz_subtitle ) + { + if( *psz_subtitle == '<' ) + { + if( !strncasecmp( psz_subtitle, "
", 5 )) + *psz_text++ = '\n'; + else if( !strncasecmp( psz_subtitle, "' ) )) + { + int i_len; + + psz_style += strspn( psz_style, "\"" ) + 1; + i_len = strcspn( psz_style, "\"" ); + + psz_style[ i_len ] = '\0'; + + for( i = 0; i < p_sys->i_ssa_styles; i++ ) + { + if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) ) + p_style = p_sys->pp_ssa_styles[i]; + } + + psz_style[ i_len ] = '\"'; + } + } + + psz_subtitle += strcspn( psz_subtitle, ">" ); + } + else if( *psz_subtitle == '&' ) + { + if( !strncasecmp( psz_subtitle, "<", 4 )) + *psz_text++ = '<'; + else if( !strncasecmp( psz_subtitle, ">", 4 )) + *psz_text++ = '>'; + else if( !strncasecmp( psz_subtitle, "&", 5 )) + *psz_text++ = '&'; + + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else if( ( *psz_subtitle == '\t' ) || + ( *psz_subtitle == '\r' ) || + ( *psz_subtitle == '\n' ) || + ( *psz_subtitle == ' ' ) ) + { + if( ( psz_text_start < psz_text ) && + ( *(psz_text-1) != ' ' ) ) + { + *psz_text++ = ' '; + } + } + else + *psz_text++ = *psz_subtitle; + + psz_subtitle++; + } + *psz_text = '\0'; + p_spu->p_region->psz_text = strdup( psz_text_start ); + free( psz_text_start ); + + if( p_style == NULL ) + { + p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; + p_spu->i_x = p_sys->i_align ? 20 : 0; + p_spu->i_y = 10; + } + else + { + msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename); + p_spu->p_region->p_style = &p_style->font_style; + p_spu->i_flags = p_style->i_align; + } +} + static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in ) { /* We expect MKV formatted SSA: @@ -450,6 +604,8 @@ static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t * psz_buffer_sub = psz_subtitle; + p_spu->p_region->psz_html = NULL; + i_comma = 0; while( i_comma < 8 && *psz_buffer_sub != '\0' ) { @@ -584,6 +740,275 @@ static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *p *pi_alpha = ( i_color & 0xFF000000 ) >> 24; } +/***************************************************************************** + * ParseUSFHeader: Retrieve global formatting information etc + *****************************************************************************/ +static void ParseUSFHeader( decoder_t *p_dec ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + stream_t *p_sub = NULL; + xml_t *p_xml = NULL; + xml_reader_t *p_xml_reader = NULL; + + p_sub = stream_MemoryNew( VLC_OBJECT(p_dec), + p_dec->fmt_in.p_extra, + p_dec->fmt_in.i_extra, + VLC_TRUE ); + if( p_sub ) + { + p_xml = xml_Create( p_dec ); + if( p_xml ) + { + p_xml_reader = xml_ReaderCreate( p_xml, p_sub ); + if( p_xml_reader ) + { + /* Look for Root Node */ + if( xml_ReaderRead( p_xml_reader ) == 1 ) + { + char *psz_node = xml_ReaderName( p_xml_reader ); + + if( !strcasecmp( "usfsubtitles", psz_node ) ) + ParseUSFHeaderTags( p_sys, p_xml_reader ); + + free( psz_node ); + } + + xml_ReaderDelete( p_xml, p_xml_reader ); + } + xml_Delete( p_xml ); + } + stream_Delete( p_sub ); + } +} + +static void ParseUSFHeaderTags( decoder_sys_t *p_sys, xml_reader_t *p_xml_reader ) +{ + char *psz_node; + ssa_style_t *p_style = NULL; + int i_style_level = 0; + int i_metadata_level = 0; + + while ( xml_ReaderRead( p_xml_reader ) == 1 ) + { + switch ( xml_ReaderNodeType( p_xml_reader ) ) + { + case XML_READER_TEXT: + case XML_READER_NONE: + break; + case XML_READER_ENDELEM: + psz_node = xml_ReaderName( p_xml_reader ); + + if( psz_node ) + { + switch (i_style_level) + { + case 0: + if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) ) + { + i_metadata_level--; + } + break; + case 1: + if( !strcasecmp( "styles", psz_node ) ) + { + i_style_level--; + } + break; + case 2: + if( !strcasecmp( "style", psz_node ) ) + { + p_style->font_style.i_text_align = p_style->i_align; + + TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style ); + + p_style = NULL; + i_style_level--; + } + break; + } + + free( psz_node ); + } + break; + case XML_READER_STARTELEM: + psz_node = xml_ReaderName( p_xml_reader ); + + if( psz_node ) + { + if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) ) + { + i_metadata_level++; + } + else if( !strcasecmp( "resolution", psz_node ) && (i_metadata_level == 1) ) + { + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "x", psz_name ) ) + p_sys->i_original_width = atoi( psz_value ); + else if( !strcasecmp( "y", psz_name ) ) + p_sys->i_original_height = atoi( psz_value ); + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) ) + { + i_style_level++; + } + else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) ) + { + i_style_level++; + + p_style = calloc( 1, sizeof(ssa_style_t) ); + + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "name", psz_name ) ) + p_style->psz_stylename = strdup( psz_value); + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) ) + { + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "face", psz_name ) ) + p_style->font_style.psz_fontname = strdup( psz_value); + else if( !strcasecmp( "size", psz_name ) ) + p_style->font_style.i_font_size = atoi( psz_value); + else if( !strcasecmp( "italic", psz_name ) ) + { + if( !strcasecmp( "yes", psz_value )) + p_style->font_style.i_style_flags |= STYLE_ITALIC; + } + else if( !strcasecmp( "weight", psz_name ) ) + { + if( !strcasecmp( "bold", psz_value )) + p_style->font_style.i_style_flags |= STYLE_BOLD; + } + else if( !strcasecmp( "underline", psz_name ) ) + { + if( !strcasecmp( "yes", psz_value )) + p_style->font_style.i_style_flags |= STYLE_UNDERLINE; + } + else if( !strcasecmp( "color", psz_name ) ) + { + if( *psz_value == '#' ) + { + unsigned long col = strtol(psz_value+1, NULL, 16); + p_style->font_style.i_font_color = (col & 0x00ffffff); + /* From DTD: */ + p_style->font_style.i_font_alpha = ((col >> 24) & 0xff) * 255 / 100; + } + } + else if( !strcasecmp( "outline-color", psz_name ) ) + { + if( *psz_value == '#' ) + { + unsigned long col = strtol(psz_value+1, NULL, 16); + p_style->font_style.i_outline_color = (col & 0x00ffffff); + /* From DTD: */ + p_style->font_style.i_outline_alpha = ((col >> 24) & 0xff) * 255 / 100; + } + } + else if( !strcasecmp( "shadow-color", psz_name ) ) + { + if( *psz_value == '#' ) + { + unsigned long col = strtol(psz_value+1, NULL, 16); + p_style->font_style.i_shadow_color = (col & 0x00ffffff); + /* From DTD: */ + p_style->font_style.i_shadow_alpha = ((col >> 24) & 0xff) * 255 / 100; + } + } + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) ) + { + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "alignment", psz_name ) ) + { + if( !strcasecmp( "TopLeft", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_TOP; + p_style->i_align |= SUBPICTURE_ALIGN_LEFT; + } + else if( !strcasecmp( "TopCenter", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_TOP; + } + else if( !strcasecmp( "TopRight", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_TOP; + p_style->i_align |= SUBPICTURE_ALIGN_RIGHT; + } + else if( !strcasecmp( "MiddleLeft", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_LEFT; + } + else if( !strcasecmp( "MiddleCenter", psz_value ) ) + { + p_style->i_align = 0; + } + else if( !strcasecmp( "MiddleRight", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_RIGHT; + } + else if( !strcasecmp( "BottomLeft", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM; + p_style->i_align |= SUBPICTURE_ALIGN_LEFT; + } + else if( !strcasecmp( "BottomCenter", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM; + } + else if( !strcasecmp( "BottomRight", psz_value ) ) + { + p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM; + p_style->i_align |= SUBPICTURE_ALIGN_RIGHT; + } + } + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + + free( psz_node ); + } + break; + } + } + if( p_style ) free( p_style ); +} /***************************************************************************** * ParseSSAHeader: Retrieve global formatting information etc *****************************************************************************/ @@ -743,45 +1168,293 @@ eof: return; } -static void StripTags( char *psz_text ) +/* Function now handles tags which has attribute values, and tries + * to deal with &' commands too. It no longer modifies the string + * in place, so that the original text can be reused + */ +static char *StripTags( char *psz_subtitle ) { - int i_left_moves = 0; - vlc_bool_t b_inside_tag = VLC_FALSE; - int i = 0; - int i_tag_start = -1; - while( psz_text[ i ] ) + char *psz_text_start; + + psz_text_start = malloc( strlen( psz_subtitle ) + 1 ); + + if( psz_text_start != NULL ) { - if( !b_inside_tag ) + char *psz_text = psz_text_start; + + while( *psz_subtitle ) { - if( psz_text[ i ] == '<' ) + if( *psz_subtitle == '<' ) + { + psz_subtitle += strcspn( psz_subtitle, ">" ); + } + else if( *psz_subtitle == '&' ) + { + if( !strncasecmp( psz_subtitle, "<", 4 )) + { + *psz_text++ = '<'; + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else if( !strncasecmp( psz_subtitle, ">", 4 )) + { + *psz_text++ = '>'; + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else if( !strncasecmp( psz_subtitle, "&", 5 )) + { + *psz_text++ = '&'; + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else + { + /* Assume it is just a normal ampersand */ + *psz_text++ = '&'; + } + } + else { - b_inside_tag = VLC_TRUE; - i_tag_start = i; + *psz_text++ = *psz_subtitle; } - psz_text[ i - i_left_moves ] = psz_text[ i ]; + + psz_subtitle++; } - else + *psz_text = '\0'; + psz_text_start = realloc( psz_text_start, strlen( psz_text_start ) + 1 ); + } + return psz_text_start; +} + +/* Try to respect any style tags present in the subtitle string. The main + * problem here is a lack of adequate specs for the subtitle formats. + * SSA/ASS and USF are both detail spec'ed -- but they are handled elsewhere. + * SAMI has a detailed spec, but extensive rework is needed in the demux + * code to prevent all this style information being excised, as it presently + * does. + * That leaves the others - none of which were (I guess) originally intended + * to be carrying style information. Over time people have used them that way. + * In the absence of specifications from which to work, the tags supported + * have been restricted to the simple set permitted by the USF DTD, ie. : + * Basic:
, , , + * Extended: + * Attributes: face + * family + * size + * color + * outline-color + * shadow-color + * outline-level + * shadow-level + * back-color + * alpha + * There is also the further restriction that the subtitle be well-formed + * as an XML entity, ie. the HTML sentence: + * Bold and Italics + * doesn't qualify because the tags aren't nested one inside the other. + * tags are automatically added to the output to ensure + * well-formedness. + * If the text doesn't qualify for any reason, a NULL string is + * returned, and the rendering engine will fall back to the + * plain text version of the subtitle. + */ +static char *CreateHtmlSubtitle( char *psz_subtitle ) +{ + char psz_tagStack[ 100 ]; + size_t i_buf_size = strlen( psz_subtitle ) + 100; + char *psz_html_start = malloc( i_buf_size ); + + psz_tagStack[ 0 ] = '\0'; + + if( psz_html_start != NULL ) + { + char *psz_html = psz_html_start; + + strcpy( psz_html, "" ); + psz_html += 6; + + while( *psz_subtitle ) { - if( ( psz_text[ i ] == ' ' ) || - ( psz_text[ i ] == '\t' ) || - ( psz_text[ i ] == '\n' ) || - ( psz_text[ i ] == '\r' ) ) + if( *psz_subtitle == '\n' ) { - b_inside_tag = VLC_FALSE; - i_tag_start = -1; + strcpy( psz_html, "
" ); + psz_html += 5; + psz_subtitle++; } - else if( psz_text[ i ] == '>' ) + else if( *psz_subtitle == '<' ) { - i_left_moves += i - i_tag_start + 1; - i_tag_start = -1; - b_inside_tag = VLC_FALSE; + if( !strncasecmp( psz_subtitle, "
", 5 )) + { + strcpy( psz_html, "
" ); + psz_html += 5; + psz_subtitle += 5; + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + strcpy( psz_html, "" ); + strcat( psz_tagStack, "b" ); + psz_html += 3; + psz_subtitle += 3; + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + strcpy( psz_html, "" ); + strcat( psz_tagStack, "i" ); + psz_html += 3; + psz_subtitle += 3; + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + strcpy( psz_html, "" ); + strcat( psz_tagStack, "u" ); + psz_html += 3; + psz_subtitle += 3; + } + else if( !strncasecmp( psz_subtitle, "= 0 ) + { + psz_lastTag = psz_tagStack + i_len; + i_len = 0; + + switch( *psz_lastTag ) + { + case 'b': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'i': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'u': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'f': + b_match = !strncasecmp( psz_subtitle, "", 7 ); + i_len = 7; + break; + } + } + if( ! b_match ) + { + /* Not well formed -- kill everything */ + free( psz_html_start ); + psz_html_start = NULL; + break; + } + *psz_lastTag = '\0'; + strncpy( psz_html, psz_subtitle, i_len ); + psz_html += i_len; + psz_subtitle += i_len; + } + else + { + psz_subtitle += strcspn( psz_subtitle, ">" ); + } + } + else if( *psz_subtitle == '&' ) + { + if( !strncasecmp( psz_subtitle, "<", 4 )) + { + strcpy( psz_html, "<" ); + psz_html += 4; + psz_subtitle += 4; + } + else if( !strncasecmp( psz_subtitle, ">", 4 )) + { + strcpy( psz_html, ">" ); + psz_html += 4; + psz_subtitle += 4; + } + else if( !strncasecmp( psz_subtitle, "&", 5 )) + { + strcpy( psz_html, "&" ); + psz_html += 5; + psz_subtitle += 5; + } + else + { + strcpy( psz_html, "&" ); + psz_html += 5; + psz_subtitle++; + } } else { - psz_text[ i - i_left_moves ] = psz_text[ i ]; + *psz_html++ = *psz_subtitle++; + } + + if( ( size_t )( psz_html - psz_html_start ) > i_buf_size - 10 ) + { + int i_len = psz_html - psz_html_start; + + i_buf_size += 100; + psz_html_start = realloc( psz_html_start, i_buf_size ); + psz_html = psz_html_start + i_len; + *psz_html = '\0'; } } - i++; + strcpy( psz_html, "
" ); + psz_html += 7; + + if( psz_tagStack[ 0 ] != '\0' ) + { + /* Not well formed -- kill everything */ + free( psz_html_start ); + psz_html_start = NULL; + } + else if( psz_html_start ) + { + /* Shrink the memory requirements */ + psz_html_start = realloc( psz_html_start, psz_html - psz_html_start + 1 ); + } } - psz_text[ i - i_left_moves ] = '\0'; + return psz_html_start; }