X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=modules%2Fcodec%2Fsubsdec.c;h=bdc862be5288bbe51a11759f31e75eb63ea879a6;hb=4d4eb33495d371286c1c2b76f2255cd38217fcf6;hp=a662f03dc5a965f2b7276daa098b062f31ece7a8;hpb=4190fca508abe5194afb7153f1e16b15e8a936d7;p=vlc diff --git a/modules/codec/subsdec.c b/modules/codec/subsdec.c index a662f03dc5..bdc862be52 100644 --- a/modules/codec/subsdec.c +++ b/modules/codec/subsdec.c @@ -7,6 +7,7 @@ * Authors: Gildas Bazin * Samuel Hocevar * Derk-Jan Hartman + * Bernie Purcell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,12 +28,35 @@ * Preamble *****************************************************************************/ #include -#include -#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define NO_BREAKING_SPACE " " + +enum +{ + ATTRIBUTE_ALIGNMENT = (1 << 0), + ATTRIBUTE_X = (1 << 1), + ATTRIBUTE_X_PERCENT = (1 << 2), + ATTRIBUTE_Y = (1 << 3), + ATTRIBUTE_Y_PERCENT = (1 << 4), +}; -#include "vlc_osd.h" -#include "vlc_filter.h" -#include "charset.h" +typedef struct +{ + char *psz_filename; + picture_t *p_pic; +} image_attach_t; typedef struct { @@ -41,6 +65,8 @@ typedef struct int i_align; int i_margin_h; int i_margin_v; + int i_margin_percent_h; + int i_margin_percent_v; } ssa_style_t; /***************************************************************************** @@ -57,6 +83,9 @@ struct decoder_sys_t ssa_style_t **pp_ssa_styles; int i_ssa_styles; + + image_attach_t **pp_images; + int i_images; }; /***************************************************************************** @@ -68,9 +97,16 @@ static void CloseDecoder ( vlc_object_t * ); static subpicture_t *DecodeBlock ( decoder_t *, block_t ** ); static subpicture_t *ParseText ( decoder_t *, block_t * ); static void ParseSSAHeader ( decoder_t * ); +static void ParseUSFHeader ( decoder_t * ); +static void ParseUSFHeaderTags( decoder_t *, xml_reader_t * ); static void ParseSSAString ( decoder_t *, char *, subpicture_t * ); +static subpicture_region_t *ParseUSFString ( decoder_t *, char *, subpicture_t * ); static void ParseColor ( decoder_t *, char *, int *, int * ); -static void StripTags ( char * ); +static char *StripTags ( char * ); +static char *CreateHtmlSubtitle ( char * ); +static char *CreatePlainText( char * ); +static int ParseImageAttachments( decoder_t *p_dec ); +static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec, subpicture_t *p_spu, const char *psz_filename, int i_transparent_color ); #define DEFAULT_NAME "Default" #define MAX_LINE 8192 @@ -78,7 +114,7 @@ static void StripTags ( char * ); /***************************************************************************** * Module descriptor. *****************************************************************************/ -static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "", +static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "", "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "", "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "", "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "", @@ -99,9 +135,34 @@ static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "", "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8", "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN", "HPROMAN8", "NEXTSTEP" }; +/* +SSA supports charset selection. +The following known charsets are used: + +0 = Ansi - Western European +1 = default +2 = symbol +3 = invalid +77 = Mac +128 = Japanese (Shift JIS) +129 = Hangul +130 = Johab +134 = GB2312 Simplified Chinese +136 = Big5 Traditional Chinese +161 = Greek +162 = Turkish +163 = Vietnamese +177 = Hebrew +178 = Arabic +186 = Baltic +204 = Russian (Cyrillic) +222 = Thai +238 = Eastern European +254 = PC 437 +*/ static int pi_justification[] = { 0, 1, 2 }; -static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")}; +static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")}; #define ENCODING_TEXT N_("Subtitles text encoding") #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles") @@ -148,6 +209,7 @@ static int OpenDecoder( vlc_object_t *p_this ) vlc_value_t val; if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') && + p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') && p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') ) { return VLC_EGENERIC; @@ -157,7 +219,7 @@ static int OpenDecoder( vlc_object_t *p_this ) /* Allocate the memory needed to store the decoder's structure */ if( ( p_dec->p_sys = p_sys = - (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL ) + (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL ) { msg_Err( p_dec, "out of memory" ); return VLC_ENOMEM; @@ -170,56 +232,84 @@ static int OpenDecoder( vlc_object_t *p_this ) p_sys->b_ass = VLC_FALSE; p_sys->i_original_height = -1; p_sys->i_original_width = -1; - p_sys->pp_ssa_styles = NULL; - p_sys->i_ssa_styles = 0; + TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles ); + TAB_INIT( p_sys->i_images, p_sys->pp_images ); + char *psz_charset = NULL; + /* First try demux-specified encoding */ if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding ) { - msg_Dbg( p_dec, "using character encoding: %s", - p_dec->fmt_in.subs.psz_encoding ); - if( strcmp( p_dec->fmt_in.subs.psz_encoding, "UTF-8" ) ) - p_sys->iconv_handle = vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding ); + psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding); + msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s", + p_dec->fmt_in.subs.psz_encoding ?: "not specified"); } - else + + /* Second, try configured encoding */ + if (psz_charset == NULL) { - var_Create( p_dec, "subsdec-encoding", - VLC_VAR_STRING | VLC_VAR_DOINHERIT ); - var_Get( p_dec, "subsdec-encoding", &val ); - if( !strcmp( val.psz_string, DEFAULT_NAME ) ) + psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding"); + if ((psz_charset != NULL) && !strcasecmp (psz_charset, DEFAULT_NAME)) { - const char *psz_charset = GetFallbackEncoding(); + free (psz_charset); + psz_charset = NULL; + } - p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec, - "subsdec-autodetect-utf8" ); + msg_Dbg (p_dec, "trying configured character encoding: %s", + psz_charset ?: "not specified"); + } - p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset ); - msg_Dbg( p_dec, "using default character encoding: %s", psz_charset ); - } - else if( !strcmp( val.psz_string, "UTF-8" ) ) - { - msg_Dbg( p_dec, "using character encoding: UTF-8" ); - } - else if( val.psz_string ) + /* Third, try "local" encoding with optional UTF-8 autodetection */ + if (psz_charset == NULL) + { + psz_charset = strdup (GetFallbackEncoding ()); + msg_Dbg (p_dec, "trying default character encoding: %s", + psz_charset ?: "not specified"); + + if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8")) { - msg_Dbg( p_dec, "using character encoding: %s", val.psz_string ); - p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string ); - if( p_sys->iconv_handle == (vlc_iconv_t)-1 ) - { - msg_Warn( p_dec, "unable to do requested conversion" ); - } + msg_Dbg (p_dec, "using automatic UTF-8 detection"); + p_sys->b_autodetect_utf8 = VLC_TRUE; } - if( val.psz_string ) free( val.psz_string ); } + if (psz_charset == NULL) + { + psz_charset = strdup ("UTF-8"); + msg_Dbg (p_dec, "trying hard-coded character encoding: %s", + psz_charset ?: "error"); + } + + if (psz_charset == NULL) + { + free (p_sys); + return VLC_ENOMEM; + } + + if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8")) + { + p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset); + if (p_sys->iconv_handle == (vlc_iconv_t)(-1)) + msg_Err (p_dec, "cannot convert from %s: %s", psz_charset, + strerror (errno)); + } + free (psz_charset); + var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT ); var_Get( p_dec, "subsdec-align", &val ); p_sys->i_align = val.i_int; + ParseImageAttachments( p_dec ); + if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) ) { if( p_dec->fmt_in.i_extra > 0 ) ParseSSAHeader( p_dec ); } + else if( p_dec->fmt_in.i_codec == VLC_FOURCC('u','s','f',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) ) + { + if( p_dec->fmt_in.i_extra > 0 ) + ParseUSFHeader( p_dec ); + } return VLC_SUCCESS; } @@ -252,22 +342,41 @@ static void CloseDecoder( vlc_object_t *p_this ) decoder_sys_t *p_sys = p_dec->p_sys; if( p_sys->iconv_handle != (vlc_iconv_t)-1 ) - { vlc_iconv_close( p_sys->iconv_handle ); - } if( p_sys->pp_ssa_styles ) { int i; for( i = 0; i < p_sys->i_ssa_styles; i++ ) { - if( p_sys->pp_ssa_styles[i]->psz_stylename ) free( p_sys->pp_ssa_styles[i]->psz_stylename ); - p_sys->pp_ssa_styles[i]->psz_stylename = NULL; - if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ) free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ); - p_sys->pp_ssa_styles[i]->font_style.psz_fontname = NULL; - if( p_sys->pp_ssa_styles[i] ) free( p_sys->pp_ssa_styles[i] ); p_sys->pp_ssa_styles[i] = NULL; + if( !p_sys->pp_ssa_styles[i] ) + continue; + + if( p_sys->pp_ssa_styles[i]->psz_stylename ) + free( p_sys->pp_ssa_styles[i]->psz_stylename ); + if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ) + free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ); + if( p_sys->pp_ssa_styles[i] ) + free( p_sys->pp_ssa_styles[i] ); + } + TAB_CLEAN( p_sys->i_ssa_styles, p_sys->pp_ssa_styles ); + } + if( p_sys->pp_images ) + { + int i; + for( i = 0; i < p_sys->i_images; i++ ) + { + if( !p_sys->pp_images[i] ) + continue; + + if( p_sys->pp_images[i]->p_pic ) + p_sys->pp_images[i]->p_pic->pf_release( p_sys->pp_images[i]->p_pic ); + if( p_sys->pp_images[i]->psz_filename ) + free( p_sys->pp_images[i]->psz_filename ); + + free( p_sys->pp_images[i] ); } - free( p_sys->pp_ssa_styles ); p_sys->pp_ssa_styles = NULL; + TAB_CLEAN( p_sys->i_images, p_sys->pp_images ); } free( p_sys ); @@ -291,9 +400,11 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) } /* Check validity of packet data */ - if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' ) + /* An "empty" line containing only \0 can be used to force + and ephemer picture from the screen */ + if( p_block->i_buffer < 1 ) { - msg_Warn( p_dec, "empty subtitle" ); + msg_Warn( p_dec, "no subtitle data" ); return NULL; } @@ -304,7 +415,14 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) return NULL; if( p_sys->iconv_handle == (vlc_iconv_t)-1 ) - EnsureUTF8( psz_subtitle ); + { + if (EnsureUTF8( psz_subtitle ) == NULL) + { + msg_Err( p_dec, _("failed to convert subtitle encoding.\n" + "Try manually setting a character-encoding " + "before you open the file.") ); + } + } else { @@ -356,6 +474,8 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) return NULL; } + p_spu->b_pausable = VLC_TRUE; + /* Create a new subpicture region */ memset( &fmt, 0, sizeof(video_format_t) ); fmt.i_chroma = VLC_FOURCC('T','E','X','T'); @@ -372,17 +492,19 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) } /* Decode and format the subpicture unit */ - if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') ) + if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') && + p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') ) { /* Normal text subs, easy markup */ - p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; + p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; p_spu->i_x = p_sys->i_align ? 20 : 0; p_spu->i_y = 10; /* Remove formatting from string */ - StripTags( psz_subtitle ); - p_spu->p_region->psz_text = psz_subtitle; + p_spu->p_region->psz_text = StripTags( psz_subtitle ); + p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle ); + p_spu->i_start = p_block->i_pts; p_spu->i_stop = p_block->i_pts + p_block->i_length; p_spu->b_ephemer = (p_block->i_length == 0); @@ -390,19 +512,380 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) } else { - /* Decode SSA strings */ - ParseSSAString( p_dec, psz_subtitle, p_spu ); + /* Decode SSA/USF strings */ + if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') ) + ParseSSAString( p_dec, psz_subtitle, p_spu ); + else + { + p_spu->pf_destroy_region( VLC_OBJECT(p_dec), p_spu->p_region ); + p_spu->p_region = ParseUSFString( p_dec, psz_subtitle, p_spu ); + } + p_spu->i_start = p_block->i_pts; p_spu->i_stop = p_block->i_pts + p_block->i_length; p_spu->b_ephemer = (p_block->i_length == 0); p_spu->b_absolute = VLC_FALSE; p_spu->i_original_picture_width = p_sys->i_original_width; p_spu->i_original_picture_height = p_sys->i_original_height; - if( psz_subtitle ) free( psz_subtitle ); } + if( psz_subtitle ) free( psz_subtitle ); + return p_spu; } +static char *GrabAttributeValue( const char *psz_attribute, + const char *psz_tag_start ) +{ + if( psz_attribute && psz_tag_start ) + { + char *psz_tag_end = strchr( psz_tag_start, '>' ); + char *psz_found = strcasestr( psz_tag_start, psz_attribute ); + + if( psz_found ) + { + psz_found += strlen( psz_attribute ); + + if(( *(psz_found++) == '=' ) && + ( *(psz_found++) == '\"' )) + { + if( psz_found < psz_tag_end ) + { + int i_len = strcspn( psz_found, "\"" ); + return strndup( psz_found, i_len ); + } + } + } + } + return NULL; +} + +static ssa_style_t *ParseStyle( decoder_sys_t *p_sys, char *psz_subtitle ) +{ + ssa_style_t *p_style = NULL; + char *psz_style = GrabAttributeValue( "style", psz_subtitle ); + + if( psz_style ) + { + int i; + + for( i = 0; i < p_sys->i_ssa_styles; i++ ) + { + if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) ) + p_style = p_sys->pp_ssa_styles[i]; + } + free( psz_style ); + } + return p_style; +} + +static int ParsePositionAttributeList( char *psz_subtitle, int *i_align, int *i_x, int *i_y ) +{ + int i_mask = 0; + + char *psz_align = GrabAttributeValue( "alignment", psz_subtitle ); + char *psz_margin_x = GrabAttributeValue( "horizontal-margin", psz_subtitle ); + char *psz_margin_y = GrabAttributeValue( "vertical-margin", psz_subtitle ); + /* -- UNSUPPORTED + char *psz_relative = GrabAttributeValue( "relative-to", psz_subtitle ); + char *psz_rotate_x = GrabAttributeValue( "rotate-x", psz_subtitle ); + char *psz_rotate_y = GrabAttributeValue( "rotate-y", psz_subtitle ); + char *psz_rotate_z = GrabAttributeValue( "rotate-z", psz_subtitle ); + */ + + *i_align = SUBPICTURE_ALIGN_BOTTOM; + *i_x = 0; + *i_y = 0; + + if( psz_align ) + { + if( !strcasecmp( "TopLeft", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp( "TopCenter", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_TOP; + else if( !strcasecmp( "TopRight", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT; + else if( !strcasecmp( "MiddleLeft", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp( "MiddleCenter", psz_align ) ) + *i_align = 0; + else if( !strcasecmp( "MiddleRight", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_RIGHT; + else if( !strcasecmp( "BottomLeft", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp( "BottomCenter", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_BOTTOM; + else if( !strcasecmp( "BottomRight", psz_align ) ) + *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT; + + i_mask |= ATTRIBUTE_ALIGNMENT; + free( psz_align ); + } + if( psz_margin_x ) + { + *i_x = atoi( psz_margin_x ); + if( strchr( psz_margin_x, '%' ) ) + i_mask |= ATTRIBUTE_X_PERCENT; + else + i_mask |= ATTRIBUTE_X; + + free( psz_margin_x ); + } + if( psz_margin_y ) + { + *i_y = atoi( psz_margin_y ); + if( strchr( psz_margin_y, '%' ) ) + i_mask |= ATTRIBUTE_Y_PERCENT; + else + i_mask |= ATTRIBUTE_Y; + + free( psz_margin_y ); + } + return i_mask; +} + +static void SetupPositions( subpicture_region_t *p_region, char *psz_subtitle ) +{ + int i_mask = 0; + int i_align; + int i_x, i_y; + + i_mask = ParsePositionAttributeList( psz_subtitle, &i_align, &i_x, &i_y ); + + if( i_mask & ATTRIBUTE_ALIGNMENT ) + p_region->i_align = i_align; + + /* TODO: Setup % based offsets properly, without adversely affecting + * everything else in vlc. Will address with separate patch, to + * prevent this one being any more complicated. + */ + if( i_mask & ATTRIBUTE_X ) + p_region->i_x = i_x; + else if( i_mask & ATTRIBUTE_X_PERCENT ) + p_region->i_x = 0; + + if( i_mask & ATTRIBUTE_Y ) + p_region->i_y = i_y; + else if( i_mask & ATTRIBUTE_Y_PERCENT ) + p_region->i_y = 0; +} + +static subpicture_region_t *CreateTextRegion( decoder_t *p_dec, + subpicture_t *p_spu, + char *psz_subtitle, + int i_len, + int i_sys_align ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + subpicture_region_t *p_text_region; + video_format_t fmt; + + /* Create a new subpicture region */ + memset( &fmt, 0, sizeof(video_format_t) ); + fmt.i_chroma = VLC_FOURCC('T','E','X','T'); + fmt.i_aspect = 0; + fmt.i_width = fmt.i_height = 0; + fmt.i_x_offset = fmt.i_y_offset = 0; + p_text_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt ); + + if( p_text_region != NULL ) + { + ssa_style_t *p_style = NULL; + + p_text_region->psz_text = NULL; + p_text_region->psz_html = strndup( psz_subtitle, i_len ); + if( ! p_text_region->psz_html ) + { + msg_Err( p_dec, "out of memory" ); + p_spu->pf_destroy_region( VLC_OBJECT(p_dec), p_text_region ); + return NULL; + } + + p_style = ParseStyle( p_sys, p_text_region->psz_html ); + if( !p_style ) + { + int i; + + for( i = 0; i < p_sys->i_ssa_styles; i++ ) + { + if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) ) + p_style = p_sys->pp_ssa_styles[i]; + } + } + + if( p_style ) + { + msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename ); + + p_text_region->p_style = &p_style->font_style; + p_text_region->i_align = p_style->i_align; + + /* TODO: Setup % based offsets properly, without adversely affecting + * everything else in vlc. Will address with separate patch, + * to prevent this one being any more complicated. + + * p_style->i_margin_percent_h; + * p_style->i_margin_percent_v; + */ + p_text_region->i_x = p_style->i_margin_h; + p_text_region->i_y = p_style->i_margin_v; + + } + else + { + p_text_region->i_align = SUBPICTURE_ALIGN_BOTTOM | i_sys_align; + p_text_region->i_x = i_sys_align ? 20 : 0; + p_text_region->i_y = 10; + } + /* Look for position arguments which may override the style-based + * defaults. + */ + SetupPositions( p_text_region, psz_subtitle ); + + p_text_region->p_next = NULL; + } + return p_text_region; +} + +static subpicture_region_t *ParseUSFString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + subpicture_t *p_spu = p_spu_in; + subpicture_region_t *p_region_first = NULL; + subpicture_region_t *p_region_upto = p_region_first; + + while( *psz_subtitle ) + { + if( *psz_subtitle == '<' ) + { + char *psz_end = NULL; + + if(( !strncasecmp( psz_subtitle, "", 6 ))) + { + psz_end = strcasestr( psz_subtitle, "" ); + + if( psz_end ) + { + subpicture_region_t *p_text_region; + + psz_end += strcspn( psz_end, ">" ) + 1; + + p_text_region = CreateTextRegion( p_dec, + p_spu, + psz_subtitle, + psz_end - psz_subtitle, + p_sys->i_align ); + + if( p_text_region ) + p_text_region->psz_text = CreatePlainText( p_text_region->psz_html ); + + if( !p_region_first ) + { + p_region_first = p_region_upto = p_text_region; + } + else if( p_text_region ) + { + p_region_upto->p_next = p_text_region; + p_region_upto = p_region_upto->p_next; + } + } + } + else if(( !strncasecmp( psz_subtitle, "", 9 ))) + { + psz_end = strcasestr( psz_subtitle, "" ); + + if( psz_end ) + { + subpicture_region_t *p_text_region; + + psz_end += strcspn( psz_end, ">" ) + 1; + + p_text_region = CreateTextRegion( p_dec, + p_spu, + psz_subtitle, + psz_end - psz_subtitle, + p_sys->i_align ); + + if( !p_region_first ) + { + p_region_first = p_region_upto = p_text_region; + } + else if( p_text_region ) + { + p_region_upto->p_next = p_text_region; + p_region_upto = p_region_upto->p_next; + } + } + } + else if(( !strncasecmp( psz_subtitle, "", 7 ))) + { + subpicture_region_t *p_image_region = NULL; + + char *psz_end = strcasestr( psz_subtitle, "" ); + char *psz_content = strchr( psz_subtitle, '>' ); + int i_transparent = -1; + + /* If a colorkey parameter is specified, then we have to map + * that index in the picture through as transparent (it is + * required by the USF spec but is also recommended that if the + * creator really wants a transparent colour that they use a + * type like PNG that properly supports it; this goes doubly + * for VLC because the pictures are stored internally in YUV + * and the resulting colour-matching may not produce the + * desired results.) + */ + char *psz_tmp = GrabAttributeValue( "colorkey", psz_subtitle ); + if( psz_tmp ) + { + if( *psz_tmp == '#' ) + i_transparent = strtol( psz_tmp + 1, NULL, 16 ) & 0x00ffffff; + free( psz_tmp ); + } + if( psz_content && ( psz_content < psz_end ) ) + { + char *psz_filename = strndup( &psz_content[1], psz_end - &psz_content[1] ); + if( psz_filename ) + { + p_image_region = LoadEmbeddedImage( p_dec, p_spu, psz_filename, i_transparent ); + free( psz_filename ); + } + } + + if( psz_end ) psz_end += strcspn( psz_end, ">" ) + 1; + + if( p_image_region ) + { + SetupPositions( p_image_region, psz_subtitle ); + + p_image_region->p_next = NULL; + p_image_region->psz_text = NULL; + p_image_region->psz_html = NULL; + + } + if( !p_region_first ) + { + p_region_first = p_region_upto = p_image_region; + } + else if( p_image_region ) + { + p_region_upto->p_next = p_image_region; + p_region_upto = p_region_upto->p_next; + } + } + if( psz_end ) + psz_subtitle = psz_end - 1; + + psz_subtitle += strcspn( psz_subtitle, ">" ); + } + + psz_subtitle++; + } + + return p_region_first; +} + static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *p_spu_in ) { /* We expect MKV formatted SSA: @@ -421,17 +904,24 @@ static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t * psz_buffer_sub = psz_subtitle; + p_spu->p_region->psz_html = NULL; + i_comma = 0; while( i_comma < 8 && *psz_buffer_sub != '\0' ) { if( *psz_buffer_sub == ',' ) { i_comma++; - if( i_comma == 2 ) psz_style_start = &psz_buffer_sub[1]; - if( i_comma == 3 ) psz_style_end = &psz_buffer_sub[0]; - if( i_comma == 4 ) i_margin_l = (int)strtol( psz_buffer_sub+1, NULL, 10 ); - if( i_comma == 5 ) i_margin_r = (int)strtol( psz_buffer_sub+1, NULL, 10 ); - if( i_comma == 6 ) i_margin_v = (int)strtol( psz_buffer_sub+1, NULL, 10 ); + if( i_comma == 2 ) + psz_style_start = &psz_buffer_sub[1]; + else if( i_comma == 3 ) + psz_style_end = &psz_buffer_sub[0]; + else if( i_comma == 4 ) + i_margin_l = (int)strtol( &psz_buffer_sub[1], NULL, 10 ); + else if( i_comma == 5 ) + i_margin_r = (int)strtol( &psz_buffer_sub[1], NULL, 10 ); + else if( i_comma == 6 ) + i_margin_v = (int)strtol( &psz_buffer_sub[1], NULL, 10 ); } psz_buffer_sub++; } @@ -479,9 +969,7 @@ static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t * psz_new_subtitle[i_text] = '\0'; i_strlen = __MAX( psz_style_end - psz_style_start, 0); - psz_style = (char *)malloc( i_strlen + 1); - psz_style = memcpy( psz_style, psz_style_start, i_strlen ); - psz_style[i_strlen] = '\0'; + psz_style = strndup( psz_style_start, i_strlen ); for( i = 0; i < p_sys->i_ssa_styles; i++ ) { @@ -493,7 +981,7 @@ static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t * p_spu->p_region->psz_text = psz_new_subtitle; if( p_style == NULL ) { - p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; + p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; p_spu->i_x = p_sys->i_align ? 20 : 0; p_spu->i_y = 10; } @@ -501,7 +989,7 @@ static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t * { msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename); p_spu->p_region->p_style = &p_style->font_style; - p_spu->i_flags = p_style->i_align; + p_spu->p_region->i_align = p_style->i_align; if( p_style->i_align & SUBPICTURE_ALIGN_LEFT ) { p_spu->i_x = (i_margin_l) ? i_margin_l : p_style->i_margin_h; @@ -555,6 +1043,451 @@ static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *p *pi_alpha = ( i_color & 0xFF000000 ) >> 24; } +static int ParseImageAttachments( decoder_t *p_dec ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + input_attachment_t **pp_attachments; + int i_attachments_cnt; + int k = 0; + + if( VLC_SUCCESS != decoder_GetInputAttachments( p_dec, &pp_attachments, &i_attachments_cnt )) + return VLC_EGENERIC; + + for( k = 0; k < i_attachments_cnt; k++ ) + { + input_attachment_t *p_attach = pp_attachments[k]; + + vlc_fourcc_t type = 0; + + if( ( !strcmp( p_attach->psz_mime, "image/bmp" ) ) || /* BMP */ + ( !strcmp( p_attach->psz_mime, "image/x-bmp" ) ) || + ( !strcmp( p_attach->psz_mime, "image/x-bitmap" ) ) || + ( !strcmp( p_attach->psz_mime, "image/x-ms-bmp" ) ) ) + { + type = VLC_FOURCC('b','m','p',' '); + } + else if( ( !strcmp( p_attach->psz_mime, "image/x-portable-anymap" ) ) || /* PNM */ + ( !strcmp( p_attach->psz_mime, "image/x-portable-bitmap" ) ) || /* PBM */ + ( !strcmp( p_attach->psz_mime, "image/x-portable-graymap" ) ) || /* PGM */ + ( !strcmp( p_attach->psz_mime, "image/x-portable-pixmap" ) ) ) /* PPM */ + { + type = VLC_FOURCC('p','n','m',' '); + } + else if ( !strcmp( p_attach->psz_mime, "image/gif" ) ) /* GIF */ + type = VLC_FOURCC('g','i','f',' '); + else if ( !strcmp( p_attach->psz_mime, "image/jpeg" ) ) /* JPG, JPEG */ + type = VLC_FOURCC('j','p','e','g'); + else if ( !strcmp( p_attach->psz_mime, "image/pcx" ) ) /* PCX */ + type = VLC_FOURCC('p','c','x',' '); + else if ( !strcmp( p_attach->psz_mime, "image/png" ) ) /* PNG */ + type = VLC_FOURCC('p','n','g',' '); + else if ( !strcmp( p_attach->psz_mime, "image/tiff" ) ) /* TIF, TIFF */ + type = VLC_FOURCC('t','i','f','f'); + else if ( !strcmp( p_attach->psz_mime, "image/x-tga" ) ) /* TGA */ + type = VLC_FOURCC('t','g','a',' '); + else if ( !strcmp( p_attach->psz_mime, "image/x-xpixmap") ) /* XPM */ + type = VLC_FOURCC('x','p','m',' '); + + if( ( type != 0 ) && + ( p_attach->i_data > 0 ) && + ( p_attach->p_data != NULL ) ) + { + picture_t *p_pic = NULL; + image_handler_t *p_image; + + p_image = image_HandlerCreate( p_dec ); + if( p_image != NULL ) + { + block_t *p_block; + + p_block = block_New( p_image->p_parent, p_attach->i_data ); + + if( p_block != NULL ) + { + video_format_t fmt_in; + video_format_t fmt_out; + + memcpy( p_block->p_buffer, p_attach->p_data, p_attach->i_data ); + + memset( &fmt_in, 0, sizeof( video_format_t)); + memset( &fmt_out, 0, sizeof( video_format_t)); + + fmt_in.i_chroma = type; + fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A'); + + /* Find a suitable decoder module */ + if( module_Exists( p_dec, "SDL Image decoder" ) ) + { + /* ffmpeg thinks it can handle bmp properly but it can't (at least + * not all of them), so use sdl_image if it is available */ + + vlc_value_t val; + + var_Create( p_dec, "codec", VLC_VAR_MODULE | VLC_VAR_DOINHERIT ); + val.psz_string = (char*) "sdl_image"; + var_Set( p_dec, "codec", val ); + } + + p_pic = image_Read( p_image, p_block, &fmt_in, &fmt_out ); + var_Destroy( p_dec, "codec" ); + } + + image_HandlerDelete( p_image ); + } + if( p_pic ) + { + image_attach_t *p_picture = malloc( sizeof(image_attach_t) ); + + if( p_picture ) + { + p_picture->psz_filename = strdup( p_attach->psz_name ); + p_picture->p_pic = p_pic; + + TAB_APPEND( p_sys->i_images, p_sys->pp_images, p_picture ); + } + } + } + vlc_input_attachment_Delete( pp_attachments[ k ] ); + } + free( pp_attachments ); + + return VLC_SUCCESS; +} + +/***************************************************************************** + * ParseUSFHeader: Retrieve global formatting information etc + *****************************************************************************/ +static void ParseUSFHeader( decoder_t *p_dec ) +{ + stream_t *p_sub = NULL; + xml_t *p_xml = NULL; + xml_reader_t *p_xml_reader = NULL; + + p_sub = stream_MemoryNew( VLC_OBJECT(p_dec), + p_dec->fmt_in.p_extra, + p_dec->fmt_in.i_extra, + VLC_TRUE ); + if( !p_sub ) + return; + + p_xml = xml_Create( p_dec ); + if( p_xml ) + { + p_xml_reader = xml_ReaderCreate( p_xml, p_sub ); + if( p_xml_reader ) + { + /* Look for Root Node */ + if( xml_ReaderRead( p_xml_reader ) == 1 ) + { + char *psz_node = xml_ReaderName( p_xml_reader ); + + if( !strcasecmp( "usfsubtitles", psz_node ) ) + ParseUSFHeaderTags( p_dec, p_xml_reader ); + + free( psz_node ); + } + + xml_ReaderDelete( p_xml, p_xml_reader ); + } + xml_Delete( p_xml ); + } + stream_Delete( p_sub ); +} + +static void ParseUSFHeaderTags( decoder_t *p_dec, xml_reader_t *p_xml_reader ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + char *psz_node; + ssa_style_t *p_style = NULL; + int i_style_level = 0; + int i_metadata_level = 0; + + while ( xml_ReaderRead( p_xml_reader ) == 1 ) + { + switch ( xml_ReaderNodeType( p_xml_reader ) ) + { + case XML_READER_TEXT: + case XML_READER_NONE: + break; + case XML_READER_ENDELEM: + psz_node = xml_ReaderName( p_xml_reader ); + + if( !psz_node ) + break; + switch (i_style_level) + { + case 0: + if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) ) + { + i_metadata_level--; + } + break; + case 1: + if( !strcasecmp( "styles", psz_node ) ) + { + i_style_level--; + } + break; + case 2: + if( !strcasecmp( "style", psz_node ) ) + { + TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style ); + + p_style = NULL; + i_style_level--; + } + break; + } + + free( psz_node ); + break; + case XML_READER_STARTELEM: + psz_node = xml_ReaderName( p_xml_reader ); + + if( !psz_node ) + break; + + if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) ) + { + i_metadata_level++; + } + else if( !strcasecmp( "resolution", psz_node ) && (i_metadata_level == 1) ) + { + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "x", psz_name ) ) + p_sys->i_original_width = atoi( psz_value ); + else if( !strcasecmp( "y", psz_name ) ) + p_sys->i_original_height = atoi( psz_value ); + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) ) + { + i_style_level++; + } + else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) ) + { + i_style_level++; + + p_style = calloc( 1, sizeof(ssa_style_t) ); + if( ! p_style ) + { + msg_Err( p_dec, "out of memory" ); + free( psz_node ); + break; + } + /* All styles are supposed to default to Default, and then + * one or more settings are over-ridden. + * At the moment this only effects styles defined AFTER + * Default in the XML + */ + int i; + for( i = 0; i < p_sys->i_ssa_styles; i++ ) + { + if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) ) + { + ssa_style_t *p_default_style = p_sys->pp_ssa_styles[i]; + + memcpy( p_style, p_default_style, sizeof( ssa_style_t ) ); + p_style->font_style.psz_fontname = strdup( p_style->font_style.psz_fontname ); + p_style->psz_stylename = NULL; + } + } + + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "name", psz_name ) ) + p_style->psz_stylename = strdup( psz_value); + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) ) + { + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "face", psz_name ) ) + { + if( p_style->font_style.psz_fontname ) free( p_style->font_style.psz_fontname ); + p_style->font_style.psz_fontname = strdup( psz_value ); + } + else if( !strcasecmp( "size", psz_name ) ) + { + if( ( *psz_value == '+' ) || ( *psz_value == '-' ) ) + { + int i_value = atoi( psz_value ); + + if( ( i_value >= -5 ) && ( i_value <= 5 ) ) + p_style->font_style.i_font_size += ( i_value * p_style->font_style.i_font_size ) / 10; + else if( i_value < -5 ) + p_style->font_style.i_font_size = - i_value; + else if( i_value > 5 ) + p_style->font_style.i_font_size = i_value; + } + else + p_style->font_style.i_font_size = atoi( psz_value ); + } + else if( !strcasecmp( "italic", psz_name ) ) + { + if( !strcasecmp( "yes", psz_value )) + p_style->font_style.i_style_flags |= STYLE_ITALIC; + else + p_style->font_style.i_style_flags &= ~STYLE_ITALIC; + } + else if( !strcasecmp( "weight", psz_name ) ) + { + if( !strcasecmp( "bold", psz_value )) + p_style->font_style.i_style_flags |= STYLE_BOLD; + else + p_style->font_style.i_style_flags &= ~STYLE_BOLD; + } + else if( !strcasecmp( "underline", psz_name ) ) + { + if( !strcasecmp( "yes", psz_value )) + p_style->font_style.i_style_flags |= STYLE_UNDERLINE; + else + p_style->font_style.i_style_flags &= ~STYLE_UNDERLINE; + } + else if( !strcasecmp( "color", psz_name ) ) + { + if( *psz_value == '#' ) + { + unsigned long col = strtol(psz_value+1, NULL, 16); + p_style->font_style.i_font_color = (col & 0x00ffffff); + p_style->font_style.i_font_alpha = (col >> 24) & 0xff; + } + } + else if( !strcasecmp( "outline-color", psz_name ) ) + { + if( *psz_value == '#' ) + { + unsigned long col = strtol(psz_value+1, NULL, 16); + p_style->font_style.i_outline_color = (col & 0x00ffffff); + p_style->font_style.i_outline_alpha = (col >> 24) & 0xff; + } + } + else if( !strcasecmp( "outline-level", psz_name ) ) + { + p_style->font_style.i_outline_width = atoi( psz_value ); + } + else if( !strcasecmp( "shadow-color", psz_name ) ) + { + if( *psz_value == '#' ) + { + unsigned long col = strtol(psz_value+1, NULL, 16); + p_style->font_style.i_shadow_color = (col & 0x00ffffff); + p_style->font_style.i_shadow_alpha = (col >> 24) & 0xff; + } + } + else if( !strcasecmp( "shadow-level", psz_name ) ) + { + p_style->font_style.i_shadow_width = atoi( psz_value ); + } + else if( !strcasecmp( "back-color", psz_name ) ) + { + if( *psz_value == '#' ) + { + unsigned long col = strtol(psz_value+1, NULL, 16); + p_style->font_style.i_karaoke_background_color = (col & 0x00ffffff); + p_style->font_style.i_karaoke_background_alpha = (col >> 24) & 0xff; + } + } + else if( !strcasecmp( "spacing", psz_name ) ) + { + p_style->font_style.i_spacing = atoi( psz_value ); + } + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) ) + { + while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS ) + { + char *psz_name = xml_ReaderName ( p_xml_reader ); + char *psz_value = xml_ReaderValue ( p_xml_reader ); + + if( psz_name && psz_value ) + { + if( !strcasecmp( "alignment", psz_name ) ) + { + if( !strcasecmp( "TopLeft", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp( "TopCenter", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_TOP; + else if( !strcasecmp( "TopRight", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT; + else if( !strcasecmp( "MiddleLeft", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp( "MiddleCenter", psz_value ) ) + p_style->i_align = 0; + else if( !strcasecmp( "MiddleRight", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_RIGHT; + else if( !strcasecmp( "BottomLeft", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp( "BottomCenter", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_BOTTOM; + else if( !strcasecmp( "BottomRight", psz_value ) ) + p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT; + } + else if( !strcasecmp( "horizontal-margin", psz_name ) ) + { + if( strchr( psz_value, '%' ) ) + { + p_style->i_margin_h = 0; + p_style->i_margin_percent_h = atoi( psz_value ); + } + else + { + p_style->i_margin_h = atoi( psz_value ); + p_style->i_margin_percent_h = 0; + } + } + else if( !strcasecmp( "vertical-margin", psz_name ) ) + { + if( strchr( psz_value, '%' ) ) + { + p_style->i_margin_v = 0; + p_style->i_margin_percent_v = atoi( psz_value ); + } + else + { + p_style->i_margin_v = atoi( psz_value ); + p_style->i_margin_percent_v = 0; + } + } + } + if( psz_name ) free( psz_name ); + if( psz_value ) free( psz_value ); + } + } + + free( psz_node ); + break; + } + } + if( p_style ) free( p_style ); +} /***************************************************************************** * ParseSSAHeader: Retrieve global formatting information etc *****************************************************************************/ @@ -648,6 +1581,11 @@ static void ParseSSAHeader( decoder_t *p_dec ) p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l; p_style->i_margin_v = i_margin_v; + p_style->i_margin_percent_h = 0; + p_style->i_margin_percent_v = 0; + + p_style->font_style.i_karaoke_background_color = 0xffffff; + p_style->font_style.i_karaoke_background_alpha = 0xff; TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style ); } @@ -693,12 +1631,17 @@ static void ParseSSAHeader( decoder_t *p_dec ) //p_style->font_style.f_angle = f_angle; p_style->i_align = 0; - if( i_align == 0x1 || i_align == 0x4 || i_align == 0x1 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT; + if( i_align == 0x1 || i_align == 0x4 || i_align == 0x7 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT; if( i_align == 0x3 || i_align == 0x6 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT; if( i_align == 0x7 || i_align == 0x8 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_TOP; if( i_align == 0x1 || i_align == 0x2 || i_align == 0x3 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM; p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l; p_style->i_margin_v = i_margin_v; + p_style->i_margin_percent_h = 0; + p_style->i_margin_percent_v = 0; + + p_style->font_style.i_karaoke_background_color = 0xffffff; + p_style->font_style.i_karaoke_background_alpha = 0xff; /*TODO: Ignored: angle i_scale_x|y (fontscaling), i_encoding */ TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style ); @@ -714,45 +1657,440 @@ eof: return; } -static void StripTags( char *psz_text ) +/* Function now handles tags which has attribute values, and tries + * to deal with &' commands too. It no longer modifies the string + * in place, so that the original text can be reused + */ +static char *StripTags( char *psz_subtitle ) { - int i_left_moves = 0; - vlc_bool_t b_inside_tag = VLC_FALSE; - int i = 0; - int i_tag_start = -1; - while( psz_text[ i ] ) + char *psz_text_start; + char *psz_text; + + psz_text = psz_text_start = malloc( strlen( psz_subtitle ) + 1 ); + if( !psz_text_start ) + return NULL; + + while( *psz_subtitle ) { - if( !b_inside_tag ) + if( *psz_subtitle == '<' ) + { + if( strncasecmp( psz_subtitle, "
", 5 ) == 0 ) + *psz_text++ = '\n'; + + psz_subtitle += strcspn( psz_subtitle, ">" ); + } + else if( *psz_subtitle == '&' ) { - if( psz_text[ i ] == '<' ) + if( !strncasecmp( psz_subtitle, "<", 4 )) + { + *psz_text++ = '<'; + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else if( !strncasecmp( psz_subtitle, ">", 4 )) + { + *psz_text++ = '>'; + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else if( !strncasecmp( psz_subtitle, "&", 5 )) + { + *psz_text++ = '&'; + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else if( !strncasecmp( psz_subtitle, """, 6 )) + { + *psz_text++ = '\"'; + psz_subtitle += strcspn( psz_subtitle, ";" ); + } + else { - b_inside_tag = VLC_TRUE; - i_tag_start = i; + /* Assume it is just a normal ampersand */ + *psz_text++ = '&'; } - psz_text[ i - i_left_moves ] = psz_text[ i ]; } else { - if( ( psz_text[ i ] == ' ' ) || - ( psz_text[ i ] == '\t' ) || - ( psz_text[ i ] == '\n' ) || - ( psz_text[ i ] == '\r' ) ) + *psz_text++ = *psz_subtitle; + } + + psz_subtitle++; + } + *psz_text = '\0'; + psz_text_start = realloc( psz_text_start, strlen( psz_text_start ) + 1 ); + + return psz_text_start; +} + +/* Try to respect any style tags present in the subtitle string. The main + * problem here is a lack of adequate specs for the subtitle formats. + * SSA/ASS and USF are both detail spec'ed -- but they are handled elsewhere. + * SAMI has a detailed spec, but extensive rework is needed in the demux + * code to prevent all this style information being excised, as it presently + * does. + * That leaves the others - none of which were (I guess) originally intended + * to be carrying style information. Over time people have used them that way. + * In the absence of specifications from which to work, the tags supported + * have been restricted to the simple set permitted by the USF DTD, ie. : + * Basic:
, , , + * Extended: + * Attributes: face + * family + * size + * color + * outline-color + * shadow-color + * outline-level + * shadow-level + * back-color + * alpha + * There is also the further restriction that the subtitle be well-formed + * as an XML entity, ie. the HTML sentence: + * Bold and Italics + * doesn't qualify because the tags aren't nested one inside the other. + * tags are automatically added to the output to ensure + * well-formedness. + * If the text doesn't qualify for any reason, a NULL string is + * returned, and the rendering engine will fall back to the + * plain text version of the subtitle. + */ +static char *CreateHtmlSubtitle( char *psz_subtitle ) +{ + char psz_tagStack[ 100 ]; + size_t i_buf_size = strlen( psz_subtitle ) + 100; + char *psz_html_start = malloc( i_buf_size ); + + psz_tagStack[ 0 ] = '\0'; + + if( psz_html_start != NULL ) + { + char *psz_html = psz_html_start; + + strcpy( psz_html, "" ); + psz_html += 6; + + while( *psz_subtitle ) + { + if( *psz_subtitle == '\n' ) { - b_inside_tag = VLC_FALSE; - i_tag_start = -1; + strcpy( psz_html, "
" ); + psz_html += 5; + psz_subtitle++; + } + else if( *psz_subtitle == '<' ) + { + if( !strncasecmp( psz_subtitle, "
", 5 )) + { + strcpy( psz_html, "
" ); + psz_html += 5; + psz_subtitle += 5; + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + strcpy( psz_html, "" ); + strcat( psz_tagStack, "b" ); + psz_html += 3; + psz_subtitle += 3; + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + strcpy( psz_html, "" ); + strcat( psz_tagStack, "i" ); + psz_html += 3; + psz_subtitle += 3; + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + strcpy( psz_html, "" ); + strcat( psz_tagStack, "u" ); + psz_html += 3; + psz_subtitle += 3; + } + else if( !strncasecmp( psz_subtitle, "= 0 ) + { + psz_lastTag = psz_tagStack + i_len; + i_len = 0; + + switch( *psz_lastTag ) + { + case 'b': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'i': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'u': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'f': + b_match = !strncasecmp( psz_subtitle, "", 7 ); + i_len = 7; + break; + } + } + if( ! b_match ) + { + /* Not well formed -- kill everything */ + free( psz_html_start ); + psz_html_start = NULL; + break; + } + *psz_lastTag = '\0'; + strncpy( psz_html, psz_subtitle, i_len ); + psz_html += i_len; + psz_subtitle += i_len; + } + else + { + psz_subtitle += strcspn( psz_subtitle, ">" ); + } } - else if( psz_text[ i ] == '>' ) + else if( *psz_subtitle == '&' ) { - i_left_moves += i - i_tag_start + 1; - i_tag_start = -1; - b_inside_tag = VLC_FALSE; + if( !strncasecmp( psz_subtitle, "<", 4 )) + { + strcpy( psz_html, "<" ); + psz_html += 4; + psz_subtitle += 4; + } + else if( !strncasecmp( psz_subtitle, ">", 4 )) + { + strcpy( psz_html, ">" ); + psz_html += 4; + psz_subtitle += 4; + } + else if( !strncasecmp( psz_subtitle, "&", 5 )) + { + strcpy( psz_html, "&" ); + psz_html += 5; + psz_subtitle += 5; + } + else + { + strcpy( psz_html, "&" ); + psz_html += 5; + psz_subtitle++; + } } else { - psz_text[ i - i_left_moves ] = psz_text[ i ]; + *psz_html = *psz_subtitle; + if( psz_html > psz_html_start ) + { + /* Check for double whitespace */ + if((( *psz_html == ' ' ) || + ( *psz_html == '\t' )) && + (( *(psz_html-1) == ' ' ) || + ( *(psz_html-1) == '\t' ))) + { + strcpy( psz_html, NO_BREAKING_SPACE ); + psz_html += strlen( NO_BREAKING_SPACE ) - 1; + } + } + psz_html++; + psz_subtitle++; + } + + if( ( size_t )( psz_html - psz_html_start ) > i_buf_size - 10 ) + { + int i_len = psz_html - psz_html_start; + + i_buf_size += 100; + psz_html_start = realloc( psz_html_start, i_buf_size ); + psz_html = psz_html_start + i_len; + *psz_html = '\0'; + } + } + strcpy( psz_html, "
" ); + psz_html += 7; + + if( psz_tagStack[ 0 ] != '\0' ) + { + /* Not well formed -- kill everything */ + free( psz_html_start ); + psz_html_start = NULL; + } + else if( psz_html_start ) + { + /* Shrink the memory requirements */ + psz_html_start = realloc( psz_html_start, psz_html - psz_html_start + 1 ); + } + } + return psz_html_start; +} + +/* The reverse of the above function - given a HTML subtitle, turn it + * into a plain-text version, complete with sensible whitespace compaction + */ + +static char *CreatePlainText( char *psz_subtitle ) +{ + char *psz_text = StripTags( psz_subtitle ); + char *s; + + if( !psz_text ) + return NULL; + + s = strpbrk( psz_text, "\t\r\n " ); + while( s ) + { + int k; + char spc = ' '; + int i_whitespace = strspn( s, "\t\r\n " ); + + /* Favour '\n' over other whitespaces - if one of these + * occurs in the whitespace use a '\n' as our value, + * otherwise just use a ' ' + */ + for( k = 0; k < i_whitespace; k++ ) + if( s[k] == '\n' ) spc = '\n'; + + if( i_whitespace > 1 ) + { + memmove( &s[1], + &s[i_whitespace], + strlen( s ) - i_whitespace + 1 ); + } + *s++ = spc; + + s = strpbrk( s, "\t\r\n " ); + } + return psz_text; +} + +/**************************************************************************** + * download and resize image located at psz_url + ***************************************************************************/ +static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec, subpicture_t *p_spu, const char *psz_filename, int i_transparent_color ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + subpicture_region_t *p_region; + video_format_t fmt_out; + int k; + picture_t *p_pic = NULL; + + for( k = 0; k < p_sys->i_images; k++ ) + { + if( p_sys->pp_images && + !strcmp( p_sys->pp_images[k]->psz_filename, psz_filename ) ) + { + p_pic = p_sys->pp_images[k]->p_pic; + break; + } + } + + if( !p_pic ) + { + msg_Err( p_dec, "Unable to read image %s", psz_filename ); + return NULL; + } + + /* Display the feed's image */ + memset( &fmt_out, 0, sizeof( video_format_t)); + + fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A'); + fmt_out.i_aspect = VOUT_ASPECT_FACTOR; + fmt_out.i_sar_num = fmt_out.i_sar_den = 1; + fmt_out.i_width = + fmt_out.i_visible_width = p_pic->p[Y_PLANE].i_visible_pitch; + fmt_out.i_height = + fmt_out.i_visible_height = p_pic->p[Y_PLANE].i_visible_lines; + + p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt_out ); + if( !p_region ) + { + msg_Err( p_dec, "cannot allocate SPU region" ); + return NULL; + } + vout_CopyPicture( p_dec, &p_region->picture, p_pic ); + + /* This isn't the best way to do this - if you really want transparency, then + * you're much better off using an image type that supports it like PNG. The + * spec requires this support though. + */ + if( i_transparent_color > 0 ) + { + uint8_t i_r = ( i_transparent_color >> 16 ) & 0xff; + uint8_t i_g = ( i_transparent_color >> 8 ) & 0xff; + uint8_t i_b = ( i_transparent_color ) & 0xff; + uint8_t i_y = ( ( ( 66 * i_r + 129 * i_g + 25 * i_b + 128 ) >> 8 ) + 16 ); + uint8_t i_u = ( ( -38 * i_r - 74 * i_g + 112 * i_b + 128 ) >> 8 ) + 128 ; + uint8_t i_v = ( ( 112 * i_r - 94 * i_g - 18 * i_b + 128 ) >> 8 ) + 128 ; + + if( ( p_region->picture.Y_PITCH == p_region->picture.U_PITCH ) && + ( p_region->picture.Y_PITCH == p_region->picture.V_PITCH ) && + ( p_region->picture.Y_PITCH == p_region->picture.A_PITCH ) ) + { + int i_lines = p_region->picture.p[ Y_PLANE ].i_lines; + if( i_lines > p_region->picture.p[ U_PLANE ].i_lines ) + i_lines = p_region->picture.p[ U_PLANE ].i_lines; + if( i_lines > p_region->picture.p[ V_PLANE ].i_lines ) + i_lines = p_region->picture.p[ V_PLANE ].i_lines; + if( i_lines > p_region->picture.p[ A_PLANE ].i_lines ) + i_lines = p_region->picture.p[ A_PLANE ].i_lines; + + int i; + + for( i = 0; i < p_region->picture.A_PITCH * i_lines; i++ ) + { + if(( p_region->picture.Y_PIXELS[ i ] == i_y ) && + ( p_region->picture.U_PIXELS[ i ] == i_u ) && + ( p_region->picture.V_PIXELS[ i ] == i_v ) ) + { + p_region->picture.A_PIXELS[ i ] = 1; + } } } - i++; } - psz_text[ i - i_left_moves ] = '\0'; + return p_region; }