X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=modules%2Fcodec%2Fsubtitles%2Fsubsdec.c;h=4e053a149a5bc2efb9432db67b4fbccad60d84d1;hb=f0c76d5063ea5929f8b514e3bc5b0681cdf8d35e;hp=05934bbd5fbf462bb94272a6f814cbb755a574f2;hpb=b2167185764f49f12927dc557d6e2d74e1427c8e;p=vlc diff --git a/modules/codec/subtitles/subsdec.c b/modules/codec/subtitles/subsdec.c index 05934bbd5f..4e053a149a 100644 --- a/modules/codec/subtitles/subsdec.c +++ b/modules/codec/subtitles/subsdec.c @@ -27,8 +27,12 @@ /***************************************************************************** * Preamble *****************************************************************************/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif #include "subsdec.h" +#include /***************************************************************************** * Local prototypes @@ -39,13 +43,14 @@ static void CloseDecoder ( vlc_object_t * ); static subpicture_t *DecodeBlock ( decoder_t *, block_t ** ); static subpicture_t *ParseText ( decoder_t *, block_t * ); static char *StripTags ( char * ); -static char *CreateHtmlSubtitle ( char * ); +static char *CreateHtmlSubtitle( int *pi_align, char * ); /***************************************************************************** * Module descriptor. *****************************************************************************/ -static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "", +static const char *const ppsz_encodings[] = { + DEFAULT_NAME, "ASCII", "UTF-8", "", "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "", "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "", "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "", @@ -92,8 +97,9 @@ The following known charsets are used: 254 = PC 437 */ -static int pi_justification[] = { 0, 1, 2 }; -static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")}; +static const int pi_justification[] = { 0, 1, 2 }; +static const char *const ppsz_justification_text[] = { + N_("Center"),N_("Left"),N_("Right")}; #define ENCODING_TEXT N_("Subtitles text encoding") #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles") @@ -108,23 +114,23 @@ static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Righ vlc_module_begin(); - set_shortname( _("Subtitles")); - set_description( _("Text subtitles decoder") ); + set_shortname( N_("Subtitles")); + set_description( N_("Text subtitles decoder") ); set_capability( "decoder", 50 ); set_callbacks( OpenDecoder, CloseDecoder ); set_category( CAT_INPUT ); set_subcategory( SUBCAT_INPUT_SCODEC ); add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT, - VLC_FALSE ); - change_integer_list( pi_justification, ppsz_justification_text, 0 ); + false ); + change_integer_list( pi_justification, ppsz_justification_text, NULL ); add_string( "subsdec-encoding", DEFAULT_NAME, NULL, - ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE ); + ENCODING_TEXT, ENCODING_LONGTEXT, false ); change_string_list( ppsz_encodings, 0, 0 ); - add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL, - AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE ); - add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT, - VLC_FALSE ); + add_bool( "subsdec-autodetect-utf8", true, NULL, + AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, false ); + add_bool( "subsdec-formatted", true, NULL, FORMAT_TEXT, FORMAT_LONGTEXT, + false ); vlc_module_end(); /***************************************************************************** @@ -154,17 +160,14 @@ static int OpenDecoder( vlc_object_t *p_this ) /* Allocate the memory needed to store the decoder's structure */ p_dec->p_sys = p_sys = malloc( sizeof( *p_sys ) ); if( p_sys == NULL ) - { - msg_Err( p_dec, "out of memory" ); return VLC_ENOMEM; - } /* init of p_sys */ memset( p_sys, 0, sizeof( *p_sys ) ); p_sys->i_align = 0; p_sys->iconv_handle = (vlc_iconv_t)-1; - p_sys->b_autodetect_utf8 = VLC_FALSE; - p_sys->b_ass = VLC_FALSE; + p_sys->b_autodetect_utf8 = false; + p_sys->b_ass = false; p_sys->i_original_height = -1; p_sys->i_original_width = -1; TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles ); @@ -207,7 +210,7 @@ static int OpenDecoder( vlc_object_t *p_this ) if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8")) { msg_Dbg (p_dec, "using automatic UTF-8 detection"); - p_sys->b_autodetect_utf8 = VLC_TRUE; + p_sys->b_autodetect_utf8 = true; } } @@ -215,18 +218,12 @@ static int OpenDecoder( vlc_object_t *p_this ) if (psz_charset == NULL) { psz_charset = strdup ("UTF-8"); - msg_Dbg (p_dec, "trying hard-coded character encoding: %s", - psz_charset ? psz_charset : "error"); - } - - /* Fifth, fail */ - if (psz_charset == NULL) - { - free (p_sys); - return VLC_ENOMEM; + msg_Dbg (p_dec, "using UTF-8 character encoding" ); } - if (strcasecmp (psz_charset, "UTF-8") && strcasecmp (psz_charset, "utf8")) + if ((psz_charset != NULL) + && strcasecmp (psz_charset, "UTF-8") + && strcasecmp (psz_charset, "utf8")) { p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset); if (p_sys->iconv_handle == (vlc_iconv_t)(-1)) @@ -255,13 +252,24 @@ static int OpenDecoder( vlc_object_t *p_this ) ****************************************************************************/ static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block ) { - subpicture_t *p_spu = NULL; + subpicture_t *p_spu; + block_t *p_block; - if( !pp_block || *pp_block == NULL ) return NULL; + if( !pp_block || *pp_block == NULL ) + return NULL; - p_spu = ParseText( p_dec, *pp_block ); + p_block = *pp_block; + if( p_block->i_flags & (BLOCK_FLAG_DISCONTINUITY|BLOCK_FLAG_CORRUPTED) ) + { + block_Release( p_block ); + return NULL; + } + if( p_block->i_rate != 0 ) + p_block->i_length = p_block->i_length * p_block->i_rate / INPUT_RATE_DEFAULT; - block_Release( *pp_block ); + p_spu = ParseText( p_dec, p_block ); + + block_Release( p_block ); *pp_block = NULL; return p_spu; @@ -286,12 +294,9 @@ static void CloseDecoder( vlc_object_t *p_this ) if( !p_sys->pp_ssa_styles[i] ) continue; - if( p_sys->pp_ssa_styles[i]->psz_stylename ) - free( p_sys->pp_ssa_styles[i]->psz_stylename ); - if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ) - free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ); - if( p_sys->pp_ssa_styles[i] ) - free( p_sys->pp_ssa_styles[i] ); + free( p_sys->pp_ssa_styles[i]->psz_stylename ); + free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname ); + free( p_sys->pp_ssa_styles[i] ); } TAB_CLEAN( p_sys->i_ssa_styles, p_sys->pp_ssa_styles ); } @@ -304,9 +309,8 @@ static void CloseDecoder( vlc_object_t *p_this ) continue; if( p_sys->pp_images[i]->p_pic ) - p_sys->pp_images[i]->p_pic->pf_release( p_sys->pp_images[i]->p_pic ); - if( p_sys->pp_images[i]->psz_filename ) - free( p_sys->pp_images[i]->psz_filename ); + picture_Release( p_sys->pp_images[i]->p_pic ); + free( p_sys->pp_images[i]->psz_filename ); free( p_sys->pp_images[i] ); } @@ -352,9 +356,9 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) { if (EnsureUTF8( psz_subtitle ) == NULL) { - msg_Err( p_dec, _("failed to convert subtitle encoding.\n" + msg_Err( p_dec, "failed to convert subtitle encoding.\n" "Try manually setting a character-encoding " - "before you open the file.") ); + "before you open the file." ); } } else @@ -366,7 +370,7 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) { msg_Dbg( p_dec, "invalid UTF-8 sequence: " "disabling UTF-8 subtitles autodetection" ); - p_sys->b_autodetect_utf8 = VLC_FALSE; + p_sys->b_autodetect_utf8 = false; } } @@ -388,9 +392,9 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) if( ( ret == (size_t)(-1) ) || inbytes_left ) { free( psz_new_subtitle ); - msg_Err( p_dec, _("failed to convert subtitle encoding.\n" + msg_Err( p_dec, "failed to convert subtitle encoding.\n" "Try manually setting a character-encoding " - "before you open the file.") ); + "before you open the file." ); return NULL; } @@ -404,23 +408,21 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) if( !p_spu ) { msg_Warn( p_dec, "can't get spu buffer" ); - if( psz_subtitle ) free( psz_subtitle ); + free( psz_subtitle ); return NULL; } - p_spu->b_pausable = VLC_TRUE; - /* Create a new subpicture region */ memset( &fmt, 0, sizeof(video_format_t) ); fmt.i_chroma = VLC_FOURCC('T','E','X','T'); fmt.i_aspect = 0; fmt.i_width = fmt.i_height = 0; fmt.i_x_offset = fmt.i_y_offset = 0; - p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt ); + p_spu->p_region = subpicture_region_New( &fmt ); if( !p_spu->p_region ) { msg_Err( p_dec, "cannot allocate SPU region" ); - if( psz_subtitle ) free( psz_subtitle ); + free( psz_subtitle ); p_dec->pf_spu_buffer_del( p_dec, p_spu ); return NULL; } @@ -430,21 +432,21 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) { /* Normal text subs, easy markup */ p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; - p_spu->i_x = p_sys->i_align ? 20 : 0; - p_spu->i_y = 10; + p_spu->p_region->i_x = p_sys->i_align ? 20 : 0; + p_spu->p_region->i_y = 10; /* Remove formatting from string */ p_spu->p_region->psz_text = StripTags( psz_subtitle ); if( var_CreateGetBool( p_dec, "subsdec-formatted" ) ) { - p_spu->p_region->psz_html = CreateHtmlSubtitle( psz_subtitle ); + p_spu->p_region->psz_html = CreateHtmlSubtitle( &p_spu->p_region->i_align, psz_subtitle ); } p_spu->i_start = p_block->i_pts; p_spu->i_stop = p_block->i_pts + p_block->i_length; p_spu->b_ephemer = (p_block->i_length == 0); - p_spu->b_absolute = VLC_FALSE; + p_spu->b_absolute = false; } else { @@ -455,11 +457,11 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) p_spu->i_start = p_block->i_pts; p_spu->i_stop = p_block->i_pts + p_block->i_length; p_spu->b_ephemer = (p_block->i_length == 0); - p_spu->b_absolute = VLC_FALSE; + p_spu->b_absolute = false; p_spu->i_original_picture_width = p_sys->i_original_width; p_spu->i_original_picture_height = p_sys->i_original_height; } - if( psz_subtitle ) free( psz_subtitle ); + free( psz_subtitle ); return p_spu; } @@ -577,219 +579,256 @@ static char *StripTags( char *psz_subtitle ) * returned, and the rendering engine will fall back to the * plain text version of the subtitle. */ -static char *CreateHtmlSubtitle( char *psz_subtitle ) +static void HtmlNPut( char **ppsz_html, const char *psz_text, int i_max ) { - char psz_tagStack[ 100 ]; + const int i_len = strlen(psz_text); + + strncpy( *ppsz_html, psz_text, i_max ); + *ppsz_html += __MIN(i_max,i_len); +} + +static void HtmlPut( char **ppsz_html, const char *psz_text ) +{ + strcpy( *ppsz_html, psz_text ); + *ppsz_html += strlen(psz_text); +} +static void HtmlCopy( char **ppsz_html, char **ppsz_subtitle, const char *psz_text ) +{ + HtmlPut( ppsz_html, psz_text ); + *ppsz_subtitle += strlen(psz_text); +} + +static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle ) +{ + char *psz_tag = malloc( ( strlen( psz_subtitle ) / 3 ) + 1 ); + if( !psz_tag ) return NULL; size_t i_buf_size = strlen( psz_subtitle ) + 100; char *psz_html_start = malloc( i_buf_size ); - psz_tagStack[ 0 ] = '\0'; + psz_tag[ 0 ] = '\0'; - if( psz_html_start != NULL ) + if( psz_html_start == NULL ) { - char *psz_html = psz_html_start; + free( psz_tag ); + return NULL; + } + + char *psz_html = psz_html_start; - strcpy( psz_html, "" ); - psz_html += 6; + strcpy( psz_html, "" ); + psz_html += 6; - while( *psz_subtitle ) + /* Check for forced alignment */ + if( !strncmp( psz_subtitle, "{\\an", 4 ) && psz_subtitle[4] >= '1' && psz_subtitle[4] <= '9' && psz_subtitle[5] == '}' ) + { + static const int pi_vertical[3] = { SUBPICTURE_ALIGN_BOTTOM, 0, SUBPICTURE_ALIGN_TOP }; + static const int pi_horizontal[3] = { SUBPICTURE_ALIGN_LEFT, 0, SUBPICTURE_ALIGN_RIGHT }; + const int i_id = psz_subtitle[4] - '1'; + + *pi_align = pi_vertical[i_id/3] | pi_horizontal[i_id%3]; + + psz_subtitle += 6; + } + + /* */ + while( *psz_subtitle ) + { + if( *psz_subtitle == '\n' ) { - if( *psz_subtitle == '\n' ) + HtmlPut( &psz_html, "
" ); + psz_subtitle++; + } + else if( *psz_subtitle == '<' ) + { + if( !strncasecmp( psz_subtitle, "
", 5 )) { - strcpy( psz_html, "
" ); - psz_html += 5; - psz_subtitle++; + HtmlCopy( &psz_html, &psz_subtitle, "
" ); } - else if( *psz_subtitle == '<' ) + else if( !strncasecmp( psz_subtitle, "", 3 ) ) { - if( !strncasecmp( psz_subtitle, "
", 5 )) - { - strcpy( psz_html, "
" ); - psz_html += 5; - psz_subtitle += 5; - } - else if( !strncasecmp( psz_subtitle, "", 3 ) ) - { - strcpy( psz_html, "" ); - strcat( psz_tagStack, "b" ); - psz_html += 3; - psz_subtitle += 3; - } - else if( !strncasecmp( psz_subtitle, "", 3 ) ) - { - strcpy( psz_html, "" ); - strcat( psz_tagStack, "i" ); - psz_html += 3; - psz_subtitle += 3; - } - else if( !strncasecmp( psz_subtitle, "", 3 ) ) - { - strcpy( psz_html, "" ); - strcat( psz_tagStack, "u" ); - psz_html += 3; - psz_subtitle += 3; - } - else if( !strncasecmp( psz_subtitle, "" ); + strcat( psz_tag, "b" ); + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + HtmlCopy( &psz_html, &psz_subtitle, "" ); + strcat( psz_tag, "i" ); + } + else if( !strncasecmp( psz_subtitle, "", 3 ) ) + { + HtmlCopy( &psz_html, &psz_subtitle, "" ); + strcat( psz_tag, "u" ); + } + else if( !strncasecmp( psz_subtitle, "" ); + } + HtmlPut( &psz_html, "\"" ); + if( !strcmp( psz_attribs[ k ], "color=" ) && *psz_subtitle >= '0' && *psz_subtitle <= '9' ) + HtmlPut( &psz_html, "#" ); + HtmlNPut( &psz_html, psz_subtitle, i_len ); + HtmlPut( &psz_html, "\"" ); - i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1; psz_subtitle += i_len; + if( *psz_subtitle == '\"' ) + psz_subtitle++; + break; } - while (*psz_subtitle == ' ') - *psz_html++ = *psz_subtitle++; } - *psz_html++ = *psz_subtitle++; - } - else if( !strncmp( psz_subtitle, "= 0 ) + if( psz_attribs[ k ] == NULL ) { - psz_lastTag = psz_tagStack + i_len; - i_len = 0; + /* Jump over unrecognised tag */ + int i_len = strcspn( psz_subtitle, "\"" ) + 1; - switch( *psz_lastTag ) - { - case 'b': - b_match = !strncasecmp( psz_subtitle, "", 4 ); - i_len = 4; - break; - case 'i': - b_match = !strncasecmp( psz_subtitle, "", 4 ); - i_len = 4; - break; - case 'u': - b_match = !strncasecmp( psz_subtitle, "", 4 ); - i_len = 4; - break; - case 'f': - b_match = !strncasecmp( psz_subtitle, "", 7 ); - i_len = 7; - break; - } + i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1; + psz_subtitle += i_len; } - if( ! b_match ) + while (*psz_subtitle == ' ') + *psz_html++ = *psz_subtitle++; + } + *psz_html++ = *psz_subtitle++; + } + else if( !strncmp( psz_subtitle, "= 0 ) + { + psz_lastTag = psz_tag + i_len; + i_len = 0; + + switch( *psz_lastTag ) { - /* Not well formed -- kill everything */ - free( psz_html_start ); - psz_html_start = NULL; + case 'b': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'i': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'u': + b_match = !strncasecmp( psz_subtitle, "", 4 ); + i_len = 4; + break; + case 'f': + b_match = !strncasecmp( psz_subtitle, "", 7 ); + i_len = 7; break; } - *psz_lastTag = '\0'; - strncpy( psz_html, psz_subtitle, i_len ); - psz_html += i_len; - psz_subtitle += i_len; } - else + if( ! b_match ) { - psz_subtitle += strcspn( psz_subtitle, ">" ); + /* Not well formed -- kill everything */ + free( psz_html_start ); + psz_html_start = NULL; + break; } + *psz_lastTag = '\0'; + strncpy( psz_html, psz_subtitle, i_len ); + psz_html += i_len; + psz_subtitle += i_len; } - else if( *psz_subtitle == '&' ) + else { - if( !strncasecmp( psz_subtitle, "<", 4 )) - { - strcpy( psz_html, "<" ); - psz_html += 4; - psz_subtitle += 4; - } - else if( !strncasecmp( psz_subtitle, ">", 4 )) - { - strcpy( psz_html, ">" ); - psz_html += 4; - psz_subtitle += 4; - } - else if( !strncasecmp( psz_subtitle, "&", 5 )) - { - strcpy( psz_html, "&" ); - psz_html += 5; - psz_subtitle += 5; - } - else - { - strcpy( psz_html, "&" ); - psz_html += 5; - psz_subtitle++; - } + psz_subtitle += strcspn( psz_subtitle, ">" ); + } + } + else if( *psz_subtitle == '&' ) + { + if( !strncasecmp( psz_subtitle, "<", 4 )) + { + HtmlCopy( &psz_html, &psz_subtitle, "<" ); + } + else if( !strncasecmp( psz_subtitle, ">", 4 )) + { + HtmlCopy( &psz_html, &psz_subtitle, ">" ); + } + else if( !strncasecmp( psz_subtitle, "&", 5 )) + { + HtmlCopy( &psz_html, &psz_subtitle, "&" ); } else { - *psz_html = *psz_subtitle; - if( psz_html > psz_html_start ) - { - /* Check for double whitespace */ - if((( *psz_html == ' ' ) || - ( *psz_html == '\t' )) && - (( *(psz_html-1) == ' ' ) || - ( *(psz_html-1) == '\t' ))) - { - strcpy( psz_html, NO_BREAKING_SPACE ); - psz_html += strlen( NO_BREAKING_SPACE ) - 1; - } - } - psz_html++; + HtmlPut( &psz_html, "&" ); psz_subtitle++; } - - if( ( size_t )( psz_html - psz_html_start ) > i_buf_size - 10 ) + } + else + { + *psz_html = *psz_subtitle; + if( psz_html > psz_html_start ) { - int i_len = psz_html - psz_html_start; - - i_buf_size += 100; - psz_html_start = realloc( psz_html_start, i_buf_size ); - psz_html = psz_html_start + i_len; - *psz_html = '\0'; + /* Check for double whitespace */ + if( ( *psz_html == ' ' || *psz_html == '\t' ) && + ( *(psz_html-1) == ' ' || *(psz_html-1) == '\t' ) ) + { + HtmlPut( &psz_html, NO_BREAKING_SPACE ); + psz_html--; + } } + psz_html++; + psz_subtitle++; } - strcpy( psz_html, "
" ); - psz_html += 7; - if( psz_tagStack[ 0 ] != '\0' ) + if( ( size_t )( psz_html - psz_html_start ) > i_buf_size - 50 ) { - /* Not well formed -- kill everything */ - free( psz_html_start ); - psz_html_start = NULL; - } - else if( psz_html_start ) - { - /* Shrink the memory requirements */ - psz_html_start = realloc( psz_html_start, psz_html - psz_html_start + 1 ); + int i_len = psz_html - psz_html_start; + + i_buf_size += 200; + psz_html_start = realloc( psz_html_start, i_buf_size ); + psz_html = psz_html_start + i_len; + *psz_html = '\0'; } } + strcpy( psz_html, "
" ); + psz_html += 7; + + if( psz_tag[ 0 ] != '\0' ) + { + /* Not well formed -- kill everything */ + free( psz_html_start ); + psz_html_start = NULL; + } + else if( psz_html_start ) + { + /* Shrink the memory requirements */ + psz_html_start = realloc( psz_html_start, psz_html - psz_html_start + 1 ); + } + free( psz_tag ); + return psz_html_start; } +