X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=modules%2Fcodec%2Fsubtitles%2Fsubsdec.c;h=c49e8f1dae08de4ec555e6295e984095472a93be;hb=22da31efd99ff625cfd7e74d2231667a74c582d7;hp=f217f0b77e244effe9cd8e718a68939c6f68a15d;hpb=9d5e75a7a5643a2144dc561fc246a7a3ca7e5fb0;p=vlc
diff --git a/modules/codec/subtitles/subsdec.c b/modules/codec/subtitles/subsdec.c
index f217f0b77e..c49e8f1dae 100644
--- a/modules/codec/subtitles/subsdec.c
+++ b/modules/codec/subtitles/subsdec.c
@@ -68,7 +68,7 @@ static const char *const ppsz_encodings[] = {
"ISO-8859-6",
"Windows-1256",
"ISO-8859-7",
- "Windows-1256",
+ "Windows-1253",
"ISO-8859-8",
"Windows-1255",
"ISO-8859-9",
@@ -84,8 +84,8 @@ static const char *const ppsz_encodings[] = {
"ISO-2022-JP-2",
"EUC-JP",
"Shift_JIS",
+ "CP949",
"ISO-2022-KR",
- "EUC-KR",
"Big5",
"ISO-2022-TW",
"Big5-HKSCS",
@@ -94,7 +94,11 @@ static const char *const ppsz_encodings[] = {
};
static const char *const ppsz_encoding_names[] = {
- N_("Auto"),
+ /* xgettext:
+ The character encoding name in parentheses corresponds to that used for
+ the GetACP translation. "Windows-1252" applies to Western European
+ languages using the Latin alphabet. */
+ N_("Default (Windows-1252)"),
N_("Universal (UTF-8)"),
N_("Universal (UTF-16)"),
N_("Universal (big endian UTF-16)"),
@@ -121,7 +125,7 @@ static const char *const ppsz_encoding_names[] = {
N_("Arabic (Windows-1256)"),
/* 7 */
N_("Greek (ISO 8859-7)"),
- N_("Greek (Windows-1256)"),
+ N_("Greek (Windows-1253)"),
/* 8 */
N_("Hebrew (ISO 8859-8)"),
N_("Hebrew (Windows-1255)"),
@@ -147,8 +151,8 @@ static const char *const ppsz_encoding_names[] = {
N_("Japanese (7-bits JIS/ISO-2022-JP-2)"),
N_("Japanese Unix (EUC-JP)"),
N_("Japanese (Shift JIS)"),
+ N_("Korean (EUC-KR/CP949)"),
N_("Korean (ISO-2022-KR)"),
- N_("Korean Unix (EUC-KR)"),
N_("Traditional Chinese (Big5)"),
N_("Traditional Chinese Unix (EUC-TW)"),
N_("Hong-Kong Supplementary (HKSCS)"),
@@ -228,7 +232,6 @@ static int OpenDecoder( vlc_object_t *p_this )
{
decoder_t *p_dec = (decoder_t*)p_this;
decoder_sys_t *p_sys;
- vlc_value_t val;
switch( p_dec->fmt_in.i_codec )
{
@@ -276,7 +279,7 @@ static int OpenDecoder( vlc_object_t *p_this )
/* Second, try configured encoding */
if (psz_charset == NULL)
{
- psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
+ psz_charset = var_InheritString (p_dec, "subsdec-encoding");
msg_Dbg (p_dec, "trying configured character encoding: %s",
psz_charset ? psz_charset : "not specified");
}
@@ -284,11 +287,23 @@ static int OpenDecoder( vlc_object_t *p_this )
/* Third, try "local" encoding with optional UTF-8 autodetection */
if (psz_charset == NULL)
{
- psz_charset = strdup (GetFallbackEncoding ());
+ /* xgettext:
+ The Windows ANSI code page most commonly used for this language.
+ VLC uses this as a guess of the subtitle file's character set
+ (if UTF-8 and UTF-16 autodetection fails).
+ Western European languages normally use "CP1252", which is a
+ Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
+ Other scripts use other code pages.
+
+ This MUST be a valid iconv character set. If unsure, please refer to
+ the VideoLAN translators mailing list. */
+ const char *acp = vlc_pgettext("GetACP", "CP1252");
+
+ psz_charset = strdup (acp);
msg_Dbg (p_dec, "trying default character encoding: %s",
psz_charset ? psz_charset : "not specified");
- if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
+ if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
{
msg_Dbg (p_dec, "using automatic UTF-8 detection");
p_sys->b_autodetect_utf8 = true;
@@ -312,12 +327,10 @@ static int OpenDecoder( vlc_object_t *p_this )
}
free (psz_charset);
- var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
- var_Get( p_dec, "subsdec-align", &val );
- p_sys->i_align = val.i_int;
+ p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );
if( p_dec->fmt_in.i_codec == VLC_CODEC_SSA
- && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
+ && var_InheritBool( p_dec, "subsdec-formatted" ) )
{
if( p_dec->fmt_in.i_extra > 0 )
ParseSSAHeader( p_dec );
@@ -410,7 +423,7 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
video_format_t fmt;
/* We cannot display a subpicture with no date */
- if( p_block->i_pts == 0 )
+ if( p_block->i_pts <= VLC_TS_INVALID )
{
msg_Warn( p_dec, "subtitle without a date" );
return NULL;
@@ -426,10 +439,11 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
}
/* Should be resiliant against bad subtitles */
- psz_subtitle = strndup( (const char *)p_block->p_buffer,
- p_block->i_buffer );
+ psz_subtitle = malloc( p_block->i_buffer + 1 );
if( psz_subtitle == NULL )
return NULL;
+ memcpy( psz_subtitle, p_block->p_buffer, p_block->i_buffer );
+ psz_subtitle[p_block->i_buffer] = '\0';
if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
{
@@ -457,7 +471,7 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
{
size_t inbytes_left = strlen( psz_subtitle );
size_t outbytes_left = 6 * inbytes_left;
- char *psz_new_subtitle = malloc( outbytes_left + 1 );
+ char *psz_new_subtitle = xmalloc( outbytes_left + 1 );
char *psz_convert_buffer_out = psz_new_subtitle;
const char *psz_convert_buffer_in = psz_subtitle;
@@ -479,11 +493,13 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
psz_subtitle = realloc( psz_new_subtitle,
psz_convert_buffer_out - psz_new_subtitle );
+ if( !psz_subtitle )
+ psz_subtitle = psz_new_subtitle;
}
}
/* Create the subpicture unit */
- p_spu = decoder_NewSubpicture( p_dec );
+ p_spu = decoder_NewSubpicture( p_dec, NULL );
if( !p_spu )
{
msg_Warn( p_dec, "can't get spu buffer" );
@@ -494,7 +510,6 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
/* Create a new subpicture region */
memset( &fmt, 0, sizeof(video_format_t) );
fmt.i_chroma = VLC_CODEC_TEXT;
- fmt.i_aspect = 0;
fmt.i_width = fmt.i_height = 0;
fmt.i_x_offset = fmt.i_y_offset = 0;
p_spu->p_region = subpicture_region_New( &fmt );
@@ -517,7 +532,7 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
/* Remove formatting from string */
p_spu->p_region->psz_text = StripTags( psz_subtitle );
- if( var_CreateGetBool( p_dec, "subsdec-formatted" ) )
+ if( var_InheritBool( p_dec, "subsdec-formatted" ) )
{
p_spu->p_region->psz_html = CreateHtmlSubtitle( &p_spu->p_region->i_align, psz_subtitle );
}
@@ -620,7 +635,8 @@ static char *StripTags( char *psz_subtitle )
psz_subtitle++;
}
*psz_text = '\0';
- psz_text_start = realloc( psz_text_start, strlen( psz_text_start ) + 1 );
+ char *psz = realloc( psz_text_start, strlen( psz_text_start ) + 1 );
+ if( psz ) psz_text_start = psz;
return psz_text_start;
}
@@ -635,7 +651,7 @@ static char *StripTags( char *psz_subtitle )
* to be carrying style information. Over time people have used them that way.
* In the absence of specifications from which to work, the tags supported
* have been restricted to the simple set permitted by the USF DTD, ie. :
- * Basic: <br>, <i>, <b>, <u>
+ * Basic: <br>, <i>, <b>, <u>, <s>
* Extended: <font>
* Attributes: face
* family
@@ -685,6 +701,7 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle )
psz_tag[ 0 ] = '\0';
/* */
+ // FIXME: why "+ 100" ??? (the extra headroom is unexplained)
size_t i_buf_size = strlen( psz_subtitle ) + 100;
char *psz_html_start = malloc( i_buf_size );
char *psz_html = psz_html_start;
@@ -728,6 +745,11 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle )
HtmlCopy( &psz_html, &psz_subtitle, "" );
strcat( psz_tag, "u" );
}
+ else if( !strncasecmp( psz_subtitle, "", 3 ) )
+ {
+ HtmlCopy( &psz_html, &psz_subtitle, "" );
+ strcat( psz_tag, "s" );
+ }
else if( !strncasecmp( psz_subtitle, "", 4 );
i_len = 4;
break;
+ case 's':
+ b_match = !strncasecmp( psz_subtitle, "", 4 );
+ i_len = 4;
+ break;
case 'f':
b_match = !strncasecmp( psz_subtitle, "", 7 );
i_len = 7;
@@ -940,6 +970,31 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle )
/* Hide {\stupidity} */
psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
}
+ else if( psz_subtitle[0] == '{' && psz_subtitle[1] == 'Y'
+ && psz_subtitle[2] == ':' && strchr( psz_subtitle, '}' ) )
+ {
+ /* Hide {Y:stupidity} */
+ psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
+ }
+ else if( psz_subtitle[0] == '\\' && psz_subtitle[1] )
+ {
+ if( psz_subtitle[1] == 'N' || psz_subtitle[1] == 'n' )
+ {
+ HtmlPut( &psz_html, "
" );
+ psz_subtitle += 2;
+ }
+ else if( psz_subtitle[1] == 'h' )
+ {
+ /* Non breakable space */
+ HtmlPut( &psz_html, NO_BREAKING_SPACE );
+ psz_subtitle += 2;
+ }
+ else
+ {
+ HtmlPut( &psz_html, "\\" );
+ psz_subtitle++;
+ }
+ }
else
{
*psz_html = *psz_subtitle;
@@ -1000,6 +1055,9 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle )
case 'u':
HtmlPut( &psz_html, "" );
break;
+ case 's':
+ HtmlPut( &psz_html, "" );
+ break;
case 'f':
HtmlPut( &psz_html, "/font>" );
break;