# include "config.h"
#endif
-#include <assert.h>
-
#include "subsdec.h"
#include <vlc_plugin.h>
*****************************************************************************/
static const char *const ppsz_encodings[] = {
"",
+ "system",
"UTF-8",
"UTF-16",
"UTF-16BE",
};
static const char *const ppsz_encoding_names[] = {
- N_("Auto"),
+ /* xgettext:
+ The character encoding name in parentheses corresponds to that used for
+ the GetACP translation. "Windows-1252" applies to Western European
+ languages using the Latin alphabet. */
+ N_("Default (Windows-1252)"),
+ N_("System codeset"),
N_("Universal (UTF-8)"),
N_("Universal (UTF-16)"),
N_("Universal (big endian UTF-16)"),
/* Second, try configured encoding */
if (psz_charset == NULL)
{
- psz_charset = var_CreateGetNonEmptyString (p_dec, "subsdec-encoding");
+ psz_charset = var_InheritString (p_dec, "subsdec-encoding");
msg_Dbg (p_dec, "trying configured character encoding: %s",
psz_charset ? psz_charset : "not specified");
+ /* var_InheritString() may return NULL (the debug message above already
+    allows for that), and strcmp() must never be handed a NULL pointer. */
+ if (psz_charset != NULL && !strcmp (psz_charset, "system"))
+ {
+ free (psz_charset);
+ psz_charset = strdup ("");
+ /* ^ iconv() treats "" as nl_langinfo(CODESET) */
+ }
}
/* Third, try "local" encoding with optional UTF-8 autodetection */
if (psz_charset == NULL)
{
- psz_charset = strdup (GetFallbackEncoding ());
+ /* xgettext:
+ The Windows ANSI code page most commonly used for this language.
+ VLC uses this as a guess of the subtitle file's character set
+ (if UTF-8 and UTF-16 autodetection fails).
+ Western European languages normally use "CP1252", which is a
+ Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
+ Other scripts use other code pages.
+
+ This MUST be a valid iconv character set. If unsure, please refer
+ to the VideoLAN translators mailing list. */
+ const char *acp = vlc_pgettext("GetACP", "CP1252");
+
+ psz_charset = strdup (acp);
msg_Dbg (p_dec, "trying default character encoding: %s",
psz_charset ? psz_charset : "not specified");
- if (var_CreateGetBool (p_dec, "subsdec-autodetect-utf8"))
+ if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
{
msg_Dbg (p_dec, "using automatic UTF-8 detection");
p_sys->b_autodetect_utf8 = true;
}
free (psz_charset);
- p_sys->i_align = var_CreateGetInteger( p_dec, "subsdec-align" );
+ p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );
if( p_dec->fmt_in.i_codec == VLC_CODEC_SSA
- && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
+ && var_InheritBool( p_dec, "subsdec-formatted" ) )
{
if( p_dec->fmt_in.i_extra > 0 )
ParseSSAHeader( p_dec );
video_format_t fmt;
/* We cannot display a subpicture with no date */
- if( p_block->i_pts == 0 )
+ if( p_block->i_pts <= VLC_TS_INVALID )
{
msg_Warn( p_dec, "subtitle without a date" );
return NULL;
}
/* Should be resilient against bad subtitles */
- psz_subtitle = strndup( (const char *)p_block->p_buffer,
- p_block->i_buffer );
+ psz_subtitle = malloc( p_block->i_buffer + 1 );
if( psz_subtitle == NULL )
return NULL;
+ memcpy( psz_subtitle, p_block->p_buffer, p_block->i_buffer );
+ psz_subtitle[p_block->i_buffer] = '\0';
if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
{
{
size_t inbytes_left = strlen( psz_subtitle );
size_t outbytes_left = 6 * inbytes_left;
- char *psz_new_subtitle = malloc( outbytes_left + 1 );
+ char *psz_new_subtitle = xmalloc( outbytes_left + 1 );
char *psz_convert_buffer_out = psz_new_subtitle;
const char *psz_convert_buffer_in = psz_subtitle;
- assert( psz_new_subtitle );
-
size_t ret = vlc_iconv( p_sys->iconv_handle,
&psz_convert_buffer_in, &inbytes_left,
&psz_convert_buffer_out, &outbytes_left );
}
/* Create the subpicture unit */
- p_spu = decoder_NewSubpicture( p_dec );
+ p_spu = decoder_NewSubpicture( p_dec, NULL );
if( !p_spu )
{
msg_Warn( p_dec, "can't get spu buffer" );
/* Create a new subpicture region */
memset( &fmt, 0, sizeof(video_format_t) );
fmt.i_chroma = VLC_CODEC_TEXT;
- fmt.i_aspect = 0;
fmt.i_width = fmt.i_height = 0;
fmt.i_x_offset = fmt.i_y_offset = 0;
p_spu->p_region = subpicture_region_New( &fmt );
/* Remove formatting from string */
p_spu->p_region->psz_text = StripTags( psz_subtitle );
- if( var_CreateGetBool( p_dec, "subsdec-formatted" ) )
+ if( var_InheritBool( p_dec, "subsdec-formatted" ) )
{
p_spu->p_region->psz_html = CreateHtmlSubtitle( &p_spu->p_region->i_align, psz_subtitle );
}
* to be carrying style information. Over time people have used them that way.
* In the absence of specifications from which to work, the tags supported
* have been restricted to the simple set permitted by the USF DTD, ie. :
- * Basic: <br>, <i>, <b>, <u>
+ * Basic: <br>, <i>, <b>, <u>, <s>
* Extended: <font>
* Attributes: face
* family
HtmlCopy( &psz_html, &psz_subtitle, "<u>" );
strcat( psz_tag, "u" );
}
+ else if( !strncasecmp( psz_subtitle, "<s>", 3 ) )
+ {
+ HtmlCopy( &psz_html, &psz_subtitle, "<s>" );
+ strcat( psz_tag, "s" );
+ }
else if( !strncasecmp( psz_subtitle, "<font ", 6 ))
{
const char *psz_attribs[] = { "face=", "family=", "size=",
if( psz_attribs[ k ] == NULL )
{
/* Jump over unrecognised tag */
- int i_len = strcspn( psz_subtitle, "\"" ) + 1;
-
- i_len += strcspn( psz_subtitle + i_len, "\"" ) + 1;
+ int i_len = strcspn( psz_subtitle, "\"" );
+ if( psz_subtitle[i_len] == '\"' )
+ {
+ i_len += 1 + strcspn( &psz_subtitle[i_len + 1], "\"" );
+ if( psz_subtitle[i_len] == '\"' )
+ i_len++;
+ }
psz_subtitle += i_len;
}
while (*psz_subtitle == ' ')
b_match = !strncasecmp( psz_subtitle, "</u>", 4 );
i_len = 4;
break;
+ case 's':
+ b_match = !strncasecmp( psz_subtitle, "</s>", 4 );
+ i_len = 4;
+ break;
case 'f':
b_match = !strncasecmp( psz_subtitle, "</font>", 7 );
i_len = 7;
/* Hide {\stupidity} */
psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
}
+ else if( psz_subtitle[0] == '{' && psz_subtitle[1] == 'Y'
+ && psz_subtitle[2] == ':' && strchr( psz_subtitle, '}' ) )
+ {
+ /* Hide {Y:stupidity} */
+ psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
+ }
+ else if( psz_subtitle[0] == '\\' && psz_subtitle[1] )
+ {
+ if( psz_subtitle[1] == 'N' || psz_subtitle[1] == 'n' )
+ {
+ HtmlPut( &psz_html, "<br/>" );
+ psz_subtitle += 2;
+ }
+ else if( psz_subtitle[1] == 'h' )
+ {
+ /* Non breakable space */
+ HtmlPut( &psz_html, NO_BREAKING_SPACE );
+ psz_subtitle += 2;
+ }
+ else
+ {
+ HtmlPut( &psz_html, "\\" );
+ psz_subtitle++;
+ }
+ }
else
{
*psz_html = *psz_subtitle;
case 'u':
HtmlPut( &psz_html, "</u>" );
break;
+ case 's':
+ HtmlPut( &psz_html, "</s>" );
+ break;
case 'f':
HtmlPut( &psz_html, "/font>" );
break;