/*****************************************************************************
* subsdec.c : text subtitles decoder
*****************************************************************************
- * Copyright (C) 2000-2001 the VideoLAN team
+ * Copyright (C) 2000-2006 the VideoLAN team
* $Id$
*
* Authors: Gildas Bazin <gbazin@videolan.org>
int i_original_width;
int i_align; /* Subtitles alignment on the vout */
vlc_iconv_t iconv_handle; /* handle to iconv instance */
+ vlc_bool_t b_autodetect_utf8;
ssa_style_t **pp_ssa_styles;
int i_ssa_styles;
/*****************************************************************************
* Module descriptor.
*****************************************************************************/
-static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
+static const char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
"ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
"ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
"ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
"HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
"Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
"HPROMAN8", "NEXTSTEP" };
+/*
+SSA supports charset selection.
+The following known charsets are used:
+
+0 = Ansi - Western European
+1 = default
+2 = symbol
+3 = invalid
+77 = Mac
+128 = Japanese (Shift JIS)
+129 = Hangul
+130 = Johab
+134 = GB2312 Simplified Chinese
+136 = Big5 Traditional Chinese
+161 = Greek
+162 = Turkish
+163 = Vietnamese
+177 = Hebrew
+178 = Arabic
+186 = Baltic
+204 = Russian (Cyrillic)
+222 = Thai
+238 = Eastern European
+254 = PC 437
+*/
static int pi_justification[] = { 0, 1, 2 };
-static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
+static const char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
#define ENCODING_TEXT N_("Subtitles text encoding")
#define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
#define ALIGN_TEXT N_("Subtitles justification")
#define ALIGN_LONGTEXT N_("Set the justification of subtitles")
+#define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
+#define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
+ "UTF-8 encoding within subtitles files.")
#define FORMAT_TEXT N_("Formatted Subtitles")
-#define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting.\
- VLC partly implements this, but you can choose to disable all formatting.")
+#define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
+ "VLC partly implements this, but you can choose to disable all formatting.")
vlc_module_begin();
add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
change_string_list( ppsz_encodings, 0, 0 );
+ add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
+ AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
VLC_FALSE );
vlc_module_end();
(decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
{
msg_Err( p_dec, "out of memory" );
- return VLC_EGENERIC;
+ return VLC_ENOMEM;
}
-
+
/* init of p_sys */
p_sys->i_align = 0;
p_sys->iconv_handle = (vlc_iconv_t)-1;
+ p_sys->b_autodetect_utf8 = VLC_FALSE;
p_sys->b_ass = VLC_FALSE;
p_sys->i_original_height = -1;
p_sys->i_original_width = -1;
if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
{
- msg_Dbg( p_dec, "using character encoding: %s",
+ msg_Dbg( p_dec, "using demux suggested character encoding: %s",
p_dec->fmt_in.subs.psz_encoding );
if( strcmp( p_dec->fmt_in.subs.psz_encoding, "UTF-8" ) )
p_sys->iconv_handle = vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
var_Get( p_dec, "subsdec-encoding", &val );
if( !strcmp( val.psz_string, DEFAULT_NAME ) )
{
- char *psz_charset =(char*)malloc( 100 );
-#ifdef __APPLE__
- /* Most subtitles are not in UTF-8, which is the default on Mac OS X */
- sprintf( psz_charset, "ISO-8859-1" );
-#else
- vlc_current_charset( &psz_charset );
-#endif
- p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
- msg_Dbg( p_dec, "using default character encoding: %s", psz_charset );
- free( psz_charset );
+ const char *psz_charset = GetFallbackEncoding();
+
+ p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec,
+ "subsdec-autodetect-utf8" );
+
+ p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
+ msg_Dbg( p_dec, "using fallback character encoding: %s", psz_charset );
}
else if( !strcmp( val.psz_string, "UTF-8" ) )
{
- msg_Dbg( p_dec, "using character encoding: UTF-8" );
+ msg_Dbg( p_dec, "using enforced character encoding: UTF-8" );
}
else if( val.psz_string )
{
- msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
+ msg_Dbg( p_dec, "using enforced character encoding: %s", val.psz_string );
p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
{
}
if( val.psz_string ) free( val.psz_string );
}
-
+
var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
var_Get( p_dec, "subsdec-align", &val );
p_sys->i_align = val.i_int;
{
vlc_iconv_close( p_sys->iconv_handle );
}
-
+
if( p_sys->pp_ssa_styles )
{
int i;
}
/* Check validity of packet data */
- if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' )
+ /* An "empty" line containing only \0 can be used to force
+ an ephemer picture off the screen */
+ if( p_block->i_buffer < 1 )
{
- msg_Warn( p_dec, "empty subtitle" );
+ msg_Warn( p_dec, "no subtitle data" );
return NULL;
}
/* Should be resiliant against bad subtitles */
psz_subtitle = strndup( (const char *)p_block->p_buffer,
p_block->i_buffer );
+ if( psz_subtitle == NULL )
+ return NULL;
- if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
+ if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
+ EnsureUTF8( psz_subtitle );
+ else
{
- char *psz_new_subtitle;
- char *psz_convert_buffer_out;
- char *psz_convert_buffer_in;
- size_t ret, inbytes_left, outbytes_left;
-
- psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
- psz_convert_buffer_out = psz_new_subtitle;
- psz_convert_buffer_in = psz_subtitle;
- inbytes_left = strlen( psz_subtitle );
- outbytes_left = 6 * inbytes_left;
- ret = vlc_iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
- &inbytes_left, &psz_convert_buffer_out,
- &outbytes_left );
- *psz_convert_buffer_out = '\0';
- if( psz_subtitle ) free( psz_subtitle );
- psz_subtitle = NULL;
+ if( p_sys->b_autodetect_utf8 )
+ {
+ if( IsUTF8( psz_subtitle ) == NULL )
+ {
+ msg_Dbg( p_dec, "invalid UTF-8 sequence: "
+ "disabling UTF-8 subtitles autodetection" );
+ p_sys->b_autodetect_utf8 = VLC_FALSE;
+ }
+ }
- if( inbytes_left )
+ if( !p_sys->b_autodetect_utf8 )
{
- msg_Err( p_dec, _("Failed to convert subtitle encoding.\n"
- "Try manually setting a character-encoding "
- "before you open the file.") );
- return NULL;
+ size_t inbytes_left = strlen( psz_subtitle );
+ size_t outbytes_left = 6 * inbytes_left;
+ char *psz_new_subtitle = malloc( outbytes_left + 1 );
+ char *psz_convert_buffer_out = psz_new_subtitle;
+ const char *psz_convert_buffer_in = psz_subtitle;
+
+ size_t ret = vlc_iconv( p_sys->iconv_handle,
+ &psz_convert_buffer_in, &inbytes_left,
+ &psz_convert_buffer_out, &outbytes_left );
+
+ *psz_convert_buffer_out++ = '\0';
+ free( psz_subtitle );
+
+ if( ( ret == (size_t)(-1) ) || inbytes_left )
+ {
+ free( psz_new_subtitle );
+ msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
+ "Try manually setting a character-encoding "
+ "before you open the file.") );
+ return NULL;
+ }
+
+ psz_subtitle = realloc( psz_new_subtitle,
+ psz_convert_buffer_out - psz_new_subtitle );
}
- psz_subtitle = psz_new_subtitle;
}
/* Create the subpicture unit */
return NULL;
}
+ p_spu->b_pausable = VLC_TRUE;
+
/* Create a new subpicture region */
memset( &fmt, 0, sizeof(video_format_t) );
fmt.i_chroma = VLC_FOURCC('T','E','X','T');
p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
p_spu->i_x = p_sys->i_align ? 20 : 0;
p_spu->i_y = 10;
-
+
/* Remove formatting from string */
StripTags( psz_subtitle );
-
- p_spu->p_region->psz_text = psz_subtitle;
+
+ p_spu->p_region->psz_text = psz_subtitle;
p_spu->i_start = p_block->i_pts;
p_spu->i_stop = p_block->i_pts + p_block->i_length;
p_spu->b_ephemer = (p_block->i_length == 0);
static char* GotoNextLine( char *psz_text )
{
char *p_newline = psz_text;
-
+
while( p_newline[0] != '\0' )
{
if( p_newline[0] == '\n' || p_newline[0] == '\r' )
i_color = (int) strtol( psz_color+2, NULL, 16 );
}
else i_color = (int) strtol( psz_color, NULL, 0 );
-
+
*pi_color = 0;
*pi_color |= ( ( i_color & 0x000000FF ) << 16 ); /* Red */
*pi_color |= ( ( i_color & 0x0000FF00 ) ); /* Green */
*pi_color |= ( ( i_color & 0x00FF0000 ) >> 16 ); /* Blue */
-
+
if( pi_alpha != NULL )
*pi_alpha = ( i_color & 0xFF000000 ) >> 24;
}
char *psz_parser = NULL;
char *psz_header = malloc( p_dec->fmt_in.i_extra+1 );
int i_section_type = 1;
-
+
memcpy( psz_header, p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra );
psz_header[ p_dec->fmt_in.i_extra] = '\0';
-
+
/* Handle [Script Info] section */
psz_parser = strcasestr( psz_header, "[Script Info]" );
if( psz_parser == NULL ) goto eof;
{
int temp;
char buffer_text[MAX_LINE + 1];
-
+
if( psz_parser[0] == '!' || psz_parser[0] == ';' ) /* comment */;
else if( sscanf( psz_parser, "PlayResX: %d", &temp ) == 1 )
p_sys->i_original_width = ( temp > 0 ) ? temp : -1;
p_style->font_style.i_style_flags = 0;
if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
-
+
if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
else if( i_border == 3 )
{
p_style->font_style.i_outline_width = ( i_border == 1 ) ? i_outline : 0;
p_style->font_style.i_spacing = i_spacing;
//p_style->font_style.f_angle = f_angle;
-
+
p_style->i_align = 0;
if( i_align == 0x1 || i_align == 0x4 || i_align == 0x1 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
if( i_align == 0x3 || i_align == 0x6 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
if( i_align == 0x1 || i_align == 0x2 || i_align == 0x3 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
p_style->i_margin_v = i_margin_v;
-
+
/*TODO: Ignored: angle i_scale_x|y (fontscaling), i_encoding */
TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
}