# include "config.h"
#endif
-#include "subsdec.h"
+#include <vlc_common.h>
#include <vlc_plugin.h>
+#include <vlc_codec.h>
+#include <vlc_charset.h>
-/*****************************************************************************
- * Local prototypes
- *****************************************************************************/
-static int OpenDecoder ( vlc_object_t * );
-static void CloseDecoder ( vlc_object_t * );
-
-static subpicture_t *DecodeBlock ( decoder_t *, block_t ** );
-static subpicture_t *ParseText ( decoder_t *, block_t * );
-static char *StripTags ( char * );
-static char *CreateHtmlSubtitle( int *pi_align, char * );
-
+#include "substext.h"
/*****************************************************************************
* Module descriptor.
N_("Vietnamese (VISCII)"),
N_("Vietnamese (Windows-1258)"),
};
-/*
-SSA supports charset selection.
-The following known charsets are used:
-
-0 = Ansi - Western European
-1 = default
-2 = symbol
-3 = invalid
-77 = Mac
-128 = Japanese (Shift JIS)
-129 = Hangul
-130 = Johab
-134 = GB2312 Simplified Chinese
-136 = Big5 Traditional Chinese
-161 = Greek
-162 = Turkish
-163 = Vietnamese
-177 = Hebrew
-178 = Arabic
-186 = Baltic
-204 = Russian (Cyrillic)
-222 = Thai
-238 = Eastern European
-254 = PC 437
-*/
static const int pi_justification[] = { 0, 1, 2 };
static const char *const ppsz_justification_text[] = {
#define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
"VLC partly implements this, but you can choose to disable all formatting.")
+static int OpenDecoder ( vlc_object_t * );
+static void CloseDecoder ( vlc_object_t * );
vlc_module_begin ()
set_shortname( N_("Subtitles"))
set_category( CAT_INPUT )
set_subcategory( SUBCAT_INPUT_SCODEC )
- add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
+ add_integer( "subsdec-align", 0, ALIGN_TEXT, ALIGN_LONGTEXT,
false )
change_integer_list( pi_justification, ppsz_justification_text )
- add_string( "subsdec-encoding", "", NULL,
+ add_string( "subsdec-encoding", "",
ENCODING_TEXT, ENCODING_LONGTEXT, false )
change_string_list( ppsz_encodings, ppsz_encoding_names, 0 )
- add_bool( "subsdec-autodetect-utf8", true, NULL,
+ add_bool( "subsdec-autodetect-utf8", true,
AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, false )
- add_bool( "subsdec-formatted", true, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
+ add_bool( "subsdec-formatted", true, FORMAT_TEXT, FORMAT_LONGTEXT,
false )
vlc_module_end ()
+/*****************************************************************************
+ * Local prototypes
+ *****************************************************************************/
+#define NO_BREAKING_SPACE " "
+
+struct decoder_sys_t /* decoder private state, accessed through p_dec->p_sys */
+{
+ int i_align; /* Subtitles alignment on the vout; loaded from the "subsdec-align" variable and OR'ed with SUBPICTURE_ALIGN_BOTTOM when a subpicture is built */
+
+ vlc_iconv_t iconv_handle; /* iconv handle converting the input encoding to UTF-8; stays (vlc_iconv_t)-1 when no conversion was opened (or opening failed), and is only closed when it differs from that value */
+ bool b_autodetect_utf8; /* NOTE(review): presumably set when "subsdec-autodetect-utf8" is enabled and no explicit encoding was given -- the assignment is not visible in this excerpt, confirm */
+};
+
+
+static subpicture_t *DecodeBlock ( decoder_t *, block_t ** );
+static subpicture_t *ParseText ( decoder_t *, block_t * );
+static char *StripTags ( char * );
+static char *CreateHtmlSubtitle( int *pi_align, char * );
+
/*****************************************************************************
* OpenDecoder: probe the decoder and return score
*****************************************************************************
switch( p_dec->fmt_in.i_codec )
{
case VLC_CODEC_SUBT:
- case VLC_CODEC_SSA:
case VLC_CODEC_ITU_T140:
break;
default:
p_sys->i_align = 0;
p_sys->iconv_handle = (vlc_iconv_t)-1;
p_sys->b_autodetect_utf8 = false;
- p_sys->b_ass = false;
- p_sys->i_original_height = -1;
- p_sys->i_original_width = -1;
- TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
- TAB_INIT( p_sys->i_images, p_sys->pp_images );
- char *psz_charset = NULL;
+ const char *encoding;
+ char *var = NULL;
/* First try demux-specified encoding */
if( p_dec->fmt_in.i_codec == VLC_CODEC_ITU_T140 )
- psz_charset = strdup( "UTF-8" ); /* IUT T.140 is always using UTF-8 */
+ encoding = "UTF-8"; /* ITU T.140 always uses UTF-8 */
else
if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
{
- psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
+ encoding = p_dec->fmt_in.subs.psz_encoding;
msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
- p_dec->fmt_in.subs.psz_encoding ?
- p_dec->fmt_in.subs.psz_encoding : "not specified");
+ encoding);
}
-
+ else
/* Second, try configured encoding */
- if (psz_charset == NULL)
+ if ((var = var_InheritString (p_dec, "subsdec-encoding")) != NULL)
{
- psz_charset = var_InheritString (p_dec, "subsdec-encoding");
- msg_Dbg (p_dec, "trying configured character encoding: %s",
- psz_charset ? psz_charset : "not specified");
- if (psz_charset != NULL && !strcmp (psz_charset, "system"))
+ msg_Dbg (p_dec, "trying configured character encoding: %s", var);
+ if (!strcmp (var, "system"))
{
- free (psz_charset);
- psz_charset = strdup ("");
+ free (var);
+ var = NULL;
+ encoding = "";
/* ^ iconv() treats "" as nl_langinfo(CODESET) */
}
+ else
+ encoding = var;
}
-
+ else
/* Third, try "local" encoding with optional UTF-8 autodetection */
- if (psz_charset == NULL)
{
/* xgettext:
The Windows ANSI code page most commonly used for this language.
This MUST be a valid iconv character set. If unsure, please refer
the VideoLAN translators mailing list. */
- const char *acp = vlc_pgettext("GetACP", "CP1252");
-
- psz_charset = strdup (acp);
- msg_Dbg (p_dec, "trying default character encoding: %s",
- psz_charset ? psz_charset : "not specified");
-
+ encoding = vlc_pgettext("GetACP", "CP1252");
+ msg_Dbg (p_dec, "trying default character encoding: %s", encoding);
if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
{
msg_Dbg (p_dec, "using automatic UTF-8 detection");
}
}
- /* Forth, don't do character decoding, i.e. assume UTF-8 */
- if (psz_charset == NULL)
+ if (strcasecmp (encoding, "UTF-8") && strcasecmp (encoding, "utf8"))
{
- psz_charset = strdup ("UTF-8");
- msg_Dbg (p_dec, "using UTF-8 character encoding" );
- }
-
- if ((psz_charset != NULL)
- && strcasecmp (psz_charset, "UTF-8")
- && strcasecmp (psz_charset, "utf8"))
- {
- p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
+ p_sys->iconv_handle = vlc_iconv_open ("UTF-8", encoding);
if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
- msg_Err (p_dec, "cannot convert from %s: %m", psz_charset);
+ msg_Err (p_dec, "cannot convert from %s: %m", encoding);
}
- free (psz_charset);
+ free (var);
p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );
- if( p_dec->fmt_in.i_codec == VLC_CODEC_SSA
- && var_InheritBool( p_dec, "subsdec-formatted" ) )
- {
- if( p_dec->fmt_in.i_extra > 0 )
- ParseSSAHeader( p_dec );
- }
-
return VLC_SUCCESS;
}
if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
vlc_iconv_close( p_sys->iconv_handle );
- if( p_sys->pp_ssa_styles )
- {
- int i;
- for( i = 0; i < p_sys->i_ssa_styles; i++ )
- {
- if( !p_sys->pp_ssa_styles[i] )
- continue;
-
- free( p_sys->pp_ssa_styles[i]->psz_stylename );
- free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname );
- free( p_sys->pp_ssa_styles[i] );
- }
- TAB_CLEAN( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
- }
- if( p_sys->pp_images )
- {
- int i;
- for( i = 0; i < p_sys->i_images; i++ )
- {
- if( !p_sys->pp_images[i] )
- continue;
-
- if( p_sys->pp_images[i]->p_pic )
- picture_Release( p_sys->pp_images[i]->p_pic );
- free( p_sys->pp_images[i]->psz_filename );
-
- free( p_sys->pp_images[i] );
- }
- TAB_CLEAN( p_sys->i_images, p_sys->pp_images );
- }
-
free( p_sys );
}
decoder_sys_t *p_sys = p_dec->p_sys;
subpicture_t *p_spu = NULL;
char *psz_subtitle = NULL;
- video_format_t fmt;
/* We cannot display a subpicture with no date */
if( p_block->i_pts <= VLC_TS_INVALID )
}
/* Create the subpicture unit */
- p_spu = decoder_NewSubpicture( p_dec, NULL );
+ p_spu = decoder_NewSubpictureText( p_dec );
if( !p_spu )
{
- msg_Warn( p_dec, "can't get spu buffer" );
free( psz_subtitle );
return NULL;
}
+ p_spu->i_start = p_block->i_pts;
+ p_spu->i_stop = p_block->i_pts + p_block->i_length;
+ p_spu->b_ephemer = (p_block->i_length == 0);
+ p_spu->b_absolute = false;
- /* Create a new subpicture region */
- memset( &fmt, 0, sizeof(video_format_t) );
- fmt.i_chroma = VLC_CODEC_TEXT;
- fmt.i_width = fmt.i_height = 0;
- fmt.i_x_offset = fmt.i_y_offset = 0;
- p_spu->p_region = subpicture_region_New( &fmt );
- if( !p_spu->p_region )
- {
- msg_Err( p_dec, "cannot allocate SPU region" );
- free( psz_subtitle );
- decoder_DeleteSubpicture( p_dec, p_spu );
- return NULL;
- }
+ subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys;
- /* Decode and format the subpicture unit */
- if( p_dec->fmt_in.i_codec != VLC_CODEC_SSA )
- {
- /* Normal text subs, easy markup */
- p_spu->p_region->i_align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
- p_spu->p_region->i_x = p_sys->i_align ? 20 : 0;
- p_spu->p_region->i_y = 10;
+ p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
+ p_spu_sys->text = StripTags( psz_subtitle );
+ if( var_InheritBool( p_dec, "subsdec-formatted" ) )
+ p_spu_sys->html = CreateHtmlSubtitle( &p_spu_sys->align, psz_subtitle );
- /* Remove formatting from string */
-
- p_spu->p_region->psz_text = StripTags( psz_subtitle );
- if( var_InheritBool( p_dec, "subsdec-formatted" ) )
- {
- p_spu->p_region->psz_html = CreateHtmlSubtitle( &p_spu->p_region->i_align, psz_subtitle );
- }
-
- p_spu->i_start = p_block->i_pts;
- p_spu->i_stop = p_block->i_pts + p_block->i_length;
- p_spu->b_ephemer = (p_block->i_length == 0);
- p_spu->b_absolute = false;
- }
- else
- {
- /* Decode SSA/USF strings */
- ParseSSAString( p_dec, psz_subtitle, p_spu );
-
- p_spu->i_start = p_block->i_pts;
- p_spu->i_stop = p_block->i_pts + p_block->i_length;
- p_spu->b_ephemer = (p_block->i_length == 0);
- p_spu->b_absolute = false;
- p_spu->i_original_picture_width = p_sys->i_original_width;
- p_spu->i_original_picture_height = p_sys->i_original_height;
- }
free( psz_subtitle );
return p_spu;
}
-char* GotoNextLine( char *psz_text )
-{
- char *p_newline = psz_text;
-
- while( p_newline[0] != '\0' )
- {
- if( p_newline[0] == '\n' || p_newline[0] == '\r' )
- {
- p_newline++;
- while( p_newline[0] == '\n' || p_newline[0] == '\r' )
- p_newline++;
- break;
- }
- else p_newline++;
- }
- return p_newline;
-}
-
/* Function now handles tags with attribute values, and tries
* to deal with &' commands too. It no longer modifies the string
* in place, so that the original text can be reused
*psz_text++ = *psz_subtitle;
}
+ /* Security fix: Account for the case where input ends early */
+ if( *psz_subtitle == '\0' ) break;
+
psz_subtitle++;
}
*psz_text = '\0';
psz_subtitle++;
i_len = strcspn( psz_subtitle, "\"" );
}
+ else if( *psz_subtitle == '\'' )
+ {
+ psz_subtitle++;
+ i_len = strcspn( psz_subtitle, "'" );
+ }
else
{
i_len = strcspn( psz_subtitle, " \t>" );
}
HtmlPut( &psz_html, "\"" );
- if( !strcmp( psz_attribs[ k ], "color=" ) && *psz_subtitle >= '0' && *psz_subtitle <= '9' )
- HtmlPut( &psz_html, "#" );
HtmlNPut( &psz_html, psz_subtitle, i_len );
HtmlPut( &psz_html, "\"" );
psz_subtitle += i_len;
- if( *psz_subtitle == '\"' )
+ if( *psz_subtitle == '\"' || *psz_subtitle == '\'' )
psz_subtitle++;
break;
}
/* Hide {\stupidity} */
psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
}
- else if( psz_subtitle[0] == '{' && psz_subtitle[1] == 'Y'
+ else if( psz_subtitle[0] == '{' &&
+ ( psz_subtitle[1] == 'Y' || psz_subtitle[1] == 'y' )
&& psz_subtitle[2] == ':' && strchr( psz_subtitle, '}' ) )
{
- /* Hide {Y:stupidity} */
+ // FIXME: We don't distinguish between Y and y, and we should.
+ if( psz_subtitle[3] == 'i' )
+ {
+ HtmlPut( &psz_html, "<i>" );
+ strcat( psz_tag, "i" );
+ }
+ if( psz_subtitle[3] == 'b' )
+ {
+ HtmlPut( &psz_html, "<b>" );
+ strcat( psz_tag, "b" );
+ }
+ if( psz_subtitle[3] == 'u' )
+ {
+ HtmlPut( &psz_html, "<u>" );
+ strcat( psz_tag, "u" );
+ }
+ psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
+ }
+ else if( psz_subtitle[0] == '{' && psz_subtitle[2] == ':' && strchr( psz_subtitle, '}' ) )
+ {
+ // Hide other {x:y} atrocities, like {c:$bbggrr} or {P:x}
psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
}
else if( psz_subtitle[0] == '\\' && psz_subtitle[1] )