]> git.sesse.net Git - vlc/blobdiff - modules/codec/subsdec.c
Also control plugin export table
[vlc] / modules / codec / subsdec.c
index 343c65d2b318e482f8695942eb89ab731968e79f..ee5ef448082e4c8b516dc39ea05996c45822140c 100644 (file)
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * subsdec.c : text subtitles decoder
  *****************************************************************************
- * Copyright (C) 2000-2001 the VideoLAN team
+ * Copyright (C) 2000-2006 the VideoLAN team
  * $Id$
  *
  * Authors: Gildas Bazin <gbazin@videolan.org>
@@ -53,6 +53,7 @@ struct decoder_sys_t
     int                 i_original_width;
     int                 i_align;          /* Subtitles alignment on the vout */
     vlc_iconv_t         iconv_handle;            /* handle to iconv instance */
+    vlc_bool_t          b_autodetect_utf8;
 
     ssa_style_t         **pp_ssa_styles;
     int                 i_ssa_styles;
@@ -98,6 +99,31 @@ static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
     "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
     "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
     "HPROMAN8", "NEXTSTEP" };
+/*
+SSA supports charset selection.
+The following known charsets are used:
+
+0 = Ansi - Western European
+1 = default
+2 = symbol
+3 = invalid
+77 = Mac
+128 = Japanese (Shift JIS)
+129 = Hangul
+130 = Johab
+134 = GB2312 Simplified Chinese
+136 = Big5 Traditional Chinese
+161 = Greek
+162 = Turkish
+163 = Vietnamese
+177 = Hebrew
+178 = Arabic
+186 = Baltic
+204 = Russian (Cyrillic)
+222 = Thai
+238 = Eastern European
+254 = PC 437
+*/
 
 static int  pi_justification[] = { 0, 1, 2 };
 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
@@ -106,9 +132,12 @@ static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
 #define ALIGN_TEXT N_("Subtitles justification")
 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
+#define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
+#define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
+            "UTF-8 encoding within subtitles files.")
 #define FORMAT_TEXT N_("Formatted Subtitles")
-#define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting.\
            VLC partly implements this, but you can choose to disable all formatting.")
+#define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
"VLC partly implements this, but you can choose to disable all formatting.")
 
 
 vlc_module_begin();
@@ -125,6 +154,8 @@ vlc_module_begin();
     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
         change_string_list( ppsz_encodings, 0, 0 );
+    add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
+              AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
     add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
                  VLC_FALSE );
 vlc_module_end();
@@ -154,12 +185,13 @@ static int OpenDecoder( vlc_object_t *p_this )
           (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
     {
         msg_Err( p_dec, "out of memory" );
-        return VLC_EGENERIC;
+        return VLC_ENOMEM;
     }
-    
+
     /* init of p_sys */
     p_sys->i_align = 0;
     p_sys->iconv_handle = (vlc_iconv_t)-1;
+    p_sys->b_autodetect_utf8 = VLC_FALSE;
     p_sys->b_ass = VLC_FALSE;
     p_sys->i_original_height = -1;
     p_sys->i_original_width = -1;
@@ -168,7 +200,7 @@ static int OpenDecoder( vlc_object_t *p_this )
 
     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
     {
-        msg_Dbg( p_dec, "using character encoding: %s",
+        msg_Dbg( p_dec, "using demux suggested character encoding: %s",
                  p_dec->fmt_in.subs.psz_encoding );
         if( strcmp( p_dec->fmt_in.subs.psz_encoding, "UTF-8" ) )
             p_sys->iconv_handle = vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
@@ -180,24 +212,21 @@ static int OpenDecoder( vlc_object_t *p_this )
         var_Get( p_dec, "subsdec-encoding", &val );
         if( !strcmp( val.psz_string, DEFAULT_NAME ) )
         {
-            char *psz_charset =(char*)malloc( 100 );  
-#ifdef __APPLE__
-            /* Most subtitles are not in UTF-8, which is the default on Mac OS X */
-            sprintf( psz_charset, "ISO-8859-1" );
-#else
-            vlc_current_charset( &psz_charset );
-#endif
-            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );  
-            msg_Dbg( p_dec, "using default character encoding: %s", psz_charset );  
-            free( psz_charset );
+            const char *psz_charset = GetFallbackEncoding();
+
+            p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec,
+                    "subsdec-autodetect-utf8" );
+
+            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
+            msg_Dbg( p_dec, "using fallback character encoding: %s", psz_charset );
         }
         else if( !strcmp( val.psz_string, "UTF-8" ) )
         {
-            msg_Dbg( p_dec, "using character encoding: UTF-8" );
+            msg_Dbg( p_dec, "using enforced character encoding: UTF-8" );
         }
         else if( val.psz_string )
         {
-            msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
+            msg_Dbg( p_dec, "using enforced character encoding: %s", val.psz_string );
             p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
             if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
             {
@@ -206,7 +235,7 @@ static int OpenDecoder( vlc_object_t *p_this )
         }
         if( val.psz_string ) free( val.psz_string );
     }
-    
+
     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
     var_Get( p_dec, "subsdec-align", &val );
     p_sys->i_align = val.i_int;
@@ -251,7 +280,7 @@ static void CloseDecoder( vlc_object_t *p_this )
     {
         vlc_iconv_close( p_sys->iconv_handle );
     }
-    
+
     if( p_sys->pp_ssa_styles )
     {
         int i;
@@ -296,35 +325,51 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
     /* Should be resilient against bad subtitles */
     psz_subtitle = strndup( (const char *)p_block->p_buffer,
                             p_block->i_buffer );
+    if( psz_subtitle == NULL )
+        return NULL;
 
-    if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
+    if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
+        EnsureUTF8( psz_subtitle );
+    else
     {
-        char *psz_new_subtitle;
-        char *psz_convert_buffer_out;
-        char *psz_convert_buffer_in;
-        size_t ret, inbytes_left, outbytes_left;
-
-        psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
-        psz_convert_buffer_out = psz_new_subtitle;
-        psz_convert_buffer_in = psz_subtitle;
-        inbytes_left = strlen( psz_subtitle );
-        outbytes_left = 6 * inbytes_left;
-        ret = vlc_iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
-                         &inbytes_left, &psz_convert_buffer_out,
-                         &outbytes_left );
-        *psz_convert_buffer_out = '\0';
 
-        if( psz_subtitle ) free( psz_subtitle );
-        psz_subtitle = NULL;
+        if( p_sys->b_autodetect_utf8 )
+        {
+            if( IsUTF8( psz_subtitle ) == NULL )
+            {
+                msg_Dbg( p_dec, "invalid UTF-8 sequence: "
+                         "disabling UTF-8 subtitles autodetection" );
+                p_sys->b_autodetect_utf8 = VLC_FALSE;
+            }
+        }
 
-        if( inbytes_left )
+        if( !p_sys->b_autodetect_utf8 )
         {
-            msg_Err( p_dec, _("Failed to convert subtitle encoding.\n"
-                      "Try manually setting a character-encoding "
-                      "before you open the file.") );
-            return NULL;
+            size_t inbytes_left = strlen( psz_subtitle );
+            size_t outbytes_left = 6 * inbytes_left;
+            char *psz_new_subtitle = malloc( outbytes_left + 1 );
+            char *psz_convert_buffer_out = psz_new_subtitle;
+            const char *psz_convert_buffer_in = psz_subtitle;
+
+            size_t ret = vlc_iconv( p_sys->iconv_handle,
+                                    &psz_convert_buffer_in, &inbytes_left,
+                                    &psz_convert_buffer_out, &outbytes_left );
+
+            *psz_convert_buffer_out++ = '\0';
+            free( psz_subtitle );
+
+            if( ( ret == (size_t)(-1) ) || inbytes_left )
+            {
+                free( psz_new_subtitle );
+                msg_Err( p_dec, _("failed to convert subtitle encoding.\n"
+                        "Try manually setting a character-encoding "
+                                "before you open the file.") );
+                return NULL;
+            }
+
+            psz_subtitle = realloc( psz_new_subtitle,
+                                    psz_convert_buffer_out - psz_new_subtitle );
         }
-        psz_subtitle = psz_new_subtitle;
     }
 
     /* Create the subpicture unit */
@@ -336,6 +381,8 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
         return NULL;
     }
 
+    p_spu->b_pausable = VLC_TRUE;
+
     /* Create a new subpicture region */
     memset( &fmt, 0, sizeof(video_format_t) );
     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
@@ -358,11 +405,11 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
         p_spu->i_flags = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
         p_spu->i_x = p_sys->i_align ? 20 : 0;
         p_spu->i_y = 10;
-        
+
         /* Remove formatting from string */
         StripTags( psz_subtitle );
-        
-        p_spu->p_region->psz_text = psz_subtitle;        
+
+        p_spu->p_region->psz_text = psz_subtitle;
         p_spu->i_start = p_block->i_pts;
         p_spu->i_stop = p_block->i_pts + p_block->i_length;
         p_spu->b_ephemer = (p_block->i_length == 0);
@@ -497,7 +544,7 @@ static void ParseSSAString( decoder_t *p_dec, char *psz_subtitle, subpicture_t *
 static char* GotoNextLine( char *psz_text )
 {
     char *p_newline = psz_text;
-    
+
     while( p_newline[0] != '\0' )
     {
         if( p_newline[0] == '\n' || p_newline[0] == '\r' )
@@ -525,12 +572,12 @@ static void ParseColor( decoder_t *p_dec, char *psz_color, int *pi_color, int *p
         i_color = (int) strtol( psz_color+2, NULL, 16 );
     }
     else i_color = (int) strtol( psz_color, NULL, 0 );
-    
+
     *pi_color = 0;
     *pi_color |= ( ( i_color & 0x000000FF ) << 16 ); /* Red */
     *pi_color |= ( ( i_color & 0x0000FF00 ) );       /* Green */
     *pi_color |= ( ( i_color & 0x00FF0000 ) >> 16 ); /* Blue */
-    
+
     if( pi_alpha != NULL )
         *pi_alpha = ( i_color & 0xFF000000 ) >> 24;
 }
@@ -544,10 +591,10 @@ static void ParseSSAHeader( decoder_t *p_dec )
     char *psz_parser = NULL;
     char *psz_header = malloc( p_dec->fmt_in.i_extra+1 );
     int i_section_type = 1;
-    
+
     memcpy( psz_header, p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra );
     psz_header[ p_dec->fmt_in.i_extra] = '\0';
-    
+
     /* Handle [Script Info] section */
     psz_parser = strcasestr( psz_header, "[Script Info]" );
     if( psz_parser == NULL ) goto eof;
@@ -558,7 +605,7 @@ static void ParseSSAHeader( decoder_t *p_dec )
     {
         int temp;
         char buffer_text[MAX_LINE + 1];
-        
+
         if( psz_parser[0] == '!' || psz_parser[0] == ';' ) /* comment */;
         else if( sscanf( psz_parser, "PlayResX: %d", &temp ) == 1 )
             p_sys->i_original_width = ( temp > 0 ) ? temp : -1;
@@ -609,7 +656,7 @@ static void ParseSSAHeader( decoder_t *p_dec )
                     p_style->font_style.i_style_flags = 0;
                     if( i_bold ) p_style->font_style.i_style_flags |= STYLE_BOLD;
                     if( i_italic ) p_style->font_style.i_style_flags |= STYLE_ITALIC;
-    
+
                     if( i_border == 1 ) p_style->font_style.i_style_flags |= (STYLE_ITALIC | STYLE_OUTLINE);
                     else if( i_border == 3 )
                     {
@@ -671,7 +718,7 @@ static void ParseSSAHeader( decoder_t *p_dec )
                     p_style->font_style.i_outline_width = ( i_border == 1 ) ? i_outline : 0;
                     p_style->font_style.i_spacing = i_spacing;
                     //p_style->font_style.f_angle = f_angle;
-                    
+
                     p_style->i_align = 0;
                     if( i_align == 0x1 || i_align == 0x4 || i_align == 0x1 ) p_style->i_align |= SUBPICTURE_ALIGN_LEFT;
                     if( i_align == 0x3 || i_align == 0x6 || i_align == 0x9 ) p_style->i_align |= SUBPICTURE_ALIGN_RIGHT;
@@ -679,7 +726,7 @@ static void ParseSSAHeader( decoder_t *p_dec )
                     if( i_align == 0x1 || i_align == 0x2 || i_align == 0x3 ) p_style->i_align |= SUBPICTURE_ALIGN_BOTTOM;
                     p_style->i_margin_h = ( p_style->i_align & SUBPICTURE_ALIGN_RIGHT ) ? i_margin_r : i_margin_l;
                     p_style->i_margin_v = i_margin_v;
-                    
+
                     /*TODO: Ignored: angle i_scale_x|y (fontscaling), i_encoding */
                     TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
                 }