X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=modules%2Fdemux%2Fsubtitle.c;h=f7b9ae3a488446db350851f491a2c4e90fa2ce95;hb=688f0a8d8913f20ac5d3e27bf4f2bbe35c9f5de7;hp=8d481102e1d8b5082900e7f86d29f16211496ba9;hpb=70b2027658b66231dc15e37e50693f1135cae614;p=vlc diff --git a/modules/demux/subtitle.c b/modules/demux/subtitle.c index 8d481102e1..f7b9ae3a48 100644 --- a/modules/demux/subtitle.c +++ b/modules/demux/subtitle.c @@ -31,7 +31,7 @@ # include "config.h" #endif -#include +#include #include #include @@ -57,20 +57,22 @@ static void Close( vlc_object_t *p_this ); "This will only work with MicroDVD and SubRIP (SRT) subtitles.") #define SUB_TYPE_LONGTEXT \ N_("Force the subtiles format. Valid values are : \"microdvd\", " \ - "\"subrip\", \"ssa1\", \"ssa2-4\", \"ass\", \"vplayer\" " \ - "\"sami\", \"dvdsubtitle\", \"mpl2\", \"aqt\", \"pjs\" "\ - "\"mpsub\" \"jacosub\" \"psb\" and \"auto\" (meaning autodetection, this " \ - "should always work).") -static const char *ppsz_sub_type[] = + "\"subrip\", \"subviewer\", \"ssa1\", \"ssa2-4\", \"ass\", \"vplayer\", " \ + "\"sami\", \"dvdsubtitle\", \"mpl2\", \"aqt\", \"pjs\", "\ + "\"mpsub\", \"jacosub\", \"psb\", \"realtext\", \"dks\", \"subviewer1\", " \ + " and \"auto\" (meaning autodetection, this should always work).") + +static const char *const ppsz_sub_type[] = { "auto", "microdvd", "subrip", "subviewer", "ssa1", "ssa2-4", "ass", "vplayer", "sami", "dvdsubtitle", "mpl2", - "aqt", "pjs", "mpsub", "jacosub", "psb" + "aqt", "pjs", "mpsub", "jacosub", "psb", "realtext", "dks", + "subviewer1" }; vlc_module_begin(); - set_shortname( _("Subtitles")); - set_description( _("Text subtitles parser") ); + set_shortname( N_("Subtitles")); + set_description( N_("Text subtitles parser") ); set_capability( "demux", 0 ); set_category( CAT_INPUT ); set_subcategory( SUBCAT_INPUT_DEMUX ); @@ -101,15 +103,18 @@ enum SUB_TYPE_ASS, SUB_TYPE_VPLAYER, SUB_TYPE_SAMI, - SUB_TYPE_SUBVIEWER, //SUBVIEWER 2! 
- SUB_TYPE_DVDSUBTITLE, + SUB_TYPE_SUBVIEWER, /* SUBVIEWER 2 */ + SUB_TYPE_DVDSUBTITLE, /* Mplayer calls it subviewer2 */ SUB_TYPE_MPL2, SUB_TYPE_AQT, SUB_TYPE_PJS, SUB_TYPE_MPSUB, SUB_TYPE_JACOSUB, SUB_TYPE_PSB, - SUB_TYPE_RT + SUB_TYPE_RT, + SUB_TYPE_DKS, + SUB_TYPE_SUBVIEW1 /* SUBVIEWER 1 - mplayer calls it subrip09, + and Gnome subtitles SubViewer 1.0 */ }; typedef struct @@ -162,6 +167,8 @@ static int ParseMPSub ( demux_t *, subtitle_t *, int ); static int ParseJSS ( demux_t *, subtitle_t *, int ); static int ParsePSB ( demux_t *, subtitle_t *, int ); static int ParseRealText ( demux_t *, subtitle_t *, int ); +static int ParseDKS ( demux_t *, subtitle_t *, int ); +static int ParseSubViewer1 ( demux_t *, subtitle_t *, int ); static struct { @@ -187,15 +194,11 @@ static struct { "jacosub", SUB_TYPE_JACOSUB, "JacoSub", ParseJSS }, { "psb", SUB_TYPE_PSB, "PowerDivx", ParsePSB }, { "realtext", SUB_TYPE_RT, "RealText", ParseRealText }, + { "dks", SUB_TYPE_DKS, "DKS", ParseDKS }, + { "subviewer1", SUB_TYPE_SUBVIEW1, "Subviewer 1", ParseSubViewer1 }, { NULL, SUB_TYPE_UNKNOWN, "Unknown", NULL } }; -/* Missing Detect - SubViewer 1 - Subrip09 - */ - - static int Demux( demux_t * ); static int Control( demux_t *, int, va_list ); @@ -233,7 +236,7 @@ static int Open ( vlc_object_t *p_this ) p_sys->i_microsecperframe = 40000; /* Get the FPS */ - f_fps = var_CreateGetFloat( p_demux, "sub-original-fps" ); + f_fps = var_CreateGetFloat( p_demux, "sub-original-fps" ); /* FIXME */ if( f_fps >= 1.0 ) p_sys->i_microsecperframe = (int64_t)( (float)1000000 / f_fps ); @@ -250,7 +253,7 @@ static int Open ( vlc_object_t *p_this ) /* Get or probe the type */ p_sys->i_type = SUB_TYPE_UNKNOWN; psz_type = var_CreateGetString( p_demux, "sub-type" ); - if( *psz_type ) + if( psz_type && *psz_type ) { int i; @@ -333,10 +336,13 @@ static int Open ( vlc_object_t *p_this ) p_sys->i_type = SUB_TYPE_SUBVIEWER; /* I hope this will work */ break; } - else if( sscanf( s, "%d:%d:%d.%d %d:%d:%d", 
&i_dummy, &i_dummy, &i_dummy, &i_dummy, &i_dummy, &i_dummy, &i_dummy ) == 7 || - sscanf( s, "@%d @%d", &i_dummy, &i_dummy) == 2) + else if( sscanf( s, "%d:%d:%d.%d %d:%d:%d", + &i_dummy, &i_dummy, &i_dummy, &i_dummy, + &i_dummy, &i_dummy, &i_dummy ) == 7 || + sscanf( s, "@%d @%d", &i_dummy, &i_dummy) == 2) { p_sys->i_type = SUB_TYPE_JACOSUB; + break; } else if( sscanf( s, "%d:%d:%d:", &i_dummy, &i_dummy, &i_dummy ) == 3 || sscanf( s, "%d:%d:%d ", &i_dummy, &i_dummy, &i_dummy ) == 3 ) @@ -350,6 +356,17 @@ static int Open ( vlc_object_t *p_this ) p_sys->i_type = SUB_TYPE_DVDSUBTITLE; break; } + else if( sscanf( s, "[%d:%d:%d]%c", + &i_dummy, &i_dummy, &i_dummy, &p_dummy ) == 4 ) + { + p_sys->i_type = SUB_TYPE_DKS; + break; + } + else if( strstr( s, "*** START SCRIPT" ) ) + { + p_sys->i_type = SUB_TYPE_SUBVIEW1; + break; + } else if( sscanf( s, "[%d][%d]", &i_dummy, &i_dummy ) == 2 || sscanf( s, "[%d][]", &i_dummy ) == 1) { @@ -361,23 +378,28 @@ static int Open ( vlc_object_t *p_this ) && p_dummy =='E' ) ) { p_sys->i_type = SUB_TYPE_MPSUB; + break; } else if( sscanf( s, "-->> %d", &i_dummy) == 1 ) { p_sys->i_type = SUB_TYPE_AQT; + break; } else if( sscanf( s, "%d,%d,", &i_dummy, &i_dummy ) == 2 ) { p_sys->i_type = SUB_TYPE_PJS; + break; } else if( sscanf( s, "{%d:%d:%d}", &i_dummy, &i_dummy, &i_dummy ) == 3 ) { p_sys->i_type = SUB_TYPE_PSB; + break; } else if( strcasestr( s, "i_type = SUB_TYPE_RT; + break; } free( s ); @@ -427,7 +449,6 @@ static int Open ( vlc_object_t *p_this ) if( !( p_sys->subtitle = realloc( p_sys->subtitle, sizeof(subtitle_t) * i_max ) ) ) { - msg_Err( p_demux, "out of memory"); free( p_sys->subtitle ); TextUnload( &p_sys->txt ); free( p_sys ); @@ -893,7 +914,7 @@ static int ParseSubRipSubViewer( demux_t *p_demux, subtitle_t *p_subtitle, if( b_replace_br ) { char *p; - + while( ( p = strstr( psz_text, "[br]" ) ) ) { *p++ = '\n'; @@ -1333,7 +1354,7 @@ static int ParseAQT( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) /* We have been too 
far: end of the subtitle, begin of next */ else { - txt->i_line--; + TextPreviousLine( txt ); break; } } @@ -1448,8 +1469,10 @@ static int ParseMPSub( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) free( psz_temp ); } /* Data Lines */ - if( sscanf (s, "%f %f", &f1, &f2 ) == 2 ) + f1 = us_strtod( s, &psz_temp ); + if( *psz_temp ) { + f2 = us_strtod( psz_temp, NULL ); mpsub_total += f1 * mpsub_factor; p_subtitle->i_start = (int64_t)(10000.0 * mpsub_total); mpsub_total += f2 * mpsub_factor; @@ -1505,10 +1528,10 @@ static int ParseJSS( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) if( !s ) return VLC_EGENERIC; - psz_text = malloc( strlen( s ) + 1 ); - if( !psz_text ) + psz_orig = malloc( strlen( s ) + 1 ); + if( !psz_orig ) return VLC_ENOMEM; - psz_orig = psz_text; + psz_text = psz_orig; /* Complete time lines */ if( sscanf( s, "%d:%d:%d.%d %d:%d:%d.%d %[^\n\r]", @@ -1520,6 +1543,7 @@ static int ParseJSS( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) p_subtitle->i_stop = ( (int64_t)( h2 *3600 + m2 * 60 + s2 ) + (int64_t)( ( f2 + jss_time_shift ) / jss_time_resolution ) ) * 1000000; + break; } /* Short time lines */ else if( sscanf( s, "@%d @%d %[^\n\r]", &f1, &f2, psz_text ) == 3 ) @@ -1528,6 +1552,7 @@ static int ParseJSS( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) ( f1 + jss_time_shift ) / jss_time_resolution * 1000000.0 ); p_subtitle->i_stop = (int64_t)( ( f2 + jss_time_shift ) / jss_time_resolution * 1000000.0 ); + break; } /* General Directive lines */ /* Only TIME and SHIFT are supported so far */ @@ -1584,123 +1609,136 @@ static int ParseJSS( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) sscanf( &psz_text[shift], "%d", &jss_time_resolution ); break; } - free( psz_text ); + free( psz_orig ); continue; } else /* Unkown type line, probably a comment */ { - free( psz_text ); + free( psz_orig ); continue; } + } + + while( psz_text[ strlen( psz_text ) - 1 ] == '\\' ) + { + const char *s2 = TextGetLine( txt ); - /* Skip the 
blanks */ - while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++; + if( !s2 ) + return VLC_EGENERIC; - /* Parse the directives */ - if( isalpha( *psz_text ) || *psz_text == '[' ) - { - while( *psz_text != ' ' ) - { psz_text++ ;}; + int i_len = strlen( s2 ); + if( i_len == 0 ) + break; - /* Directives are NOT parsed yet */ - /* This has probably a better place in a decoder ? */ - /* directive = malloc( strlen( psz_text ) + 1 ); - if( sscanf( psz_text, "%s %[^\n\r]", directive, psz_text2 ) == 2 )*/ - } + int i_old = strlen( psz_text ); + + psz_text = realloc( psz_text, i_old + i_len + 1 ); + if( !psz_text ) + return VLC_ENOMEM; + + psz_orig = psz_text; + strcat( psz_text, s2 ); + } + + /* Skip the blanks */ + while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++; - /* Skip the blanks after directives */ - while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++; + /* Parse the directives */ + if( isalpha( *psz_text ) || *psz_text == '[' ) + { + while( *psz_text != ' ' ) + { psz_text++ ;}; + + /* Directives are NOT parsed yet */ + /* This has probably a better place in a decoder ? 
*/ + /* directive = malloc( strlen( psz_text ) + 1 ); + if( sscanf( psz_text, "%s %[^\n\r]", directive, psz_text2 ) == 2 )*/ + } + /* Skip the blanks after directives */ + while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++; - /* Clean all the lines from inline comments and other stuffs */ - psz_text2 = calloc( strlen( psz_text) + 1, 1 ); - psz_orig2 = psz_text2; + /* Clean all the lines from inline comments and other stuffs */ + psz_orig2 = calloc( strlen( psz_text) + 1, 1 ); + psz_text2 = psz_orig2; - for( ; *psz_text != '\0' && *psz_text != '\n' && *psz_text != '\r'; ) + for( ; *psz_text != '\0' && *psz_text != '\n' && *psz_text != '\r'; ) + { + switch( *psz_text ) { - switch( *psz_text ) + case '{': + i_comment++; + break; + case '}': + if( i_comment ) { - case '{': - i_comment++; + i_comment = 0; + if( (*(psz_text + 1 ) ) == ' ' ) psz_text++; + } + break; + case '~': + if( !i_comment ) + { + *psz_text2 = ' '; + psz_text2++; + } + break; + case ' ': + case '\t': + if( (*(psz_text + 1 ) ) == ' ' || (*(psz_text + 1 ) ) == '\t' ) break; - case '}': - if( i_comment ) - { - i_comment = 0; - if( (*(psz_text + 1 ) ) == ' ' ) psz_text++; - } + if( !i_comment ) + { + *psz_text2 = ' '; + psz_text2++; + } + break; + case '\\': + if( (*(psz_text + 1 ) ) == 'n' ) + { + *psz_text2 = '\n'; + psz_text++; + psz_text2++; break; - case '~': - if( !i_comment ) - { - *psz_text2 = ' '; - psz_text2++; - } + } + if( ( toupper(*(psz_text + 1 ) ) == 'C' ) || + ( toupper(*(psz_text + 1 ) ) == 'F' ) ) + { + psz_text++; psz_text++; break; - case ' ': - case '\t': - if( (*(psz_text + 1 ) ) == ' ' || (*(psz_text + 1 ) ) == '\t' ) - break; - if( !i_comment ) - { - *psz_text2 = ' '; - psz_text2++; - } + } + if( (*(psz_text + 1 ) ) == 'B' || (*(psz_text + 1 ) ) == 'b' || + (*(psz_text + 1 ) ) == 'I' || (*(psz_text + 1 ) ) == 'i' || + (*(psz_text + 1 ) ) == 'U' || (*(psz_text + 1 ) ) == 'u' || + (*(psz_text + 1 ) ) == 'D' || (*(psz_text + 1 ) ) == 'N' ) + { + psz_text++; break; - case 
'\\': - if( (*(psz_text + 1 ) ) == 'n' ) - { - *psz_text2 = '\n'; - psz_text++; - psz_text2++; - break; - } - if( ( toupper(*(psz_text + 1 ) ) == 'C' ) || - ( toupper(*(psz_text + 1 ) ) == 'F' ) ) - { - psz_text++; psz_text++; - break; - } - if( (*(psz_text + 1 ) ) == 'B' || (*(psz_text + 1 ) ) == 'b' || - (*(psz_text + 1 ) ) == 'I' || (*(psz_text + 1 ) ) == 'i' || - (*(psz_text + 1 ) ) == 'U' || (*(psz_text + 1 ) ) == 'u' || - (*(psz_text + 1 ) ) == 'D' || (*(psz_text + 1 ) ) == 'N' ) - { - psz_text++; - break; - } - if( (*(psz_text + 1 ) ) == '~' || (*(psz_text + 1 ) ) == '{' || - (*(psz_text + 1 ) ) == '\\' ) - psz_text++; - else if( *(psz_text + 1 ) == '\r' || *(psz_text + 1 ) == '\n' - || *(psz_text + 1 ) == '\0' ) - { - char *s2 = TextGetLine( txt ); - if( !s2 ) - return VLC_EGENERIC; - - while ( *s2 == ' ' ) s2++; - - /* Here to parse the second line, we should add s2 to - psz_text and go on the for( ) line 1556 in order to - parse the next line. - */ - } - default: - if( !i_comment ) - { - *psz_text2 = *psz_text; - psz_text2++; - } } - psz_text++; + if( (*(psz_text + 1 ) ) == '~' || (*(psz_text + 1 ) ) == '{' || + (*(psz_text + 1 ) ) == '\\' ) + psz_text++; + else if( *(psz_text + 1 ) == '\r' || *(psz_text + 1 ) == '\n' || + *(psz_text + 1 ) == '\0' ) + { + psz_text++; + } + break; + default: + if( !i_comment ) + { + *psz_text2 = *psz_text; + psz_text2++; + } } - - p_subtitle->psz_text = psz_orig2; - free( psz_orig ); - return VLC_SUCCESS; + psz_text++; } + + p_subtitle->psz_text = psz_orig2; + msg_Dbg( p_demux, "%s", p_subtitle->psz_text ); + free( psz_orig ); + return VLC_SUCCESS; } static int ParsePSB( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) @@ -1792,7 +1830,7 @@ static int ParseRealText( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx ) { /* Line has begin and end */ if( ( sscanf( psz_temp, - "<%*[t|T]ime %*[b|B]egin=\"%[^\"]\" %*[e|E]nd=\"%[^\"]%*[^>]%[^\n\r]", + "<%*[t|T]ime %*[b|B]egin=\"%[^\"]\" %*[e|E]nd=\"%[^\"]%*[^>]%[^\n\r]", 
psz_begin, psz_end, psz_text) != 3 ) &&
                   /* Line has begin and no end */
                   ( sscanf( psz_temp,
@@ -1837,7 +1875,7 @@ static int ParseRealText( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx )
             if( strcasestr( s, "i_line--;
+                TextPreviousLine( txt );
                 break;
             }
@@ -1859,3 +1897,137 @@ static int ParseRealText( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx )
     return VLC_SUCCESS;
 }
 
+/* ParseDKS: read the next DKS entry into p_subtitle.
+ *
+ * Lines are fetched with TextGetLine() until one matches "[h:m:s]text";
+ * it gives the start time and the text. The line after it is then read
+ * and, when it matches "[h:m:s]", gives the stop time; otherwise i_stop
+ * is not written. Each "[br]" in the text is replaced by a newline.
+ *
+ * Returns VLC_SUCCESS with p_subtitle->psz_text set to a malloc()ed
+ * string, VLC_EGENERIC when TextGetLine() returns NULL, or VLC_ENOMEM
+ * when the text buffer cannot be allocated. i_idx is unused. */
+static int ParseDKS( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx )
+{
+    VLC_UNUSED( i_idx );
+
+    demux_sys_t *p_sys = p_demux->p_sys;
+    text_t      *txt = &p_sys->txt;
+    char *psz_text;
+
+    for( ;; )
+    {
+        int h1, m1, s1;
+        int h2, m2, s2;
+        char *s = TextGetLine( txt );
+
+        if( !s )
+            return VLC_EGENERIC;
+
+        /* The text is a part of the line, so a buffer as large as the
+         * line always has room for the %[^\r\n] conversion below */
+        psz_text = malloc( strlen( s ) + 1 );
+        if( !psz_text )
+            return VLC_ENOMEM;
+
+        if( sscanf( s, "[%d:%d:%d]%[^\r\n]",
+                    &h1, &m1, &s1, psz_text ) == 4 )
+        {
+            /* Seconds are scaled by 1000 twice, i.e. to microseconds */
+            p_subtitle->i_start = ( (int64_t)h1 * 3600*1000 +
+                                    (int64_t)m1 * 60*1000 +
+                                    (int64_t)s1 * 1000 ) * 1000;
+
+            s = TextGetLine( txt );
+            if( !s )
+            {
+                /* Incomplete entry: do not leak the text */
+                free( psz_text );
+                return VLC_EGENERIC;
+            }
+
+            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
+                p_subtitle->i_stop  = ( (int64_t)h2 * 3600*1000 +
+                                        (int64_t)m2 * 60*1000 +
+                                        (int64_t)s2 * 1000 ) * 1000;
+            break;
+        }
+        free( psz_text );
+    }
+
+    /* replace [br] by \n */
+    char *p;
+    while( ( p = strstr( psz_text, "[br]" ) ) )
+    {
+        *p++ = '\n';
+        /* Close the gap left by "br]", terminator included */
+        memmove( p, &p[3], strlen(&p[3])+1 );
+    }
+
+    p_subtitle->psz_text = psz_text;
+    return VLC_SUCCESS;
+}
+
+/* ParseSubViewer1: read the next SubViewer 1 entry into p_subtitle.
+ *
+ * Lines are fetched with TextGetLine() until one matches "[h:m:s]",
+ * which gives the start time. The next line is copied as the text, and
+ * the line after that gives the stop time when it matches "[h:m:s]";
+ * otherwise i_stop is not written.
+ *
+ * Returns VLC_SUCCESS with p_subtitle->psz_text set to a strdup()ed
+ * string, VLC_EGENERIC when TextGetLine() returns NULL, or VLC_ENOMEM
+ * when the text cannot be duplicated. i_idx is unused. */
+static int ParseSubViewer1( demux_t *p_demux, subtitle_t *p_subtitle, int i_idx )
+{
+    VLC_UNUSED( i_idx );
+
+    demux_sys_t *p_sys = p_demux->p_sys;
+    text_t      *txt = &p_sys->txt;
+    char *psz_text;
+
+    for( ;; )
+    {
+        int h1, m1, s1;
+        int h2, m2, s2;
+        char *s = TextGetLine( txt );
+
+        if( !s )
+            return VLC_EGENERIC;
+
+        if( sscanf( s, "[%d:%d:%d]", &h1, &m1, &s1 ) == 3 )
+        {
+            /* Seconds are scaled by 1000 twice, i.e. to microseconds */
+            p_subtitle->i_start = ( (int64_t)h1 * 3600*1000 +
+                                    (int64_t)m1 * 60*1000 +
+                                    (int64_t)s1 * 1000 ) * 1000;
+
+            s = TextGetLine( txt );
+            if( !s )
+                return VLC_EGENERIC;
+
+            psz_text = strdup( s );
+            if( !psz_text )
+                return VLC_ENOMEM;
+
+            s = TextGetLine( txt );
+            if( !s )
+            {
+                /* Incomplete entry: do not leak the text */
+                free( psz_text );
+                return VLC_EGENERIC;
+            }
+
+            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
+                p_subtitle->i_stop  = ( (int64_t)h2 * 3600*1000 +
+                                        (int64_t)m2 * 60*1000 +
+                                        (int64_t)s2 * 1000 ) * 1000;
+            break;
+        }
+    }
+
+    p_subtitle->psz_text = psz_text;
+
+    return VLC_SUCCESS;
+}
+