1 /*****************************************************************************
2 * subsdec.c : text subtitles decoder
3 *****************************************************************************
4 * Copyright (C) 2000-2001 the VideoLAN team
7 * Authors: Gildas Bazin <gbazin@videolan.org>
8 * Samuel Hocevar <sam@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
30 #include <vlc/decoder.h>
33 #include "vlc_filter.h"
37 /*****************************************************************************
38 * decoder_sys_t : decoder descriptor
39 *****************************************************************************/
/* Members of the decoder's private state (reached through p_dec->p_sys).
 * NOTE(review): the enclosing struct declaration lines are not visible in
 * this listing. */
/* Loaded from the "subsdec-align" variable in OpenDecoder and OR'ed into the
 * subpicture's i_flags in ParseText. */
42 int i_align; /* Subtitles alignment on the vout */
/* Opened by OpenDecoder with "UTF-8" as first argument to vlc_iconv_open();
 * the value (vlc_iconv_t)-1 is tested for before the handle is used or
 * closed. */
43 vlc_iconv_t iconv_handle; /* handle to iconv instance */
46 /*****************************************************************************
48 *****************************************************************************/
/* Module entry points, handed to set_callbacks() in the module descriptor. */
49 static int OpenDecoder ( vlc_object_t * );
50 static void CloseDecoder ( vlc_object_t * );
/* DecodeBlock is installed as pf_decode_sub by OpenDecoder; it calls
 * ParseText, which in turn calls StripTags on the subtitle text. */
52 static subpicture_t *DecodeBlock ( decoder_t *, block_t ** );
53 static subpicture_t *ParseText ( decoder_t *, block_t * );
54 static void StripTags ( char * );
/* Default value of the "subsdec-encoding" option. When the option still holds
 * this string, OpenDecoder asks vlc_current_charset() for the charset name
 * instead of using the option value directly. */
56 #define DEFAULT_NAME "System Default"
58 /*****************************************************************************
60 *****************************************************************************/
/* Encoding names offered for the "subsdec-encoding" string option (the array
 * is passed to change_string_list() in the module descriptor). The first
 * entry is DEFAULT_NAME. Empty strings separate groups of related encodings;
 * NOTE(review): presumably they are shown as separators by the preferences
 * UI -- confirm. Every non-empty name other than DEFAULT_NAME may end up as
 * the second argument of vlc_iconv_open() in OpenDecoder.
 * NOTE(review): at least one line of the original initializer is not visible
 * in this listing. */
61 static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
62 "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
63 "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
64 "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
65 "ISO-8859-6", "CP1256", "MacArabic", "",
66 "ISO-8859-7", "CP1253", "MacGreek", "",
67 "ISO-8859-8", "CP1255", "MacHebrew", "",
68 "ISO-8859-9", "CP1254", "MacTurkish", "",
69 "ISO-8859-13", "CP1257", "",
70 "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
71 "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
72 "ISO-2022-KR", "EUC-KR", "",
73 "MacThai", "KOI8-T", "",
74 "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
75 "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
77 "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
78 "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
79 "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
80 "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
81 "HPROMAN8", "NEXTSTEP" };
/* Parallel tables for the "subsdec-align" integer option (both are passed to
 * change_integer_list() in the module descriptor): value k in
 * pi_justification is labelled by entry k of ppsz_justification_text, i.e.
 * 0 = "Center", 1 = "Left", 2 = "Right". The selected value is later OR'ed
 * with OSD_ALIGN_BOTTOM in ParseText. */
83 static int pi_justification[] = { 0, 1, 2 };
84 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
/* Short and long help strings for the two options declared in the module
 * descriptor: *_TEXT and *_LONGTEXT are passed, in that order, to
 * add_string( "subsdec-encoding", ... ) and add_integer( "subsdec-align", ... ). */
86 #define ENCODING_TEXT N_("Subtitles text encoding")
87 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
88 #define ALIGN_TEXT N_("Subtitles justification")
89 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
/* Module descriptor body: names the module, registers it with the "decoder"
 * capability and declares its two configuration options.
 * NOTE(review): the macros that open and close the descriptor are not visible
 * in this listing. */
92 set_shortname( _("Subtitles"));
93 set_description( _("Text subtitles decoder") );
/* NOTE(review): 50 is presumably the module's priority/score among decoders --
 * confirm against the module API. */
94 set_capability( "decoder", 50 );
95 set_callbacks( OpenDecoder, CloseDecoder );
96 set_category( CAT_INPUT );
97 set_subcategory( SUBCAT_INPUT_SCODEC );
/* Integer option "subsdec-align", default 0, restricted to the values listed
 * in pi_justification.
 * NOTE(review): the end of the add_integer() call is on a line not visible
 * here. */
99 add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
101 change_integer_list( pi_justification, ppsz_justification_text, 0 );
/* String option "subsdec-encoding", default DEFAULT_NAME, with choices taken
 * from ppsz_encodings. */
102 add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
103 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
104 change_string_list( ppsz_encodings, 0, 0 );
107 /*****************************************************************************
108 * OpenDecoder: probe the decoder and return score
109 *****************************************************************************
110 * Tries to launch a decoder and return a score so that the interface is able to choose.
112 *****************************************************************************/
/* Module open callback. Checks the input codec, installs DecodeBlock as the
 * subtitle decode callback, allocates the private state, reads the alignment
 * option and opens an iconv handle with "UTF-8" as first argument.
 *
 * p_this: the decoder object (cast to decoder_t *).
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * declaration of `val`, the return statements), so the exact branch structure
 * and the returned values cannot be confirmed from here. */
113 static int OpenDecoder( vlc_object_t *p_this )
115 decoder_t *p_dec = (decoder_t*)p_this;
116 decoder_sys_t *p_sys;
/* True when the codec is neither 'subt' nor 'ssa '; the action taken in that
 * case is on a line not visible here. */
119 if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
120 p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
125 p_dec->pf_decode_sub = DecodeBlock;
127 /* Allocate the memory needed to store the decoder's structure */
128 if( ( p_dec->p_sys = p_sys =
129 (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
131 msg_Err( p_dec, "out of memory" );
/* Alignment is taken from the inherited "subsdec-align" integer variable. */
135 var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
136 var_Get( p_dec, "subsdec-align", &val );
137 p_sys->i_align = val.i_int;
/* An encoding announced by the input format is used when it is a non-empty
 * string. */
139 if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
141 msg_Dbg( p_dec, "using character encoding: %s",
142 p_dec->fmt_in.subs.psz_encoding );
143 p_sys->iconv_handle =
144 vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
/* Otherwise the encoding comes from the inherited "subsdec-encoding" string
 * variable. */
148 var_Create( p_dec, "subsdec-encoding",
149 VLC_VAR_STRING | VLC_VAR_DOINHERIT );
150 var_Get( p_dec, "subsdec-encoding", &val );
/* NOTE(review): val.psz_string is passed to strcmp() here but is only tested
 * for NULL further down; if it can be NULL, this call dereferences NULL. */
151 if( !strcmp( val.psz_string, DEFAULT_NAME ) )
/* Option left at DEFAULT_NAME: ask vlc_current_charset() for the name.
 * NOTE(review): the address of the malloc'ed pointer is handed to
 * vlc_current_charset(); if that function stores a newly allocated string
 * there, the 100-byte buffer leaks -- confirm against its declaration. The
 * malloc result is also not checked. */
153 char *psz_charset =(char*)malloc( 100 );
154 vlc_current_charset( &psz_charset );
155 p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
156 msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
/* Explicit encoding name chosen by the user. */
159 else if( val.psz_string )
161 msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
162 p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
/* A handle equal to (vlc_iconv_t)-1 only triggers a warning here; ParseText
 * and CloseDecoder test for the same value before using the handle. */
165 if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
167 msg_Warn( p_dec, "unable to do requested conversion" );
/* The string obtained through var_Get() is freed by this function. */
170 if( val.psz_string ) free( val.psz_string );
176 /****************************************************************************
177 * DecodeBlock: the whole thing
178 ****************************************************************************
179 * This function must be fed with complete subtitles units.
180 ****************************************************************************/
/* Decode callback: turns the block in *pp_block into a subpicture by calling
 * ParseText, then releases the block.
 *
 * p_dec:    the decoder.
 * pp_block: address of the block to decode; NULL, or pointing at NULL, yields
 *           a NULL return without further work.
 *
 * NOTE(review): the declaration of p_spu, any reset of *pp_block and the final
 * return statement are on lines not visible in this listing. */
181 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
185 if( !pp_block || *pp_block == NULL ) return NULL;
187 p_spu = ParseText( p_dec, *pp_block );
/* ParseText works on its own strndup() copy of the payload, so the block is
 * released here regardless of the result. */
189 block_Release( *pp_block );
195 /*****************************************************************************
196 * CloseDecoder: clean up the decoder
197 *****************************************************************************/
/* Module close callback: closes the iconv handle when it is not the
 * (vlc_iconv_t)-1 failure value.
 *
 * p_this: the decoder object (cast to decoder_t *).
 *
 * NOTE(review): the release of p_sys itself is not visible in this listing. */
198 static void CloseDecoder( vlc_object_t *p_this )
200 decoder_t *p_dec = (decoder_t *)p_this;
201 decoder_sys_t *p_sys = p_dec->p_sys;
203 if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
205 vlc_iconv_close( p_sys->iconv_handle );
211 /*****************************************************************************
212 * ParseText: parse a text subtitle packet and send it to the video output
213 *****************************************************************************/
/* Builds a text subpicture from one subtitle block.
 *
 * Steps visible in this function:
 *   1. warn on blocks without a date or without text;
 *   2. copy the payload into a NUL-terminated string;
 *   3. run it through iconv when a conversion handle is available;
 *   4. for the 'ssa ' codec, keep only the text field and translate its
 *      escapes;
 *   5. strip <...> tags with StripTags;
 *   6. allocate a subpicture with a 'TEXT' region and hand the string to it.
 *
 * p_dec:   the decoder (its p_sys supplies the alignment and iconv handle).
 * p_block: the subtitle packet; it is not released here.
 *
 * NOTE(review): this listing omits some original lines (braces, return
 * statements, the declarations of psz_subtitle, fmt, i_comma and i_text, loop
 * increments), so the exact control flow and return values cannot be confirmed
 * from here. */
214 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
216 decoder_sys_t *p_sys = p_dec->p_sys;
217 subpicture_t *p_spu = 0;
219 int i_align_h, i_align_v;
222 /* We cannot display a subpicture with no date */
223 if( p_block->i_pts == 0 )
225 msg_Warn( p_dec, "subtitle without a date" );
229 /* Check validity of packet data */
230 if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' )
232 msg_Warn( p_dec, "empty subtitle" );
/* strndup() bounds the copy to i_buffer bytes and always NUL-terminates it.
 * NOTE(review): its result is not checked for NULL in the visible lines. */
236 /* Should be resilient against bad subtitles */
237 psz_subtitle = strndup( p_block->p_buffer, p_block->i_buffer );
/* Horizontal offset of 20 whenever a non-zero alignment is configured. */
239 i_align_h = p_sys->i_align ? 20 : 0;
/* Charset conversion, only when OpenDecoder obtained a usable handle. */
242 if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
244 char *psz_new_subtitle;
245 char *psz_convert_buffer_out;
246 char *psz_convert_buffer_in;
247 size_t ret, inbytes_left, outbytes_left;
/* The output buffer is sized at six bytes per input byte.
 * NOTE(review): no byte is reserved for the terminator written through
 * psz_convert_buffer_out below; if the conversion were to fill the whole
 * buffer that write would land one past the allocation. The malloc result is
 * also not checked in the visible lines. */
249 psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
250 psz_convert_buffer_out = psz_new_subtitle;
251 psz_convert_buffer_in = psz_subtitle;
252 inbytes_left = strlen( psz_subtitle );
253 outbytes_left = 6 * inbytes_left;
/* NOTE(review): the end of this call is on a line not visible here. */
254 ret = vlc_iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
255 &inbytes_left, &psz_convert_buffer_out,
257 *psz_convert_buffer_out = '\0';
/* Failure path: the subtitle is dropped. NOTE(review): the condition guarding
 * this path, and whether psz_new_subtitle is freed on it, are not visible. */
261 msg_Warn( p_dec, "Failed to convert subtitle encoding, "
262 "dropping subtitle.\nTry setting a different "
263 "character-encoding for the subtitle." );
264 free( psz_subtitle );
/* Success path: the converted string replaces the original copy. */
269 free( psz_subtitle );
270 psz_subtitle = psz_new_subtitle;
274 if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
276 /* Decode SSA strings */
277 /* We expect: ReadOrder, Layer, Style, Name, MarginL, MarginR,
278 * MarginV, Effect, Text */
279 char *psz_new_subtitle;
280 char *psz_buffer_sub;
284 psz_buffer_sub = psz_subtitle;
/* Scan forward while fewer than eight commas have been counted, i.e. past
 * the eight fields that precede Text. NOTE(review): the counter and pointer
 * increments are on lines not visible here. */
288 while( i_comma < 8 &&
289 *psz_buffer_sub != '\0' )
291 if( *psz_buffer_sub == ',' )
/* Sized from the remaining input plus the terminator; each visible branch of
 * the loop below writes at most one output character per step.
 * NOTE(review): the malloc result is not checked in the visible lines. */
297 psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
299 while( psz_buffer_sub[0] != '\0' )
/* A backslash followed by 'n' or 'N' becomes a real newline. */
301 if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] == 'n' ||
302 psz_buffer_sub[1] == 'N' ) )
304 psz_new_subtitle[i_text] = '\n';
/* A '{' followed by a backslash starts an override block that is scanned up
 * to the closing '}' (or the end of the string). */
308 else if( psz_buffer_sub[0] == '{' &&
309 psz_buffer_sub[1] == '\\' )
311 /* SSA control code */
312 while( psz_buffer_sub[0] != '\0' &&
313 psz_buffer_sub[0] != '}' )
/* Any other character is copied unchanged. */
321 psz_new_subtitle[i_text] = psz_buffer_sub[0];
326 psz_new_subtitle[i_text] = '\0';
327 free( psz_subtitle );
328 psz_subtitle = psz_new_subtitle;
/* Remove <...> markup in place (see StripTags). */
333 StripTags( psz_subtitle );
335 p_spu = p_dec->pf_spu_buffer_new( p_dec );
/* NOTE(review): the test guarding this failure path is not visible here. */
338 msg_Warn( p_dec, "can't get spu buffer" );
339 free( psz_subtitle );
343 /* Create a new subpicture region */
344 memset( &fmt, 0, sizeof(video_format_t) );
345 fmt.i_chroma = VLC_FOURCC('T','E','X','T');
347 fmt.i_width = fmt.i_height = 0;
348 fmt.i_x_offset = fmt.i_y_offset = 0;
349 p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
350 if( !p_spu->p_region )
352 msg_Err( p_dec, "cannot allocate SPU region" );
353 free( psz_subtitle );
354 p_dec->pf_spu_buffer_del( p_dec, p_spu );
/* The region now holds the string; it is not freed in this function past
 * this point. */
358 p_spu->p_region->psz_text = psz_subtitle;
359 p_spu->i_start = p_block->i_pts;
360 p_spu->i_stop = p_block->i_pts + p_block->i_length;
/* A zero-length block yields an ephemeral subpicture. */
361 p_spu->b_ephemer = (p_block->i_length == 0);
362 p_spu->b_absolute = VLC_FALSE;
364 p_spu->i_flags = OSD_ALIGN_BOTTOM | p_sys->i_align;
365 p_spu->i_x = i_align_h;
/* NOTE(review): no assignment to i_align_v is visible in this listing;
 * confirm it is initialised before this read. */
366 p_spu->i_y = i_align_v;
/* Removes <...> tags from psz_text in place by shifting the kept characters
 * to the left.
 *
 * psz_text: NUL-terminated string, modified in place; the result is never
 *           longer than the input.
 *
 * NOTE(review): this listing omits some original lines (braces, else
 * keywords, the declaration and increment of `i`, the updates of
 * i_tag_start), so the branch structure described below is inferred from the
 * visible statements only. */
371 static void StripTags( char *psz_text )
/* Number of characters removed so far; kept characters are written to index
 * i - i_left_moves. */
373 int i_left_moves = 0;
374 vlc_bool_t b_inside_tag = VLC_FALSE;
/* Index of the '<' that opened the current candidate tag. */
376 int i_tag_start = -1;
377 while( psz_text[ i ] )
/* A '<' opens a candidate tag; the character is still copied at this point. */
381 if( psz_text[ i ] == '<' )
383 b_inside_tag = VLC_TRUE;
386 psz_text[ i - i_left_moves ] = psz_text[ i ];
/* Whitespace clears the inside-tag flag without adding to i_left_moves. */
390 if( ( psz_text[ i ] == ' ' ) ||
391 ( psz_text[ i ] == '\t' ) ||
392 ( psz_text[ i ] == '\n' ) ||
393 ( psz_text[ i ] == '\r' ) )
395 b_inside_tag = VLC_FALSE;
/* A '>' closes the tag: the span from i_tag_start to i inclusive is counted
 * as removed, so later characters are written over it. */
398 else if( psz_text[ i ] == '>' )
400 i_left_moves += i - i_tag_start + 1;
402 b_inside_tag = VLC_FALSE;
406 psz_text[ i - i_left_moves ] = psz_text[ i ];
/* Terminate the string at its new, possibly shorter, length. */
411 psz_text[ i - i_left_moves ] = '\0';