]> git.sesse.net Git - vlc/blob - modules/codec/subsdec.c
* src/video_output/vout_subpictures.c, include/vlc_video.h:
[vlc] / modules / codec / subsdec.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2001 VideoLAN
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@videolan.org>
8  *          Samuel Hocevar <sam@zoy.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28 #include <vlc/vlc.h>
29 #include <vlc/vout.h>
30 #include <vlc/decoder.h>
31
32 #include "osd.h"
33 #include "vlc_filter.h"
34
35 #if defined(HAVE_ICONV)
36 #include <iconv.h>
37 #endif
38
39 #include "charset.h"
40
/*****************************************************************************
 * decoder_sys_t : private state of the text subtitles decoder
 *****************************************************************************/
struct decoder_sys_t
{
    /* Alignment value read from the "subsdec-align" variable when the
     * decoder is opened; it is OR-ed into the flags of every subpicture. */
    int i_align;

#if defined(HAVE_ICONV)
    /* Conversion descriptor obtained from iconv_open( "UTF-8", ... ).
     * Users of this field compare it against (iconv_t)-1 before use. */
    iconv_t iconv_handle;
#endif
};
53
54 /*****************************************************************************
55  * Local prototypes
56  *****************************************************************************/
/* Module entry points, registered through set_callbacks() below. */
57 static int  OpenDecoder   ( vlc_object_t * );
58 static void CloseDecoder  ( vlc_object_t * );
59
/* DecodeBlock is installed as the pf_decode_sub callback by OpenDecoder;
 * ParseText and StripTags are its helpers. */
60 static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
61 static subpicture_t *ParseText     ( decoder_t *, block_t * );
62 static void         StripTags      ( char * );
63
/* Default value of the "subsdec-encoding" option. OpenDecoder treats this
 * exact string as a request to use the charset reported by
 * vlc_current_charset() instead of passing it to iconv_open(). */
64 #define DEFAULT_NAME "System Default"
65
66 /*****************************************************************************
67  * Module descriptor.
68  *****************************************************************************/
69 #if defined(HAVE_ICONV)
/* Choices offered for the "subsdec-encoding" option (see the
 * change_string_list() call in the module descriptor). Apart from
 * DEFAULT_NAME, the selected name is passed unchanged to iconv_open() as the
 * source charset, the destination always being "UTF-8".
 * NOTE(review): the empty "" entries look like visual group separators for
 * the choice list -- confirm against the interface code before relying on
 * that. */
70 static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
71     "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
72     "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
73     "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
74     "ISO-8859-6", "CP1256", "MacArabic", "",
75     "ISO-8859-7", "CP1253", "MacGreek", "",
76     "ISO-8859-8", "CP1255", "MacHebrew", "",
77     "ISO-8859-9", "CP1254", "MacTurkish", "",
78     "ISO-8859-13", "CP1257", "",
79     "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
80     "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
81     "ISO-2022-KR", "EUC-KR", "",
82     "MacThai", "KOI8-T", "",
83     "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
84     "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
85     "Macintosh", "",
86     "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
87     "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
88     "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
89     "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
90     "HPROMAN8", "NEXTSTEP" };
91 #endif
92
/* Values and labels of the "subsdec-align" option; both arrays are handed
 * together to change_integer_list() below. */
93 static int  pi_justification[] = { 0, 1, 2 };
94 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
95
/* Short and long descriptions of the two module options. */
96 #define ENCODING_TEXT N_("Subtitles text encoding")
97 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
98 #define ALIGN_TEXT N_("Subtitles justification")
99 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
100
/* Module descriptor: declares a "decoder" capability with score 50, the
 * open/close callbacks and the configuration options. */
101 vlc_module_begin();
102     set_description( _("text subtitles decoder") );
103     set_capability( "decoder", 50 );
104     set_callbacks( OpenDecoder, CloseDecoder );
105
    /* Integer option, default 0, restricted to the justification list. */
106     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
107                  VLC_TRUE );
108         change_integer_list( pi_justification, ppsz_justification_text, 0 );
    /* The encoding option only exists when iconv support is compiled in. */
109 #if defined(HAVE_ICONV)
110     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
111                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
112         change_string_list( ppsz_encodings, 0, 0 );
113 #endif
114 vlc_module_end();
115
116 /*****************************************************************************
117  * OpenDecoder: probe the decoder and return score
118  *****************************************************************************
119  * Tries to launch a decoder and return score so that the interface is able
120  * to chose.
121  *****************************************************************************/
122 static int OpenDecoder( vlc_object_t *p_this )
123 {
124     decoder_t     *p_dec = (decoder_t*)p_this;
125     decoder_sys_t *p_sys;
126     vlc_value_t val;
127
128     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
129         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
130     {
131         return VLC_EGENERIC;
132     }
133
134     p_dec->pf_decode_sub = DecodeBlock;
135
136     /* Allocate the memory needed to store the decoder's structure */
137     if( ( p_dec->p_sys = p_sys =
138           (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
139     {
140         msg_Err( p_dec, "out of memory" );
141         return VLC_EGENERIC;
142     }
143
144     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
145     var_Get( p_dec, "subsdec-align", &val );
146     p_sys->i_align = val.i_int;
147
148 #if defined(HAVE_ICONV)
149     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
150     {
151         msg_Dbg( p_dec, "using character encoding: %s",
152                  p_dec->fmt_in.subs.psz_encoding );
153         p_sys->iconv_handle =
154             iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
155     }
156     else
157     {
158         var_Create( p_dec, "subsdec-encoding",
159                     VLC_VAR_STRING | VLC_VAR_DOINHERIT );
160         var_Get( p_dec, "subsdec-encoding", &val );
161         if( !strcmp( val.psz_string, DEFAULT_NAME ) )
162         {
163             char *psz_charset =(char*)malloc( 100 );
164             vlc_current_charset( &psz_charset );
165             p_sys->iconv_handle = iconv_open( "UTF-8", psz_charset );
166             msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
167             free( psz_charset );
168         }
169         else if( val.psz_string )
170         {
171             msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
172             p_sys->iconv_handle = iconv_open( "UTF-8", val.psz_string );
173         }
174
175         if( p_sys->iconv_handle == (iconv_t)-1 )
176         {
177             msg_Warn( p_dec, "unable to do requested conversion" );
178         }
179
180         if( val.psz_string ) free( val.psz_string );
181     }
182 #else
183
184     msg_Dbg( p_dec, "no iconv support available" );
185 #endif
186
187     return VLC_SUCCESS;
188 }
189
190 /****************************************************************************
191  * DecodeBlock: the whole thing
192  ****************************************************************************
193  * This function must be fed with complete subtitles units.
194  ****************************************************************************/
195 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
196 {
197     subpicture_t *p_spu;
198
199     if( !pp_block || *pp_block == NULL ) return NULL;
200
201     p_spu = ParseText( p_dec, *pp_block );
202
203     block_Release( *pp_block );
204     *pp_block = NULL;
205
206     return p_spu;
207 }
208
209 /*****************************************************************************
210  * CloseDecoder: clean up the decoder
211  *****************************************************************************/
212 static void CloseDecoder( vlc_object_t *p_this )
213 {
214     decoder_t *p_dec = (decoder_t *)p_this;
215     decoder_sys_t *p_sys = p_dec->p_sys;
216
217 #if defined(HAVE_ICONV)
218     if( p_sys->iconv_handle != (iconv_t)-1 )
219     {
220         iconv_close( p_sys->iconv_handle );
221     }
222 #endif
223
224     free( p_sys );
225 }
226
227 /*****************************************************************************
228  * ParseText: parse an text subtitle packet and send it to the video output
229  *****************************************************************************/
230 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
231 {
232     decoder_sys_t *p_sys = p_dec->p_sys;
233     subpicture_t *p_spu = 0;
234     char *psz_subtitle;
235     int i_align_h, i_align_v;
236     video_format_t fmt;
237
238     /* We cannot display a subpicture with no date */
239     if( p_block->i_pts == 0 )
240     {
241         msg_Warn( p_dec, "subtitle without a date" );
242         return NULL;
243     }
244
245     /* Check validity of packet data */
246     if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' )
247     {
248         msg_Warn( p_dec, "empty subtitle" );
249         return NULL;
250     }
251
252     /* Should be resiliant against bad subtitles */
253     psz_subtitle = strndup( p_block->p_buffer, p_block->i_buffer );
254
255     i_align_h = p_sys->i_align ? 20 : 0;
256     i_align_v = 10;
257
258 #if defined(HAVE_ICONV)
259     if( p_sys->iconv_handle != (iconv_t)-1 )
260     {
261         char *psz_new_subtitle;
262         char *psz_convert_buffer_out;
263         char *psz_convert_buffer_in;
264         size_t ret, inbytes_left, outbytes_left;
265
266         psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
267         psz_convert_buffer_out = psz_new_subtitle;
268         psz_convert_buffer_in = psz_subtitle;
269         inbytes_left = strlen( psz_subtitle );
270         outbytes_left = 6 * inbytes_left;
271         ret = iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
272                      &inbytes_left, &psz_convert_buffer_out, &outbytes_left );
273         *psz_convert_buffer_out = '\0';
274
275         if( inbytes_left )
276         {
277             msg_Warn( p_dec, "Failed to convert subtitle encoding, "
278                       "dropping subtitle.\nTry setting a different "
279                       "character-encoding for the subtitle." );
280             free( psz_subtitle );
281             return NULL;
282         }
283         else
284         {
285             free( psz_subtitle );
286             psz_subtitle = psz_new_subtitle;
287         }
288     }
289 #endif
290
291     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
292     {
293         /* Decode SSA strings */
294         /* We expect: ReadOrder, Layer, Style, Name, MarginL, MarginR,
295          * MarginV, Effect, Text */
296         char *psz_new_subtitle;
297         char *psz_buffer_sub;
298         int         i_comma;
299         int         i_text;
300
301         psz_buffer_sub = psz_subtitle;
302         for( ;; )
303         {
304             i_comma = 0;
305             while( i_comma < 8 &&
306                 *psz_buffer_sub != '\0' )
307             {
308                 if( *psz_buffer_sub == ',' )
309                 {
310                     i_comma++;
311                 }
312                 psz_buffer_sub++;
313             }
314             psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
315             i_text = 0;
316             while( psz_buffer_sub[0] != '\0' )
317             {
318                 if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] == 'n' ||
319                     psz_buffer_sub[1] == 'N' ) )
320                 {
321                     psz_new_subtitle[i_text] = '\n';
322                     i_text++;
323                     psz_buffer_sub += 2;
324                 }
325                 else if( psz_buffer_sub[0] == '{' &&
326                          psz_buffer_sub[1] == '\\' )
327                 {
328                     /* SSA control code */
329                     while( psz_buffer_sub[0] != '\0' &&
330                            psz_buffer_sub[0] != '}' )
331                     {
332                         psz_buffer_sub++;
333                     }
334                     psz_buffer_sub++;
335                 }
336                 else
337                 {
338                     psz_new_subtitle[i_text] = psz_buffer_sub[0];
339                     i_text++;
340                     psz_buffer_sub++;
341                 }
342             }
343             psz_new_subtitle[i_text] = '\0';
344             free( psz_subtitle );
345             psz_subtitle = psz_new_subtitle;
346             break;
347         }
348     }
349
350     StripTags( psz_subtitle );
351
352     p_spu = p_dec->pf_spu_buffer_new( p_dec );
353     if( !p_spu )
354     {
355         msg_Warn( p_dec, "can't get spu buffer" );
356         free( psz_subtitle );
357         return 0;
358     }
359
360     /* Create a new subpicture region */
361     memset( &fmt, 0, sizeof(video_format_t) );
362     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
363     fmt.i_aspect = 0;
364     fmt.i_width = fmt.i_height = 0;
365     fmt.i_x_offset = fmt.i_y_offset = 0;
366     p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
367     if( !p_spu->p_region )
368     {
369         msg_Err( p_dec, "cannot allocate SPU region" );
370         free( psz_subtitle );
371         p_dec->pf_spu_buffer_del( p_dec, p_spu );
372         return 0;
373     }
374
375     p_spu->p_region->psz_text = psz_subtitle;
376     p_spu->i_start = p_block->i_pts;
377     p_spu->i_stop = p_block->i_pts + p_block->i_length;
378     p_spu->b_ephemer = (p_block->i_length == 0);
379     p_spu->b_absolute = VLC_FALSE;
380
381     p_spu->i_flags = OSD_ALIGN_BOTTOM | p_sys->i_align;
382     p_spu->i_x = i_align_h;
383     p_spu->i_y = i_align_v;
384
385     return p_spu;
386 }
387
/*****************************************************************************
 * StripTags: remove "<...>" tags from a string, in place
 *****************************************************************************
 * A tag starts at '<' and ends at the next '>'. If a space, tab, newline or
 * carriage return is met before the closing '>', the '<' was not a tag
 * opener and everything read so far is kept as plain text. An unterminated
 * "<..." at the end of the string is kept as well.
 *
 * The result is never longer than the input, so the string is compacted in
 * its own buffer with a read cursor and a write cursor.
 *****************************************************************************/
static void StripTags( char *psz_text )
{
    char *p_read = psz_text;
    char *p_write = psz_text;
    /* Output position of the '<' of the tag currently being scanned,
     * or NULL when not inside a tentative tag. */
    char *p_tag_out = NULL;

    for( ; *p_read != '\0'; p_read++ )
    {
        const char c = *p_read;

        if( p_tag_out == NULL )
        {
            if( c == '<' )
            {
                p_tag_out = p_write;
            }
            *p_write++ = c;
        }
        else if( c == '>' )
        {
            /* Complete tag: rewind over everything written since '<' */
            p_write = p_tag_out;
            p_tag_out = NULL;
        }
        else
        {
            if( c == ' ' || c == '\t' || c == '\n' || c == '\r' )
            {
                /* Whitespace cancels the tentative tag... */
                p_tag_out = NULL;
            }
            /* ...but the character itself is always part of the output */
            *p_write++ = c;
        }
    }
    *p_write = '\0';
}