]> git.sesse.net Git - vlc/blob - modules/codec/subsdec.c
* ALL: Major rework of the subpictures architecture.
[vlc] / modules / codec / subsdec.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2001 VideoLAN
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@netcourrier.com>
8  *          Samuel Hocevar <sam@zoy.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28 #include <vlc/vlc.h>
29 #include <vlc/vout.h>
30 #include <vlc/decoder.h>
31
32 #include "osd.h"
33 #include "vlc_filter.h"
34
35 #if defined(HAVE_ICONV)
36 #include <iconv.h>
37 #endif
38
39 #include "charset.h"
40
41 /*****************************************************************************
42  * decoder_sys_t : decoder descriptor
43  *****************************************************************************/
44 struct decoder_sys_t
45 {
46     int                 i_align;          /* Subtitles alignment on the vout */
47
48 #if defined(HAVE_ICONV)
49     iconv_t             iconv_handle;            /* handle to iconv instance */
50 #endif
51
52     filter_t *p_render;                              /* text renderer filter */
53 };
54
55 /*****************************************************************************
56  * Local prototypes
57  *****************************************************************************/
58 static int  OpenDecoder   ( vlc_object_t * );
59 static void CloseDecoder  ( vlc_object_t * );
60
61 static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
62 static subpicture_t *ParseText     ( decoder_t *, block_t * );
63 static void         StripTags      ( char * );
64
65 static subpicture_t *spu_new_buffer( filter_t * );
66 static void spu_del_buffer( filter_t *, subpicture_t * );
67
68 #define DEFAULT_NAME "System Default"
69
70 /*****************************************************************************
71  * Module descriptor.
72  *****************************************************************************/
73 #if defined(HAVE_ICONV)
74 static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
75     "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
76     "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
77     "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
78     "ISO-8859-6", "CP1256", "MacArabic", "",
79     "ISO-8859-7", "CP1253", "MacGreek", "",
80     "ISO-8859-8", "CP1255", "MacHebrew", "",
81     "ISO-8859-9", "CP1254", "MacTurkish", "",
82     "ISO-8859-13", "CP1257", "",
83     "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
84     "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
85     "ISO-2022-KR", "EUC-KR", "",
86     "MacThai", "KOI8-T", "",
87     "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
88     "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
89     "Macintosh", "",
90     "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
91     "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
92     "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
93     "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
94     "HPROMAN8", "NEXTSTEP" };
95 #endif
96
97 static int  pi_justification[] = { 0, 1, 2 };
98 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
99
100 #define ENCODING_TEXT N_("Subtitles text encoding")
101 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
102 #define ALIGN_TEXT N_("Subtitles justification")
103 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
104
105 vlc_module_begin();
106     set_description( _("text subtitles decoder") );
107     set_capability( "decoder", 50 );
108     set_callbacks( OpenDecoder, CloseDecoder );
109
110     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
111                  VLC_TRUE );
112         change_integer_list( pi_justification, ppsz_justification_text, 0 );
113 #if defined(HAVE_ICONV)
114     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
115                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
116         change_string_list( ppsz_encodings, 0, 0 );
117 #endif
118 vlc_module_end();
119
120 /*****************************************************************************
121  * OpenDecoder: probe the decoder and return score
122  *****************************************************************************
123  * Tries to launch a decoder and return score so that the interface is able
124  * to chose.
125  *****************************************************************************/
126 static int OpenDecoder( vlc_object_t *p_this )
127 {
128     decoder_t     *p_dec = (decoder_t*)p_this;
129     decoder_sys_t *p_sys;
130     vlc_value_t val;
131
132     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
133         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
134     {
135         return VLC_EGENERIC;
136     }
137
138     p_dec->pf_decode_sub = DecodeBlock;
139
140     /* Allocate the memory needed to store the decoder's structure */
141     if( ( p_dec->p_sys = p_sys =
142           (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
143     {
144         msg_Err( p_dec, "out of memory" );
145         return VLC_EGENERIC;
146     }
147
148     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
149     var_Get( p_dec, "subsdec-align", &val );
150     p_sys->i_align = val.i_int;
151
152 #if defined(HAVE_ICONV)
153     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
154     {
155         msg_Dbg( p_dec, "using character encoding: %s",
156                  p_dec->fmt_in.subs.psz_encoding );
157         p_sys->iconv_handle =
158             iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
159     }
160     else
161     {
162         var_Create( p_dec, "subsdec-encoding",
163                     VLC_VAR_STRING | VLC_VAR_DOINHERIT );
164         var_Get( p_dec, "subsdec-encoding", &val );
165         if( !strcmp( val.psz_string, DEFAULT_NAME ) )
166         {
167             char *psz_charset =(char*)malloc( 100 );
168             vlc_current_charset( &psz_charset );
169             p_sys->iconv_handle = iconv_open( "UTF-8", psz_charset );
170             msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
171             free( psz_charset );
172         }
173         else if( val.psz_string )
174         {
175             msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
176             p_sys->iconv_handle = iconv_open( "UTF-8", val.psz_string );
177         }
178
179         if( p_sys->iconv_handle == (iconv_t)-1 )
180         {
181             msg_Warn( p_dec, "unable to do requested conversion" );
182         }
183
184         if( val.psz_string ) free( val.psz_string );
185     }
186 #else
187
188     msg_Dbg( p_dec, "no iconv support available" );
189 #endif
190
191     /* Load the text rendering module */
192     p_sys->p_render = vlc_object_create( p_dec, sizeof(filter_t) );
193     p_sys->p_render->pf_spu_buffer_new = spu_new_buffer;
194     p_sys->p_render->pf_spu_buffer_del = spu_del_buffer;
195     p_sys->p_render->p_owner = (filter_owner_sys_t *)p_dec;
196     vlc_object_attach( p_sys->p_render, p_dec );
197     p_sys->p_render->p_module =
198         module_Need( p_sys->p_render, "text renderer", 0, 0 );
199     if( p_sys->p_render->p_module == NULL )
200     {
201         msg_Warn( p_dec, "no suitable text renderer module" );
202         vlc_object_detach( p_sys->p_render );
203         vlc_object_destroy( p_sys->p_render );
204         p_sys->p_render = NULL;
205     }
206
207     return VLC_SUCCESS;
208 }
209
210 /****************************************************************************
211  * DecodeBlock: the whole thing
212  ****************************************************************************
213  * This function must be fed with complete subtitles units.
214  ****************************************************************************/
215 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
216 {
217     subpicture_t *p_spu;
218
219     if( !pp_block || *pp_block == NULL ) return NULL;
220
221     p_spu = ParseText( p_dec, *pp_block );
222
223     block_Release( *pp_block );
224     *pp_block = NULL;
225
226     return p_spu;
227 }
228
229 /*****************************************************************************
230  * CloseDecoder: clean up the decoder
231  *****************************************************************************/
232 static void CloseDecoder( vlc_object_t *p_this )
233 {
234     decoder_t *p_dec = (decoder_t *)p_this;
235     decoder_sys_t *p_sys = p_dec->p_sys;
236
237 #if defined(HAVE_ICONV)
238     if( p_sys->iconv_handle != (iconv_t)-1 )
239     {
240         iconv_close( p_sys->iconv_handle );
241     }
242 #endif
243
244     if( p_sys->p_render )
245     {
246         if( p_sys->p_render->p_module )
247             module_Unneed( p_sys->p_render, p_sys->p_render->p_module );
248
249         vlc_object_detach( p_sys->p_render );
250         vlc_object_destroy( p_sys->p_render );
251     }
252
253     free( p_sys );
254 }
255
256 /*****************************************************************************
257  * ParseText: parse an text subtitle packet and send it to the video output
258  *****************************************************************************/
259 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
260 {
261     decoder_sys_t *p_sys = p_dec->p_sys;
262     subpicture_t *p_spu = 0;
263     char *psz_subtitle;
264     int i_align_h, i_align_v;
265
266     /* We cannot display a subpicture with no date */
267     if( p_block->i_pts == 0 )
268     {
269         msg_Warn( p_dec, "subtitle without a date" );
270         return NULL;
271     }
272
273     /* Check validity of packet data */
274     if( p_block->i_buffer <= 1 ||  p_block->p_buffer[0] == '\0' )
275     {
276         msg_Warn( p_dec, "empty subtitle" );
277         return NULL;
278     }
279
280     /* Should be resiliant against bad subtitles */
281     psz_subtitle = strndup( p_block->p_buffer, p_block->i_buffer );
282
283     i_align_h = p_sys->i_align ? 20 : 0;
284     i_align_v = 10;
285
286 #if defined(HAVE_ICONV)
287     if( p_sys->iconv_handle != (iconv_t)-1 )
288     {
289         char *psz_new_subtitle;
290         char *psz_convert_buffer_out;
291         char *psz_convert_buffer_in;
292         size_t ret, inbytes_left, outbytes_left;
293
294         psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
295         psz_convert_buffer_out = psz_new_subtitle;
296         psz_convert_buffer_in = psz_subtitle;
297         inbytes_left = strlen( psz_subtitle );
298         outbytes_left = 6 * inbytes_left;
299         ret = iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
300                      &inbytes_left, &psz_convert_buffer_out, &outbytes_left );
301         *psz_convert_buffer_out = '\0';
302
303         if( inbytes_left )
304         {
305             msg_Warn( p_dec, "Failed to convert subtitle encoding, "
306                       "dropping subtitle.\nTry setting a different "
307                       "character-encoding for the subtitle." );
308             free( psz_subtitle );
309             return NULL;
310         }
311         else
312         {
313             free( psz_subtitle );
314             psz_subtitle = psz_new_subtitle;
315         }
316     }
317 #endif
318
319     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
320     {
321         /* Decode SSA strings */
322         /* We expect: ReadOrder, Layer, Style, Name, MarginL, MarginR,
323          * MarginV, Effect, Text */
324         char *psz_new_subtitle;
325         char *psz_buffer_sub;
326         int         i_comma;
327         int         i_text;
328
329         psz_buffer_sub = psz_subtitle;
330         for( ;; )
331         {
332             i_comma = 0;
333             while( i_comma < 8 &&
334                 *psz_buffer_sub != '\0' )
335             {
336                 if( *psz_buffer_sub == ',' )
337                 {
338                     i_comma++;
339                 }
340                 psz_buffer_sub++;
341             }
342             psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
343             i_text = 0;
344             while( psz_buffer_sub[0] != '\0' )
345             {
346                 if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] == 'n' ||
347                     psz_buffer_sub[1] == 'N' ) )
348                 {
349                     psz_new_subtitle[i_text] = '\n';
350                     i_text++;
351                     psz_buffer_sub += 2;
352                 }
353                 else if( psz_buffer_sub[0] == '{' &&
354                          psz_buffer_sub[1] == '\\' )
355                 {
356                     /* SSA control code */
357                     while( psz_buffer_sub[0] != '\0' &&
358                            psz_buffer_sub[0] != '}' )
359                     {
360                         psz_buffer_sub++;
361                     }
362                     psz_buffer_sub++;
363                 }
364                 else
365                 {
366                     psz_new_subtitle[i_text] = psz_buffer_sub[0];
367                     i_text++;
368                     psz_buffer_sub++;
369                 }
370             }
371             psz_new_subtitle[i_text] = '\0';
372             free( psz_subtitle );
373             psz_subtitle = psz_new_subtitle;
374             break;
375         }
376     }
377
378     StripTags( psz_subtitle );
379
380     if( p_sys->p_render && p_sys->p_render->p_module &&
381         p_sys->p_render->pf_render_string )
382     {
383         block_t *p_new_block = block_New( p_dec, strlen(psz_subtitle) + 1 );
384         if( p_new_block )
385         {
386             memcpy( p_new_block->p_buffer, psz_subtitle,
387                     p_new_block->i_buffer );
388             p_new_block->i_pts = p_new_block->i_dts = p_block->i_pts;
389             p_new_block->i_length = p_block->i_length;
390             p_spu = p_sys->p_render->pf_render_string( p_sys->p_render,
391                                                        p_new_block );
392         }
393     }
394
395     if( p_spu )
396     {
397         p_spu->i_flags = OSD_ALIGN_BOTTOM | p_sys->i_align;
398         p_spu->i_x = i_align_h;
399         p_spu->i_y = i_align_v;
400     }
401
402     free( psz_subtitle );
403     return p_spu;
404 }
405
/*****************************************************************************
 * StripTags: remove <...> tags from a string, in place
 *****************************************************************************
 * A tag starts at '<' and ends at the next '>' provided that no space, tab,
 * line feed or carriage return is met in between; such a character cancels
 * the tag and the text, '<' included, is kept as it is. A '<' that is never
 * closed is kept too.
 *****************************************************************************/
static void StripTags( char *psz_text )
{
    char *p_read  = psz_text;
    char *p_write = psz_text;
    char *p_tag   = NULL;    /* where the pending '<' was written, or NULL */

    for( ; *p_read != '\0'; p_read++ )
    {
        const char c = *p_read;

        if( p_tag == NULL )
        {
            /* Keep the '<' for now: it is only dropped if a '>' follows */
            if( c == '<' ) p_tag = p_write;
            *p_write++ = c;
        }
        else if( c == '>' )
        {
            /* Complete tag: rewind over everything written since its '<' */
            p_write = p_tag;
            p_tag = NULL;
        }
        else
        {
            /* Whitespace means this was not a tag after all. The character
             * itself must still be copied, otherwise a stale byte shows up
             * in the output once an earlier tag has been removed. */
            if( c == ' ' || c == '\t' || c == '\n' || c == '\r' )
            {
                p_tag = NULL;
            }
            *p_write++ = c;
        }
    }

    *p_write = '\0';
}
448
449 /*****************************************************************************
450  * Buffers allocation callbacks for the filters
451  *****************************************************************************/
452 static subpicture_t *spu_new_buffer( filter_t *p_filter )
453 {
454     decoder_t *p_dec = (decoder_t *)p_filter->p_owner;
455     return p_dec->pf_spu_buffer_new( p_dec );
456 }
457
458 static void spu_del_buffer( filter_t *p_filter, subpicture_t *p_spu )
459 {
460     decoder_t *p_dec = (decoder_t *)p_filter->p_owner;
461     p_dec->pf_spu_buffer_del( p_dec, p_spu );
462 }