]> git.sesse.net Git - vlc/blob - modules/codec/subsdec.c
* src/video_output/vout_subpictures.c : New OSD channels
[vlc] / modules / codec / subsdec.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2001 VideoLAN
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@netcourrier.com>
8  *          Samuel Hocevar <sam@zoy.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28 #include <vlc/vlc.h>
29 #include <vlc/vout.h>
30 #include <vlc/decoder.h>
31
32 #include <osd.h>
33
34 #if defined(HAVE_ICONV)
35 #include <iconv.h>
36 #endif
37
38 #include "charset.h"
39
40 /*****************************************************************************
41  * decoder_sys_t : decoder descriptor
42  *****************************************************************************/
43 struct decoder_sys_t
44 {
45     int                 i_align;          /* Subtitles alignment on the vout */
46     int                 i_subpic_channel;    /* Subpic channel for subtitles */
47
48     vout_thread_t       *p_vout;                           /* last vout used */
49
50 #if defined(HAVE_ICONV)
51     iconv_t             iconv_handle;            /* handle to iconv instance */
52 #endif
53 };
54
55 /*****************************************************************************
56  * Local prototypes
57  *****************************************************************************/
58 static int  OpenDecoder   ( vlc_object_t * );
59 static void CloseDecoder  ( vlc_object_t * );
60
61 static void DecodeBlock   ( decoder_t *, block_t ** );
62
63 static void ParseText     ( decoder_t *, block_t *, vout_thread_t * );
64 static void StripTags     ( char * );
65
66 #define DEFAULT_NAME "System Default"
67
68 /*****************************************************************************
69  * Module descriptor.
70  *****************************************************************************/
#if defined(HAVE_ICONV)
/* Choices offered for the "subsdec-encoding" option.  The empty strings split
 * the list into groups (presumably shown as separators by the interface --
 * TODO confirm). */
static char *ppsz_encodings[] =
{
    DEFAULT_NAME, "ASCII", "UTF-8",
    "",
    "ISO-8859-1", "CP1252", "MacRoman", "MacIceland", "ISO-8859-15",
    "",
    "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania",
    "",
    "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine",
    "KOI8-R", "KOI8-U", "KOI8-RU",
    "",
    "ISO-8859-6", "CP1256", "MacArabic",
    "",
    "ISO-8859-7", "CP1253", "MacGreek",
    "",
    "ISO-8859-8", "CP1255", "MacHebrew",
    "",
    "ISO-8859-9", "CP1254", "MacTurkish",
    "",
    "ISO-8859-13", "CP1257",
    "",
    "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS",
    "",
    "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS",
    "",
    "ISO-2022-KR", "EUC-KR",
    "",
    "MacThai", "KOI8-T",
    "",
    "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16",
    "",
    "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950",
    "CP1133", "CP1258",
    "",
    "Macintosh",
    "",
    "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE",
    "UTF-32", "UTF-32BE", "UTF-32LE",
    "C99", "JAVA",
    "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE",
    "",
    "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
    "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1",
    "VISCII", "TCVN",
    "HPROMAN8", "NEXTSTEP"
};
#endif
94
95 static int  pi_justification[] = { 0, 1, 2 };
96 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
97
98 #define ENCODING_TEXT N_("Subtitles text encoding")
99 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
100 #define ALIGN_TEXT N_("Subtitles justification")
101 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
102
103 vlc_module_begin();
104     set_description( _("text subtitles decoder") );
105     set_capability( "decoder", 50 );
106     set_callbacks( OpenDecoder, CloseDecoder );
107
108     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
109                  VLC_TRUE );
110         change_integer_list( pi_justification, ppsz_justification_text, 0 );
111 #if defined(HAVE_ICONV)
112     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
113                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
114         change_string_list( ppsz_encodings, 0, 0 );
115 #endif
116 vlc_module_end();
117
118 /*****************************************************************************
119  * OpenDecoder: probe the decoder and return score
120  *****************************************************************************
121  * Tries to launch a decoder and return score so that the interface is able
122  * to chose.
123  *****************************************************************************/
124 static int OpenDecoder( vlc_object_t *p_this )
125 {
126     decoder_t     *p_dec = (decoder_t*)p_this;
127     decoder_sys_t *p_sys;
128     vlc_value_t val;
129
130     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
131         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
132     {
133         return VLC_EGENERIC;
134     }
135
136     p_dec->pf_decode_sub = DecodeBlock;
137
138     /* Allocate the memory needed to store the decoder's structure */
139     if( ( p_dec->p_sys = p_sys =
140           (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
141     {
142         msg_Err( p_dec, "out of memory" );
143         return VLC_EGENERIC;
144     }
145
146     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
147     var_Get( p_dec, "subsdec-align", &val );
148     p_sys->i_align = val.i_int;
149
150 #if defined(HAVE_ICONV)
151     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
152     {
153         msg_Dbg( p_dec, "using character encoding: %s",
154                  p_dec->fmt_in.subs.psz_encoding );
155         p_sys->iconv_handle = iconv_open( "UTF-8",
156                                           p_dec->fmt_in.subs.psz_encoding );
157     }
158     else
159     {
160         var_Create( p_dec, "subsdec-encoding",
161                     VLC_VAR_STRING | VLC_VAR_DOINHERIT );
162         var_Get( p_dec, "subsdec-encoding", &val );
163         if( !strcmp( val.psz_string, DEFAULT_NAME ) )
164         {
165             char *psz_charset =(char*)malloc( 100 );
166             vlc_current_charset( &psz_charset );
167             p_sys->iconv_handle = iconv_open( "UTF-8", psz_charset );
168             msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
169             free( psz_charset );
170         }
171         else if( val.psz_string )
172         {
173             msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
174             p_sys->iconv_handle = iconv_open( "UTF-8", val.psz_string );
175         }
176
177         if( p_sys->iconv_handle == (iconv_t)-1 )
178         {
179             msg_Warn( p_dec, "unable to do requested conversion" );
180         }
181
182         if( val.psz_string ) free( val.psz_string );
183     }
184 #else
185
186     msg_Dbg( p_dec, "no iconv support available" );
187 #endif
188     
189     p_dec->p_sys->p_vout = NULL;
190
191     return VLC_SUCCESS;
192 }
193
194 /****************************************************************************
195  * DecodeBlock: the whole thing
196  ****************************************************************************
197  * This function must be fed with complete subtitles units.
198  ****************************************************************************/
199 static void DecodeBlock( decoder_t *p_dec, block_t **pp_block )
200 {
201     vout_thread_t *p_vout;
202
203     if( !pp_block || *pp_block == NULL )
204     {
205         return;
206     }
207
208     /* Here we are dealing with text subtitles */
209     p_vout = vlc_object_find( p_dec, VLC_OBJECT_VOUT, FIND_ANYWHERE );
210     if( p_vout )
211     {
212         if( p_dec->p_sys->p_vout != p_vout )
213         {
214             p_dec->p_sys->i_subpic_channel = vout_RegisterOSDChannel( p_vout );
215         }                
216         ParseText( p_dec, *pp_block, p_vout );
217         vlc_object_release( p_vout );
218     }
219     else
220     {
221         msg_Warn( p_dec, "couldn't find a video output, trashing subtitle" );
222     }
223     p_dec->p_sys->p_vout = p_vout;
224
225     block_Release( *pp_block );
226     *pp_block = NULL;
227 }
228
229 /*****************************************************************************
230  * CloseDecoder: clean up the decoder
231  *****************************************************************************/
232 static void CloseDecoder( vlc_object_t *p_this )
233 {
234     decoder_t *p_dec = (decoder_t *)p_this;
235     decoder_sys_t *p_sys = p_dec->p_sys;
236     vout_thread_t *p_vout;
237
238     p_vout = vlc_object_find( p_dec, VLC_OBJECT_VOUT, FIND_ANYWHERE );
239     if( p_vout != NULL && p_vout->p_subpicture != NULL )
240     {
241         subpicture_t *p_subpic;
242         int          i_subpic;
243
244         for( i_subpic = 0; i_subpic < VOUT_MAX_SUBPICTURES; i_subpic++ )
245         {
246             p_subpic = &p_vout->p_subpicture[i_subpic];
247
248             if( p_subpic != NULL &&
249               ( p_subpic->i_status == RESERVED_SUBPICTURE
250                 || p_subpic->i_status == READY_SUBPICTURE ) )
251             {
252                 vout_DestroySubPicture( p_vout, p_subpic );
253             }
254         }
255     }
256     if( p_vout ) vlc_object_release( p_vout );
257
258 #if defined(HAVE_ICONV)
259     if( p_sys->iconv_handle != (iconv_t)-1 )
260     {
261         iconv_close( p_sys->iconv_handle );
262     }
263 #endif
264
265     free( p_sys );
266 }
267
268 /*****************************************************************************
269  * ParseText: parse an text subtitle packet and send it to the video output
270  *****************************************************************************/
271 static void ParseText( decoder_t *p_dec, block_t *p_block,
272                        vout_thread_t *p_vout )
273 {
274     decoder_sys_t *p_sys = p_dec->p_sys;
275     char *psz_subtitle;
276     int i_align_h, i_align_v;
277
278     /* We cannot display a subpicture with no date */
279     if( p_block->i_pts == 0 )
280     {
281         msg_Warn( p_dec, "subtitle without a date" );
282         return;
283     }
284
285     /* Check validity of packet data */
286     if( p_block->i_buffer <= 1 ||  p_block->p_buffer[0] == '\0' )
287     {
288         msg_Warn( p_dec, "empty subtitle" );
289         return;
290     }
291
292     /* Should be resiliant against bad subtitles */
293     psz_subtitle = strndup( p_block->p_buffer, p_block->i_buffer );
294
295     i_align_h = p_sys->i_align ? 20 : 0;
296     i_align_v = 10;
297
298 #if defined(HAVE_ICONV)
299     if( p_sys->iconv_handle != (iconv_t)-1 )
300     {
301         char *psz_new_subtitle;
302         char *psz_convert_buffer_out;
303         char *psz_convert_buffer_in;
304         size_t ret, inbytes_left, outbytes_left;
305
306         psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
307         psz_convert_buffer_out = psz_new_subtitle;
308         psz_convert_buffer_in = psz_subtitle;
309         inbytes_left = strlen( psz_subtitle );
310         outbytes_left = 6 * inbytes_left;
311         ret = iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
312                      &inbytes_left, &psz_convert_buffer_out, &outbytes_left );
313         *psz_convert_buffer_out = '\0';
314
315         if( inbytes_left )
316         {
317             msg_Warn( p_dec, "Failed to convert subtitle encoding, dropping subtitle.\nTry setting a different character-encoding for the subtitle." );
318             free( psz_subtitle );
319             return;
320         }
321         else
322         {
323             free( psz_subtitle );
324             psz_subtitle = psz_new_subtitle;
325         }
326     }
327 #endif
328
329     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
330     {
331         /* Decode SSA strings */
332         /* We expect: ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text */
333         char *psz_new_subtitle;
334         char *psz_buffer_sub;
335         int         i_comma;
336         int         i_text;
337
338         psz_buffer_sub = psz_subtitle;
339         for( ;; )
340         {
341             i_comma = 0;
342             while( i_comma < 8 &&
343                 *psz_buffer_sub != '\0' )
344             {
345                 if( *psz_buffer_sub == ',' )
346                 {
347                     i_comma++;
348                 }
349                 psz_buffer_sub++;
350             }
351             psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
352             i_text = 0;
353             while( psz_buffer_sub[0] != '\0' )
354             {
355                 if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] =='n' || psz_buffer_sub[1] =='N' ) )
356                 {
357                     psz_new_subtitle[i_text] = '\n';
358                     i_text++;
359                     psz_buffer_sub += 2;
360                 }
361                 else if( psz_buffer_sub[0] == '{' && psz_buffer_sub[1] == '\\' )
362                 {
363                     /* SSA control code */
364                     while( psz_buffer_sub[0] != '\0' && psz_buffer_sub[0] != '}' )
365                     {
366                         psz_buffer_sub++;
367                     }
368                     psz_buffer_sub++;
369                 }
370                 else
371                 {
372                     psz_new_subtitle[i_text] = psz_buffer_sub[0];
373                     i_text++;
374                     psz_buffer_sub++;
375                 }
376             }
377             psz_new_subtitle[i_text] = '\0';
378             free( psz_subtitle );
379             psz_subtitle = psz_new_subtitle;
380             break;
381         }
382     }
383     StripTags( psz_subtitle );
384     vout_ShowTextAbsolute( p_vout, p_sys->i_subpic_channel, psz_subtitle, NULL,
385         OSD_ALIGN_BOTTOM | p_sys->i_align, i_align_h,
386         i_align_v, p_block->i_pts,
387         p_block->i_length ? p_block->i_pts + p_block->i_length : 0 );
388
389     free( psz_subtitle );
390 }
391
/*****************************************************************************
 * StripTags: remove <tag> sequences from a string, in place
 *****************************************************************************
 * A tag starts at '<' and ends at the next '>' provided no space, tab, line
 * feed or carriage return appears in between; such a sequence is removed
 * together with its brackets.  A '<' followed by one of those characters
 * before any '>' is kept as ordinary text, as is an unterminated '<'.
 *
 * psz_text must be a writable NUL-terminated string; the result is never
 * longer than the input.
 *****************************************************************************/
static void StripTags( char *psz_text )
{
    const char *p_read = psz_text;
    char *p_write = psz_text;
    /* Output position of the '<' opening the current tag candidate, or NULL
     * when we are not inside a candidate */
    char *p_tag = NULL;

    for( ; *p_read != '\0'; p_read++ )
    {
        const char c = *p_read;

        if( p_tag == NULL )
        {
            if( c == '<' )
            {
                p_tag = p_write;
            }
            *p_write++ = c;
        }
        else if( c == '>' )
        {
            /* Complete tag: rewind the output to where the '<' was written */
            p_write = p_tag;
            p_tag = NULL;
        }
        else
        {
            if( c == ' ' || c == '\t' || c == '\n' || c == '\r' )
            {
                /* Not a tag after all: keep what was copied so far */
                p_tag = NULL;
            }
            /* Every byte of a candidate is copied, including the whitespace
             * that cancels it, so the output stays consistent once earlier
             * tags have shifted the text to the left */
            *p_write++ = c;
        }
    }
    *p_write = '\0';
}