]> git.sesse.net Git - vlc/blob - modules/codec/subsdec.c
allow subpel option in the 1 to 6 range for X264_BUILD >= 30 (r262)
[vlc] / modules / codec / subsdec.c
1 /*****************************************************************************
2  * subsdec.c : text subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2001 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Gildas Bazin <gbazin@videolan.org>
8  *          Samuel Hocevar <sam@zoy.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28 #include <vlc/vlc.h>
29 #include <vlc/vout.h>
30 #include <vlc/decoder.h>
31
32 #include "vlc_osd.h"
33 #include "vlc_filter.h"
34
35 #include "charset.h"
36
37 /*****************************************************************************
38  * decoder_sys_t : decoder descriptor
39  *****************************************************************************/
40 struct decoder_sys_t
41 {
42     int                 i_align;          /* Subtitles alignment on the vout */
43     vlc_iconv_t         iconv_handle;            /* handle to iconv instance */
44 };
45
46 /*****************************************************************************
47  * Local prototypes
48  *****************************************************************************/
49 static int  OpenDecoder   ( vlc_object_t * );
50 static void CloseDecoder  ( vlc_object_t * );
51
52 static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
53 static subpicture_t *ParseText     ( decoder_t *, block_t * );
54 static void         StripTags      ( char * );
55
56 #define DEFAULT_NAME "System Default"
57
58 /*****************************************************************************
59  * Module descriptor.
60  *****************************************************************************/
61 static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
62     "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
63     "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
64     "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
65     "ISO-8859-6", "CP1256", "MacArabic", "",
66     "ISO-8859-7", "CP1253", "MacGreek", "",
67     "ISO-8859-8", "CP1255", "MacHebrew", "",
68     "ISO-8859-9", "CP1254", "MacTurkish", "",
69     "ISO-8859-13", "CP1257", "",
70     "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
71     "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
72     "ISO-2022-KR", "EUC-KR", "",
73     "MacThai", "KOI8-T", "",
74     "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
75     "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
76     "Macintosh", "",
77     "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
78     "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
79     "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
80     "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
81     "HPROMAN8", "NEXTSTEP" };
82
83 static int  pi_justification[] = { 0, 1, 2 };
84 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
85
86 #define ENCODING_TEXT N_("Subtitles text encoding")
87 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
88 #define ALIGN_TEXT N_("Subtitles justification")
89 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
90
91 vlc_module_begin();
92     set_shortname( _("Subtitles"));
93     set_description( _("Text subtitles decoder") );
94     set_capability( "decoder", 50 );
95     set_callbacks( OpenDecoder, CloseDecoder );
96     set_category( CAT_INPUT );
97     set_subcategory( SUBCAT_INPUT_SCODEC );
98
99     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
100                  VLC_FALSE );
101         change_integer_list( pi_justification, ppsz_justification_text, 0 );
102     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
103                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
104         change_string_list( ppsz_encodings, 0, 0 );
105 vlc_module_end();
106
107 /*****************************************************************************
108  * OpenDecoder: probe the decoder and return score
109  *****************************************************************************
110  * Tries to launch a decoder and return score so that the interface is able
111  * to chose.
112  *****************************************************************************/
113 static int OpenDecoder( vlc_object_t *p_this )
114 {
115     decoder_t     *p_dec = (decoder_t*)p_this;
116     decoder_sys_t *p_sys;
117     vlc_value_t val;
118
119     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
120         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
121     {
122         return VLC_EGENERIC;
123     }
124
125     p_dec->pf_decode_sub = DecodeBlock;
126
127     /* Allocate the memory needed to store the decoder's structure */
128     if( ( p_dec->p_sys = p_sys =
129           (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
130     {
131         msg_Err( p_dec, "out of memory" );
132         return VLC_EGENERIC;
133     }
134
135     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
136     var_Get( p_dec, "subsdec-align", &val );
137     p_sys->i_align = val.i_int;
138
139     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
140     {
141         msg_Dbg( p_dec, "using character encoding: %s",
142                  p_dec->fmt_in.subs.psz_encoding );
143         p_sys->iconv_handle =
144             vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
145     }
146     else
147     {
148         var_Create( p_dec, "subsdec-encoding",
149                     VLC_VAR_STRING | VLC_VAR_DOINHERIT );
150         var_Get( p_dec, "subsdec-encoding", &val );
151         if( !strcmp( val.psz_string, DEFAULT_NAME ) )
152         {
153             char *psz_charset =(char*)malloc( 100 );
154             vlc_current_charset( &psz_charset );
155             p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
156             msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
157             free( psz_charset );
158         }
159         else if( val.psz_string )
160         {
161             msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
162             p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
163         }
164
165         if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
166         {
167             msg_Warn( p_dec, "unable to do requested conversion" );
168         }
169
170         if( val.psz_string ) free( val.psz_string );
171     }
172
173     return VLC_SUCCESS;
174 }
175
176 /****************************************************************************
177  * DecodeBlock: the whole thing
178  ****************************************************************************
179  * This function must be fed with complete subtitles units.
180  ****************************************************************************/
181 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
182 {
183     subpicture_t *p_spu;
184
185     if( !pp_block || *pp_block == NULL ) return NULL;
186
187     p_spu = ParseText( p_dec, *pp_block );
188
189     block_Release( *pp_block );
190     *pp_block = NULL;
191
192     return p_spu;
193 }
194
195 /*****************************************************************************
196  * CloseDecoder: clean up the decoder
197  *****************************************************************************/
198 static void CloseDecoder( vlc_object_t *p_this )
199 {
200     decoder_t *p_dec = (decoder_t *)p_this;
201     decoder_sys_t *p_sys = p_dec->p_sys;
202
203     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
204     {
205         vlc_iconv_close( p_sys->iconv_handle );
206     }
207
208     free( p_sys );
209 }
210
211 /*****************************************************************************
212  * ParseText: parse an text subtitle packet and send it to the video output
213  *****************************************************************************/
214 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
215 {
216     decoder_sys_t *p_sys = p_dec->p_sys;
217     subpicture_t *p_spu = 0;
218     char *psz_subtitle;
219     int i_align_h, i_align_v;
220     video_format_t fmt;
221
222     /* We cannot display a subpicture with no date */
223     if( p_block->i_pts == 0 )
224     {
225         msg_Warn( p_dec, "subtitle without a date" );
226         return NULL;
227     }
228
229     /* Check validity of packet data */
230     if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' )
231     {
232         msg_Warn( p_dec, "empty subtitle" );
233         return NULL;
234     }
235
236     /* Should be resiliant against bad subtitles */
237     psz_subtitle = strndup( (const char *)p_block->p_buffer,
238                             p_block->i_buffer );
239
240     i_align_h = p_sys->i_align ? 20 : 0;
241     i_align_v = 10;
242
243     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
244     {
245         char *psz_new_subtitle;
246         char *psz_convert_buffer_out;
247         char *psz_convert_buffer_in;
248         size_t ret, inbytes_left, outbytes_left;
249
250         psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
251         psz_convert_buffer_out = psz_new_subtitle;
252         psz_convert_buffer_in = psz_subtitle;
253         inbytes_left = strlen( psz_subtitle );
254         outbytes_left = 6 * inbytes_left;
255         ret = vlc_iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
256                          &inbytes_left, &psz_convert_buffer_out,
257                          &outbytes_left );
258         *psz_convert_buffer_out = '\0';
259
260         if( inbytes_left )
261         {
262             msg_Warn( p_dec, "Failed to convert subtitle encoding, "
263                       "dropping subtitle.\nTry setting a different "
264                       "character-encoding for the subtitle." );
265             free( psz_subtitle );
266             return NULL;
267         }
268         else
269         {
270             free( psz_subtitle );
271             psz_subtitle = psz_new_subtitle;
272         }
273     }
274
275     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
276     {
277         /* Decode SSA strings */
278         /* We expect: ReadOrder, Layer, Style, Name, MarginL, MarginR,
279          * MarginV, Effect, Text */
280         char *psz_new_subtitle;
281         char *psz_buffer_sub;
282         int         i_comma;
283         int         i_text;
284
285         psz_buffer_sub = psz_subtitle;
286         for( ;; )
287         {
288             i_comma = 0;
289             while( i_comma < 8 &&
290                 *psz_buffer_sub != '\0' )
291             {
292                 if( *psz_buffer_sub == ',' )
293                 {
294                     i_comma++;
295                 }
296                 psz_buffer_sub++;
297             }
298             psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
299             i_text = 0;
300             while( psz_buffer_sub[0] != '\0' )
301             {
302                 if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] == 'n' ||
303                     psz_buffer_sub[1] == 'N' ) )
304                 {
305                     psz_new_subtitle[i_text] = '\n';
306                     i_text++;
307                     psz_buffer_sub += 2;
308                 }
309                 else if( psz_buffer_sub[0] == '{' &&
310                          psz_buffer_sub[1] == '\\' )
311                 {
312                     /* SSA control code */
313                     while( psz_buffer_sub[0] != '\0' &&
314                            psz_buffer_sub[0] != '}' )
315                     {
316                         psz_buffer_sub++;
317                     }
318                     psz_buffer_sub++;
319                 }
320                 else
321                 {
322                     psz_new_subtitle[i_text] = psz_buffer_sub[0];
323                     i_text++;
324                     psz_buffer_sub++;
325                 }
326             }
327             psz_new_subtitle[i_text] = '\0';
328             free( psz_subtitle );
329             psz_subtitle = psz_new_subtitle;
330             break;
331         }
332     }
333
334     StripTags( psz_subtitle );
335
336     p_spu = p_dec->pf_spu_buffer_new( p_dec );
337     if( !p_spu )
338     {
339         msg_Warn( p_dec, "can't get spu buffer" );
340         free( psz_subtitle );
341         return 0;
342     }
343
344     /* Create a new subpicture region */
345     memset( &fmt, 0, sizeof(video_format_t) );
346     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
347     fmt.i_aspect = 0;
348     fmt.i_width = fmt.i_height = 0;
349     fmt.i_x_offset = fmt.i_y_offset = 0;
350     p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
351     if( !p_spu->p_region )
352     {
353         msg_Err( p_dec, "cannot allocate SPU region" );
354         free( psz_subtitle );
355         p_dec->pf_spu_buffer_del( p_dec, p_spu );
356         return 0;
357     }
358
359     p_spu->p_region->psz_text = psz_subtitle;
360     p_spu->i_start = p_block->i_pts;
361     p_spu->i_stop = p_block->i_pts + p_block->i_length;
362     p_spu->b_ephemer = (p_block->i_length == 0);
363     p_spu->b_absolute = VLC_FALSE;
364
365     p_spu->i_flags = OSD_ALIGN_BOTTOM | p_sys->i_align;
366     p_spu->i_x = i_align_h;
367     p_spu->i_y = i_align_v;
368
369     return p_spu;
370 }
371
/*****************************************************************************
 * StripTags: remove "<...>" tags from a string, in place
 *****************************************************************************
 * A tag runs from a '<' to the next '>'. If a space, tab, line feed or
 * carriage return is met before that '>', the '<' did not open a tag and
 * everything read so far is kept. A '<' left unterminated at the end of the
 * string is kept as well. The result is never longer than the input.
 *****************************************************************************/
static void StripTags( char *psz_text )
{
    size_t i_read;
    size_t i_write = 0;       /* where the next kept character goes */
    size_t i_tag_write = 0;   /* where the pending '<' has been written */
    int    b_inside_tag = 0;

    for( i_read = 0; psz_text[ i_read ] != '\0'; i_read++ )
    {
        const char c = psz_text[ i_read ];

        if( b_inside_tag && c == '>' )
        {
            /* Complete tag: forget everything copied since its '<' */
            i_write = i_tag_write;
            b_inside_tag = 0;
            continue;
        }

        if( !b_inside_tag )
        {
            if( c == '<' )
            {
                /* Tentative tag: keep copying, but remember where it starts
                 * in the output in case it has to be discarded */
                b_inside_tag = 1;
                i_tag_write = i_write;
            }
        }
        else if( c == ' ' || c == '\t' || c == '\n' || c == '\r' )
        {
            /* Whitespace before any '>': this was not a tag after all. The
             * whitespace itself is copied below like any other character. */
            b_inside_tag = 0;
        }

        psz_text[ i_write++ ] = c;
    }
    psz_text[ i_write ] = '\0';
}