]> git.sesse.net Git - vlc/blob - modules/codec/subtitles/subsusf.c
Subsdec: Split the decoder for subsdec / SSA / USF in three files, since the code...
[vlc] / modules / codec / subtitles / subsusf.c
1 /*****************************************************************************
2  * subsusf.c : USF subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2006 the VideoLAN team
5  * $Id: subsdec.c 20996 2007-08-05 20:01:21Z jb $
6  *
7  * Authors: Gildas Bazin <gbazin@videolan.org>
8  *          Samuel Hocevar <sam@zoy.org>
9  *          Derk-Jan Hartman <hartman at videolan dot org>
10  *          Bernie Purcell <b dot purcell at adbglobal dot com>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26
27 #include "subsdec.h"
28
29 static void ParseUSFHeaderTags( decoder_t *p_dec, xml_reader_t *p_xml_reader )
30 {
31     decoder_sys_t *p_sys = p_dec->p_sys;
32     char *psz_node;
33     ssa_style_t *p_style = NULL;
34     int i_style_level = 0;
35     int i_metadata_level = 0;
36
37     while ( xml_ReaderRead( p_xml_reader ) == 1 )
38     {
39         switch ( xml_ReaderNodeType( p_xml_reader ) )
40         {
41             case XML_READER_TEXT:
42             case XML_READER_NONE:
43                 break;
44             case XML_READER_ENDELEM:
45                 psz_node = xml_ReaderName( p_xml_reader );
46
47                 if( !psz_node )
48                     break;
49                 switch (i_style_level)
50                 {
51                     case 0:
52                         if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) )
53                         {
54                             i_metadata_level--;
55                         }
56                         break;
57                     case 1:
58                         if( !strcasecmp( "styles", psz_node ) )
59                         {
60                             i_style_level--;
61                         }
62                         break;
63                     case 2:
64                         if( !strcasecmp( "style", psz_node ) )
65                         {
66                             TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
67
68                             p_style = NULL;
69                             i_style_level--;
70                         }
71                         break;
72                 }
73
74                 free( psz_node );
75                 break;
76             case XML_READER_STARTELEM:
77                 psz_node = xml_ReaderName( p_xml_reader );
78
79                 if( !psz_node )
80                     break;
81
82                 if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) )
83                 {
84                     i_metadata_level++;
85                 }
86                 else if( !strcasecmp( "resolution", psz_node ) &&
87                          ( i_metadata_level == 1) )
88                 {
89                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
90                     {
91                         char *psz_name = xml_ReaderName ( p_xml_reader );
92                         char *psz_value = xml_ReaderValue ( p_xml_reader );
93
94                         if( psz_name && psz_value )
95                         {
96                             if( !strcasecmp( "x", psz_name ) )
97                                 p_sys->i_original_width = atoi( psz_value );
98                             else if( !strcasecmp( "y", psz_name ) )
99                                 p_sys->i_original_height = atoi( psz_value );
100                         }
101                         if( psz_name )  free( psz_name );
102                         if( psz_value ) free( psz_value );
103                     }
104                 }
105                 else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) )
106                 {
107                     i_style_level++;
108                 }
109                 else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) )
110                 {
111                     i_style_level++;
112
113                     p_style = calloc( 1, sizeof(ssa_style_t) );
114                     if( ! p_style )
115                     {
116                         msg_Err( p_dec, "out of memory" );
117                         free( psz_node );
118                         break;
119                     }
120                     /* All styles are supposed to default to Default, and then
121                      * one or more settings are over-ridden.
122                      * At the moment this only effects styles defined AFTER
123                      * Default in the XML
124                      */
125                     int i;
126                     for( i = 0; i < p_sys->i_ssa_styles; i++ )
127                     {
128                         if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
129                         {
130                             ssa_style_t *p_default_style = p_sys->pp_ssa_styles[i];
131
132                             memcpy( p_style, p_default_style, sizeof( ssa_style_t ) );
133                             p_style->font_style.psz_fontname = strdup( p_style->font_style.psz_fontname );
134                             p_style->psz_stylename = NULL;
135                         }
136                     }
137
138                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
139                     {
140                         char *psz_name = xml_ReaderName ( p_xml_reader );
141                         char *psz_value = xml_ReaderValue ( p_xml_reader );
142
143                         if( psz_name && psz_value )
144                         {
145                             if( !strcasecmp( "name", psz_name ) )
146                                 p_style->psz_stylename = strdup( psz_value);
147                         }
148                         if( psz_name )  free( psz_name );
149                         if( psz_value ) free( psz_value );
150                     }
151                 }
152                 else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) )
153                 {
154                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
155                     {
156                         char *psz_name = xml_ReaderName ( p_xml_reader );
157                         char *psz_value = xml_ReaderValue ( p_xml_reader );
158
159                         if( psz_name && psz_value )
160                         {
161                             if( !strcasecmp( "face", psz_name ) )
162                             {
163                                 if( p_style->font_style.psz_fontname )
164                                     free( p_style->font_style.psz_fontname );
165                                 p_style->font_style.psz_fontname = strdup( psz_value );
166                             }
167                             else if( !strcasecmp( "size", psz_name ) )
168                             {
169                                 if( ( *psz_value == '+' ) || ( *psz_value == '-' ) )
170                                 {
171                                     int i_value = atoi( psz_value );
172
173                                     if( ( i_value >= -5 ) && ( i_value <= 5 ) )
174                                         p_style->font_style.i_font_size  +=
175                                             ( i_value * p_style->font_style.i_font_size ) / 10;
176                                     else if( i_value < -5 )
177                                         p_style->font_style.i_font_size  = - i_value;
178                                     else if( i_value > 5 )
179                                         p_style->font_style.i_font_size  = i_value;
180                                 }
181                                 else
182                                     p_style->font_style.i_font_size  = atoi( psz_value );
183                             }
184                             else if( !strcasecmp( "italic", psz_name ) )
185                             {
186                                 if( !strcasecmp( "yes", psz_value ))
187                                     p_style->font_style.i_style_flags |= STYLE_ITALIC;
188                                 else
189                                     p_style->font_style.i_style_flags &= ~STYLE_ITALIC;
190                             }
191                             else if( !strcasecmp( "weight", psz_name ) )
192                             {
193                                 if( !strcasecmp( "bold", psz_value ))
194                                     p_style->font_style.i_style_flags |= STYLE_BOLD;
195                                 else
196                                     p_style->font_style.i_style_flags &= ~STYLE_BOLD;
197                             }
198                             else if( !strcasecmp( "underline", psz_name ) )
199                             {
200                                 if( !strcasecmp( "yes", psz_value ))
201                                     p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
202                                 else
203                                     p_style->font_style.i_style_flags &= ~STYLE_UNDERLINE;
204                             }
205                             else if( !strcasecmp( "color", psz_name ) )
206                             {
207                                 if( *psz_value == '#' )
208                                 {
209                                     unsigned long col = strtol(psz_value+1, NULL, 16);
210                                     p_style->font_style.i_font_color = (col & 0x00ffffff);
211                                     p_style->font_style.i_font_alpha = (col >> 24) & 0xff;
212                                 }
213                             }
214                             else if( !strcasecmp( "outline-color", psz_name ) )
215                             {
216                                 if( *psz_value == '#' )
217                                 {
218                                     unsigned long col = strtol(psz_value+1, NULL, 16);
219                                     p_style->font_style.i_outline_color = (col & 0x00ffffff);
220                                     p_style->font_style.i_outline_alpha = (col >> 24) & 0xff;
221                                 }
222                             }
223                             else if( !strcasecmp( "outline-level", psz_name ) )
224                             {
225                                 p_style->font_style.i_outline_width = atoi( psz_value );
226                             }
227                             else if( !strcasecmp( "shadow-color", psz_name ) )
228                             {
229                                 if( *psz_value == '#' )
230                                 {
231                                     unsigned long col = strtol(psz_value+1, NULL, 16);
232                                     p_style->font_style.i_shadow_color = (col & 0x00ffffff);
233                                     p_style->font_style.i_shadow_alpha = (col >> 24) & 0xff;
234                                 }
235                             }
236                             else if( !strcasecmp( "shadow-level", psz_name ) )
237                             {
238                                 p_style->font_style.i_shadow_width = atoi( psz_value );
239                             }
240                             else if( !strcasecmp( "back-color", psz_name ) )
241                             {
242                                 if( *psz_value == '#' )
243                                 {
244                                     unsigned long col = strtol(psz_value+1, NULL, 16);
245                                     p_style->font_style.i_karaoke_background_color = (col & 0x00ffffff);
246                                     p_style->font_style.i_karaoke_background_alpha = (col >> 24) & 0xff;
247                                 }
248                             }
249                             else if( !strcasecmp( "spacing", psz_name ) )
250                             {
251                                 p_style->font_style.i_spacing = atoi( psz_value );
252                             }
253                         }
254                         if( psz_name )  free( psz_name );
255                         if( psz_value ) free( psz_value );
256                     }
257                 }
258                 else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) )
259                 {
260                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
261                     {
262                         char *psz_name = xml_ReaderName ( p_xml_reader );
263                         char *psz_value = xml_ReaderValue ( p_xml_reader );
264
265                         if( psz_name && psz_value )
266                         {
267                             if( !strcasecmp( "alignment", psz_name ) )
268                             {
269                                 if( !strcasecmp( "TopLeft", psz_value ) )
270                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
271                                 else if( !strcasecmp( "TopCenter", psz_value ) )
272                                     p_style->i_align = SUBPICTURE_ALIGN_TOP;
273                                 else if( !strcasecmp( "TopRight", psz_value ) )
274                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
275                                 else if( !strcasecmp( "MiddleLeft", psz_value ) )
276                                     p_style->i_align = SUBPICTURE_ALIGN_LEFT;
277                                 else if( !strcasecmp( "MiddleCenter", psz_value ) )
278                                     p_style->i_align = 0;
279                                 else if( !strcasecmp( "MiddleRight", psz_value ) )
280                                     p_style->i_align = SUBPICTURE_ALIGN_RIGHT;
281                                 else if( !strcasecmp( "BottomLeft", psz_value ) )
282                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
283                                 else if( !strcasecmp( "BottomCenter", psz_value ) )
284                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM;
285                                 else if( !strcasecmp( "BottomRight", psz_value ) )
286                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
287                             }
288                             else if( !strcasecmp( "horizontal-margin", psz_name ) )
289                             {
290                                 if( strchr( psz_value, '%' ) )
291                                 {
292                                     p_style->i_margin_h = 0;
293                                     p_style->i_margin_percent_h = atoi( psz_value );
294                                 }
295                                 else
296                                 {
297                                     p_style->i_margin_h = atoi( psz_value );
298                                     p_style->i_margin_percent_h = 0;
299                                 }
300                             }
301                             else if( !strcasecmp( "vertical-margin", psz_name ) )
302                             {
303                                 if( strchr( psz_value, '%' ) )
304                                 {
305                                     p_style->i_margin_v = 0;
306                                     p_style->i_margin_percent_v = atoi( psz_value );
307                                 }
308                                 else
309                                 {
310                                     p_style->i_margin_v = atoi( psz_value );
311                                     p_style->i_margin_percent_v = 0;
312                                 }
313                             }
314                         }
315                         if( psz_name )  free( psz_name );
316                         if( psz_value ) free( psz_value );
317                     }
318                 }
319
320                 free( psz_node );
321                 break;
322         }
323     }
324     if( p_style ) free( p_style );
325 }
326
327
328
329 subpicture_region_t *ParseUSFString( decoder_t *p_dec,
330                                      char *psz_subtitle,
331                                      subpicture_t *p_spu_in )
332 {
333     decoder_sys_t        *p_sys = p_dec->p_sys;
334     subpicture_t         *p_spu = p_spu_in;
335     subpicture_region_t  *p_region_first = NULL;
336     subpicture_region_t  *p_region_upto  = p_region_first;
337
338     while( *psz_subtitle )
339     {
340         if( *psz_subtitle == '<' )
341         {
342             char *psz_end = NULL;
343
344             if(( !strncasecmp( psz_subtitle, "<text ", 6 )) ||
345                ( !strncasecmp( psz_subtitle, "<text>", 6 )))
346             {
347                 psz_end = strcasestr( psz_subtitle, "</text>" );
348
349                 if( psz_end )
350                 {
351                     subpicture_region_t  *p_text_region;
352
353                     psz_end += strcspn( psz_end, ">" ) + 1;
354
355                     p_text_region = CreateTextRegion( p_dec,
356                                                       p_spu,
357                                                       psz_subtitle,
358                                                       psz_end - psz_subtitle,
359                                                       p_sys->i_align );
360
361                     if( p_text_region )
362                     {
363                         p_text_region->psz_text = CreatePlainText( p_text_region->psz_html );
364
365                         if( ! var_CreateGetBool( p_dec, "subsdec-formatted" ) )
366                         {
367                             free( p_text_region->psz_html );
368                             p_text_region->psz_html = NULL;
369                         }
370                     }
371
372                     if( !p_region_first )
373                     {
374                         p_region_first = p_region_upto = p_text_region;
375                     }
376                     else if( p_text_region )
377                     {
378                         p_region_upto->p_next = p_text_region;
379                         p_region_upto = p_region_upto->p_next;
380                     }
381                 }
382             }
383             else if(( !strncasecmp( psz_subtitle, "<karaoke ", 9 )) ||
384                     ( !strncasecmp( psz_subtitle, "<karaoke>", 9 )))
385             {
386                 psz_end = strcasestr( psz_subtitle, "</karaoke>" );
387
388                 if( psz_end )
389                 {
390                     subpicture_region_t  *p_text_region;
391
392                     psz_end += strcspn( psz_end, ">" ) + 1;
393
394                     p_text_region = CreateTextRegion( p_dec,
395                                                       p_spu,
396                                                       psz_subtitle,
397                                                       psz_end - psz_subtitle,
398                                                       p_sys->i_align );
399
400                     if( p_text_region )
401                     {
402                         if( ! var_CreateGetBool( p_dec, "subsdec-formatted" ) )
403                         {
404                             free( p_text_region->psz_html );
405                             p_text_region->psz_html = NULL;
406                         }
407                     }
408                     if( !p_region_first )
409                     {
410                         p_region_first = p_region_upto = p_text_region;
411                     }
412                     else if( p_text_region )
413                     {
414                         p_region_upto->p_next = p_text_region;
415                         p_region_upto = p_region_upto->p_next;
416                     }
417                 }
418             }
419             else if(( !strncasecmp( psz_subtitle, "<image ", 7 )) ||
420                     ( !strncasecmp( psz_subtitle, "<image>", 7 )))
421             {
422                 subpicture_region_t *p_image_region = NULL;
423
424                 char *psz_end = strcasestr( psz_subtitle, "</image>" );
425                 char *psz_content = strchr( psz_subtitle, '>' );
426                 int   i_transparent = -1;
427
428                 /* If a colorkey parameter is specified, then we have to map
429                  * that index in the picture through as transparent (it is
430                  * required by the USF spec but is also recommended that if the
431                  * creator really wants a transparent colour that they use a
432                  * type like PNG that properly supports it; this goes doubly
433                  * for VLC because the pictures are stored internally in YUV
434                  * and the resulting colour-matching may not produce the
435                  * desired results.)
436                  */
437                 char *psz_tmp = GrabAttributeValue( "colorkey", psz_subtitle );
438                 if( psz_tmp )
439                 {
440                     if( *psz_tmp == '#' )
441                         i_transparent = strtol( psz_tmp + 1, NULL, 16 ) & 0x00ffffff;
442                     free( psz_tmp );
443                 }
444                 if( psz_content && ( psz_content < psz_end ) )
445                 {
446                     char *psz_filename = strndup( &psz_content[1], psz_end - &psz_content[1] );
447                     if( psz_filename )
448                     {
449                         p_image_region = LoadEmbeddedImage( p_dec, p_spu,
450                                             psz_filename, i_transparent );
451                         free( psz_filename );
452                     }
453                 }
454
455                 if( psz_end ) psz_end += strcspn( psz_end, ">" ) + 1;
456
457                 if( p_image_region )
458                 {
459                     SetupPositions( p_image_region, psz_subtitle );
460
461                     p_image_region->p_next   = NULL;
462                     p_image_region->psz_text = NULL;
463                     p_image_region->psz_html = NULL;
464
465                 }
466                 if( !p_region_first )
467                 {
468                     p_region_first = p_region_upto = p_image_region;
469                 }
470                 else if( p_image_region )
471                 {
472                     p_region_upto->p_next = p_image_region;
473                     p_region_upto = p_region_upto->p_next;
474                 }
475             }
476             if( psz_end )
477                 psz_subtitle = psz_end - 1;
478
479             psz_subtitle += strcspn( psz_subtitle, ">" );
480         }
481
482         psz_subtitle++;
483     }
484
485     return p_region_first;
486 }
487
488 /*****************************************************************************
489  * ParseUSFHeader: Retrieve global formatting information etc
490  *****************************************************************************/
491 void ParseUSFHeader( decoder_t *p_dec )
492 {
493     stream_t      *p_sub = NULL;
494     xml_t         *p_xml = NULL;
495     xml_reader_t  *p_xml_reader = NULL;
496
497     p_sub = stream_MemoryNew( VLC_OBJECT(p_dec),
498                               p_dec->fmt_in.p_extra,
499                               p_dec->fmt_in.i_extra,
500                               VLC_TRUE );
501     if( !p_sub )
502         return;
503
504     p_xml = xml_Create( p_dec );
505     if( p_xml )
506     {
507         p_xml_reader = xml_ReaderCreate( p_xml, p_sub );
508         if( p_xml_reader )
509         {
510             /* Look for Root Node */
511             if( xml_ReaderRead( p_xml_reader ) == 1 )
512             {
513                 char *psz_node = xml_ReaderName( p_xml_reader );
514
515                 if( !strcasecmp( "usfsubtitles", psz_node ) )
516                     ParseUSFHeaderTags( p_dec, p_xml_reader );
517
518                 free( psz_node );
519             }
520
521             xml_ReaderDelete( p_xml, p_xml_reader );
522         }
523         xml_Delete( p_xml );
524     }
525     stream_Delete( p_sub );
526 }
527
528