]> git.sesse.net Git - vlc/blob - modules/codec/subtitles/subsusf.c
Move more of the code for USF subtitles decoding out into separate
[vlc] / modules / codec / subtitles / subsusf.c
1 /*****************************************************************************
2  * subsusf.c : USF subtitles decoder
3  *****************************************************************************
4  * Copyright (C) 2000-2006 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Bernie Purcell <bitmap@videolan.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 #include "subsdec.h"
25
26 /*****************************************************************************
27  * Local prototypes
28  *****************************************************************************/
/* Module entry points, handed to set_callbacks() in the module descriptor. */
static int  OpenDecoder   ( vlc_object_t * );
static void CloseDecoder  ( vlc_object_t * );

/* DecodeBlock is installed as pf_decode_sub by OpenDecoder;
 * ParseImageAttachments is called once from OpenDecoder. */
static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
static char         *CreatePlainText( char * );
static int           ParseImageAttachments( decoder_t *p_dec );

/* USF parsing helpers: ParseText handles one subtitle packet, ParseUSFHeader
 * is run from OpenDecoder when extra data is present. */
static subpicture_t        *ParseText     ( decoder_t *, block_t * );
static void                 ParseUSFHeader( decoder_t * );
static subpicture_region_t *ParseUSFString( decoder_t *, char *, subpicture_t * );
static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec, subpicture_t *p_spu, const char *psz_filename, int i_transparent_color );
40
41 /*****************************************************************************
42  * Module descriptor.
43  *****************************************************************************/
44
vlc_module_begin();
    /* Registers this module for the "decoder" capability; 40 is the value
     * passed as its score (presumably the selection priority -- confirm
     * against the module API). */
    set_capability( "decoder", 40 );
    /* OpenDecoder probes/initialises an instance, CloseDecoder frees it */
    set_callbacks( OpenDecoder, CloseDecoder );
    set_category( CAT_INPUT );
    set_subcategory( SUBCAT_INPUT_SCODEC );
    /* We inherit subsdec-align and subsdec-formatted from subsdec.c:
     * no variable is declared here, OpenDecoder creates both with
     * inheritance and reads their values. */
vlc_module_end();
52
53 /*****************************************************************************
54  * OpenDecoder: probe the decoder and return score
55  *****************************************************************************
56  * Tries to launch a decoder and returns a score so that the interface is
57  * able to choose.
58  *****************************************************************************/
59 static int OpenDecoder( vlc_object_t *p_this )
60 {
61     decoder_t     *p_dec = (decoder_t*)p_this;
62     decoder_sys_t *p_sys;
63     vlc_value_t    val;
64
65     if( p_dec->fmt_in.i_codec != VLC_FOURCC('u','s','f',' ') )
66     {
67         return VLC_EGENERIC;
68     }
69
70     p_dec->pf_decode_sub = DecodeBlock;
71
72     /* Allocate the memory needed to store the decoder's structure */
73     if( ( p_dec->p_sys = p_sys =
74           (decoder_sys_t *)calloc(1, sizeof(decoder_sys_t)) ) == NULL )
75     {
76         msg_Err( p_dec, "out of memory" );
77         return VLC_ENOMEM;
78     }
79
80     /* Unused fields of p_sys - not needed for USF decoding */
81     p_sys->b_ass = VLC_FALSE;
82     p_sys->iconv_handle = (vlc_iconv_t)-1;
83     p_sys->b_autodetect_utf8 = VLC_FALSE;
84
85     /* init of p_sys */
86     p_sys->i_align = 0;
87     p_sys->i_original_height = -1;
88     p_sys->i_original_width = -1;
89     TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
90     TAB_INIT( p_sys->i_images, p_sys->pp_images );
91
92     /* USF subtitles are mandated to be UTF-8, so don't need vlc_iconv */
93
94     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
95     var_Get( p_dec, "subsdec-align", &val );
96     p_sys->i_align = val.i_int;
97
98     ParseImageAttachments( p_dec );
99
100     if( var_CreateGetBool( p_dec, "subsdec-formatted" ) )
101     {
102         if( p_dec->fmt_in.i_extra > 0 )
103             ParseUSFHeader( p_dec );
104     }
105
106     return VLC_SUCCESS;
107 }
108
109 /****************************************************************************
110  * DecodeBlock: the whole thing
111  ****************************************************************************
112  * This function must be fed with complete subtitles units.
113  ****************************************************************************/
114 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
115 {
116     subpicture_t *p_spu = NULL;
117
118     if( !pp_block || *pp_block == NULL ) return NULL;
119
120     p_spu = ParseText( p_dec, *pp_block );
121
122     block_Release( *pp_block );
123     *pp_block = NULL;
124
125     return p_spu;
126 }
127
128 /*****************************************************************************
129  * CloseDecoder: clean up the decoder
130  *****************************************************************************/
131 static void CloseDecoder( vlc_object_t *p_this )
132 {
133     decoder_t *p_dec = (decoder_t *)p_this;
134     decoder_sys_t *p_sys = p_dec->p_sys;
135
136     if( p_sys->pp_ssa_styles )
137     {
138         int i;
139         for( i = 0; i < p_sys->i_ssa_styles; i++ )
140         {
141             if( !p_sys->pp_ssa_styles[i] )
142                 continue;
143
144             if( p_sys->pp_ssa_styles[i]->psz_stylename )
145                 free( p_sys->pp_ssa_styles[i]->psz_stylename );
146             if( p_sys->pp_ssa_styles[i]->font_style.psz_fontname )
147                 free( p_sys->pp_ssa_styles[i]->font_style.psz_fontname );
148             if( p_sys->pp_ssa_styles[i] )
149                 free( p_sys->pp_ssa_styles[i] );
150         }
151         TAB_CLEAN( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
152     }
153     if( p_sys->pp_images )
154     {
155         int i;
156         for( i = 0; i < p_sys->i_images; i++ )
157         {
158             if( !p_sys->pp_images[i] )
159                 continue;
160
161             if( p_sys->pp_images[i]->p_pic )
162                 p_sys->pp_images[i]->p_pic->pf_release( p_sys->pp_images[i]->p_pic );
163             if( p_sys->pp_images[i]->psz_filename )
164                 free( p_sys->pp_images[i]->psz_filename );
165
166             free( p_sys->pp_images[i] );
167         }
168         TAB_CLEAN( p_sys->i_images, p_sys->pp_images );
169     }
170
171     free( p_sys );
172 }
173
174 /*****************************************************************************
175  * ParseText: parse a text subtitle packet and build a subpicture from it
176  *****************************************************************************/
177 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
178 {
179     decoder_sys_t *p_sys = p_dec->p_sys;
180     subpicture_t *p_spu = NULL;
181     char *psz_subtitle = NULL;
182
183     /* We cannot display a subpicture with no date */
184     if( p_block->i_pts == 0 )
185     {
186         msg_Warn( p_dec, "subtitle without a date" );
187         return NULL;
188     }
189
190     /* Check validity of packet data */
191     /* An "empty" line containing only \0 can be used to force
192        and ephemer picture from the screen */
193     if( p_block->i_buffer < 1 )
194     {
195         msg_Warn( p_dec, "no subtitle data" );
196         return NULL;
197     }
198
199     /* Should be resiliant against bad subtitles */
200     psz_subtitle = strndup( (const char *)p_block->p_buffer,
201                             p_block->i_buffer );
202     if( psz_subtitle == NULL )
203         return NULL;
204
205     /* USF Subtitles are mandated to be UTF-8 -- make sure it is */
206     if (EnsureUTF8( psz_subtitle ) == NULL)
207     {
208         msg_Err( p_dec, _("USF subtitles must be in UTF-8 format.\n"
209                  "This stream contains USF subtitles which aren't.") );
210     }
211
212     /* Create the subpicture unit */
213     p_spu = p_dec->pf_spu_buffer_new( p_dec );
214     if( !p_spu )
215     {
216         msg_Warn( p_dec, "can't get spu buffer" );
217         if( psz_subtitle ) free( psz_subtitle );
218         return NULL;
219     }
220
221     p_spu->b_pausable = VLC_TRUE;
222
223     /* Decode USF strings */
224     p_spu->p_region = ParseUSFString( p_dec, psz_subtitle, p_spu );
225
226     p_spu->i_start = p_block->i_pts;
227     p_spu->i_stop = p_block->i_pts + p_block->i_length;
228     p_spu->b_ephemer = (p_block->i_length == 0);
229     p_spu->b_absolute = VLC_FALSE;
230     p_spu->i_original_picture_width = p_sys->i_original_width;
231     p_spu->i_original_picture_height = p_sys->i_original_height;
232
233     if( psz_subtitle ) free( psz_subtitle );
234
235     return p_spu;
236 }
237
/* Tells whether c can be part of an attribute name (letters, digits and
 * the punctuation XML allows inside names). Used to reject hits that are
 * merely the tail of a longer attribute name. */
static int IsAttributeNameChar( char c )
{
    return ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) ||
           ( c >= '0' && c <= '9' ) ||
           c == '-' || c == '_' || c == ':' || c == '.';
}

/* Extracts the value of attribute psz_attribute from the tag that starts at
 * psz_tag_start.
 *
 * The attribute name is matched case-insensitively and must be written as
 * name="value" (no spaces around '=', double quotes only). Only the first
 * tag is searched, i.e. the text up to the first '>'.
 *
 * Every occurrence of the name inside the tag is tried in turn, and an
 * occurrence that is the tail of a longer name (e.g. "style" inside
 * "fontstyle") is skipped. Stopping at the first textual hit would return
 * the wrong attribute's value or hide the real attribute behind an earlier
 * false hit.
 *
 * Returns a newly allocated copy of the value (caller frees), or NULL when
 * either argument is NULL, the name is empty, there is no '>' in the
 * string, the attribute is absent, or allocation fails. */
static char *GrabAttributeValue( const char *psz_attribute,
                                 const char *psz_tag_start )
{
    if( !psz_attribute || !psz_tag_start )
        return NULL;

    const size_t i_name_len  = strlen( psz_attribute );
    const char  *psz_tag_end = strchr( psz_tag_start, '>' );

    /* Without a closing '>' there is no tag to search */
    if( i_name_len == 0 || !psz_tag_end )
        return NULL;

    for( const char *psz_scan = psz_tag_start; ; )
    {
        const char *psz_hit = strcasestr( psz_scan, psz_attribute );

        /* Nothing left, or the hit lies beyond the tag being examined */
        if( !psz_hit || psz_hit >= psz_tag_end )
            return NULL;

        /* Resume just after the start of this hit if it is rejected */
        psz_scan = psz_hit + 1;

        /* Reject the tail of a longer attribute name */
        if( psz_hit > psz_tag_start && IsAttributeNameChar( psz_hit[-1] ) )
            continue;

        /* The name must be directly followed by ="; the short-circuit
         * keeps us from reading past the terminating NUL */
        const char *psz_value = psz_hit + i_name_len;
        if( psz_value[0] != '=' || psz_value[1] != '\"' )
            continue;
        psz_value += 2;

        /* The value has to start inside the tag */
        if( psz_value >= psz_tag_end )
            return NULL;

        return strndup( psz_value, strcspn( psz_value, "\"" ) );
    }
}
263
264 static ssa_style_t *ParseStyle( decoder_sys_t *p_sys, char *psz_subtitle )
265 {
266     ssa_style_t *p_style   = NULL;
267     char        *psz_style = GrabAttributeValue( "style", psz_subtitle );
268
269     if( psz_style )
270     {
271         int i;
272
273         for( i = 0; i < p_sys->i_ssa_styles; i++ )
274         {
275             if( !strcmp( p_sys->pp_ssa_styles[i]->psz_stylename, psz_style ) )
276                 p_style = p_sys->pp_ssa_styles[i];
277         }
278         free( psz_style );
279     }
280     return p_style;
281 }
282
283 static int ParsePositionAttributeList( char *psz_subtitle, int *i_align,
284                                        int *i_x, int *i_y )
285 {
286     int   i_mask = 0;
287
288     char *psz_align    = GrabAttributeValue( "alignment", psz_subtitle );
289     char *psz_margin_x = GrabAttributeValue( "horizontal-margin", psz_subtitle );
290     char *psz_margin_y = GrabAttributeValue( "vertical-margin", psz_subtitle );
291     /* -- UNSUPPORTED
292     char *psz_relative = GrabAttributeValue( "relative-to", psz_subtitle );
293     char *psz_rotate_x = GrabAttributeValue( "rotate-x", psz_subtitle );
294     char *psz_rotate_y = GrabAttributeValue( "rotate-y", psz_subtitle );
295     char *psz_rotate_z = GrabAttributeValue( "rotate-z", psz_subtitle );
296     */
297
298     *i_align = SUBPICTURE_ALIGN_BOTTOM;
299     *i_x = 0;
300     *i_y = 0;
301
302     if( psz_align )
303     {
304         if( !strcasecmp( "TopLeft", psz_align ) )
305             *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
306         else if( !strcasecmp( "TopCenter", psz_align ) )
307             *i_align = SUBPICTURE_ALIGN_TOP;
308         else if( !strcasecmp( "TopRight", psz_align ) )
309             *i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
310         else if( !strcasecmp( "MiddleLeft", psz_align ) )
311             *i_align = SUBPICTURE_ALIGN_LEFT;
312         else if( !strcasecmp( "MiddleCenter", psz_align ) )
313             *i_align = 0;
314         else if( !strcasecmp( "MiddleRight", psz_align ) )
315             *i_align = SUBPICTURE_ALIGN_RIGHT;
316         else if( !strcasecmp( "BottomLeft", psz_align ) )
317             *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
318         else if( !strcasecmp( "BottomCenter", psz_align ) )
319             *i_align = SUBPICTURE_ALIGN_BOTTOM;
320         else if( !strcasecmp( "BottomRight", psz_align ) )
321             *i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
322
323         i_mask |= ATTRIBUTE_ALIGNMENT;
324         free( psz_align );
325     }
326     if( psz_margin_x )
327     {
328         *i_x = atoi( psz_margin_x );
329         if( strchr( psz_margin_x, '%' ) )
330             i_mask |= ATTRIBUTE_X_PERCENT;
331         else
332             i_mask |= ATTRIBUTE_X;
333
334         free( psz_margin_x );
335     }
336     if( psz_margin_y )
337     {
338         *i_y = atoi( psz_margin_y );
339         if( strchr( psz_margin_y, '%' ) )
340             i_mask |= ATTRIBUTE_Y_PERCENT;
341         else
342             i_mask |= ATTRIBUTE_Y;
343
344         free( psz_margin_y );
345     }
346     return i_mask;
347 }
348
349 static void SetupPositions( subpicture_region_t *p_region, char *psz_subtitle )
350 {
351     int           i_mask = 0;
352     int           i_align;
353     int           i_x, i_y;
354
355     i_mask = ParsePositionAttributeList( psz_subtitle, &i_align, &i_x, &i_y );
356
357     if( i_mask & ATTRIBUTE_ALIGNMENT )
358         p_region->i_align = i_align;
359
360     /* TODO: Setup % based offsets properly, without adversely affecting
361      *       everything else in vlc. Will address with separate patch, to
362      *       prevent this one being any more complicated.
363      */
364     if( i_mask & ATTRIBUTE_X )
365         p_region->i_x = i_x;
366     else if( i_mask & ATTRIBUTE_X_PERCENT )
367         p_region->i_x = 0;
368
369     if( i_mask & ATTRIBUTE_Y )
370         p_region->i_y = i_y;
371     else if( i_mask & ATTRIBUTE_Y_PERCENT )
372         p_region->i_y = 0;
373 }
374
375 static subpicture_region_t *CreateTextRegion( decoder_t *p_dec,
376                                               subpicture_t *p_spu,
377                                               char *psz_subtitle,
378                                               int i_len,
379                                               int i_sys_align )
380 {
381     decoder_sys_t        *p_sys = p_dec->p_sys;
382     subpicture_region_t  *p_text_region;
383     video_format_t        fmt;
384
385     /* Create a new subpicture region */
386     memset( &fmt, 0, sizeof(video_format_t) );
387     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
388     fmt.i_aspect = 0;
389     fmt.i_width = fmt.i_height = 0;
390     fmt.i_x_offset = fmt.i_y_offset = 0;
391     p_text_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
392
393     if( p_text_region != NULL )
394     {
395         ssa_style_t  *p_style = NULL;
396
397         p_text_region->psz_text = NULL;
398         p_text_region->psz_html = strndup( psz_subtitle, i_len );
399         if( ! p_text_region->psz_html )
400         {
401             msg_Err( p_dec, "out of memory" );
402             p_spu->pf_destroy_region( VLC_OBJECT(p_dec), p_text_region );
403             return NULL;
404         }
405
406         p_style = ParseStyle( p_sys, p_text_region->psz_html );
407         if( !p_style )
408         {
409             int i;
410
411             for( i = 0; i < p_sys->i_ssa_styles; i++ )
412             {
413                 if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
414                     p_style = p_sys->pp_ssa_styles[i];
415             }
416         }
417
418         if( p_style )
419         {
420             msg_Dbg( p_dec, "style is: %s", p_style->psz_stylename );
421
422             p_text_region->p_style = &p_style->font_style;
423             p_text_region->i_align = p_style->i_align;
424
425             /* TODO: Setup % based offsets properly, without adversely affecting
426              *       everything else in vlc. Will address with separate patch,
427              *       to prevent this one being any more complicated.
428
429                      * p_style->i_margin_percent_h;
430                      * p_style->i_margin_percent_v;
431              */
432             p_text_region->i_x         = p_style->i_margin_h;
433             p_text_region->i_y         = p_style->i_margin_v;
434
435         }
436         else
437         {
438             p_text_region->i_align = SUBPICTURE_ALIGN_BOTTOM | i_sys_align;
439             p_text_region->i_x = i_sys_align ? 20 : 0;
440             p_text_region->i_y = 10;
441         }
442         /* Look for position arguments which may override the style-based
443          * defaults.
444          */
445         SetupPositions( p_text_region, psz_subtitle );
446
447         p_text_region->p_next = NULL;
448     }
449     return p_text_region;
450 }
451
452 static int ParseImageAttachments( decoder_t *p_dec )
453 {
454     decoder_sys_t        *p_sys = p_dec->p_sys;
455     input_attachment_t  **pp_attachments;
456     int                   i_attachments_cnt;
457     int                   k = 0;
458
459     if( VLC_SUCCESS != decoder_GetInputAttachments( p_dec, &pp_attachments, &i_attachments_cnt ))
460         return VLC_EGENERIC;
461
462     for( k = 0; k < i_attachments_cnt; k++ )
463     {
464         input_attachment_t *p_attach = pp_attachments[k];
465
466         vlc_fourcc_t  type  = 0;
467
468         if( ( !strcmp( p_attach->psz_mime, "image/bmp" ) )      || /* BMP */
469             ( !strcmp( p_attach->psz_mime, "image/x-bmp" ) )    ||
470             ( !strcmp( p_attach->psz_mime, "image/x-bitmap" ) ) ||
471             ( !strcmp( p_attach->psz_mime, "image/x-ms-bmp" ) ) )
472         {
473              type = VLC_FOURCC('b','m','p',' ');
474         }
475         else if( ( !strcmp( p_attach->psz_mime, "image/x-portable-anymap" ) )  || /* PNM */
476                  ( !strcmp( p_attach->psz_mime, "image/x-portable-bitmap" ) )  || /* PBM */
477                  ( !strcmp( p_attach->psz_mime, "image/x-portable-graymap" ) ) || /* PGM */
478                  ( !strcmp( p_attach->psz_mime, "image/x-portable-pixmap" ) ) )   /* PPM */
479         {
480             type = VLC_FOURCC('p','n','m',' ');
481         }
482         else if ( !strcmp( p_attach->psz_mime, "image/gif" ) )         /* GIF */
483             type = VLC_FOURCC('g','i','f',' ');
484         else if ( !strcmp( p_attach->psz_mime, "image/jpeg" ) )        /* JPG, JPEG */
485             type = VLC_FOURCC('j','p','e','g');
486         else if ( !strcmp( p_attach->psz_mime, "image/pcx" ) )         /* PCX */
487             type = VLC_FOURCC('p','c','x',' ');
488         else if ( !strcmp( p_attach->psz_mime, "image/png" ) )         /* PNG */
489             type = VLC_FOURCC('p','n','g',' ');
490         else if ( !strcmp( p_attach->psz_mime, "image/tiff" ) )        /* TIF, TIFF */
491             type = VLC_FOURCC('t','i','f','f');
492         else if ( !strcmp( p_attach->psz_mime, "image/x-tga" ) )       /* TGA */
493             type = VLC_FOURCC('t','g','a',' ');
494         else if ( !strcmp( p_attach->psz_mime, "image/x-xpixmap") )    /* XPM */
495             type = VLC_FOURCC('x','p','m',' ');
496
497         if( ( type != 0 ) &&
498             ( p_attach->i_data > 0 ) &&
499             ( p_attach->p_data != NULL ) )
500         {
501             picture_t         *p_pic = NULL;
502             image_handler_t   *p_image;
503
504             p_image = image_HandlerCreate( p_dec );
505             if( p_image != NULL )
506             {
507                 block_t   *p_block;
508
509                 p_block = block_New( p_image->p_parent, p_attach->i_data );
510
511                 if( p_block != NULL )
512                 {
513                     video_format_t     fmt_in;
514                     video_format_t     fmt_out;
515
516                     memcpy( p_block->p_buffer, p_attach->p_data, p_attach->i_data );
517
518                     memset( &fmt_in,  0, sizeof( video_format_t));
519                     memset( &fmt_out, 0, sizeof( video_format_t));
520
521                     fmt_in.i_chroma  = type;
522                     fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A');
523
524                     /* Find a suitable decoder module */
525                     if( module_Exists( p_dec, "sdl_image" ) )
526                     {
527                         /* ffmpeg thinks it can handle bmp properly but it can't (at least
528                          * not all of them), so use sdl_image if it is available */
529
530                         vlc_value_t val;
531
532                         var_Create( p_dec, "codec", VLC_VAR_MODULE | VLC_VAR_DOINHERIT );
533                         val.psz_string = (char*) "sdl_image";
534                         var_Set( p_dec, "codec", val );
535                     }
536
537                     p_pic = image_Read( p_image, p_block, &fmt_in, &fmt_out );
538                     var_Destroy( p_dec, "codec" );
539                 }
540
541                 image_HandlerDelete( p_image );
542             }
543             if( p_pic )
544             {
545                 image_attach_t *p_picture = malloc( sizeof(image_attach_t) );
546
547                 if( p_picture )
548                 {
549                     p_picture->psz_filename = strdup( p_attach->psz_name );
550                     p_picture->p_pic = p_pic;
551
552                     TAB_APPEND( p_sys->i_images, p_sys->pp_images, p_picture );
553                 }
554             }
555         }
556         vlc_input_attachment_Delete( pp_attachments[ k ] );
557     }
558     free( pp_attachments );
559
560     return VLC_SUCCESS;
561 }
562
563 static void ParseUSFHeaderTags( decoder_t *p_dec, xml_reader_t *p_xml_reader )
564 {
565     decoder_sys_t *p_sys = p_dec->p_sys;
566     char *psz_node;
567     ssa_style_t *p_style = NULL;
568     int i_style_level = 0;
569     int i_metadata_level = 0;
570
571     while ( xml_ReaderRead( p_xml_reader ) == 1 )
572     {
573         switch ( xml_ReaderNodeType( p_xml_reader ) )
574         {
575             case XML_READER_TEXT:
576             case XML_READER_NONE:
577                 break;
578             case XML_READER_ENDELEM:
579                 psz_node = xml_ReaderName( p_xml_reader );
580
581                 if( !psz_node )
582                     break;
583                 switch (i_style_level)
584                 {
585                     case 0:
586                         if( !strcasecmp( "metadata", psz_node ) && (i_metadata_level == 1) )
587                         {
588                             i_metadata_level--;
589                         }
590                         break;
591                     case 1:
592                         if( !strcasecmp( "styles", psz_node ) )
593                         {
594                             i_style_level--;
595                         }
596                         break;
597                     case 2:
598                         if( !strcasecmp( "style", psz_node ) )
599                         {
600                             TAB_APPEND( p_sys->i_ssa_styles, p_sys->pp_ssa_styles, p_style );
601
602                             p_style = NULL;
603                             i_style_level--;
604                         }
605                         break;
606                 }
607
608                 free( psz_node );
609                 break;
610             case XML_READER_STARTELEM:
611                 psz_node = xml_ReaderName( p_xml_reader );
612
613                 if( !psz_node )
614                     break;
615
616                 if( !strcasecmp( "metadata", psz_node ) && (i_style_level == 0) )
617                 {
618                     i_metadata_level++;
619                 }
620                 else if( !strcasecmp( "resolution", psz_node ) &&
621                          ( i_metadata_level == 1) )
622                 {
623                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
624                     {
625                         char *psz_name = xml_ReaderName ( p_xml_reader );
626                         char *psz_value = xml_ReaderValue ( p_xml_reader );
627
628                         if( psz_name && psz_value )
629                         {
630                             if( !strcasecmp( "x", psz_name ) )
631                                 p_sys->i_original_width = atoi( psz_value );
632                             else if( !strcasecmp( "y", psz_name ) )
633                                 p_sys->i_original_height = atoi( psz_value );
634                         }
635                         if( psz_name )  free( psz_name );
636                         if( psz_value ) free( psz_value );
637                     }
638                 }
639                 else if( !strcasecmp( "styles", psz_node ) && (i_style_level == 0) )
640                 {
641                     i_style_level++;
642                 }
643                 else if( !strcasecmp( "style", psz_node ) && (i_style_level == 1) )
644                 {
645                     i_style_level++;
646
647                     p_style = calloc( 1, sizeof(ssa_style_t) );
648                     if( ! p_style )
649                     {
650                         msg_Err( p_dec, "out of memory" );
651                         free( psz_node );
652                         break;
653                     }
654                     /* All styles are supposed to default to Default, and then
655                      * one or more settings are over-ridden.
656                      * At the moment this only effects styles defined AFTER
657                      * Default in the XML
658                      */
659                     int i;
660                     for( i = 0; i < p_sys->i_ssa_styles; i++ )
661                     {
662                         if( !strcasecmp( p_sys->pp_ssa_styles[i]->psz_stylename, "Default" ) )
663                         {
664                             ssa_style_t *p_default_style = p_sys->pp_ssa_styles[i];
665
666                             memcpy( p_style, p_default_style, sizeof( ssa_style_t ) );
667                             p_style->font_style.psz_fontname = strdup( p_style->font_style.psz_fontname );
668                             p_style->psz_stylename = NULL;
669                         }
670                     }
671
672                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
673                     {
674                         char *psz_name = xml_ReaderName ( p_xml_reader );
675                         char *psz_value = xml_ReaderValue ( p_xml_reader );
676
677                         if( psz_name && psz_value )
678                         {
679                             if( !strcasecmp( "name", psz_name ) )
680                                 p_style->psz_stylename = strdup( psz_value);
681                         }
682                         if( psz_name )  free( psz_name );
683                         if( psz_value ) free( psz_value );
684                     }
685                 }
686                 else if( !strcasecmp( "fontstyle", psz_node ) && (i_style_level == 2) )
687                 {
688                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
689                     {
690                         char *psz_name = xml_ReaderName ( p_xml_reader );
691                         char *psz_value = xml_ReaderValue ( p_xml_reader );
692
693                         if( psz_name && psz_value )
694                         {
695                             if( !strcasecmp( "face", psz_name ) )
696                             {
697                                 if( p_style->font_style.psz_fontname )
698                                     free( p_style->font_style.psz_fontname );
699                                 p_style->font_style.psz_fontname = strdup( psz_value );
700                             }
701                             else if( !strcasecmp( "size", psz_name ) )
702                             {
703                                 if( ( *psz_value == '+' ) || ( *psz_value == '-' ) )
704                                 {
705                                     int i_value = atoi( psz_value );
706
707                                     if( ( i_value >= -5 ) && ( i_value <= 5 ) )
708                                         p_style->font_style.i_font_size  +=
709                                             ( i_value * p_style->font_style.i_font_size ) / 10;
710                                     else if( i_value < -5 )
711                                         p_style->font_style.i_font_size  = - i_value;
712                                     else if( i_value > 5 )
713                                         p_style->font_style.i_font_size  = i_value;
714                                 }
715                                 else
716                                     p_style->font_style.i_font_size  = atoi( psz_value );
717                             }
718                             else if( !strcasecmp( "italic", psz_name ) )
719                             {
720                                 if( !strcasecmp( "yes", psz_value ))
721                                     p_style->font_style.i_style_flags |= STYLE_ITALIC;
722                                 else
723                                     p_style->font_style.i_style_flags &= ~STYLE_ITALIC;
724                             }
725                             else if( !strcasecmp( "weight", psz_name ) )
726                             {
727                                 if( !strcasecmp( "bold", psz_value ))
728                                     p_style->font_style.i_style_flags |= STYLE_BOLD;
729                                 else
730                                     p_style->font_style.i_style_flags &= ~STYLE_BOLD;
731                             }
732                             else if( !strcasecmp( "underline", psz_name ) )
733                             {
734                                 if( !strcasecmp( "yes", psz_value ))
735                                     p_style->font_style.i_style_flags |= STYLE_UNDERLINE;
736                                 else
737                                     p_style->font_style.i_style_flags &= ~STYLE_UNDERLINE;
738                             }
739                             else if( !strcasecmp( "color", psz_name ) )
740                             {
741                                 if( *psz_value == '#' )
742                                 {
743                                     unsigned long col = strtol(psz_value+1, NULL, 16);
744                                     p_style->font_style.i_font_color = (col & 0x00ffffff);
745                                     p_style->font_style.i_font_alpha = (col >> 24) & 0xff;
746                                 }
747                             }
748                             else if( !strcasecmp( "outline-color", psz_name ) )
749                             {
750                                 if( *psz_value == '#' )
751                                 {
752                                     unsigned long col = strtol(psz_value+1, NULL, 16);
753                                     p_style->font_style.i_outline_color = (col & 0x00ffffff);
754                                     p_style->font_style.i_outline_alpha = (col >> 24) & 0xff;
755                                 }
756                             }
757                             else if( !strcasecmp( "outline-level", psz_name ) )
758                             {
759                                 p_style->font_style.i_outline_width = atoi( psz_value );
760                             }
761                             else if( !strcasecmp( "shadow-color", psz_name ) )
762                             {
763                                 if( *psz_value == '#' )
764                                 {
765                                     unsigned long col = strtol(psz_value+1, NULL, 16);
766                                     p_style->font_style.i_shadow_color = (col & 0x00ffffff);
767                                     p_style->font_style.i_shadow_alpha = (col >> 24) & 0xff;
768                                 }
769                             }
770                             else if( !strcasecmp( "shadow-level", psz_name ) )
771                             {
772                                 p_style->font_style.i_shadow_width = atoi( psz_value );
773                             }
774                             else if( !strcasecmp( "back-color", psz_name ) )
775                             {
776                                 if( *psz_value == '#' )
777                                 {
778                                     unsigned long col = strtol(psz_value+1, NULL, 16);
779                                     p_style->font_style.i_karaoke_background_color = (col & 0x00ffffff);
780                                     p_style->font_style.i_karaoke_background_alpha = (col >> 24) & 0xff;
781                                 }
782                             }
783                             else if( !strcasecmp( "spacing", psz_name ) )
784                             {
785                                 p_style->font_style.i_spacing = atoi( psz_value );
786                             }
787                         }
788                         if( psz_name )  free( psz_name );
789                         if( psz_value ) free( psz_value );
790                     }
791                 }
792                 else if( !strcasecmp( "position", psz_node ) && (i_style_level == 2) )
793                 {
794                     while ( xml_ReaderNextAttr( p_xml_reader ) == VLC_SUCCESS )
795                     {
796                         char *psz_name = xml_ReaderName ( p_xml_reader );
797                         char *psz_value = xml_ReaderValue ( p_xml_reader );
798
799                         if( psz_name && psz_value )
800                         {
801                             if( !strcasecmp( "alignment", psz_name ) )
802                             {
803                                 if( !strcasecmp( "TopLeft", psz_value ) )
804                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
805                                 else if( !strcasecmp( "TopCenter", psz_value ) )
806                                     p_style->i_align = SUBPICTURE_ALIGN_TOP;
807                                 else if( !strcasecmp( "TopRight", psz_value ) )
808                                     p_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
809                                 else if( !strcasecmp( "MiddleLeft", psz_value ) )
810                                     p_style->i_align = SUBPICTURE_ALIGN_LEFT;
811                                 else if( !strcasecmp( "MiddleCenter", psz_value ) )
812                                     p_style->i_align = 0;
813                                 else if( !strcasecmp( "MiddleRight", psz_value ) )
814                                     p_style->i_align = SUBPICTURE_ALIGN_RIGHT;
815                                 else if( !strcasecmp( "BottomLeft", psz_value ) )
816                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
817                                 else if( !strcasecmp( "BottomCenter", psz_value ) )
818                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM;
819                                 else if( !strcasecmp( "BottomRight", psz_value ) )
820                                     p_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
821                             }
822                             else if( !strcasecmp( "horizontal-margin", psz_name ) )
823                             {
824                                 if( strchr( psz_value, '%' ) )
825                                 {
826                                     p_style->i_margin_h = 0;
827                                     p_style->i_margin_percent_h = atoi( psz_value );
828                                 }
829                                 else
830                                 {
831                                     p_style->i_margin_h = atoi( psz_value );
832                                     p_style->i_margin_percent_h = 0;
833                                 }
834                             }
835                             else if( !strcasecmp( "vertical-margin", psz_name ) )
836                             {
837                                 if( strchr( psz_value, '%' ) )
838                                 {
839                                     p_style->i_margin_v = 0;
840                                     p_style->i_margin_percent_v = atoi( psz_value );
841                                 }
842                                 else
843                                 {
844                                     p_style->i_margin_v = atoi( psz_value );
845                                     p_style->i_margin_percent_v = 0;
846                                 }
847                             }
848                         }
849                         if( psz_name )  free( psz_name );
850                         if( psz_value ) free( psz_value );
851                     }
852                 }
853
854                 free( psz_node );
855                 break;
856         }
857     }
858     if( p_style ) free( p_style );
859 }
860
861
862
863 static subpicture_region_t *ParseUSFString( decoder_t *p_dec,
864                                             char *psz_subtitle,
865                                             subpicture_t *p_spu_in )
866 {
867     decoder_sys_t        *p_sys = p_dec->p_sys;
868     subpicture_t         *p_spu = p_spu_in;
869     subpicture_region_t  *p_region_first = NULL;
870     subpicture_region_t  *p_region_upto  = p_region_first;
871
872     while( *psz_subtitle )
873     {
874         if( *psz_subtitle == '<' )
875         {
876             char *psz_end = NULL;
877
878             if(( !strncasecmp( psz_subtitle, "<text ", 6 )) ||
879                ( !strncasecmp( psz_subtitle, "<text>", 6 )))
880             {
881                 psz_end = strcasestr( psz_subtitle, "</text>" );
882
883                 if( psz_end )
884                 {
885                     subpicture_region_t  *p_text_region;
886
887                     psz_end += strcspn( psz_end, ">" ) + 1;
888
889                     p_text_region = CreateTextRegion( p_dec,
890                                                       p_spu,
891                                                       psz_subtitle,
892                                                       psz_end - psz_subtitle,
893                                                       p_sys->i_align );
894
895                     if( p_text_region )
896                     {
897                         p_text_region->psz_text = CreatePlainText( p_text_region->psz_html );
898
899                         if( ! var_CreateGetBool( p_dec, "subsdec-formatted" ) )
900                         {
901                             free( p_text_region->psz_html );
902                             p_text_region->psz_html = NULL;
903                         }
904                     }
905
906                     if( !p_region_first )
907                     {
908                         p_region_first = p_region_upto = p_text_region;
909                     }
910                     else if( p_text_region )
911                     {
912                         p_region_upto->p_next = p_text_region;
913                         p_region_upto = p_region_upto->p_next;
914                     }
915                 }
916             }
917             else if(( !strncasecmp( psz_subtitle, "<karaoke ", 9 )) ||
918                     ( !strncasecmp( psz_subtitle, "<karaoke>", 9 )))
919             {
920                 psz_end = strcasestr( psz_subtitle, "</karaoke>" );
921
922                 if( psz_end )
923                 {
924                     subpicture_region_t  *p_text_region;
925
926                     psz_end += strcspn( psz_end, ">" ) + 1;
927
928                     p_text_region = CreateTextRegion( p_dec,
929                                                       p_spu,
930                                                       psz_subtitle,
931                                                       psz_end - psz_subtitle,
932                                                       p_sys->i_align );
933
934                     if( p_text_region )
935                     {
936                         if( ! var_CreateGetBool( p_dec, "subsdec-formatted" ) )
937                         {
938                             free( p_text_region->psz_html );
939                             p_text_region->psz_html = NULL;
940                         }
941                     }
942                     if( !p_region_first )
943                     {
944                         p_region_first = p_region_upto = p_text_region;
945                     }
946                     else if( p_text_region )
947                     {
948                         p_region_upto->p_next = p_text_region;
949                         p_region_upto = p_region_upto->p_next;
950                     }
951                 }
952             }
953             else if(( !strncasecmp( psz_subtitle, "<image ", 7 )) ||
954                     ( !strncasecmp( psz_subtitle, "<image>", 7 )))
955             {
956                 subpicture_region_t *p_image_region = NULL;
957
958                 char *psz_end = strcasestr( psz_subtitle, "</image>" );
959                 char *psz_content = strchr( psz_subtitle, '>' );
960                 int   i_transparent = -1;
961
962                 /* If a colorkey parameter is specified, then we have to map
963                  * that index in the picture through as transparent (it is
964                  * required by the USF spec but is also recommended that if the
965                  * creator really wants a transparent colour that they use a
966                  * type like PNG that properly supports it; this goes doubly
967                  * for VLC because the pictures are stored internally in YUV
968                  * and the resulting colour-matching may not produce the
969                  * desired results.)
970                  */
971                 char *psz_tmp = GrabAttributeValue( "colorkey", psz_subtitle );
972                 if( psz_tmp )
973                 {
974                     if( *psz_tmp == '#' )
975                         i_transparent = strtol( psz_tmp + 1, NULL, 16 ) & 0x00ffffff;
976                     free( psz_tmp );
977                 }
978                 if( psz_content && ( psz_content < psz_end ) )
979                 {
980                     char *psz_filename = strndup( &psz_content[1], psz_end - &psz_content[1] );
981                     if( psz_filename )
982                     {
983                         p_image_region = LoadEmbeddedImage( p_dec, p_spu,
984                                             psz_filename, i_transparent );
985                         free( psz_filename );
986                     }
987                 }
988
989                 if( psz_end ) psz_end += strcspn( psz_end, ">" ) + 1;
990
991                 if( p_image_region )
992                 {
993                     SetupPositions( p_image_region, psz_subtitle );
994
995                     p_image_region->p_next   = NULL;
996                     p_image_region->psz_text = NULL;
997                     p_image_region->psz_html = NULL;
998
999                 }
1000                 if( !p_region_first )
1001                 {
1002                     p_region_first = p_region_upto = p_image_region;
1003                 }
1004                 else if( p_image_region )
1005                 {
1006                     p_region_upto->p_next = p_image_region;
1007                     p_region_upto = p_region_upto->p_next;
1008                 }
1009             }
1010             if( psz_end )
1011                 psz_subtitle = psz_end - 1;
1012
1013             psz_subtitle += strcspn( psz_subtitle, ">" );
1014         }
1015
1016         psz_subtitle++;
1017     }
1018
1019     return p_region_first;
1020 }
1021
1022 /*****************************************************************************
1023  * ParseUSFHeader: Retrieve global formatting information etc
1024  *****************************************************************************/
1025 static void ParseUSFHeader( decoder_t *p_dec )
1026 {
1027     stream_t      *p_sub = NULL;
1028     xml_t         *p_xml = NULL;
1029     xml_reader_t  *p_xml_reader = NULL;
1030
1031     p_sub = stream_MemoryNew( VLC_OBJECT(p_dec),
1032                               p_dec->fmt_in.p_extra,
1033                               p_dec->fmt_in.i_extra,
1034                               VLC_TRUE );
1035     if( !p_sub )
1036         return;
1037
1038     p_xml = xml_Create( p_dec );
1039     if( p_xml )
1040     {
1041         p_xml_reader = xml_ReaderCreate( p_xml, p_sub );
1042         if( p_xml_reader )
1043         {
1044             /* Look for Root Node */
1045             if( xml_ReaderRead( p_xml_reader ) == 1 )
1046             {
1047                 char *psz_node = xml_ReaderName( p_xml_reader );
1048
1049                 if( !strcasecmp( "usfsubtitles", psz_node ) )
1050                     ParseUSFHeaderTags( p_dec, p_xml_reader );
1051
1052                 free( psz_node );
1053             }
1054
1055             xml_ReaderDelete( p_xml, p_xml_reader );
1056         }
1057         xml_Delete( p_xml );
1058     }
1059     stream_Delete( p_sub );
1060 }
1061
/* StripTags: return a newly allocated copy of psz_subtitle with the markup
 * removed.
 *
 *  - Everything from '<' up to the matching '>' is dropped; "<br/>" is
 *    replaced by a line feed.
 *  - The entities &lt; &gt; &amp; and &quot; are decoded; any other '&' is
 *    copied through as a plain ampersand.
 *  - A tag that is never closed ends the scan: nothing after its '<' is
 *    copied.
 *
 * The result is built in a separate buffer that the caller must free().
 * NOTE: the input is not left completely untouched - line feeds already
 * present in psz_subtitle are overwritten with spaces in place.
 *
 * Returns NULL only if the buffer cannot be allocated.
 */
static char *StripTags( char *psz_subtitle )
{
    char *psz_text_start;
    char *psz_text;
    char *psz_shrunk;

    /* The stripped text can never be longer than the input */
    psz_text = psz_text_start = malloc( strlen( psz_subtitle ) + 1 );
    if( !psz_text_start )
        return NULL;

    while( *psz_subtitle )
    {
        /* Mask out any pre-existing LFs in the subtitle */
        if( *psz_subtitle == '\n' )
            *psz_subtitle = ' ';

        if( *psz_subtitle == '<' )
        {
            if( strncasecmp( psz_subtitle, "<br/>", 5 ) == 0 )
                *psz_text++ = '\n';

            psz_subtitle += strcspn( psz_subtitle, ">" );

            /* Unterminated tag: we are on the NUL terminator. Stop now,
             * otherwise the increment below would walk past the end of the
             * input (and the copy could overrun the output buffer). */
            if( *psz_subtitle == '\0' )
                break;
        }
        else if( *psz_subtitle == '&' )
        {
            /* Each matched entity includes its ';', so strcspn() always
             * lands on that ';' and the common increment steps over it. */
            if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
            {
                *psz_text++ = '<';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
            {
                *psz_text++ = '>';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
            {
                *psz_text++ = '&';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else if( !strncasecmp( psz_subtitle, "&quot;", 6 ))
            {
                *psz_text++ = '\"';
                psz_subtitle += strcspn( psz_subtitle, ";" );
            }
            else
            {
                /* Assume it is just a normal ampersand */
                *psz_text++ = '&';
            }
        }
        else
        {
            *psz_text++ = *psz_subtitle;
        }

        psz_subtitle++;
    }
    *psz_text++ = '\0';

    /* Give back the unused tail; if shrinking fails the original buffer is
     * still valid, so return that instead of leaking it. */
    psz_shrunk = realloc( psz_text_start, (size_t)( psz_text - psz_text_start ) );

    return psz_shrunk ? psz_shrunk : psz_text_start;
}
1128
/* CreatePlainText: convert a marked-up subtitle into plain text.
 *
 * The markup is removed with StripTags(), then every run of blanks (tab,
 * CR, LF, space) is squeezed down to a single character: a line feed if the
 * run contained one, a space otherwise.
 *
 * Returns a newly allocated string, or NULL if StripTags() failed.
 */

static char *CreatePlainText( char *psz_subtitle )
{
    static const char psz_blanks[] = "\t\r\n ";
    char *psz_plain = StripTags( psz_subtitle );
    char *p_run;

    if( psz_plain == NULL )
        return NULL;

    for( p_run = strpbrk( psz_plain, psz_blanks );
         p_run != NULL;
         p_run = strpbrk( p_run + 1, psz_blanks ) )
    {
        size_t i_run = strspn( p_run, psz_blanks );

        /* A line feed anywhere in the run wins over the other blanks */
        char c_fill = memchr( p_run, '\n', i_run ) ? '\n' : ' ';

        /* Close the gap: pull the rest of the string (terminator included)
         * up so that it starts right behind the first blank. */
        if( i_run > 1 )
            memmove( p_run + 1, p_run + i_run, strlen( p_run + i_run ) + 1 );

        *p_run = c_fill;
    }

    return psz_plain;
}
1167
1168 /****************************************************************************
1169  * download and resize image located at psz_url
1170  ***************************************************************************/
1171 static subpicture_region_t *LoadEmbeddedImage( decoder_t *p_dec,
1172                                                subpicture_t *p_spu,
1173                                                const char *psz_filename,
1174                                                int i_transparent_color )
1175 {
1176     decoder_sys_t         *p_sys = p_dec->p_sys;
1177     subpicture_region_t   *p_region;
1178     video_format_t         fmt_out;
1179     int                    k;
1180     picture_t             *p_pic = NULL;
1181
1182     for( k = 0; k < p_sys->i_images; k++ )
1183     {
1184         if( p_sys->pp_images &&
1185             !strcmp( p_sys->pp_images[k]->psz_filename, psz_filename ) )
1186         {
1187             p_pic = p_sys->pp_images[k]->p_pic;
1188             break;
1189         }
1190     }
1191
1192     if( !p_pic )
1193     {
1194         msg_Err( p_dec, "Unable to read image %s", psz_filename );
1195         return NULL;
1196     }
1197
1198     /* Display the feed's image */
1199     memset( &fmt_out, 0, sizeof( video_format_t));
1200
1201     fmt_out.i_chroma = VLC_FOURCC('Y','U','V','A');
1202     fmt_out.i_aspect = VOUT_ASPECT_FACTOR;
1203     fmt_out.i_sar_num = fmt_out.i_sar_den = 1;
1204     fmt_out.i_width =
1205         fmt_out.i_visible_width = p_pic->p[Y_PLANE].i_visible_pitch;
1206     fmt_out.i_height =
1207         fmt_out.i_visible_height = p_pic->p[Y_PLANE].i_visible_lines;
1208
1209     p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt_out );
1210     if( !p_region )
1211     {
1212         msg_Err( p_dec, "cannot allocate SPU region" );
1213         return NULL;
1214     }
1215     vout_CopyPicture( p_dec, &p_region->picture, p_pic );
1216
1217     /* This isn't the best way to do this - if you really want transparency, then
1218      * you're much better off using an image type that supports it like PNG. The
1219      * spec requires this support though.
1220      */
1221     if( i_transparent_color > 0 )
1222     {
1223         uint8_t i_r = ( i_transparent_color >> 16 ) & 0xff;
1224         uint8_t i_g = ( i_transparent_color >>  8 ) & 0xff;
1225         uint8_t i_b = ( i_transparent_color       ) & 0xff;
1226         uint8_t i_y = ( ( (  66 * i_r + 129 * i_g +  25 * i_b + 128 ) >> 8 ) + 16 );
1227         uint8_t i_u =   ( ( -38 * i_r -  74 * i_g + 112 * i_b + 128 ) >> 8 ) + 128 ;
1228         uint8_t i_v =   ( ( 112 * i_r -  94 * i_g -  18 * i_b + 128 ) >> 8 ) + 128 ;
1229
1230         if( ( p_region->picture.Y_PITCH == p_region->picture.U_PITCH ) &&
1231             ( p_region->picture.Y_PITCH == p_region->picture.V_PITCH ) &&
1232             ( p_region->picture.Y_PITCH == p_region->picture.A_PITCH ) )
1233         {
1234             int i_lines = p_region->picture.p[ Y_PLANE ].i_lines;
1235             if( i_lines > p_region->picture.p[ U_PLANE ].i_lines )
1236                 i_lines = p_region->picture.p[ U_PLANE ].i_lines;
1237             if( i_lines > p_region->picture.p[ V_PLANE ].i_lines )
1238                 i_lines = p_region->picture.p[ V_PLANE ].i_lines;
1239             if( i_lines > p_region->picture.p[ A_PLANE ].i_lines )
1240                 i_lines = p_region->picture.p[ A_PLANE ].i_lines;
1241
1242             int   i;
1243
1244             for( i = 0; i < p_region->picture.A_PITCH * i_lines; i++ )
1245             {
1246                 if(( p_region->picture.Y_PIXELS[ i ] == i_y ) &&
1247                    ( p_region->picture.U_PIXELS[ i ] == i_u ) &&
1248                    ( p_region->picture.V_PIXELS[ i ] == i_v ) )
1249                 {
1250                     p_region->picture.A_PIXELS[ i ] = 1;
1251                 }
1252             }
1253         }
1254     }
1255     return p_region;
1256 }