1 /*****************************************************************************
2 * xtag.c : a trivial parser for XML-like tags
3 *****************************************************************************
4 * Copyright (C) 2003-2004 Commonwealth Scientific and Industrial Research
5 * Organisation (CSIRO) Australia
6 * Copyright (C) 2000-2004 the VideoLAN team
10 * Authors: Conrad Parker <Conrad.Parker@csiro.au>
11 * Andre Pang <Andre.Pang@csiro.au>
12 * Gildas Bazin <gbazin@videolan.org>
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
37 #include "vlc_block.h"
38 #include "vlc_stream.h"
/*
 * struct XTag is kind of a union: it normally represents a whole tag
 * (and its children), but it can alternatively represent some PCDATA.
 * Basically, if tag->pcdata is non-NULL, interpret only it and ignore
 * the name, attributes and children.
 */
70 typedef struct _XAttribute
76 typedef struct _XTagParser
78 int valid; /* boolean */
84 /*****************************************************************************
86 *****************************************************************************/
87 static int Open ( vlc_object_t * );
88 static void Close( vlc_object_t * );
91 set_description( N_("Simple XML Parser") );
92 set_capability( "xml", 5 );
93 set_callbacks( Open, Close );
96 struct xml_reader_sys_t
98 XTag *p_root; /* Root tag */
99 XTag *p_curtag; /* Current tag */
100 XList *p_curattr; /* Current attribute */
104 static xml_reader_t *ReaderCreate( xml_t *, stream_t * );
105 static void ReaderDelete( xml_reader_t * );
106 static int ReaderRead( xml_reader_t * );
107 static int ReaderNodeType( xml_reader_t * );
108 static char *ReaderName( xml_reader_t * );
109 static char *ReaderValue( xml_reader_t * );
110 static int ReaderNextAttr( xml_reader_t * );
112 static int ReaderUseDTD ( xml_reader_t *, bool );
114 static void CatalogLoad( xml_t *, const char * );
115 static void CatalogAdd( xml_t *, const char *, const char *, const char * );
117 static XTag *xtag_new_parse( const char *, int );
118 static char *xtag_get_name( XTag * );
120 static char *xtag_get_pcdata( XTag * );
121 static char *xtag_get_attribute( XTag *, char * );
123 static XTag *xtag_first_child( XTag *, char * );
124 static XTag *xtag_next_child( XTag *, char * );
125 static XTag *xtag_free( XTag * );
126 static int xtag_snprint( char *, int, XTag * );
128 /*****************************************************************************
129 * Module initialization
130 *****************************************************************************/
131 static int Open( vlc_object_t *p_this )
133 xml_t *p_xml = (xml_t *)p_this;
135 p_xml->pf_reader_create = ReaderCreate;
136 p_xml->pf_reader_delete = ReaderDelete;
138 p_xml->pf_catalog_load = CatalogLoad;
139 p_xml->pf_catalog_add = CatalogAdd;
144 /*****************************************************************************
145 * Module deinitialization
146 *****************************************************************************/
147 static void Close( vlc_object_t *p_this )
153 /*****************************************************************************
154 * Catalogue functions
155 *****************************************************************************/
156 static void CatalogLoad( xml_t *p_xml, const char *psz_filename )
158 VLC_UNUSED(psz_filename);
159 msg_Dbg( p_xml, "catalog support not implemented" );
162 static void CatalogAdd( xml_t *p_xml, const char *psz_arg1,
163 const char *psz_arg2, const char *psz_filename )
165 VLC_UNUSED(p_xml); VLC_UNUSED(psz_arg1); VLC_UNUSED(psz_arg2);
166 VLC_UNUSED(psz_filename);
169 /*****************************************************************************
171 *****************************************************************************/
172 static xml_reader_t *ReaderCreate( xml_t *p_xml, stream_t *s )
174 xml_reader_t *p_reader;
175 char *p_buffer, *p_new;
176 int i_size, i_pos = 0, i_buffer = 2048;
179 /* Open and read file */
180 p_buffer = malloc( i_buffer );
181 if( p_buffer == NULL )
184 while( ( i_size = stream_Read( s, &p_buffer[i_pos], 2048 ) ) == 2048 )
188 p_new = realloc( p_buffer, i_buffer );
196 p_buffer[ i_pos + i_size ] = 0; /* 0 terminated string */
198 if( i_pos + i_size == 0 )
200 msg_Dbg( p_xml, "empty XML" );
205 p_root = xtag_new_parse( p_buffer, i_buffer );
208 msg_Warn( p_xml, "couldn't parse XML" );
214 p_reader = malloc( sizeof(xml_reader_t) );
215 p_reader->p_sys = malloc( sizeof(xml_reader_sys_t) );
216 p_reader->p_sys->p_root = p_root;
217 p_reader->p_sys->p_curtag = NULL;
218 p_reader->p_sys->p_curattr = NULL;
219 p_reader->p_sys->b_endtag = false;
220 p_reader->p_xml = p_xml;
222 p_reader->pf_read = ReaderRead;
223 p_reader->pf_node_type = ReaderNodeType;
224 p_reader->pf_name = ReaderName;
225 p_reader->pf_value = ReaderValue;
226 p_reader->pf_next_attr = ReaderNextAttr;
227 p_reader->pf_use_dtd = ReaderUseDTD;
232 static void ReaderDelete( xml_reader_t *p_reader )
234 xtag_free( p_reader->p_sys->p_root );
235 free( p_reader->p_sys );
239 static int ReaderUseDTD ( xml_reader_t *p_reader, bool b_use )
241 VLC_UNUSED(p_reader); VLC_UNUSED(b_use);
245 static int ReaderRead( xml_reader_t *p_reader )
249 if( !p_reader->p_sys->p_curtag )
251 p_reader->p_sys->p_curtag = p_reader->p_sys->p_root;
257 if( (p_child = xtag_next_child( p_reader->p_sys->p_curtag, 0 )) )
259 p_reader->p_sys->p_curtag = p_child;
260 p_reader->p_sys->p_curattr = 0;
261 p_reader->p_sys->b_endtag = false;
265 if( p_reader->p_sys->p_curtag->name && /* no end tag for pcdata */
266 !p_reader->p_sys->b_endtag )
268 p_reader->p_sys->b_endtag = true;
272 p_reader->p_sys->b_endtag = false;
273 if( !p_reader->p_sys->p_curtag->parent ) return 0;
274 p_reader->p_sys->p_curtag = p_reader->p_sys->p_curtag->parent;
280 static int ReaderNodeType( xml_reader_t *p_reader )
282 if( p_reader->p_sys->p_curtag->name &&
283 p_reader->p_sys->b_endtag ) return XML_READER_ENDELEM;
284 if( p_reader->p_sys->p_curtag->name ) return XML_READER_STARTELEM;
285 if( p_reader->p_sys->p_curtag->pcdata ) return XML_READER_TEXT;
286 return XML_READER_NONE;
289 static char *ReaderName( xml_reader_t *p_reader )
291 const char *psz_name;
293 if( !p_reader->p_sys->p_curattr )
295 psz_name = xtag_get_name( p_reader->p_sys->p_curtag );
297 fprintf( stderr, "TAG: %s\n", psz_name );
301 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->name;
303 if( psz_name ) return strdup( psz_name );
307 static char *ReaderValue( xml_reader_t *p_reader )
309 const char *psz_name;
310 if( p_reader->p_sys->p_curtag->pcdata )
313 fprintf( stderr, "%s\n", p_reader->p_sys->p_curtag->pcdata );
315 return strdup( p_reader->p_sys->p_curtag->pcdata );
318 if( !p_reader->p_sys->p_curattr ) return 0;
321 fprintf( stderr, "%s=%s\n", ((XAttribute *)p_reader->p_sys->p_curattr->data)->name,
322 ((XAttribute *)p_reader->p_sys->p_curattr->data)->value );
325 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->value;
327 if( psz_name ) return strdup( psz_name );
331 static int ReaderNextAttr( xml_reader_t *p_reader )
333 if( !p_reader->p_sys->p_curattr )
334 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curtag->attributes;
335 else if( p_reader->p_sys->p_curattr )
336 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curattr->next;
338 if( p_reader->p_sys->p_curattr ) return VLC_SUCCESS;
339 else return VLC_EGENERIC;
342 /*****************************************************************************
343 * XTAG parser functions
344 *****************************************************************************/
346 static XList *xlist_append( XList *list, void *data )
350 l = (XList *)malloc( sizeof(XList) );
351 l->prev = l->next = NULL;
354 if( list == NULL ) return l;
356 for( last = list; last; last = last->next )
357 if( last->next == NULL ) break;
359 if( last ) last->next = l;
364 static void xlist_free( XList *list )
368 for( l = list; l; l = ln )
/* Character classes: one bit each so that several classes can be OR-ed
 * into a single mask. Parenthesised so that the expansion is safe next to
 * any operator. */
#define X_WHITESPACE (1<<0)
#define X_OPENTAG    (1<<1)
#define X_CLOSETAG   (1<<2)
#define X_DQUOTE     (1<<3)
#define X_SQUOTE     (1<<4)
388 static int xtag_cin( char c, int char_class )
390 if( char_class & X_WHITESPACE ) if( isspace(c) ) return true;
391 if( char_class & X_OPENTAG ) if( c == '<' ) return true;
392 if( char_class & X_CLOSETAG ) if( c == '>' ) return true;
393 if( char_class & X_DQUOTE ) if( c == '"' ) return true;
394 if( char_class & X_SQUOTE ) if( c == '\'' ) return true;
395 if( char_class & X_EQUAL ) if( c == '=' ) return true;
396 if( char_class & X_SLASH ) if( c == '/' ) return true;
397 if( char_class & X_QMARK ) if( c == '?' ) return true;
398 if( char_class & X_DASH ) if( c == '-' ) return true;
399 if( char_class & X_EMARK ) if( c == '!' ) return true;
404 static int xtag_index( XTagParser *parser, int char_class )
406 char *s = parser->start;
409 for( i = 0; s[i] && s != parser->end; i++ )
411 if( xtag_cin( s[i], char_class ) ) return i;
417 static void xtag_skip_over( XTagParser *parser, int char_class )
419 char *s = parser->start;
422 if( !parser->valid ) return;
424 for( i = 0; s[i] && s != parser->end; i++ )
426 if( !xtag_cin( s[i], char_class ) )
428 parser->start = &s[i];
436 static void xtag_skip_whitespace( XTagParser * parser )
438 xtag_skip_over( parser, X_WHITESPACE );
441 static char *xtag_slurp_to( XTagParser *parser, int good_end, int bad_end )
443 char *ret, *s = parser->start;
446 if( !parser->valid ) return NULL;
448 xi = xtag_index( parser, good_end | bad_end );
450 if( xi > 0 && xtag_cin (s[xi], good_end) )
452 ret = malloc( (xi+1) * sizeof(char) );
453 strncpy( ret, s, xi );
455 parser->start = &s[xi];
462 static int xtag_assert_and_pass( XTagParser *parser, int char_class )
464 char *s = parser->start;
466 if( !parser->valid ) return false;
468 if( !xtag_cin( s[0], char_class ) )
470 parser->valid = false;
474 parser->start = &s[1];
479 static char *xtag_slurp_quoted( XTagParser *parser )
482 int quote = X_DQUOTE; /* quote char to match on */
485 if( !parser->valid ) return NULL;
487 xtag_skip_whitespace( parser );
491 if( xtag_cin( s[0], X_SQUOTE ) ) quote = X_SQUOTE;
493 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
497 for( xi = 0; s[xi]; xi++ )
499 if( xtag_cin( s[xi], quote ) )
501 if( !(xi > 1 && s[xi-1] == '\\') ) break;
505 ret = malloc( (xi+1) * sizeof(char) );
506 strncpy( ret, s, xi );
508 parser->start = &s[xi];
510 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
515 static XAttribute *xtag_parse_attribute( XTagParser *parser )
521 if( !parser->valid ) return NULL;
523 xtag_skip_whitespace( parser );
525 name = xtag_slurp_to( parser, X_WHITESPACE|X_EQUAL, X_SLASH|X_CLOSETAG );
526 if( name == NULL ) return NULL;
528 xtag_skip_whitespace( parser );
531 if( !xtag_assert_and_pass( parser, X_EQUAL ) )
534 fprintf( stderr, "xtag: attr failed EQUAL on <%s>\n", name );
539 xtag_skip_whitespace( parser );
541 value = xtag_slurp_quoted( parser );
546 fprintf (stderr, "Got NULL quoted attribute value\n");
551 attr = malloc( sizeof (*attr) );
558 parser->valid = false;
562 static XTag *xtag_parse_tag( XTagParser *parser )
571 if( !parser->valid ) return NULL;
575 /* if this starts a comment tag, skip until end */
576 if( (parser->end - parser->start) > 7 &&
577 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_EMARK ) &&
578 xtag_cin( s[2], X_DASH ) && xtag_cin( s[3], X_DASH ) )
580 parser->start = s = &s[4];
581 while( (xi = xtag_index( parser, X_DASH )) >= 0 )
583 parser->start = s = &s[xi+1];
584 if( xtag_cin( s[0], X_DASH ) && xtag_cin( s[1], X_CLOSETAG ) )
586 parser->start = &s[2];
587 xtag_skip_whitespace( parser );
588 return xtag_parse_tag( parser );
594 /* ignore processing instructions '<?' ... '?>' */
595 if( (parser->end - parser->start) > 4 &&
596 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
598 parser->start = s = &s[2];
599 while ((xi = xtag_index( parser, X_QMARK )) >= 0) {
600 if (xtag_cin( s[xi+1], X_CLOSETAG )) {
601 parser->start = &s[xi+2];
602 xtag_skip_whitespace( parser );
603 return xtag_parse_tag( parser );
609 /* ignore doctype '<!DOCTYPE' ... '>' */
610 if ( (parser->end - parser->start) > 8 &&
611 !strncmp( s, "<!DOCTYPE", 9 ) ) {
612 xi = xtag_index( parser, X_CLOSETAG );
614 parser->start = s = &s[xi+1];
615 xtag_skip_whitespace( parser );
616 return xtag_parse_tag( parser );
623 if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) != NULL )
625 tag = malloc( sizeof(*tag) );
627 tag->pcdata = pcdata;
628 tag->parent = parser->current_tag;
629 tag->attributes = NULL;
630 tag->children = NULL;
631 tag->current_child = NULL;
636 /* if this starts a close tag, return NULL and let the parent take it */
637 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
640 /* parse CDATA content */
641 if ( (parser->end - parser->start) > 8 &&
642 !strncmp( s, "<![CDATA[", 9 ) ) {
643 parser->start = s = &s[9];
644 while (parser->end - s > 2) {
645 if (strncmp( s, "]]>", 3 ) == 0) {
646 if ( !(tag = malloc( sizeof(*tag))) ) return NULL;
647 if ( !(pcdata = malloc( sizeof(char)*(s - parser->start + 1))) ) return NULL;
648 strncpy( pcdata, parser->start, s - parser->start );
649 pcdata[s - parser->start]='\0';
650 parser->start = s = &s[3];
652 tag->pcdata = pcdata;
653 tag->parent = parser->current_tag;
654 tag->attributes = NULL;
655 tag->children = NULL;
656 tag->current_child = NULL;
666 if( !xtag_assert_and_pass( parser, X_OPENTAG ) ) return NULL;
668 name = xtag_slurp_to( parser, X_WHITESPACE|X_SLASH|X_CLOSETAG, X_NONE );
669 if( name == NULL ) return NULL;
672 fprintf (stderr, "<%s ...\n", name);
675 tag = malloc( sizeof(*tag) );
678 tag->parent = parser->current_tag;
679 tag->attributes = NULL;
680 tag->children = NULL;
681 tag->current_child = NULL;
685 if( xtag_cin( s[0], X_WHITESPACE ) )
687 while( (attr = xtag_parse_attribute( parser )) != NULL )
689 tag->attributes = xlist_append( tag->attributes, attr );
693 xtag_skip_whitespace( parser );
697 if( xtag_cin( s[0], X_CLOSETAG ) )
699 parser->current_tag = tag;
701 xtag_assert_and_pass( parser, X_CLOSETAG );
703 while( (inner = xtag_parse_tag( parser ) ) != NULL )
705 tag->children = xlist_append( tag->children, inner );
708 parser->current_tag = tag->parent;
709 xtag_skip_whitespace( parser );
711 xtag_assert_and_pass( parser, X_OPENTAG );
712 xtag_assert_and_pass( parser, X_SLASH );
713 name = xtag_slurp_to( parser, X_WHITESPACE | X_CLOSETAG, X_NONE );
716 if( strcmp( name, tag->name ) )
719 fprintf (stderr, "got %s expected %s\n", name, tag->name);
721 parser->valid = false;
726 xtag_skip_whitespace( parser );
727 xtag_assert_and_pass( parser, X_CLOSETAG );
728 xtag_skip_whitespace( parser );
732 xtag_assert_and_pass( parser, X_SLASH );
733 xtag_assert_and_pass( parser, X_CLOSETAG );
734 xtag_skip_whitespace( parser );
740 static XTag *xtag_free( XTag *xtag )
746 if( xtag == NULL ) return NULL;
749 free( xtag->pcdata );
751 for( l = xtag->attributes; l; l = l->next )
753 if( (attr = (XAttribute *)l->data) != NULL )
760 xlist_free( xtag->attributes );
762 for( l = xtag->children; l; l = l->next )
764 child = (XTag *)l->data;
767 xlist_free( xtag->children );
774 static XTag *xtag_new_parse( const char *s, int n )
777 XTag *tag, *ttag, *wrapper;
780 parser.current_tag = NULL;
781 parser.start = (char *)s;
783 if( n == -1 ) parser.end = NULL;
787 fprintf (stderr, "empty buffer\n");
791 else parser.end = (char *)&s[n];
793 /* can't have whitespace pcdata outside rootnode */
794 xtag_skip_whitespace( &parser );
796 tag = xtag_parse_tag( &parser );
801 fprintf (stderr, "invalid file\n");
807 if( (ttag = xtag_parse_tag( &parser )) != NULL )
815 wrapper = malloc( sizeof(XTag) );
816 wrapper->name = NULL;
817 wrapper->pcdata = NULL;
818 wrapper->parent = NULL;
819 wrapper->attributes = NULL;
820 wrapper->children = NULL;
821 wrapper->current_child = NULL;
823 wrapper->children = xlist_append( wrapper->children, tag );
824 wrapper->children = xlist_append( wrapper->children, ttag );
826 while( (ttag = xtag_parse_tag( &parser )) != NULL )
834 wrapper->children = xlist_append( wrapper->children, ttag );
842 static char *xtag_get_name( XTag *xtag )
844 return xtag ? xtag->name : NULL;
848 static char *xtag_get_pcdata( XTag *xtag )
853 if( xtag == NULL ) return NULL;
855 for( l = xtag->children; l; l = l->next )
857 child = (XTag *)l->data;
858 if( child->pcdata != NULL )
860 return child->pcdata;
867 static char *xtag_get_attribute( XTag *xtag, char *attribute )
872 if( xtag == NULL ) return NULL;
874 for( l = xtag->attributes; l; l = l->next )
876 if( (attr = (XAttribute *)l->data) != NULL )
878 if( !strcmp( attr->name, attribute ) ) return attr->value;
886 static XTag *xtag_first_child( XTag *xtag, char *name )
891 if( xtag == NULL ) return NULL;
892 if( (l = xtag->children) == NULL ) return NULL;
896 xtag->current_child = l;
897 return (XTag *)l->data;
900 for( ; l; l = l->next )
902 child = (XTag *)l->data;
904 if( !strcmp( child->name, name ) )
906 xtag->current_child = l;
911 xtag->current_child = NULL;
916 static XTag *xtag_next_child( XTag *xtag, char *name )
921 if( xtag == NULL ) return NULL;
923 if( (l = xtag->current_child) == NULL )
924 return xtag_first_child( xtag, name );
926 if( (l = l->next) == NULL ) return NULL;
930 xtag->current_child = l;
931 return (XTag *)l->data;
934 for( ; l; l = l->next )
936 child = (XTag *)l->data;
938 if( !strcmp( child->name, name ) )
940 xtag->current_child = l;
945 xtag->current_child = NULL;
/*
 * This snprints function takes a variable list of char *, the last of
 * which must be NULL, and copies each in turn to buf, writing at most n
 * bytes in total. No terminating NUL is written.
 * Returns C99-style total length that would have been written, even if
 * this is larger than n.
 */
static int xtag_snprints( char *buf, int n, ... )
{
    va_list ap;
    char *s;
    int len, to_copy, total = 0;

    va_start( ap, n );

    for( s = va_arg( ap, char * ); s; s = va_arg( ap, char * ) )
    {
        len = (int)strlen( s );

        /* copy what still fits; keep counting once the room is used up */
        to_copy = ( len < n ) ? len : n;
        if( to_copy > 0 )
        {
            memcpy( buf, s, (size_t)to_copy );
            buf += to_copy;
            n -= to_copy;
        }

        total += len;
    }

    va_end( ap );

    return total;
}
983 static int xtag_snprint( char *buf, int n, XTag *xtag )
991 buf += __MIN(n, N); \
997 if( n > 0 ) buf[0] = '\0';
1003 nn = xtag_snprints( buf, n, xtag->pcdata, NULL );
1011 nn = xtag_snprints( buf, n, "<", xtag->name, NULL );
1014 for( l = xtag->attributes; l; l = l->next )
1016 attr = (XAttribute *)l->data;
1018 nn = xtag_snprints( buf, n, " ", attr->name, "=\"", attr->value,
1023 if( xtag->children == NULL )
1025 nn = xtag_snprints ( buf, n, "/>", NULL );
1031 nn = xtag_snprints( buf, n, ">", NULL );
1035 for( l = xtag->children; l; l = l->next )
1037 child = (XTag *)l->data;
1039 nn = xtag_snprint( buf, n, child );
1045 nn = xtag_snprints( buf, n, "</", xtag->name, ">", NULL );