1 /*****************************************************************************
2 * xtag.c : a trivial parser for XML-like tags
3 *****************************************************************************
4 * Copyright (C) 2003-2004 Commonwealth Scientific and Industrial Research
5 * Organisation (CSIRO) Australia
6 * Copyright (C) 2000-2004 the VideoLAN team
10 * Authors: Conrad Parker <Conrad.Parker@csiro.au>
11 * Andre Pang <Andre.Pang@csiro.au>
12 * Gildas Bazin <gbazin@videolan.org>
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
37 #include <vlc_block.h>
38 #include <vlc_stream.h>
39 #include <vlc_memory.h>
56 * struct XTag is kind of a union ... it normally represents a whole
57 * tag (and its children), but it could alternatively represent some
58 * PCDATA. Basically, if tag->pcdata is non-NULL, interpret only it and
59 * ignore the name, attributes and inner_tags.
71 typedef struct _XAttribute
77 typedef struct _XTagParser
79 int valid; /* boolean */
85 /*****************************************************************************
87 *****************************************************************************/
88 static int Open ( vlc_object_t * );
89 static void Close( vlc_object_t * );
92 set_description( N_("Simple XML Parser") )
93 set_capability( "xml", 5 )
94 set_callbacks( Open, Close )
97 struct xml_reader_sys_t
99 XTag *p_root; /* Root tag */
100 XTag *p_curtag; /* Current tag */
101 XList *p_curattr; /* Current attribute */
105 static xml_reader_t *ReaderCreate( xml_t *, stream_t * );
106 static void ReaderDelete( xml_reader_t * );
107 static int ReaderRead( xml_reader_t * );
108 static int ReaderNodeType( xml_reader_t * );
109 static char *ReaderName( xml_reader_t * );
110 static char *ReaderValue( xml_reader_t * );
111 static int ReaderNextAttr( xml_reader_t * );
113 static int ReaderUseDTD ( xml_reader_t *, bool );
115 static void CatalogLoad( xml_t *, const char * );
116 static void CatalogAdd( xml_t *, const char *, const char *, const char * );
118 static XTag *xtag_new_parse( const char *, int );
119 static char *xtag_get_name( XTag * );
121 static char *xtag_get_pcdata( XTag * );
122 static char *xtag_get_attribute( XTag *, char * );
124 static XTag *xtag_first_child( XTag *, char * );
125 static XTag *xtag_next_child( XTag *, char * );
126 static void xtag_free( XTag * );
128 static int xtag_snprint( char *, int, XTag * );
131 /*****************************************************************************
132 * Module initialization
133 *****************************************************************************/
134 static int Open( vlc_object_t *p_this )
136 xml_t *p_xml = (xml_t *)p_this;
138 p_xml->pf_reader_create = ReaderCreate;
139 p_xml->pf_reader_delete = ReaderDelete;
141 p_xml->pf_catalog_load = CatalogLoad;
142 p_xml->pf_catalog_add = CatalogAdd;
147 /*****************************************************************************
148 * Module deinitialization
149 *****************************************************************************/
150 static void Close( vlc_object_t *p_this )
156 /*****************************************************************************
157 * Catalogue functions
158 *****************************************************************************/
159 static void CatalogLoad( xml_t *p_xml, const char *psz_filename )
161 VLC_UNUSED(psz_filename);
162 msg_Dbg( p_xml, "catalog support not implemented" );
165 static void CatalogAdd( xml_t *p_xml, const char *psz_arg1,
166 const char *psz_arg2, const char *psz_filename )
168 VLC_UNUSED(p_xml); VLC_UNUSED(psz_arg1); VLC_UNUSED(psz_arg2);
169 VLC_UNUSED(psz_filename);
172 /*****************************************************************************
174 *****************************************************************************/
175 static xml_reader_t *ReaderCreate( xml_t *p_xml, stream_t *s )
177 xml_reader_t *p_reader;
179 int i_size, i_pos = 0, i_buffer = 2048;
182 /* Open and read file */
183 p_buffer = malloc( i_buffer );
184 if( p_buffer == NULL )
187 while( ( i_size = stream_Read( s, &p_buffer[i_pos], 2048 ) ) == 2048 )
191 p_buffer = realloc_or_free( p_buffer, i_buffer );
195 if( i_pos + i_size == 0 )
197 msg_Dbg( p_xml, "empty XML" );
201 p_buffer[ i_pos + i_size ] = '\0'; /* 0 terminated string */
203 p_root = xtag_new_parse( p_buffer, i_buffer );
206 msg_Warn( p_xml, "couldn't parse XML" );
212 p_reader = malloc( sizeof(xml_reader_t) );
215 p_reader->p_sys = malloc( sizeof(xml_reader_sys_t) );
216 if( !p_reader->p_sys )
221 p_reader->p_sys->p_root = p_root;
222 p_reader->p_sys->p_curtag = NULL;
223 p_reader->p_sys->p_curattr = NULL;
224 p_reader->p_sys->b_endtag = false;
225 p_reader->p_xml = p_xml;
227 p_reader->pf_read = ReaderRead;
228 p_reader->pf_node_type = ReaderNodeType;
229 p_reader->pf_name = ReaderName;
230 p_reader->pf_value = ReaderValue;
231 p_reader->pf_next_attr = ReaderNextAttr;
232 p_reader->pf_use_dtd = ReaderUseDTD;
237 static void ReaderDelete( xml_reader_t *p_reader )
239 xtag_free( p_reader->p_sys->p_root );
240 free( p_reader->p_sys );
244 static int ReaderUseDTD ( xml_reader_t *p_reader, bool b_use )
246 VLC_UNUSED(p_reader); VLC_UNUSED(b_use);
250 static int ReaderRead( xml_reader_t *p_reader )
254 if( !p_reader->p_sys->p_curtag )
256 p_reader->p_sys->p_curtag = p_reader->p_sys->p_root;
262 if( (p_child = xtag_next_child( p_reader->p_sys->p_curtag, 0 )) )
264 p_reader->p_sys->p_curtag = p_child;
265 p_reader->p_sys->p_curattr = NULL;
266 p_reader->p_sys->b_endtag = false;
270 if( p_reader->p_sys->p_curtag->name && /* no end tag for pcdata */
271 !p_reader->p_sys->b_endtag )
273 p_reader->p_sys->b_endtag = true;
277 p_reader->p_sys->b_endtag = false;
278 if( !p_reader->p_sys->p_curtag->parent ) return 0;
279 p_reader->p_sys->p_curtag = p_reader->p_sys->p_curtag->parent;
285 static int ReaderNodeType( xml_reader_t *p_reader )
287 if( p_reader->p_sys->p_curtag->name && p_reader->p_sys->b_endtag )
288 return XML_READER_ENDELEM;
289 if( p_reader->p_sys->p_curtag->name )
290 return XML_READER_STARTELEM;
291 if( p_reader->p_sys->p_curtag->pcdata )
292 return XML_READER_TEXT;
293 return XML_READER_NONE;
296 static char *ReaderName( xml_reader_t *p_reader )
298 const char *psz_name;
300 if( !p_reader->p_sys->p_curattr )
302 psz_name = xtag_get_name( p_reader->p_sys->p_curtag );
304 fprintf( stderr, "TAG: %s\n", psz_name );
308 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->name;
310 return psz_name ? strdup( psz_name ) : NULL;
313 static char *ReaderValue( xml_reader_t *p_reader )
315 const char *psz_name;
316 if( p_reader->p_sys->p_curtag->pcdata )
319 fprintf( stderr, "%s\n", p_reader->p_sys->p_curtag->pcdata );
321 return strdup( p_reader->p_sys->p_curtag->pcdata );
324 if( !p_reader->p_sys->p_curattr ) return NULL;
327 fprintf( stderr, "%s=%s\n", ((XAttribute *)p_reader->p_sys->p_curattr->data)->name,
328 ((XAttribute *)p_reader->p_sys->p_curattr->data)->value );
331 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->value;
333 return psz_name ? strdup( psz_name ) : NULL;
336 static int ReaderNextAttr( xml_reader_t *p_reader )
338 if( !p_reader->p_sys->p_curattr )
339 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curtag->attributes;
340 else if( p_reader->p_sys->p_curattr )
341 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curattr->next;
343 return p_reader->p_sys->p_curattr ? VLC_SUCCESS : VLC_EGENERIC;
346 /*****************************************************************************
347 * XTAG parser functions
348 *****************************************************************************/
350 static XList *xlist_append( XList *list, void *data )
354 l = (XList *)malloc( sizeof(XList) );
356 l->prev = l->next = NULL;
362 /* Find the last element */
372 static void xlist_free( XList *list )
376 for( l = list; l; l = ln )
383 /* Character classes */
/* Each class is a distinct bit so that classes can be OR-ed into a mask.
 * The expansions are parenthesized so that they keep their value when used
 * inside a larger expression (e.g. next to '+', '-' or '*', which bind
 * tighter than '<<'). */
#define X_WHITESPACE  (1<<0)
#define X_OPENTAG     (1<<1)
#define X_CLOSETAG    (1<<2)
#define X_DQUOTE      (1<<3)
#define X_SQUOTE      (1<<4)
396 static int xtag_cin( char c, int char_class )
398 if( char_class & X_WHITESPACE ) if( isspace(c) ) return true;
399 if( char_class & X_OPENTAG ) if( c == '<' ) return true;
400 if( char_class & X_CLOSETAG ) if( c == '>' ) return true;
401 if( char_class & X_DQUOTE ) if( c == '"' ) return true;
402 if( char_class & X_SQUOTE ) if( c == '\'' ) return true;
403 if( char_class & X_EQUAL ) if( c == '=' ) return true;
404 if( char_class & X_SLASH ) if( c == '/' ) return true;
405 if( char_class & X_QMARK ) if( c == '?' ) return true;
406 if( char_class & X_DASH ) if( c == '-' ) return true;
407 if( char_class & X_EMARK ) if( c == '!' ) return true;
412 static int xtag_index( XTagParser *parser, int char_class )
414 char *s = parser->start;
417 for( i = 0; s[i] && s != parser->end; i++ )
419 if( xtag_cin( s[i], char_class ) ) return i;
425 static void xtag_skip_over( XTagParser *parser, int char_class )
427 char *s = parser->start;
430 if( !parser->valid ) return;
432 for( i = 0; s[i] && s != parser->end; i++ )
434 if( !xtag_cin( s[i], char_class ) )
436 parser->start = &s[i];
444 static void xtag_skip_whitespace( XTagParser * parser )
446 xtag_skip_over( parser, X_WHITESPACE );
449 static char *xtag_slurp_to( XTagParser *parser, int good_end, int bad_end )
451 char *ret, *s = parser->start;
454 if( !parser->valid ) return NULL;
456 xi = xtag_index( parser, good_end | bad_end );
458 if( xi > 0 && xtag_cin (s[xi], good_end) )
460 ret = malloc( xi+1 );
462 strncpy( ret, s, xi );
464 parser->start = &s[xi];
471 static int xtag_assert_and_pass( XTagParser *parser, int char_class )
473 char *s = parser->start;
475 if( !parser->valid ) return false;
477 if( !xtag_cin( s[0], char_class ) )
479 parser->valid = false;
483 parser->start = &s[1];
488 static char *xtag_slurp_quoted( XTagParser *parser )
491 int quote = X_DQUOTE; /* quote char to match on */
494 if( !parser->valid ) return NULL;
496 xtag_skip_whitespace( parser );
500 if( xtag_cin( s[0], X_SQUOTE ) ) quote = X_SQUOTE;
502 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
506 for( xi = 0; s[xi]; xi++ )
508 if( xtag_cin( s[xi], quote ) )
510 if( !(xi > 1 && s[xi-1] == '\\') ) break;
514 ret = malloc( xi+1 );
516 strncpy( ret, s, xi );
518 parser->start = &s[xi];
520 if( !xtag_assert_and_pass( parser, quote ) )
529 static XAttribute *xtag_parse_attribute( XTagParser *parser )
538 xtag_skip_whitespace( parser );
540 name = xtag_slurp_to( parser, X_WHITESPACE|X_EQUAL, X_SLASH|X_CLOSETAG );
544 xtag_skip_whitespace( parser );
547 if( !xtag_assert_and_pass( parser, X_EQUAL ) )
550 fprintf( stderr, "xtag: attr failed EQUAL on <%s>\n", name );
555 xtag_skip_whitespace( parser );
557 value = xtag_slurp_quoted( parser );
562 fprintf (stderr, "Got NULL quoted attribute value\n");
567 attr = malloc( sizeof (*attr) );
575 parser->valid = false;
579 static XTag *xtag_parse_tag( XTagParser *parser )
588 if( !parser->valid ) return NULL;
592 /* if this starts a comment tag, skip until end */
593 if( (parser->end - parser->start) > 7 &&
594 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_EMARK ) &&
595 xtag_cin( s[2], X_DASH ) && xtag_cin( s[3], X_DASH ) )
597 parser->start = s = &s[4];
598 while( (xi = xtag_index( parser, X_DASH )) >= 0 )
600 parser->start = s = &s[xi+1];
601 if( xtag_cin( s[0], X_DASH ) && xtag_cin( s[1], X_CLOSETAG ) )
603 parser->start = &s[2];
604 xtag_skip_whitespace( parser );
605 return xtag_parse_tag( parser );
611 /* ignore processing instructions '<?' ... '?>' */
612 if( (parser->end - parser->start) > 4 &&
613 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
615 parser->start = s = &s[2];
616 while ((xi = xtag_index( parser, X_QMARK )) >= 0) {
617 if (xtag_cin( s[xi+1], X_CLOSETAG )) {
618 parser->start = &s[xi+2];
619 xtag_skip_whitespace( parser );
620 return xtag_parse_tag( parser );
626 /* ignore doctype '<!DOCTYPE' ... '>' */
627 if ( (parser->end - parser->start) > 8 &&
628 !strncmp( s, "<!DOCTYPE", 9 ) ) {
629 xi = xtag_index( parser, X_CLOSETAG );
631 parser->start = &s[xi+1];
632 xtag_skip_whitespace( parser );
633 return xtag_parse_tag( parser );
640 if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) != NULL )
642 tag = malloc( sizeof(*tag) );
645 tag->pcdata = pcdata;
646 tag->parent = parser->current_tag;
647 tag->attributes = NULL;
648 tag->children = NULL;
649 tag->current_child = NULL;
654 /* if this starts a close tag, return NULL and let the parent take it */
655 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
658 /* parse CDATA content */
659 if ( (parser->end - parser->start) > 8 &&
660 !strncmp( s, "<![CDATA[", 9 ) ) {
661 parser->start = s = &s[9];
662 while (parser->end - s > 2) {
663 if (strncmp( s, "]]>", 3 ) == 0) {
664 if ( !(tag = malloc( sizeof(*tag))) ) return NULL;
665 if ( !(pcdata = malloc( s - parser->start + 1)) )
670 strncpy( pcdata, parser->start, s - parser->start );
671 pcdata[s - parser->start]='\0';
672 parser->start = &s[3];
674 tag->pcdata = pcdata;
675 tag->parent = parser->current_tag;
676 tag->attributes = NULL;
677 tag->children = NULL;
678 tag->current_child = NULL;
688 if( !xtag_assert_and_pass( parser, X_OPENTAG ) ) return NULL;
690 name = xtag_slurp_to( parser, X_WHITESPACE|X_SLASH|X_CLOSETAG, X_NONE );
691 if( name == NULL ) return NULL;
694 fprintf (stderr, "<%s ...\n", name);
697 tag = malloc( sizeof(*tag) );
701 tag->parent = parser->current_tag;
702 tag->attributes = NULL;
703 tag->children = NULL;
704 tag->current_child = NULL;
708 if( xtag_cin( s[0], X_WHITESPACE ) )
710 while( (attr = xtag_parse_attribute( parser )) != NULL )
712 tag->attributes = xlist_append( tag->attributes, attr );
716 xtag_skip_whitespace( parser );
720 if( xtag_cin( s[0], X_CLOSETAG ) )
722 parser->current_tag = tag;
724 xtag_assert_and_pass( parser, X_CLOSETAG );
726 while( (inner = xtag_parse_tag( parser ) ) != NULL )
728 tag->children = xlist_append( tag->children, inner );
731 parser->current_tag = tag->parent;
732 xtag_skip_whitespace( parser );
734 xtag_assert_and_pass( parser, X_OPENTAG );
735 xtag_assert_and_pass( parser, X_SLASH );
736 name = xtag_slurp_to( parser, X_WHITESPACE | X_CLOSETAG, X_NONE );
739 if( strcmp( name, tag->name ) )
742 fprintf (stderr, "got %s expected %s\n", name, tag->name);
744 parser->valid = false;
749 xtag_skip_whitespace( parser );
750 xtag_assert_and_pass( parser, X_CLOSETAG );
751 xtag_skip_whitespace( parser );
755 xtag_assert_and_pass( parser, X_SLASH );
756 xtag_assert_and_pass( parser, X_CLOSETAG );
757 xtag_skip_whitespace( parser );
763 static void xtag_free( XTag *xtag )
773 free( xtag->pcdata );
775 for( l = xtag->attributes; l; l = l->next )
777 if( (attr = (XAttribute *)l->data) != NULL )
784 xlist_free( xtag->attributes );
786 for( l = xtag->children; l; l = l->next )
788 child = (XTag *)l->data;
791 xlist_free( xtag->children );
796 static XTag *xtag_new_parse( const char *s, int n )
799 XTag *tag, *ttag, *wrapper;
802 parser.current_tag = NULL;
803 parser.start = (char *)s;
805 if( n == -1 ) parser.end = NULL;
809 fprintf (stderr, "empty buffer\n");
813 else parser.end = (char *)&s[n];
815 /* can't have whitespace pcdata outside rootnode */
816 xtag_skip_whitespace( &parser );
818 tag = xtag_parse_tag( &parser );
823 fprintf (stderr, "invalid file\n");
829 if( (ttag = xtag_parse_tag( &parser )) != NULL )
837 wrapper = malloc( sizeof(XTag) );
839 wrapper->name = NULL;
840 wrapper->pcdata = NULL;
841 wrapper->parent = NULL;
842 wrapper->attributes = NULL;
843 wrapper->children = NULL;
844 wrapper->current_child = NULL;
846 wrapper->children = xlist_append( wrapper->children, tag );
847 wrapper->children = xlist_append( wrapper->children, ttag );
849 while( (ttag = xtag_parse_tag( &parser )) != NULL )
857 wrapper->children = xlist_append( wrapper->children, ttag );
865 static char *xtag_get_name( XTag *xtag )
867 return xtag ? xtag->name : NULL;
871 static char *xtag_get_pcdata( XTag *xtag )
876 if( xtag == NULL ) return NULL;
878 for( l = xtag->children; l; l = l->next )
880 child = (XTag *)l->data;
881 if( child->pcdata != NULL )
883 return child->pcdata;
890 static char *xtag_get_attribute( XTag *xtag, char *attribute )
895 if( xtag == NULL ) return NULL;
897 for( l = xtag->attributes; l; l = l->next )
899 if( (attr = (XAttribute *)l->data) != NULL )
901 if( !strcmp( attr->name, attribute ) ) return attr->value;
909 static XTag *xtag_first_child( XTag *xtag, char *name )
914 if( xtag == NULL ) return NULL;
915 if( (l = xtag->children) == NULL ) return NULL;
919 xtag->current_child = l;
920 return (XTag *)l->data;
923 for( ; l; l = l->next )
925 child = (XTag *)l->data;
927 if( !strcmp( child->name, name ) )
929 xtag->current_child = l;
934 xtag->current_child = NULL;
939 static XTag *xtag_next_child( XTag *xtag, char *name )
944 if( xtag == NULL ) return NULL;
946 if( (l = xtag->current_child) == NULL )
947 return xtag_first_child( xtag, name );
949 if( (l = l->next) == NULL ) return NULL;
953 xtag->current_child = l;
954 return (XTag *)l->data;
957 for( ; l; l = l->next )
959 child = (XTag *)l->data;
961 if( !strcmp( child->name, name ) )
963 xtag->current_child = l;
968 xtag->current_child = NULL;
/*
 * Concatenates a variable list of strings into buf.
 *
 * buf - destination buffer; may be NULL when n is 0
 * n   - size of buf in bytes, including room for the terminating NUL
 * ... - any number of `char *` strings, the last of which must be NULL
 *
 * Like C99 snprintf(): at most n-1 bytes are stored, the output is always
 * NUL-terminated when n > 0, and the return value is the total length that
 * would have been written had buf been large enough, even if this is larger
 * than n.
 */
static int xtag_snprints( char *buf, int n, ... )
{
    va_list ap;
    char *s;
    int room = ( n > 0 ) ? n - 1 : 0;   /* bytes usable before the NUL */
    int total = 0;

    va_start( ap, n );

    for( s = va_arg( ap, char * ); s; s = va_arg( ap, char * ) )
    {
        int len = (int)strlen( s );
        int to_copy = ( len < room ) ? len : room;

        if( to_copy > 0 )
        {
            memcpy( buf, s, to_copy );
            buf += to_copy;
            room -= to_copy;
        }

        /* Keep counting after the buffer is full so the caller can detect
         * truncation from the return value. */
        total += len;
    }

    va_end( ap );

    if( n > 0 ) *buf = '\0';

    return total;
}
1007 static int xtag_snprint( char *buf, int n, XTag *xtag )
1009 int nn, written = 0;
1014 #define FORWARD(N) \
1015 buf += __MIN(n, N); \
1016 n = __MAX(n-N, 0); \
1021 if( n > 0 ) buf[0] = '\0';
1027 nn = xtag_snprints( buf, n, xtag->pcdata, NULL );
1035 nn = xtag_snprints( buf, n, "<", xtag->name, NULL );
1038 for( l = xtag->attributes; l; l = l->next )
1040 attr = (XAttribute *)l->data;
1042 nn = xtag_snprints( buf, n, " ", attr->name, "=\"", attr->value,
1047 if( xtag->children == NULL )
1049 nn = xtag_snprints ( buf, n, "/>", NULL );
1055 nn = xtag_snprints( buf, n, ">", NULL );
1059 for( l = xtag->children; l; l = l->next )
1061 child = (XTag *)l->data;
1063 nn = xtag_snprint( buf, n, child );
1069 nn = xtag_snprints( buf, n, "</", xtag->name, ">", NULL );