1 /*****************************************************************************
2 * xtag.c : a trivial parser for XML-like tags
3 *****************************************************************************
4 * Copyright (C) 2003-2004 Commonwealth Scientific and Industrial Research
5 * Organisation (CSIRO) Australia
6 * Copyright (C) 2000-2004 VideoLAN
10 * Authors: Conrad Parker <Conrad.Parker@csiro.au>
11 * Andre Pang <Andre.Pang@csiro.au>
12 * Gildas Bazin <gbazin@videolan.org>
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
27 *****************************************************************************/
33 #include "vlc_block.h"
34 #include "vlc_stream.h"
52 * struct XTag is kind of a union ... it normally represents a whole
53 * tag (and its children), but it could alternatively represent some
54 * PCDATA. Basically, if tag->pcdata is non-NULL, interpret only it and
55 * ignore the name, attributes and inner_tags.
67 typedef struct _XAttribute
73 typedef struct _XTagParser
75 int valid; /* boolean */
81 /*****************************************************************************
83 *****************************************************************************/
84 static int Open ( vlc_object_t * );
85 static void Close( vlc_object_t * );
/* Module descriptor entries: advanced/XML category, human-readable
 * description, the "xml" capability (registered with value 5) and the
 * Open/Close entry points. */
88 set_category( CAT_ADVANCED );
89 set_subcategory( SUBCAT_ADVANCED_XML );
90 set_description( _("Simple XML Parser") );
91 set_capability( "xml", 5 );
92 set_callbacks( Open, Close );
/* Private reader state: the parsed tag tree plus the cursor (current tag
 * and current attribute) that the Reader* callbacks move through it. */
95 struct xml_reader_sys_t
97 XTag *p_root; /* Root tag */
98 XTag *p_curtag; /* Current tag */
99 XList *p_curattr; /* Current attribute */
103 static xml_reader_t *ReaderCreate( xml_t *, stream_t * );
104 static void ReaderDelete( xml_reader_t * );
105 static int ReaderRead( xml_reader_t * );
106 static int ReaderNodeType( xml_reader_t * );
107 static char *ReaderName( xml_reader_t * );
108 static char *ReaderValue( xml_reader_t * );
109 static int ReaderNextAttr( xml_reader_t * );
111 static int ReaderUseDTD ( xml_reader_t *, vlc_bool_t );
113 static void CatalogLoad( xml_t *, const char * );
114 static void CatalogAdd( xml_t *, const char *, const char *, const char * );
116 static XTag *xtag_new_parse( const char *, int );
117 static char *xtag_get_name( XTag * );
118 static char *xtag_get_pcdata( XTag * );
119 static char *xtag_get_attribute( XTag *, char * );
120 static XTag *xtag_first_child( XTag *, char * );
121 static XTag *xtag_next_child( XTag *, char * );
122 static XTag *xtag_free( XTag * );
123 static int xtag_snprint( char *, int, XTag * );
125 /*****************************************************************************
126 * Module initialization
127 *****************************************************************************/
/* Module entry point: treats p_this as an xml_t and installs this
 * parser's reader and catalogue callbacks on it. */
128 static int Open( vlc_object_t *p_this )
130 xml_t *p_xml = (xml_t *)p_this;
/* Reader construction / destruction hooks */
132 p_xml->pf_reader_create = ReaderCreate;
133 p_xml->pf_reader_delete = ReaderDelete;
/* Catalogue hooks */
135 p_xml->pf_catalog_load = CatalogLoad;
136 p_xml->pf_catalog_add = CatalogAdd;
141 /*****************************************************************************
142 * Module deinitialization
143 *****************************************************************************/
/* Teardown counterpart of Open(), registered through set_callbacks(). */
144 static void Close( vlc_object_t *p_this )
149 /*****************************************************************************
150 * Catalogue functions
151 *****************************************************************************/
/* Catalogue loading hook (pf_catalog_load). This parser has no catalogue
 * support; the request is reported through a debug message. */
152 static void CatalogLoad( xml_t *p_xml, const char *psz_filename )
154 msg_Dbg( p_xml, "catalog support not implemented" );
/* Catalogue entry hook (pf_catalog_add).
 * NOTE(review): presumably unsupported in the same way as CatalogLoad() --
 * confirm against the function body. */
157 static void CatalogAdd( xml_t *p_xml, const char *psz_arg1,
158 const char *psz_arg2, const char *psz_filename )
162 /*****************************************************************************
164 *****************************************************************************/
/*
 * Builds a reader over stream s: reads what is left of the stream into a
 * NUL-terminated buffer, parses it with xtag_new_parse() and fills a newly
 * allocated xml_reader_t with the parsed tree and the Reader* callbacks.
 */
165 static xml_reader_t *ReaderCreate( xml_t *p_xml, stream_t *s )
167 xml_reader_t *p_reader;
172 /* Open and read file */
/* Bytes left in the stream; one extra byte is reserved for the terminator */
174 i_size = stream_Size( s ) - stream_Tell( s );
175 p_buffer = malloc( i_size + 1 );
176 if( p_buffer == NULL )
/* NOTE(review): the stream_Read() result is added unconditionally; if it
 * can return 0 or a negative value before i_size is reached, this loop
 * never makes progress -- confirm and stop on short reads. */
180 while( i_buffer < i_size )
182 msg_Dbg( p_xml, "got %d, want %d", i_buffer, i_size );
183 i_buffer += stream_Read( s, &p_buffer[i_buffer], i_size - i_buffer );
/* Terminate the buffer so the parser can treat it as a C string */
185 p_buffer[ i_buffer ] = 0;
189 msg_Dbg( p_xml, "empty xml" );
194 p_root = xtag_new_parse( p_buffer, i_buffer );
197 msg_Warn( p_xml, "couldn't parse xml" );
/* NOTE(review): neither allocation below is checked before it is
 * dereferenced. */
202 p_reader = malloc( sizeof(xml_reader_t) );
203 p_reader->p_sys = malloc( sizeof(xml_reader_sys_t) );
/* Cursor starts unset: the first ReaderRead() call positions it on p_root */
204 p_reader->p_sys->p_root = p_root;
205 p_reader->p_sys->p_curtag = NULL;
206 p_reader->p_sys->p_curattr = NULL;
207 p_reader->p_sys->b_endtag = VLC_FALSE;
208 p_reader->p_xml = p_xml;
/* Reader callbacks */
210 p_reader->pf_read = ReaderRead;
211 p_reader->pf_node_type = ReaderNodeType;
212 p_reader->pf_name = ReaderName;
213 p_reader->pf_value = ReaderValue;
214 p_reader->pf_next_attr = ReaderNextAttr;
215 p_reader->pf_use_dtd = ReaderUseDTD;
/* Releases a reader created by ReaderCreate(): the parsed tag tree first,
 * then the private state that referenced it. */
220 static void ReaderDelete( xml_reader_t *p_reader )
222 xtag_free( p_reader->p_sys->p_root );
223 free( p_reader->p_sys );
/* DTD toggle installed as pf_use_dtd by ReaderCreate().
 * NOTE(review): no DTD handling is visible elsewhere in this parser --
 * confirm what this reports to the caller. */
227 static int ReaderUseDTD ( xml_reader_t *p_reader, vlc_bool_t b_use )
/*
 * Advances the reader cursor by one node, walking the tag tree depth
 * first. An element is visited twice: once as a start tag and once more,
 * with b_endtag set, after its children are exhausted.
 */
232 static int ReaderRead( xml_reader_t *p_reader )
/* First call: position the cursor on the root tag */
236 if( !p_reader->p_sys->p_curtag )
238 p_reader->p_sys->p_curtag = p_reader->p_sys->p_root;
/* Descend into the next child that has not been visited yet; a NULL name
 * makes xtag_next_child() accept any child */
244 if( (p_child = xtag_next_child( p_reader->p_sys->p_curtag, 0 )) )
246 p_reader->p_sys->p_curtag = p_child;
247 p_reader->p_sys->p_curattr = 0;
248 p_reader->p_sys->b_endtag = VLC_FALSE;
/* Children exhausted: flag the end tag of a named element once */
252 if( p_reader->p_sys->p_curtag->name && /* no end tag for pcdata */
253 !p_reader->p_sys->b_endtag )
255 p_reader->p_sys->b_endtag = VLC_TRUE;
/* End tag already reported (or PCDATA node): climb to the parent, or
 * return 0 when the current tag has none */
259 p_reader->p_sys->b_endtag = VLC_FALSE;
260 if( !p_reader->p_sys->p_curtag->parent ) return 0;
261 p_reader->p_sys->p_curtag = p_reader->p_sys->p_curtag->parent;
267 static int ReaderNodeType( xml_reader_t *p_reader )
269 if( p_reader->p_sys->p_curtag->name &&
270 p_reader->p_sys->b_endtag ) return XML_READER_ENDELEM;
271 if( p_reader->p_sys->p_curtag->name ) return XML_READER_STARTELEM;
272 if( p_reader->p_sys->p_curtag->pcdata ) return XML_READER_TEXT;
273 return XML_READER_NONE;
/*
 * Name of the item under the cursor: the current attribute's name when an
 * attribute is selected, the current tag's name otherwise. A non-NULL
 * name is handed back as a strdup() copy.
 */
276 static char *ReaderName( xml_reader_t *p_reader )
278 const char *psz_name;
/* No attribute selected: use the tag itself */
280 if( !p_reader->p_sys->p_curattr )
282 psz_name = xtag_get_name( p_reader->p_sys->p_curtag );
/* Trace output. NOTE(review): presumably compiled only under a debug
 * guard -- confirm. */
284 printf( "TAG: %s\n", psz_name );
/* Attribute selected: list nodes carry XAttribute payloads */
288 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->name;
290 if( psz_name ) return strdup( psz_name );
/*
 * Value of the item under the cursor: the text of a PCDATA node, else the
 * value of the selected attribute, else 0 when no attribute is selected.
 * Strings are handed back as strdup() copies.
 */
294 static char *ReaderValue( xml_reader_t *p_reader )
296 const char *psz_name;
/* PCDATA node: its text takes precedence over any attribute */
297 if( p_reader->p_sys->p_curtag->pcdata )
/* Trace output. NOTE(review): presumably compiled only under a debug
 * guard -- confirm. */
300 printf( "%s\n", p_reader->p_sys->p_curtag->pcdata );
302 return strdup( p_reader->p_sys->p_curtag->pcdata );
305 if( !p_reader->p_sys->p_curattr ) return 0;
308 printf( "%s=%s\n", ((XAttribute *)p_reader->p_sys->p_curattr->data)->name,
309 ((XAttribute *)p_reader->p_sys->p_curattr->data)->value );
/* psz_name holds the attribute *value* here, despite its name */
312 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->value;
314 if( psz_name ) return strdup( psz_name );
318 static int ReaderNextAttr( xml_reader_t *p_reader )
320 if( !p_reader->p_sys->p_curattr )
321 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curtag->attributes;
322 else if( p_reader->p_sys->p_curattr )
323 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curattr->next;
325 if( p_reader->p_sys->p_curattr ) return VLC_SUCCESS;
326 else return VLC_EGENERIC;
329 /*****************************************************************************
330 * XTAG parser functions
331 *****************************************************************************/
333 static XList *xlist_append( XList *list, void *data )
337 l = (XList *)malloc( sizeof(XList) );
338 l->prev = l->next = NULL;
341 if( list == NULL ) return l;
343 for( last = list; last; last = last->next )
344 if( last->next == NULL ) break;
346 if( last ) last->next = l;
/* Walks list from its head using two cursors, l and ln.
 * NOTE(review): ln is presumably loaded with l->next before l is released,
 * so the walk survives freeing the node; payloads (l->data) appear to be
 * the caller's responsibility -- see xtag_free(). Confirm in the body. */
351 static void xlist_free( XList *list )
355 for( l = list; l; l = ln )
/* Character classes: one bit each, so several classes can be OR-ed into a
 * single mask for xtag_cin(). The replacement lists are parenthesised so
 * that each macro expands to one value regardless of the operators next
 * to it. */
#define X_WHITESPACE (1<<0)
#define X_OPENTAG (1<<1)
#define X_CLOSETAG (1<<2)
#define X_DQUOTE (1<<3)
#define X_SQUOTE (1<<4)
374 static int xtag_cin( char c, int char_class )
376 if( char_class & X_WHITESPACE ) if( isspace(c) ) return VLC_TRUE;
377 if( char_class & X_OPENTAG ) if( c == '<' ) return VLC_TRUE;
378 if( char_class & X_CLOSETAG ) if( c == '>' ) return VLC_TRUE;
379 if( char_class & X_DQUOTE ) if( c == '"' ) return VLC_TRUE;
380 if( char_class & X_SQUOTE ) if( c == '\'' ) return VLC_TRUE;
381 if( char_class & X_EQUAL ) if( c == '=' ) return VLC_TRUE;
382 if( char_class & X_SLASH ) if( c == '/' ) return VLC_TRUE;
383 if( char_class & X_QMARK ) if( c == '!' ) return VLC_TRUE;
384 if( char_class & X_DASH ) if( c == '-' ) return VLC_TRUE;
389 static int xtag_index( XTagParser *parser, int char_class )
391 char *s = parser->start;
394 for( i = 0; s[i] && s != parser->end; i++ )
396 if( xtag_cin( s[i], char_class ) ) return i;
402 static void xtag_skip_over( XTagParser *parser, int char_class )
404 char *s = parser->start;
407 if( !parser->valid ) return;
409 for( i = 0; s[i] && s != parser->end; i++ )
411 if( !xtag_cin( s[i], char_class ) )
413 parser->start = &s[i];
/* Convenience wrapper: skips a leading run of X_WHITESPACE characters. */
421 static void xtag_skip_whitespace( XTagParser * parser )
423 xtag_skip_over( parser, X_WHITESPACE );
426 static char *xtag_slurp_to( XTagParser *parser, int good_end, int bad_end )
428 char *ret, *s = parser->start;
431 if( !parser->valid ) return NULL;
433 xi = xtag_index( parser, good_end | bad_end );
435 if( xi > 0 && xtag_cin (s[xi], good_end) )
437 ret = malloc( (xi+1) * sizeof(char) );
438 strncpy( ret, s, xi );
440 parser->start = &s[xi];
/*
 * Requires the character at parser->start to belong to char_class and
 * steps over it. On a mismatch the parser is marked invalid instead.
 * An already-invalid parser yields VLC_FALSE without being touched.
 */
447 static int xtag_assert_and_pass( XTagParser *parser, int char_class )
449 char *s = parser->start;
451 if( !parser->valid ) return VLC_FALSE;
/* Unexpected character: poison the parser so later steps become no-ops */
453 if( !xtag_cin( s[0], char_class ) )
455 parser->valid = VLC_FALSE;
/* Match: consume exactly one character */
459 parser->start = &s[1];
464 static char *xtag_slurp_quoted( XTagParser *parser )
467 int quote = X_DQUOTE; /* quote char to match on */
470 if( !parser->valid ) return NULL;
472 xtag_skip_whitespace( parser );
476 if( xtag_cin( s[0], X_SQUOTE ) ) quote = X_SQUOTE;
478 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
482 for( xi = 0; s[xi]; xi++ )
484 if( xtag_cin( s[xi], quote ) )
486 if( !(xi > 1 && s[xi-1] == '\\') ) break;
490 ret = malloc( (xi+1) * sizeof(char) );
491 strncpy( ret, s, xi );
493 parser->start = &s[xi];
495 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
/*
 * Parses one attribute of the form  name = "value"  (whitespace is allowed
 * around '=', and the value may use single or double quotes) and stores
 * it in a newly allocated XAttribute.
 */
500 static XAttribute *xtag_parse_attribute( XTagParser *parser )
506 if( !parser->valid ) return NULL;
508 xtag_skip_whitespace( parser );
/* The name ends at whitespace or '='; hitting '/' or '>' first means
 * there is no further attribute and yields NULL */
510 name = xtag_slurp_to( parser, X_WHITESPACE|X_EQUAL, X_SLASH|X_CLOSETAG );
511 if( name == NULL ) return NULL;
513 xtag_skip_whitespace( parser );
516 if( !xtag_assert_and_pass( parser, X_EQUAL ) )
519 printf( "xtag: attr failed EQUAL on <%s>\n", name );
524 xtag_skip_whitespace( parser );
526 value = xtag_slurp_quoted( parser );
531 printf ("Got NULL quoted attribute value\n");
536 attr = malloc( sizeof (*attr) );
/* NOTE(review): presumably the shared failure path -- confirm which
 * branches reach it and that name/value are released there. */
543 parser->valid = VLC_FALSE;
/*
 * Parses the next node at the parser position and returns it as a newly
 * allocated XTag: either a PCDATA node (text up to the next '<') or an
 * element with its attributes and, recursively, its children.
 * Comments ("<!-- ... -->") and other "<!...>" constructs are skipped.
 */
547 static XTag *xtag_parse_tag( XTagParser *parser )
555 if( !parser->valid ) return NULL;
557 #if 0 /* Do we really want all the whitespace pcdata ? */
558 xtag_skip_whitespace( parser );
/* Text before the next '<' becomes a PCDATA node of its own */
561 if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) != NULL )
563 tag = malloc( sizeof(*tag) );
565 tag->pcdata = pcdata;
566 tag->parent = parser->current_tag;
567 tag->attributes = NULL;
568 tag->children = NULL;
569 tag->current_child = NULL;
576 /* if this starts a close tag, return NULL and let the parent take it */
577 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
580 /* if this starts a comment tag, skip until end */
/* X_QMARK matches '!' (see xtag_cin), so this tests for "<!--" */
581 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) &&
582 xtag_cin( s[2], X_DASH ) && xtag_cin( s[3], X_DASH ) )
586 parser->start = s = &s[4];
/* Hop from dash to dash until one is followed by "->" */
588 while( (xi = xtag_index( parser, X_DASH )) >= 0 )
590 parser->start = s = &s[xi+1];
592 if( xtag_cin( s[0], X_DASH ) && xtag_cin( s[1], X_CLOSETAG ) )
594 parser->start = &s[2];
595 xtag_skip_whitespace( parser );
/* Comment consumed: parse whatever follows it instead */
596 return xtag_parse_tag( parser );
603 /* FIXME: if this starts a DOCTYPE tag, skip until end */
/* Any other "<!" construct is skipped up to the first '>' */
604 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
606 int xi = xtag_index( parser, X_CLOSETAG );
607 if( xi <= 0 ) return NULL;
609 parser->start = &s[xi+1];
610 xtag_skip_whitespace( parser );
611 return xtag_parse_tag( parser );
/* Regular element: '<' followed by the tag name */
614 if( !xtag_assert_and_pass( parser, X_OPENTAG ) ) return NULL;
616 name = xtag_slurp_to( parser, X_WHITESPACE|X_SLASH|X_CLOSETAG, X_NONE );
617 if( name == NULL ) return NULL;
620 printf ("<%s ...\n", name);
623 tag = malloc( sizeof(*tag) );
626 tag->parent = parser->current_tag;
627 tag->attributes = NULL;
628 tag->children = NULL;
629 tag->current_child = NULL;
/* Whitespace after the name introduces the attribute list */
633 if( xtag_cin( s[0], X_WHITESPACE ) )
635 while( (attr = xtag_parse_attribute( parser )) != NULL )
637 tag->attributes = xlist_append( tag->attributes, attr );
641 xtag_skip_whitespace( parser );
/* '>' opens the element body: parse children with this tag as parent */
645 if( xtag_cin( s[0], X_CLOSETAG ) )
647 parser->current_tag = tag;
649 xtag_assert_and_pass( parser, X_CLOSETAG );
651 while( (inner = xtag_parse_tag( parser ) ) != NULL )
653 tag->children = xlist_append( tag->children, inner );
656 parser->current_tag = tag->parent;
657 xtag_skip_whitespace( parser );
/* Expect the matching "</name>" */
659 xtag_assert_and_pass( parser, X_OPENTAG );
660 xtag_assert_and_pass( parser, X_SLASH );
661 name = xtag_slurp_to( parser, X_WHITESPACE | X_CLOSETAG, X_NONE );
/* A different closing name invalidates the parse */
664 if( strcmp( name, tag->name ) )
667 printf ("got %s expected %s\n", name, tag->name);
669 parser->valid = VLC_FALSE;
674 xtag_skip_whitespace( parser );
675 xtag_assert_and_pass( parser, X_CLOSETAG );
/* Otherwise the element must be self-closing: "/>" */
680 xtag_assert_and_pass( parser, X_SLASH );
681 xtag_assert_and_pass( parser, X_CLOSETAG );
/*
 * Releases a tag: its name and pcdata strings, the name/value strings of
 * each attribute, and the list nodes of the attribute and child lists.
 * A NULL tag is accepted and yields NULL.
 */
687 static XTag *xtag_free( XTag *xtag )
693 if( xtag == NULL ) return NULL;
695 if( xtag->name ) free( xtag->name );
696 if( xtag->pcdata ) free( xtag->pcdata );
/* Attribute payloads first, then the list nodes that held them */
698 for( l = xtag->attributes; l; l = l->next )
700 if( (attr = (XAttribute *)l->data) != NULL )
702 if( attr->name ) free( attr->name );
703 if( attr->value ) free( attr->value );
707 xlist_free( xtag->attributes );
/* NOTE(review): each child is presumably released recursively inside
 * this loop before the list nodes go -- confirm. */
709 for( l = xtag->children; l; l = l->next )
711 child = (XTag *)l->data;
714 xlist_free( xtag->children );
/*
 * Parses the buffer s into a tag tree.
 *
 * n is the buffer length; -1 means "no explicit end", i.e. the parser
 * relies on the terminating NUL only. When more than one top-level node
 * is found, the nodes are gathered under an unnamed wrapper tag.
 */
721 static XTag *xtag_new_parse( const char *s, int n )
724 XTag *tag, *ttag, *wrapper;
726 parser.valid = VLC_TRUE;
727 parser.current_tag = NULL;
728 parser.start = (char *)s;
730 if( n == -1 ) parser.end = NULL;
734 printf ("empty buffer");
738 else parser.end = (char *)&s[n];
740 /* can't have whitespace pcdata outside rootnode */
741 xtag_skip_whitespace( &parser );
/* First top-level node */
743 tag = xtag_parse_tag( &parser );
748 printf ("invalid file");
/* A second top-level node means the document has no single root */
754 if( (ttag = xtag_parse_tag( &parser )) != NULL )
/* Synthetic parent: no name, no pcdata, no attributes */
762 wrapper = malloc( sizeof(XTag) );
763 wrapper->name = NULL;
764 wrapper->pcdata = NULL;
765 wrapper->parent = NULL;
766 wrapper->attributes = NULL;
767 wrapper->children = NULL;
768 wrapper->current_child = NULL;
770 wrapper->children = xlist_append( wrapper->children, tag );
771 wrapper->children = xlist_append( wrapper->children, ttag );
/* Collect any remaining top-level nodes under the wrapper too */
773 while( (ttag = xtag_parse_tag( &parser )) != NULL )
781 wrapper->children = xlist_append( wrapper->children, ttag );
789 static char *xtag_get_name( XTag *xtag )
791 return xtag ? xtag->name : NULL;
/*
 * Returns the text of the first direct child of xtag that is a PCDATA
 * node. The string is the child's own storage, not a copy. A NULL tag
 * yields NULL.
 */
794 static char *xtag_get_pcdata( XTag *xtag )
799 if( xtag == NULL ) return NULL;
/* First child carrying pcdata wins */
801 for( l = xtag->children; l; l = l->next )
803 child = (XTag *)l->data;
804 if( child->pcdata != NULL )
806 return child->pcdata;
/*
 * Looks up an attribute of xtag by exact (case-sensitive) name and returns
 * its value. The string is the attribute's own storage, not a copy.
 * A NULL tag yields NULL.
 */
813 static char *xtag_get_attribute( XTag *xtag, char *attribute )
818 if( xtag == NULL ) return NULL;
820 for( l = xtag->attributes; l; l = l->next )
/* List nodes without a payload are skipped */
822 if( (attr = (XAttribute *)l->data) != NULL )
824 if( !strcmp( attr->name, attribute ) ) return attr->value;
831 static XTag *xtag_first_child( XTag *xtag, char *name )
836 if( xtag == NULL ) return NULL;
837 if( (l = xtag->children) == NULL ) return NULL;
841 xtag->current_child = l;
842 return (XTag *)l->data;
845 for( ; l; l = l->next )
847 child = (XTag *)l->data;
849 if( !strcmp( child->name, name ) )
851 xtag->current_child = l;
856 xtag->current_child = NULL;
861 static XTag *xtag_next_child( XTag *xtag, char *name )
866 if( xtag == NULL ) return NULL;
868 if( (l = xtag->current_child) == NULL )
869 return xtag_first_child( xtag, name );
871 if( (l = l->next) == NULL ) return NULL;
875 xtag->current_child = l;
876 return (XTag *)l->data;
879 for( ; l; l = l->next )
881 child = (XTag *)l->data;
883 if( !strcmp( child->name, name ) )
885 xtag->current_child = l;
890 xtag->current_child = NULL;
896 * This snprints function takes a variable list of char *, the last of
897 * which must be NULL, and prints each in turn to buf.
898 * Returns C99-style total length that would have been written, even if
899 * this is larger than n.
/* buf/n describe the destination buffer and the room left in it; the
 * variable arguments are the char * strings to print, ending with NULL. */
901 static int xtag_snprints( char *buf, int n, ... )
905 int len, to_copy, total = 0;
/* A NULL argument ends the list */
909 for( s = va_arg( ap, char * ); s; s = va_arg( ap, char *) )
/* Copy no more than what still fits in buf */
913 if( (to_copy = __MIN(n, len) ) > 0 )
915 memcpy( buf, s, to_copy );
/*
 * Serialises xtag back to XML text into buf (room for n bytes): a PCDATA
 * node is printed as its text, an element as "<name attr="value" ...>"
 * followed by its children and "</name>", or as "<name .../>" when it has
 * no children.
 */
928 static int xtag_snprint( char *buf, int n, XTag *xtag )
936 buf += __MIN(n, N); \
942 if( n > 0 ) buf[0] = '\0';
/* PCDATA node: just the text */
948 nn = xtag_snprints( buf, n, xtag->pcdata, NULL );
/* Element: opening tag name */
956 nn = xtag_snprints( buf, n, "<", xtag->name, NULL );
/* Attributes are written in list order, values in double quotes */
959 for( l = xtag->attributes; l; l = l->next )
961 attr = (XAttribute *)l->data;
963 nn = xtag_snprints( buf, n, " ", attr->name, "=\"", attr->value,
/* No children: emit a self-closing tag */
968 if( xtag->children == NULL )
970 nn = xtag_snprints ( buf, n, "/>", NULL );
976 nn = xtag_snprints( buf, n, ">", NULL );
/* Children are serialised recursively, in list order */
980 for( l = xtag->children; l; l = l->next )
982 child = (XTag *)l->data;
984 nn = xtag_snprint( buf, n, child );
990 nn = xtag_snprints( buf, n, "</", xtag->name, ">", NULL );