1 /*****************************************************************************
2 * xtag.c : a trivial parser for XML-like tags
3 *****************************************************************************
4 * Copyright (C) 2003-2004 Commonwealth Scientific and Industrial Research
5 * Organisation (CSIRO) Australia
6 * Copyright (C) 2000-2004 VideoLAN
10 * Authors: Conrad Parker <Conrad.Parker@csiro.au>
11 * Andre Pang <Andre.Pang@csiro.au>
12 * Gildas Bazin <gbazin@videolan.org>
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
27 *****************************************************************************/
50 * struct XTag is kind of a union ... it normally represents a whole
51 * tag (and its children), but it could alternatively represent some
52 * PCDATA. Basically, if tag->pcdata is non-NULL, interpret only it and
53 * ignore the name, attributes and inner_tags.
65 typedef struct _XAttribute
71 typedef struct _XTagParser
73 int valid; /* boolean */
79 /*****************************************************************************
81 *****************************************************************************/
82 static int Open ( vlc_object_t * );
83 static void Close( vlc_object_t * );
86 set_description( _("Simple XML Parser") );
87 set_capability( "xml", 5 );
88 set_callbacks( Open, Close );
/* Private state of one reader: the parsed tree plus the current position. */
91 struct xml_reader_sys_t
93 XTag *p_root; /* Root tag */
94 XTag *p_curtag; /* Current tag; NULL until the first ReaderRead() call */
95 XList *p_curattr; /* Current attribute; NULL while positioned on the tag itself */
99 static xml_reader_t *ReaderCreate( xml_t *, const char * );
100 static void ReaderDelete( xml_reader_t * );
101 static int ReaderRead( xml_reader_t * );
102 static int ReaderNodeType( xml_reader_t * );
103 static char *ReaderName( xml_reader_t * );
104 static char *ReaderValue( xml_reader_t * );
105 static int ReaderNextAttr( xml_reader_t * );
107 static void CatalogLoad( xml_t *, const char * );
108 static void CatalogAdd( xml_t *, const char *, const char *, const char * );
110 static XTag *xtag_new_parse( const char *, int );
111 static char *xtag_get_name( XTag * );
112 static char *xtag_get_pcdata( XTag * );
113 static char *xtag_get_attribute( XTag *, char * );
114 static XTag *xtag_first_child( XTag *, char * );
115 static XTag *xtag_next_child( XTag *, char * );
116 static XTag *xtag_free( XTag * );
117 static int xtag_snprint( char *, int, XTag * );
119 /*****************************************************************************
120 * Module initialization
121 *****************************************************************************/
/* Open: treats p_this as the xml_t being set up and installs this module's
 * reader and catalogue entry points on it. */
122 static int Open( vlc_object_t *p_this )
124 xml_t *p_xml = (xml_t *)p_this;
/* reader life cycle */
126 p_xml->pf_reader_create = ReaderCreate;
127 p_xml->pf_reader_delete = ReaderDelete;
/* catalogue hooks */
129 p_xml->pf_catalog_load = CatalogLoad;
130 p_xml->pf_catalog_add = CatalogAdd;
135 /*****************************************************************************
136 * Module deinitialization
137 *****************************************************************************/
138 static void Close( vlc_object_t *p_this )
143 /*****************************************************************************
144 * Catalogue functions
145 *****************************************************************************/
/* CatalogLoad: emits a debug message saying catalogues are not implemented;
 * psz_filename is not used by the visible code. */
146 static void CatalogLoad( xml_t *p_xml, const char *psz_filename )
148 msg_Dbg( p_xml, "catalog support not implemented" );
151 static void CatalogAdd( xml_t *p_xml, const char *psz_arg1,
152 const char *psz_arg2, const char *psz_filename )
156 /*****************************************************************************
158 *****************************************************************************/
159 static xml_reader_t *ReaderCreate( xml_t *p_xml, const char *psz_filename )
161 xml_reader_t *p_reader;
162 xml_reader_sys_t *p_sys;
168 /* Open and read file */
169 file = fopen( psz_filename, "rt" );
172 msg_Warn( p_xml, "could not open file '%s'", psz_filename );
176 fseek( file, 0L, SEEK_END );
177 i_buffer = ftell( file );
178 fseek( file, 0L, SEEK_SET );
179 p_buffer = malloc( i_buffer + 1 );
180 i_buffer = fread( p_buffer, 1, i_buffer, file );
181 p_buffer[i_buffer] = 0;
186 msg_Dbg( p_xml, "file '%s' is empty", psz_filename );
191 p_root = xtag_new_parse( p_buffer, i_buffer );
194 msg_Warn( p_xml, "couldn't parse file '%s'", psz_filename );
199 p_reader = malloc( sizeof(xml_reader_t) );
200 p_reader->p_sys = p_sys = malloc( sizeof(xml_reader_sys_t) );
201 p_reader->p_sys->p_root = p_root;
202 p_reader->p_sys->p_curtag = NULL;
203 p_reader->p_sys->p_curattr = NULL;
204 p_reader->p_sys->b_endtag = VLC_FALSE;
205 p_reader->p_xml = p_xml;
207 p_reader->pf_read = ReaderRead;
208 p_reader->pf_node_type = ReaderNodeType;
209 p_reader->pf_name = ReaderName;
210 p_reader->pf_value = ReaderValue;
211 p_reader->pf_next_attr = ReaderNextAttr;
/* ReaderDelete: frees the parsed tag tree, then the reader's private state. */
216 static void ReaderDelete( xml_reader_t *p_reader )
218 xtag_free( p_reader->p_sys->p_root );
219 free( p_reader->p_sys );
/* ReaderRead: advance the reader to the next node of the tree.
 * The walk is depth first: go to the next unvisited child if there is one,
 * otherwise report the end of the current element, otherwise climb to the
 * parent. Returns 0 once there is no parent left to climb to. */
223 static int ReaderRead( xml_reader_t *p_reader )
/* first call: position on the root tag */
227 if( !p_reader->p_sys->p_curtag )
229 p_reader->p_sys->p_curtag = p_reader->p_sys->p_root;
/* descend: a null name asks xtag_next_child() for any child, regardless of name */
235 if( (p_child = xtag_next_child( p_reader->p_sys->p_curtag, 0 )) )
237 p_reader->p_sys->p_curtag = p_child;
238 p_reader->p_sys->p_curattr = 0;
239 p_reader->p_sys->b_endtag = VLC_FALSE;
/* children exhausted: flag the end of a named element exactly once */
243 if( p_reader->p_sys->p_curtag->name && /* no end tag for pcdata */
244 !p_reader->p_sys->b_endtag )
246 p_reader->p_sys->b_endtag = VLC_TRUE;
/* end already reported (or pcdata): move up, stopping at the top of the tree */
250 p_reader->p_sys->b_endtag = VLC_FALSE;
251 if( !p_reader->p_sys->p_curtag->parent ) return 0;
252 p_reader->p_sys->p_curtag = p_reader->p_sys->p_curtag->parent;
258 static int ReaderNodeType( xml_reader_t *p_reader )
260 if( p_reader->p_sys->p_curtag->name &&
261 p_reader->p_sys->b_endtag ) return XML_READER_ENDELEM;
262 if( p_reader->p_sys->p_curtag->name ) return XML_READER_STARTELEM;
263 if( p_reader->p_sys->p_curtag->pcdata ) return XML_READER_TEXT;
264 return XML_READER_NONE;
/* ReaderName: name of the current attribute if the reader is positioned on
 * one, otherwise the name of the current tag. A non-null name is returned as
 * a strdup() copy. */
267 static char *ReaderName( xml_reader_t *p_reader )
269 const char *psz_name;
/* no current attribute: fall back to the tag's own name */
271 if( !p_reader->p_sys->p_curattr )
273 psz_name = xtag_get_name( p_reader->p_sys->p_curtag );
/* NOTE(review): psz_name can be NULL here (xtag_get_name returns the tag's
 * name field as is), which "%s" does not accept */
275 printf( "TAG: %s\n", psz_name );
279 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->name;
281 if( psz_name ) return strdup( psz_name );
/* ReaderValue: value of the current attribute. Returns 0 when the reader is
 * not positioned on an attribute; a non-null value is returned as a strdup()
 * copy. */
285 static char *ReaderValue( xml_reader_t *p_reader )
287 const char *psz_name;
/* only attributes have a value here */
289 if( !p_reader->p_sys->p_curattr ) return 0;
292 printf( "%s=%s\n", ((XAttribute *)p_reader->p_sys->p_curattr->data)->name,
293 ((XAttribute *)p_reader->p_sys->p_curattr->data)->value );
/* despite its name, psz_name holds the attribute value from here on */
296 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->value;
298 if( psz_name ) return strdup( psz_name );
302 static int ReaderNextAttr( xml_reader_t *p_reader )
304 if( !p_reader->p_sys->p_curattr )
305 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curtag->attributes;
306 else if( p_reader->p_sys->p_curattr )
307 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curattr->next;
309 if( p_reader->p_sys->p_curattr ) return VLC_SUCCESS;
310 else return VLC_EGENERIC;
313 /*****************************************************************************
314 * XTAG parser functions
315 *****************************************************************************/
317 static XList *xlist_append( XList *list, void *data )
321 l = (XList *)malloc( sizeof(XList) );
322 l->prev = l->next = NULL;
325 if( list == NULL ) return l;
327 for( last = list; last; last = last->next )
328 if( last->next == NULL ) break;
330 if( last ) last->next = l;
/* xlist_free: walks the list from its head. The loop advances through ln
 * rather than l->next; presumably ln is saved before the node is released
 * (the loop body is not visible here, confirm). */
335 static void xlist_free( XList *list )
339 for( l = list; l; l = ln )
/* Character classes: one bit each, so several classes can be OR-ed into a
 * single mask. The values are parenthesized so that they keep their meaning
 * inside larger expressions (e.g. X_DQUOTE + 1 or ~X_WHITESPACE). */
#define X_WHITESPACE  (1<<0)
#define X_OPENTAG     (1<<1)
#define X_CLOSETAG    (1<<2)
#define X_DQUOTE      (1<<3)
#define X_SQUOTE      (1<<4)
358 static int xtag_cin( char c, int char_class )
360 if( char_class & X_WHITESPACE ) if( isspace(c) ) return VLC_TRUE;
361 if( char_class & X_OPENTAG ) if( c == '<' ) return VLC_TRUE;
362 if( char_class & X_CLOSETAG ) if( c == '>' ) return VLC_TRUE;
363 if( char_class & X_DQUOTE ) if( c == '"' ) return VLC_TRUE;
364 if( char_class & X_SQUOTE ) if( c == '\'' ) return VLC_TRUE;
365 if( char_class & X_EQUAL ) if( c == '=' ) return VLC_TRUE;
366 if( char_class & X_SLASH ) if( c == '/' ) return VLC_TRUE;
367 if( char_class & X_QMARK ) if( c == '!' ) return VLC_TRUE;
368 if( char_class & X_DASH ) if( c == '-' ) return VLC_TRUE;
373 static int xtag_index( XTagParser *parser, int char_class )
375 char *s = parser->start;
378 for( i = 0; s[i] && s != parser->end; i++ )
380 if( xtag_cin( s[i], char_class ) ) return i;
386 static void xtag_skip_over( XTagParser *parser, int char_class )
388 char *s = parser->start;
391 if( !parser->valid ) return;
393 for( i = 0; s[i] && s != parser->end; i++ )
395 if( !xtag_cin( s[i], char_class ) )
397 parser->start = &s[i];
/* xtag_skip_whitespace: advance the parser past any leading whitespace. */
405 static void xtag_skip_whitespace( XTagParser * parser )
407 xtag_skip_over( parser, X_WHITESPACE );
410 static char *xtag_slurp_to( XTagParser *parser, int good_end, int bad_end )
412 char *ret, *s = parser->start;
415 if( !parser->valid ) return NULL;
417 xi = xtag_index( parser, good_end | bad_end );
419 if( xi > 0 && xtag_cin (s[xi], good_end) )
421 ret = malloc( (xi+1) * sizeof(char) );
422 strncpy( ret, s, xi );
424 parser->start = &s[xi];
431 static int xtag_assert_and_pass( XTagParser *parser, int char_class )
433 char *s = parser->start;
435 if( !parser->valid ) return VLC_FALSE;
437 if( !xtag_cin( s[0], char_class ) )
439 parser->valid = VLC_FALSE;
443 parser->start = &s[1];
448 static char *xtag_slurp_quoted( XTagParser *parser )
451 int quote = X_DQUOTE; /* quote char to match on */
454 if( !parser->valid ) return NULL;
456 xtag_skip_whitespace( parser );
460 if( xtag_cin( s[0], X_SQUOTE ) ) quote = X_SQUOTE;
462 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
466 for( xi = 0; s[xi]; xi++ )
468 if( xtag_cin( s[xi], quote ) )
470 if( !(xi > 1 && s[xi-1] == '\\') ) break;
474 ret = malloc( (xi+1) * sizeof(char) );
475 strncpy( ret, s, xi );
477 parser->start = &s[xi];
479 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
/* xtag_parse_attribute: parse one name = "value" pair at the parser position.
 * Returns NULL when the parser is already invalid or when no attribute name
 * can be read (for instance when '/' or '>' comes first). */
484 static XAttribute *xtag_parse_attribute( XTagParser *parser )
490 if( !parser->valid ) return NULL;
492 xtag_skip_whitespace( parser );
/* the name ends at whitespace or '='; meeting '/' or '>' first means the tag has no further attribute */
494 name = xtag_slurp_to( parser, X_WHITESPACE|X_EQUAL, X_SLASH|X_CLOSETAG );
495 if( name == NULL ) return NULL;
497 xtag_skip_whitespace( parser );
/* a name must be followed by '=' */
500 if( !xtag_assert_and_pass( parser, X_EQUAL ) )
503 printf( "xtag: attr failed EQUAL on <%s>\n", name );
508 xtag_skip_whitespace( parser );
/* the value must be a quoted string */
510 value = xtag_slurp_quoted( parser );
515 printf ("Got NULL quoted attribute value\n");
/* NOTE(review): no NULL check on this allocation is visible here */
520 attr = malloc( sizeof (*attr) );
/* presumably the shared failure exit for the two error cases above (confirm) */
527 parser->valid = VLC_FALSE;
/* xtag_parse_tag: parse the next node at the parser position.
 * The node is either a run of text before the next '<', or an element with
 * its attributes and, recursively, its children. Comments and "<!...>"
 * declarations are skipped by re-entering the function after them.
 * Returns NULL when the parser is invalid or no node starts here. */
531 static XTag *xtag_parse_tag( XTagParser *parser )
539 if( !parser->valid ) return NULL;
541 #if 0 /* Do we really want all the whitespace pcdata ? */
542 xtag_skip_whitespace( parser );
545 if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) != NULL )
/* text node: only pcdata is meaningful, it has no attributes or children */
/* NOTE(review): no NULL check on this allocation is visible here */
547 tag = malloc( sizeof(*tag) );
549 tag->pcdata = pcdata;
550 tag->parent = parser->current_tag;
551 tag->attributes = NULL;
552 tag->children = NULL;
553 tag->current_child = NULL;
560 /* if this starts a close tag, return NULL and let the parent take it */
561 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
564 /* if this starts a comment tag, skip until end */
/* X_QMARK is matched against '!' by xtag_cin(), so this test recognizes "<!--" */
565 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) &&
566 xtag_cin( s[2], X_DASH ) && xtag_cin( s[3], X_DASH ) )
570 parser->start = s = &s[4];
/* hop from dash to dash until one is followed by "->" */
572 while( (xi = xtag_index( parser, X_DASH )) >= 0 )
574 parser->start = s = &s[xi+1];
576 if( xtag_cin( s[0], X_DASH ) && xtag_cin( s[1], X_CLOSETAG ) )
578 parser->start = &s[2];
579 xtag_skip_whitespace( parser );
580 return xtag_parse_tag( parser );
587 /* FIXME: if this starts a DOCTYPE tag, skip until end */
/* any other "<!": skip up to and including the next '>' */
588 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
590 int xi = xtag_index( parser, X_CLOSETAG );
591 if( xi <= 0 ) return NULL;
593 parser->start = &s[xi+1];
594 xtag_skip_whitespace( parser );
595 return xtag_parse_tag( parser );
/* regular element: '<' followed by a name */
598 if( !xtag_assert_and_pass( parser, X_OPENTAG ) ) return NULL;
600 name = xtag_slurp_to( parser, X_WHITESPACE|X_SLASH|X_CLOSETAG, X_NONE );
601 if( name == NULL ) return NULL;
604 printf ("<%s ...\n", name);
/* NOTE(review): no NULL check on this allocation is visible here */
607 tag = malloc( sizeof(*tag) );
610 tag->parent = parser->current_tag;
611 tag->attributes = NULL;
612 tag->children = NULL;
613 tag->current_child = NULL;
/* whitespace after the name announces attributes */
617 if( xtag_cin( s[0], X_WHITESPACE ) )
619 while( (attr = xtag_parse_attribute( parser )) != NULL )
621 tag->attributes = xlist_append( tag->attributes, attr );
625 xtag_skip_whitespace( parser );
/* '>' opens the element content: parse children until one attempt returns NULL */
629 if( xtag_cin( s[0], X_CLOSETAG ) )
631 parser->current_tag = tag;
633 xtag_assert_and_pass( parser, X_CLOSETAG );
635 while( (inner = xtag_parse_tag( parser ) ) != NULL )
637 tag->children = xlist_append( tag->children, inner );
640 parser->current_tag = tag->parent;
641 xtag_skip_whitespace( parser );
/* expect the matching "</name>" */
643 xtag_assert_and_pass( parser, X_OPENTAG );
644 xtag_assert_and_pass( parser, X_SLASH );
645 name = xtag_slurp_to( parser, X_WHITESPACE | X_CLOSETAG, X_NONE );
/* a close tag naming another element invalidates the parse */
648 if( strcmp( name, tag->name ) )
651 printf ("got %s expected %s\n", name, tag->name);
653 parser->valid = VLC_FALSE;
658 xtag_skip_whitespace( parser );
659 xtag_assert_and_pass( parser, X_CLOSETAG );
/* otherwise the element must be self-closing: "/>" */
664 xtag_assert_and_pass( parser, X_SLASH );
665 xtag_assert_and_pass( parser, X_CLOSETAG );
/* xtag_free: release a tag's name, text and attribute strings, then its
 * attribute and child lists. A NULL tag is accepted and yields NULL. */
671 static XTag *xtag_free( XTag *xtag )
677 if( xtag == NULL ) return NULL;
679 if( xtag->name ) free( xtag->name );
680 if( xtag->pcdata ) free( xtag->pcdata );
/* attributes: free the strings of each entry, then the list itself */
682 for( l = xtag->attributes; l; l = l->next )
684 if( (attr = (XAttribute *)l->data) != NULL )
686 if( attr->name ) free( attr->name );
687 if( attr->value ) free( attr->value );
691 xlist_free( xtag->attributes );
/* children: visit each child tag, then free the list itself */
693 for( l = xtag->children; l; l = l->next )
695 child = (XTag *)l->data;
698 xlist_free( xtag->children );
/* xtag_new_parse: parse a buffer into a tag tree.
 * s is the text to parse and n its length; n == -1 means the text is only
 * bounded by its terminating NUL, and n == 0 yields NULL.
 * When a second top-level node follows the first, all top-level nodes are
 * gathered as children of an unnamed wrapper tag. */
705 static XTag *xtag_new_parse( const char *s, int n )
708 XTag *tag, *ttag, *wrapper;
710 parser.valid = VLC_TRUE;
711 parser.current_tag = NULL;
712 parser.start = (char *)s;
/* set the end bound of the input */
714 if( n == -1 ) parser.end = NULL;
715 else if( n == 0 ) return NULL;
716 else parser.end = (char *)&s[n];
718 tag = xtag_parse_tag( &parser );
/* a further top-level node means the document has no single root */
726 if( (ttag = xtag_parse_tag( &parser )) != NULL )
/* NOTE(review): no NULL check on this allocation is visible here */
734 wrapper = malloc( sizeof(XTag) );
735 wrapper->name = NULL;
736 wrapper->pcdata = NULL;
737 wrapper->parent = NULL;
738 wrapper->attributes = NULL;
739 wrapper->children = NULL;
740 wrapper->current_child = NULL;
742 wrapper->children = xlist_append( wrapper->children, tag );
743 wrapper->children = xlist_append( wrapper->children, ttag );
/* keep collecting the remaining top-level nodes */
745 while( (ttag = xtag_parse_tag( &parser )) != NULL )
753 wrapper->children = xlist_append( wrapper->children, ttag );
761 static char *xtag_get_name( XTag *xtag )
763 return xtag ? xtag->name : NULL;
766 static char *xtag_get_pcdata( XTag *xtag )
771 if( xtag == NULL ) return NULL;
773 for( l = xtag->children; l; l = l->next )
775 child = (XTag *)l->data;
776 if( child->pcdata != NULL )
778 return child->pcdata;
785 static char *xtag_get_attribute( XTag *xtag, char *attribute )
790 if( xtag == NULL ) return NULL;
792 for( l = xtag->attributes; l; l = l->next )
794 if( (attr = (XAttribute *)l->data) != NULL )
796 if( !strcmp( attr->name, attribute ) ) return attr->value;
803 static XTag *xtag_first_child( XTag *xtag, char *name )
808 if( xtag == NULL ) return NULL;
809 if( (l = xtag->children) == NULL ) return NULL;
813 xtag->current_child = l;
814 return (XTag *)l->data;
817 for( ; l; l = l->next )
819 child = (XTag *)l->data;
821 if( !strcmp( child->name, name ) )
823 xtag->current_child = l;
828 xtag->current_child = NULL;
833 static XTag *xtag_next_child( XTag *xtag, char *name )
838 if( xtag == NULL ) return NULL;
840 if( (l = xtag->current_child) == NULL )
841 return xtag_first_child( xtag, name );
843 if( (l = l->next) == NULL ) return NULL;
847 xtag->current_child = l;
848 return (XTag *)l->data;
851 for( ; l; l = l->next )
853 child = (XTag *)l->data;
855 if( !strcmp( child->name, name ) )
857 xtag->current_child = l;
862 xtag->current_child = NULL;
868 * This snprints function takes a variable list of char *, the last of
869 * which must be NULL, and prints each in turn to buf.
870 * Returns C99-style total length that would have been written, even if
871 * this is larger than n.
/* buf: destination, n: room left in it, followed by the strings to append */
873 static int xtag_snprints( char *buf, int n, ... )
877 int len, to_copy, total = 0;
/* walk the variable arguments until the NULL that ends the list */
881 for( s = va_arg( ap, char * ); s; s = va_arg( ap, char *) )
/* copy no more than the room left; nothing is copied once n reaches 0 */
885 if( (to_copy = __MIN(n, len) ) > 0 )
887 memcpy( buf, s, to_copy );
/* xtag_snprint: write the textual form of a tag tree into buf (room for n
 * characters). A text node is written as its pcdata; an element is written
 * with its attributes and, recursively, its children, or as a self-closing
 * tag when it has none. */
900 static int xtag_snprint( char *buf, int n, XTag *xtag )
908 buf += __MIN(n, N); \
914 if( n > 0 ) buf[0] = '\0';
/* text node */
920 nn = xtag_snprints( buf, n, xtag->pcdata, NULL );
/* element: opening "<name" */
928 nn = xtag_snprints( buf, n, "<", xtag->name, NULL );
/* one name="value" pair per attribute */
931 for( l = xtag->attributes; l; l = l->next )
933 attr = (XAttribute *)l->data;
935 nn = xtag_snprints( buf, n, " ", attr->name, "=\"", attr->value,
940 if( xtag->children == NULL )
942 nn = xtag_snprints ( buf, n, "/>", NULL );
948 nn = xtag_snprints( buf, n, ">", NULL );
/* children are printed recursively between the open and close tags */
952 for( l = xtag->children; l; l = l->next )
954 child = (XTag *)l->data;
956 nn = xtag_snprint( buf, n, child );
962 nn = xtag_snprints( buf, n, "</", xtag->name, ">", NULL );