1 /*****************************************************************************
2 * xtag.c : a trivial parser for XML-like tags
3 *****************************************************************************
4 * Copyright (C) 2003-2004 Commonwealth Scientific and Industrial Research
5 * Organisation (CSIRO) Australia
6 * Copyright (C) 2000-2004 the VideoLAN team
10 * Authors: Conrad Parker <Conrad.Parker@csiro.au>
11 * Andre Pang <Andre.Pang@csiro.au>
12 * Gildas Bazin <gbazin@videolan.org>
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
27 *****************************************************************************/
32 #include "vlc_block.h"
33 #include "vlc_stream.h"
49 * struct XTag is kind of a union ... it normally represents a whole
50 * tag (and its children), but it could alternatively represent some
51 * PCDATA. Basically, if tag->pcdata is non-NULL, interpret only it and
52 * ignore the name, attributes and inner_tags.
64 typedef struct _XAttribute
70 typedef struct _XTagParser
72 int valid; /* boolean */
78 /*****************************************************************************
80 *****************************************************************************/
/* Module entry points; both are handed to set_callbacks() in the module
 * descriptor. */
81 static int Open ( vlc_object_t * );
82 static void Close( vlc_object_t * );
/* Module descriptor: advertises the plugin as "Simple XML Parser" under the
 * "xml" capability and registers Open/Close as its callbacks.
 * NOTE(review): the 5 passed to set_capability is presumably the module
 * score/priority -- confirm against the module API. */
85 set_description( _("Simple XML Parser") );
86 set_capability( "xml", 5 );
87 set_callbacks( Open, Close );
/* Private state hung off xml_reader_t::p_sys: the parsed tree plus the cursor
 * (current tag / current attribute) that the Reader* functions move over it.
 * The reader functions also use a b_endtag flag on this structure. */
90 struct xml_reader_sys_t
92 XTag *p_root; /* Root tag */
93 XTag *p_curtag; /* Current tag */
94 XList *p_curattr; /* Current attribute */
/* Reader interface: ReaderCreate/ReaderDelete are installed on the xml_t by
 * Open(); the remaining Reader* functions are installed on each reader by
 * ReaderCreate(). */
98 static xml_reader_t *ReaderCreate( xml_t *, stream_t * );
99 static void ReaderDelete( xml_reader_t * );
100 static int ReaderRead( xml_reader_t * );
101 static int ReaderNodeType( xml_reader_t * );
102 static char *ReaderName( xml_reader_t * );
103 static char *ReaderValue( xml_reader_t * );
104 static int ReaderNextAttr( xml_reader_t * );
106 static int ReaderUseDTD ( xml_reader_t *, vlc_bool_t );
/* Catalog hooks installed on the xml_t by Open(). */
108 static void CatalogLoad( xml_t *, const char * );
109 static void CatalogAdd( xml_t *, const char *, const char *, const char * );
/* XTag tree API: parse a buffer into a tree, query it, iterate children,
 * free it, and print it back out. */
111 static XTag *xtag_new_parse( const char *, int );
112 static char *xtag_get_name( XTag * );
113 static char *xtag_get_pcdata( XTag * );
114 static char *xtag_get_attribute( XTag *, char * );
115 static XTag *xtag_first_child( XTag *, char * );
116 static XTag *xtag_next_child( XTag *, char * );
117 static XTag *xtag_free( XTag * );
118 static int xtag_snprint( char *, int, XTag * );
120 /*****************************************************************************
121 * Module initialization
122 *****************************************************************************/
/* Module activation callback: treats the generic object as an xml_t and
 * installs this parser's reader and catalog function pointers on it. */
123 static int Open( vlc_object_t *p_this )
125 xml_t *p_xml = (xml_t *)p_this;
/* Reader factory / destructor. */
127 p_xml->pf_reader_create = ReaderCreate;
128 p_xml->pf_reader_delete = ReaderDelete;
/* Catalog hooks (CatalogLoad only logs that catalogs are unsupported). */
130 p_xml->pf_catalog_load = CatalogLoad;
131 p_xml->pf_catalog_add = CatalogAdd;
136 /*****************************************************************************
137 * Module deinitialization
138 *****************************************************************************/
/* Module deactivation callback, registered together with Open via
 * set_callbacks(). */
139 static void Close( vlc_object_t *p_this )
144 /*****************************************************************************
145 * Catalogue functions
146 *****************************************************************************/
/* Catalog loading hook (installed as pf_catalog_load). This parser has no
 * catalog support: the call only emits a debug message. */
147 static void CatalogLoad( xml_t *p_xml, const char *psz_filename )
149 msg_Dbg( p_xml, "catalog support not implemented" );
/* Catalog entry hook (installed as pf_catalog_add).
 * NOTE(review): presumably a stub like CatalogLoad, since catalogs are
 * reported as unimplemented -- confirm. */
152 static void CatalogAdd( xml_t *p_xml, const char *psz_arg1,
153 const char *psz_arg2, const char *psz_filename )
157 /*****************************************************************************
159 *****************************************************************************/
/*
 * Creates a reader for stream `s`: reads the stream into one NUL-terminated
 * heap buffer, parses it into an XTag tree, and returns a new xml_reader_t
 * whose p_sys holds that tree and whose function pointers are the Reader*
 * functions of this file.
 */
160 static xml_reader_t *ReaderCreate( xml_t *p_xml, stream_t *s )
162 xml_reader_t *p_reader;
163 char *p_buffer, *p_new;
164 int i_size, i_pos = 0, i_buffer = 2048;
167 /* Open and read file */
168 p_buffer = malloc( i_buffer );
169 if( p_buffer == NULL ) {
170 msg_Err( p_xml, "out of memory" );
/* Keep reading while full 2048-byte chunks come back; a short read ends the
 * loop. The realloc result goes into p_new first, so p_buffer is not
 * overwritten by a failed reallocation. */
174 while( ( i_size = stream_Read( s, &p_buffer[i_pos], 2048 ) ) == 2048 )
178 p_new = realloc( p_buffer, i_buffer );
180 msg_Err( p_xml, "out of memory" );
/* NOTE(review): if stream_Read can return a negative value on error, the
 * index i_pos + i_size below may fall before the start of the buffer --
 * confirm the return contract. */
186 p_buffer[ i_pos + i_size ] = 0; /* 0 terminated string */
188 if( i_pos + i_size == 0 )
190 msg_Dbg( p_xml, "empty XML" );
/* NOTE(review): the length passed here is the buffer capacity (i_buffer),
 * not the number of bytes read (i_pos + i_size). xtag_new_parse uses it to
 * set parser.end, so the end marker may sit past the terminating NUL. */
195 p_root = xtag_new_parse( p_buffer, i_buffer );
198 msg_Warn( p_xml, "couldn't parse XML" );
/* NOTE(review): neither malloc result below is checked before being
 * dereferenced. */
204 p_reader = malloc( sizeof(xml_reader_t) );
205 p_reader->p_sys = malloc( sizeof(xml_reader_sys_t) );
/* Cursor starts unset: the first ReaderRead() moves it onto the root. */
206 p_reader->p_sys->p_root = p_root;
207 p_reader->p_sys->p_curtag = NULL;
208 p_reader->p_sys->p_curattr = NULL;
209 p_reader->p_sys->b_endtag = VLC_FALSE;
210 p_reader->p_xml = p_xml;
/* Reader method table. */
212 p_reader->pf_read = ReaderRead;
213 p_reader->pf_node_type = ReaderNodeType;
214 p_reader->pf_name = ReaderName;
215 p_reader->pf_value = ReaderValue;
216 p_reader->pf_next_attr = ReaderNextAttr;
217 p_reader->pf_use_dtd = ReaderUseDTD;
/* Destroys a reader made by ReaderCreate: releases the parsed tag tree, then
 * the private state structure. */
222 static void ReaderDelete( xml_reader_t *p_reader )
224 xtag_free( p_reader->p_sys->p_root );
225 free( p_reader->p_sys );
/* DTD-usage toggle, installed as pf_use_dtd by ReaderCreate. */
229 static int ReaderUseDTD ( xml_reader_t *p_reader, vlc_bool_t b_use )
/*
 * Advances the reader cursor by one node.
 * The first call puts the cursor on the root. Later calls descend to the next
 * child of the current tag if there is one; otherwise they flag the end tag of
 * a named element, and after that climb back to the parent. Returns 0 when the
 * current tag has no parent left to climb to.
 */
234 static int ReaderRead( xml_reader_t *p_reader )
/* No cursor yet: start at the root. */
238 if( !p_reader->p_sys->p_curtag )
240 p_reader->p_sys->p_curtag = p_reader->p_sys->p_root;
/* Descend: xtag_next_child keeps its own per-tag iteration position, so each
 * call yields the following child. A new tag resets the attribute cursor and
 * the end-tag flag. */
246 if( (p_child = xtag_next_child( p_reader->p_sys->p_curtag, 0 )) )
248 p_reader->p_sys->p_curtag = p_child;
249 p_reader->p_sys->p_curattr = 0;
250 p_reader->p_sys->b_endtag = VLC_FALSE;
/* Children exhausted: a named element reports its end tag once. */
254 if( p_reader->p_sys->p_curtag->name && /* no end tag for pcdata */
255 !p_reader->p_sys->b_endtag )
257 p_reader->p_sys->b_endtag = VLC_TRUE;
/* End tag already reported (or text node): climb to the parent, or stop when
 * there is none. */
261 p_reader->p_sys->b_endtag = VLC_FALSE;
262 if( !p_reader->p_sys->p_curtag->parent ) return 0;
263 p_reader->p_sys->p_curtag = p_reader->p_sys->p_curtag->parent;
/*
 * Classifies the node under the cursor:
 *   named tag with b_endtag set -> XML_READER_ENDELEM
 *   named tag otherwise         -> XML_READER_STARTELEM
 *   unnamed tag with pcdata     -> XML_READER_TEXT
 *   anything else               -> XML_READER_NONE
 * NOTE(review): p_curtag is dereferenced unconditionally, and ReaderCreate
 * leaves it NULL until the first ReaderRead() -- confirm callers always read
 * first.
 */
269 static int ReaderNodeType( xml_reader_t *p_reader )
271 if( p_reader->p_sys->p_curtag->name &&
272 p_reader->p_sys->b_endtag ) return XML_READER_ENDELEM;
273 if( p_reader->p_sys->p_curtag->name ) return XML_READER_STARTELEM;
274 if( p_reader->p_sys->p_curtag->pcdata ) return XML_READER_TEXT;
275 return XML_READER_NONE;
/*
 * Returns a strdup()'d copy of the current name: the tag name when no
 * attribute is selected, otherwise the selected attribute's name. The copy is
 * heap-allocated, so the caller is responsible for freeing it.
 */
278 static char *ReaderName( xml_reader_t *p_reader )
280 const char *psz_name;
/* No attribute selected: name of the tag itself. */
282 if( !p_reader->p_sys->p_curattr )
284 psz_name = xtag_get_name( p_reader->p_sys->p_curtag );
/* NOTE(review): looks like a debug trace -- confirm it is compiled out in
 * normal builds. */
286 printf( "TAG: %s\n", psz_name );
/* Attribute selected by ReaderNextAttr: name of that attribute. */
290 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->name;
292 if( psz_name ) return strdup( psz_name );
/*
 * Returns a strdup()'d copy of the current value: the text of a pcdata node,
 * otherwise the value of the selected attribute. Returns 0 when the node is
 * not text and no attribute is selected. The copy is heap-allocated, so the
 * caller is responsible for freeing it.
 */
296 static char *ReaderValue( xml_reader_t *p_reader )
298 const char *psz_name;
/* Text node: its character data is the value. */
299 if( p_reader->p_sys->p_curtag->pcdata )
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
302 printf( "%s\n", p_reader->p_sys->p_curtag->pcdata );
304 return strdup( p_reader->p_sys->p_curtag->pcdata );
/* Element with no attribute selected: no value. */
307 if( !p_reader->p_sys->p_curattr ) return 0;
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
310 printf( "%s=%s\n", ((XAttribute *)p_reader->p_sys->p_curattr->data)->name,
311 ((XAttribute *)p_reader->p_sys->p_curattr->data)->value );
/* psz_name holds the attribute *value* here despite its name. */
314 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->value;
316 if( psz_name ) return strdup( psz_name );
/*
 * Moves the attribute cursor: to the first attribute of the current tag when
 * none is selected, otherwise to the following one. Returns VLC_SUCCESS while
 * an attribute is selected and VLC_EGENERIC when there is none.
 * NOTE(review): after the last attribute the cursor becomes NULL, so a
 * further call starts again from the first attribute -- confirm callers stop
 * at the first VLC_EGENERIC.
 */
320 static int ReaderNextAttr( xml_reader_t *p_reader )
322 if( !p_reader->p_sys->p_curattr )
323 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curtag->attributes;
/* The condition on this else-branch is always true (it is the negation of the
 * test above). */
324 else if( p_reader->p_sys->p_curattr )
325 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curattr->next;
327 if( p_reader->p_sys->p_curattr ) return VLC_SUCCESS;
328 else return VLC_EGENERIC;
331 /*****************************************************************************
332 * XTAG parser functions
333 *****************************************************************************/
/*
 * Appends a new node to the end of `list`. When `list` is NULL the new node
 * itself is returned, which is how callers start a list
 * (list = xlist_append( list, item )).
 */
335 static XList *xlist_append( XList *list, void *data )
/* NOTE(review): the malloc result is used without a NULL check. */
339 l = (XList *)malloc( sizeof(XList) );
340 l->prev = l->next = NULL;
/* Empty list: the new node is the list. */
343 if( list == NULL ) return l;
/* Walk to the tail (the node whose next is NULL) and hook the new node on. */
345 for( last = list; last; last = last->next )
346 if( last->next == NULL ) break;
348 if( last ) last->next = l;
/* Releases the nodes of a list. The loop advances through a separately saved
 * successor (ln), presumably so the current node can be freed before stepping
 * on. The callers visible in this file release the node payloads themselves
 * before calling this (see xtag_free). */
353 static void xlist_free( XList *list )
357 for( l = list; l; l = ln )
364 /* Character classes */
/*
 * Character-class bit flags. Each class occupies its own bit so that several
 * classes can be OR-ed into one mask (e.g. X_WHITESPACE|X_CLOSETAG) and tested
 * with '&' by xtag_cin().
 *
 * The replacement lists are parenthesized so each macro expands to a single
 * operand: without the parentheses an expression such as `X_CLOSETAG + 1`
 * would expand to `1<<2 + 1`, i.e. `1 << 3`.
 */
#define X_WHITESPACE (1<<0)
#define X_OPENTAG    (1<<1)
#define X_CLOSETAG   (1<<2)
#define X_DQUOTE     (1<<3)
#define X_SQUOTE     (1<<4)
/*
 * "Character in class": returns VLC_TRUE when `c` belongs to at least one of
 * the classes whose bits are set in `char_class`. Each line tests one class
 * bit and, if it is requested, compares `c` with that class's character(s).
 * NOTE(review): isspace() receives a plain char; where char is signed, bytes
 * >= 0x80 become negative, which <ctype.h> does not allow -- consider casting
 * to unsigned char.
 */
377 static int xtag_cin( char c, int char_class )
379 if( char_class & X_WHITESPACE ) if( isspace(c) ) return VLC_TRUE;
380 if( char_class & X_OPENTAG ) if( c == '<' ) return VLC_TRUE;
381 if( char_class & X_CLOSETAG ) if( c == '>' ) return VLC_TRUE;
382 if( char_class & X_DQUOTE ) if( c == '"' ) return VLC_TRUE;
383 if( char_class & X_SQUOTE ) if( c == '\'' ) return VLC_TRUE;
384 if( char_class & X_EQUAL ) if( c == '=' ) return VLC_TRUE;
385 if( char_class & X_SLASH ) if( c == '/' ) return VLC_TRUE;
386 if( char_class & X_QMARK ) if( c == '?' ) return VLC_TRUE;
387 if( char_class & X_DASH ) if( c == '-' ) return VLC_TRUE;
388 if( char_class & X_EMARK ) if( c == '!' ) return VLC_TRUE;
/*
 * Returns the offset, relative to parser->start, of the first character that
 * belongs to `char_class`. Callers test the result with `>= 0`, so a negative
 * value presumably signals "not found".
 */
393 static int xtag_index( XTagParser *parser, int char_class )
395 char *s = parser->start;
/* NOTE(review): `s` never changes inside the loop, so `s != parser->end`
 * only checks the starting position; in practice the scan stops at the NUL
 * terminator. `&s[i] != parser->end` looks like the intended bound. */
398 for( i = 0; s[i] && s != parser->end; i++ )
400 if( xtag_cin( s[i], char_class ) ) return i;
/*
 * Advances parser->start past the leading run of characters that belong to
 * `char_class`, stopping on the first character outside the class. Does
 * nothing once the parser has been marked invalid.
 */
406 static void xtag_skip_over( XTagParser *parser, int char_class )
408 char *s = parser->start;
411 if( !parser->valid ) return;
/* NOTE(review): as in xtag_index, `s != parser->end` is loop-invariant; the
 * scan is effectively bounded by the NUL terminator only. */
413 for( i = 0; s[i] && s != parser->end; i++ )
415 if( !xtag_cin( s[i], char_class ) )
417 parser->start = &s[i];
/* Convenience wrapper: skips the leading whitespace at parser->start. */
425 static void xtag_skip_whitespace( XTagParser * parser )
427 xtag_skip_over( parser, X_WHITESPACE );
/*
 * Copies the text from parser->start up to (not including) the first
 * delimiter into a newly allocated buffer and leaves parser->start on that
 * delimiter. The copy is made only when the delimiter found belongs to
 * `good_end` and at least one character precedes it; a delimiter from
 * `bad_end`, or an empty token, does not yield a copy. Returns NULL
 * immediately when the parser is already invalid.
 */
430 static char *xtag_slurp_to( XTagParser *parser, int good_end, int bad_end )
432 char *ret, *s = parser->start;
435 if( !parser->valid ) return NULL;
/* Offset of the first character from either delimiter set. */
437 xi = xtag_index( parser, good_end | bad_end );
439 if( xi > 0 && xtag_cin (s[xi], good_end) )
/* NOTE(review): the malloc result is not checked, and strncpy with count xi
 * does not write a terminator itself -- confirm ret[xi] is set to NUL. */
441 ret = malloc( (xi+1) * sizeof(char) );
442 strncpy( ret, s, xi );
444 parser->start = &s[xi];
/*
 * Requires the character at parser->start to belong to `char_class`.
 * On a match the parser steps over that one character; on a mismatch the
 * parser is marked invalid. Returns VLC_FALSE right away when the parser is
 * already invalid.
 */
451 static int xtag_assert_and_pass( XTagParser *parser, int char_class )
453 char *s = parser->start;
455 if( !parser->valid ) return VLC_FALSE;
457 if( !xtag_cin( s[0], char_class ) )
459 parser->valid = VLC_FALSE;
/* Matched: consume exactly one character. */
463 parser->start = &s[1];
/*
 * Reads a quoted string (single or double quotes) at parser->start and
 * returns its contents, without the quotes, in a newly allocated buffer.
 * A quote character preceded by a backslash does not end the string.
 * Returns NULL when the parser is invalid or a quote is missing.
 */
468 static char *xtag_slurp_quoted( XTagParser *parser )
471 int quote = X_DQUOTE; /* quote char to match on */
474 if( !parser->valid ) return NULL;
476 xtag_skip_whitespace( parser );
/* Default is a double quote; switch if the string opens with a single one. */
480 if( xtag_cin( s[0], X_SQUOTE ) ) quote = X_SQUOTE;
482 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
/* Scan for the closing quote. */
486 for( xi = 0; s[xi]; xi++ )
488 if( xtag_cin( s[xi], quote ) )
/* NOTE(review): `xi > 1` means a quote at offset 1 is treated as the
 * terminator even when offset 0 holds a backslash; `xi > 0` looks intended.
 * An escaped backslash before the quote is not recognised either. */
490 if( !(xi > 1 && s[xi-1] == '\\') ) break;
/* NOTE(review): malloc result unchecked; strncpy with count xi writes no
 * terminator -- confirm ret[xi] is set to NUL. */
494 ret = malloc( (xi+1) * sizeof(char) );
495 strncpy( ret, s, xi );
497 parser->start = &s[xi];
/* NOTE(review): if the closing quote is missing (scan hit the NUL), this
 * returns NULL without releasing `ret`. */
499 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
/*
 * Parses one attribute of the form  name = "value"  (whitespace allowed
 * around '=') and returns it as a newly allocated XAttribute. Returns NULL
 * when no attribute name is found, which is how the caller's attribute loop
 * ends.
 */
504 static XAttribute *xtag_parse_attribute( XTagParser *parser )
510 if( !parser->valid ) return NULL;
512 xtag_skip_whitespace( parser );
/* The name runs up to whitespace or '='. Meeting '/' or '>' first means the
 * tag has no further attributes. */
514 name = xtag_slurp_to( parser, X_WHITESPACE|X_EQUAL, X_SLASH|X_CLOSETAG );
515 if( name == NULL ) return NULL;
517 xtag_skip_whitespace( parser );
/* '=' is mandatory; xtag_assert_and_pass marks the parser invalid if it is
 * missing. */
520 if( !xtag_assert_and_pass( parser, X_EQUAL ) )
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
523 printf( "xtag: attr failed EQUAL on <%s>\n", name );
528 xtag_skip_whitespace( parser );
530 value = xtag_slurp_quoted( parser );
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
535 printf ("Got NULL quoted attribute value\n");
/* NOTE(review): the malloc result is not visibly checked. */
540 attr = malloc( sizeof (*attr) );
/* Failure path: flag the whole parse as invalid. */
547 parser->valid = VLC_FALSE;
/*
 * Parses the next node at parser->start and returns it as a new XTag:
 *   - comments, processing instructions and DOCTYPE declarations are skipped
 *     and parsing restarts after them;
 *   - plain text before the next '<', and CDATA sections, become pcdata nodes;
 *   - an element becomes a named tag with its attributes and, unless it is
 *     self-closing, its recursively parsed children.
 * Returns NULL when the parser is invalid or (per the comment below) when a
 * closing tag is met, which is left for the parent to consume.
 * NOTE(review): children and skipped constructs are handled by recursion, so
 * stack depth follows the input -- consider a limit for untrusted documents.
 * NOTE(review): xtag_new_parse sets parser->end to NULL when called with
 * n == -1; the `parser->end - parser->start` length tests below are then not
 * meaningful -- confirm that mode is unused or guard it.
 */
551 static XTag *xtag_parse_tag( XTagParser *parser )
560 if( !parser->valid ) return NULL;
564 /* if this starts a comment tag, skip until end */
565 if( (parser->end - parser->start) > 7 &&
566 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_EMARK ) &&
567 xtag_cin( s[2], X_DASH ) && xtag_cin( s[3], X_DASH ) )
/* Step over the opening "<!--", then hop from dash to dash until one is
 * followed by "->" (together the closing "-->"). */
569 parser->start = s = &s[4];
570 while( (xi = xtag_index( parser, X_DASH )) >= 0 )
572 parser->start = s = &s[xi+1];
573 if( xtag_cin( s[0], X_DASH ) && xtag_cin( s[1], X_CLOSETAG ) )
575 parser->start = &s[2];
576 xtag_skip_whitespace( parser );
577 return xtag_parse_tag( parser );
583 /* ignore processing instructions '<?' ... '?>' */
584 if( (parser->end - parser->start) > 4 &&
585 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
587 parser->start = s = &s[2];
/* NOTE(review): no visible statement advances parser->start when a '?' is
 * not followed by '>' -- confirm this loop cannot spin on such input. */
588 while ((xi = xtag_index( parser, X_QMARK )) >= 0) {
589 if (xtag_cin( s[xi+1], X_CLOSETAG )) {
590 parser->start = &s[xi+2];
591 xtag_skip_whitespace( parser );
592 return xtag_parse_tag( parser );
598 /* ignore doctype '<!DOCTYPE' ... '>' */
599 if ( (parser->end - parser->start) > 8 &&
600 !strncmp( s, "<!DOCTYPE", 9 ) ) {
/* Skip to the first '>' -- a DOCTYPE with an internal subset containing '>'
 * would end early. */
601 xi = xtag_index( parser, X_CLOSETAG );
603 parser->start = s = &s[xi+1];
604 xtag_skip_whitespace( parser );
605 return xtag_parse_tag( parser );
/* Text up to the next '<' becomes a pcdata node attached to the tag being
 * parsed one level up. */
612 if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) != NULL )
/* NOTE(review): malloc result is not visibly checked. */
614 tag = malloc( sizeof(*tag) );
616 tag->pcdata = pcdata;
617 tag->parent = parser->current_tag;
618 tag->attributes = NULL;
619 tag->children = NULL;
620 tag->current_child = NULL;
625 /* if this starts a close tag, return NULL and let the parent take it */
626 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
629 /* parse CDATA content */
630 if ( (parser->end - parser->start) > 8 &&
631 !strncmp( s, "<![CDATA[", 9 ) ) {
/* parser->start stays on the first content byte while s scans for "]]>". */
632 parser->start = s = &s[9];
633 while (parser->end - s > 2) {
634 if (strncmp( s, "]]>", 3 ) == 0) {
635 if ( !(tag = malloc( sizeof(*tag))) ) return NULL;
/* NOTE(review): if this allocation fails, the `tag` allocated on the
 * previous line is not released before returning. */
636 if ( !(pcdata = malloc( sizeof(char)*(s - parser->start + 1))) ) return NULL;
637 strncpy( pcdata, parser->start, s - parser->start );
638 pcdata[s - parser->start]='\0';
/* Resume parsing right after the "]]>" terminator. */
639 parser->start = s = &s[3];
641 tag->pcdata = pcdata;
642 tag->parent = parser->current_tag;
643 tag->attributes = NULL;
644 tag->children = NULL;
645 tag->current_child = NULL;
/* Regular element: '<', then the name up to whitespace, '/' or '>'. */
655 if( !xtag_assert_and_pass( parser, X_OPENTAG ) ) return NULL;
657 name = xtag_slurp_to( parser, X_WHITESPACE|X_SLASH|X_CLOSETAG, X_NONE );
658 if( name == NULL ) return NULL;
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
661 printf ("<%s ...\n", name);
/* NOTE(review): malloc result is not visibly checked. */
664 tag = malloc( sizeof(*tag) );
667 tag->parent = parser->current_tag;
668 tag->attributes = NULL;
669 tag->children = NULL;
670 tag->current_child = NULL;
/* Whitespace after the name introduces attributes; collect them until
 * xtag_parse_attribute returns NULL. */
674 if( xtag_cin( s[0], X_WHITESPACE ) )
676 while( (attr = xtag_parse_attribute( parser )) != NULL )
678 tag->attributes = xlist_append( tag->attributes, attr );
682 xtag_skip_whitespace( parser );
/* '>' opens element content: parse children with this tag as their parent,
 * then expect the matching "</name>". */
686 if( xtag_cin( s[0], X_CLOSETAG ) )
688 parser->current_tag = tag;
690 xtag_assert_and_pass( parser, X_CLOSETAG );
692 while( (inner = xtag_parse_tag( parser ) ) != NULL )
694 tag->children = xlist_append( tag->children, inner );
/* Children done: restore the enclosing tag as the current parent. */
697 parser->current_tag = tag->parent;
698 xtag_skip_whitespace( parser );
700 xtag_assert_and_pass( parser, X_OPENTAG );
701 xtag_assert_and_pass( parser, X_SLASH );
702 name = xtag_slurp_to( parser, X_WHITESPACE | X_CLOSETAG, X_NONE );
/* A closing name that differs from the opening name invalidates the parse. */
705 if( strcmp( name, tag->name ) )
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
708 printf ("got %s expected %s\n", name, tag->name);
710 parser->valid = VLC_FALSE;
715 xtag_skip_whitespace( parser );
716 xtag_assert_and_pass( parser, X_CLOSETAG );
717 xtag_skip_whitespace( parser );
/* Otherwise the element must be self-closing: "/>". */
721 xtag_assert_and_pass( parser, X_SLASH );
722 xtag_assert_and_pass( parser, X_CLOSETAG );
723 xtag_skip_whitespace( parser );
/*
 * Releases a tag: its name and pcdata strings, every attribute's name and
 * value, the attribute list, and the child list (the loop over children
 * visits each child tag). Returns NULL when given NULL.
 */
729 static XTag *xtag_free( XTag *xtag )
735 if( xtag == NULL ) return NULL;
/* The NULL guards are redundant: free(NULL) is a no-op. */
737 if( xtag->name ) free( xtag->name );
738 if( xtag->pcdata ) free( xtag->pcdata );
/* Release attribute payloads first, then the list nodes that held them. */
740 for( l = xtag->attributes; l; l = l->next )
742 if( (attr = (XAttribute *)l->data) != NULL )
744 if( attr->name ) free( attr->name );
745 if( attr->value ) free( attr->value );
749 xlist_free( xtag->attributes );
/* Same order for children: payloads, then list nodes. */
751 for( l = xtag->children; l; l = l->next )
753 child = (XTag *)l->data;
756 xlist_free( xtag->children );
/*
 * Parses buffer `s` into an XTag tree.
 *   n == -1 : no explicit end marker (parser.end is NULL);
 *   otherwise parser.end points at s[n].
 * When the input holds more than one top-level node, they are gathered as
 * children of a synthetic wrapper tag that has neither name nor pcdata.
 */
763 static XTag *xtag_new_parse( const char *s, int n )
766 XTag *tag, *ttag, *wrapper;
768 parser.valid = VLC_TRUE;
769 parser.current_tag = NULL;
/* const is cast away; the parser only keeps read cursors into the buffer as
 * far as the code in this file shows. */
770 parser.start = (char *)s;
772 if( n == -1 ) parser.end = NULL;
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
776 printf ("empty buffer");
780 else parser.end = (char *)&s[n];
782 /* can't have whitespace pcdata outside rootnode */
783 xtag_skip_whitespace( &parser );
/* First top-level node. */
785 tag = xtag_parse_tag( &parser );
/* NOTE(review): looks like a debug trace -- confirm it is compiled out. */
790 printf ("invalid file");
/* A second top-level node means the document has several roots. */
796 if( (ttag = xtag_parse_tag( &parser )) != NULL )
/* NOTE(review): malloc result is not visibly checked. */
804 wrapper = malloc( sizeof(XTag) );
805 wrapper->name = NULL;
806 wrapper->pcdata = NULL;
807 wrapper->parent = NULL;
808 wrapper->attributes = NULL;
809 wrapper->children = NULL;
810 wrapper->current_child = NULL;
812 wrapper->children = xlist_append( wrapper->children, tag );
813 wrapper->children = xlist_append( wrapper->children, ttag );
/* Keep collecting any further top-level nodes. */
815 while( (ttag = xtag_parse_tag( &parser )) != NULL )
823 wrapper->children = xlist_append( wrapper->children, ttag );
/* Returns the tag's name, or NULL for a NULL tag. The pointer is the tag's own
 * string, not a copy. */
831 static char *xtag_get_name( XTag *xtag )
833 return xtag ? xtag->name : NULL;
/* Returns the text of the first child that carries pcdata, i.e. the first
 * text node directly under `xtag`. The pointer is the child's own string, not
 * a copy. Returns NULL for a NULL tag. */
836 static char *xtag_get_pcdata( XTag *xtag )
841 if( xtag == NULL ) return NULL;
843 for( l = xtag->children; l; l = l->next )
845 child = (XTag *)l->data;
846 if( child->pcdata != NULL )
848 return child->pcdata;
/* Returns the value of the first attribute whose name equals `attribute`
 * (exact, case-sensitive strcmp match). The pointer is the attribute's own
 * string, not a copy. Returns NULL for a NULL tag. */
855 static char *xtag_get_attribute( XTag *xtag, char *attribute )
860 if( xtag == NULL ) return NULL;
862 for( l = xtag->attributes; l; l = l->next )
864 if( (attr = (XAttribute *)l->data) != NULL )
866 if( !strcmp( attr->name, attribute ) ) return attr->value;
/*
 * Starts a child iteration on `xtag` and returns the first child, or the
 * first child named `name` when filtering by name. The position is remembered
 * in xtag->current_child for xtag_next_child; it is reset to NULL when the
 * name search finds nothing. Returns NULL for a NULL tag or a tag without
 * children.
 */
873 static XTag *xtag_first_child( XTag *xtag, char *name )
878 if( xtag == NULL ) return NULL;
879 if( (l = xtag->children) == NULL ) return NULL;
/* Unfiltered case: the very first child. */
883 xtag->current_child = l;
884 return (XTag *)l->data;
/* Filtered case: first child whose name matches. */
887 for( ; l; l = l->next )
889 child = (XTag *)l->data;
/* NOTE(review): child->name is compared without a visible NULL check, while
 * text nodes appear to have a NULL name (see ReaderNodeType) -- confirm. */
891 if( !strcmp( child->name, name ) )
893 xtag->current_child = l;
898 xtag->current_child = NULL;
/*
 * Continues the child iteration stored in xtag->current_child and returns the
 * following child, or the following child named `name` when filtering by
 * name. With no iteration in progress it behaves like xtag_first_child.
 * Returns NULL for a NULL tag or when the current child is the last one.
 */
903 static XTag *xtag_next_child( XTag *xtag, char *name )
908 if( xtag == NULL ) return NULL;
/* No saved position: begin a fresh iteration. */
910 if( (l = xtag->current_child) == NULL )
911 return xtag_first_child( xtag, name );
/* Last child reached. current_child is left untouched on this path, so later
 * calls keep returning NULL. */
913 if( (l = l->next) == NULL ) return NULL;
/* Unfiltered case: simply the next sibling. */
917 xtag->current_child = l;
918 return (XTag *)l->data;
/* Filtered case: next sibling whose name matches. */
921 for( ; l; l = l->next )
923 child = (XTag *)l->data;
/* NOTE(review): child->name is compared without a visible NULL check, while
 * text nodes appear to have a NULL name (see ReaderNodeType) -- confirm. */
925 if( !strcmp( child->name, name ) )
927 xtag->current_child = l;
932 xtag->current_child = NULL;
938 * This snprints function takes a variable list of char *, the last of
939 * which must be NULL, and prints each in turn to buf.
940 * Returns C99-style total length that would have been written, even if
941 * this is larger than n.
/* Writes each string of a NULL-terminated variadic list of char * into buf in
 * turn, copying no more than the space given by n. */
943 static int xtag_snprints( char *buf, int n, ... )
947 int len, to_copy, total = 0;
/* The NULL sentinel ends the argument list. */
951 for( s = va_arg( ap, char * ); s; s = va_arg( ap, char *) )
/* Copy only what fits: the smaller of n and this string's length. */
955 if( (to_copy = __MIN(n, len) ) > 0 )
957 memcpy( buf, s, to_copy );
/*
 * Serialises `xtag` back to XML text in buf (capacity n): pcdata nodes print
 * their text; elements print "<name", each attribute as  name="value", then
 * either "/>" when there are no children or ">", the children (recursively)
 * and "</name>".
 * NOTE(review): attribute values are written between double quotes with no
 * visible escaping -- confirm values cannot contain '"'.
 */
970 static int xtag_snprint( char *buf, int n, XTag *xtag )
/* Output cursor advance, clamped to the space left in the buffer. */
978 buf += __MIN(n, N); \
/* Start from an empty string whenever there is room for a terminator. */
984 if( n > 0 ) buf[0] = '\0';
/* Text node: emit the character data as is. */
990 nn = xtag_snprints( buf, n, xtag->pcdata, NULL );
/* Element: opening "<name". */
998 nn = xtag_snprints( buf, n, "<", xtag->name, NULL );
1001 for( l = xtag->attributes; l; l = l->next )
1003 attr = (XAttribute *)l->data;
1005 nn = xtag_snprints( buf, n, " ", attr->name, "=\"", attr->value,
/* No children: self-closing form. */
1010 if( xtag->children == NULL )
1012 nn = xtag_snprints ( buf, n, "/>", NULL );
1018 nn = xtag_snprints( buf, n, ">", NULL );
/* Children are printed by recursion. */
1022 for( l = xtag->children; l; l = l->next )
1024 child = (XTag *)l->data;
1026 nn = xtag_snprint( buf, n, child );
1032 nn = xtag_snprints( buf, n, "</", xtag->name, ">", NULL );