1 /*****************************************************************************
2 * xtag.c : a trivial parser for XML-like tags
3 *****************************************************************************
4 * Copyright (C) 2003-2004 Commonwealth Scientific and Industrial Research
5 * Organisation (CSIRO) Australia
6 * Copyright (C) 2000-2004 the VideoLAN team
10 * Authors: Conrad Parker <Conrad.Parker@csiro.au>
11 * Andre Pang <Andre.Pang@csiro.au>
12 * Gildas Bazin <gbazin@videolan.org>
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
37 #include <vlc_block.h>
38 #include <vlc_stream.h>
39 #include <vlc_memory.h>
54 * struct XTag is kind of a union ... it normally represents a whole
55 * tag (and its children), but it could alternatively represent some
56 * PCDATA. Basically, if tag->pcdata is non-NULL, interpret only it and
57 * ignore the name, attributes and inner_tags.
/* Attribute record; the reader code accesses its name and value strings
 * (the struct body is not visible in this excerpt). */
69 typedef struct _XAttribute
/* Parsing state that is threaded through every xtag_* parsing helper. */
75 typedef struct _XTagParser
/* Cleared by the parsing helpers when the input does not match what they
 * expect; once it is false the helpers return without consuming input. */
77 int valid; /* boolean */
83 /*****************************************************************************
85 *****************************************************************************/
/* Entry points handed to set_callbacks() below. */
86 static int ReaderOpen( vlc_object_t * );
87 static void ReaderClose( vlc_object_t * );
/* Module descriptor: advertises the "xml reader" capability with the value 5
 * (presumably the module's priority score - confirm against vlc_plugin.h). */
90 set_description( N_("Simple XML Parser") )
91 set_capability( "xml reader", 5 )
92 set_callbacks( ReaderOpen, ReaderClose )
/* Private state of one reader instance: the parsed tag tree plus a cursor
 * (current tag / current attribute) that the Reader* callbacks advance. */
95 struct xml_reader_sys_t
97 XTag *p_root; /* Root tag */
98 XTag *p_curtag; /* Current tag */
99 XList *p_curattr; /* Current attribute */
/* Reader callbacks installed on the xml_reader_t by ReaderOpen(). */
103 static int ReaderRead( xml_reader_t * );
104 static int ReaderNodeType( xml_reader_t * );
105 static char *ReaderName( xml_reader_t * );
106 static char *ReaderValue( xml_reader_t * );
107 static int ReaderNextAttr( xml_reader_t * );
109 static int ReaderUseDTD ( xml_reader_t * );
/* Tag-tree construction, queries and teardown. */
111 static XTag *xtag_new_parse( const char *, int );
112 static char *xtag_get_name( XTag * );
114 static char *xtag_get_pcdata( XTag * );
115 static char *xtag_get_attribute( XTag *, char * );
117 static XTag *xtag_first_child( XTag *, char * );
118 static XTag *xtag_next_child( XTag *, char * );
119 static void xtag_free( XTag * );
/* Serialises a tag tree back to text. */
121 static int xtag_snprint( char *, int, XTag * );
124 /*****************************************************************************
126 *****************************************************************************/
/*
 * ReaderOpen: read the reader's stream into a heap buffer, parse it into an
 * XTag tree and install the Reader* callbacks on the xml_reader_t.
 * The stream is consumed in 2048-byte reads; the buffer is resized through
 * realloc_or_free() and NUL-terminated before it is handed to the parser.
 */
127 static int ReaderOpen( vlc_object_t *p_this )
129 xml_reader_t *p_reader = (xml_reader_t *)p_this;
130 stream_t *s = p_reader->p_stream;
132 int i_size, i_pos = 0, i_buffer = 2048;
135 /* Open and read file */
136 p_buffer = malloc( i_buffer );
137 if( p_buffer == NULL )
/* Loop only while full 2048-byte chunks arrive; a short read ends it. */
140 while( ( i_size = stream_Read( s, &p_buffer[i_pos], 2048 ) ) == 2048 )
144 p_buffer = realloc_or_free( p_buffer, i_buffer );
/* Nothing was read at all. */
148 if( i_pos + i_size == 0 )
150 msg_Dbg( p_this, "empty XML" );
/* NOTE(review): if stream_Read() can return a negative value on error,
 * i_pos + i_size may be negative here - confirm and guard. */
154 p_buffer[ i_pos + i_size ] = '\0'; /* 0 terminated string */
/* NOTE(review): the length argument is i_buffer, not the number of bytes
 * read (i_pos + i_size); parsing presumably stops at the NUL terminator
 * written above - confirm. */
156 p_root = xtag_new_parse( p_buffer, i_buffer );
160 msg_Warn( p_this, "couldn't parse XML" );
164 p_reader->p_sys = malloc( sizeof(xml_reader_sys_t) );
165 if( !p_reader->p_sys )
/* Start with no current tag: the first ReaderRead() moves to the root. */
170 p_reader->p_sys->p_root = p_root;
171 p_reader->p_sys->p_curtag = NULL;
172 p_reader->p_sys->p_curattr = NULL;
173 p_reader->p_sys->b_endtag = false;
175 p_reader->pf_read = ReaderRead;
176 p_reader->pf_node_type = ReaderNodeType;
177 p_reader->pf_name = ReaderName;
178 p_reader->pf_value = ReaderValue;
179 p_reader->pf_next_attr = ReaderNextAttr;
180 p_reader->pf_use_dtd = ReaderUseDTD;
185 static void ReaderClose( vlc_object_t *p_this )
187 xml_reader_t *p_reader = (xml_reader_t *)p_this;
189 xtag_free( p_reader->p_sys->p_root );
190 free( p_reader->p_sys );
/* ReaderUseDTD: DTD callback; the reader argument is not used.
 * (The value returned is not visible in this excerpt.) */
193 static int ReaderUseDTD ( xml_reader_t *p_reader )
195 VLC_UNUSED(p_reader);
/*
 * ReaderRead: advance the cursor to the next node of the tag tree.
 * The first call moves onto the root tag. Later calls descend to the next
 * unvisited child if there is one, otherwise report the end of the current
 * named tag once, then climb to the parent. Returns 0 when the cursor would
 * have to climb above a tag that has no parent.
 */
199 static int ReaderRead( xml_reader_t *p_reader )
/* First call: start at the root. */
203 if( !p_reader->p_sys->p_curtag )
205 p_reader->p_sys->p_curtag = p_reader->p_sys->p_root;
/* A NULL name asks xtag_next_child() for the next child of any kind. */
211 if( (p_child = xtag_next_child( p_reader->p_sys->p_curtag, 0 )) )
213 p_reader->p_sys->p_curtag = p_child;
214 p_reader->p_sys->p_curattr = NULL;
215 p_reader->p_sys->b_endtag = false;
/* No more children: report the end of this element exactly once. */
219 if( p_reader->p_sys->p_curtag->name && /* no end tag for pcdata */
220 !p_reader->p_sys->b_endtag )
222 p_reader->p_sys->b_endtag = true;
/* End already reported (or PCDATA): go back up to the parent. */
226 p_reader->p_sys->b_endtag = false;
227 if( !p_reader->p_sys->p_curtag->parent ) return 0;
228 p_reader->p_sys->p_curtag = p_reader->p_sys->p_curtag->parent;
234 static int ReaderNodeType( xml_reader_t *p_reader )
236 if( p_reader->p_sys->p_curtag->name && p_reader->p_sys->b_endtag )
237 return XML_READER_ENDELEM;
238 if( p_reader->p_sys->p_curtag->name )
239 return XML_READER_STARTELEM;
240 if( p_reader->p_sys->p_curtag->pcdata )
241 return XML_READER_TEXT;
242 return XML_READER_NONE;
/*
 * ReaderName: name of the current attribute if one is selected, otherwise
 * the name of the current tag. Returns a strdup()ed copy, or NULL when
 * there is no name.
 */
245 static char *ReaderName( xml_reader_t *p_reader )
247 const char *psz_name;
/* No attribute selected: use the tag's own name. */
249 if( !p_reader->p_sys->p_curattr )
251 psz_name = xtag_get_name( p_reader->p_sys->p_curtag );
/* NOTE(review): psz_name may be NULL here (e.g. for PCDATA nodes); this is
 * presumably debug-only output - confirm it is compiled out by default. */
253 fprintf( stderr, "TAG: %s\n", psz_name );
257 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->name;
259 return psz_name ? strdup( psz_name ) : NULL;
/*
 * ReaderValue: text of the current PCDATA node, or the value of the current
 * attribute. Returns a strdup()ed copy; returns NULL when the node is not
 * PCDATA and no attribute is selected, or when the attribute has no value.
 */
262 static char *ReaderValue( xml_reader_t *p_reader )
264 const char *psz_name;
/* PCDATA nodes take precedence over any selected attribute. */
265 if( p_reader->p_sys->p_curtag->pcdata )
268 fprintf( stderr, "%s\n", p_reader->p_sys->p_curtag->pcdata );
270 return strdup( p_reader->p_sys->p_curtag->pcdata );
273 if( !p_reader->p_sys->p_curattr ) return NULL;
276 fprintf( stderr, "%s=%s\n", ((XAttribute *)p_reader->p_sys->p_curattr->data)->name,
277 ((XAttribute *)p_reader->p_sys->p_curattr->data)->value );
280 psz_name = ((XAttribute *)p_reader->p_sys->p_curattr->data)->value;
282 return psz_name ? strdup( psz_name ) : NULL;
285 static int ReaderNextAttr( xml_reader_t *p_reader )
287 if( !p_reader->p_sys->p_curattr )
288 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curtag->attributes;
289 else if( p_reader->p_sys->p_curattr )
290 p_reader->p_sys->p_curattr = p_reader->p_sys->p_curattr->next;
292 return p_reader->p_sys->p_curattr ? VLC_SUCCESS : VLC_EGENERIC;
295 /*****************************************************************************
296 * XTAG parser functions
297 *****************************************************************************/
/* xlist_append: allocate a new list node (xmalloc) and append it after the
 * last element of `list`. Callers store the return value back as the list,
 * so it is presumably the list head - the return path is not visible here. */
299 static XList *xlist_append( XList *list, void *data )
303 l = (XList *)xmalloc( sizeof(XList) );
304 l->prev = l->next = NULL;
310 /* Find the last element */
/* xlist_free: walk the list from `list`, using `ln` to hold the successor
 * so iteration survives releasing the current node (loop body not visible
 * in this excerpt). */
320 static void xlist_free( XList *list )
324 for( l = list; l; l = ln )
331 /* Character classes */
/* One bit per character class so that classes can be OR-ed into a mask.
 * Each expansion is parenthesized so the shift cannot be re-associated by
 * operators of higher precedence at the use site (e.g. `X_CLOSETAG * 2`). */
#define X_WHITESPACE (1<<0)
#define X_OPENTAG    (1<<1)
#define X_CLOSETAG   (1<<2)
#define X_DQUOTE     (1<<3)
#define X_SQUOTE     (1<<4)
344 static int xtag_cin( char c, int char_class )
346 if( char_class & X_WHITESPACE ) if( isspace(c) ) return true;
347 if( char_class & X_OPENTAG ) if( c == '<' ) return true;
348 if( char_class & X_CLOSETAG ) if( c == '>' ) return true;
349 if( char_class & X_DQUOTE ) if( c == '"' ) return true;
350 if( char_class & X_SQUOTE ) if( c == '\'' ) return true;
351 if( char_class & X_EQUAL ) if( c == '=' ) return true;
352 if( char_class & X_SLASH ) if( c == '/' ) return true;
353 if( char_class & X_QMARK ) if( c == '?' ) return true;
354 if( char_class & X_DASH ) if( c == '-' ) return true;
355 if( char_class & X_EMARK ) if( c == '!' ) return true;
360 static int xtag_index( XTagParser *parser, int char_class )
362 char *s = parser->start;
365 for( i = 0; s[i] && s != parser->end; i++ )
367 if( xtag_cin( s[i], char_class ) ) return i;
373 static void xtag_skip_over( XTagParser *parser, int char_class )
375 char *s = parser->start;
378 if( !parser->valid ) return;
380 for( i = 0; s[i] && s != parser->end; i++ )
382 if( !xtag_cin( s[i], char_class ) )
384 parser->start = &s[i];
392 static void xtag_skip_whitespace( XTagParser * parser )
394 xtag_skip_over( parser, X_WHITESPACE );
397 static char *xtag_slurp_to( XTagParser *parser, int good_end, int bad_end )
399 char *ret, *s = parser->start;
402 if( !parser->valid ) return NULL;
404 xi = xtag_index( parser, good_end | bad_end );
406 if( xi > 0 && xtag_cin (s[xi], good_end) )
408 ret = xmalloc( xi+1 );
409 strncpy( ret, s, xi );
411 parser->start = &s[xi];
418 static int xtag_assert_and_pass( XTagParser *parser, int char_class )
420 char *s = parser->start;
422 if( !parser->valid ) return false;
424 if( !xtag_cin( s[0], char_class ) )
426 parser->valid = false;
430 parser->start = &s[1];
435 static char *xtag_slurp_quoted( XTagParser *parser )
438 int quote = X_DQUOTE; /* quote char to match on */
441 if( !parser->valid ) return NULL;
443 xtag_skip_whitespace( parser );
447 if( xtag_cin( s[0], X_SQUOTE ) ) quote = X_SQUOTE;
449 if( !xtag_assert_and_pass( parser, quote ) ) return NULL;
453 for( xi = 0; s[xi]; xi++ )
455 if( xtag_cin( s[xi], quote ) )
457 if( !(xi > 1 && s[xi-1] == '\\') ) break;
461 ret = xmalloc( xi+1 );
462 strncpy( ret, s, xi );
464 parser->start = &s[xi];
466 if( !xtag_assert_and_pass( parser, quote ) )
/*
 * xtag_parse_attribute: parse one `name = "value"` pair at the parse
 * position. Whitespace is allowed around the name and the '='; the value
 * must be quoted. On a missing '=' or value the parser is marked invalid.
 */
475 static XAttribute *xtag_parse_attribute( XTagParser *parser )
484 xtag_skip_whitespace( parser );
/* The name ends at whitespace or '='; hitting '/' or '>' first means
 * there is no (further) attribute. */
486 name = xtag_slurp_to( parser, X_WHITESPACE|X_EQUAL, X_SLASH|X_CLOSETAG );
490 xtag_skip_whitespace( parser );
493 if( !xtag_assert_and_pass( parser, X_EQUAL ) )
496 fprintf( stderr, "xtag: attr failed EQUAL on <%s>\n", name );
501 xtag_skip_whitespace( parser );
503 value = xtag_slurp_quoted( parser );
508 fprintf (stderr, "Got NULL quoted attribute value\n");
513 attr = xmalloc( sizeof (*attr) );
520 parser->valid = false;
/*
 * xtag_parse_tag: parse the next node at the parse position.
 * Comments, processing instructions and DOCTYPE declarations are skipped
 * (by recursing on whatever follows them). Text before the next '<' and
 * CDATA sections become PCDATA nodes. Otherwise an element is parsed: its
 * name, its attributes, and either a self-closing "/>" or a list of child
 * nodes followed by a close tag whose name must match.
 * New nodes get parser->current_tag as their parent.
 */
524 static XTag *xtag_parse_tag( XTagParser *parser )
533 if( !parser->valid ) return NULL;
537 /* if this starts a comment tag, skip until end */
538 if( (parser->end - parser->start) > 7 &&
539 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_EMARK ) &&
540 xtag_cin( s[2], X_DASH ) && xtag_cin( s[3], X_DASH ) )
542 parser->start = s = &s[4];
/* Hop from '-' to '-' until one is followed by "->". */
543 while( (xi = xtag_index( parser, X_DASH )) >= 0 )
545 parser->start = s = &s[xi+1];
546 if( xtag_cin( s[0], X_DASH ) && xtag_cin( s[1], X_CLOSETAG ) )
548 parser->start = &s[2];
549 xtag_skip_whitespace( parser );
550 return xtag_parse_tag( parser );
556 /* ignore processing instructions '<?' ... '?>' */
557 if( (parser->end - parser->start) > 4 &&
558 xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
560 parser->start = s = &s[2];
561 while ((xi = xtag_index( parser, X_QMARK )) >= 0) {
562 if (xtag_cin( s[xi+1], X_CLOSETAG )) {
563 parser->start = &s[xi+2];
564 xtag_skip_whitespace( parser );
565 return xtag_parse_tag( parser );
571 /* ignore doctype '<!DOCTYPE' ... '>' */
572 if ( (parser->end - parser->start) > 8 &&
573 !strncmp( s, "<!DOCTYPE", 9 ) ) {
574 xi = xtag_index( parser, X_CLOSETAG );
576 parser->start = &s[xi+1];
577 xtag_skip_whitespace( parser );
578 return xtag_parse_tag( parser );
/* Text up to the next '<' becomes a PCDATA node. */
585 if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) != NULL )
587 tag = xmalloc( sizeof(*tag) );
589 tag->pcdata = pcdata;
590 tag->parent = parser->current_tag;
591 tag->attributes = NULL;
592 tag->children = NULL;
593 tag->current_child = NULL;
598 /* if this starts a close tag, return NULL and let the parent take it */
599 if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
602 /* parse CDATA content */
603 if ( (parser->end - parser->start) > 8 &&
604 !strncmp( s, "<![CDATA[", 9 ) ) {
605 parser->start = s = &s[9];
/* Scan for the "]]>" terminator; everything before it is the content. */
606 while (parser->end - s > 2) {
607 if (strncmp( s, "]]>", 3 ) == 0) {
/* NOTE(review): plain malloc() with NULL checks here, while the rest of
 * this function uses xmalloc() - inconsistent allocation style. */
608 if ( !(tag = malloc( sizeof(*tag))) ) return NULL;
609 if ( !(pcdata = malloc( s - parser->start + 1)) )
614 strncpy( pcdata, parser->start, s - parser->start );
615 pcdata[s - parser->start]='\0';
616 parser->start = &s[3];
618 tag->pcdata = pcdata;
619 tag->parent = parser->current_tag;
620 tag->attributes = NULL;
621 tag->children = NULL;
622 tag->current_child = NULL;
/* From here on: a regular element, "<name attr... >" or "<name attr... />". */
632 if( !xtag_assert_and_pass( parser, X_OPENTAG ) ) return NULL;
634 name = xtag_slurp_to( parser, X_WHITESPACE|X_SLASH|X_CLOSETAG, X_NONE );
635 if( name == NULL ) return NULL;
638 fprintf (stderr, "<%s ...\n", name);
641 tag = xmalloc( sizeof(*tag) );
644 tag->parent = parser->current_tag;
645 tag->attributes = NULL;
646 tag->children = NULL;
647 tag->current_child = NULL;
/* Whitespace after the name means attributes may follow. */
651 if( xtag_cin( s[0], X_WHITESPACE ) )
653 while( (attr = xtag_parse_attribute( parser )) != NULL )
655 tag->attributes = xlist_append( tag->attributes, attr );
659 xtag_skip_whitespace( parser );
/* '>' opens the element body: parse children until none is returned. */
663 if( xtag_cin( s[0], X_CLOSETAG ) )
665 parser->current_tag = tag;
667 xtag_assert_and_pass( parser, X_CLOSETAG );
669 while( (inner = xtag_parse_tag( parser ) ) != NULL )
671 tag->children = xlist_append( tag->children, inner );
674 parser->current_tag = tag->parent;
675 xtag_skip_whitespace( parser );
/* Expect the close tag "</name>" and check that the name matches. */
677 xtag_assert_and_pass( parser, X_OPENTAG );
678 xtag_assert_and_pass( parser, X_SLASH );
679 name = xtag_slurp_to( parser, X_WHITESPACE | X_CLOSETAG, X_NONE );
/* NOTE(review): xtag_slurp_to() can return NULL; confirm `name` is checked
 * before this strcmp() (any such check is not visible in this excerpt). */
682 if( strcmp( name, tag->name ) )
685 fprintf (stderr, "got %s expected %s\n", name, tag->name);
687 parser->valid = false;
692 xtag_skip_whitespace( parser );
693 xtag_assert_and_pass( parser, X_CLOSETAG );
694 xtag_skip_whitespace( parser );
/* Self-closing element: "/>". */
698 xtag_assert_and_pass( parser, X_SLASH );
699 xtag_assert_and_pass( parser, X_CLOSETAG );
700 xtag_skip_whitespace( parser );
/*
 * xtag_free: release a tag: its PCDATA, every attribute record, the
 * attribute list, the child tags and the child list.
 */
706 static void xtag_free( XTag *xtag )
716 free( xtag->pcdata );
/* Release each attribute record, then the list nodes that held them. */
718 for( l = xtag->attributes; l; l = l->next )
720 if( (attr = (XAttribute *)l->data) != NULL )
727 xlist_free( xtag->attributes );
/* Same for the children (presumably freed recursively - the loop body is
 * not visible in this excerpt). */
729 for( l = xtag->children; l; l = l->next )
731 child = (XTag *)l->data;
734 xlist_free( xtag->children );
/*
 * xtag_new_parse: parse the text `s` into a tag tree.
 * `n` is the extent of the buffer; -1 means "no end pointer", in which case
 * only the NUL terminator bounds the scan. Leading whitespace is skipped.
 * If more than one top-level node is found, the nodes are collected as
 * children of an anonymous wrapper tag (name and pcdata both NULL).
 */
739 static XTag *xtag_new_parse( const char *s, int n )
742 XTag *tag, *ttag, *wrapper;
745 parser.current_tag = NULL;
746 parser.start = (char *)s;
748 if( n == -1 ) parser.end = NULL;
752 fprintf (stderr, "empty buffer\n");
756 else parser.end = (char *)&s[n];
758 /* can't have whitespace pcdata outside rootnode */
759 xtag_skip_whitespace( &parser );
761 tag = xtag_parse_tag( &parser );
766 fprintf (stderr, "invalid file\n");
/* A second top-level node exists: wrap all top-level nodes in one tag. */
772 if( (ttag = xtag_parse_tag( &parser )) != NULL )
780 wrapper = xmalloc( sizeof(XTag) );
781 wrapper->name = NULL;
782 wrapper->pcdata = NULL;
783 wrapper->parent = NULL;
784 wrapper->attributes = NULL;
785 wrapper->children = NULL;
786 wrapper->current_child = NULL;
788 wrapper->children = xlist_append( wrapper->children, tag );
789 wrapper->children = xlist_append( wrapper->children, ttag );
/* Keep collecting any further top-level nodes. */
791 while( (ttag = xtag_parse_tag( &parser )) != NULL )
799 wrapper->children = xlist_append( wrapper->children, ttag );
807 static char *xtag_get_name( XTag *xtag )
809 return xtag ? xtag->name : NULL;
813 static char *xtag_get_pcdata( XTag *xtag )
818 if( xtag == NULL ) return NULL;
820 for( l = xtag->children; l; l = l->next )
822 child = (XTag *)l->data;
823 if( child->pcdata != NULL )
825 return child->pcdata;
832 static char *xtag_get_attribute( XTag *xtag, char *attribute )
837 if( xtag == NULL ) return NULL;
839 for( l = xtag->attributes; l; l = l->next )
841 if( (attr = (XAttribute *)l->data) != NULL )
843 if( !strcmp( attr->name, attribute ) ) return attr->value;
851 static XTag *xtag_first_child( XTag *xtag, char *name )
856 if( xtag == NULL ) return NULL;
857 if( (l = xtag->children) == NULL ) return NULL;
861 xtag->current_child = l;
862 return (XTag *)l->data;
865 for( ; l; l = l->next )
867 child = (XTag *)l->data;
869 if( !strcmp( child->name, name ) )
871 xtag->current_child = l;
876 xtag->current_child = NULL;
881 static XTag *xtag_next_child( XTag *xtag, char *name )
886 if( xtag == NULL ) return NULL;
888 if( (l = xtag->current_child) == NULL )
889 return xtag_first_child( xtag, name );
891 if( (l = l->next) == NULL ) return NULL;
895 xtag->current_child = l;
896 return (XTag *)l->data;
899 for( ; l; l = l->next )
901 child = (XTag *)l->data;
903 if( !strcmp( child->name, name ) )
905 xtag->current_child = l;
910 xtag->current_child = NULL;
917 * This snprints function takes a variable list of char *, the last of
918 * which must be NULL, and prints each in turn to buf.
919 * Returns C99-style total length that would have been written, even if
920 * this is larger than n.
/*
 * xtag_snprints: concatenate a NULL-terminated variable list of strings
 * into `buf`, writing at most `n` bytes in total. No NUL terminator is
 * appended. Returns the combined length of all the strings, even when
 * that exceeds `n` (C99 snprintf-style "would have written" count).
 */
static int xtag_snprints( char *buf, int n, ... )
{
    va_list args;
    const char *psz_piece;
    int i_total = 0;

    va_start( args, n );

    while( (psz_piece = va_arg( args, char * )) != NULL )
    {
        int i_len = strlen( psz_piece );
        int i_copy = ( n < i_len ) ? n : i_len;

        /* Copy only what still fits; longer input is counted but dropped. */
        if( i_copy > 0 )
        {
            memcpy( buf, psz_piece, i_copy );
            buf += i_copy;
            n -= i_copy;
        }

        i_total += i_len;
    }

    va_end( args );

    return i_total;
}
/*
 * xtag_snprint: serialise the tag tree rooted at `xtag` into `buf`
 * (capacity `n`). PCDATA nodes are written as their text; elements as
 * "<name attr="value"...>", then their children (recursively), then
 * "</name>", or as "<name .../>" when they have no children.
 * The buffer position is advanced by at most the remaining capacity
 * after each piece.
 */
949 static int xtag_snprint( char *buf, int n, XTag *xtag )
957 buf += __MIN(n, N); \
963 if( n > 0 ) buf[0] = '\0';
969 nn = xtag_snprints( buf, n, xtag->pcdata, NULL );
977 nn = xtag_snprints( buf, n, "<", xtag->name, NULL );
980 for( l = xtag->attributes; l; l = l->next )
982 attr = (XAttribute *)l->data;
984 nn = xtag_snprints( buf, n, " ", attr->name, "=\"", attr->value,
/* Childless elements are written in the self-closing form. */
989 if( xtag->children == NULL )
991 nn = xtag_snprints ( buf, n, "/>", NULL );
997 nn = xtag_snprints( buf, n, ">", NULL );
1001 for( l = xtag->children; l; l = l->next )
1003 child = (XTag *)l->data;
1005 nn = xtag_snprint( buf, n, child );
1011 nn = xtag_snprints( buf, n, "</", xtag->name, ">", NULL );