From ab4f5ab7148b76ac532eea9273185c53130afd80 Mon Sep 17 00:00:00 2001
From: Dan Dennedy
Date: Sun, 22 Jan 2012 15:13:49 -0800
Subject: [PATCH] quick fix for xml containing bad characters
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Per the XML standard only the following characters are permitted:
Char       ::=      #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
[#x10000-#x10FFFF]  /* any Unicode character, excluding the surrogate
blocks, FFFE, and FFFF. */

This fix does not properly handle the ranges for multi-byte characters,
but it does remove the invalid single-byte "control characters."
---
Review notes (this area is ignored when the patch is applied):
 - filter_restricted() now allocates room for the string terminator; the
   previous size of strlen( in ) left the result unterminated whenever no
   character was removed.
 - The FILTER_WCHAR variant sizes its buffer from wcslen() in wchar_t
   units instead of calling strlen() on a wide string.
 - The byte variant compares as unsigned char so that bytes >= 0x80 are
   kept even where plain char is signed.
 - Allocation failure now returns NULL instead of writing through NULL.
 - Callers free the pointer returned by filter_restricted(), not the
   pointer advanced past the root prefix.
 - NOTE(review): the header names on the three context #include lines of
   the first hunk were already missing from the copy under review; they
   are left as found and must be restored from the target file before
   that hunk can apply. Indentation was likewise reconstructed and
   should be checked against the target file.
 - NOTE(review): the blob ids on the index line are those of the
   original patch and do not describe the revised post-image.

 src/modules/xml/consumer_xml.c | 76 ++++++++++++++++++++++++++++++----
 1 file changed, 68 insertions(+), 8 deletions(-)

diff --git a/src/modules/xml/consumer_xml.c b/src/modules/xml/consumer_xml.c
index f744fcb9..98cc1e92 100644
--- a/src/modules/xml/consumer_xml.c
+++ b/src/modules/xml/consumer_xml.c
@@ -26,6 +26,9 @@
 #include
 #include
 #include
+#ifdef FILTER_WCHAR
+#include <wchar.h>
+#endif
 
 #define ID_SIZE 128
 
@@ -59,6 +62,56 @@ static int consumer_is_stopped( mlt_consumer this );
 static void *consumer_thread( void *arg );
 static void serialise_service( serialise_context context, mlt_service service, xmlNode *node );
 
+#ifdef FILTER_WCHAR
+
+/* Copy a wide string, dropping the code points that XML 1.0 forbids.
+ * Returns a newly allocated string the caller must free(), or NULL if
+ * in is NULL or the allocation fails. */
+void* filter_restricted( const wchar_t *in )
+{
+	if ( !in ) return NULL;
+	size_t i, j, n = wcslen( in );
+	/* n + 1 elements: room for every character plus the terminator. */
+	wchar_t *out = calloc( n + 1, sizeof( *out ) );
+	if ( !out ) return NULL;
+	for ( i = 0, j = 0; i < n; i++ )
+	{
+		wchar_t w = in[i];
+		if ( w == 0x9 || w == 0xA || w == 0xD ||
+				( w >= 0x20 && w <= 0xD7FF ) ||
+				( w >= 0xE000 && w <= 0xFFFD ) ||
+				( w >= 0x10000 && w <= 0x10FFFF ) )
+			out[ j++ ] = w;
+	}
+	return out;
+}
+
+#else
+
+/* Copy a byte string, dropping the single-byte control characters that
+ * XML 1.0 forbids. Bytes >= 0x80 (multi-byte sequences) pass through.
+ * Returns a newly allocated string the caller must free(), or NULL if
+ * in is NULL or the allocation fails. */
+void* filter_restricted( const char *in )
+{
+	if ( !in ) return NULL;
+	size_t i, j, n = strlen( in );
+	/* n + 1 bytes: room for every character plus the terminator. */
+	char *out = calloc( 1, n + 1 );
+	if ( !out ) return NULL;
+	for ( i = 0, j = 0; i < n; i++ )
+	{
+		/* Compare as unsigned: plain char may be signed, which would
+		 * make every byte >= 0x80 look negative and get dropped. */
+		unsigned char c = (unsigned char) in[i];
+		if ( c == 0x9 || c == 0xA || c == 0xD || c >= 0x20 )
+			out[ j++ ] = (char) c;
+	}
+	return out;
+}
+
+#endif
+
 typedef enum
 {
 	xml_existing,
@@ -200,12 +253,17 @@ static void serialise_properties( serialise_context context, mlt_properties prop
 			 strcmp( name, "width" ) &&
 			 strcmp( name, "height" ) )
 		{
-			char *value = mlt_properties_get_value( properties, i );
-			int rootlen = strlen( context->root );
-			if ( rootlen && !strncmp( value, context->root, rootlen ) && value[ rootlen ] == '/' )
-				value += rootlen + 1;
-			p = xmlNewTextChild( node, NULL, _x("property"), _x(value) );
-			xmlNewProp( p, _x("name"), _x(name) );
+			char *filtered = filter_restricted( mlt_properties_get_value( properties, i ) );
+			if ( filtered )
+			{
+				char *value = filtered;
+				int rootlen = strlen( context->root );
+				if ( rootlen && !strncmp( value, context->root, rootlen ) && value[ rootlen ] == '/' )
+					value += rootlen + 1;
+				p = xmlNewTextChild( node, NULL, _x("property"), _x(value) );
+				xmlNewProp( p, _x("name"), _x(name) );
+				free( filtered );
+			}
 		}
 	}
 }
@@ -221,14 +279,16 @@ static void serialise_store_properties( serialise_context context, mlt_propertie
 		char *name = mlt_properties_get_name( properties, i );
 		if ( !strncmp( name, store, strlen( store ) ) )
 		{
-			char *value = mlt_properties_get_value( properties, i );
-			if ( value != NULL )
+			char *filtered = filter_restricted( mlt_properties_get_value( properties, i ) );
+			if ( filtered )
 			{
+				char *value = filtered;
 				int rootlen = strlen( context->root );
 				if ( rootlen && !strncmp( value, context->root, rootlen ) && value[ rootlen ] == '/' )
 					value += rootlen + 1;
 				p = xmlNewTextChild( node, NULL, _x("property"), _x(value) );
 				xmlNewProp( p, _x("name"), _x(name) );
+				free( filtered );
 			}
 		}
 	}
-- 
2.39.2