From 2c113555e9fe760a4cdd27f67f82f8029bd6ce24 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Andre Date: Wed, 13 May 2009 01:56:17 +0800 Subject: [PATCH] Zip: fix handling of special characters (#2467) --- modules/access/zip/zip.h | 3 ++ modules/access/zip/zipaccess.c | 66 ++++++++++++++++++++---- modules/access/zip/zipstream.c | 94 +++++++++++++++++++++++++++++----- 3 files changed, 140 insertions(+), 23 deletions(-) diff --git a/modules/access/zip/zip.h b/modules/access/zip/zip.h index c78a6ca044..d52f8d5208 100644 --- a/modules/access/zip/zip.h +++ b/modules/access/zip/zip.h @@ -58,6 +58,9 @@ void StreamClose( vlc_object_t* ); int AccessOpen( vlc_object_t *p_this ); void AccessClose( vlc_object_t *p_this ); +/** Common function */ +bool isAllowedChar( char c ); + /** ************************************************************************** * zipIO function headers : how to use vlc_stream to read the zip * Note: static because the implementations differ diff --git a/modules/access/zip/zipaccess.c b/modules/access/zip/zipaccess.c index 3689e12f24..1c5d67873d 100644 --- a/modules/access/zip/zipaccess.c +++ b/modules/access/zip/zipaccess.c @@ -22,7 +22,7 @@ *****************************************************************************/ /** @todo: - * - implement crypto (using url zip://user:password@path-to-archive#ZIP#file + * - implement crypto (using url zip://user:password@path-to-archive|file * - read files in zip with long name (use unz_file_info.size_filename) * - multi-volume archive support ? 
/** **************************************************************************
 * \brief Numeric value of one hexadecimal digit
 * \param c character to convert
 * \return 0..15 for [0-9a-fA-F], -1 for anything else (including '\0')
 *****************************************************************************/
static int hexDigitValue( char c )
{
    if( c >= '0' && c <= '9' ) return c - '0';
    if( c >= 'a' && c <= 'f' ) return c - 'a' + 10;
    if( c >= 'A' && c <= 'F' ) return c - 'A' + 10;
    return -1;
}

/** **************************************************************************
 * \brief Unescape valid XML string
 * The exact reverse of escapeToXml (zipstream.c): every '?XX' sequence
 * (exactly two hexadecimal digits) is turned back into the byte 0xXX, and
 * every other character must satisfy isAllowedChar().
 *
 * \param psz_text NUL-terminated escaped string (must not be NULL)
 * \return a newly allocated, NUL-terminated string the caller must free(),
 *         or NULL if allocation fails or psz_text is not a valid encoding
 *         (unexpected character, truncated/malformed '?XX', or '?00').
 *****************************************************************************/
static char *unescapeXml( const char *psz_text )
{
    /* The decoded string is never longer than the encoded one */
    char *psz_ret = malloc( strlen( psz_text ) + 1 );
    if( !psz_ret ) return NULL;

    char *psz_out = psz_ret;
    for( const char *psz_in = psz_text; *psz_in; ++psz_in )
    {
        if( *psz_in == '?' )
        {
            /* psz_in[1] is at worst the terminator; psz_in[2] is only read
             * once psz_in[1] is known to be a digit, so we never read past
             * the end of the string. */
            const int i_hi = hexDigitValue( psz_in[1] );
            const int i_lo = ( i_hi < 0 ) ? -1 : hexDigitValue( psz_in[2] );

            /* Reject malformed escapes, and '?00' which would embed a NUL
             * and silently truncate the resulting path. */
            if( i_lo < 0 || ( i_hi == 0 && i_lo == 0 ) )
                goto error;

            *psz_out++ = (char)( ( i_hi << 4 ) | i_lo );
            psz_in += 2; /* skip both digits; the loop skips the '?' */
        }
        else if( isAllowedChar( *psz_in ) )
        {
            *psz_out++ = *psz_in;
        }
        else
        {
            /* Invalid character encoding for the URL */
            goto error;
        }
    }
    *psz_out = '\0';

    return psz_ret;

error:
    free( psz_ret );
    return NULL;
}
/** **************************************************************************
 * \brief Tell whether a character may appear unescaped in the Xml.
 * Accepted: ASCII letters (a-z, A-Z), digits (0-9) and the punctuation
 * characters ':', '/', '\', '.', ' ' and '_'. Everything else is refused.
 * \param c character to test
 * \return true if c is accepted, false otherwise
 *****************************************************************************/
bool isAllowedChar( char c )
{
    /* Punctuation accepted verbatim */
    switch( c )
    {
        case ':':
        case '/':
        case '\\':
        case '.':
        case ' ':
        case '_':
            return true;
        default:
            break;
    }

    /* Alphanumeric ranges */
    const bool b_lower = ( 'a' <= c && c <= 'z' );
    const bool b_upper = ( 'A' <= c && c <= 'Z' );
    const bool b_digit = ( '0' <= c && c <= '9' );

    return b_lower || b_upper || b_digit;
}
+ * NOTE: We cannot trust VLC internal Web encoding functions + * because they are not able to encode and decode some rare utf-8 + * characters properly. Also, we don't control exactly when they are + * called (from this module). + *****************************************************************************/ +static int escapeToXml( char **ppsz_encoded, const char *psz_url ) +{ + char *psz_iter, *psz_tmp; + + /* Count number of unallowed characters in psz_url */ + size_t i_num = 0, i_len = 0; + for( psz_iter = (char*) psz_url; *psz_iter; ++psz_iter ) + { + if( isAllowedChar( *psz_iter ) ) + { + i_len++; + } + else + { + i_len++; + i_num++; + } + } + + /* Special case */ + if( i_num == 0 ) + { + *ppsz_encoded = malloc( i_len + 1 ); + memcpy( *ppsz_encoded, psz_url, i_len + 1 ); + return VLC_SUCCESS; + } + + /* Copy string, replacing invalid characters */ + char *psz_ret = malloc( i_len + 3*i_num + 2 ); + if( !psz_ret ) return VLC_ENOMEM; + + for( psz_iter = (char*) psz_url, psz_tmp = psz_ret; + *psz_iter; ++psz_iter, ++psz_tmp ) + { + if( isAllowedChar( *psz_iter ) ) + { + *psz_tmp = *psz_iter; + } + else + { + *(psz_tmp++) = '?'; + snprintf( psz_tmp, 3, "%02x", ( *psz_iter & 0x000000FF ) ); + psz_tmp++; + } + } + *psz_tmp = '\0'; + + /* Return success */ + *ppsz_encoded = psz_ret; + return VLC_SUCCESS; +} + /** ************************************************************************** * \brief Write the XSPF playlist given the list of files *****************************************************************************/ @@ -488,9 +561,10 @@ static int WriteXSPF( char **pp_buffer, vlc_array_t *p_filenames, /* Root node */ node *playlist = new_node( psz_zip ); - /* Web-Encode the URI and append '!' 
*/ - char *psz_pathtozip = vlc_UrlEncode( psz_zippath ); - if( astrcatf( &psz_pathtozip, ZIP_SEP ) < 0 ) return -1; + /* Encode the URI and append ZIP_SEP */ + char *psz_pathtozip; + escapeToXml( &psz_pathtozip, psz_zippath ); + if( astrcatf( &psz_pathtozip, "%s", ZIP_SEP ) < 0 ) return -1; int i_track = 0; for( int i = 0; i < vlc_array_count( p_filenames ); ++i ) @@ -514,12 +588,9 @@ static int WriteXSPF( char **pp_buffer, vlc_array_t *p_filenames, /* Build full MRL */ char *psz_path = strdup( psz_pathtozip ); - if( astrcatf( &psz_path, psz_name ) < 0 ) return -1; - - /* Double url-encode */ - char *psz_tmp = psz_path; - psz_path = vlc_UrlEncode( psz_tmp ); - free( psz_tmp ); + char *psz_escapedName; + escapeToXml( &psz_escapedName, psz_name ); + if( astrcatf( &psz_path, "%s", psz_escapedName ) < 0 ) return -1; /* Track information */ if( astrcatf( pp_buffer, @@ -771,7 +842,6 @@ static int ZCALLBACK ZipIO_Error( void* opaque, void* stream ) { (void)opaque; (void)stream; - //msg_Dbg( p_access, "error" ); return 0; } -- 2.39.2