X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=sidebyside;f=src%2Ftext%2Fstrings.c;h=736bb872592cc1877e6d90f39a669fdf9c39ade0;hb=2341f8bbc23622d0d8b8a511b7cc745d20beb878;hp=71caaaa8e2cbd14132f3f70c4b415f01fdc4c371;hpb=79de221529a46aa6f247d71fbbbcc20105802a91;p=vlc diff --git a/src/text/strings.c b/src/text/strings.c index 71caaaa8e2..736bb87259 100644 --- a/src/text/strings.c +++ b/src/text/strings.c @@ -2,6 +2,7 @@ * strings.c: String related functions ***************************************************************************** * Copyright (C) 2006 the VideoLAN team + * Copyright (C) 2008-2009 Rémi Denis-Courmont * $Id$ * * Authors: Antoine Cellerier @@ -35,6 +36,7 @@ /* Needed by str_format_time */ #include +#include /* Needed by str_format_meta */ #include @@ -47,90 +49,7 @@ #include /** - * Unescape URI encoded string - * \return decoded duplicated string - */ -char *unescape_URI_duplicate( const char *psz ) -{ - char *psz_dup = strdup( psz ); - unescape_URI( psz_dup ); - return psz_dup; -} - -/** - * Unescape URI encoded string in place - * \return nothing - */ -void unescape_URI( char *psz ) -{ - unsigned char *in = (unsigned char *)psz, *out = in, c; - if( psz == NULL ) - return; - - while( ( c = *in++ ) != '\0' ) - { - switch( c ) - { - case '%': - { - char val[5], *pval = val; - unsigned long cp; - - switch( c = *in++ ) - { - case '\0': - return; - - case 'u': - case 'U': - if( ( *pval++ = *in++ ) == '\0' ) - return; - if( ( *pval++ = *in++ ) == '\0' ) - return; - c = *in++; - - default: - *pval++ = c; - if( ( *pval++ = *in++ ) == '\0' ) - return; - *pval = '\0'; - } - - cp = strtoul( val, NULL, 0x10 ); - if( cp < 0x80 ) - *out++ = cp; - else - if( cp < 0x800 ) - { - *out++ = (( cp >> 6) | 0xc0); - *out++ = (( cp & 0x3f) | 0x80); - } - else - { - assert( cp < 0x10000 ); - *out++ = (( cp >> 12) | 0xe0); - *out++ = (((cp >> 6) & 0x3f) | 0x80); - *out++ = (( cp & 0x3f) | 0x80); - } - break; - } - - /* + is not a special case - it means plus, not space. */ - - default: - /* Inserting non-ASCII or non-printable characters is unsafe, - * and no sane browser will send these unencoded */ - if( ( c < 32 ) || ( c > 127 ) ) - *out++ = '?'; - else - *out++ = c; - } - } - *out = '\0'; -} - -/** - * Decode encoded URI string + * Decode encoded URI component. See also decode_URI(). * \return decoded duplicated string */ char *decode_URI_duplicate( const char *psz ) @@ -141,14 +60,23 @@ char *decode_URI_duplicate( const char *psz ) } /** - * Decode encoded URI string in place - * \return nothing + * Decode an encoded URI component in place. + * This function does NOT decode entire URIs. + * It decodes components (e.g. host name, directory, file name). + * Decoded URIs do not exist in the real world (see RFC3986 §2.4). + * Complete URIs are always "encoded" (or they are syntaxically invalid). + * + * Note that URI encoding is different from Javascript escaping. Especially, + * white spaces and Unicode non-ASCII code points are encoded differently. + * + * \return psz on success, NULL if it was not properly encoded */ -void decode_URI( char *psz ) +char *decode_URI( char *psz ) { unsigned char *in = (unsigned char *)psz, *out = in, c; + if( psz == NULL ) - return; + return NULL; while( ( c = *in++ ) != '\0' ) { @@ -160,14 +88,14 @@ void decode_URI( char *psz ) if( ( ( hex[0] = *in++ ) == 0 ) || ( ( hex[1] = *in++ ) == 0 ) ) - return; + return NULL; hex[2] = '\0'; *out++ = (unsigned char)strtoul( hex, NULL, 0x10 ); break; } - case '+': + case '+': /* This is HTTP forms, not URI decoding... */ *out++ = ' '; break; @@ -182,6 +110,7 @@ void decode_URI( char *psz ) } *out = '\0'; EnsureUTF8( psz ); + return psz; } static inline bool isurisafe( int c ) @@ -193,23 +122,13 @@ static inline bool isurisafe( int c ) || ( strchr( "-._~", c ) != NULL ); } -/** - * Encodes an URI component (RFC3986 §2). - * - * @param psz_uri nul-terminated UTF-8 representation of the component. - * Obviously, you can't pass an URI containing a nul character, but you don't - * want to do that, do you? - * - * @return encoded string (must be free()'d), or NULL for ENOMEM. - */ -char *encode_URI_component( const char *psz_uri ) +static char *encode_URI_bytes (const char *psz_uri, size_t len) { - char *psz_enc = malloc ((3 * strlen (psz_uri)) + 1), *out = psz_enc; - + char *psz_enc = malloc (3 * len + 1), *out = psz_enc; if (psz_enc == NULL) return NULL; - while (*psz_uri) + for (size_t i = 0; i < len; i++) { static const char hex[16] = "0123456789ABCDEF"; uint8_t c = *psz_uri; @@ -232,6 +151,21 @@ char *encode_URI_component( const char *psz_uri ) return out ? out : psz_enc; /* realloc() can fail (safe) */ } +/** + * Encodes an URI component (RFC3986 §2). + * + * @param psz_uri nul-terminated UTF-8 representation of the component. + * Obviously, you can't pass an URI containing a nul character, but you don't + * want to do that, do you? + * + * @return encoded string (must be free()'d), or NULL for ENOMEM. + */ +char *encode_URI_component( const char *psz_uri ) +{ + return encode_URI_bytes (psz_uri, strlen (psz_uri)); +} + + static const struct xml_entity_s { char psz_entity[8]; @@ -1100,16 +1034,10 @@ char* filename_sanitize( const char *str_origin ) */ void path_sanitize( char *str ) { -#if 0 - /* - * Uncomment the two blocks to prevent /../ or /./, i'm not sure that we - * want to. - */ - char *prev = str - 1; -#endif #ifdef WIN32 /* check drive prefix if path is absolute */ - if( isalpha(*str) && (':' == *(str+1)) ) + if( (((unsigned char)(str[0] - 'A') < 26) + || ((unsigned char)(str[0] - 'a') < 26)) && (':' == str[1]) ) str += 2; #endif while( *str ) @@ -1118,37 +1046,86 @@ void path_sanitize( char *str ) if( *str == ':' ) *str = '_'; #elif defined( WIN32 ) - switch( *str ) - { - case '*': - case '"': - case '?': - case ':': - case '|': - case '<': - case '>': - *str = '_'; - } + if( strchr( "*\"?:|<>", *str ) ) + *str = '_'; + if( *str == '/' ) + *str = DIR_SEP_CHAR; #endif -#if 0 - if( *str == '/' + str++; + } +} + +#include + +/** + * Convert a file path to an URI. If already an URI, do nothing. + */ +char *make_URI (const char *path) +{ + if (path == NULL) + return NULL; + if (strstr (path, "://") != NULL) + return strdup (path); /* Already an URI */ + /* Note: VLC cannot handle URI schemes without double slash after the + * scheme name (such as mailto: or news:). */ + + char *buf; #ifdef WIN32 - || *str == '\\' + if (isalpha (path[0]) && (path[1] == ':')) + { + if (asprintf (&buf, "file:///%c:", path[0]) == -1) + buf = NULL; + path += 2; + } + else +#endif +#if 0 + /* Windows UNC paths (file://host/share/path instead of file:///path) */ + if (!strncmp (path, "\\\\", 2)) + { + path += 2; + buf = strdup ("file://"); + } + else #endif - ) + if (path[0] != DIR_SEP_CHAR) + { /* Relative path: prepend the current working directory */ + char cwd[PATH_MAX]; + + if (getcwd (cwd, sizeof (cwd)) == NULL) /* FIXME: UTF8? */ + return NULL; + if (asprintf (&buf, "%s/%s", cwd, path) == -1) + return NULL; + char *ret = make_URI (buf); + free (buf); + return ret; + } + else + buf = strdup ("file://"); + if (buf == NULL) + return NULL; + + assert (path[0] == DIR_SEP_CHAR); + + /* Absolute file path */ + for (const char *ptr = path + 1;; ptr++) + { + size_t len = strcspn (ptr, DIR_SEP); + char *component = encode_URI_bytes (ptr, len); + if (component == NULL) { - if( str - prev == 2 && prev[1] == '.' ) - { - prev[1] = '.'; - } - else if( str - prev == 3 && prev[1] == '.' && prev[2] == '.' ) - { - prev[1] = '_'; - prev[2] = '_'; - } - prev = str; + free (buf); + return NULL; } -#endif - str++; + char *uri; + int val = asprintf (&uri, "%s/%s", buf, component); + free (component); + free (buf); + if (val == -1) + return NULL; + buf = uri; + ptr += len; + if (*ptr == '\0') + return buf; } }