* strings.c: String related functions
*****************************************************************************
* Copyright (C) 2006 the VideoLAN team
+ * Copyright (C) 2008-2009 Rémi Denis-Courmont
* $Id$
*
* Authors: Antoine Cellerier <dionoea at videolan dot org>
/* Needed by str_format_time */
#include <time.h>
+#include <limits.h>
/* Needed by str_format_meta */
#include <vlc_input.h>
#include <vlc_charset.h>
/**
- * Unescape URI encoded string
- * \return decoded duplicated string
- */
-char *unescape_URI_duplicate( const char *psz )
-{
- char *psz_dup = strdup( psz );
- unescape_URI( psz_dup );
- return psz_dup;
-}
-
-/**
- * Unescape URI encoded string in place
- * \return nothing
- */
-void unescape_URI( char *psz )
-{
- unsigned char *in = (unsigned char *)psz, *out = in, c;
- if( psz == NULL )
- return;
-
- while( ( c = *in++ ) != '\0' )
- {
- switch( c )
- {
- case '%':
- {
- char val[5], *pval = val;
- unsigned long cp;
-
- switch( c = *in++ )
- {
- case '\0':
- return;
-
- case 'u':
- case 'U':
- if( ( *pval++ = *in++ ) == '\0' )
- return;
- if( ( *pval++ = *in++ ) == '\0' )
- return;
- c = *in++;
-
- default:
- *pval++ = c;
- if( ( *pval++ = *in++ ) == '\0' )
- return;
- *pval = '\0';
- }
-
- cp = strtoul( val, NULL, 0x10 );
- if( cp < 0x80 )
- *out++ = cp;
- else
- if( cp < 0x800 )
- {
- *out++ = (( cp >> 6) | 0xc0);
- *out++ = (( cp & 0x3f) | 0x80);
- }
- else
- {
- assert( cp < 0x10000 );
- *out++ = (( cp >> 12) | 0xe0);
- *out++ = (((cp >> 6) & 0x3f) | 0x80);
- *out++ = (( cp & 0x3f) | 0x80);
- }
- break;
- }
-
- /* + is not a special case - it means plus, not space. */
-
- default:
- /* Inserting non-ASCII or non-printable characters is unsafe,
- * and no sane browser will send these unencoded */
- if( ( c < 32 ) || ( c > 127 ) )
- *out++ = '?';
- else
- *out++ = c;
- }
- }
- *out = '\0';
-}
-
-/**
- * Decode encoded URI string
+ * Decode encoded URI component. See also decode_URI().
* \return decoded duplicated string
*/
char *decode_URI_duplicate( const char *psz )
}
/**
- * Decode encoded URI string in place
- * \return nothing
+ * Decode an encoded URI component in place.
+ * <b>This function does NOT decode entire URIs.</b>
+ * It decodes components (e.g. host name, directory, file name).
+ * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
+ * Complete URIs are always "encoded" (or they are syntactically invalid).
+ *
+ * Note that URI encoding is different from JavaScript escaping. In particular,
+ * whitespace and non-ASCII Unicode code points are encoded differently.
+ *
+ * \return psz on success, NULL if it was not properly encoded
*/
-void decode_URI( char *psz )
+char *decode_URI( char *psz )
{
unsigned char *in = (unsigned char *)psz, *out = in, c;
+
if( psz == NULL )
- return;
+ return NULL;
while( ( c = *in++ ) != '\0' )
{
if( ( ( hex[0] = *in++ ) == 0 )
|| ( ( hex[1] = *in++ ) == 0 ) )
- return;
+ return NULL;
hex[2] = '\0';
*out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
break;
}
- case '+':
+ case '+': /* This is HTTP forms, not URI decoding... */
*out++ = ' ';
break;
}
*out = '\0';
EnsureUTF8( psz );
+ return psz;
}
static inline bool isurisafe( int c )
|| ( strchr( "-._~", c ) != NULL );
}
-/**
- * Encodes an URI component (RFC3986 §2).
- *
- * @param psz_uri nul-terminated UTF-8 representation of the component.
- * Obviously, you can't pass an URI containing a nul character, but you don't
- * want to do that, do you?
- *
- * @return encoded string (must be free()'d), or NULL for ENOMEM.
- */
-char *encode_URI_component( const char *psz_uri )
+static char *encode_URI_bytes (const char *psz_uri, size_t len)
{
- char *psz_enc = malloc ((3 * strlen (psz_uri)) + 1), *out = psz_enc;
-
+ char *psz_enc = malloc (3 * len + 1), *out = psz_enc;
if (psz_enc == NULL)
return NULL;
- while (*psz_uri)
+ for (size_t i = 0; i < len; i++)
{
static const char hex[16] = "0123456789ABCDEF";
uint8_t c = *psz_uri;
return out ? out : psz_enc; /* realloc() can fail (safe) */
}
+/**
+ * Encodes a URI component (RFC3986 §2).
+ *
+ * @param psz_uri nul-terminated UTF-8 representation of the component.
+ * The length is computed with strlen(), so a component containing a nul
+ * character cannot be passed through this function.
+ *
+ * @return encoded string (must be free()'d), or NULL if psz_uri is NULL or
+ * if memory could not be allocated.
+ */
+char *encode_URI_component( const char *psz_uri )
+{
+    /* strlen(NULL) is undefined behaviour: reject NULL explicitly, like
+     * decode_URI() and make_URI() do. */
+    if( psz_uri == NULL )
+        return NULL;
+    return encode_URI_bytes (psz_uri, strlen (psz_uri));
+}
+
+
static const struct xml_entity_s
{
char psz_entity[8];
*/
void path_sanitize( char *str )
{
-#if 0
- /*
- * Uncomment the two blocks to prevent /../ or /./, i'm not sure that we
- * want to.
- */
- char *prev = str - 1;
-#endif
#ifdef WIN32
/* check drive prefix if path is absolute */
- if( isalpha(*str) && (':' == *(str+1)) )
+ if( (((unsigned char)(str[0] - 'A') < 26)
+ || ((unsigned char)(str[0] - 'a') < 26)) && (':' == str[1]) )
str += 2;
#endif
while( *str )
if( *str == ':' )
*str = '_';
#elif defined( WIN32 )
- switch( *str )
- {
- case '*':
- case '"':
- case '?':
- case ':':
- case '|':
- case '<':
- case '>':
- *str = '_';
- }
+ if( strchr( "*\"?:|<>", *str ) )
+ *str = '_';
+ if( *str == '/' )
+ *str = DIR_SEP_CHAR;
#endif
-#if 0
- if( *str == '/'
+ str++;
+ }
+}
+
+#include <vlc_url.h>
+
+/**
+ * Converts a local file path to a file:// URI.
+ *
+ * If the path already contains "://", it is assumed to be a URI and is
+ * merely duplicated. A relative path is first made absolute by prepending
+ * the current working directory. Each path component is then encoded with
+ * encode_URI_bytes() and appended after a '/'.
+ *
+ * @param path nul-terminated file path or URI (may be NULL)
+ *
+ * @return a heap-allocated URI (must be free()'d), or NULL on error:
+ * NULL input, memory allocation failure, getcwd() failure, or a path that
+ * does not start with a directory separator once its drive prefix has been
+ * removed (e.g. "C:foo").
+ */
+char *make_URI (const char *path)
+{
+    if (path == NULL)
+        return NULL;
+    if (strstr (path, "://") != NULL)
+        return strdup (path); /* Already a URI */
+    /* Note: VLC cannot handle URI schemes without double slash after the
+     * scheme name (such as mailto: or news:). */
+
+    char *buf;
#ifdef WIN32
- || *str == '\\'
+    /* ASCII-only drive letter test, same as in path_sanitize(): isalpha()
+     * must not be given a plain char that may be negative. */
+    if ((((unsigned char)(path[0] - 'A') < 26)
+      || ((unsigned char)(path[0] - 'a') < 26)) && (path[1] == ':'))
+    {
+        if (asprintf (&buf, "file:///%c:", path[0]) == -1)
+            buf = NULL;
+        path += 2;
+    }
+    else
+#endif
+#if 0
+    /* Windows UNC paths (file://host/share/path instead of file:///path) */
+    if (!strncmp (path, "\\\\", 2))
+    {
+        path += 2;
+        buf = strdup ("file://");
+    }
+    else
#endif
- )
+    if (path[0] != DIR_SEP_CHAR)
+    {   /* Relative path: prepend the current working directory */
+        char cwd[PATH_MAX];
+
+        if (getcwd (cwd, sizeof (cwd)) == NULL) /* FIXME: UTF8? */
+            return NULL;
+        /* Join with the platform separator: the loop below only splits on
+         * DIR_SEP, so a hard-coded '/' would end up inside a component
+         * whenever DIR_SEP is something else. */
+        if (asprintf (&buf, "%s%c%s", cwd, DIR_SEP_CHAR, path) == -1)
+            return NULL;
+        char *ret = make_URI (buf);
+        free (buf);
+        return ret;
+    }
+    else
+        buf = strdup ("file://");
+    if (buf == NULL)
+        return NULL;
+
+    /* Only a drive-relative path (drive prefix not followed by a separator)
+     * can get here without a leading separator. Fail cleanly rather than
+     * skipping the first character of the path in the loop below. */
+    if (path[0] != DIR_SEP_CHAR)
+    {
+        free (buf);
+        return NULL;
+    }
+
+    /* Absolute file path: append one encoded component per iteration */
+    for (const char *ptr = path + 1;; ptr++)
+    {
+        size_t len = strcspn (ptr, DIR_SEP);
+        char *component = encode_URI_bytes (ptr, len);
+        if (component == NULL)
{
- if( str - prev == 2 && prev[1] == '.' )
- {
- prev[1] = '.';
- }
- else if( str - prev == 3 && prev[1] == '.' && prev[2] == '.' )
- {
- prev[1] = '_';
- prev[2] = '_';
- }
- prev = str;
+            free (buf);
+            return NULL;
}
-#endif
- str++;
+        char *uri;
+        int val = asprintf (&uri, "%s/%s", buf, component);
+        free (component);
+        free (buf);
+        if (val == -1)
+            return NULL;
+        buf = uri;
+        ptr += len;
+        /* End of path reached; otherwise the for increment skips the
+         * separator that stopped strcspn(). */
+        if (*ptr == '\0')
+            return buf;
}
}