/*****************************************************************************
* vlc_url.h: URL related macros
*****************************************************************************
- * Copyright (C) 2002-2005 the VideoLAN team
+ * Copyright (C) 2002-2006 the VideoLAN team
* $Id$
*
* Authors: Christophe Massiot <massiot@via.ecp.fr>
return ( (unsigned char)( c - 'a' ) < 26 )
|| ( (unsigned char)( c - 'A' ) < 26 )
|| ( (unsigned char)( c - '0' ) < 10 )
+ || ( (unsigned char)( c ) > 127 )
/* Hmm, we should not encode character that are allowed in URLs
* (even if they are not URL-safe), nor URL-safe characters.
* We still encode some of them because of Microsoft's crap browser.
|| ( strchr( "-_.", c ) != NULL );
}
+static inline char url_hexchar( int c )
+{
+ return ( c < 10 ) ? c + '0' : c + 'A' - 10;
+}
+
/*****************************************************************************
* vlc_UrlEncode:
*****************************************************************************
*****************************************************************************/
static inline char *vlc_UrlEncode( const char *psz_url )
{
- char *psz_enc, *out;
+ char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
const unsigned char *in;
- psz_enc = (char *)malloc( 3 * strlen( psz_url ) + 1 );
- if( psz_enc == NULL )
- return NULL;
-
- out = psz_enc;
for( in = (const unsigned char *)psz_url; *in; in++ )
{
unsigned char c = *in;
*out++ = (char)c;
else
{
+ uint16_t cp;
+
*out++ = '%';
- *out++ = ( ( c >> 4 ) >= 0xA ) ? 'A' + ( c >> 4 ) - 0xA
- : '0' + ( c >> 4 );
- *out++ = ( ( c & 0xf ) >= 0xA ) ? 'A' + ( c & 0xf ) - 0xA
- : '0' + ( c & 0xf );
+ /* UTF-8 to UCS-2 conversion */
+ if( ( c & 0x7f ) == 0 )
+ cp = c;
+ else
+ if( ( c & 0xe0 ) == 0xc0 )
+ {
+ cp = ((c & 0x1f) << 6) | (in[1] & 0x3f);
+ in++;
+ }
+ else
+ if( ( c & 0xf0 ) == 0xe0 )
+ {
+ cp = ((c & 0xf) << 12) | ((in[1] & 0x3f) << 6) | (in[2] & 0x3f);
+ in += 2;
+ }
+ else
+ /* cannot URL-encode code points outside the BMP */
+ return NULL;
+
+ if( cp < 0xff )
+ {
+ /* Encode ISO-8859-1 characters */
+ *out++ = url_hexchar( cp >> 4 );
+ *out++ = url_hexchar( cp & 0xf );
+ }
+ else
+ {
+ /* Encode non-Latin-1 characters */
+ *out++ = 'u';
+ *out++ = url_hexchar( cp >> 12 );
+ *out++ = url_hexchar((cp >> 8) & 0xf );
+ *out++ = url_hexchar((cp >> 4) & 0xf );
+ *out++ = url_hexchar( cp & 0xf );
+ }
}
}
*out++ = '\0';
- return (char *)realloc( psz_enc, out - psz_enc );
+ return strdup( psz_enc );
}
/*****************************************************************************