const char * (*IsUTF8_inner) (const char *);
const char * (*GetFallbackEncoding_inner) (void);
int (*utf8_scandir_inner) (const char *dirname, char ***namelist, int (*select)( const char * ), int (*compar)( const char **, const char ** ));
+ char * (*decode_URI_duplicate_inner) (const char *psz);
+ void (*decode_URI_inner) (char *psz);
+ char * (*encode_URI_inner) (const char *psz);
};
# if defined (__PLUGIN__)
# define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner
# define IsUTF8 (p_symbols)->IsUTF8_inner
# define GetFallbackEncoding (p_symbols)->GetFallbackEncoding_inner
# define utf8_scandir (p_symbols)->utf8_scandir_inner
+# define decode_URI_duplicate (p_symbols)->decode_URI_duplicate_inner
+# define decode_URI (p_symbols)->decode_URI_inner
+# define encode_URI (p_symbols)->encode_URI_inner
# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__)
/******************************************************************
* STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access.
((p_symbols)->IsUTF8_inner) = IsUTF8; \
((p_symbols)->GetFallbackEncoding_inner) = GetFallbackEncoding; \
((p_symbols)->utf8_scandir_inner) = utf8_scandir; \
+ ((p_symbols)->decode_URI_duplicate_inner) = decode_URI_duplicate; \
+ ((p_symbols)->decode_URI_inner) = decode_URI; \
+ ((p_symbols)->encode_URI_inner) = encode_URI; \
(p_symbols)->net_ConvertIPv4_deprecated = NULL; \
(p_symbols)->__stats_CounterGet_deprecated = NULL; \
(p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \
VLC_EXPORT( char *, unescape_URI_duplicate, ( const char *psz ) );
VLC_EXPORT( void, unescape_URI, ( char *psz ) );
+VLC_EXPORT( char *, decode_URI_duplicate, ( const char *psz ) );
+VLC_EXPORT( void, decode_URI, ( char *psz ) );
+VLC_EXPORT( char *, encode_URI, ( const char *psz ) );
-static inline int isurlsafe( int c )
-{
- return ( (unsigned char)( c - 'a' ) < 26 )
- || ( (unsigned char)( c - 'A' ) < 26 )
- || ( (unsigned char)( c - '0' ) < 10 )
- /* Hmm, we should not encode character that are allowed in URLs
- * (even if they are not URL-safe), nor URL-safe characters.
- * We still encode some of them because of Microsoft's crap browser.
- */
- || ( strchr( "-_.", c ) != NULL );
-}
-
-static inline char url_hexchar( int c )
-{
- return ( c < 10 ) ? c + '0' : c + 'A' - 10;
-}
-
-/*****************************************************************************
- * vlc_UrlEncode:
- *****************************************************************************
- * perform URL encoding
- * (you do NOT want to do URL decoding - it is not reversible - do NOT do it)
- *****************************************************************************/
static inline char *vlc_UrlEncode( const char *psz_url )
{
- char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
- const uint8_t *in;
-
- for( in = (const uint8_t *)psz_url; *in; in++ )
- {
- uint8_t c = *in;
-
- if( isurlsafe( c ) )
- {
- *out++ = (char)c;
- }
- else
- {
- uint16_t cp;
-
- *out++ = '%';
- /* UTF-8 to UCS-2 conversion */
- if( ( c & 0x80 ) == 0 )
- {
- cp = c;
- }
- else if( ( c & 0xe0 ) == 0xc0 )
- {
- cp = (((uint16_t)c & 0x1f) << 6) | (in[1] & 0x3f);
- in++;
- }
- else if( ( c & 0xf0 ) == 0xe0 )
- {
- cp = (((uint16_t)c & 0xf) << 12) | (((uint16_t)(in[1]) & 0x3f) << 6) | (in[2] & 0x3f);
- in += 2;
- }
- else
- {
- /* cannot URL-encode code points outside the BMP */
- /* better a wrong conversion than a crash */
- cp = '?';
- }
-
- if( cp < 0xff )
- {
- /* Encode ISO-8859-1 characters */
- *out++ = url_hexchar( cp >> 4 );
- *out++ = url_hexchar( cp & 0xf );
- }
- else
- {
- /* Encode non-Latin-1 characters */
- *out++ = 'u';
- *out++ = url_hexchar( cp >> 12 );
- *out++ = url_hexchar((cp >> 8) & 0xf );
- *out++ = url_hexchar((cp >> 4) & 0xf );
- *out++ = url_hexchar( cp & 0xf );
- }
- }
- }
- *out++ = '\0';
-
- return strdup( psz_enc );
+ /* The whole encoding job is delegated to encode_URI(); its result is
+  * returned to the caller unchanged.
+  * FIXME: do not encode / : ? and & _when_ not needed */
+ return encode_URI( psz_url );
}
/*****************************************************************************
ptr += 2;
}
else
- if( !isurlsafe( c ) )
+ if( ( (unsigned char)( c - 'a' ) < 26 )
+ || ( (unsigned char)( c - 'A' ) < 26 )
+ || ( (unsigned char)( c - '0' ) < 10 )
+ || ( strchr( "-_.", c ) != NULL ) )
return 1;
}
return 0; /* looks fine - but maybe it is not encoded */
#include <assert.h>
#include "vlc_strings.h"
-#include "vlc_url.h"
+#include "vlc_url.h"
+#include "charset.h"
/**
* Unescape URI encoded string
*out = '\0';
}
+/**
+ * Decode an encoded URI string into a freshly allocated copy.
+ *
+ * The input is duplicated with strdup() and the copy is then decoded in
+ * place by decode_URI(); the string passed in is left untouched.
+ *
+ * \param psz nul-terminated encoded string (handed straight to strdup(),
+ *            so it must not be NULL)
+ * \return decoded duplicated string, or NULL if the duplication failed
+ */
+char *decode_URI_duplicate( const char *psz )
+{
+    char *psz_dup = strdup( psz );
+
+    /* strdup() may fail: never hand a NULL buffer to the decoder.
+     * The copy goes through decode_URI(), not unescape_URI(), so that this
+     * function matches its in-place counterpart. */
+    if( psz_dup != NULL )
+        decode_URI( psz_dup );
+    return psz_dup;
+}
+
+/**
+ * Decode encoded URI string in place
+ *
+ * "%XX" escapes are replaced by the byte they encode and '+' becomes a
+ * space. Any other byte below 32 or above 127 is replaced by '?'. Every
+ * step writes at most as many bytes as it reads, so the result always fits
+ * in the original buffer. The decoded string is finally passed to
+ * EnsureUTF8().
+ *
+ * If the string ends in the middle of a "%XX" escape, the function returns
+ * at once: the buffer is left partially rewritten and EnsureUTF8() is not
+ * called.
+ *
+ * \param psz nul-terminated string to decode (modified in place)
+ * \return nothing
+ */
+void decode_URI( char *psz )
+{
+    unsigned char *in = (unsigned char *)psz, *out = in, c;
+
+    while( ( c = *in++ ) != '\0' )
+    {
+        switch( c )
+        {
+            case '%':
+            {
+                /* two hex digits plus the terminator strtoul() relies on */
+                char hex[3];
+
+                if( ( ( hex[0] = *in++ ) == 0 )
+                 || ( ( hex[1] = *in++ ) == 0 ) )
+                    return;
+
+                hex[2] = '\0';
+                *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
+                break;
+            }
+
+            case '+':
+                *out++ = ' ';
+                /* Must not fall through: that would also emit the '+' and
+                 * let the write pointer overtake the read pointer. */
+                break;
+
+            default:
+                /* Inserting non-ASCII or non-printable characters is unsafe,
+                 * and no sane browser will send these unencoded */
+                if( ( c < 32 ) || ( c > 127 ) )
+                    *out++ = '?';
+                else
+                    *out++ = c;
+                break;
+        }
+    }
+    *out = '\0';
+    EnsureUTF8( psz );
+}
+
+/**
+ * Tells whether a byte can be copied verbatim into an encoded URI.
+ *
+ * Only ASCII letters, digits and '-', '_', '.' qualify. Other characters
+ * that are allowed in URLs are still reported as unsafe on purpose: this
+ * was kept as a browser-compatibility workaround.
+ *
+ * \param c character to test; only its low 8 bits are considered
+ * \return non-zero if the character is URL-safe, 0 otherwise
+ */
+static inline int isurlsafe( int c )
+{
+    const unsigned char uc = (unsigned char)c;
+
+    /* Explicit comparisons instead of strchr( "-_.", c ): strchr() also
+     * matches the string terminator, which made NUL look URL-safe. */
+    return ( (unsigned char)( uc - 'a' ) < 26 )
+        || ( (unsigned char)( uc - 'A' ) < 26 )
+        || ( (unsigned char)( uc - '0' ) < 10 )
+        || ( uc == '-' ) || ( uc == '_' ) || ( uc == '.' );
+}
+
+/**
+ * Maps a value to its uppercase hexadecimal digit:
+ * 0-9 give '0'-'9', 10-15 give 'A'-'F'.
+ */
+static inline char url_hexchar( int c )
+{
+    if( c >= 10 )
+        return c + 'A' - 10;
+    return c + '0';
+}
+
+/**
+ * encode_URI_component
+ * Encodes an URI component.
+ *
+ * URL-safe characters (see isurlsafe()) are copied verbatim, a space becomes
+ * '+', and every other byte is written as "%XX" with uppercase hex digits.
+ *
+ * NOTE(review): the exported prototype, the symbol table and vlc_UrlEncode()
+ * all refer to encode_URI(), while this definition is named
+ * encode_URI_component() - confirm which name is intended.
+ *
+ * @param psz_url nul-terminated UTF-8 representation of the component.
+ * Obviously, you can't pass an URI containing a nul character, but you don't
+ * want to do that, do you?
+ *
+ * @return encoded string (must be free()'d), or NULL if memory could not
+ * be allocated
+ */
+char *encode_URI_component( const char *psz_url )
+{
+    const size_t i_len = strlen( psz_url );
+
+    /* Worst case: every input byte expands to "%XX". The buffer lives on
+     * the heap rather than in a variable-length array, so that a long input
+     * cannot overflow the stack. */
+    if( i_len > ( SIZE_MAX - 1 ) / 3 )
+        return NULL;
+
+    char *psz_enc = malloc( 3 * i_len + 1 );
+    if( psz_enc == NULL )
+        return NULL;
+
+    char *out = psz_enc;
+    for( const uint8_t *in = (const uint8_t *)psz_url; *in; in++ )
+    {
+        const uint8_t c = *in;
+
+        if( isurlsafe( c ) )
+            *out++ = (char)c;
+        else if( c == ' ' )
+            *out++ = '+';
+        else
+        {
+            *out++ = '%';
+            *out++ = url_hexchar( c >> 4 );
+            *out++ = url_hexchar( c & 0xf );
+        }
+    }
+    *out++ = '\0';
+
+    /* Give back the unused tail; keep the larger buffer if that fails */
+    char *psz_ret = realloc( psz_enc, (size_t)( out - psz_enc ) );
+    return ( psz_ret != NULL ) ? psz_ret : psz_enc;
+}
+
/**
* Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
* \param string to convert