-
-/**
- * Unescape URI encoded string
- * \return decoded duplicated string
- */
-char *unescape_URI_duplicate( const char *psz )
-{
- char *psz_dup = strdup( psz );
- unescape_URI( psz_dup );
- return psz_dup;
-}
-
-/**
- * Unescape URI encoded string in place
- * \return nothing
- */
-void unescape_URI( char *psz )
-{
- unsigned char *in = (unsigned char *)psz, *out = in, c;
- if( psz == NULL )
- return;
-
- while( ( c = *in++ ) != '\0' )
- {
- switch( c )
- {
- case '%':
- {
- char val[5], *pval = val;
- unsigned long cp;
-
- switch( c = *in++ )
- {
- case '\0':
- return;
-
- case 'u':
- case 'U':
- if( ( *pval++ = *in++ ) == '\0' )
- return;
- if( ( *pval++ = *in++ ) == '\0' )
- return;
- c = *in++;
-
- default:
- *pval++ = c;
- if( ( *pval++ = *in++ ) == '\0' )
- return;
- *pval = '\0';
- }
-
- cp = strtoul( val, NULL, 0x10 );
- if( cp < 0x80 )
- *out++ = cp;
- else
- if( cp < 0x800 )
- {
- *out++ = (( cp >> 6) | 0xc0);
- *out++ = (( cp & 0x3f) | 0x80);
- }
- else
- {
- assert( cp < 0x10000 );
- *out++ = (( cp >> 12) | 0xe0);
- *out++ = (((cp >> 6) & 0x3f) | 0x80);
- *out++ = (( cp & 0x3f) | 0x80);
- }
- break;
- }
-
- /* + is not a special case - it means plus, not space. */
-
- default:
- /* Inserting non-ASCII or non-printable characters is unsafe,
- * and no sane browser will send these unencoded */
- if( ( c < 32 ) || ( c > 127 ) )
- *out++ = '?';
- else
- *out++ = c;
- }
- }
- *out = '\0';
-}
-
-/**
- * Decode encoded URI component. See also decode_URI().
- * \return decoded duplicated string
- */
-char *decode_URI_duplicate( const char *psz )
-{
- char *psz_dup = strdup( psz );
- decode_URI( psz_dup );
- return psz_dup;
-}
-
-/**
- * Decode an encoded URI component in place.
- * <b>This function does NOT decode entire URIs.</b>
- * It decodes components (e.g. host name, directory, file name).
- * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
- * Complete URIs are always "encoded" (or they are syntaxically invalid).
- *
- * Note that URI encoding is different from Javascript escaping. Especially,
- * white spaces and Unicode non-ASCII code points are encoded differently.
- *
- * \return psz on success, NULL if it was not properly encoded
- */
-char *decode_URI( char *psz )
-{
- unsigned char *in = (unsigned char *)psz, *out = in, c;
-
- if( psz == NULL )
- return NULL;
-
- while( ( c = *in++ ) != '\0' )
- {
- switch( c )
- {
- case '%':
- {
- char hex[3];
-
- if( ( ( hex[0] = *in++ ) == 0 )
- || ( ( hex[1] = *in++ ) == 0 ) )
- return NULL;
-
- hex[2] = '\0';
- *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
- break;
- }
-
- case '+': /* This is HTTP forms, not URI decoding... */
- *out++ = ' ';
- break;
-
- default:
- /* Inserting non-ASCII or non-printable characters is unsafe,
- * and no sane browser will send these unencoded */
- if( ( c < 32 ) || ( c > 127 ) )
- *out++ = '?';
- else
- *out++ = c;
- }
- }
- *out = '\0';
- EnsureUTF8( psz );
- return psz;
-}
-
-static inline bool isurisafe( int c )
-{
- /* These are the _unreserved_ URI characters (RFC3986 §2.3) */
- return ( (unsigned char)( c - 'a' ) < 26 )
- || ( (unsigned char)( c - 'A' ) < 26 )
- || ( (unsigned char)( c - '0' ) < 10 )
- || ( strchr( "-._~", c ) != NULL );
-}
-
-static char *encode_URI_bytes (const char *psz_uri, size_t len)
-{
- char *psz_enc = malloc (3 * len + 1), *out = psz_enc;
- if (psz_enc == NULL)
- return NULL;
-
- for (size_t i = 0; i < len; i++)
- {
- static const char hex[16] = "0123456789ABCDEF";
- uint8_t c = *psz_uri;
-
- if( isurisafe( c ) )
- *out++ = c;
- /* This is URI encoding, not HTTP forms:
- * Space is encoded as '%20', not '+'. */
- else
- {
- *out++ = '%';
- *out++ = hex[c >> 4];
- *out++ = hex[c & 0xf];
- }
- psz_uri++;
- }
- *out++ = '\0';
-
- out = realloc (psz_enc, out - psz_enc);
- return out ? out : psz_enc; /* realloc() can fail (safe) */
-}
-
-/**
- * Encodes an URI component (RFC3986 §2).
- *
- * @param psz_uri nul-terminated UTF-8 representation of the component.
- * Obviously, you can't pass an URI containing a nul character, but you don't
- * want to do that, do you?
- *
- * @return encoded string (must be free()'d), or NULL for ENOMEM.
- */
-char *encode_URI_component( const char *psz_uri )
-{
- return encode_URI_bytes (psz_uri, strlen (psz_uri));
-}
-