1 /*****************************************************************************
2 * strings.c: String related functions
3 *****************************************************************************
4 * Copyright (C) 2006 the VideoLAN team
7 * Authors: Antoine Cellerier <dionoea at videolan dot org>
8 * Daniel Stranger <vlc at schmaller dot de>
9 * Rémi Denis-Courmont <rem # videolan org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
35 #include "vlc_strings.h"
/**
 * Unescape URI encoded string
 *
 * \param psz nul-terminated string to decode; may be NULL
 * \return newly allocated decoded copy (to be released with free()),
 *         or NULL if psz is NULL or the allocation failed
 */
char *unescape_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* strdup() can fail, and unescape_URI() dereferences its argument
     * unconditionally, so it must never be handed NULL */
    if( psz_dup != NULL )
        unescape_URI( psz_dup );
    return psz_dup;
}
/**
 * Unescape URI encoded string in place
 *
 * Decodes "%XX" escapes (two hexadecimal digits) and "%uXXXX" escapes (four
 * hexadecimal digits). Decoded values of 0x80 and above are written as UTF-8.
 * Unescaped bytes below 32 or above 127 are replaced with '?'.
 * The result is never longer than the input. An escape sequence cut short by
 * the end of the string is dropped and the result is terminated before it.
 *
 * \param psz nul-terminated string, modified in place
 */
void unescape_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    while( ( c = *in++ ) != '\0' )
    {
        switch( c )
        {
            case '%':
            {
                /* Up to four hexadecimal digits plus the terminator */
                char val[5], *pval = val;
                unsigned long cp;

                switch( c = *in++ )
                {
                    case '\0':
                        goto done;

                    case 'u':
                    case 'U':
                        if( ( *pval++ = *in++ ) == '\0' )
                            goto done;
                        if( ( *pval++ = *in++ ) == '\0' )
                            goto done;
                        /* Stop here if the string ends after two digits:
                         * reading a fourth one would run past the
                         * terminator */
                        if( ( c = *in++ ) == '\0' )
                            goto done;
                        /* fall through */

                    default:
                        *pval++ = c;
                        if( ( *pval++ = *in++ ) == '\0' )
                            goto done;
                        *pval = '\0';
                }

                cp = strtoul( val, NULL, 0x10 );
                if( cp < 0x80 )
                    *out++ = (unsigned char)cp;
                else
                if( cp < 0x800 )
                {
                    *out++ = (( cp >>  6)         | 0xc0);
                    *out++ = (( cp        & 0x3f) | 0x80);
                }
                else
                {
                    /* At most four hexadecimal digits were collected */
                    assert( cp < 0x10000 );
                    *out++ = (( cp >> 12)         | 0xe0);
                    *out++ = (((cp >>  6) & 0x3f) | 0x80);
                    *out++ = (( cp        & 0x3f) | 0x80);
                }
                break;
            }

            /* + is not a special case - it means plus, not space. */

            default:
                /* Inserting non-ASCII or non-printable characters is unsafe,
                 * and no sane browser will send these unencoded */
                if( ( c < 32 ) || ( c > 127 ) )
                    *out++ = '?';
                else
                    *out++ = c;
        }
    }

done:
    /* Always terminate at the write position, so that a truncated escape
     * does not leave the stale tail of the input behind the decoded text */
    *out = '\0';
}
/**
 * Decode encoded URI string
 *
 * \param psz nul-terminated string to decode; may be NULL
 * \return newly allocated decoded copy (to be released with free()),
 *         or NULL if psz is NULL or the allocation failed
 */
char *decode_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* Use decode_URI(), the in-place counterpart of this function, rather
     * than unescape_URI(); skip it when the copy could not be allocated */
    if( psz_dup != NULL )
        decode_URI( psz_dup );
    return psz_dup;
}
/**
 * Decode encoded URI string in place
 *
 * Decodes "%XX" escapes (two hexadecimal digits) into the corresponding
 * byte and turns '+' into a space. Unescaped bytes below 32 or above 127
 * are replaced with '?'. The result is never longer than the input. An
 * escape sequence cut short by the end of the string is dropped and the
 * result is terminated before it.
 *
 * \param psz nul-terminated string, modified in place
 */
void decode_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    while( ( c = *in++ ) != '\0' )
    {
        switch( c )
        {
            case '%':
            {
                /* Two hexadecimal digits plus the terminator */
                char hex[3];

                if( ( ( hex[0] = *in++ ) == 0 )
                 || ( ( hex[1] = *in++ ) == 0 ) )
                    goto done;

                hex[2] = '\0';
                *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
                break;
            }

            case '+':
                *out++ = ' ';
                break;

            default:
                /* Inserting non-ASCII or non-printable characters is unsafe,
                 * and no sane browser will send these unencoded */
                if( ( c < 32 ) || ( c > 127 ) )
                    *out++ = '?';
                else
                    *out++ = c;
        }
    }

done:
    /* Always terminate at the write position, so that a truncated escape
     * does not leave the stale tail of the input behind the decoded text */
    *out = '\0';
}
/**
 * Tells whether a character may be copied verbatim into an encoded URI
 * component.
 *
 * \param c character code
 * \return non-zero for ASCII letters, ASCII digits, '-', '_' and '.';
 *         zero otherwise, including for the nul character
 */
static inline int isurlsafe( int c )
{
    return ( (unsigned char)( c - 'a' ) < 26 )
        || ( (unsigned char)( c - 'A' ) < 26 )
        || ( (unsigned char)( c - '0' ) < 10 )
        /* Hmm, we should not encode character that are allowed in URLs
         * (even if they are not URL-safe), nor URL-safe characters.
         * We still encode some of them because of Microsoft's crap browser.
         *
         * Compared one by one rather than through strchr(), which also
         * matches the string terminator and would report 0 as safe.
         */
        || ( c == '-' ) || ( c == '_' ) || ( c == '.' );
}
/**
 * Converts a 4-bit value to its uppercase hexadecimal digit.
 *
 * \param c value to convert; meaningful for 0 to 15 only
 * \return '0'..'9' for 0 to 9, 'A'..'F' for 10 to 15
 */
static inline char url_hexchar( int c )
{
    return ( c < 10 ) ? c + '0' : c + 'A' - 10;
}
/**
 * encode_URI_component
 * Encodes an URI component.
 *
 * Bytes accepted by isurlsafe() are copied as they are; every other byte is
 * written as '%' followed by two uppercase hexadecimal digits.
 * NOTE(review): a space is percent-encoded like any other unsafe byte -
 * confirm that no caller expects '+' instead.
 *
 * @param psz_url nul-terminated UTF-8 representation of the component.
 * Obviously, you can't pass an URI containing a nul character, but you don't
 * want to do that, do you?
 *
 * @return encoded string (must be free()'d), or NULL if memory could not
 * be allocated
 */
char *encode_URI_component( const char *psz_url )
{
    const size_t i_len = strlen( psz_url );
    const uint8_t *in;
    char *psz_enc, *psz_fit, *out;

    /* Worst case: every byte turns into a three-character escape */
    if( i_len > ( SIZE_MAX - 1 ) / 3 )
        return NULL;

    /* Heap buffer rather than a variable-length array: the size depends on
     * the caller's input and a long string would overflow the stack */
    psz_enc = malloc( 3 * i_len + 1 );
    if( psz_enc == NULL )
        return NULL;

    out = psz_enc;
    for( in = (const uint8_t *)psz_url; *in; in++ )
    {
        const uint8_t c = *in;

        if( isurlsafe( c ) )
            *out++ = (char)c;
        else
        {
            *out++ = '%';
            *out++ = url_hexchar( c >> 4 );
            *out++ = url_hexchar( c & 0xf );
        }
    }
    *out++ = '\0';

    /* Give back the unused part; keep the larger buffer if that fails */
    psz_fit = realloc( psz_enc, (size_t)( out - psz_enc ) );
    return ( psz_fit != NULL ) ? psz_fit : psz_enc;
}
/**
 * Converts "&lt;", "&gt;", "&amp;", "&quot;", "&#039;" and "&apos;" to
 * '<', '>', '&', '"' and '\'' respectively, in place.
 *
 * Any other text, including unrecognised entities, is copied unchanged.
 * The result is never longer than the input.
 *
 * \param psz_value nul-terminated string to convert; NULL is ignored
 */
void resolve_xml_special_chars( char *psz_value )
{
    char *p_pos = psz_value;

    if( psz_value == NULL )
        return;

    while( *psz_value != '\0' )
    {
        if( !strncmp( psz_value, "&lt;", 4 ) )
        {
            *p_pos = '<';
            psz_value += 4;
        }
        else if( !strncmp( psz_value, "&gt;", 4 ) )
        {
            *p_pos = '>';
            psz_value += 4;
        }
        else if( !strncmp( psz_value, "&amp;", 5 ) )
        {
            *p_pos = '&';
            psz_value += 5;
        }
        else if( !strncmp( psz_value, "&quot;", 6 ) )
        {
            *p_pos = '\"';
            psz_value += 6;
        }
        else if( !strncmp( psz_value, "&#039;", 6 )
              || !strncmp( psz_value, "&apos;", 6 ) )
        {
            /* Numeric and named spelling of the apostrophe */
            *p_pos = '\'';
            psz_value += 6;
        }
        else
        {
            *p_pos = *psz_value;
            psz_value++;
        }

        /* The write position never overtakes the read position */
        p_pos++;
    }

    *p_pos = '\0';
}
273 * Converts '<', '>', '\"', '\'' and '&' to their html entities
274 * \param psz_content simple element content that is to be converted
276 char *convert_xml_special_chars( const char *psz_content )
278 char *psz_temp = malloc( 6 * strlen( psz_content ) + 1 );
279 const char *p_from = psz_content;
280 char *p_to = psz_temp;
284 if ( *p_from == '<' )
286 strcpy( p_to, "<" );
289 else if ( *p_from == '>' )
291 strcpy( p_to, ">" );
294 else if ( *p_from == '&' )
296 strcpy( p_to, "&" );
299 else if( *p_from == '\"' )
301 strcpy( p_to, """ );
304 else if( *p_from == '\'' )
306 strcpy( p_to, "'" );