1 /*****************************************************************************
2 * strings.c: String related functions
3 *****************************************************************************
4 * Copyright (C) 2006 the VideoLAN team
7 * Authors: Antoine Cellerier <dionoea at videolan dot org>
8 * Daniel Stranger <vlc at schmaller dot de>
9 * Rémi Denis-Courmont <rem # videolan org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
35 #include "vlc_strings.h"
/**
 * Unescape URI encoded string
 *
 * The input is left untouched; a copy is made and unescaped with
 * unescape_URI().
 *
 * \param psz nul-terminated URI encoded string
 * \return decoded duplicated string (to be released with free()),
 *         or NULL if psz is NULL or the copy could not be allocated
 */
char *unescape_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* strdup() returns NULL when out of memory: do not decode in that case */
    if( psz_dup != NULL )
        unescape_URI( psz_dup );

    return psz_dup;
}
/* Returns the value of the hexadecimal digit c, or -1 if c is not one
 * (which includes the terminating nul character). */
static int unescape_hex_value( unsigned char c )
{
    if( ( c >= '0' ) && ( c <= '9' ) )
        return c - '0';
    if( ( c >= 'a' ) && ( c <= 'f' ) )
        return c - 'a' + 10;
    if( ( c >= 'A' ) && ( c <= 'F' ) )
        return c - 'A' + 10;
    return -1;
}

/**
 * Unescape URI encoded string in place
 *
 * "%XX" (two hexadecimal digits) and "%uXXXX" / "%UXXXX" (four hexadecimal
 * digits) are replaced by the UTF-8 encoding of the code point they denote.
 * A '%' that does not start such a sequence (truncated, or followed by
 * something else than hexadecimal digits) is copied unchanged.
 * Unencoded bytes below 32 or above 127 are replaced by '?'.
 * Note that an escaped zero ("%00") writes a nul byte and therefore ends
 * the resulting string early.
 *
 * \param psz nul-terminated string, modified in place; the result is never
 *            longer than the input
 */
void unescape_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    while( ( c = *in++ ) != '\0' )
    {
        if( c == '%' )
        {
            unsigned char *p = in;
            unsigned long cp = 0;
            unsigned digits = 2, i;

            if( ( *p == 'u' ) || ( *p == 'U' ) )
            {
                digits = 4;
                p++;
            }

            /* Stops at the first non-hexadecimal character, hence never
             * reads beyond the terminating nul character. */
            for( i = 0; i < digits; i++ )
            {
                int v = unescape_hex_value( p[i] );
                if( v < 0 )
                    break;
                cp = ( cp << 4 ) | (unsigned)v;
            }

            if( i < digits )
            {
                /* Malformed escape: keep the '%' and carry on with the
                 * characters that follow it as ordinary input. */
                *out++ = '%';
                continue;
            }
            in = p + digits;

            if( cp < 0x80 )
                *out++ = (unsigned char)cp;
            else if( cp < 0x800 )
            {
                *out++ = (unsigned char)( ( cp >> 6 ) | 0xc0 );
                *out++ = (unsigned char)( ( cp & 0x3f ) | 0x80 );
            }
            else
            {
                /* At most four hexadecimal digits were parsed, so
                 * cp < 0x10000 and three bytes are always enough. */
                *out++ = (unsigned char)( ( cp >> 12 ) | 0xe0 );
                *out++ = (unsigned char)( ( ( cp >> 6 ) & 0x3f ) | 0x80 );
                *out++ = (unsigned char)( ( cp & 0x3f ) | 0x80 );
            }
            continue;
        }

        /* + is not a special case - it means plus, not space. */

        /* Inserting non-ASCII or non-printable characters is unsafe,
         * and no sane browser will send these unencoded */
        if( ( c < 32 ) || ( c > 127 ) )
            *out++ = '?';
        else
            *out++ = c;
    }
    *out = '\0';
}
/**
 * Decode encoded URI string
 *
 * The input is left untouched; a copy is made and decoded with
 * decode_URI().
 *
 * \param psz nul-terminated encoded URI string
 * \return decoded duplicated string (to be released with free()),
 *         or NULL if psz is NULL or the copy could not be allocated
 */
char *decode_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* strdup() returns NULL when out of memory: do not decode in that case */
    if( psz_dup != NULL )
        decode_URI( psz_dup );

    return psz_dup;
}
/* Returns the value of the hexadecimal digit c, or -1 if c is not one
 * (which includes the terminating nul character). */
static int decode_hex_value( unsigned char c )
{
    if( ( c >= '0' ) && ( c <= '9' ) )
        return c - '0';
    if( ( c >= 'a' ) && ( c <= 'f' ) )
        return c - 'a' + 10;
    if( ( c >= 'A' ) && ( c <= 'F' ) )
        return c - 'A' + 10;
    return -1;
}

/**
 * Decode encoded URI string in place
 *
 * "%XX" (two hexadecimal digits) is replaced by the byte it denotes and
 * '+' by a space. A '%' that is not followed by two hexadecimal digits is
 * copied unchanged. Unencoded bytes below 32 or above 127 are replaced
 * by '?'.
 *
 * \param psz nul-terminated string, modified in place; the result is never
 *            longer than the input
 */
void decode_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    while( ( c = *in++ ) != '\0' )
    {
        switch( c )
        {
            case '%':
            {
                int hi = decode_hex_value( in[0] );
                /* in[1] is only inspected once in[0] is known to be a
                 * digit, i.e. not the terminating nul character. */
                int lo = ( hi >= 0 ) ? decode_hex_value( in[1] ) : -1;

                if( lo < 0 )
                {
                    /* Malformed escape: keep the '%' as it is */
                    *out++ = '%';
                    break;
                }

                *out++ = (unsigned char)( ( hi << 4 ) | lo );
                in += 2;
                break;
            }

            case '+':
                *out++ = ' ';
                break;

            default:
                /* Inserting non-ASCII or non-printable characters is unsafe,
                 * and no sane browser will send these unencoded */
                if( ( c < 32 ) || ( c > 127 ) )
                    *out++ = '?';
                else
                    *out++ = c;
        }
    }
    *out = '\0';
}
/**
 * Tells whether a character is copied verbatim when URL-encoding.
 *
 * Only ASCII letters, ASCII digits, '-', '_' and '.' qualify.
 * Hmm, we should not encode character that are allowed in URLs
 * (even if they are not URL-safe), nor URL-safe characters.
 * We still encode some of them because of Microsoft's browser.
 *
 * \param c character value to test
 * \return non-zero if c is URL-safe, zero otherwise (including for the
 *         nul character and for values outside of the ASCII range)
 */
static inline int isurlsafe( int c )
{
    /* Plain range checks: unlike strchr(), they never match the nul
     * character, and they do not wrap around for values above 255. */
    return ( ( c >= 'a' ) && ( c <= 'z' ) )
        || ( ( c >= 'A' ) && ( c <= 'Z' ) )
        || ( ( c >= '0' ) && ( c <= '9' ) )
        || ( c == '-' ) || ( c == '_' ) || ( c == '.' );
}
/**
 * Converts a small integer into an upper-case hexadecimal digit.
 *
 * Values below 10 are mapped onto '0'..'9', the other ones onto 'A' and
 * the characters that follow it. No range checking is performed.
 *
 * \param c value to convert
 * \return the corresponding character
 */
static inline char url_hexchar( int c )
{
    if( c >= 10 )
        return (char)( c + 'A' - 10 );

    return (char)( c + '0' );
}
/**
 * encode_URI_component
 * Encodes an URI component.
 *
 * URL-safe characters (see isurlsafe()) are copied, a space becomes '+',
 * and any other byte becomes "%XX" with upper-case hexadecimal digits.
 *
 * @param psz_url nul-terminated UTF-8 representation of the component.
 * Obviously, you can't pass an URI containing a nul character, but you don't
 * want to do that, do you?
 *
 * @return encoded string (must be free()'d), or NULL if memory could not
 * be allocated
 */
char *encode_URI_component( const char *psz_url )
{
    const size_t i_len = strlen( psz_url );
    const uint8_t *in;
    char *psz_enc, *psz_fit, *out;

    /* Worst case: every byte becomes a three-character "%XX" escape.
     * The buffer lives on the heap, as the input length is unbounded and
     * a variable-length array could overflow the stack. */
    if( i_len > ( SIZE_MAX - 1 ) / 3 )
        return NULL;

    psz_enc = malloc( 3 * i_len + 1 );
    if( psz_enc == NULL )
        return NULL;

    out = psz_enc;
    for( in = (const uint8_t *)psz_url; *in; in++ )
    {
        uint8_t c = *in;

        if( isurlsafe( c ) )
            *out++ = (char)c;
        else if( c == ' ' )
            *out++ = '+';
        else
        {
            *out++ = '%';
            *out++ = url_hexchar( c >> 4 );
            *out++ = url_hexchar( c & 0xf );
        }
    }
    *out++ = '\0';

    /* Give the unused tail back; if that fails, the original (larger)
     * buffer is still valid and holds the result. */
    psz_fit = realloc( psz_enc, (size_t)( out - psz_enc ) );
    return ( psz_fit != NULL ) ? psz_fit : psz_enc;
}
/**
 * Converts "&lt;", "&gt;", "&amp;", "&quot;" and "&#039;" (or "&apos;")
 * to '<', '>', '&', '"' and '\'' respectively, in place.
 *
 * The string is scanned once from left to right, so the outcome of a
 * replacement is never decoded again ("&amp;lt;" becomes "&lt;").
 * Any other text, unknown entities included, is copied unchanged.
 *
 * \param psz_value nul-terminated string to convert; NULL is ignored
 */
void resolve_xml_special_chars( char *psz_value )
{
    static const struct
    {
        const char *psz_entity;
        char        c_plain;
    } p_entities[] =
    {
        { "&lt;",   '<'  },
        { "&gt;",   '>'  },
        { "&amp;",  '&'  },
        { "&quot;", '"'  },
        { "&#039;", '\'' },
        { "&apos;", '\'' },
    };
    char *p_pos = psz_value;

    if( psz_value == NULL )
        return;

    while( *psz_value )
    {
        char c = *psz_value;
        size_t i_skip = 1;

        if( c == '&' )
        {
            size_t i;

            for( i = 0; i < sizeof( p_entities ) / sizeof( p_entities[0] ); i++ )
            {
                size_t i_len = strlen( p_entities[i].psz_entity );

                if( !strncmp( psz_value, p_entities[i].psz_entity, i_len ) )
                {
                    c = p_entities[i].c_plain;
                    i_skip = i_len;
                    break;
                }
            }
        }

        /* The write position never overtakes the read position, since an
         * entity is always longer than the character that replaces it. */
        *p_pos++ = c;
        psz_value += i_skip;
    }

    *p_pos = '\0';
}
274 * Converts '<', '>', '\"', '\'' and '&' to their html entities
275 * \param psz_content simple element content that is to be converted
277 char *convert_xml_special_chars( const char *psz_content )
279 char *psz_temp = malloc( 6 * strlen( psz_content ) + 1 );
280 const char *p_from = psz_content;
281 char *p_to = psz_temp;
285 if ( *p_from == '<' )
287 strcpy( p_to, "<" );
290 else if ( *p_from == '>' )
292 strcpy( p_to, ">" );
295 else if ( *p_from == '&' )
297 strcpy( p_to, "&" );
300 else if( *p_from == '\"' )
302 strcpy( p_to, """ );
305 else if( *p_from == '\'' )
307 strcpy( p_to, "'" );