1 /*****************************************************************************
2 * strings.c: String related functions
3 *****************************************************************************
4 * Copyright (C) 2006 the VideoLAN team
7 * Authors: Antoine Cellerier <dionoea at videolan dot org>
8 * Daniel Stranger <vlc at schmaller dot de>
9 * Rémi Denis-Courmont <rem # videolan org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
33 #include <vlc_common.h>
36 /* Needed by str_format_time */
39 /* Needed by str_format_meta */
40 #include <vlc_input.h>
42 #include <vlc_playlist.h>
45 #include <vlc_strings.h>
47 #include <vlc_charset.h>
/**
 * Unescape URI encoded string
 * \param psz NUL-terminated URI-escaped string (may be NULL)
 * \return decoded duplicated string (release it with free()), or NULL if
 * psz is NULL or the copy could not be allocated
 */
char *unescape_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* Never hand a failed allocation to the in-place decoder. */
    if( psz_dup != NULL )
        unescape_URI( psz_dup );
    return psz_dup;
}
61 * Unescape URI encoded string in place
/* Decodes URI escapes in psz in place: `in` scans the input while `out`,
 * which starts at the same address, receives the decoded bytes.
 * NOTE(review): several statements of this function are not visible in
 * this copy; the comments below describe the visible lines only. */
64 void unescape_URI( char *psz )
66 unsigned char *in = (unsigned char *)psz, *out = in, c;
/* Consume one input byte per iteration until the terminating NUL. */
70 while( ( c = *in++ ) != '\0' )
/* Scratch buffer for the digits of one escape; it is parsed below with
 * strtoul() in base 16. */
76 char val[5], *pval = val;
/* Each test copies one more input byte into val and detects the end of
 * the input in the middle of an escape. */
86 if( ( *pval++ = *in++ ) == '\0' )
88 if( ( *pval++ = *in++ ) == '\0' )
94 if( ( *pval++ = *in++ ) == '\0' )
99 cp = strtoul( val, NULL, 0x10 );
/* Two-byte UTF-8 form: lead byte 0xC0 | high bits, then 0x80 | low 6 bits. */
105 *out++ = (( cp >> 6) | 0xc0);
106 *out++ = (( cp & 0x3f) | 0x80);
/* Three-byte UTF-8 form; code points of 0x10000 and above trip the assert. */
110 assert( cp < 0x10000 );
111 *out++ = (( cp >> 12) | 0xe0);
112 *out++ = (((cp >> 6) & 0x3f) | 0x80);
113 *out++ = (( cp & 0x3f) | 0x80);
118 /* + is not a special case - it means plus, not space. */
121 /* Inserting non-ASCII or non-printable characters is unsafe,
122 * and no sane browser will send these unencoded */
/* NOTE(review): the action taken for bytes outside 32..127 is not visible
 * in this copy. */
123 if( ( c < 32 ) || ( c > 127 ) )
/**
 * Decode encoded URI string
 * \param psz NUL-terminated encoded string (may be NULL)
 * \return decoded duplicated string (release it with free()), or NULL if
 * psz is NULL or the copy could not be allocated
 */
char *decode_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* Never hand a failed allocation to the in-place decoder. */
    if( psz_dup != NULL )
        decode_URI( psz_dup );
    return psz_dup;
}
144 * Decode encoded URI string in place
/* Decodes an encoded URI string in place: `in` scans the input while `out`,
 * which starts at the same address, receives the decoded bytes.
 * NOTE(review): several statements of this function are not visible in
 * this copy; the comments below describe the visible lines only. */
147 void decode_URI( char *psz )
149 unsigned char *in = (unsigned char *)psz, *out = in, c;
/* Consume one input byte per iteration until the terminating NUL. */
153 while( ( c = *in++ ) != '\0' )
/* Fetch the two digits of an escape, detecting an input that ends in the
 * middle of it. */
161 if( ( ( hex[0] = *in++ ) == 0 )
162 || ( ( hex[1] = *in++ ) == 0 ) )
/* The digits are parsed as base 16 and stored as one raw byte. */
166 *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
175 /* Inserting non-ASCII or non-printable characters is unsafe,
176 * and no sane browser will send these unencoded */
/* NOTE(review): the action taken for bytes outside 32..127 is not visible
 * in this copy. */
177 if( ( c < 32 ) || ( c > 127 ) )
/**
 * Tells whether a character can be copied verbatim into an encoded URI
 * component.
 *
 * @param c character code to test
 * @return non-zero for ASCII letters, ASCII digits, '-', '_' and '.';
 * zero for anything else (including NUL and values outside 0..255)
 */
static inline int isurlsafe( int c )
{
    /* Unsigned range checks: a value below the range start wraps around to
     * a huge number and therefore fails the comparison. */
    return ( (unsigned)( c - 'a' ) < 26 )
        || ( (unsigned)( c - 'A' ) < 26 )
        || ( (unsigned)( c - '0' ) < 10 )
        /* Hmm, we should not encode characters that are allowed in URLs
         * (even if they are not URL-safe), nor URL-safe characters.
         * We still encode some of them because of browser compatibility
         * problems. Explicit comparisons are used instead of strchr(),
         * which would also match the string terminator when c is 0. */
        || ( c == '-' ) || ( c == '_' ) || ( c == '.' );
}
/**
 * Converts a 4-bit value into its upper-case hexadecimal digit.
 *
 * @param c value to convert; only the low nibble is used, so the result is
 * always a valid hexadecimal digit
 * @return one of '0'..'9' or 'A'..'F'
 */
static inline char url_hexchar( int c )
{
    static const char digits[] = "0123456789ABCDEF";

    return digits[c & 0xf];
}
205 * encode_URI_component
206 * Encodes an URI component.
208 * @param psz_url nul-terminated UTF-8 representation of the component.
209 * Obviously, you can't pass an URI containing a nul character, but you don't
210 * want to do that, do you?
212 * @return encoded string (must be free()'d)
/* Builds the encoded form of psz_url in a temporary buffer and returns a
 * heap copy of it made with strdup().
 * NOTE(review): psz_enc is a variable-length array of 3 * strlen + 1 bytes
 * on the stack (presumably the worst case of three output bytes per input
 * byte); a very long input could exhaust the stack, so a heap buffer would
 * be safer. */
214 char *encode_URI_component( const char *psz_url )
216 char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
/* Walk the input as unsigned bytes until the terminating NUL. */
219 for( in = (const uint8_t *)psz_url; *in; in++ )
/* Emit the high nibble and then the low nibble as hexadecimal digits. */
231 *out++ = url_hexchar( c >> 4 );
232 *out++ = url_hexchar( c & 0xf );
/* The returned string is allocated by strdup(); NULL on allocation failure. */
237 return strdup( psz_enc );
/* Lookup table used by resolve_xml_special_chars(): each row holds an entity
 * name, a length that is compared with strncmp() against the input, and the
 * replacement text.
 * NOTE(review): the struct member receiving the middle initializer is not
 * visible in this copy. The first column also looks like it has been
 * entity-decoded here (one-character names next to lengths of 6..8) --
 * verify against the pristine file before relying on these rows. */
240 static struct xml_entity_s
/* Entity name matched against the input. */
242 const char *psz_entity;
/* Text written in place of the entity. */
244 const char *psz_char;
245 } p_xml_entities[] = {
246 /* Important: this list has to be in alphabetical order (psz_entity-wise) */
247 { "Æ", 7, "Æ" },
248 { "Á", 8, "Á" },
249 { "Â", 7, "Â" },
250 { "À", 8, "À" },
251 { "Å", 7, "Å" },
252 { "Ã", 8, "Ã" },
253 { "Ä", 6, "Ä" },
254 { "Ç", 8, "Ç" },
255 { "‡", 8, "‡" },
257 { "É", 8, "É" },
258 { "Ê", 7, "Ê" },
259 { "È", 8, "È" },
260 { "Ë", 6, "Ë" },
261 { "Í", 8, "Í" },
262 { "Î", 7, "Î" },
263 { "Ì", 8, "Ì" },
264 { "Ï", 6, "Ï" },
265 { "Ñ", 8, "Ñ" },
266 { "Œ", 7, "Œ" },
267 { "Ó", 8, "Ó" },
268 { "Ô", 7, "Ô" },
269 { "Ò", 8, "Ò" },
270 { "Ø", 8, "Ø" },
271 { "Õ", 8, "Õ" },
272 { "Ö", 6, "Ö" },
273 { "Š", 8, "Š" },
274 { "Þ", 7, "Þ" },
275 { "Ú", 8, "Ú" },
276 { "Û", 7, "Û" },
277 { "Ù", 8, "Ù" },
278 { "Ü", 6, "Ü" },
279 { "Ý", 8, "Ý" },
280 { "Ÿ", 6, "Ÿ" },
281 { "á", 8, "á" },
282 { "â", 7, "â" },
283 { "´", 7, "´" },
284 { "æ", 7, "æ" },
285 { "à", 8, "à" },
286 { "å", 7, "å" },
287 { "ã", 8, "ã" },
288 { "ä", 6, "ä" },
289 { "„", 7, "„" },
290 { "¦", 8, "¦" },
291 { "ç", 8, "ç" },
292 { "¸", 7, "¸" },
293 { "¢", 6, "¢" },
294 { "ˆ", 6, "ˆ" },
295 { "©", 6, "©" },
296 { "¤", 8, "¤" },
297 { "†", 8, "†" },
299 { "÷", 8, "÷" },
300 { "é", 8, "é" },
301 { "ê", 7, "ê" },
302 { "è", 8, "è" },
304 { "ë", 6, "ë" },
305 { "€", 6, "€" },
306 { "½", 8, "½" },
307 { "¼", 8, "¼" },
308 { "¾", 8, "¾" },
309 { "…", 8, "…" },
310 { "í", 8, "í" },
311 { "î", 7, "î" },
312 { "¡", 7, "¡" },
313 { "ì", 8, "ì" },
314 { "¿", 8, "¿" },
315 { "ï", 6, "ï" },
316 { "«", 7, "«" },
317 { "“", 7, "“" },
318 { "‹", 8, "‹" },
319 { "‘", 7, "‘" },
320 { "¯", 6, "¯" },
321 { "—", 7, "—" },
322 { "µ", 7, "µ" },
323 { "·", 8, "·" },
324 { "–", 7, "–" },
326 { "ñ", 8, "ñ" },
327 { "ó", 8, "ó" },
328 { "ô", 7, "ô" },
329 { "œ", 7, "œ" },
330 { "ò", 8, "ò" },
331 { "ª", 6, "ª" },
332 { "º", 6, "º" },
333 { "ø", 8, "ø" },
334 { "õ", 8, "õ" },
335 { "ö", 6, "ö" },
336 { "¶", 6, "¶" },
337 { "‰", 8, "‰" },
338 { "±", 8, "±" },
339 { "£", 7, "£" },
340 { "»", 7, "»" },
341 { "”", 7, "”" },
343 { "›", 8, "›" },
344 { "’", 7, "’" },
345 { "‚", 7, "‚" },
346 { "š", 8, "š" },
347 { "§", 6, "§" },
349 { "¹", 6, "¹" },
350 { "²", 6, "²" },
351 { "³", 6, "³" },
352 { "ß", 7, "ß" },
353 { "þ", 7, "þ" },
354 { "˜", 7, "˜" },
355 { "×", 7, "×" },
356 { "™", 7, "™" },
357 { "ú", 8, "ú" },
358 { "û", 7, "û" },
359 { "ù", 8, "ù" },
361 { "ü", 6, "ü" },
362 { "ý", 8, "ý" },
364 { "ÿ", 6, "ÿ" },
368 * Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
369 * \param string to convert
/* Replaces XML/HTML entities found in psz_value with the characters they
 * stand for, rewriting the string in place: psz_value is the read cursor
 * and p_pos the write cursor.
 * NOTE(review): many statements of this function (loop header, macro body,
 * binary-search stepping) are not visible in this copy; the comments below
 * describe the visible lines only. */
371 void resolve_xml_special_chars( char *psz_value )
373 char *p_pos = psz_value;
/* An ampersand introduces an entity. */
377 if( *psz_value == '&' )
/* TRY_CHAR compares the input against the first `len` bytes of `src`.
 * NOTE(review): the string arguments below look entity-decoded in this copy
 * (the lengths 4, 5 and 6 do not match one-character strings). */
379 #define TRY_CHAR( src, len, dst ) \
380 if( !strncmp( psz_value, src, len ) ) \
385 TRY_CHAR( "<", 4, '<' )
386 else TRY_CHAR( "&", 5, '&' )
387 else TRY_CHAR( "'", 6, '\'' )
388 else TRY_CHAR( ">", 4, '>' )
389 else TRY_CHAR( """, 6, '"' )
/* Numeric character reference: the decimal number starts after "&#". */
391 else if( psz_value[1] == '#' )
394 int i = strtol( psz_value+2, &psz_end, 10 );
/* The reference is well formed only when it ends with a semicolon. */
395 if( *psz_end == ';' )
/* Only codes in the printable ASCII range 32..126 are handled here. */
397 if( i >= 32 && i <= 126 )
400 psz_value = psz_end+1;
404 /* Unhandled code, FIXME */
411 /* Invalid entity number */
/* Named entity: look it up in p_xml_entities. The initial step of 64
 * relies on the table holding fewer than 128 rows (see the assert). */
418 const size_t i_entities = sizeof( p_xml_entities ) /
419 sizeof( p_xml_entities[0] );
420 assert( i_entities < 128 );
421 size_t step = 128>>1;
/* Probes at or past the end of the table are out of range. */
427 if( i >= i_entities )
430 cmp = strncmp( psz_value, p_xml_entities[i].psz_entity,
431 p_xml_entities[i].i_length );
/* strncpy() writes the replacement and zero-pads up to i_length bytes,
 * i.e. up to the length of the entity being replaced. p_pos then moves to
 * the last byte of the replacement, and the read cursor skips the entity. */
434 strncpy( p_pos, p_xml_entities[i].psz_char,
435 p_xml_entities[i].i_length );
436 p_pos += strlen( p_xml_entities[i].psz_char ) - 1;
437 psz_value += p_xml_entities[i].i_length;
/**
 * Converts '<', '>', '\"', '\'' and '&' to their html entities
 * \param psz_content simple element content that is to be converted
 * \return newly allocated escaped string (release it with free()), or NULL
 * if the output buffer could not be allocated
 */
char *convert_xml_special_chars( const char *psz_content )
{
    const size_t i_len = strlen( psz_content );
    char *psz_temp, *p_to;
    const char *p_from;

    /* The longest replacements ("&quot;" and "&#039;") take 6 bytes, hence
     * the worst case of 6 output bytes per input byte plus the NUL. */
    if( i_len > ( SIZE_MAX - 1 ) / 6 )
        return NULL;
    psz_temp = malloc( 6 * i_len + 1 );
    if( psz_temp == NULL )
        return NULL;

    p_to = psz_temp;
    for( p_from = psz_content; *p_from != '\0'; p_from++ )
    {
        const char *psz_entity;

        switch( *p_from )
        {
            case '<':  psz_entity = "&lt;";   break;
            case '>':  psz_entity = "&gt;";   break;
            case '&':  psz_entity = "&amp;";  break;
            case '\"': psz_entity = "&quot;"; break;
            /* Numeric form: "&apos;" is not defined in HTML 4. */
            case '\'': psz_entity = "&#039;"; break;
            default:   psz_entity = NULL;     break;
        }

        if( psz_entity != NULL )
        {
            const size_t i_entity = strlen( psz_entity );
            memcpy( p_to, psz_entity, i_entity );
            p_to += i_entity;
        }
        else
            *p_to++ = *p_from;
    }
    *p_to = '\0';

    return psz_temp;
}
/* Base64 encoding */
/**
 * Encodes a block of bytes as Base64 text with '=' padding.
 *
 * \param src bytes to encode (not dereferenced when i_src is 0)
 * \param i_src number of bytes at src
 * \return newly allocated NUL-terminated string (release it with free()),
 * or NULL if the output size would overflow or allocation fails
 */
char *vlc_b64_encode_binary( const uint8_t *src, size_t i_src )
{
    static const char b64[] =
           "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /* Every started group of 3 input bytes yields 4 output characters. */
    const size_t i_groups = i_src / 3 + ( ( i_src % 3 ) != 0 );
    char *ret, *dst;

    /* Refuse sizes whose output length does not fit in a size_t instead of
     * allocating a wrapped-around, too small buffer. */
    if( i_groups > ( SIZE_MAX - 1 ) / 4 )
        return NULL;

    ret = malloc( 4 * i_groups + 1 );
    if( ret == NULL )
        return NULL;
    dst = ret;

    /* Complete groups: 3 bytes -> 24 bits -> 4 characters. The bytes are
     * widened to uint32_t before shifting to stay clear of signed overflow. */
    for( ; i_src >= 3; i_src -= 3, src += 3 )
    {
        const uint32_t v = ( (uint32_t)src[0] << 16 )
                         | ( (uint32_t)src[1] << 8 )
                         |   (uint32_t)src[2];

        *dst++ = b64[v >> 18];
        *dst++ = b64[( v >> 12 ) & 0x3f];
        *dst++ = b64[( v >> 6 ) & 0x3f];
        *dst++ = b64[v & 0x3f];
    }

    /* Trailing 1 or 2 bytes: missing bits are zero, missing characters are
     * replaced by '=' padding. */
    if( i_src > 0 )
    {
        uint32_t v = (uint32_t)src[0] << 16;

        if( i_src == 2 )
            v |= (uint32_t)src[1] << 8;

        *dst++ = b64[v >> 18];
        *dst++ = b64[( v >> 12 ) & 0x3f];
        *dst++ = ( i_src == 2 ) ? b64[( v >> 6 ) & 0x3f] : '=';
        *dst++ = '=';
    }

    *dst = '\0';
    return ret;
}
/**
 * Base64-encodes a NUL-terminated string.
 *
 * \param src string to encode; a NULL pointer is encoded like ""
 * \return whatever vlc_b64_encode_binary() returns for those bytes
 */
char *vlc_b64_encode( const char *src )
{
    if( src == NULL )
        return vlc_b64_encode_binary( (const uint8_t*)"", 0 );

    return vlc_b64_encode_binary( (const uint8_t*)src, strlen(src) );
}
568 /* Base64 decoding */
/* Decodes the Base64 text at p_src into p_dst, writing at most i_dst bytes,
 * and returns the number of bytes written.
 * NOTE(review): the statements that step the decoder state (i_level/i_last)
 * and the handling of characters mapped to -1 are not visible in this copy. */
569 size_t vlc_b64_decode_binary_to_buffer( uint8_t *p_dst, size_t i_dst, const char *p_src )
/* Maps each byte value to its 6-bit Base64 value; -1 marks bytes that are
 * not part of the alphabet (this includes '='). */
571 static const int b64[256] = {
572 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 00-0F */
573 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 10-1F */
574 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* 20-2F */
575 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 30-3F */
576 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* 40-4F */
577 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* 50-5F */
578 -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* 60-6F */
579 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* 70-7F */
580 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 80-8F */
581 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 90-9F */
582 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* A0-AF */
583 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* B0-BF */
584 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* C0-CF */
585 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* D0-DF */
586 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* E0-EF */
587 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* F0-FF */
/* p_start remembers the beginning of the output to compute the byte count. */
589 uint8_t *p_start = p_dst;
590 uint8_t *p = (uint8_t *)p_src;
/* Stop when the output buffer is full or the input string ends. */
595 for( i_level = 0, i_last = 0; (size_t)( p_dst - p_start ) < i_dst && *p != '\0'; p++ )
597 const int c = b64[(unsigned int)*p];
/* Output byte 1: low 6 bits of the previous value + top 2 bits of c. */
607 *p_dst++ = ( i_last << 2 ) | ( ( c >> 4)&0x03 );
/* Output byte 2: low 4 bits of the previous value + top 4 bits of c. */
611 *p_dst++ = ( ( i_last << 4 )&0xf0 ) | ( ( c >> 2 )&0x0f );
/* Output byte 3: low 2 bits of the previous value + all 6 bits of c. */
615 *p_dst++ = ( ( i_last &0x03 ) << 6 ) | c;
621 return p_dst - p_start;
/**
 * Decodes Base64 text into a newly allocated buffer.
 *
 * \param pp_dst receives the allocated buffer (release it with free());
 * set to NULL when the allocation fails
 * \param psz_src NUL-terminated Base64 text
 * \return number of decoded bytes stored in *pp_dst, 0 on allocation failure
 */
size_t vlc_b64_decode_binary( uint8_t **pp_dst, const char *psz_src )
{
    /* Kept as size_t: narrowing the length to int could truncate it or turn
     * it negative before it reaches malloc(). The decoded data is never
     * longer than the encoded text, so i_src bytes are always enough. */
    const size_t i_src = strlen( psz_src );
    uint8_t *p_dst;

    *pp_dst = p_dst = malloc( i_src );
    if( p_dst == NULL )
        return 0;
    return vlc_b64_decode_binary_to_buffer( p_dst, i_src, psz_src );
}
/**
 * Decodes Base64 text into a newly allocated NUL-terminated string.
 *
 * \param psz_src NUL-terminated Base64 text
 * \return decoded string (release it with free()), or NULL on allocation
 * failure
 */
char *vlc_b64_decode( const char *psz_src )
{
    /* Kept as size_t: narrowing the length to int could truncate it or turn
     * it negative before it reaches malloc(). */
    const size_t i_src = strlen( psz_src );
    char *p_dst = malloc( i_src + 1 );
    size_t i_dst;

    if( p_dst == NULL )
        return NULL;

    /* The decoder writes at most i_src bytes, which leaves room for the NUL. */
    i_dst = vlc_b64_decode_binary_to_buffer( (uint8_t*)p_dst, i_src, psz_src );
    p_dst[i_dst] = '\0';

    return p_dst;
}
647 /****************************************************************************
648 * String formatting functions
649 ****************************************************************************/
/**
 * Expands a strftime() format string with the current local time.
 *
 * \param tformat strftime() format string
 * \return newly allocated expansion (release it with free()); an empty
 * string when the expansion is empty or exceeds 64 KiB; NULL when the local
 * time cannot be obtained or memory allocation fails
 */
char *str_format_time( const char *tformat )
{
    time_t curtime;
    struct tm loctime;

    /* Get the current time. */
    curtime = time( NULL );

    /* Convert it to local time representation. */
    if( localtime_r( &curtime, &loctime ) == NULL )
        return NULL;

    /* Nothing to expand: skip the buffer growth below. */
    if( *tformat == '\0' )
        return calloc( 1, 1 );

    /* strftime() returns 0 and leaves the buffer contents unspecified when
     * the result does not fit, so the buffer is grown until the expansion
     * fits. 0 is also returned for a legitimately empty expansion, hence
     * the upper bound on the growth. */
    for( size_t i_size = 256; i_size <= 65536; i_size *= 2 )
    {
        char *psz_buf = malloc( i_size );
        if( psz_buf == NULL )
            return NULL;

        const size_t i_len = strftime( psz_buf, i_size, tformat, &loctime );
        if( i_len > 0 )
        {
            /* Give back the unused tail; keep the larger block if that fails. */
            char *psz_fit = realloc( psz_buf, i_len + 1 );
            return ( psz_fit != NULL ) ? psz_fit : psz_buf;
        }
        free( psz_buf );
    }

    return calloc( 1, 1 );
}
/* Helper for __str_format_meta(): when `string` is not NULL, grows dst by
 * its length and copies it at offset d; otherwise a fallback branch runs
 * unless b_empty_if_na is set. It relies on the locals dst, d, i_size and
 * b_empty_if_na of the calling function.
 * NOTE(review): the argument is expanded several times, so an argument that
 * is a function call runs once per expansion; the result of realloc() is
 * assigned straight to dst, which loses the old block if it fails. The
 * remaining lines of the macro are not visible in this copy. */
665 #define INSERT_STRING( string ) \
666 if( string != NULL ) \
668 int len = strlen( string ); \
669 dst = realloc( dst, i_size = i_size + len );\
670 memcpy( (dst+d), string, len ); \
674 else if( !b_empty_if_na ) \
680 /* Same as INSERT_STRING, except that string won't be freed */
/* Grows dst by the length of `string` and copies it at offset d; relies on
 * the locals dst, d and i_size of the calling function.
 * NOTE(review): the result of realloc() is assigned straight to dst, which
 * loses the old block if it fails. The remaining lines of the macro are not
 * visible in this copy. */
681 #define INSERT_STRING_NO_FREE( string ) \
683 int len = strlen( string ); \
684 dst = realloc( dst, i_size = i_size + len );\
685 memcpy( dst+d, string, len ); \
/* Expands format specifiers in `string` with metadata and playback state of
 * the playlist's current input, building the result in the heap buffer dst.
 * NOTE(review): the scanning loop, the case labels selecting each expansion
 * and the final return are not visible in this copy, so the mapping from
 * format letters to the expansions below cannot be documented from here. */
688 char *__str_format_meta( vlc_object_t *p_object, const char *string )
690 const char *s = string;
691 bool b_is_format = false;
692 bool b_empty_if_na = false;
/* dst starts as a copy of the format string; i_size tracks its allocated
 * size and is increased by the INSERT_STRING* macros. */
694 int i_size = strlen( string ) + 1; /* +1 to store '\0' */
695 char *dst = strdup( string );
696 if( !dst ) return NULL;
/* Fetch the current input from the playlist; the playlist is released
 * right away, while p_input is released at the end of the function. */
699 playlist_t *p_playlist = pl_Hold( p_object );
700 input_thread_t *p_input = playlist_CurrentInput( p_playlist );
701 input_item_t *p_item = NULL;
702 pl_Release( p_object );
705 p_item = input_GetItem(p_input);
717 INSERT_STRING( input_item_GetArtist( p_item ) );
723 INSERT_STRING( input_item_GetAlbum( p_item ) );
729 INSERT_STRING( input_item_GetCopyright( p_item ) );
735 INSERT_STRING( input_item_GetDescription( p_item ) );
741 INSERT_STRING( input_item_GetEncodedBy( p_item ) );
/* Number of displayed pictures, when statistics are available. */
745 if( p_item && p_item->p_stats )
747 snprintf( buf, 10, "%d",
748 p_item->p_stats->i_displayed_pictures );
/* Fallback text: empty when b_empty_if_na is set, a dash otherwise. The
 * text is passed as the format string; both candidates are constants
 * without conversion specifiers. */
752 sprintf( buf, b_empty_if_na ? "" : "-" );
754 INSERT_STRING_NO_FREE( buf );
759 INSERT_STRING( input_item_GetGenre( p_item ) );
765 INSERT_STRING( input_item_GetLanguage( p_item ) );
771 INSERT_STRING( input_item_GetTrackNum( p_item ) );
777 INSERT_STRING( input_item_GetNowPlaying( p_item ) );
783 INSERT_STRING( input_item_GetRating( p_item ) );
/* Subtitle language, with the same empty-or-dash fallback as above. */
790 lang = var_GetNonEmptyString( p_input, "sub-language" );
792 lang = strdup( b_empty_if_na ? "" : "-" );
793 INSERT_STRING( lang );
799 INSERT_STRING( input_item_GetTitle( p_item ) );
805 INSERT_STRING( input_item_GetURL( p_item ) );
811 INSERT_STRING( input_item_GetDate( p_item ) );
/* Bit rate divided by 1000. */
817 snprintf( buf, 10, "%d",
818 var_GetInteger( p_input, "bit-rate" )/1000 );
822 sprintf( buf, b_empty_if_na ? "" : "-" );
824 INSERT_STRING_NO_FREE( buf );
829 snprintf( buf, 10, "%d",
830 var_GetInteger( p_input, "chapter" ) );
834 sprintf( buf, b_empty_if_na ? "" : "-" );
836 INSERT_STRING_NO_FREE( buf );
/* Duration as HH:MM:SS; the divisors imply a value in microseconds.
 * NOTE(review): this sprintf() is unbounded and the size of buf is not
 * visible here -- confirm it can hold the hours of a very long duration. */
841 mtime_t i_duration = input_item_GetDuration( p_item );
842 sprintf( buf, "%02d:%02d:%02d",
843 (int)(i_duration/(3600000000)),
844 (int)((i_duration/(60000000))%60),
845 (int)((i_duration/1000000)%60) );
849 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
851 INSERT_STRING_NO_FREE( buf );
856 INSERT_STRING( input_item_GetURI( p_item ) );
862 snprintf( buf, 10, "%d",
863 var_GetInteger( p_input, "title" ) );
867 sprintf( buf, b_empty_if_na ? "" : "-" );
869 INSERT_STRING_NO_FREE( buf );
/* Remaining time (duration minus current time) as HH:MM:SS. */
872 if( p_item && p_input )
874 mtime_t i_duration = input_item_GetDuration( p_item );
875 int64_t i_time = p_input->i_time;
876 sprintf( buf, "%02d:%02d:%02d",
877 (int)( ( i_duration - i_time ) / 3600000000 ),
878 (int)( ( ( i_duration - i_time ) / 60000000 ) % 60 ),
879 (int)( ( ( i_duration - i_time ) / 1000000 ) % 60 ) );
883 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
885 INSERT_STRING_NO_FREE( buf );
890 INSERT_STRING( input_item_GetName( p_item ) );
897 lang = var_GetNonEmptyString( p_input,
900 lang = strdup( b_empty_if_na ? "" : "-" );
901 INSERT_STRING( lang );
/* Position scaled by 100 (a percentage) with one decimal. */
907 snprintf( buf, 10, "%2.1lf",
908 var_GetFloat( p_input, "position" ) * 100. );
/* "%%" yields a literal '%' because the text is used as the format. */
912 sprintf( buf, b_empty_if_na ? "" : "--.-%%" );
914 INSERT_STRING_NO_FREE( buf );
/* NOTE(review): "%d.%d" does not zero-pad the remainder, so a rate of 1050
 * is printed as "1.50" -- confirm whether "%d.%03d" was intended. */
919 int r = var_GetInteger( p_input, "rate" );
920 snprintf( buf, 10, "%d.%d", r/1000, r%1000 );
924 sprintf( buf, b_empty_if_na ? "" : "-" );
926 INSERT_STRING_NO_FREE( buf );
/* Sample rate in thousands with a single decimal digit. */
931 int r = var_GetInteger( p_input, "sample-rate" );
932 snprintf( buf, 10, "%d.%d", r/1000, (r/100)%10 );
936 sprintf( buf, b_empty_if_na ? "" : "-" );
938 INSERT_STRING_NO_FREE( buf );
/* Current time as HH:MM:SS. */
943 sprintf( buf, "%02d:%02d:%02d",
944 (int)( p_input->i_time / ( 3600000000 ) ),
945 (int)( ( p_input->i_time / ( 60000000 ) ) % 60 ),
946 (int)( ( p_input->i_time / 1000000 ) % 60 ) );
950 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
952 INSERT_STRING_NO_FREE( buf );
957 INSERT_STRING( input_item_GetPublisher( p_item ) );
/* Audio volume as reported by aout_VolumeGet(). */
962 audio_volume_t volume;
963 aout_VolumeGet( p_object, &volume );
964 snprintf( buf, 10, "%d", volume );
965 INSERT_STRING_NO_FREE( buf );
/* b_empty_if_na selects "" instead of the dash placeholders above. */
974 b_empty_if_na = true;
988 b_empty_if_na = false;
/* Drop the reference on the input obtained from the playlist. */
1000 vlc_object_release( p_input );
1004 #undef INSERT_STRING
1005 #undef INSERT_STRING_NO_FREE
1008 * Apply str format time and str format meta
1010 char *__str_format( vlc_object_t *p_this, const char *psz_src )
1012 char *psz_buf1, *psz_buf2;
1013 psz_buf1 = str_format_time( psz_src );
1014 psz_buf2 = str_format_meta( p_this, psz_buf1 );
1020 * Remove forbidden characters from filenames (including slashes)
/* Sanitizes the file name held in str.
 * NOTE(review): most of this function is not visible in this copy; only the
 * visible lines are described. */
1022 void filename_sanitize( char *str )
/* Detects the names "." and ".." exactly. */
1024 if( *str == '.' && (str[1] == '\0' || (str[1] == '.' && str[2] == '\0' ) ) )
/* Platform-specific handling is selected at compile time. */
1039 #if defined( __APPLE__ )
1041 #elif defined( WIN32 )
1058 * Remove forbidden characters from full paths (leaves slashes)
1060 void path_sanitize( char *str )
1064 * Uncomment the two blocks to prevent /../ or /./, i'm not sure that we
1067 char *prev = str - 1;
1070 /* check drive prefix if path is absolute */
1071 if( isalpha(*str) && (':' == *(str+1)) )
1076 #if defined( __APPLE__ )
1079 #elif defined( WIN32 )
1099 if( str - prev == 2 && prev[1] == '.' )
1103 else if( str - prev == 3 && prev[1] == '.' && prev[2] == '.' )