1 /*****************************************************************************
2 * strings.c: String related functions
3 *****************************************************************************
4 * Copyright (C) 2006 the VideoLAN team
7 * Authors: Antoine Cellerier <dionoea at videolan dot org>
8 * Daniel Stranger <vlc at schmaller dot de>
9 * Rémi Denis-Courmont <rem # videolan org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
33 #include <vlc_common.h>
36 /* Needed by str_format_time */
39 /* Needed by str_format_meta */
40 #include <vlc_input.h>
42 #include <vlc_playlist.h>
45 #include <vlc_strings.h>
47 #include <vlc_charset.h>
/**
 * Unescape URI encoded string
 *
 * The input is left untouched: it is duplicated with strdup() and the copy
 * is handed to unescape_URI(), which works in place.
 *
 * \param psz nul-terminated escaped string (NULL is tolerated)
 * \return decoded duplicated string (to be released with free()), or NULL
 *         if psz is NULL or the copy could not be allocated
 */
char *unescape_URI_duplicate( const char *psz )
{
    if( psz == NULL )
        return NULL; /* strdup( NULL ) is undefined behaviour */

    char *psz_dup = strdup( psz );
    if( psz_dup == NULL )
        return NULL; /* out of memory: nothing to unescape */

    unescape_URI( psz_dup );
    return psz_dup;
}
61 * Unescape URI encoded string in place
64 void unescape_URI( char *psz )
66 unsigned char *in = (unsigned char *)psz, *out = in, c;
70 while( ( c = *in++ ) != '\0' )
76 char val[5], *pval = val;
86 if( ( *pval++ = *in++ ) == '\0' )
88 if( ( *pval++ = *in++ ) == '\0' )
94 if( ( *pval++ = *in++ ) == '\0' )
99 cp = strtoul( val, NULL, 0x10 );
105 *out++ = (( cp >> 6) | 0xc0);
106 *out++ = (( cp & 0x3f) | 0x80);
110 assert( cp < 0x10000 );
111 *out++ = (( cp >> 12) | 0xe0);
112 *out++ = (((cp >> 6) & 0x3f) | 0x80);
113 *out++ = (( cp & 0x3f) | 0x80);
118 /* + is not a special case - it means plus, not space. */
121 /* Inserting non-ASCII or non-printable characters is unsafe,
122 * and no sane browser will send these unencoded */
123 if( ( c < 32 ) || ( c > 127 ) )
/**
 * Decode encoded URI string
 *
 * The input is left untouched: it is duplicated with strdup() and the copy
 * is handed to decode_URI(), which works in place.
 *
 * \param psz nul-terminated encoded string (NULL is tolerated)
 * \return decoded duplicated string (to be released with free()), or NULL
 *         if psz is NULL or the copy could not be allocated
 */
char *decode_URI_duplicate( const char *psz )
{
    if( psz == NULL )
        return NULL; /* strdup( NULL ) is undefined behaviour */

    char *psz_dup = strdup( psz );
    if( psz_dup == NULL )
        return NULL; /* out of memory: nothing to decode */

    decode_URI( psz_dup );
    return psz_dup;
}
144 * Decode encoded URI string in place
147 void decode_URI( char *psz )
149 unsigned char *in = (unsigned char *)psz, *out = in, c;
153 while( ( c = *in++ ) != '\0' )
161 if( ( ( hex[0] = *in++ ) == 0 )
162 || ( ( hex[1] = *in++ ) == 0 ) )
166 *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
175 /* Inserting non-ASCII or non-printable characters is unsafe,
176 * and no sane browser will send these unencoded */
177 if( ( c < 32 ) || ( c > 127 ) )
/**
 * Tells whether a character may appear unescaped in a URI component.
 *
 * \param c character code to test
 * \return true if c is one of the _unreserved_ URI characters
 *         (RFC3986 §2.3): ASCII letters, ASCII digits, '-', '.', '_', '~';
 *         false for anything else, including the nul character
 */
static inline bool isurisafe( int c )
{
    /* Unsigned arithmetic: a value below the start of a range wraps around
     * to a huge number, so a single comparison checks both bounds, and no
     * narrowing to 8 bits can alias out-of-range values into the range. */
    const unsigned u = (unsigned)c;

    if( ( u - 'a' ) < 26u || ( u - 'A' ) < 26u || ( u - '0' ) < 10u )
        return true;

    /* Explicit cases instead of strchr(): strchr() would also match the
     * terminating nul of the character list. */
    switch( c )
    {
        case '-':
        case '.':
        case '_':
        case '~':
            return true;
        default:
            return false;
    }
}
/**
 * Encodes an URI component (RFC3986 §2).
 *
 * Every byte that isurisafe() rejects is written as '%' followed by two
 * upper-case hexadecimal digits; all other bytes are copied unchanged.
 *
 * @param psz_uri nul-terminated UTF-8 representation of the component.
 * Obviously, you can't pass an URI containing a nul character, but you don't
 * want to do that, do you?
 *
 * @return encoded string (must be free()'d), or NULL for ENOMEM.
 */
char *encode_URI_component( const char *psz_uri )
{
    static const char hex[16] = "0123456789ABCDEF";
    const size_t i_len = strlen( psz_uri );

    /* Worst case: every byte becomes "%XX". Refuse lengths for which
     * 3 * i_len + 1 would wrap around and under-allocate. */
    if( i_len > ( SIZE_MAX - 1 ) / 3 )
        return NULL;

    char *psz_enc = malloc( 3 * i_len + 1 );
    if( psz_enc == NULL )
        return NULL;

    char *out = psz_enc;
    for( ; *psz_uri != '\0'; psz_uri++ )
    {
        /* Go through an unsigned byte so that the shifts below index hex[]
         * correctly whatever the signedness of plain char. */
        const uint8_t c = (uint8_t)*psz_uri;

        if( isurisafe( c ) )
            *out++ = (char)c;
        else
        {
            /* This is URI encoding, not HTTP forms:
             * Space is encoded as '%20', not '+'. */
            *out++ = '%';
            *out++ = hex[c >> 4];
            *out++ = hex[c & 0xf];
        }
    }
    *out++ = '\0';

    /* Give back the unused tail of the worst-case allocation. */
    out = realloc( psz_enc, (size_t)( out - psz_enc ) );
    return out ? out : psz_enc; /* realloc() can fail (safe) */
}
235 static const struct xml_entity_s
240 } p_xml_entities[] = {
241 /* Important: this list has to be in alphabetical order (psz_entity-wise) */
242 { "AElig;", 6, "Æ" },
243 { "Aacute;", 7, "Á" },
244 { "Acirc;", 6, "Â" },
245 { "Agrave;", 7, "À" },
246 { "Aring;", 6, "Å" },
247 { "Atilde;", 7, "Ã" },
249 { "Ccedil;", 7, "Ç" },
250 { "Dagger;", 7, "‡" },
252 { "Eacute;", 7, "É" },
253 { "Ecirc;", 6, "Ê" },
254 { "Egrave;", 7, "È" },
256 { "Iacute;", 7, "Í" },
257 { "Icirc;", 6, "Î" },
258 { "Igrave;", 7, "Ì" },
260 { "Ntilde;", 7, "Ñ" },
261 { "OElig;", 6, "Œ" },
262 { "Oacute;", 7, "Ó" },
263 { "Ocirc;", 6, "Ô" },
264 { "Ograve;", 7, "Ò" },
265 { "Oslash;", 7, "Ø" },
266 { "Otilde;", 7, "Õ" },
268 { "Scaron;", 7, "Š" },
269 { "THORN;", 6, "Þ" },
270 { "Uacute;", 7, "Ú" },
271 { "Ucirc;", 6, "Û" },
272 { "Ugrave;", 7, "Ù" },
274 { "Yacute;", 7, "Ý" },
276 { "aacute;", 7, "á" },
277 { "acirc;", 6, "â" },
278 { "acute;", 6, "´" },
279 { "aelig;", 6, "æ" },
280 { "agrave;", 7, "à" },
281 { "aring;", 6, "å" },
282 { "atilde;", 7, "ã" },
284 { "bdquo;", 6, "„" },
285 { "brvbar;", 7, "¦" },
286 { "ccedil;", 7, "ç" },
287 { "cedil;", 6, "¸" },
291 { "curren;", 7, "¤" },
292 { "dagger;", 7, "†" },
294 { "divide;", 7, "÷" },
295 { "eacute;", 7, "é" },
296 { "ecirc;", 6, "ê" },
297 { "egrave;", 7, "è" },
301 { "frac12;", 7, "½" },
302 { "frac14;", 7, "¼" },
303 { "frac34;", 7, "¾" },
304 { "hellip;", 7, "…" },
305 { "iacute;", 7, "í" },
306 { "icirc;", 6, "î" },
307 { "iexcl;", 6, "¡" },
308 { "igrave;", 7, "ì" },
309 { "iquest;", 7, "¿" },
311 { "laquo;", 6, "«" },
312 { "ldquo;", 6, "“" },
313 { "lsaquo;", 7, "‹" },
314 { "lsquo;", 6, "‘" },
316 { "mdash;", 6, "—" },
317 { "micro;", 6, "µ" },
318 { "middot;", 7, "·" },
319 { "ndash;", 6, "–" },
321 { "ntilde;", 7, "ñ" },
322 { "oacute;", 7, "ó" },
323 { "ocirc;", 6, "ô" },
324 { "oelig;", 6, "œ" },
325 { "ograve;", 7, "ò" },
328 { "oslash;", 7, "ø" },
329 { "otilde;", 7, "õ" },
332 { "permil;", 7, "‰" },
333 { "plusmn;", 7, "±" },
334 { "pound;", 6, "£" },
335 { "raquo;", 6, "»" },
336 { "rdquo;", 6, "”" },
338 { "rsaquo;", 7, "›" },
339 { "rsquo;", 6, "’" },
340 { "sbquo;", 6, "‚" },
341 { "scaron;", 7, "š" },
347 { "szlig;", 6, "ß" },
348 { "thorn;", 6, "þ" },
349 { "tilde;", 6, "˜" },
350 { "times;", 6, "×" },
351 { "trade;", 6, "™" },
352 { "uacute;", 7, "ú" },
353 { "ucirc;", 6, "û" },
354 { "ugrave;", 7, "ù" },
357 { "yacute;", 7, "ý" },
363 * Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
364 * \param string to convert
366 void resolve_xml_special_chars( char *psz_value )
368 char *p_pos = psz_value;
372 if( *psz_value == '&' )
374 char *psz_value1 = psz_value + 1;
375 #define TRY_CHAR( src, len, dst ) \
376 if( !strncmp( psz_value1, src, len ) ) \
379 psz_value += len + 1; \
381 TRY_CHAR( "lt;", 3, '<' )
382 else TRY_CHAR( "amp;", 4, '&' )
383 else TRY_CHAR( "apos;", 5, '\'' )
384 else TRY_CHAR( "gt;", 3, '>' )
385 else TRY_CHAR( "quot;", 5, '"' )
387 else if( *psz_value1 == '#' )
390 int i = strtol( psz_value+2, &psz_end, 10 );
391 if( *psz_end == ';' )
393 if( i >= 32 && i <= 126 )
396 psz_value = psz_end+1;
400 /* Unhandled code, FIXME */
407 /* Invalid entity number */
414 const size_t i_entities = sizeof( p_xml_entities ) /
415 sizeof( p_xml_entities[0] );
416 assert( i_entities < 128 );
417 size_t step = 128>>1;
423 if( i >= i_entities )
426 cmp = strncmp( psz_value1, /* Skip the & */
427 p_xml_entities[i].psz_entity,
428 p_xml_entities[i].i_length );
431 size_t i_len = strlen( p_xml_entities[i].psz_char );
432 strncpy( p_pos, p_xml_entities[i].psz_char, i_len );
434 psz_value += p_xml_entities[i].i_length+1;
/**
 * Converts '<', '>', '\"', '\'' and '&' to their html entities
 *
 * The replacements are "&lt;", "&gt;", "&quot;", "&#039;" and "&amp;"; every
 * other byte is copied unchanged.
 *
 * \param psz_content simple element content that is to be converted
 * \return newly allocated escaped string (to be released with free()), or
 *         NULL if psz_content is NULL or memory could not be allocated
 */
char *convert_xml_special_chars( const char *psz_content )
{
    if( psz_content == NULL )
        return NULL;

    /* The longest replacement is 6 bytes, hence 6 output bytes per input
     * byte in the worst case. Refuse lengths whose size would wrap around. */
    const size_t i_len = strlen( psz_content );
    if( i_len > ( SIZE_MAX - 1 ) / 6 )
        return NULL;

    char *psz_temp = malloc( 6 * i_len + 1 );
    if( psz_temp == NULL )
        return NULL;

    char *p_to = psz_temp;
    for( const char *p_from = psz_content; *p_from != '\0'; p_from++ )
    {
        const char *psz_entity;

        switch( *p_from )
        {
            case '<':  psz_entity = "&lt;";   break;
            case '>':  psz_entity = "&gt;";   break;
            case '&':  psz_entity = "&amp;";  break;
            case '\"': psz_entity = "&quot;"; break;
            case '\'': psz_entity = "&#039;"; break;
            default:   psz_entity = NULL;     break;
        }

        if( psz_entity == NULL )
        {
            *p_to++ = *p_from; /* ordinary character: plain copy */
            continue;
        }

        const size_t i_entity = strlen( psz_entity );
        memcpy( p_to, psz_entity, i_entity );
        p_to += i_entity;
    }
    *p_to = '\0';

    return psz_temp;
}
/**
 * Base64 encoding
 *
 * Encodes a binary buffer with the alphabet "A-Z a-z 0-9 + /" and pads the
 * last group with '=' so that the output length is a multiple of 4.
 *
 * \param src   bytes to encode (not read when i_src is 0)
 * \param i_src number of bytes at src
 * \return nul-terminated encoded string (to be released with free()), or
 *         NULL if memory could not be allocated
 */
char *vlc_b64_encode_binary( const uint8_t *src, size_t i_src )
{
    static const char b64[] =
           "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /* Each (possibly partial) group of 3 input bytes yields 4 characters. */
    const size_t i_groups = i_src / 3 + ( ( i_src % 3 ) != 0 );
    if( i_groups > ( SIZE_MAX - 1 ) / 4 )
        return NULL; /* the size computation would wrap around */

    char *ret = malloc( 4 * i_groups + 1 );
    if( ret == NULL )
        return NULL;

    char *dst = ret;
    while( i_src > 0 )
    {
        /* pops (up to) 3 bytes of input, push 4 bytes */
        const size_t i_chunk = ( i_src < 3 ) ? i_src : 3;

        /* Widen to uint32_t before shifting: a uint8_t operand would be
         * promoted to (signed) int. Missing bytes count as zero bits. */
        uint32_t v = (uint32_t)src[0] << 16;
        if( i_chunk >= 2 )
            v |= (uint32_t)src[1] << 8;
        if( i_chunk >= 3 )
            v |= (uint32_t)src[2];

        *dst++ = b64[( v >> 18 ) & 0x3f];                        /* 1/4 */
        *dst++ = b64[( v >> 12 ) & 0x3f];                        /* 2/4 */
        *dst++ = ( i_chunk >= 2 ) ? b64[( v >> 6 ) & 0x3f] : '='; /* 3/4 */
        *dst++ = ( i_chunk >= 3 ) ? b64[v & 0x3f] : '=';          /* 4/4 */

        src += i_chunk;
        i_src -= i_chunk;
    }
    *dst = '\0';

    return ret;
}
/**
 * Base64-encodes a nul-terminated string.
 *
 * \param src string to encode; NULL is encoded like the empty string
 * \return the result of vlc_b64_encode_binary() for the bytes of the string
 *         (terminating nul excluded)
 */
char *vlc_b64_encode( const char *src )
{
    /* Map the NULL case onto an empty string so one call serves both. */
    const char *psz_in = ( src != NULL ) ? src : "";

    return vlc_b64_encode_binary( (const uint8_t*)psz_in, strlen( psz_in ) );
}
565 /* Base64 decoding */
566 size_t vlc_b64_decode_binary_to_buffer( uint8_t *p_dst, size_t i_dst, const char *p_src )
568 static const int b64[256] = {
569 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 00-0F */
570 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 10-1F */
571 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* 20-2F */
572 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 30-3F */
573 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* 40-4F */
574 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* 50-5F */
575 -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* 60-6F */
576 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* 70-7F */
577 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 80-8F */
578 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 90-9F */
579 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* A0-AF */
580 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* B0-BF */
581 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* C0-CF */
582 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* D0-DF */
583 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* E0-EF */
584 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* F0-FF */
586 uint8_t *p_start = p_dst;
587 uint8_t *p = (uint8_t *)p_src;
592 for( i_level = 0, i_last = 0; (size_t)( p_dst - p_start ) < i_dst && *p != '\0'; p++ )
594 const int c = b64[(unsigned int)*p];
604 *p_dst++ = ( i_last << 2 ) | ( ( c >> 4)&0x03 );
608 *p_dst++ = ( ( i_last << 4 )&0xf0 ) | ( ( c >> 2 )&0x0f );
612 *p_dst++ = ( ( i_last &0x03 ) << 6 ) | c;
618 return p_dst - p_start;
/**
 * Base64-decodes a string into a newly allocated binary buffer.
 *
 * \param pp_dst  receives the output buffer (to be released with free());
 *                set to NULL when the allocation fails
 * \param psz_src nul-terminated Base64 text
 * \return number of bytes vlc_b64_decode_binary_to_buffer() stored in
 *         *pp_dst, or 0 if the buffer could not be allocated
 */
size_t vlc_b64_decode_binary( uint8_t **pp_dst, const char *psz_src )
{
    /* size_t rather than int: strlen() may exceed INT_MAX. The same value is
     * passed to the decoder as the output limit, so it cannot write past the
     * allocation. */
    const size_t i_src = strlen( psz_src );

    uint8_t *p_dst = malloc( i_src );
    *pp_dst = p_dst;
    if( p_dst == NULL )
        return 0;

    return vlc_b64_decode_binary_to_buffer( p_dst, i_src, psz_src );
}
/**
 * Base64-decodes a string into a newly allocated nul-terminated string.
 *
 * \param psz_src nul-terminated Base64 text
 * \return decoded bytes followed by a terminating nul (to be released with
 *         free()), or NULL if memory could not be allocated
 */
char *vlc_b64_decode( const char *psz_src )
{
    /* size_t rather than int: strlen() may exceed INT_MAX. */
    const size_t i_src = strlen( psz_src );

    /* One spare byte for the terminator: the decoder is limited to i_src
     * bytes of output. */
    char *p_dst = malloc( i_src + 1 );
    if( p_dst == NULL )
        return NULL;

    const size_t i_dst =
        vlc_b64_decode_binary_to_buffer( (uint8_t*)p_dst, i_src, psz_src );
    p_dst[i_dst] = '\0';

    return p_dst;
}
644 /****************************************************************************
645 * String formatting functions
646 ****************************************************************************/
/**
 * Expands strftime() conversion specifications with the current local time.
 *
 * \param tformat strftime() format string
 * \return newly allocated string (to be released with free()). It is empty
 *         when the time cannot be converted or when the expansion does not
 *         fit in the internal 255-byte buffer. NULL if the copy cannot be
 *         allocated.
 */
char *str_format_time( const char *tformat )
{
    char buffer[255];
    struct tm loctime;

    /* Get the current time. */
    const time_t curtime = time( NULL );

    /* Convert it to local time representation. */
    if( localtime_r( &curtime, &loctime ) == NULL )
        return strdup( "" );

    /* strftime() returns 0 and leaves the buffer contents unspecified when
     * the result does not fit: never duplicate the buffer in that case. */
    if( strftime( buffer, sizeof( buffer ), tformat, &loctime ) == 0 )
        buffer[0] = '\0';

    return strdup( buffer );
}
662 #define INSERT_STRING( string ) \
663 if( string != NULL ) \
665 int len = strlen( string ); \
666 dst = realloc( dst, i_size = i_size + len );\
667 memcpy( (dst+d), string, len ); \
671 else if( !b_empty_if_na ) \
677 /* Same as INSERT_STRING, except that string won't be freed */
678 #define INSERT_STRING_NO_FREE( string ) \
680 int len = strlen( string ); \
681 dst = realloc( dst, i_size = i_size + len );\
682 memcpy( dst+d, string, len ); \
685 char *__str_format_meta( vlc_object_t *p_object, const char *string )
687 const char *s = string;
688 bool b_is_format = false;
689 bool b_empty_if_na = false;
691 int i_size = strlen( string ) + 1; /* +1 to store '\0' */
692 char *dst = strdup( string );
693 if( !dst ) return NULL;
696 playlist_t *p_playlist = pl_Hold( p_object );
697 input_thread_t *p_input = playlist_CurrentInput( p_playlist );
698 input_item_t *p_item = NULL;
699 pl_Release( p_object );
702 p_item = input_GetItem(p_input);
714 INSERT_STRING( input_item_GetArtist( p_item ) );
720 INSERT_STRING( input_item_GetAlbum( p_item ) );
726 INSERT_STRING( input_item_GetCopyright( p_item ) );
732 INSERT_STRING( input_item_GetDescription( p_item ) );
738 INSERT_STRING( input_item_GetEncodedBy( p_item ) );
742 if( p_item && p_item->p_stats )
744 snprintf( buf, 10, "%d",
745 p_item->p_stats->i_displayed_pictures );
749 sprintf( buf, b_empty_if_na ? "" : "-" );
751 INSERT_STRING_NO_FREE( buf );
756 INSERT_STRING( input_item_GetGenre( p_item ) );
762 INSERT_STRING( input_item_GetLanguage( p_item ) );
768 INSERT_STRING( input_item_GetTrackNum( p_item ) );
774 INSERT_STRING( input_item_GetNowPlaying( p_item ) );
780 INSERT_STRING( input_item_GetRating( p_item ) );
787 lang = var_GetNonEmptyString( p_input, "sub-language" );
789 lang = strdup( b_empty_if_na ? "" : "-" );
790 INSERT_STRING( lang );
796 INSERT_STRING( input_item_GetTitle( p_item ) );
802 INSERT_STRING( input_item_GetURL( p_item ) );
808 INSERT_STRING( input_item_GetDate( p_item ) );
814 snprintf( buf, 10, "%d",
815 var_GetInteger( p_input, "bit-rate" )/1000 );
819 sprintf( buf, b_empty_if_na ? "" : "-" );
821 INSERT_STRING_NO_FREE( buf );
826 snprintf( buf, 10, "%d",
827 var_GetInteger( p_input, "chapter" ) );
831 sprintf( buf, b_empty_if_na ? "" : "-" );
833 INSERT_STRING_NO_FREE( buf );
838 mtime_t i_duration = input_item_GetDuration( p_item );
839 sprintf( buf, "%02d:%02d:%02d",
840 (int)(i_duration/(3600000000)),
841 (int)((i_duration/(60000000))%60),
842 (int)((i_duration/1000000)%60) );
846 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
848 INSERT_STRING_NO_FREE( buf );
853 INSERT_STRING( input_item_GetURI( p_item ) );
859 snprintf( buf, 10, "%d",
860 var_GetInteger( p_input, "title" ) );
864 sprintf( buf, b_empty_if_na ? "" : "-" );
866 INSERT_STRING_NO_FREE( buf );
869 if( p_item && p_input )
871 mtime_t i_duration = input_item_GetDuration( p_item );
872 int64_t i_time = p_input->i_time;
873 sprintf( buf, "%02d:%02d:%02d",
874 (int)( ( i_duration - i_time ) / 3600000000 ),
875 (int)( ( ( i_duration - i_time ) / 60000000 ) % 60 ),
876 (int)( ( ( i_duration - i_time ) / 1000000 ) % 60 ) );
880 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
882 INSERT_STRING_NO_FREE( buf );
887 INSERT_STRING( input_item_GetName( p_item ) );
894 lang = var_GetNonEmptyString( p_input,
897 lang = strdup( b_empty_if_na ? "" : "-" );
898 INSERT_STRING( lang );
904 snprintf( buf, 10, "%2.1lf",
905 var_GetFloat( p_input, "position" ) * 100. );
909 sprintf( buf, b_empty_if_na ? "" : "--.-%%" );
911 INSERT_STRING_NO_FREE( buf );
916 int r = var_GetInteger( p_input, "rate" );
917 snprintf( buf, 10, "%d.%d", r/1000, r%1000 );
921 sprintf( buf, b_empty_if_na ? "" : "-" );
923 INSERT_STRING_NO_FREE( buf );
928 int r = var_GetInteger( p_input, "sample-rate" );
929 snprintf( buf, 10, "%d.%d", r/1000, (r/100)%10 );
933 sprintf( buf, b_empty_if_na ? "" : "-" );
935 INSERT_STRING_NO_FREE( buf );
940 sprintf( buf, "%02d:%02d:%02d",
941 (int)( p_input->i_time / ( 3600000000 ) ),
942 (int)( ( p_input->i_time / ( 60000000 ) ) % 60 ),
943 (int)( ( p_input->i_time / 1000000 ) % 60 ) );
947 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
949 INSERT_STRING_NO_FREE( buf );
954 INSERT_STRING( input_item_GetPublisher( p_item ) );
959 audio_volume_t volume;
960 aout_VolumeGet( p_object, &volume );
961 snprintf( buf, 10, "%d", volume );
962 INSERT_STRING_NO_FREE( buf );
971 b_empty_if_na = true;
985 b_empty_if_na = false;
997 vlc_object_release( p_input );
1001 #undef INSERT_STRING
1002 #undef INSERT_STRING_NO_FREE
1005 * Apply str format time and str format meta
1007 char *__str_format( vlc_object_t *p_this, const char *psz_src )
1009 char *psz_buf1, *psz_buf2;
1010 psz_buf1 = str_format_time( psz_src );
1011 psz_buf2 = str_format_meta( p_this, psz_buf1 );
1017 * Remove forbidden characters from filenames (including slashes)
1019 void filename_sanitize( char *str )
1021 if( *str == '.' && (str[1] == '\0' || (str[1] == '.' && str[2] == '\0' ) ) )
1036 #if defined( __APPLE__ )
1038 #elif defined( WIN32 )
1055 * Remove forbidden characters from full paths (leaves slashes)
1057 void path_sanitize( char *str )
1061 * Uncomment the two blocks to prevent /../ or /./, i'm not sure that we
1064 char *prev = str - 1;
1067 /* check drive prefix if path is absolute */
1068 if( isalpha(*str) && (':' == *(str+1)) )
1073 #if defined( __APPLE__ )
1076 #elif defined( WIN32 )
1096 if( str - prev == 2 && prev[1] == '.' )
1100 else if( str - prev == 3 && prev[1] == '.' && prev[2] == '.' )