1 /*****************************************************************************
2 * strings.c: String related functions
3 *****************************************************************************
4 * Copyright (C) 2006 the VideoLAN team
5 * Copyright (C) 2008-2009 Rémi Denis-Courmont
8 * Authors: Antoine Cellerier <dionoea at videolan dot org>
9 * Daniel Stranger <vlc at schmaller dot de>
10 * Rémi Denis-Courmont <rem # videolan org>
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25 *****************************************************************************/
27 /*****************************************************************************
29 *****************************************************************************/
34 #include <vlc_common.h>
37 /* Needed by str_format_time */
41 /* Needed by str_format_meta */
42 #include <vlc_input.h>
44 #include <vlc_playlist.h>
47 #include <vlc_strings.h>
49 #include <vlc_charset.h>
/**
 * Unescape URI encoded string
 *
 * The input is left untouched; decoding is done by unescape_URI() on a
 * private copy.
 *
 * \param psz nul-terminated escaped string
 * \return decoded duplicated string (must be free()'d),
 *         or NULL if the copy could not be allocated
 */
char *unescape_URI_duplicate( const char *psz )
{
    char *psz_dup = strdup( psz );

    if( psz_dup != NULL )
        unescape_URI( psz_dup );
    return psz_dup;
}
/** Value of hexadecimal digit c, or -1 when c is not a hexadecimal digit. */
static int unescape_hex_value( unsigned char c )
{
    if( ( c >= '0' ) && ( c <= '9' ) )
        return c - '0';
    if( ( c >= 'a' ) && ( c <= 'f' ) )
        return c - 'a' + 10;
    if( ( c >= 'A' ) && ( c <= 'F' ) )
        return c - 'A' + 10;
    return -1;
}

/**
 * Unescape URI encoded string in place
 *
 * Understands "%XX" (two hexadecimal digits) and the Javascript-style
 * "%uXXXX" / "%UXXXX" (four hexadecimal digits). Decoded code points
 * above 0x7F are written as UTF-8. The output is never longer than the
 * input, so decoding in place is safe.
 *
 * - An escape cut short by the end of the string stops decoding; the
 *   output is terminated at that point.
 * - An escape containing a non-hexadecimal character yields '?'.
 * - Unescaped bytes below 32 or above 127 are replaced with '?'.
 *
 * \param psz nul-terminated string to decode (NULL is tolerated)
 */
void unescape_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    if( psz == NULL )
        return;

    while( ( c = *in++ ) != '\0' )
    {
        if( c != '%' )
        {
            /* + is not a special case - it means plus, not space. */

            /* Inserting non-ASCII or non-printable characters is unsafe,
             * and no sane browser will send these unencoded */
            if( ( c < 32 ) || ( c > 127 ) )
                *out++ = '?';
            else
                *out++ = c;
            continue;
        }

        unsigned i_digits = 2;
        if( ( *in == 'u' ) || ( *in == 'U' ) )
        {
            i_digits = 4;
            in++;
        }

        unsigned long cp = 0;
        bool b_valid = true;

        for( unsigned i = 0; i < i_digits; i++ )
        {
            /* Check for the terminator BEFORE consuming a byte, so that a
             * truncated escape can never walk past the end of the string */
            if( *in == '\0' )
            {
                *out = '\0';
                return;
            }

            const int i_val = unescape_hex_value( *in++ );
            if( i_val < 0 )
                b_valid = false;
            else
                cp = ( cp << 4 ) | (unsigned long)i_val;
        }

        if( !b_valid )
            *out++ = '?';
        else
        if( cp < 0x80 )
            *out++ = (unsigned char)cp;
        else
        if( cp < 0x800 )
        {
            *out++ = (unsigned char)( ( cp >> 6 )          | 0xc0 );
            *out++ = (unsigned char)( ( cp         & 0x3f ) | 0x80 );
        }
        else
        {   /* at most four hex digits were read, hence cp < 0x10000 */
            *out++ = (unsigned char)( ( cp >> 12 )          | 0xe0 );
            *out++ = (unsigned char)( ( ( cp >> 6 ) & 0x3f ) | 0x80 );
            *out++ = (unsigned char)( ( cp          & 0x3f ) | 0x80 );
        }
    }
    *out = '\0';
}
/**
 * Decode encoded URI component. See also decode_URI().
 *
 * The input is left untouched; decoding is done on a private copy. The
 * result of decode_URI() is not inspected, so the copy is returned even
 * when the input was not properly encoded.
 *
 * \param psz nul-terminated encoded component
 * \return decoded duplicated string (must be free()'d),
 *         or NULL if the copy could not be allocated
 */
char *decode_URI_duplicate( const char *psz )
{
    char *psz_dup = strdup( psz );

    if( psz_dup != NULL )
        decode_URI( psz_dup );
    return psz_dup;
}
/** Value of hexadecimal digit c, or -1 when c is not a hexadecimal digit. */
static int decode_hex_value( unsigned char c )
{
    if( ( c >= '0' ) && ( c <= '9' ) )
        return c - '0';
    if( ( c >= 'a' ) && ( c <= 'f' ) )
        return c - 'a' + 10;
    if( ( c >= 'A' ) && ( c <= 'F' ) )
        return c - 'A' + 10;
    return -1;
}

/**
 * Decode an encoded URI component in place.
 * <b>This function does NOT decode entire URIs.</b>
 * It decodes components (e.g. host name, directory, file name).
 * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
 * Complete URIs are always "encoded" (or they are syntactically invalid).
 *
 * Note that URI encoding is different from Javascript escaping. Especially,
 * white spaces and Unicode non-ASCII code points are encoded differently.
 *
 * '+' is turned into a space, and unescaped bytes below 32 or above 127 are
 * replaced with '?'. When NULL is returned the buffer may already have been
 * partially rewritten.
 *
 * \param psz nul-terminated component to decode (NULL is tolerated)
 * \return psz on success, NULL if it was not properly encoded
 *         (truncated escape or non-hexadecimal digit) or if psz is NULL
 */
char *decode_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    if( psz == NULL )
        return NULL;

    while( ( c = *in++ ) != '\0' )
    {
        switch( c )
        {
            case '%':
            {
                /* in[1] is only inspected once in[0] is known to be a hex
                 * digit, i.e. not the terminator: no read past the end */
                const int i_hi = decode_hex_value( in[0] );
                const int i_lo = ( i_hi >= 0 ) ? decode_hex_value( in[1] )
                                               : -1;
                if( i_lo < 0 )
                    return NULL;

                in += 2;
                *out++ = (unsigned char)( ( i_hi << 4 ) | i_lo );
                break;
            }

            case '+': /* This is HTTP forms, not URI decoding... */
                *out++ = ' ';
                break;

            default:
                /* Inserting non-ASCII or non-printable characters is unsafe,
                 * and no sane browser will send these unencoded */
                if( ( c < 32 ) || ( c > 127 ) )
                    *out++ = '?';
                else
                    *out++ = c;
        }
    }
    *out = '\0';
    return psz;
}
/**
 * Tells whether a byte may appear unencoded in a URI component.
 *
 * \param c byte value to test
 * \return true for ASCII letters, digits and '-', '.', '_', '~'
 */
static inline bool isurisafe( int c )
{
    /* strchr() also matches the terminating nul of its haystack,
     * so nul has to be rejected explicitly */
    if( c == '\0' )
        return false;

    /* These are the _unreserved_ URI characters (RFC3986 §2.3) */
    return ( (unsigned char)( c - 'a' ) < 26 )
        || ( (unsigned char)( c - 'A' ) < 26 )
        || ( (unsigned char)( c - '0' ) < 10 )
        || ( strchr( "-._~", c ) != NULL );
}
/**
 * Percent-encodes a run of bytes for use as a URI component.
 *
 * Bytes accepted by isurisafe() are copied verbatim; every other byte
 * becomes "%XX" with upper-case hexadecimal digits.
 *
 * \param psz_uri bytes to encode (need not be nul-terminated)
 * \param len number of bytes to read from psz_uri
 * \return nul-terminated encoded string (must be free()'d), or NULL if the
 *         output size is not representable or memory is exhausted
 */
static char *encode_URI_bytes (const char *psz_uri, size_t len)
{
    static const char hex[16] = "0123456789ABCDEF";

    /* Worst case is three output bytes per input byte, plus the nul */
    if (len > (SIZE_MAX - 1) / 3)
        return NULL;

    char *psz_enc = malloc (3 * len + 1), *out = psz_enc;
    if (psz_enc == NULL)
        return NULL;

    for (size_t i = 0; i < len; i++)
    {
        uint8_t c = (uint8_t)psz_uri[i];

        if (isurisafe (c))
            *out++ = c;
        /* This is URI encoding, not HTTP forms:
         * Space is encoded as '%20', not '+'. */
        else
        {
            *out++ = '%';
            *out++ = hex[c >> 4];
            *out++ = hex[c & 0xf];
        }
    }
    *out++ = '\0';

    /* Give back the unused tail of the worst-case allocation */
    out = realloc (psz_enc, out - psz_enc);
    return out ? out : psz_enc; /* realloc() can fail (safe) */
}
/**
 * Encodes an URI component (RFC3986 §2).
 *
 * @param psz_uri nul-terminated UTF-8 representation of the component.
 * The component ends at its first nul character, so a component containing
 * an embedded nul cannot be passed through this function.
 *
 * @return encoded string (must be free()'d), or NULL for ENOMEM.
 */
char *encode_URI_component( const char *psz_uri )
{
    const size_t i_len = strlen (psz_uri);

    return encode_URI_bytes (psz_uri, i_len);
}
/* Table of named XML entities. As the visible row shows, each entry pairs an
 * entity name written without its leading '&' but with the trailing ';'
 * ("nbsp;") with the bytes to substitute for it ("\xc2\xa0"). The table is
 * searched with bsearch() and cmp_entity() by resolve_xml_special_chars(),
 * which is why the ordering noted below matters. Only one row of the table
 * is visible in this excerpt. */
252 static const struct xml_entity_s
257 /* Important: this list has to be in alphabetical order (psz_entity-wise) */
339 { "nbsp;", "\xc2\xa0" },
384 static int cmp_entity (const void *key, const void *elem)
386 const struct xml_entity_s *ent = elem;
387 const char *name = key;
389 return strncmp (name, ent->psz_entity, strlen (ent->psz_entity));
393 * Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
394 * \param string to convert
/* Rewrites psz_value in place, replacing XML character references with the
 * characters they denote. psz_value is the read cursor and p_pos the write
 * cursor into the same buffer. Two forms are handled by the visible code:
 * numeric references ("&#" followed by a number and ';') and named entities
 * looked up in xml_entities. Parts of the body are not visible in this
 * excerpt. */
396 void resolve_xml_special_chars( char *psz_value )
398 char *p_pos = psz_value;
402 if( *psz_value == '&' )
404 if( psz_value[1] == '#' )
405 { /* &#xxx; Unicode code point */
/* The number is parsed in base 10 only. NOTE(review): strtoul() also accepts
 * leading white space and a sign, and "&#x..;" hexadecimal references are not
 * recognised by this call -- confirm whether that is intended. */
407 unsigned long cp = strtoul( psz_value+2, &psz_end, 10 );
408 if( *psz_end == ';' )
410 psz_value = psz_end + 1;
412 (void)0; /* skip nuls */
419 /* Unicode code point outside ASCII.
420 * &#xxx; representation is longer than UTF-8 :) */
/* Two-byte UTF-8 sequence; the last byte is stored without advancing p_pos */
423 *p_pos++ = 0xC0 | (cp >> 6);
424 *p_pos = 0x80 | (cp & 0x3F);
/* Three-byte UTF-8 sequence */
429 *p_pos++ = 0xE0 | (cp >> 12);
430 *p_pos++ = 0x80 | ((cp >> 6) & 0x3F);
431 *p_pos = 0x80 | (cp & 0x3F);
434 if( cp <= 0x1FFFFF ) /* Outside the BMP */
435 { /* Unicode stops at 10FFFF, but who cares? */
436 *p_pos++ = 0xF0 | (cp >> 18);
437 *p_pos++ = 0x80 | ((cp >> 12) & 0x3F);
438 *p_pos++ = 0x80 | ((cp >> 6) & 0x3F);
439 *p_pos = 0x80 | (cp & 0x3F);
444 /* Invalid entity number */
450 { /* Well-known XML entity */
451 const struct xml_entity_s *ent;
/* The key starts just after the '&'; cmp_entity() compares only as many
 * bytes as the candidate entity name is long */
453 ent = bsearch (psz_value + 1, xml_entities,
454 sizeof (xml_entities) / sizeof (*ent),
455 sizeof (*ent), cmp_entity);
458 size_t olen = strlen (ent->psz_char);
459 memcpy (p_pos, ent->psz_char, olen);
/* Skip the '&' plus the entity name (the name includes its ';') */
461 psz_value += strlen (ent->psz_entity) + 1;
/**
 * Converts '<', '>', '\"', '\'' and '&' to their html entities
 * ("&lt;", "&gt;", "&quot;", "&#039;" and "&amp;" respectively).
 * All other bytes are copied unchanged.
 *
 * \param psz_content simple element content that is to be converted
 * \return newly allocated escaped string (must be free()'d),
 *         or NULL if memory is exhausted
 */
char *convert_xml_special_chars( const char *psz_content )
{
    /* The longest replacement is 6 bytes per input byte */
    char *psz_temp = malloc( 6 * strlen( psz_content ) + 1 );
    if( psz_temp == NULL )
        return NULL;

    char *p_to = psz_temp;

    for( const char *p_from = psz_content; *p_from != '\0'; p_from++ )
    {
        const char *psz_entity;

        switch( *p_from )
        {
            case '<':
                psz_entity = "&lt;";
                break;
            case '>':
                psz_entity = "&gt;";
                break;
            case '&':
                psz_entity = "&amp;";
                break;
            case '\"':
                psz_entity = "&quot;";
                break;
            case '\'':
                psz_entity = "&#039;";
                break;
            default:
                *p_to++ = *p_from;
                continue; /* next input byte */
        }

        const size_t i_len = strlen( psz_entity );
        memcpy( p_to, psz_entity, i_len );
        p_to += i_len;
    }
    *p_to = '\0';

    return psz_temp;
}
/* Base64 encoding */

/**
 * Encodes a byte buffer as padded Base64 text.
 *
 * \param src bytes to encode
 * \param i_src number of bytes to read from src
 * \return nul-terminated Base64 string (must be free()'d),
 *         or NULL if memory is exhausted
 */
char *vlc_b64_encode_binary( const uint8_t *src, size_t i_src )
{
    static const char b64[] =
           "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    char *ret = malloc( ( i_src + 4 ) * 4 / 3 );
    char *dst = ret;

    if( dst == NULL )
        return NULL;

    while( i_src > 0 )
    {
        /* pops (up to) 3 bytes of input, push 4 bytes */
        uint32_t v;

        /* The input bytes are widened to uint32_t BEFORE shifting: a plain
         * uint8_t promotes to signed int, and shifting a value >= 0x80 left
         * by 24 would overflow it. */

        /* 1/3 -> 1/4 */
        v = (uint32_t)*src++ << 24;
        *dst++ = b64[v >> 26];
        v <<= 6;

        /* 2/3 -> 2/4 */
        if( i_src >= 2 )
            v |= (uint32_t)*src++ << 22;
        *dst++ = b64[v >> 26];
        v <<= 6;

        /* 3/3 -> 3/4 */
        if( i_src >= 3 )
            v |= (uint32_t)*src++ << 20;
        *dst++ = ( i_src >= 2 ) ? b64[v >> 26] : '=';
        v <<= 6;

        /* -> 4/4 */
        *dst++ = ( i_src >= 3 ) ? b64[v >> 26] : '=';

        if( i_src <= 3 )
            break;
        i_src -= 3;
    }

    *dst = '\0';

    return ret;
}
/**
 * Encodes a nul-terminated string as Base64 text.
 *
 * \param src string to encode; NULL is encoded like the empty string
 * \return whatever vlc_b64_encode_binary() returns for those bytes
 */
char *vlc_b64_encode( const char *src )
{
    if( src == NULL )
        return vlc_b64_encode_binary( (const uint8_t*)"", 0 );

    return vlc_b64_encode_binary( (const uint8_t*)src, strlen(src) );
}
/* Base64 decoding */

/**
 * Decodes Base64 text into a caller-provided buffer.
 *
 * Every byte that is not part of the Base64 alphabet (including '=' padding)
 * is skipped. Decoding stops at the end of the text or as soon as i_dst
 * bytes have been written, whichever comes first.
 *
 * \param p_dst output buffer
 * \param i_dst capacity of p_dst in bytes
 * \param p_src nul-terminated Base64 text
 * \return number of bytes written to p_dst
 */
size_t vlc_b64_decode_binary_to_buffer( uint8_t *p_dst, size_t i_dst, const char *p_src )
{
    static const int b64[256] = {
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 00-0F */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 10-1F */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,  /* 20-2F */
        52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,  /* 30-3F */
        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,  /* 40-4F */
        15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,  /* 50-5F */
        -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,  /* 60-6F */
        41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,  /* 70-7F */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 80-8F */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 90-9F */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* A0-AF */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* B0-BF */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* C0-CF */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* D0-DF */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* E0-EF */
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* F0-FF */
    };
    uint8_t *p_start = p_dst;
    const uint8_t *p = (const uint8_t *)p_src;

    int i_level; /* position (0-3) inside the current group of 4 symbols */
    int i_last;  /* value of the previous symbol */

    for( i_level = 0, i_last = 0; (size_t)( p_dst - p_start ) < i_dst && *p != '\0'; p++ )
    {
        const int c = b64[(unsigned int)*p];
        if( c == -1 )
            continue;

        switch( i_level )
        {
            case 0:
                /* first symbol: nothing to emit yet */
                i_level++;
                break;
            case 1:
                *p_dst++ = ( i_last << 2 ) | ( ( c >> 4)&0x03 );
                i_level++;
                break;
            case 2:
                *p_dst++ = ( ( i_last << 4 )&0xf0 ) | ( ( c >> 2 )&0x0f );
                i_level++;
                break;
            case 3:
                *p_dst++ = ( ( i_last &0x03 ) << 6 ) | c;
                i_level = 0;
                break;
        }
        i_last = c;
    }

    return p_dst - p_start;
}
/**
 * Decodes Base64 text into a newly allocated byte buffer.
 *
 * The buffer is sized after the length of the text, which is always at
 * least as large as the decoded data.
 *
 * \param pp_dst receives the allocated buffer (must be free()'d),
 *               or NULL if the allocation failed
 * \param psz_src nul-terminated Base64 text
 * \return number of decoded bytes stored in *pp_dst (0 on allocation failure)
 */
size_t vlc_b64_decode_binary( uint8_t **pp_dst, const char *psz_src )
{
    /* size_t, not int: the length must not be narrowed */
    const size_t i_src = strlen( psz_src );
    uint8_t *p_dst;

    *pp_dst = p_dst = malloc( i_src );
    if( !p_dst )
        return 0;
    return vlc_b64_decode_binary_to_buffer( p_dst, i_src, psz_src );
}
/**
 * Decodes Base64 text into a newly allocated nul-terminated string.
 *
 * \param psz_src nul-terminated Base64 text
 * \return decoded string (must be free()'d), or NULL if memory is exhausted
 */
char *vlc_b64_decode( const char *psz_src )
{
    /* size_t, not int: the length must not be narrowed */
    const size_t i_src = strlen( psz_src );
    char *p_dst = malloc( i_src + 1 );
    size_t i_dst;

    if( !p_dst )
        return NULL;

    /* At most i_src bytes are written, leaving room for the terminator */
    i_dst = vlc_b64_decode_binary_to_buffer( (uint8_t*)p_dst, i_src, psz_src );
    p_dst[i_dst] = '\0';

    return p_dst;
}
/**
 * Formats current time into a heap-allocated string.
 * @param tformat time format (as with C strftime())
 * @return an allocated string (must be free()'d), or NULL on memory error
 * or if the current time cannot be converted to local time.
 */
char *str_format_time( const char *tformat )
{
    time_t curtime;
    struct tm loctime;

    /* Get the current time.  */
    curtime = time( NULL );

    /* Convert it to local time representation.  */
    if( localtime_r( &curtime, &loctime ) == NULL )
        return NULL;

    /* strftime() returns 0 both when the buffer is too small and when the
     * format legitimately expands to nothing (e.g. "" or an empty "%p").
     * Prefixing the format with one sentinel character makes every valid
     * expansion non-empty, so 0 can only mean "buffer too small". */
    const size_t fmtlen = strlen( tformat );
    char *fmt = malloc( fmtlen + 2 );
    if( fmt == NULL )
        return NULL;
    fmt[0] = ' ';
    memcpy( fmt + 1, tformat, fmtlen + 1 );

    for( size_t buflen = fmtlen + 32;; buflen += 32 )
    {
        char *str = malloc( buflen );
        if( str == NULL )
        {
            free( fmt );
            return NULL;
        }

        size_t len = strftime( str, buflen, fmt, &loctime );
        if( len > 0 )
        {
            free( fmt );
            /* Drop the sentinel; the move includes the terminating nul */
            memmove( str, str + 1, len );

            char *ret = realloc( str, len );
            return ret ? ret : str; /* shrinking: str stays valid on failure */
        }
        free( str );
    }
}
/*
 * Appends a heap-allocated string to the output being built and frees it.
 *
 * Relies on these variables of the enclosing function: dst (output buffer),
 * d (write offset into dst), i_size (allocated size of dst) and
 * b_empty_if_na.
 *
 * The argument is evaluated exactly once, so it is safe to pass a call that
 * returns a fresh allocation. If it yields NULL, a single '-' is written
 * unless b_empty_if_na is set. If the buffer cannot be grown, the string is
 * dropped and dst is left as it was.
 */
#define INSERT_STRING( string )                                     \
    do {                                                            \
        char *psz_ins_ = (string);                                  \
        if( psz_ins_ != NULL )                                      \
        {                                                           \
            const size_t i_ins_ = strlen( psz_ins_ );               \
            char *p_grown_ = realloc( dst, i_size + i_ins_ );       \
            if( p_grown_ != NULL )                                  \
            {                                                       \
                dst = p_grown_;                                     \
                i_size += i_ins_;                                   \
                memcpy( dst + d, psz_ins_, i_ins_ );                \
                d += i_ins_;                                        \
            }                                                       \
            free( psz_ins_ );                                       \
        }                                                           \
        else if( !b_empty_if_na )                                   \
        {                                                           \
            *(dst+d) = '-';                                         \
            d++;                                                    \
        }                                                           \
    } while( 0 )
/*
 * Same as INSERT_STRING, except that the string is not freed and must not
 * be NULL. Relies on dst, d and i_size of the enclosing function. The
 * argument is evaluated exactly once. If the buffer cannot be grown, nothing
 * is appended and dst is left as it was.
 */
#define INSERT_STRING_NO_FREE( string )                             \
    do {                                                            \
        const char *psz_ins_ = (string);                            \
        const size_t i_ins_ = strlen( psz_ins_ );                   \
        char *p_grown_ = realloc( dst, i_size + i_ins_ );           \
        if( p_grown_ != NULL )                                      \
        {                                                           \
            dst = p_grown_;                                         \
            i_size += i_ins_;                                       \
            memcpy( dst + d, psz_ins_, i_ins_ );                    \
            d += i_ins_;                                            \
        }                                                           \
    } while( 0 )
/* Builds a heap-allocated copy of `string` into which values are inserted
 * with INSERT_STRING / INSERT_STRING_NO_FREE: meta-data read from an input
 * item (artist, album, title, ...), values of input variables (bit-rate,
 * chapter, time, position, ...) and the audio volume. Returns NULL if the
 * initial copy cannot be allocated. The dispatch that selects which value to
 * insert is not visible in this excerpt. */
722 char *__str_format_meta( vlc_object_t *p_object, const char *string )
724 const char *s = string;
725 bool b_is_format = false;
726 bool b_empty_if_na = false;
728 int i_size = strlen( string ) + 1; /* +1 to store '\0' */
729 char *dst = strdup( string );
730 if( !dst ) return NULL;
/* The playlist is held only to fetch its current input and is released right
 * away; p_input is released separately near the end of the function
 * (presumably playlist_CurrentInput() returns a held reference -- confirm). */
733 playlist_t *p_playlist = pl_Hold( p_object );
734 input_thread_t *p_input = playlist_CurrentInput( p_playlist );
735 input_item_t *p_item = NULL;
736 pl_Release( p_object );
739 p_item = input_GetItem(p_input);
/* NOTE(review): INSERT_STRING names its argument several times in the visible
 * macro lines; unless the macro evaluates it once, each getter below runs
 * more than once per insertion -- confirm against the macro definition. */
751 INSERT_STRING( input_item_GetArtist( p_item ) );
757 INSERT_STRING( input_item_GetAlbum( p_item ) );
763 INSERT_STRING( input_item_GetCopyright( p_item ) );
769 INSERT_STRING( input_item_GetDescription( p_item ) );
775 INSERT_STRING( input_item_GetEncodedBy( p_item ) );
/* The displayed-pictures counter is read while holding the stats lock */
779 if( p_item && p_item->p_stats )
781 vlc_mutex_lock( &p_item->p_stats->lock );
782 snprintf( buf, 10, "%d",
783 p_item->p_stats->i_displayed_pictures );
784 vlc_mutex_unlock( &p_item->p_stats->lock );
/* Placeholder when the value is unavailable: nothing if b_empty_if_na is
 * set, otherwise "-" */
788 sprintf( buf, b_empty_if_na ? "" : "-" );
790 INSERT_STRING_NO_FREE( buf );
795 INSERT_STRING( input_item_GetGenre( p_item ) );
801 INSERT_STRING( input_item_GetLanguage( p_item ) );
807 INSERT_STRING( input_item_GetTrackNum( p_item ) );
813 INSERT_STRING( input_item_GetNowPlaying( p_item ) );
819 INSERT_STRING( input_item_GetRating( p_item ) );
826 lang = var_GetNonEmptyString( p_input, "sub-language" );
828 lang = strdup( b_empty_if_na ? "" : "-" );
829 INSERT_STRING( lang );
835 INSERT_STRING( input_item_GetTitle( p_item ) );
841 INSERT_STRING( input_item_GetURL( p_item ) );
847 INSERT_STRING( input_item_GetDate( p_item ) );
/* "bit-rate" is divided by 1000 before printing */
853 snprintf( buf, 10, "%d",
854 var_GetInteger( p_input, "bit-rate" )/1000 );
858 sprintf( buf, b_empty_if_na ? "" : "-" );
860 INSERT_STRING_NO_FREE( buf );
865 snprintf( buf, 10, "%d",
866 var_GetInteger( p_input, "chapter" ) );
870 sprintf( buf, b_empty_if_na ? "" : "-" );
872 INSERT_STRING_NO_FREE( buf );
/* The divisors below treat the duration as microseconds (3600000000 per hour,
 * 60000000 per minute, 1000000 per second).
 * NOTE(review): this sprintf() is unbounded while the snprintf() calls pass a
 * size of 10; buf's declaration is not visible here. An hour count of three
 * or more digits, or a negative value, would need more than 9 bytes --
 * confirm buf's size. */
877 mtime_t i_duration = input_item_GetDuration( p_item );
878 sprintf( buf, "%02d:%02d:%02d",
879 (int)(i_duration/(3600000000)),
880 (int)((i_duration/(60000000))%60),
881 (int)((i_duration/1000000)%60) );
885 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
887 INSERT_STRING_NO_FREE( buf );
892 INSERT_STRING( input_item_GetURI( p_item ) );
898 snprintf( buf, 10, "%d",
899 var_GetInteger( p_input, "title" ) );
903 sprintf( buf, b_empty_if_na ? "" : "-" );
905 INSERT_STRING_NO_FREE( buf );
/* Remaining time: item duration minus the input's "time" variable */
908 if( p_item && p_input )
910 mtime_t i_duration = input_item_GetDuration( p_item );
911 int64_t i_time = var_GetInteger( p_input, "time" );
912 sprintf( buf, "%02d:%02d:%02d",
913 (int)( ( i_duration - i_time ) / 3600000000 ),
914 (int)( ( ( i_duration - i_time ) / 60000000 ) % 60 ),
915 (int)( ( ( i_duration - i_time ) / 1000000 ) % 60 ) );
919 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
921 INSERT_STRING_NO_FREE( buf );
926 INSERT_STRING( input_item_GetName( p_item ) );
933 lang = var_GetNonEmptyString( p_input,
936 lang = strdup( b_empty_if_na ? "" : "-" );
937 INSERT_STRING( lang );
/* "position" is scaled by 100 and printed with one decimal */
943 snprintf( buf, 10, "%2.1lf",
944 var_GetFloat( p_input, "position" ) * 100. );
948 sprintf( buf, b_empty_if_na ? "" : "--.-%%" );
950 INSERT_STRING_NO_FREE( buf );
/* NOTE(review): the fractional part r%1000 is printed without zero padding,
 * so r == 1050 is rendered as "1.50" -- confirm whether "%d.%03d" was meant. */
955 int r = var_GetInteger( p_input, "rate" );
956 snprintf( buf, 10, "%d.%d", r/1000, r%1000 );
960 sprintf( buf, b_empty_if_na ? "" : "-" );
962 INSERT_STRING_NO_FREE( buf );
/* Sample rate printed as thousands with a single decimal digit */
967 int r = var_GetInteger( p_input, "sample-rate" );
968 snprintf( buf, 10, "%d.%d", r/1000, (r/100)%10 );
972 sprintf( buf, b_empty_if_na ? "" : "-" );
974 INSERT_STRING_NO_FREE( buf );
/* Elapsed time from the "time" variable, same HH:MM:SS layout as above */
979 int64_t i_time = var_GetInteger( p_input, "time" );
980 sprintf( buf, "%02d:%02d:%02d",
981 (int)( i_time / ( 3600000000 ) ),
982 (int)( ( i_time / ( 60000000 ) ) % 60 ),
983 (int)( ( i_time / 1000000 ) % 60 ) );
987 sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
989 INSERT_STRING_NO_FREE( buf );
994 INSERT_STRING( input_item_GetPublisher( p_item ) );
/* NOTE(review): the return value of aout_VolumeGet() is not checked in the
 * visible lines; volume is printed as-is */
999 audio_volume_t volume;
1000 aout_VolumeGet( p_object, &volume );
1001 snprintf( buf, 10, "%d", volume );
1002 INSERT_STRING_NO_FREE( buf );
1011 b_empty_if_na = true;
1020 b_is_format = false;
1022 else if( *s == '$' )
1025 b_empty_if_na = false;
/* Drops the reference on the input obtained from the playlist */
1037 vlc_object_release( p_input );
/* The insertion helpers are private to this function */
1041 #undef INSERT_STRING
1042 #undef INSERT_STRING_NO_FREE
1045 * Apply str format time and str format meta
1047 char *__str_format( vlc_object_t *p_this, const char *psz_src )
1049 char *psz_buf1, *psz_buf2;
1050 psz_buf1 = str_format_time( psz_src );
1051 psz_buf2 = str_format_meta( p_this, psz_buf1 );
1057 * Remove forbidden characters from filenames (including slashes)
/* Works on a private strdup() copy of str_origin; str walks the copy while
 * str_base remembers its start. Most of the body (the replacements and the
 * return statement) is not visible in this excerpt.
 * NOTE(review): no NULL check on the strdup() result is visible before *str
 * is read -- confirm. */
1059 char* filename_sanitize( const char *str_origin )
1061 char *str = strdup( str_origin );
1062 char *str_base = str;
/* Special-cases a name that is exactly "." or ".." */
1063 if( *str == '.' && (str[1] == '\0' || (str[1] == '.' && str[2] == '\0' ) ) )
1073 #if defined( WIN32 )
1074 // Change leading spaces into underscores
1075 while( *str && *str == ' ' )
/* Platform-specific handling follows; its contents are not visible here */
1084 #if defined( __APPLE__ )
1086 #elif defined( WIN32 )
1101 #if defined( WIN32 )
1102 // Change trailing spaces into underscores
/* Loop bounded by the start of the copy */
1104 while( str != str_base )
1116 * Remove forbidden characters from full paths (leaves slashes)
/* Sanitizes a full path in place (str is modified directly). Most of the
 * body is not visible in this excerpt. */
1118 void path_sanitize( char *str )
1121 /* check drive prefix if path is absolute */
/* True when the path starts with an ASCII letter followed by ':' */
1122 if( (((unsigned char)(str[0] - 'A') < 26)
1123 || ((unsigned char)(str[0] - 'a') < 26)) && (':' == str[1]) )
1128 #if defined( __APPLE__ )
1131 #elif defined( WIN32 )
/* WIN32 builds test each character against this set; the action taken on a
 * match is not visible here */
1132 if( strchr( "*\"?:|<>", *str ) )
/* Overwrites the current character with the platform directory separator;
 * the guarding condition is not visible here */
1135 *str = DIR_SEP_CHAR;
1141 #include <vlc_url.h>
1144 * Convert a file path to an URI. If already an URI, do nothing.
1146 char *make_URI (const char *path)
1150 if (strstr (path, "://") != NULL)
1151 return strdup (path); /* Already an URI */
1152 /* Note: VLC cannot handle URI schemes without double slash after the
1153 * scheme name (such as mailto: or news:). */
1157 if (isalpha (path[0]) && (path[1] == ':'))
1159 if (asprintf (&buf, "file:///%c:", path[0]) == -1)
1166 /* Windows UNC paths (file://host/share/path instead of file:///path) */
1167 if (!strncmp (path, "\\\\", 2))
1170 buf = strdup ("file://");
1174 if (path[0] != DIR_SEP_CHAR)
1175 { /* Relative path: prepend the current working directory */
1178 if (getcwd (cwd, sizeof (cwd)) == NULL) /* FIXME: UTF8? */
1180 if (asprintf (&buf, "%s/%s", cwd, path) == -1)
1182 char *ret = make_URI (buf);
1187 buf = strdup ("file://");
1191 assert (path[0] == DIR_SEP_CHAR);
1193 /* Absolute file path */
1194 for (const char *ptr = path + 1;; ptr++)
1196 size_t len = strcspn (ptr, DIR_SEP);
1197 char *component = encode_URI_bytes (ptr, len);
1198 if (component == NULL)
1204 int val = asprintf (&uri, "%s/%s", buf, component);