1 /*****************************************************************************
2 * strings.c: String related functions
3 *****************************************************************************
4 * Copyright (C) 2006 the VideoLAN team
5 * Copyright (C) 2008-2009 Rémi Denis-Courmont
8 * Authors: Antoine Cellerier <dionoea at videolan dot org>
9 * Daniel Stranger <vlc at schmaller dot de>
10 * Rémi Denis-Courmont <rem # videolan org>
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25 *****************************************************************************/
27 /*****************************************************************************
29 *****************************************************************************/
34 #include <vlc_common.h>
37 /* Needed by str_format_time */
41 /* Needed by str_format_meta */
42 #include <vlc_input.h>
44 #include <vlc_playlist.h>
47 #include <vlc_strings.h>
49 #include <vlc_charset.h>
52 * Decode encoded URI component. See also decode_URI().
53 * \return decoded duplicated string
/* Works on a strdup() copy of psz and runs decode_URI() on that copy, so the
 * caller's (const) string is never written to.
 * NOTE(review): the strdup() result is passed to decode_URI() with no visible
 * NULL check, and the return statement is not part of this excerpt; confirm
 * both against the complete file. */
55 char *decode_URI_duplicate( const char *psz )
57 char *psz_dup = strdup( psz );
58 decode_URI( psz_dup );
63 * Decode an encoded URI component in place.
64 * <b>This function does NOT decode entire URIs.</b>
65 * It decodes components (e.g. host name, directory, file name).
66 * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
67 * Complete URIs are always "encoded" (or they are syntactically invalid).
69 * Note that URI encoding is different from Javascript escaping. Especially,
70 * white spaces and Unicode non-ASCII code points are encoded differently.
72 * \return psz on success, NULL if it was not properly encoded
/* In-place decoder: `in` (read cursor) and `out` (write cursor) both start at
 * the beginning of psz and walk the same buffer. */
74 char *decode_URI( char *psz )
76 unsigned char *in = (unsigned char *)psz, *out = in, c;
/* Consume one input byte per iteration until the terminating nul. */
81 while( ( c = *in++ ) != '\0' )
/* Fetch the next two input bytes into hex[]; a nul in either position makes
 * this condition true (the branch taken in that case is not visible here). */
89 if( ( ( hex[0] = *in++ ) == 0 )
90 || ( ( hex[1] = *in++ ) == 0 ) )
/* Interpret hex[] as a base-16 number and emit it as one output byte. */
94 *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
99 /* Inserting non-ASCII or non-printable characters is unsafe,
100 * and no sane browser will send these unencoded */
101 if( ( c < 32 ) || ( c > 127 ) )
/**
 * Tells whether a byte may be copied into a URI component without being
 * percent-encoded.
 *
 * \param c byte value to test
 * \return true for ASCII letters, ASCII digits and the characters
 *         '-', '.', '_' and '~'; false for everything else, including nul
 */
static inline bool isurisafe( int c )
{
    /* These are the _unreserved_ URI characters (RFC3986 §2.3) */
    return ( (unsigned char)( c - 'a' ) < 26 )
        || ( (unsigned char)( c - 'A' ) < 26 )
        || ( (unsigned char)( c - '0' ) < 10 )
        /* strchr() also matches the terminating nul of its haystack, so the
         * nul byte has to be ruled out explicitly or it would count as safe
         * and be emitted unescaped. */
        || ( ( c != '\0' ) && ( strchr( "-._~", c ) != NULL ) );
}
/* Encodes `len` bytes starting at psz_uri into a freshly allocated buffer.
 * The buffer is sized 3 * len + 1 (presumably three output characters per
 * input byte in the worst case, plus a terminator; the lines writing the
 * escape prefix and the terminator are not visible in this excerpt). */
120 static char *encode_URI_bytes (const char *psz_uri, size_t len)
122 char *psz_enc = malloc (3 * len + 1), *out = psz_enc;
126 for (size_t i = 0; i < len; i++)
/* Exactly 16 characters in a 16-element array: a digit lookup table, not a
 * nul-terminated string. */
128 static const char hex[16] = "0123456789ABCDEF";
129 uint8_t c = *psz_uri;
133 /* This is URI encoding, not HTTP forms:
134 * Space is encoded as '%20', not '+'. */
/* High nibble first, then low nibble, as upper-case hexadecimal digits. */
138 *out++ = hex[c >> 4];
139 *out++ = hex[c & 0xf];
/* Shrink the allocation to the number of bytes actually written; if that
 * fails, the original (larger) buffer is returned instead. */
145 out = realloc (psz_enc, out - psz_enc);
146 return out ? out : psz_enc; /* realloc() can fail (safe) */
150 * Encodes an URI component (RFC3986 §2).
152 * @param psz_uri nul-terminated UTF-8 representation of the component.
153 * Obviously, you can't pass an URI containing a nul character, but you don't
154 * want to do that, do you?
156 * @return encoded string (must be free()'d), or NULL for ENOMEM.
/* Thin wrapper: measures the nul-terminated input with strlen() and hands the
 * bytes to encode_URI_bytes(). */
158 char *encode_URI_component( const char *psz_uri )
160 return encode_URI_bytes (psz_uri, strlen (psz_uri));
/* Table of named entities and their replacement text. It is searched with
 * bsearch() by resolve_xml_special_chars(), which is why the entries must stay
 * sorted. In the one entry visible here the name is stored without the leading
 * '&' but with the trailing ';', and the replacement is given as raw bytes. */
164 static const struct xml_entity_s
169 /* Important: this list has to be in alphabetical order (psz_entity-wise) */
251 { "nbsp;", "\xc2\xa0" },
/* bsearch() comparison callback: `key` is the text being looked up and `elem`
 * is one table entry. */
296 static int cmp_entity (const void *key, const void *elem)
298 const struct xml_entity_s *ent = elem;
299 const char *name = key;
/* Only the first strlen(ent->psz_entity) characters are compared, so the key
 * matches an entry when it merely begins with that entry's name. */
301 return strncmp (name, ent->psz_entity, strlen (ent->psz_entity));
305 * Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
306 * \param psz_value string to convert
/* Rewrites psz_value in place: psz_value serves as the read cursor and p_pos
 * as the write cursor into the same buffer. The enclosing loop and the plain
 * character copy are not visible in this excerpt. */
308 void resolve_xml_special_chars( char *psz_value )
310 char *p_pos = psz_value;
314 if( *psz_value == '&' )
316 if( psz_value[1] == '#' )
317 { /* &#xxx; Unicode code point */
/* Parse the decimal number that follows "&#"; psz_end receives the position
 * of the first character that is not part of the number. */
319 unsigned long cp = strtoul( psz_value+2, &psz_end, 10 );
/* The reference is only accepted when the number is followed by ';'. */
320 if( *psz_end == ';' )
/* Continue reading right after the ';'. */
322 psz_value = psz_end + 1;
324 (void)0; /* skip nuls */
331 /* Unicode code point outside ASCII.
332 * &#xxx; representation is longer than UTF-8 :) */
/* Two-byte UTF-8 sequence: lead byte 0xC0 plus the bits above the low six,
 * then one continuation byte (0x80 plus the low six bits). */
335 *p_pos++ = 0xC0 | (cp >> 6);
336 *p_pos = 0x80 | (cp & 0x3F);
/* Three-byte UTF-8 sequence: lead byte 0xE0 followed by two continuation
 * bytes of six bits each. */
341 *p_pos++ = 0xE0 | (cp >> 12);
342 *p_pos++ = 0x80 | ((cp >> 6) & 0x3F);
343 *p_pos = 0x80 | (cp & 0x3F);
346 if( cp <= 0x1FFFFF ) /* Outside the BMP */
347 { /* Unicode stops at 10FFFF, but who cares? */
/* Four-byte UTF-8 sequence: lead byte 0xF0 followed by three continuation
 * bytes of six bits each. */
348 *p_pos++ = 0xF0 | (cp >> 18);
349 *p_pos++ = 0x80 | ((cp >> 12) & 0x3F);
350 *p_pos++ = 0x80 | ((cp >> 6) & 0x3F);
351 *p_pos = 0x80 | (cp & 0x3F);
356 /* Invalid entity number */
362 { /* Well-known XML entity */
363 const struct xml_entity_s *ent;
/* Binary search of the entity table; the key starts just after the '&'. */
365 ent = bsearch (psz_value + 1, xml_entities,
366 sizeof (xml_entities) / sizeof (*ent),
367 sizeof (*ent), cmp_entity);
/* Copy the replacement text to the write position (no terminator copied). */
370 size_t olen = strlen (ent->psz_char);
371 memcpy (p_pos, ent->psz_char, olen);
/* Skip the entity name plus one extra character for the leading '&'. */
373 psz_value += strlen (ent->psz_entity) + 1;
395 * Converts '<', '>', '\"', '\'' and '&' to their html entities
396 * \param psz_content simple element content that is to be converted
/* Returns a newly allocated copy of psz_content in which the characters
 * listed in the cases below are replaced by "&name;" entity references.
 * psz_content must not be NULL (asserted). */
398 char *convert_xml_special_chars( const char *psz_content )
400 assert( psz_content );
402 const size_t len = strlen( psz_content );
/* 6 * len + 1: the longest replacement produced below is "&quot;", i.e. six
 * characters for one input character, plus one byte (presumably for the
 * terminator, whose write is not visible in this excerpt). */
403 char *const psz_temp = malloc( 6 * len + 1 );
404 char *p_to = psz_temp;
406 if( psz_temp == NULL )
408 for( size_t i = 0; i < len; i++ )
411 char c = psz_content[i];
/* Entity name (without '&' and ';') for each character needing escaping. */
415 case '\"': str = "quot"; break;
416 case '&': str = "amp"; break;
417 case '\'': str = "#39"; break;
418 case '<': str = "lt"; break;
419 case '>': str = "gt"; break;
/* sprintf() returns the number of characters written, which advances the
 * write cursor past the entity just emitted. */
424 p_to += sprintf( p_to, "&%s;", str );
/* Shrink the buffer to the size actually used; fall back to the original
 * buffer if realloc() returns NULL. */
428 p_to = realloc( psz_temp, p_to - psz_temp );
429 return p_to ? p_to : psz_temp; /* cannot fail */
432 /* Base64 encoding */
/* Encodes i_src bytes from src into a newly allocated Base64 string.
 * NOTE(review): the declaration of the accumulator `v`, the loop and the
 * return statement are not visible in this excerpt. */
433 char *vlc_b64_encode_binary( const uint8_t *src, size_t i_src )
/* The 64-character Base64 alphabet, indexed by 6-bit value. */
435 static const char b64[] =
436 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
438 char *ret = malloc( ( i_src + 4 ) * 4 / 3 );
446 /* pops (up to) 3 bytes of input, push 4 bytes */
/* v >> 26 is used as the alphabet index for each output character
 * (presumably the top six bits of a 32-bit accumulator; TODO confirm). */
451 *dst++ = b64[v >> 26];
457 *dst++ = b64[v >> 26];
462 v |= *src++ << 20; // 3/3
/* The third and fourth output characters become '=' padding when fewer than
 * two, respectively three, input bytes are available. */
463 *dst++ = ( i_src >= 2 ) ? b64[v >> 26] : '='; // 3/4
467 *dst++ = ( i_src >= 3 ) ? b64[v >> 26] : '='; // 4/4
/* String front end for vlc_b64_encode_binary(). Two calls are visible: one
 * encodes the strlen(src) bytes of src, the other encodes an empty input.
 * The condition choosing between them is not part of this excerpt
 * (presumably a NULL check on src; TODO confirm). */
479 char *vlc_b64_encode( const char *src )
482 return vlc_b64_encode_binary( (const uint8_t*)src, strlen(src) );
484 return vlc_b64_encode_binary( (const uint8_t*)"", 0 );
487 /* Base64 decoding */
/* Decodes the Base64 text p_src into p_dst, writing at most i_dst bytes, and
 * returns the number of bytes written. */
488 size_t vlc_b64_decode_binary_to_buffer( uint8_t *p_dst, size_t i_dst, const char *p_src )
/* Reverse lookup table indexed by input byte: the 6-bit value for characters
 * of the Base64 alphabet, -1 for every other byte (including '='). */
490 static const int b64[256] = {
491 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 00-0F */
492 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 10-1F */
493 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* 20-2F */
494 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 30-3F */
495 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* 40-4F */
496 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* 50-5F */
497 -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* 60-6F */
498 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* 70-7F */
499 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 80-8F */
500 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 90-9F */
501 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* A0-AF */
502 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* B0-BF */
503 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* C0-CF */
504 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* D0-DF */
505 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* E0-EF */
506 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* F0-FF */
/* p_start remembers the beginning of the output so that the number of bytes
 * written can be computed from the pointer difference. */
508 uint8_t *p_start = p_dst;
509 uint8_t *p = (uint8_t *)p_src;
/* Stop as soon as i_dst bytes have been produced or the input's nul is
 * reached; one input character is examined per iteration. */
514 for( i_level = 0, i_last = 0; (size_t)( p_dst - p_start ) < i_dst && *p != '\0'; p++ )
516 const int c = b64[(unsigned int)*p];
/* Each output byte is assembled from bits of the previous value (i_last) and
 * the current value (c); the switch selecting among the three forms below is
 * not visible in this excerpt. */
526 *p_dst++ = ( i_last << 2 ) | ( ( c >> 4)&0x03 );
530 *p_dst++ = ( ( i_last << 4 )&0xf0 ) | ( ( c >> 2 )&0x0f );
534 *p_dst++ = ( ( i_last &0x03 ) << 6 ) | c;
540 return p_dst - p_start;
/* Allocates an output buffer of strlen(psz_src) bytes, stores it in *pp_dst
 * and decodes psz_src into it; returns the decoder's result.
 * NOTE(review): strlen()'s size_t result is stored in an int, and the
 * handling of a failed malloc() is not visible in this excerpt. */
542 size_t vlc_b64_decode_binary( uint8_t **pp_dst, const char *psz_src )
544 const int i_src = strlen( psz_src );
547 *pp_dst = p_dst = malloc( i_src );
550 return vlc_b64_decode_binary_to_buffer( p_dst, i_src, psz_src );
/* Decodes psz_src into a newly allocated char buffer of strlen(psz_src) + 1
 * bytes (the extra byte is presumably for a terminating nul; the lines that
 * write it and return the buffer are not visible in this excerpt). */
552 char *vlc_b64_decode( const char *psz_src )
554 const int i_src = strlen( psz_src );
555 char *p_dst = malloc( i_src + 1 );
/* At most i_src bytes are decoded, leaving the last allocated byte unused by
 * the decoder. */
560 i_dst = vlc_b64_decode_binary_to_buffer( (uint8_t*)p_dst, i_src, psz_src );
567 * Formats current time into a heap-allocated string.
568 * @param tformat time format (as with C strftime())
569 * @return an allocated string (must be free()'d), or NULL on memory error.
/* Formats the current local time according to tformat into a heap buffer. */
571 char *str_format_time( const char *tformat )
576 if (strcmp (tformat, "") == 0)
577 return strdup (""); /* corner case w.r.t. strftime() return value */
579 /* Get the current time. */
582 /* Convert it to local time representation. */
583 localtime_r( &curtime, &loctime );
/* Try with a buffer of strlen(tformat) + 32 bytes first and grow it by 32
 * bytes on each further iteration; the loop has no condition of its own, so
 * it ends only through a return or break inside the body. */
584 for (size_t buflen = strlen (tformat) + 32;; buflen += 32)
586 char *str = malloc (buflen);
590 size_t len = strftime (str, buflen, tformat, &loctime);
/* Trim the buffer to the formatted length plus the terminator. */
593 char *ret = realloc (str, len + 1);
594 return ret ? ret : str; /* <- this cannot fail */
/* Writes `duration` into buf (at most len bytes) as three colon-separated
 * fields. */
600 static void format_duration (char *buf, size_t len, int64_t duration)
/* Scale down by CLOCK_FREQ (presumably converting clock ticks to seconds;
 * TODO confirm the unit of CLOCK_FREQ). */
605 duration /= CLOCK_FREQ;
/* First division by 60; the assignment of `sec` is not visible here,
 * presumably it takes this division's remainder. */
606 d = lldiv (duration, 60);
/* Second division by 60: the quotient and remainder become the first and
 * second printed fields. */
608 d = lldiv (d.quot, 60);
609 snprintf (buf, len, "%02lld:%02d:%02d", d.quot, (int)d.rem, sec);
/* Helper for str_format_meta(): when `string` is non-NULL, grows dst by its
 * length and copies it in at offset d; otherwise, unless b_empty_if_na is set,
 * a fallback branch runs (its body is not visible in this excerpt).
 * Relies on the caller's locals dst, d, i_size and b_empty_if_na.
 * NOTE(review): `string` appears several times in the visible expansion, so an
 * argument that is a function call would be evaluated more than once; confirm
 * against the complete macro and its call sites. */
612 #define INSERT_STRING( string ) \
613 if( string != NULL ) \
615 int len = strlen( string ); \
616 dst = xrealloc( dst, i_size = i_size + len );\
617 memcpy( (dst+d), string, len ); \
621 else if( !b_empty_if_na ) \
627 /* same as INSERT_STRING, except that string won't be freed */
/* Helper for str_format_meta(): grows dst by strlen(string) and copies the
 * string in at offset d. No NULL test is visible in this expansion. Relies on
 * the caller's locals dst, d and i_size. */
628 #define INSERT_STRING_NO_FREE( string ) \
630 int len = strlen( string ); \
631 dst = xrealloc( dst, i_size = i_size + len );\
632 memcpy( dst+d, string, len ); \
635 #undef str_format_meta
/* Builds a heap-allocated copy of `string` into which values taken from the
 * playlist's current input are inserted: item metadata (artist, album, title,
 * URI, ...), input variables (time, position, rate, ...) and the audio volume.
 * The switch labels that select each insertion are not visible in this
 * excerpt, so the mapping from format characters to values is not documented
 * here.
 * NOTE(review): several snprintf() calls pass a literal size of 10 while the
 * format_duration() calls pass sizeof(buf); the declaration of buf is not
 * visible, so confirm that it is at least 10 bytes long. */
636 char *str_format_meta( vlc_object_t *p_object, const char *string )
638 const char *s = string;
639 bool b_is_format = false;
/* When true, unavailable values are rendered as an empty string instead of a
 * placeholder such as "-" (see the strcpy() fallbacks below). */
640 bool b_empty_if_na = false;
642 int i_size = strlen( string ) + 1; /* +1 to store '\0' */
643 char *dst = strdup( string );
644 if( !dst ) return NULL;
/* Current input of the playlist; released again near the end of the
 * function. */
647 input_thread_t *p_input = playlist_CurrentInput( pl_Get(p_object) );
648 input_item_t *p_item = NULL;
651 p_item = input_GetItem(p_input);
663 INSERT_STRING( input_item_GetArtist( p_item ) );
669 INSERT_STRING( input_item_GetAlbum( p_item ) );
675 INSERT_STRING( input_item_GetCopyright( p_item ) );
681 INSERT_STRING( input_item_GetDescription( p_item ) );
687 INSERT_STRING( input_item_GetEncodedBy( p_item ) );
/* Displayed-picture counter, read while holding the statistics lock. */
691 if( p_item && p_item->p_stats )
693 vlc_mutex_lock( &p_item->p_stats->lock );
694 snprintf( buf, 10, "%"PRIi64,
695 p_item->p_stats->i_displayed_pictures );
696 vlc_mutex_unlock( &p_item->p_stats->lock );
699 strcpy( buf, b_empty_if_na ? "" : "-" );
700 INSERT_STRING_NO_FREE( buf );
705 INSERT_STRING( input_item_GetGenre( p_item ) );
711 INSERT_STRING( input_item_GetLanguage( p_item ) );
717 INSERT_STRING( input_item_GetTrackNum( p_item ) );
723 INSERT_STRING( input_item_GetNowPlaying( p_item ) );
729 INSERT_STRING( input_item_GetRating( p_item ) );
736 lang = var_GetNonEmptyString( p_input, "sub-language" );
738 lang = strdup( b_empty_if_na ? "" : "-" );
739 INSERT_STRING( lang );
745 INSERT_STRING( input_item_GetTitle( p_item ) );
751 INSERT_STRING( input_item_GetURL( p_item ) );
757 INSERT_STRING( input_item_GetDate( p_item ) );
/* The "bit-rate" variable is divided by 1000 before printing. */
763 snprintf( buf, 10, "%"PRId64,
764 var_GetInteger( p_input, "bit-rate" )/1000 );
767 strcpy( buf, b_empty_if_na ? "" : "-" );
768 INSERT_STRING_NO_FREE( buf );
773 snprintf( buf, 10, "%"PRId64,
774 var_GetInteger( p_input, "chapter" ) );
777 strcpy( buf, b_empty_if_na ? "" : "-" );
778 INSERT_STRING_NO_FREE( buf );
783 mtime_t i_duration = input_item_GetDuration( p_item );
784 format_duration (buf, sizeof (buf), i_duration);
787 strcpy( buf, b_empty_if_na ? "" : "--:--:--" );
788 INSERT_STRING_NO_FREE( buf );
793 INSERT_STRING( input_item_GetURI( p_item ) );
799 snprintf( buf, 10, "%"PRId64,
800 var_GetInteger( p_input, "title" ) );
803 strcpy( buf, b_empty_if_na ? "" : "-" );
804 INSERT_STRING_NO_FREE( buf );
/* Item duration minus the current "time" value, formatted as a duration. */
807 if( p_item && p_input )
809 mtime_t i_duration = input_item_GetDuration( p_item );
810 int64_t i_time = var_GetTime( p_input, "time" );
811 format_duration( buf, sizeof(buf),
812 i_duration - i_time );
815 strcpy( buf, b_empty_if_na ? "" : "--:--:--" );
816 INSERT_STRING_NO_FREE( buf );
821 INSERT_STRING( input_item_GetName( p_item ) );
828 lang = var_GetNonEmptyString( p_input,
831 lang = strdup( b_empty_if_na ? "" : "-" );
832 INSERT_STRING( lang );
/* The "position" variable is multiplied by 100 and printed with one
 * decimal. */
838 snprintf( buf, 10, "%2.1lf",
839 var_GetFloat( p_input, "position" ) * 100. );
843 snprintf( buf, 10, b_empty_if_na ? "" : "--.-%%" );
845 INSERT_STRING_NO_FREE( buf );
850 float f = var_GetFloat( p_input, "rate" );
851 snprintf( buf, 10, "%.3f", f );
854 strcpy( buf, b_empty_if_na ? "" : "-" );
855 INSERT_STRING_NO_FREE( buf );
/* The "sample-rate" variable is printed as thousands with one decimal
 * digit. */
860 int r = var_GetInteger( p_input, "sample-rate" );
861 snprintf( buf, 10, "%d.%d", r/1000, (r/100)%10 );
864 strcpy( buf, b_empty_if_na ? "" : "-" );
865 INSERT_STRING_NO_FREE( buf );
870 int64_t i_time = var_GetTime( p_input, "time" );
871 format_duration( buf, sizeof(buf), i_time );
874 strcpy( buf, b_empty_if_na ? "" : "--:--:--" );
875 INSERT_STRING_NO_FREE( buf );
880 INSERT_STRING( input_item_GetPublisher( p_item ) );
885 audio_volume_t volume;
886 aout_VolumeGet( p_object, &volume );
887 snprintf( buf, 10, "%d", volume );
888 INSERT_STRING_NO_FREE( buf );
897 b_empty_if_na = true;
911 b_empty_if_na = false;
923 vlc_object_release( p_input );
928 #undef INSERT_STRING_NO_FREE
932 * Apply str format time and str format meta
/* Runs psz_src through str_format_time() and then feeds that result to
 * str_format_meta().
 * NOTE(review): psz_buf1 is passed on without a visible NULL check, and the
 * lines that release it and return the final string are not part of this
 * excerpt; confirm against the complete file. */
934 char *str_format( vlc_object_t *p_this, const char *psz_src )
936 char *psz_buf1, *psz_buf2;
937 psz_buf1 = str_format_time( psz_src );
938 psz_buf2 = str_format_meta( p_this, psz_buf1 );
944 * Remove forbidden characters from filenames (including slashes)
/* Sanitizes a file name in place. Only the outline is visible in this
 * excerpt; the character replacements themselves are on lines not shown. */
946 void filename_sanitize( char *str )
/* Remember the start of the string for the backwards scan further down. */
949 char *str_base = str;
/* Special case: the whole name is exactly "." or "..". */
952 if( *str == '.' && (str[1] == '\0' || (str[1] == '.' && str[2] == '\0' ) ) )
963 // Change leading spaces into underscores
964 while( *str && *str == ' ' )
/* Platform-specific handling follows (bodies not visible here). */
973 #if defined( __APPLE__ )
975 #elif defined( WIN32 )
991 // Change trailing spaces into underscores
993 while( str != str_base )
1003 * Remove forbidden characters from full paths (leaves slashes)
/* Sanitizes a full path in place. Only the outline is visible in this
 * excerpt. */
1005 void path_sanitize( char *str )
1008 /* check drive prefix if path is absolute */
/* True when the path starts with an ASCII letter followed by ':'. */
1009 if( (((unsigned char)(str[0] - 'A') < 26)
1010 || ((unsigned char)(str[0] - 'a') < 26)) && (':' == str[1]) )
1015 #if defined( __APPLE__ )
1018 #elif defined( WIN32 )
/* Characters tested for on WIN32 builds (the action taken on a match is not
 * visible in this excerpt). */
1019 if( strchr( "*\"?:|<>", *str ) )
/* The current character is overwritten with the platform's directory
 * separator (the condition guarding this is not visible here). */
1022 *str = DIR_SEP_CHAR;
1028 #include <vlc_url.h>
1034 * Convert a file path to an URI.
1035 * If already an URI, return a copy of the string.
1036 * @param path path to convert (or URI to copy)
1037 * @param scheme URI scheme to use (default is auto: "file", "fd" or "smb")
1038 * @return a nul-terminated URI string (use free() to release it),
1039 * or NULL in case of error
/* Converts a local path into a URI string, or copies the input when it
 * already contains "://". Several branches are only partly visible in this
 * excerpt (preprocessor conditions, error returns and the final return are
 * missing). */
1041 char *make_URI (const char *path, const char *scheme)
/* "-" with no explicit scheme denotes file descriptor 0. */
1045 if (scheme == NULL && !strcmp (path, "-"))
1046 return strdup ("fd://0"); // standard input
1047 if (strstr (path, "://") != NULL)
1048 return strdup (path); /* Already an URI */
1049 /* Note: VLC cannot handle URI schemes without double slash after the
1050 * scheme name (such as mailto: or news:). */
/* Drive-letter prefix ("X:"): start the URI with the scheme, three slashes
 * and the drive letter; "file" is used when no scheme was given. */
1055 if (isalpha (path[0]) && (path[1] == ':'))
1057 if (asprintf (&buf, "%s:///%c:", scheme ? scheme : "file",
1061 # warning Drive letter-relative path not implemented!
1062 if (path[0] != DIR_SEP_CHAR)
/* Path starting with two backslashes. */
1067 if (!strncmp (path, "\\\\", 2))
1068 { /* Windows UNC paths */
1071 return NULL; /* remote files not supported */
1073 /* \\host\share\path -> smb://host/share/path */
1074 if (strchr (path + 2, '\\') != NULL)
1075 { /* Convert backslashes to slashes */
/* Work on a copy, rewrite characters from index 2 onwards, then recurse on
 * the converted copy. */
1076 char *dup = strdup (path);
1079 for (size_t i = 2; dup[i]; i++)
1081 dup[i] = DIR_SEP_CHAR;
1083 char *ret = make_URI (dup, scheme);
1087 # define SMB_SCHEME "smb"
1089 /* \\host\share\path -> file://host/share/path */
1090 # define SMB_SCHEME "file"
/* Length of the host name: everything after the two leading characters up to
 * the next directory separator. */
1092 size_t hostlen = strcspn (path + 2, DIR_SEP);
/* sizeof (SMB_SCHEME) already counts the terminating nul, and 3 covers
 * "://"; snprintf() with the same size therefore keeps only the host part of
 * path + 2. */
1094 buf = malloc (sizeof (SMB_SCHEME) + 3 + hostlen);
1096 snprintf (buf, sizeof (SMB_SCHEME) + 3 + hostlen,
1097 SMB_SCHEME"://%s", path + 2);
/* Continue with whatever follows the host name. */
1098 path += 2 + hostlen;
1100 if (path[0] == '\0')
1101 return buf; /* Hostname without path */
1104 if (path[0] != DIR_SEP_CHAR)
1105 { /* Relative path: prepend the current working directory */
1108 if (getcwd (cwd, sizeof (cwd)) == NULL) /* FIXME: UTF8? */
/* Join the working directory and the relative path, then recurse on the
 * resulting absolute path. */
1110 if (asprintf (&buf, "%s/%s", cwd, path) == -1)
1112 char *ret = make_URI (buf, scheme);
/* Start of the result: the scheme ("file" by default) followed by "://". */
1117 if (asprintf (&buf, "%s://", scheme ? scheme : "file") == -1)
1122 assert (path[0] == DIR_SEP_CHAR);
1124 /* Absolute file path */
/* Walk the path one component at a time, starting after the leading
 * separator; each component is encoded separately and appended to the URI
 * built so far after a '/'. */
1125 for (const char *ptr = path + 1;; ptr++)
1127 size_t len = strcspn (ptr, DIR_SEP);
1128 char *component = encode_URI_bytes (ptr, len);
1129 if (component == NULL)
1135 int val = asprintf (&uri, "%s/%s", buf, component);
1148 * Tries to convert an URI to a local (UTF-8-encoded) file path.
1149 * @param url URI to convert
1150 * @return NULL on error, a nul-terminated string otherwise
1151 * (use free() to release it)
/* Converts a URI into a local path string where possible. Several branches
 * are only partly visible in this excerpt (switch labels, preprocessor
 * conditions and some error paths are missing). */
1153 char *make_path (const char *url)
/* Locate the "://" separator; everything before it is the scheme. */
1158 char *path = strstr (url, "://");
1160 return NULL; /* unsupported scheme or invalid syntax */
/* If a '/' occurs before "://", the scheme is taken to end at that '/'. */
1162 end = memchr (url, '/', path - url);
1163 size_t schemelen = ((end != NULL) ? end : path) - url;
1164 path += 3; /* skip "://" */
1166 /* Remove HTML anchor if present */
1167 end = strchr (path, '#');
/* From here on `path` is a private heap copy (cut at the '#' when there is
 * one). */
1169 path = strndup (path, end - path);
1171 path = strdup (path);
1172 if (unlikely(path == NULL))
1173 return NULL; /* boom! */
/* Scheme "file", compared case-insensitively. */
1178 if (schemelen == 4 && !strncasecmp (url, "file", 4))
1180 #if (DIR_SEP_CHAR != '/')
/* Visit every '/' in the path (the loop body is not visible here). */
1181 for (char *p = strchr (path, '/'); p; p = strchr (p + 1, '/'))
1184 /* Leading slash => local path */
1185 if (*path == DIR_SEP_CHAR)
1186 #if !defined (WIN32) || defined (UNDER_CE)
/* Shift the string left by one character, dropping the leading separator
 * (the terminating nul is moved as well). */
1189 return memmove (path, path + 1, strlen (path + 1) + 1);
1192 /* Local path disguised as a remote one (MacOS X) */
/* "localhost" followed by a separator is ten characters; nine are removed so
 * that the separator stays at the front. */
1193 if (!strncasecmp (path, "localhost"DIR_SEP, 10))
1194 return memmove (path, path + 9, strlen (path + 9) + 1);
/* Non-empty remainder: prefix it with two backslashes. */
1197 if (*path && asprintf (&ret, "\\\\%s", path) == -1)
1200 /* non-local path :-( */
/* Scheme "fd": the remainder is parsed as an integer file descriptor
 * (base 0, so strtol() auto-detects the radix). */
1203 if (schemelen == 2 && !strncasecmp (url, "fd", 2))
1205 int fd = strtol (path, &end, 0);
/* The labels selecting the three names below are not visible in this
 * excerpt (presumably descriptors 0, 1 and 2; TODO confirm); other
 * descriptors are formatted with the "/dev/fd/%d" pattern. */
1214 ret = strdup ("/dev/stdin");
1217 ret = strdup ("/dev/stdout");
1220 ret = strdup ("/dev/stderr");
1223 if (asprintf (&ret, "/dev/fd/%d", fd) == -1)
1227 /* XXX: Does this work on WinCE? */
1229 ret = strdup ("CON");
1237 return ret; /* unknown scheme */