X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Ftext%2Fstrings.c;h=3f21500f7aa654c5a64712f7733b91e97fb5d996;hb=d5fd75161509b82a98b0ed81f60d09dc69b6ff5a;hp=750d565109777fe8bc17bf80d31e19b42ed6b466;hpb=ce572d7eec691f6ee486f88c8fe215e99d5b4dd8;p=vlc diff --git a/src/text/strings.c b/src/text/strings.c index 750d565109..3f21500f7a 100644 --- a/src/text/strings.c +++ b/src/text/strings.c @@ -2,6 +2,7 @@ * strings.c: String related functions ***************************************************************************** * Copyright (C) 2006 the VideoLAN team + * Copyright (C) 2008-2009 Rémi Denis-Courmont * $Id$ * * Authors: Antoine Cellerier @@ -26,11 +27,16 @@ /***************************************************************************** * Preamble *****************************************************************************/ -#include +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include #include /* Needed by str_format_time */ #include +#include /* Needed by str_format_meta */ #include @@ -43,90 +49,7 @@ #include /** - * Unescape URI encoded string - * \return decoded duplicated string - */ -char *unescape_URI_duplicate( const char *psz ) -{ - char *psz_dup = strdup( psz ); - unescape_URI( psz_dup ); - return psz_dup; -} - -/** - * Unescape URI encoded string in place - * \return nothing - */ -void unescape_URI( char *psz ) -{ - unsigned char *in = (unsigned char *)psz, *out = in, c; - if( psz == NULL ) - return; - - while( ( c = *in++ ) != '\0' ) - { - switch( c ) - { - case '%': - { - char val[5], *pval = val; - unsigned long cp; - - switch( c = *in++ ) - { - case '\0': - return; - - case 'u': - case 'U': - if( ( *pval++ = *in++ ) == '\0' ) - return; - if( ( *pval++ = *in++ ) == '\0' ) - return; - c = *in++; - - default: - *pval++ = c; - if( ( *pval++ = *in++ ) == '\0' ) - return; - *pval = '\0'; - } - - cp = strtoul( val, NULL, 0x10 ); - if( cp < 0x80 ) - *out++ = cp; - else - if( cp < 0x800 ) - { - *out++ = (( cp >> 6) | 0xc0); - *out++ = (( cp & 0x3f) | 0x80); - } - else - { - assert( cp < 0x10000 ); - *out++ = (( cp >> 12) | 0xe0); - *out++ = (((cp >> 6) & 0x3f) | 0x80); - *out++ = (( cp & 0x3f) | 0x80); - } - break; - } - - /* + is not a special case - it means plus, not space. */ - - default: - /* Inserting non-ASCII or non-printable characters is unsafe, - * and no sane browser will send these unencoded */ - if( ( c < 32 ) || ( c > 127 ) ) - *out++ = '?'; - else - *out++ = c; - } - } - *out = '\0'; -} - -/** - * Decode encoded URI string + * Decode encoded URI component. See also decode_URI(). * \return decoded duplicated string */ char *decode_URI_duplicate( const char *psz ) @@ -137,14 +60,23 @@ char *decode_URI_duplicate( const char *psz ) } /** - * Decode encoded URI string in place - * \return nothing + * Decode an encoded URI component in place. + * This function does NOT decode entire URIs. + * It decodes components (e.g. host name, directory, file name). + * Decoded URIs do not exist in the real world (see RFC3986 §2.4). + * Complete URIs are always "encoded" (or they are syntaxically invalid). + * + * Note that URI encoding is different from Javascript escaping. Especially, + * white spaces and Unicode non-ASCII code points are encoded differently. + * + * \return psz on success, NULL if it was not properly encoded */ -void decode_URI( char *psz ) +char *decode_URI( char *psz ) { unsigned char *in = (unsigned char *)psz, *out = in, c; + if( psz == NULL ) - return; + return NULL; while( ( c = *in++ ) != '\0' ) { @@ -156,14 +88,14 @@ void decode_URI( char *psz ) if( ( ( hex[0] = *in++ ) == 0 ) || ( ( hex[1] = *in++ ) == 0 ) ) - return; + return NULL; hex[2] = '\0'; *out++ = (unsigned char)strtoul( hex, NULL, 0x10 ); break; } - case '+': + case '+': /* This is HTTP forms, not URI decoding... */ *out++ = ' '; break; @@ -178,59 +110,200 @@ void decode_URI( char *psz ) } *out = '\0'; EnsureUTF8( psz ); + return psz; } -static inline int isurlsafe( int c ) +static inline bool isurisafe( int c ) { + /* These are the _unreserved_ URI characters (RFC3986 §2.3) */ return ( (unsigned char)( c - 'a' ) < 26 ) || ( (unsigned char)( c - 'A' ) < 26 ) || ( (unsigned char)( c - '0' ) < 10 ) - /* Hmm, we should not encode character that are allowed in URLs - * (even if they are not URL-safe), nor URL-safe characters. - * We still encode some of them because of Microsoft's crap browser. - */ - || ( strchr( "-_.", c ) != NULL ); + || ( strchr( "-._~", c ) != NULL ); } -static inline char url_hexchar( int c ) +static char *encode_URI_bytes (const char *psz_uri, size_t len) { - return ( c < 10 ) ? c + '0' : c + 'A' - 10; + char *psz_enc = malloc (3 * len + 1), *out = psz_enc; + if (psz_enc == NULL) + return NULL; + + for (size_t i = 0; i < len; i++) + { + static const char hex[16] = "0123456789ABCDEF"; + uint8_t c = *psz_uri; + + if( isurisafe( c ) ) + *out++ = c; + /* This is URI encoding, not HTTP forms: + * Space is encoded as '%20', not '+'. */ + else + { + *out++ = '%'; + *out++ = hex[c >> 4]; + *out++ = hex[c & 0xf]; + } + psz_uri++; + } + *out++ = '\0'; + + out = realloc (psz_enc, out - psz_enc); + return out ? out : psz_enc; /* realloc() can fail (safe) */ } /** - * encode_URI_component - * Encodes an URI component. + * Encodes an URI component (RFC3986 §2). * - * @param psz_url nul-terminated UTF-8 representation of the component. + * @param psz_uri nul-terminated UTF-8 representation of the component. * Obviously, you can't pass an URI containing a nul character, but you don't * want to do that, do you? * - * @return encoded string (must be free()'d) + * @return encoded string (must be free()'d), or NULL for ENOMEM. */ -char *encode_URI_component( const char *psz_url ) +char *encode_URI_component( const char *psz_uri ) { - char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc; - const uint8_t *in; + return encode_URI_bytes (psz_uri, strlen (psz_uri)); +} - for( in = (const uint8_t *)psz_url; *in; in++ ) - { - uint8_t c = *in; - if( isurlsafe( c ) ) - *out++ = (char)c; - else - if ( c == ' ') - *out++ = '+'; - else - { - *out++ = '%'; - *out++ = url_hexchar( c >> 4 ); - *out++ = url_hexchar( c & 0xf ); - } - } - *out++ = '\0'; +static const struct xml_entity_s +{ + char psz_entity[8]; + char psz_char[4]; +} xml_entities[] = { + /* Important: this list has to be in alphabetical order (psz_entity-wise) */ + { "AElig;", "Æ" }, + { "Aacute;", "Á" }, + { "Acirc;", "Â" }, + { "Agrave;", "À" }, + { "Aring;", "Å" }, + { "Atilde;", "Ã" }, + { "Auml;", "Ä" }, + { "Ccedil;", "Ç" }, + { "Dagger;", "‡" }, + { "ETH;", "Ð" }, + { "Eacute;", "É" }, + { "Ecirc;", "Ê" }, + { "Egrave;", "È" }, + { "Euml;", "Ë" }, + { "Iacute;", "Í" }, + { "Icirc;", "Î" }, + { "Igrave;", "Ì" }, + { "Iuml;", "Ï" }, + { "Ntilde;", "Ñ" }, + { "OElig;", "Œ" }, + { "Oacute;", "Ó" }, + { "Ocirc;", "Ô" }, + { "Ograve;", "Ò" }, + { "Oslash;", "Ø" }, + { "Otilde;", "Õ" }, + { "Ouml;", "Ö" }, + { "Scaron;", "Å " }, + { "THORN;", "Þ" }, + { "Uacute;", "Ú" }, + { "Ucirc;", "Û" }, + { "Ugrave;", "Ù" }, + { "Uuml;", "Ü" }, + { "Yacute;", "Ý" }, + { "Yuml;", "Ÿ" }, + { "aacute;", "á" }, + { "acirc;", "â" }, + { "acute;", "´" }, + { "aelig;", "æ" }, + { "agrave;", "à" }, + { "amp;", "&" }, + { "apos;", "'" }, + { "aring;", "Ã¥" }, + { "atilde;", "ã" }, + { "auml;", "ä" }, + { "bdquo;", "„" }, + { "brvbar;", "¦" }, + { "ccedil;", "ç" }, + { "cedil;", "¸" }, + { "cent;", "¢" }, + { "circ;", "ˆ" }, + { "copy;", "©" }, + { "curren;", "¤" }, + { "dagger;", "†" }, + { "deg;", "°" }, + { "divide;", "÷" }, + { "eacute;", "é" }, + { "ecirc;", "ê" }, + { "egrave;", "è" }, + { "eth;", "ð" }, + { "euml;", "ë" }, + { "euro;", "€" }, + { "frac12;", "½" }, + { "frac14;", "¼" }, + { "frac34;", "¾" }, + { "gt;", ">" }, + { "hellip;", "…" }, + { "iacute;", "í" }, + { "icirc;", "î" }, + { "iexcl;", "¡" }, + { "igrave;", "ì" }, + { "iquest;", "¿" }, + { "iuml;", "ï" }, + { "laquo;", "«" }, + { "ldquo;", "“" }, + { "lsaquo;", "‹" }, + { "lsquo;", "‘" }, + { "lt;", "<" }, + { "macr;", "¯" }, + { "mdash;", "—" }, + { "micro;", "µ" }, + { "middot;", "·" }, + { "nbsp;", "\xc2\xa0" }, + { "ndash;", "–" }, + { "not;", "¬" }, + { "ntilde;", "ñ" }, + { "oacute;", "ó" }, + { "ocirc;", "ô" }, + { "oelig;", "œ" }, + { "ograve;", "ò" }, + { "ordf;", "ª" }, + { "ordm;", "º" }, + { "oslash;", "ø" }, + { "otilde;", "õ" }, + { "ouml;", "ö" }, + { "para;", "¶" }, + { "permil;", "‰" }, + { "plusmn;", "±" }, + { "pound;", "£" }, + { "quot;", "\"" }, + { "raquo;", "»" }, + { "rdquo;", "”" }, + { "reg;", "®" }, + { "rsaquo;", "›" }, + { "rsquo;", "’" }, + { "sbquo;", "‚" }, + { "scaron;", "Å¡" }, + { "sect;", "§" }, + { "shy;", "­" }, + { "sup1;", "¹" }, + { "sup2;", "²" }, + { "sup3;", "³" }, + { "szlig;", "ß" }, + { "thorn;", "þ" }, + { "tilde;", "˜" }, + { "times;", "×" }, + { "trade;", "™" }, + { "uacute;", "ú" }, + { "ucirc;", "û" }, + { "ugrave;", "ù" }, + { "uml;", "¨" }, + { "uuml;", "ü" }, + { "yacute;", "ý" }, + { "yen;", "Â¥" }, + { "yuml;", "ÿ" }, +}; + +static int cmp_entity (const void *key, const void *elem) +{ + const struct xml_entity_s *ent = elem; + const char *name = key; - return strdup( psz_enc ); + return strncmp (name, ent->psz_entity, strlen (ent->psz_entity)); } /** @@ -245,40 +318,42 @@ void resolve_xml_special_chars( char *psz_value ) { if( *psz_value == '&' ) { -#define TRY_CHAR( src, len, dst ) \ - if( !strncmp( psz_value, src, len ) ) \ - { \ - *p_pos = dst; \ - psz_value += len; \ - } -#define TRY_LONGCHAR( src, len, dst ) \ - if( !strncmp( psz_value, src, len ) ) \ - { \ - strncpy( p_pos, dst, strlen( dst ) ); \ - p_pos += strlen( dst ) - 1; \ - psz_value += len; \ - } - TRY_CHAR( "<", 4, '<' ) - else TRY_CHAR( ">", 4, '>' ) - else TRY_CHAR( "&", 5, '&' ) - else TRY_CHAR( """, 6, '"' ) - else TRY_CHAR( "'", 6, '\'' ) - else if( psz_value[1] == '#' ) - { + if( psz_value[1] == '#' ) + { /* &#xxx; Unicode code point */ char *psz_end; - int i = strtol( psz_value+2, &psz_end, 10 ); + unsigned long cp = strtoul( psz_value+2, &psz_end, 10 ); if( *psz_end == ';' ) { - if( i >= 32 && i <= 126 ) + psz_value = psz_end + 1; + if( cp == 0 ) + (void)0; /* skip nuls */ + else + if( cp <= 0x7F ) { - *p_pos = (char)i; - psz_value = psz_end+1; + *p_pos = cp; } else + /* Unicode code point outside ASCII. + * &#xxx; representation is longer than UTF-8 :) */ + if( cp <= 0x7FF ) { - /* Unhandled code, FIXME */ - *p_pos = *psz_value; - psz_value++; + *p_pos++ = 0xC0 | (cp >> 6); + *p_pos = 0x80 | (cp & 0x3F); + } + else + if( cp <= 0xFFFF ) + { + *p_pos++ = 0xE0 | (cp >> 12); + *p_pos++ = 0x80 | ((cp >> 6) & 0x3F); + *p_pos = 0x80 | (cp & 0x3F); + } + else + if( cp <= 0x1FFFFF ) /* Outside the BMP */ + { /* Unicode stops at 10FFFF, but who cares? */ + *p_pos++ = 0xF0 | (cp >> 18); + *p_pos++ = 0x80 | ((cp >> 12) & 0x3F); + *p_pos++ = 0x80 | ((cp >> 6) & 0x3F); + *p_pos = 0x80 | (cp & 0x3F); } } else @@ -288,128 +363,25 @@ void resolve_xml_special_chars( char *psz_value ) psz_value++; } } - else TRY_LONGCHAR( "À", 8, "À" ) - else TRY_LONGCHAR( "Á", 8, "Á" ) - else TRY_LONGCHAR( "Â", 7, "Â" ) - else TRY_LONGCHAR( "Ã", 8, "Ã" ) - else TRY_LONGCHAR( "Ä", 6, "Ä" ) - else TRY_LONGCHAR( "Å", 7, "Å" ) - else TRY_LONGCHAR( "Æ", 7, "Æ" ) - else TRY_LONGCHAR( "Ç", 8, "Ç" ) - else TRY_LONGCHAR( "È", 8, "È" ) - else TRY_LONGCHAR( "É", 8, "É" ) - else TRY_LONGCHAR( "Ê", 7, "Ê" ) - else TRY_LONGCHAR( "Ë", 6, "Ë" ) - else TRY_LONGCHAR( "Ì", 8, "Ì" ) - else TRY_LONGCHAR( "Í", 8, "Í" ) - else TRY_LONGCHAR( "Î", 7, "Î" ) - else TRY_LONGCHAR( "Ï", 6, "Ï" ) - else TRY_LONGCHAR( "Ð", 5, "Ð" ) - else TRY_LONGCHAR( "Ñ", 8, "Ñ" ) - else TRY_LONGCHAR( "Ò", 8, "Ò" ) - else TRY_LONGCHAR( "Ó", 8, "Ó" ) - else TRY_LONGCHAR( "Ô", 7, "Ô" ) - else TRY_LONGCHAR( "Õ", 8, "Õ" ) - else TRY_LONGCHAR( "Ö", 6, "Ö" ) - else TRY_LONGCHAR( "Ø", 8, "Ø" ) - else TRY_LONGCHAR( "Ù", 8, "Ù" ) - else TRY_LONGCHAR( "Ú", 8, "Ú" ) - else TRY_LONGCHAR( "Û", 7, "Û" ) - else TRY_LONGCHAR( "Ü", 6, "Ü" ) - else TRY_LONGCHAR( "Ý", 8, "Ý" ) - else TRY_LONGCHAR( "Þ", 7, "Þ" ) - else TRY_LONGCHAR( "ß", 7, "ß" ) - else TRY_LONGCHAR( "à", 8, "à" ) - else TRY_LONGCHAR( "á", 8, "á" ) - else TRY_LONGCHAR( "â", 7, "â" ) - else TRY_LONGCHAR( "ã", 8, "ã" ) - else TRY_LONGCHAR( "ä", 6, "ä" ) - else TRY_LONGCHAR( "å", 7, "Ã¥" ) - else TRY_LONGCHAR( "æ", 7, "æ" ) - else TRY_LONGCHAR( "ç", 8, "ç" ) - else TRY_LONGCHAR( "è", 8, "è" ) - else TRY_LONGCHAR( "é", 8, "é" ) - else TRY_LONGCHAR( "ê", 7, "ê" ) - else TRY_LONGCHAR( "ë", 6, "ë" ) - else TRY_LONGCHAR( "ì", 8, "ì" ) - else TRY_LONGCHAR( "í", 8, "í" ) - else TRY_LONGCHAR( "î", 7, "î" ) - else TRY_LONGCHAR( "ï", 6, "ï" ) - else TRY_LONGCHAR( "ð", 5, "ð" ) - else TRY_LONGCHAR( "ñ", 8, "ñ" ) - else TRY_LONGCHAR( "ò", 8, "ò" ) - else TRY_LONGCHAR( "ó", 8, "ó" ) - else TRY_LONGCHAR( "ô", 7, "ô" ) - else TRY_LONGCHAR( "õ", 8, "õ" ) - else TRY_LONGCHAR( "ö", 6, "ö" ) - else TRY_LONGCHAR( "ø", 8, "ø" ) - else TRY_LONGCHAR( "ù", 8, "ù" ) - else TRY_LONGCHAR( "ú", 8, "ú" ) - else TRY_LONGCHAR( "û", 7, "û" ) - else TRY_LONGCHAR( "ü", 6, "ü" ) - else TRY_LONGCHAR( "ý", 8, "ý" ) - else TRY_LONGCHAR( "þ", 7, "þ" ) - else TRY_LONGCHAR( "ÿ", 6, "ÿ" ) - else TRY_LONGCHAR( "¡", 7, "¡" ) - else TRY_LONGCHAR( "¤", 8, "¤" ) - else TRY_LONGCHAR( "¢", 6, "¢" ) - else TRY_LONGCHAR( "£", 7, "£" ) - else TRY_LONGCHAR( "¥", 5, "Â¥" ) - else TRY_LONGCHAR( "¦", 8, "¦" ) - else TRY_LONGCHAR( "§", 6, "§" ) - else TRY_LONGCHAR( "¨", 5, "¨" ) - else TRY_LONGCHAR( "©", 6, "©" ) - else TRY_LONGCHAR( "ª", 6, "ª" ) - else TRY_LONGCHAR( "«", 7, "«" ) - else TRY_LONGCHAR( "¬", 5, "¬" ) - else TRY_LONGCHAR( "­", 5, "­" ) - else TRY_LONGCHAR( "®", 5, "®" ) - else TRY_LONGCHAR( "™", 7, "™" ) - else TRY_LONGCHAR( "¯", 6, "¯" ) - else TRY_LONGCHAR( "°", 5, "°" ) - else TRY_LONGCHAR( "±", 8, "±" ) - else TRY_LONGCHAR( "²", 6, "²" ) - else TRY_LONGCHAR( "³", 6, "³" ) - else TRY_LONGCHAR( "´", 7, "´" ) - else TRY_LONGCHAR( "µ", 7, "µ" ) - else TRY_LONGCHAR( "¶", 6, "¶" ) - else TRY_LONGCHAR( "·", 8, "·" ) - else TRY_LONGCHAR( "¸", 7, "¸" ) - else TRY_LONGCHAR( "¹", 6, "¹" ) - else TRY_LONGCHAR( "º", 6, "º" ) - else TRY_LONGCHAR( "»", 7, "»" ) - else TRY_LONGCHAR( "¼", 8, "¼" ) - else TRY_LONGCHAR( "½", 8, "½" ) - else TRY_LONGCHAR( "¾", 8, "¾" ) - else TRY_LONGCHAR( "¿", 8, "¿" ) - else TRY_LONGCHAR( "×", 7, "×" ) - else TRY_LONGCHAR( "÷", 8, "÷" ) - else TRY_LONGCHAR( "Œ", 7, "Œ" ) - else TRY_LONGCHAR( "œ", 7, "œ" ) - else TRY_LONGCHAR( "Š", 8, "Å " ) - else TRY_LONGCHAR( "š", 8, "Å¡" ) - else TRY_LONGCHAR( "Ÿ", 6, "Ÿ" ) - else TRY_LONGCHAR( "ˆ", 6, "ˆ" ) - else TRY_LONGCHAR( "˜", 7, "˜" ) - else TRY_LONGCHAR( "–", 7, "–" ) - else TRY_LONGCHAR( "—", 7, "—" ) - else TRY_LONGCHAR( "‘", 7, "‘" ) - else TRY_LONGCHAR( "’", 7, "’" ) - else TRY_LONGCHAR( "‚", 7, "‚" ) - else TRY_LONGCHAR( "“", 7, "“" ) - else TRY_LONGCHAR( "”", 7, "”" ) - else TRY_LONGCHAR( "„", 7, "„" ) - else TRY_LONGCHAR( "†", 8, "†" ) - else TRY_LONGCHAR( "‡", 8, "‡" ) - else TRY_LONGCHAR( "…", 8, "…" ) - else TRY_LONGCHAR( "‰", 8, "‰" ) - else TRY_LONGCHAR( "‹", 8, "‹" ) - else TRY_LONGCHAR( "›", 8, "›" ) - else TRY_LONGCHAR( "€", 6, "€" ) else - { - *p_pos = *psz_value; - psz_value++; + { /* Well-known XML entity */ + const struct xml_entity_s *ent; + + ent = bsearch (psz_value + 1, xml_entities, + sizeof (xml_entities) / sizeof (*ent), + sizeof (*ent), cmp_entity); + if (ent != NULL) + { + size_t olen = strlen (ent->psz_char); + memcpy (p_pos, ent->psz_char, olen); + p_pos += olen - 1; + psz_value += strlen (ent->psz_entity) + 1; + } + else + { /* No match */ + *p_pos = *psz_value; + psz_value++; + } } } else @@ -430,47 +402,36 @@ void resolve_xml_special_chars( char *psz_value ) */ char *convert_xml_special_chars( const char *psz_content ) { - char *psz_temp = malloc( 6 * strlen( psz_content ) + 1 ); - const char *p_from = psz_content; + assert( psz_content ); + + const size_t len = strlen( psz_content ); + char *const psz_temp = malloc( 6 * len + 1 ); char *p_to = psz_temp; - while ( *p_from ) + if( psz_temp == NULL ) + return NULL; + for( size_t i = 0; i < len; i++ ) { - if ( *p_from == '<' ) - { - strcpy( p_to, "<" ); - p_to += 4; - } - else if ( *p_from == '>' ) - { - strcpy( p_to, ">" ); - p_to += 4; - } - else if ( *p_from == '&' ) - { - strcpy( p_to, "&" ); - p_to += 5; - } - else if( *p_from == '\"' ) - { - strcpy( p_to, """ ); - p_to += 6; - } - else if( *p_from == '\'' ) - { - strcpy( p_to, "'" ); - p_to += 6; - } - else + const char *str; + char c = psz_content[i]; + + switch ( c ) { - *p_to = *p_from; - p_to++; + case '\"': str = "quot"; break; + case '&': str = "amp"; break; + case '\'': str = "#39"; break; + case '<': str = "lt"; break; + case '>': str = "gt"; break; + default: + *(p_to++) = c; + continue; } - p_from++; + p_to += sprintf( p_to, "&%s;", str ); } - *p_to = '\0'; + *(p_to++) = '\0'; - return psz_temp; + p_to = realloc( psz_temp, p_to - psz_temp ); + return p_to ? p_to : psz_temp; /* cannot fail */ } /* Base64 encoding */ @@ -555,7 +516,7 @@ size_t vlc_b64_decode_binary_to_buffer( uint8_t *p_dst, size_t i_dst, const char int i_level; int i_last; - for( i_level = 0, i_last = 0; i_dst > 0 && *p != '\0'; i_dst--, p++ ) + for( i_level = 0, i_last = 0; (size_t)( p_dst - p_start ) < i_dst && *p != '\0'; p++ ) { const int c = b64[(unsigned int)*p]; if( c == -1 ) @@ -607,143 +568,190 @@ char *vlc_b64_decode( const char *psz_src ) return p_dst; } -/**************************************************************************** - * String formating functions - ****************************************************************************/ +/** + * Formats current time into a heap-allocated string. + * @param tformat time format (as with C strftime()) + * @return an allocated string (must be free()'d), or NULL on memory error. + */ char *str_format_time( const char *tformat ) { - char buffer[255]; time_t curtime; -#if defined(HAVE_LOCALTIME_R) struct tm loctime; -#else - struct tm *loctime; -#endif + + if (strcmp (tformat, "") == 0) + return strdup (""); /* corner case w.r.t. strftime() return value */ /* Get the current time. */ - curtime = time( NULL ); + time( &curtime ); /* Convert it to local time representation. */ -#if defined(HAVE_LOCALTIME_R) localtime_r( &curtime, &loctime ); - strftime( buffer, 255, tformat, &loctime ); -#else - loctime = localtime( &curtime ); - strftime( buffer, 255, tformat, loctime ); -#endif - return strdup( buffer ); + for (size_t buflen = strlen (tformat) + 32;; buflen += 32) + { + char *str = malloc (buflen); + if (str == NULL) + return NULL; + + size_t len = strftime (str, buflen, tformat, &loctime); + if (len > 0) + { + char *ret = realloc (str, len + 1); + return ret ? ret : str; /* <- this cannot fail */ + } + } + assert (0); } -#define INSERT_STRING( check, string ) \ - if( check ) \ +#define INSERT_STRING( string ) \ + if( string != NULL ) \ { \ - psz_meta = string; \ - if( psz_meta ) \ - { \ - int len = strlen( string ); \ - dst = realloc( dst, \ - i_size = i_size + len + 1 ); \ - strncpy( d, psz_meta, len+1 ); \ - d += len; \ - } \ - else \ - { \ - *d = '-'; \ - d++; \ - } \ - } + int len = strlen( string ); \ + dst = realloc( dst, i_size = i_size + len );\ + memcpy( (dst+d), string, len ); \ + d += len; \ + free( string ); \ + } \ + else if( !b_empty_if_na ) \ + { \ + *(dst+d) = '-'; \ + d++; \ + } \ /* same than INSERT_STRING, except that string won't be freed */ #define INSERT_STRING_NO_FREE( string ) \ { \ - int len = strlen( string ); \ - dst = realloc( dst, \ - i_size = i_size + len + 1 ); \ - strncpy( d, string, len+1 ); \ - d += len; \ - free( string ); \ - } + int len = strlen( string ); \ + dst = realloc( dst, i_size = i_size + len );\ + memcpy( dst+d, string, len ); \ + d += len; \ + } char *__str_format_meta( vlc_object_t *p_object, const char *string ) { const char *s = string; - char *dst = malloc( 1000 ); - char *d = dst; - int b_is_format = 0; - int b_empty_if_na = 0; + bool b_is_format = false; + bool b_empty_if_na = false; char buf[10]; - int i_size = strlen( string ); + int i_size = strlen( string ) + 1; /* +1 to store '\0' */ + char *dst = strdup( string ); + if( !dst ) return NULL; + int d = 0; - playlist_t *p_playlist = pl_Yield( p_object ); - input_thread_t *p_input = p_playlist->p_input; + playlist_t *p_playlist = pl_Hold( p_object ); + input_thread_t *p_input = playlist_CurrentInput( p_playlist ); input_item_t *p_item = NULL; pl_Release( p_object ); if( p_input ) { - vlc_object_yield( p_input ); p_item = input_GetItem(p_input); } - sprintf( dst, string ); - while( *s ) { if( b_is_format ) { switch( *s ) { - char *psz_meta; /* used by INSERT_STRING */ case 'a': - INSERT_STRING( p_item, input_item_GetArtist(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetArtist( p_item ) ); + } break; case 'b': - INSERT_STRING( p_item, input_item_GetAlbum(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetAlbum( p_item ) ); + } break; case 'c': - INSERT_STRING( p_item, input_item_GetCopyright(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetCopyright( p_item ) ); + } break; case 'd': - INSERT_STRING( p_item, input_item_GetDescription(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetDescription( p_item ) ); + } break; case 'e': - INSERT_STRING( p_item, input_item_GetEncodedBy(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetEncodedBy( p_item ) ); + } + break; + case 'f': + if( p_item && p_item->p_stats ) + { + vlc_mutex_lock( &p_item->p_stats->lock ); + snprintf( buf, 10, "%d", + p_item->p_stats->i_displayed_pictures ); + vlc_mutex_unlock( &p_item->p_stats->lock ); + } + else + { + sprintf( buf, b_empty_if_na ? "" : "-" ); + } + INSERT_STRING_NO_FREE( buf ); break; case 'g': - INSERT_STRING( p_item, input_item_GetGenre(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetGenre( p_item ) ); + } break; case 'l': - INSERT_STRING( p_item, input_item_GetLanguage(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetLanguage( p_item ) ); + } break; case 'n': - INSERT_STRING( p_item, input_item_GetTrackNum(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetTrackNum( p_item ) ); + } break; case 'p': - INSERT_STRING( p_item, input_item_GetNowPlaying(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetNowPlaying( p_item ) ); + } break; case 'r': - INSERT_STRING( p_item, input_item_GetRating(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetRating( p_item ) ); + } break; case 's': { - char *lang; + char *lang = NULL; if( p_input ) - { - lang = var_GetString( p_input, "sub-language" ); - } - else - { + lang = var_GetNonEmptyString( p_input, "sub-language" ); + if( lang == NULL ) lang = strdup( b_empty_if_na ? "" : "-" ); - } - INSERT_STRING( 1, lang ); + INSERT_STRING( lang ); break; } case 't': - INSERT_STRING( p_item, input_item_GetTitle(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetTitle( p_item ) ); + } break; case 'u': - INSERT_STRING( p_item, input_item_GetURL(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetURL( p_item ) ); + } break; case 'A': - INSERT_STRING( p_item, input_item_GetDate(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetDate( p_item ) ); + } break; case 'B': if( p_input ) @@ -773,19 +781,22 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string ) if( p_item ) { mtime_t i_duration = input_item_GetDuration( p_item ); - sprintf( buf, "%02d:%02d:%02d", + snprintf( buf, 10, "%02d:%02d:%02d", (int)(i_duration/(3600000000)), (int)((i_duration/(60000000))%60), (int)((i_duration/1000000)%60) ); } else { - sprintf( buf, b_empty_if_na ? "" : "--:--:--" ); + snprintf( buf, 10, b_empty_if_na ? "" : "--:--:--" ); } INSERT_STRING_NO_FREE( buf ); break; case 'F': - INSERT_STRING( p_item, input_item_GetURI( p_item ) ); + if( p_item ) + { + INSERT_STRING( input_item_GetURI( p_item ) ); + } break; case 'I': if( p_input ) @@ -803,33 +814,33 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string ) if( p_item && p_input ) { mtime_t i_duration = input_item_GetDuration( p_item ); - int64_t i_time = p_input->i_time; - sprintf( buf, "%02d:%02d:%02d", + int64_t i_time = var_GetTime( p_input, "time" ); + snprintf( buf, 10, "%02d:%02d:%02d", (int)( ( i_duration - i_time ) / 3600000000 ), (int)( ( ( i_duration - i_time ) / 60000000 ) % 60 ), (int)( ( ( i_duration - i_time ) / 1000000 ) % 60 ) ); } else { - sprintf( buf, b_empty_if_na ? "" : "--:--:--" ); + snprintf( buf, 10, b_empty_if_na ? "" : "--:--:--" ); } INSERT_STRING_NO_FREE( buf ); break; case 'N': - INSERT_STRING( p_item, input_item_GetName( p_item ) ); + if( p_item ) + { + INSERT_STRING( input_item_GetName( p_item ) ); + } break; case 'O': { - char *lang; + char *lang = NULL; if( p_input ) - { - lang = var_GetString( p_input, "audio-language" ); - } - else - { + lang = var_GetNonEmptyString( p_input, + "audio-language" ); + if( lang == NULL ) lang = strdup( b_empty_if_na ? "" : "-" ); - } - INSERT_STRING( 1, lang ); + INSERT_STRING( lang ); break; } case 'P': @@ -840,7 +851,7 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string ) } else { - sprintf( buf, b_empty_if_na ? "" : "--.-%%" ); + snprintf( buf, 10, b_empty_if_na ? "" : "--.-%%" ); } INSERT_STRING_NO_FREE( buf ); break; @@ -871,19 +882,23 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string ) case 'T': if( p_input ) { - sprintf( buf, "%02d:%02d:%02d", - (int)( p_input->i_time / ( 3600000000 ) ), - (int)( ( p_input->i_time / ( 60000000 ) ) % 60 ), - (int)( ( p_input->i_time / 1000000 ) % 60 ) ); + int64_t i_time = var_GetTime( p_input, "time" ); + snprintf( buf, 10, "%02d:%02d:%02d", + (int)( i_time / ( 3600000000 ) ), + (int)( ( i_time / ( 60000000 ) ) % 60 ), + (int)( ( i_time / 1000000 ) % 60 ) ); } else { - sprintf( buf, b_empty_if_na ? "" : "--:--:--" ); + snprintf( buf, 10, b_empty_if_na ? "" : "--:--:--" ); } INSERT_STRING_NO_FREE( buf ); break; case 'U': - INSERT_STRING( p_item, input_item_GetPublisher(p_item) ); + if( p_item ) + { + INSERT_STRING( input_item_GetPublisher( p_item ) ); + } break; case 'V': { @@ -894,41 +909,43 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string ) break; } case '_': - *d = '\n'; + *(dst+d) = '\n'; d++; break; case ' ': - b_empty_if_na = 1; + b_empty_if_na = true; break; default: - *d = *s; + *(dst+d) = *s; d++; break; } if( *s != ' ' ) - b_is_format = 0; + b_is_format = false; } else if( *s == '$' ) { - b_is_format = 1; - b_empty_if_na = 0; + b_is_format = true; + b_empty_if_na = false; } else { - *d = *s; + *(dst+d) = *s; d++; } s++; } - *d = '\0'; + *(dst+d) = '\0'; if( p_input ) vlc_object_release( p_input ); return dst; } +#undef INSERT_STRING +#undef INSERT_STRING_NO_FREE /** * Apply str format time and str format meta @@ -945,8 +962,10 @@ char *__str_format( vlc_object_t *p_this, const char *psz_src ) /** * Remove forbidden characters from filenames (including slashes) */ -void filename_sanitize( char *str ) +char* filename_sanitize( const char *str_origin ) { + char *str = strdup( str_origin ); + char *str_base = str; if( *str == '.' && (str[1] == '\0' || (str[1] == '.' && str[2] == '\0' ) ) ) { while( *str ) @@ -954,15 +973,23 @@ void filename_sanitize( char *str ) *str = '_'; str++; } - return; + return str_base; } +#if defined( WIN32 ) + // Change leading spaces into underscores + while( *str && *str == ' ' ) + *str++ = '_'; +#endif + while( *str ) { switch( *str ) { case '/': -#ifdef WIN32 +#if defined( __APPLE__ ) + case ':': +#elif defined( WIN32 ) case '\\': case '*': case '"': @@ -976,6 +1003,19 @@ void filename_sanitize( char *str ) } str++; } + +#if defined( WIN32 ) + // Change trailing spaces into underscores + str--; + while( str != str_base ) + { + if( *str != ' ' ) + break; + *str-- = '_'; + } +#endif + + return str_base; } /** @@ -983,52 +1023,120 @@ void filename_sanitize( char *str ) */ void path_sanitize( char *str ) { -#if 0 - /* - * Uncomment the two blocks to prevent /../ or /./, i'm not sure that we - * want to. - */ - char *prev = str - 1; -#endif #ifdef WIN32 /* check drive prefix if path is absolute */ - if( isalpha(*str) && (':' == *(str+1)) ) + if( (((unsigned char)(str[0] - 'A') < 26) + || ((unsigned char)(str[0] - 'a') < 26)) && (':' == str[1]) ) str += 2; #endif while( *str ) { -#ifdef WIN32 - switch( *str ) - { - case '*': - case '"': - case '?': - case ':': - case '|': - case '<': - case '>': - *str = '_'; - } +#if defined( __APPLE__ ) + if( *str == ':' ) + *str = '_'; +#elif defined( WIN32 ) + if( strchr( "*\"?:|<>", *str ) ) + *str = '_'; + if( *str == '/' ) + *str = DIR_SEP_CHAR; #endif -#if 0 - if( *str == '/' + str++; + } +} + +#include + +/** + * Convert a file path to an URI. If already an URI, do nothing. + */ +char *make_URI (const char *path) +{ + if (path == NULL) + return NULL; + if (strstr (path, "://") != NULL) + return strdup (path); /* Already an URI */ + /* Note: VLC cannot handle URI schemes without double slash after the + * scheme name (such as mailto: or news:). */ + + char *buf; #ifdef WIN32 - || *str == '\\' + if (isalpha (path[0]) && (path[1] == ':')) + { + if (asprintf (&buf, "file:///%c:", path[0]) == -1) + buf = NULL; + path += 2; + } + else #endif - ) - { - if( str - prev == 2 && prev[1] == '.' ) - { - prev[1] = '.'; - } - else if( str - prev == 3 && prev[1] == '.' && prev[2] == '.' ) - { - prev[1] = '_'; - prev[2] = '_'; - } - prev = str; + if (!strncmp (path, "\\\\", 2)) + { /* Windows UNC paths */ +#ifndef WIN32 + /* \\host\share\path -> smb://host/share/path */ + if (strchr (path + 2, '\\') != NULL) + { /* Convert antislashes to slashes */ + char *dup = strdup (path); + if (dup == NULL) + return NULL; + for (size_t i = 2; dup[i]; i++) + if (dup[i] == '\\') + dup[i] = DIR_SEP_CHAR; + + char *ret = make_URI (dup); + free (dup); + return ret; } +# define SMB_SCHEME "smb" +#else + /* \\host\share\path -> file://host/share/path */ +# define SMB_SCHEME "file" #endif - str++; + size_t hostlen = strcspn (path + 2, DIR_SEP); + + buf = malloc (sizeof (SMB_SCHEME) + 3 + hostlen); + if (buf != NULL) + snprintf (buf, sizeof (SMB_SCHEME) + 3 + hostlen, + SMB_SCHEME"://%s", path + 2); + path += 2 + hostlen; + } + else + if (path[0] != DIR_SEP_CHAR) + { /* Relative path: prepend the current working directory */ + char cwd[PATH_MAX]; + + if (getcwd (cwd, sizeof (cwd)) == NULL) /* FIXME: UTF8? */ + return NULL; + if (asprintf (&buf, "%s/%s", cwd, path) == -1) + return NULL; + char *ret = make_URI (buf); + free (buf); + return ret; + } + else + buf = strdup ("file://"); + if (buf == NULL) + return NULL; + + assert (path[0] == DIR_SEP_CHAR); + + /* Absolute file path */ + for (const char *ptr = path + 1;; ptr++) + { + size_t len = strcspn (ptr, DIR_SEP); + char *component = encode_URI_bytes (ptr, len); + if (component == NULL) + { + free (buf); + return NULL; + } + char *uri; + int val = asprintf (&uri, "%s/%s", buf, component); + free (component); + free (buf); + if (val == -1) + return NULL; + buf = uri; + ptr += len; + if (*ptr == '\0') + return buf; } }