X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Ftext%2Fstrings.c;h=0e5de0fa9df237801b9579f03dbc5b3c2be60058;hb=2ee87126c258da4f56dd10f63224eaf0ca9379c5;hp=82e29db6302214ea7f38a26207b9fb008bac1998;hpb=e3a2e4e485c300546b356a63aa7c7ab15c5f3682;p=vlc

diff --git a/src/text/strings.c b/src/text/strings.c
index 82e29db630..0e5de0fa9d 100644
--- a/src/text/strings.c
+++ b/src/text/strings.c
@@ -2,6 +2,7 @@
  * strings.c: String related functions
  *****************************************************************************
  * Copyright (C) 2006 the VideoLAN team
+ * Copyright (C) 2008-2009 Rémi Denis-Courmont
  * $Id$
  *
  * Authors: Antoine Cellerier <dionoea at videolan dot org>
@@ -35,102 +36,23 @@
 
 /* Needed by str_format_time */
 #include <time.h>
+#include <limits.h>
 
 /* Needed by str_format_meta */
 #include <vlc_input.h>
 #include <vlc_meta.h>
 #include <vlc_playlist.h>
-#include <vlc_aout.h>
+#include <vlc_aout_intf.h>
 
 #include <vlc_strings.h>
 #include <vlc_url.h>
 #include <vlc_charset.h>
+#include <vlc_fs.h>
+#include <libvlc.h>
+#include <errno.h>
 
 /**
- * Unescape URI encoded string
- * \return decoded duplicated string
- */
-char *unescape_URI_duplicate( const char *psz )
-{
-    char *psz_dup = strdup( psz );
-    unescape_URI( psz_dup );
-    return psz_dup;
-}
-
-/**
- * Unescape URI encoded string in place
- * \return nothing
- */
-void unescape_URI( char *psz )
-{
-    unsigned char *in = (unsigned char *)psz, *out = in, c;
-    if( psz == NULL )
-        return;
-
-    while( ( c = *in++ ) != '\0' )
-    {
-        switch( c )
-        {
-            case '%':
-            {
-                char val[5], *pval = val;
-                unsigned long cp;
-
-                switch( c = *in++ )
-                {
-                    case '\0':
-                        return;
-
-                    case 'u':
-                    case 'U':
-                        if( ( *pval++ = *in++ ) == '\0' )
-                            return;
-                        if( ( *pval++ = *in++ ) == '\0' )
-                            return;
-                        c = *in++;
-
-                    default:
-                        *pval++ = c;
-                        if( ( *pval++ = *in++ ) == '\0' )
-                            return;
-                        *pval = '\0';
-                }
-
-                cp = strtoul( val, NULL, 0x10 );
-                if( cp < 0x80 )
-                    *out++ = cp;
-                else
-                if( cp < 0x800 )
-                {
-                    *out++ = (( cp >>  6)         | 0xc0);
-                    *out++ = (( cp        & 0x3f) | 0x80);
-                }
-                else
-                {
-                    assert( cp < 0x10000 );
-                    *out++ = (( cp >> 12)         | 0xe0);
-                    *out++ = (((cp >>  6) & 0x3f) | 0x80);
-                    *out++ = (( cp        & 0x3f) | 0x80);
-                }
-                break;
-            }
-
-            /* + is not a special case - it means plus, not space. */
-
-            default:
-                /* Inserting non-ASCII or non-printable characters is unsafe,
-                 * and no sane browser will send these unencoded */
-                if( ( c < 32 ) || ( c > 127 ) )
-                    *out++ = '?';
-                else
-                    *out++ = c;
-        }
-    }
-    *out = '\0';
-}
-
-/**
- * Decode encoded URI string
+ * Decode encoded URI component. See also decode_URI().
  * \return decoded duplicated string
  */
 char *decode_URI_duplicate( const char *psz )
@@ -141,14 +63,23 @@ char *decode_URI_duplicate( const char *psz )
 }
 
 /**
- * Decode encoded URI string in place
- * \return nothing
+ * Decode an encoded URI component in place.
+ * <b>This function does NOT decode entire URIs.</b>
+ * It decodes components (e.g. host name, directory, file name).
+ * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
+ * Complete URIs are always "encoded" (or they are syntactically invalid).
+ *
+ * Note that URI encoding is different from Javascript escaping. Especially,
+ * white spaces and Unicode non-ASCII code points are encoded differently.
+ *
+ * \return psz on success, NULL if it was not properly encoded
  */
-void decode_URI( char *psz )
+char *decode_URI( char *psz )
 {
     unsigned char *in = (unsigned char *)psz, *out = in, c;
+
     if( psz == NULL )
-        return;
+        return NULL;
 
     while( ( c = *in++ ) != '\0' )
     {
@@ -160,17 +91,13 @@ void decode_URI( char *psz )
 
                 if( ( ( hex[0] = *in++ ) == 0 )
                  || ( ( hex[1] = *in++ ) == 0 ) )
-                    return;
+                    return NULL;
 
                 hex[2] = '\0';
                 *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
                 break;
             }
 
-            case '+':
-                *out++ = ' ';
-                break;
-
             default:
                 /* Inserting non-ASCII or non-printable characters is unsafe,
                  * and no sane browser will send these unencoded */
@@ -181,7 +108,7 @@ void decode_URI( char *psz )
         }
     }
     *out = '\0';
-    EnsureUTF8( psz );
+    return psz;
 }
 
 static inline bool isurisafe( int c )
@@ -193,23 +120,13 @@ static inline bool isurisafe( int c )
             || ( strchr( "-._~", c ) != NULL );
 }
 
-/**
- * Encodes an URI component (RFC3986 §2).
- *
- * @param psz_uri nul-terminated UTF-8 representation of the component.
- * Obviously, you can't pass an URI containing a nul character, but you don't
- * want to do that, do you?
- *
- * @return encoded string (must be free()'d), or NULL for ENOMEM.
- */
-char *encode_URI_component( const char *psz_uri )
+static char *encode_URI_bytes (const char *psz_uri, size_t len)
 {
-    char *psz_enc = malloc ((3 * strlen (psz_uri)) + 1), *out = psz_enc;
-
+    char *psz_enc = malloc (3 * len + 1), *out = psz_enc;
     if (psz_enc == NULL)
         return NULL;
 
-    while (*psz_uri)
+    for (size_t i = 0; i < len; i++)
     {
         static const char hex[16] = "0123456789ABCDEF";
         uint8_t c = *psz_uri;
@@ -232,133 +149,161 @@ char *encode_URI_component( const char *psz_uri )
     return out ? out : psz_enc; /* realloc() can fail (safe) */
 }
 
+/**
+ * Encodes a URI component (RFC3986 §2).
+ *
+ * @param psz_uri nul-terminated UTF-8 representation of the component.
+ * Obviously, you can't pass a URI containing a nul character, but you don't
+ * want to do that, do you?
+ *
+ * @return encoded string (must be free()'d), or NULL for ENOMEM.
+ */
+char *encode_URI_component( const char *psz_uri )
+{
+    return encode_URI_bytes (psz_uri, strlen (psz_uri));
+}
+
+
 static const struct xml_entity_s
 {
     char    psz_entity[8];
-    uint8_t i_length;
     char    psz_char[4];
-} p_xml_entities[] = {
+} xml_entities[] = {
     /* Important: this list has to be in alphabetical order (psz_entity-wise) */
-    { "AElig;", 6, "Ã" },
-    { "Aacute;", 7, "Ã" },
-    { "Acirc;", 6, "Ã" },
-    { "Agrave;", 7, "Ã" },
-    { "Aring;", 6, "Ã" },
-    { "Atilde;", 7, "Ã" },
-    { "Auml;", 5, "Ã" },
-    { "Ccedil;", 7, "Ã" },
-    { "Dagger;", 7, "â¡" },
-    { "ETH;", 4, "Ã" },
-    { "Eacute;", 7, "Ã" },
-    { "Ecirc;", 6, "Ã" },
-    { "Egrave;", 7, "Ã" },
-    { "Euml;", 5, "Ã" },
-    { "Iacute;", 7, "Ã" },
-    { "Icirc;", 6, "Ã" },
-    { "Igrave;", 7, "Ã" },
-    { "Iuml;", 5, "Ã" },
-    { "Ntilde;", 7, "Ã" },
-    { "OElig;", 6, "Å" },
-    { "Oacute;", 7, "Ã" },
-    { "Ocirc;", 6, "Ã" },
-    { "Ograve;", 7, "Ã" },
-    { "Oslash;", 7, "Ã" },
-    { "Otilde;", 7, "Ã" },
-    { "Ouml;", 5, "Ã" },
-    { "Scaron;", 7, "Å " },
-    { "THORN;", 6, "Ã" },
-    { "Uacute;", 7, "Ã" },
-    { "Ucirc;", 6, "Ã" },
-    { "Ugrave;", 7, "Ã" },
-    { "Uuml;", 5, "Ã" },
-    { "Yacute;", 7, "Ã" },
-    { "Yuml;", 5, "Å¸" },
-    { "aacute;", 7, "Ã¡" },
-    { "acirc;", 6, "Ã¢" },
-    { "acute;", 6, "Â´" },
-    { "aelig;", 6, "Ã¦" },
-    { "agrave;", 7, "Ã " },
-    { "aring;", 6, "Ã¥" },
-    { "atilde;", 7, "Ã£" },
-    { "auml;", 5, "Ã¤" },
-    { "bdquo;", 6, "â" },
-    { "brvbar;", 7, "Â¦" },
-    { "ccedil;", 7, "Ã§" },
-    { "cedil;", 6, "Â¸" },
-    { "cent;", 5, "Â¢" },
-    { "circ;", 5, "Ë" },
-    { "copy;", 5, "Â©" },
-    { "curren;", 7, "Â¤" },
-    { "dagger;", 7, "â " },
-    { "deg;", 4, "Â°" },
-    { "divide;", 7, "Ã·" },
-    { "eacute;", 7, "Ã©" },
-    { "ecirc;", 6, "Ãª" },
-    { "egrave;", 7, "Ã¨" },
-    { "eth;", 4, "Ã°" },
-    { "euml;", 5, "Ã«" },
-    { "euro;", 5, "â¬" },
-    { "frac12;", 7, "Â½" },
-    { "frac14;", 7, "Â¼" },
-    { "frac34;", 7, "Â¾" },
-    { "hellip;", 7, "â¦" },
-    { "iacute;", 7, "Ã­" },
-    { "icirc;", 6, "Ã®" },
-    { "iexcl;", 6, "Â¡" },
-    { "igrave;", 7, "Ã¬" },
-    { "iquest;", 7, "Â¿" },
-    { "iuml;", 5, "Ã¯" },
-    { "laquo;", 6, "Â«" },
-    { "ldquo;", 6, "â" },
-    { "lsaquo;", 7, "â¹" },
-    { "lsquo;", 6, "â" },
-    { "macr;", 5, "Â¯" },
-    { "mdash;", 6, "â" },
-    { "micro;", 6, "Âµ" },
-    { "middot;", 7, "Â·" },
-    { "ndash;", 6, "â" },
-    { "not;", 4, "Â¬" },
-    { "ntilde;", 7, "Ã±" },
-    { "oacute;", 7, "Ã³" },
-    { "ocirc;", 6, "Ã´" },
-    { "oelig;", 6, "Å" },
-    { "ograve;", 7, "Ã²" },
-    { "ordf;", 5, "Âª" },
-    { "ordm;", 5, "Âº" },
-    { "oslash;", 7, "Ã¸" },
-    { "otilde;", 7, "Ãµ" },
-    { "ouml;", 5, "Ã¶" },
-    { "para;", 5, "Â¶" },
-    { "permil;", 7, "â°" },
-    { "plusmn;", 7, "Â±" },
-    { "pound;", 6, "Â£" },
-    { "raquo;", 6, "Â»" },
-    { "rdquo;", 6, "â" },
-    { "reg;", 4, "Â®" },
-    { "rsaquo;", 7, "âº" },
-    { "rsquo;", 6, "â" },
-    { "sbquo;", 6, "â" },
-    { "scaron;", 7, "Å¡" },
-    { "sect;", 5, "Â§" },
-    { "shy;", 4, "Â­" },
-    { "sup1;", 5, "Â¹" },
-    { "sup2;", 5, "Â²" },
-    { "sup3;", 5, "Â³" },
-    { "szlig;", 6, "Ã" },
-    { "thorn;", 6, "Ã¾" },
-    { "tilde;", 6, "Ë" },
-    { "times;", 6, "Ã" },
-    { "trade;", 6, "â¢" },
-    { "uacute;", 7, "Ãº" },
-    { "ucirc;", 6, "Ã»" },
-    { "ugrave;", 7, "Ã¹" },
-    { "uml;", 4, "Â¨" },
-    { "uuml;", 5, "Ã¼" },
-    { "yacute;", 7, "Ã½" },
-    { "yen;", 4, "Â¥" },
-    { "yuml;", 5, "Ã¿" },
+    { "AElig;",  "Ã" },
+    { "Aacute;", "Ã" },
+    { "Acirc;",  "Ã" },
+    { "Agrave;", "Ã" },
+    { "Aring;",  "Ã" },
+    { "Atilde;", "Ã" },
+    { "Auml;",   "Ã" },
+    { "Ccedil;", "Ã" },
+    { "Dagger;", "â¡" },
+    { "ETH;",    "Ã" },
+    { "Eacute;", "Ã" },
+    { "Ecirc;",  "Ã" },
+    { "Egrave;", "Ã" },
+    { "Euml;",   "Ã" },
+    { "Iacute;", "Ã" },
+    { "Icirc;",  "Ã" },
+    { "Igrave;", "Ã" },
+    { "Iuml;",   "Ã" },
+    { "Ntilde;", "Ã" },
+    { "OElig;",  "Å" },
+    { "Oacute;", "Ã" },
+    { "Ocirc;",  "Ã" },
+    { "Ograve;", "Ã" },
+    { "Oslash;", "Ã" },
+    { "Otilde;", "Ã" },
+    { "Ouml;",   "Ã" },
+    { "Scaron;", "Å " },
+    { "THORN;",  "Ã" },
+    { "Uacute;", "Ã" },
+    { "Ucirc;",  "Ã" },
+    { "Ugrave;", "Ã" },
+    { "Uuml;",   "Ã" },
+    { "Yacute;", "Ã" },
+    { "Yuml;",   "Å¸" },
+    { "aacute;", "Ã¡" },
+    { "acirc;",  "Ã¢" },
+    { "acute;",  "Â´" },
+    { "aelig;",  "Ã¦" },
+    { "agrave;", "Ã " },
+    { "amp;",    "&" },
+    { "apos;",   "'" },
+    { "aring;",  "Ã¥" },
+    { "atilde;", "Ã£" },
+    { "auml;",   "Ã¤" },
+    { "bdquo;",  "â" },
+    { "brvbar;", "Â¦" },
+    { "ccedil;", "Ã§" },
+    { "cedil;",  "Â¸" },
+    { "cent;",   "Â¢" },
+    { "circ;",   "Ë" },
+    { "copy;",   "Â©" },
+    { "curren;", "Â¤" },
+    { "dagger;", "â " },
+    { "deg;",    "Â°" },
+    { "divide;", "Ã·" },
+    { "eacute;", "Ã©" },
+    { "ecirc;",  "Ãª" },
+    { "egrave;", "Ã¨" },
+    { "eth;",    "Ã°" },
+    { "euml;",   "Ã«" },
+    { "euro;",   "â¬" },
+    { "frac12;", "Â½" },
+    { "frac14;", "Â¼" },
+    { "frac34;", "Â¾" },
+    { "gt;",     ">" },
+    { "hellip;", "â¦" },
+    { "iacute;", "Ã­" },
+    { "icirc;",  "Ã®" },
+    { "iexcl;",  "Â¡" },
+    { "igrave;", "Ã¬" },
+    { "iquest;", "Â¿" },
+    { "iuml;",   "Ã¯" },
+    { "laquo;",  "Â«" },
+    { "ldquo;",  "â" },
+    { "lsaquo;", "â¹" },
+    { "lsquo;",  "â" },
+    { "lt;",     "<" },
+    { "macr;",   "Â¯" },
+    { "mdash;",  "â" },
+    { "micro;",  "Âµ" },
+    { "middot;", "Â·" },
+    { "nbsp;",   "\xc2\xa0" },
+    { "ndash;",  "â" },
+    { "not;",    "Â¬" },
+    { "ntilde;", "Ã±" },
+    { "oacute;", "Ã³" },
+    { "ocirc;",  "Ã´" },
+    { "oelig;",  "Å" },
+    { "ograve;", "Ã²" },
+    { "ordf;",   "Âª" },
+    { "ordm;",   "Âº" },
+    { "oslash;", "Ã¸" },
+    { "otilde;", "Ãµ" },
+    { "ouml;",   "Ã¶" },
+    { "para;",   "Â¶" },
+    { "permil;", "â°" },
+    { "plusmn;", "Â±" },
+    { "pound;",  "Â£" },
+    { "quot;",   "\"" },
+    { "raquo;",  "Â»" },
+    { "rdquo;",  "â" },
+    { "reg;",    "Â®" },
+    { "rsaquo;", "âº" },
+    { "rsquo;",  "â" },
+    { "sbquo;",  "â" },
+    { "scaron;", "Å¡" },
+    { "sect;",   "Â§" },
+    { "shy;",    "Â­" },
+    { "sup1;",   "Â¹" },
+    { "sup2;",   "Â²" },
+    { "sup3;",   "Â³" },
+    { "szlig;",  "Ã" },
+    { "thorn;",  "Ã¾" },
+    { "tilde;",  "Ë" },
+    { "times;",  "Ã" },
+    { "trade;",  "â¢" },
+    { "uacute;", "Ãº" },
+    { "ucirc;",  "Ã»" },
+    { "ugrave;", "Ã¹" },
+    { "uml;",    "Â¨" },
+    { "uuml;",   "Ã¼" },
+    { "yacute;", "Ã½" },
+    { "yen;",    "Â¥" },
+    { "yuml;",   "Ã¿" },
 };
 
+static int cmp_entity (const void *key, const void *elem)
+{
+    const struct xml_entity_s *ent = elem;
+    const char *name = key;
+
+    return strncmp (name, ent->psz_entity, strlen (ent->psz_entity));
+}
+
 /**
  * Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
  * \param string to convert
@@ -371,35 +316,42 @@ void resolve_xml_special_chars( char *psz_value )
     {
         if( *psz_value == '&' )
         {
-            char *psz_value1 = psz_value + 1;
-#define TRY_CHAR( src, len, dst )                     \
-            if( !strncmp( psz_value1, src, len ) )   \
-            {                                         \
-                *p_pos = dst;                         \
-                psz_value += len + 1;                 \
-            }
-            TRY_CHAR( "lt;", 3, '<' )
-            else TRY_CHAR( "amp;", 4, '&' )
-            else TRY_CHAR( "apos;", 5, '\'' )
-            else TRY_CHAR( "gt;", 3, '>' )
-            else TRY_CHAR( "quot;", 5, '"' )
-#undef TRY_CHAR
-            else if( *psz_value1 == '#' )
-            {
+            if( psz_value[1] == '#' )
+            {   /* &#xxx; Unicode code point */
                 char *psz_end;
-                int i = strtol( psz_value+2, &psz_end, 10 );
+                unsigned long cp = strtoul( psz_value+2, &psz_end, 10 );
                 if( *psz_end == ';' )
                 {
-                    if( i >= 32 && i <= 126 )
+                    psz_value = psz_end + 1;
+                    if( cp == 0 )
+                        (void)0; /* skip nuls */
+                    else
+                    if( cp <= 0x7F )
+                    {
+                        *p_pos =            cp;
+                    }
+                    else
+                    /* Unicode code point outside ASCII.
+                     * &#xxx; representation is longer than UTF-8 :) */
+                    if( cp <= 0x7FF )
                     {
-                        *p_pos = (char)i;
-                        psz_value = psz_end+1;
+                        *p_pos++ = 0xC0 |  (cp >>  6);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
                     }
                     else
+                    if( cp <= 0xFFFF )
                     {
-                        /* Unhandled code, FIXME */
-                        *p_pos = *psz_value;
-                        psz_value++;
+                        *p_pos++ = 0xE0 |  (cp >> 12);
+                        *p_pos++ = 0x80 | ((cp >>  6) & 0x3F);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
+                    }
+                    else
+                    if( cp <= 0x1FFFFF ) /* Outside the BMP */
+                    {   /* Unicode stops at 10FFFF, but who cares? */
+                        *p_pos++ = 0xF0 |  (cp >> 18);
+                        *p_pos++ = 0x80 | ((cp >> 12) & 0x3F);
+                        *p_pos++ = 0x80 | ((cp >>  6) & 0x3F);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
                     }
                 }
                 else
@@ -410,37 +362,21 @@ void resolve_xml_special_chars( char *psz_value )
                 }
             }
             else
-            {
-                const size_t i_entities = sizeof( p_xml_entities ) /
-                                          sizeof( p_xml_entities[0] );
-                assert( i_entities < 128 );
-                size_t step = 128>>1;
-                size_t i = step-1;
-                int cmp = -1;
-                while( step )
+            {   /* Well-known XML entity */
+                const struct xml_entity_s *ent;
+
+                ent = bsearch (psz_value + 1, xml_entities,
+                               sizeof (xml_entities) / sizeof (*ent),
+                               sizeof (*ent), cmp_entity);
+                if (ent != NULL)
                 {
-                    step >>= 1;
-                    if( i >= i_entities )
-                        cmp = -1;
-                    else
-                        cmp = strncmp( psz_value1, /* Skip the & */
-                                       p_xml_entities[i].psz_entity,
-                                       p_xml_entities[i].i_length );
-                    if( cmp == 0 )
-                    {
-                        size_t i_len = strlen( p_xml_entities[i].psz_char );
-                        strncpy( p_pos, p_xml_entities[i].psz_char, i_len );
-                        p_pos += i_len - 1;
-                        psz_value += p_xml_entities[i].i_length+1;
-                        break;
-                    }
-                    else if( cmp < 0 )
-                        i -= step;
-                    else
-                        i += step;
+                    size_t olen = strlen (ent->psz_char);
+                    memcpy (p_pos, ent->psz_char, olen);
+                    p_pos += olen - 1;
+                    psz_value += strlen (ent->psz_entity) + 1;
                 }
-                if( cmp != 0 )
-                {
+                else
+                {   /* No match */
                     *p_pos = *psz_value;
                     psz_value++;
                 }
@@ -459,52 +395,51 @@ void resolve_xml_special_chars( char *psz_value )
 }
 
 /**
- * Converts '<', '>', '\"', '\'' and '&' to their html entities
- * \param psz_content simple element content that is to be converted
+ * XML-encode a UTF-8 string
+ * \param str nul-terminated UTF-8 byte sequence to XML-encode
+ * \return XML encoded string or NULL on error
+ * (errno is set to ENOMEM or EILSEQ as appropriate)
  */
-char *convert_xml_special_chars( const char *psz_content )
+char *convert_xml_special_chars (const char *str)
 {
-    char *psz_temp = malloc( 6 * strlen( psz_content ) + 1 );
-    const char *p_from = psz_content;
-    char *p_to   = psz_temp;
+    assert (str != NULL);
 
-    while ( *p_from )
+    const size_t len = strlen (str);
+    char *const buf = malloc (6 * len + 1), *ptr = buf;
+    if (unlikely(buf == NULL))
+        return NULL;
+
+    size_t n;
+    uint32_t cp;
+
+    while ((n = vlc_towc (str, &cp)) != 0)
     {
-        if ( *p_from == '<' )
-        {
-            strcpy( p_to, "&lt;" );
-            p_to += 4;
-        }
-        else if ( *p_from == '>' )
-        {
-            strcpy( p_to, "&gt;" );
-            p_to += 4;
-        }
-        else if ( *p_from == '&' )
-        {
-            strcpy( p_to, "&amp;" );
-            p_to += 5;
-        }
-        else if( *p_from == '\"' )
-        {
-            strcpy( p_to, "&quot;" );
-            p_to += 6;
-        }
-        else if( *p_from == '\'' )
+        if (unlikely(n == (size_t)-1))
         {
-            strcpy( p_to, "&#039;" );
-            p_to += 6;
+            free (buf);
+            errno = EILSEQ;
+            return NULL;
         }
+
+        if ((cp & ~0x0080) < 32 /* C0/C1 control codes */
+         && strchr ("\x09\x0A\x0D\x85", cp) == NULL)
+            ptr += sprintf (ptr, "&#%"PRIu32";", cp);
         else
+        switch (cp)
         {
-            *p_to = *p_from;
-            p_to++;
+            case '\"': strcpy (ptr, "&quot;"); ptr += 6; break;
+            case '&':  strcpy (ptr, "&amp;");  ptr += 5; break;
+            case '\'': strcpy (ptr, "&#39;");  ptr += 5; break;
+            case '<':  strcpy (ptr, "&lt;");   ptr += 4; break;
+            case '>':  strcpy (ptr, "&gt;");   ptr += 4; break;
+            default:   memcpy (ptr, str, n);   ptr += n; break;
         }
-        p_from++;
+        str += n;
     }
-    *p_to = '\0';
+    *(ptr++) = '\0';
 
-    return psz_temp;
+    ptr = realloc (buf, ptr - buf);
+    return likely(ptr != NULL) ? ptr : buf; /* cannot fail */
 }
 
 /* Base64 encoding */
@@ -675,30 +610,38 @@ char *str_format_time( const char *tformat )
     assert (0);
 }
 
+static void format_duration (char *buf, size_t len, int64_t duration)
+{
+    lldiv_t d;
+    int sec;
+
+    duration /= CLOCK_FREQ;
+    d = lldiv (duration, 60);
+    sec = d.rem;
+    d = lldiv (d.quot, 60);
+    snprintf (buf, len, "%02lld:%02d:%02d", d.quot, (int)d.rem, sec);
+}
+
 #define INSERT_STRING( string )                                     \
                     if( string != NULL )                            \
                     {                                               \
                         int len = strlen( string );                 \
-                        dst = realloc( dst, i_size = i_size + len );\
+                        dst = xrealloc( dst, i_size = i_size + len );\
                         memcpy( (dst+d), string, len );             \
                         d += len;                                   \
                         free( string );                             \
-                    }                                               \
-                    else if( !b_empty_if_na )                       \
-                    {                                               \
-                        *(dst+d) = '-';                             \
-                        d++;                                        \
-                    }                                               \
+                    }
 
 /* same than INSERT_STRING, except that string won't be freed */
 #define INSERT_STRING_NO_FREE( string )                             \
                     {                                               \
                         int len = strlen( string );                 \
-                        dst = realloc( dst, i_size = i_size + len );\
+                        dst = xrealloc( dst, i_size = i_size + len );\
                         memcpy( dst+d, string, len );               \
                         d += len;                                   \
                     }
-char *__str_format_meta( vlc_object_t *p_object, const char *string )
+#undef str_format_meta
+char *str_format_meta( vlc_object_t *p_object, const char *string )
 {
     const char *s = string;
     bool b_is_format = false;
@@ -709,10 +652,8 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
     if( !dst ) return NULL;
     int d = 0;
 
-    playlist_t *p_playlist = pl_Hold( p_object );
-    input_thread_t *p_input = playlist_CurrentInput( p_playlist );
+    input_thread_t *p_input = playlist_CurrentInput( pl_Get(p_object) );
     input_item_t *p_item = NULL;
-    pl_Release( p_object );
     if( p_input )
     {
         p_item = input_GetItem(p_input);
@@ -757,13 +698,13 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                 case 'f':
                     if( p_item && p_item->p_stats )
                     {
-                        snprintf( buf, 10, "%d",
+                        vlc_mutex_lock( &p_item->p_stats->lock );
+                        snprintf( buf, 10, "%"PRIi64,
                                   p_item->p_stats->i_displayed_pictures );
+                        vlc_mutex_unlock( &p_item->p_stats->lock );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "-" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "-" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'g':
@@ -797,15 +738,15 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     }
                     break;
                 case 's':
-                {
-                    char *lang = NULL;
-                    if( p_input )
-                        lang = var_GetNonEmptyString( p_input, "sub-language" );
-                    if( lang == NULL )
-                        lang = strdup( b_empty_if_na ? "" : "-" );
-                    INSERT_STRING( lang );
-                    break;
-                }
+                    {
+                        char *psz_lang = NULL;
+                        if( p_input )
+                            psz_lang = var_GetNonEmptyString( p_input, "sub-language" );
+                        if( psz_lang == NULL )
+                            psz_lang = strdup( b_empty_if_na ? "" : "-" );
+                        INSERT_STRING( psz_lang );
+                        break;
+                    }
                 case 't':
                     if( p_item )
                     {
@@ -827,40 +768,31 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                 case 'B':
                     if( p_input )
                     {
-                        snprintf( buf, 10, "%d",
+                        snprintf( buf, 10, "%"PRId64,
                                   var_GetInteger( p_input, "bit-rate" )/1000 );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "-" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "-" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'C':
                     if( p_input )
                     {
-                        snprintf( buf, 10, "%d",
+                        snprintf( buf, 10, "%"PRId64,
                                   var_GetInteger( p_input, "chapter" ) );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "-" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "-" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'D':
                     if( p_item )
                     {
                         mtime_t i_duration = input_item_GetDuration( p_item );
-                        sprintf( buf, "%02d:%02d:%02d",
-                                 (int)(i_duration/(3600000000)),
-                                 (int)((i_duration/(60000000))%60),
-                                 (int)((i_duration/1000000)%60) );
+                        format_duration (buf, sizeof (buf), i_duration);
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "--:--:--" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'F':
@@ -872,29 +804,23 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                 case 'I':
                     if( p_input )
                     {
-                        snprintf( buf, 10, "%d",
+                        snprintf( buf, 10, "%"PRId64,
                                   var_GetInteger( p_input, "title" ) );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "-" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "-" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'L':
                     if( p_item && p_input )
                     {
                         mtime_t i_duration = input_item_GetDuration( p_item );
-                        int64_t i_time = p_input->i_time;
-                        sprintf( buf, "%02d:%02d:%02d",
-                     (int)( ( i_duration - i_time ) / 3600000000 ),
-                     (int)( ( ( i_duration - i_time ) / 60000000 ) % 60 ),
-                     (int)( ( ( i_duration - i_time ) / 1000000 ) % 60 ) );
+                        int64_t i_time = var_GetTime( p_input, "time" );
+                        format_duration( buf, sizeof(buf),
+                                         i_duration - i_time );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "--:--:--" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'N':
@@ -904,16 +830,16 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     }
                     break;
                 case 'O':
-                {
-                    char *lang = NULL;
-                    if( p_input )
-                        lang = var_GetNonEmptyString( p_input,
-                                                      "audio-language" );
-                    if( lang == NULL )
-                        lang = strdup( b_empty_if_na ? "" : "-" );
-                    INSERT_STRING( lang );
-                    break;
-                }
+                    {
+                        char *lang = NULL;
+                        if( p_input )
+                            lang = var_GetNonEmptyString( p_input,
+                                                          "audio-language" );
+                        if( lang == NULL )
+                            lang = strdup( b_empty_if_na ? "" : "-" );
+                        INSERT_STRING( lang );
+                        break;
+                    }
                 case 'P':
                     if( p_input )
                     {
@@ -922,20 +848,18 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     }
                     else
                     {
-                        sprintf( buf, b_empty_if_na ? "" : "--.-%%" );
+                        snprintf( buf, 10, b_empty_if_na ? "" : "--.-%%" );
                     }
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'R':
                     if( p_input )
                     {
-                        int r = var_GetInteger( p_input, "rate" );
-                        snprintf( buf, 10, "%d.%d", r/1000, r%1000 );
+                        float f = var_GetFloat( p_input, "rate" );
+                        snprintf( buf, 10, "%.3f", f );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "-" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "-" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'S':
@@ -945,23 +869,17 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                         snprintf( buf, 10, "%d.%d", r/1000, (r/100)%10 );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" : "-" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "-" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'T':
                     if( p_input )
                     {
-                        sprintf( buf, "%02d:%02d:%02d",
-                            (int)( p_input->i_time / ( 3600000000 ) ),
-                            (int)( ( p_input->i_time / ( 60000000 ) ) % 60 ),
-                            (int)( ( p_input->i_time / 1000000 ) % 60 ) );
+                        int64_t i_time = var_GetTime( p_input, "time" );
+                        format_duration( buf, sizeof(buf), i_time );
                     }
                     else
-                    {
-                        sprintf( buf, b_empty_if_na ? "" :  "--:--:--" );
-                    }
+                        strcpy( buf, b_empty_if_na ? "" : "--:--:--" );
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'U':
@@ -971,17 +889,36 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     }
                     break;
                 case 'V':
-                {
-                    audio_volume_t volume;
-                    aout_VolumeGet( p_object, &volume );
-                    snprintf( buf, 10, "%d", volume );
-                    INSERT_STRING_NO_FREE( buf );
-                    break;
-                }
+                    {
+                        audio_volume_t volume = aout_VolumeGet( p_object );
+                        snprintf( buf, 10, "%d", volume );
+                        INSERT_STRING_NO_FREE( buf );
+                        break;
+                    }
                 case '_':
                     *(dst+d) = '\n';
                     d++;
                     break;
+                case 'Z':
+                    if( p_item )
+                    {
+                        char *psz_now_playing = input_item_GetNowPlaying( p_item );
+                        if ( psz_now_playing == NULL )
+                        {
+                            char *psz_temp = input_item_GetTitleFbName( p_item );
+                            char *psz_artist = input_item_GetArtist( p_item );
+                            if( !EMPTY_STR( psz_temp ) )
+                            {
+                                INSERT_STRING( psz_temp );
+                                if ( !EMPTY_STR( psz_artist ) )
+                                    INSERT_STRING_NO_FREE( " - " );
+                            }
+                            INSERT_STRING( psz_artist );
+                        }
+                        else
+                            INSERT_STRING( psz_now_playing );
+                    }
+                    break;
 
                 case ' ':
                     b_empty_if_na = true;
@@ -1017,10 +954,11 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
 #undef INSERT_STRING
 #undef INSERT_STRING_NO_FREE
 
+#undef str_format
 /**
  * Apply str format time and str format meta
  */
-char *__str_format( vlc_object_t *p_this, const char *psz_src )
+char *str_format( vlc_object_t *p_this, const char *psz_src )
 {
     char *psz_buf1, *psz_buf2;
     psz_buf1 = str_format_time( psz_src );
@@ -1030,41 +968,60 @@ char *__str_format( vlc_object_t *p_this, const char *psz_src )
 }
 
 /**
- * Remove forbidden characters from filenames (including slashes)
+ * Remove forbidden, potentially forbidden and otherwise evil characters from
+ * filenames. This includes slashes, and popular characters like colon
+ * (on Unix anyway), so this should only be used for automatically generated
+ * filenames.
+ * \warning Do not use this on full paths,
+ * only single file names without any directory separator!
  */
 void filename_sanitize( char *str )
 {
-    if( *str == '.' && (str[1] == '\0' || (str[1] == '.' && str[2] == '\0' ) ) )
+    unsigned char c;
+
+    /* Special file names, not allowed */
+    if( !strcmp( str, "." ) || !strcmp( str, ".." ) )
     {
         while( *str )
-        {
-            *str = '_';
-            str++;
-        }
+            *(str++) = '_';
         return;
     }
 
-    while( *str )
+    /* On platforms not using UTF-8, VLC cannot access non-Unicode paths.
+     * Also, some file systems require Unicode file names.
+     * NOTE: This may insert '?', so do it before replacing '?' with '_'. */
+    EnsureUTF8( str );
+
+    /* Avoid leading spaces to please Windows. */
+    while( (c = *str) != '\0' )
     {
-        switch( *str )
-        {
-            case '/':
-#if defined( __APPLE__ )
-            case ':':
-#elif defined( WIN32 )
-            case '\\':
-            case '*':
-            case '"':
-            case '?':
-            case ':':
-            case '|':
-            case '<':
-            case '>':
-#endif
-                *str = '_';
-        }
+        if( c != ' ' )
+            break;
+        *(str++) = '_';
+    }
+
+    char *start = str;
+
+    while( (c = *str) != '\0' )
+    {
+        /* Non-printable characters are not a good idea */
+        if( c < 32 )
+            *str = '_';
+        /* This is the list of characters not allowed by Microsoft.
+         * We also black-list them on Unix as they may be confusing, and are
+         * not supported by some file system types (notably CIFS). */
+        else if( strchr( "/:\\*\"?|<>", c ) != NULL )
+            *str = '_';
         str++;
     }
+
+    /* Avoid trailing spaces also to please Windows. */
+    while( str > start )
+    {
+        if( *(--str) != ' ' )
+            break;
+        *str = '_';
+    }
 }
 
 /**
@@ -1072,16 +1029,10 @@ void filename_sanitize( char *str )
  */
 void path_sanitize( char *str )
 {
-#if 0
-    /*
-     * Uncomment the two blocks to prevent /../ or /./, i'm not sure that we
-     * want to.
-     */
-    char *prev = str - 1;
-#endif
-#ifdef WIN32
+#if defined( WIN32 ) || defined( __OS2__ )
     /* check drive prefix if path is absolute */
-    if( isalpha(*str) && (':' == *(str+1)) )
+    if( (((unsigned char)(str[0] - 'A') < 26)
+      || ((unsigned char)(str[0] - 'a') < 26)) && (':' == str[1]) )
         str += 2;
 #endif
     while( *str )
@@ -1089,38 +1040,238 @@ void path_sanitize( char *str )
 #if defined( __APPLE__ )
         if( *str == ':' )
             *str = '_';
-#elif defined( WIN32 )
-        switch( *str )
-        {
-            case '*':
-            case '"':
-            case '?':
-            case ':':
-            case '|':
-            case '<':
-            case '>':
-                *str = '_';
-        }
+#elif defined( WIN32 ) || defined( __OS2__ )
+        if( strchr( "*\"?:|<>", *str ) )
+            *str = '_';
+        if( *str == '/' )
+            *str = DIR_SEP_CHAR;
 #endif
-#if 0
-        if( *str == '/'
+        str++;
+    }
+}
+
+#include <vlc_url.h>
 #ifdef WIN32
-            || *str == '\\'
+# include <io.h>
+#endif
+
+/**
+ * Convert a file path to a URI.
+ * If already a URI, return a copy of the string.
+ * @param path path to convert (or URI to copy)
+ * @param scheme URI scheme to use (default is auto: "file", "fd" or "smb")
+ * @return a nul-terminated URI string (use free() to release it),
+ * or NULL in case of error
+ */
+char *make_URI (const char *path, const char *scheme)
+{
+    if (path == NULL)
+        return NULL;
+    if (scheme == NULL && !strcmp (path, "-"))
+        return strdup ("fd://0"); // standard input
+    if (strstr (path, "://") != NULL)
+        return strdup (path); /* Already a URI */
+    /* Note: VLC cannot handle URI schemes without double slash after the
+     * scheme name (such as mailto: or news:). */
+
+    char *buf;
+
+#ifdef __OS2__
+    char p[strlen (path) + 1];
+
+    for (buf = p; *path; buf++, path++)
+        *buf = (*path == '/') ? DIR_SEP_CHAR : *path;
+    *buf = '\0';
+
+    path = p;
+#endif
+
+#if defined( WIN32 ) || defined( __OS2__ )
+    /* Drive letter */
+    if (isalpha ((unsigned char)path[0]) && (path[1] == ':'))
+    {
+        if (asprintf (&buf, "%s:///%c:", scheme ? scheme : "file",
+                      path[0]) == -1)
+            buf = NULL;
+        path += 2;
+# warning Drive letter-relative path not implemented!
+        if (path[0] != DIR_SEP_CHAR)
+            return NULL;
+    }
+    else
 #endif
-            )
+    if (!strncmp (path, "\\\\", 2))
+    {   /* Windows UNC paths */
+#if !defined( WIN32 ) && !defined( __OS2__ )
+        if (scheme != NULL)
+            return NULL; /* remote files not supported */
+
+        /* \\host\share\path -> smb://host/share/path */
+        if (strchr (path + 2, '\\') != NULL)
+        {   /* Convert backslashes to slashes */
+            char *dup = strdup (path);
+            if (dup == NULL)
+                return NULL;
+            for (size_t i = 2; dup[i]; i++)
+                if (dup[i] == '\\')
+                    dup[i] = DIR_SEP_CHAR;
+
+            char *ret = make_URI (dup, scheme);
+            free (dup);
+            return ret;
+        }
+# define SMB_SCHEME "smb"
+#else
+        /* \\host\share\path -> file://host/share/path */
+# define SMB_SCHEME "file"
+#endif
+        size_t hostlen = strcspn (path + 2, DIR_SEP);
+
+        buf = malloc (sizeof (SMB_SCHEME) + 3 + hostlen);
+        if (buf != NULL)
+            snprintf (buf, sizeof (SMB_SCHEME) + 3 + hostlen,
+                      SMB_SCHEME"://%s", path + 2);
+        path += 2 + hostlen;
+
+        if (path[0] == '\0')
+            return buf; /* Hostname without path */
+    }
+    else
+    if (path[0] != DIR_SEP_CHAR)
+    {   /* Relative path: prepend the current working directory */
+        char *cwd, *ret;
+
+        if ((cwd = vlc_getcwd ()) == NULL)
+            return NULL;
+        if (asprintf (&buf, "%s"DIR_SEP"%s", cwd, path) == -1)
+            buf = NULL;
+
+        free (cwd);
+        ret = (buf != NULL) ? make_URI (buf, scheme) : NULL;
+        free (buf);
+        return ret;
+    }
+    else
+    if (asprintf (&buf, "%s://", scheme ? scheme : "file") == -1)
+        buf = NULL;
+    if (buf == NULL)
+        return NULL;
+
+    assert (path[0] == DIR_SEP_CHAR);
+
+    /* Absolute file path */
+    for (const char *ptr = path + 1;; ptr++)
+    {
+        size_t len = strcspn (ptr, DIR_SEP);
+        char *component = encode_URI_bytes (ptr, len);
+        if (component == NULL)
         {
-            if( str - prev == 2 && prev[1] == '.' )
-            {
-                prev[1] = '.';
-            }
-            else if( str - prev == 3 && prev[1] == '.' && prev[2] == '.' )
-            {
-                prev[1] = '_';
-                prev[2] = '_';
-            }
-            prev = str;
+            free (buf);
+            return NULL;
         }
+        char *uri;
+        int val = asprintf (&uri, "%s/%s", buf, component);
+        free (component);
+        free (buf);
+        if (val == -1)
+            return NULL;
+        buf = uri;
+        ptr += len;
+        if (*ptr == '\0')
+            return buf;
+    }
+}
+
+/**
+ * Tries to convert a URI to a local (UTF-8-encoded) file path.
+ * @param url URI to convert
+ * @return NULL on error, a nul-terminated string otherwise
+ * (use free() to release it)
+ */
+char *make_path (const char *url)
+{
+    char *ret = NULL;
+    char *end;
+
+    char *path = strstr (url, "://");
+    if (path == NULL)
+        return NULL; /* unsupported scheme or invalid syntax */
+
+    end = memchr (url, '/', path - url);
+    size_t schemelen = ((end != NULL) ? end : path) - url;
+    path += 3; /* skip "://" */
+
+    /* Remove HTML anchor if present */
+    end = strchr (path, '#');
+    if (end)
+        path = strndup (path, end - path);
+    else
+        path = strdup (path);
+    if (unlikely(path == NULL))
+        return NULL; /* boom! */
+
+    /* Decode path */
+    decode_URI (path);
+
+    if (schemelen == 4 && !strncasecmp (url, "file", 4))
+    {
+#if (!defined (WIN32) && !defined (__OS2__)) || defined (UNDER_CE)
+        /* Leading slash => local path */
+        if (*path == '/')
+            return path;
+        /* Local path disguised as a remote one */
+        if (!strncasecmp (path, "localhost/", 10))
+            return memmove (path, path + 9, strlen (path + 9) + 1);
+#else
+        for (char *p = strchr (path, '/'); p; p = strchr (p + 1, '/'))
+            *p = '\\';
+
+        /* Leading backslash => local path */
+        if (*path == '\\')
+            return memmove (path, path + 1, strlen (path + 1) + 1);
+        /* Local path disguised as a remote one */
+        if (!strncasecmp (path, "localhost\\", 10))
+            return memmove (path, path + 10, strlen (path + 10) + 1);
+        /* UNC path */
+        if (*path && asprintf (&ret, "\\\\%s", path) == -1)
+            ret = NULL;
+#endif
+        /* non-local path :-( */
+    }
+    else
+    if (schemelen == 2 && !strncasecmp (url, "fd", 2))
+    {
+        int fd = strtol (path, &end, 0);
+
+        if (*end)
+            goto out;
+
+#if !defined( WIN32 ) && !defined( __OS2__ )
+        switch (fd)
+        {
+            case 0:
+                ret = strdup ("/dev/stdin");
+                break;
+            case 1:
+                ret = strdup ("/dev/stdout");
+                break;
+            case 2:
+                ret = strdup ("/dev/stderr");
+                break;
+            default:
+                if (asprintf (&ret, "/dev/fd/%d", fd) == -1)
+                    ret = NULL;
+        }
+#else
+        /* XXX: Does this work on WinCE? */
+        if (fd < 2)
+            ret = strdup ("CON");
+        else
+            ret = NULL;
 #endif
-        str++;
     }
+
+out:
+    free (path);
+    return ret; /* unknown scheme */
 }