X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Ftext%2Fstrings.c;h=3f21500f7aa654c5a64712f7733b91e97fb5d996;hb=d5fd75161509b82a98b0ed81f60d09dc69b6ff5a;hp=750d565109777fe8bc17bf80d31e19b42ed6b466;hpb=ce572d7eec691f6ee486f88c8fe215e99d5b4dd8;p=vlc

diff --git a/src/text/strings.c b/src/text/strings.c
index 750d565109..3f21500f7a 100644
--- a/src/text/strings.c
+++ b/src/text/strings.c
@@ -2,6 +2,7 @@
  * strings.c: String related functions
  *****************************************************************************
  * Copyright (C) 2006 the VideoLAN team
+ * Copyright (C) 2008-2009 Rémi Denis-Courmont
  * $Id$
  *
  * Authors: Antoine Cellerier <dionoea at videolan dot org>
@@ -26,11 +27,16 @@
 /*****************************************************************************
  * Preamble
  *****************************************************************************/
-#include <vlc/vlc.h>
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
 #include <assert.h>
 
 /* Needed by str_format_time */
 #include <time.h>
+#include <limits.h>
 
 /* Needed by str_format_meta */
 #include <vlc_input.h>
@@ -43,90 +49,7 @@
 #include <vlc_charset.h>
 
 /**
- * Unescape URI encoded string
- * \return decoded duplicated string
- */
-char *unescape_URI_duplicate( const char *psz )
-{
-    char *psz_dup = strdup( psz );
-    unescape_URI( psz_dup );
-    return psz_dup;
-}
-
-/**
- * Unescape URI encoded string in place
- * \return nothing
- */
-void unescape_URI( char *psz )
-{
-    unsigned char *in = (unsigned char *)psz, *out = in, c;
-    if( psz == NULL )
-        return;
-
-    while( ( c = *in++ ) != '\0' )
-    {
-        switch( c )
-        {
-            case '%':
-            {
-                char val[5], *pval = val;
-                unsigned long cp;
-
-                switch( c = *in++ )
-                {
-                    case '\0':
-                        return;
-
-                    case 'u':
-                    case 'U':
-                        if( ( *pval++ = *in++ ) == '\0' )
-                            return;
-                        if( ( *pval++ = *in++ ) == '\0' )
-                            return;
-                        c = *in++;
-
-                    default:
-                        *pval++ = c;
-                        if( ( *pval++ = *in++ ) == '\0' )
-                            return;
-                        *pval = '\0';
-                }
-
-                cp = strtoul( val, NULL, 0x10 );
-                if( cp < 0x80 )
-                    *out++ = cp;
-                else
-                if( cp < 0x800 )
-                {
-                    *out++ = (( cp >>  6)         | 0xc0);
-                    *out++ = (( cp        & 0x3f) | 0x80);
-                }
-                else
-                {
-                    assert( cp < 0x10000 );
-                    *out++ = (( cp >> 12)         | 0xe0);
-                    *out++ = (((cp >>  6) & 0x3f) | 0x80);
-                    *out++ = (( cp        & 0x3f) | 0x80);
-                }
-                break;
-            }
-
-            /* + is not a special case - it means plus, not space. */
-
-            default:
-                /* Inserting non-ASCII or non-printable characters is unsafe,
-                 * and no sane browser will send these unencoded */
-                if( ( c < 32 ) || ( c > 127 ) )
-                    *out++ = '?';
-                else
-                    *out++ = c;
-        }
-    }
-    *out = '\0';
-}
-
-/**
- * Decode encoded URI string
+ * Decode encoded URI component. See also decode_URI().
  * \return decoded duplicated string
  */
 char *decode_URI_duplicate( const char *psz )
@@ -137,14 +60,23 @@ char *decode_URI_duplicate( const char *psz )
 }
 
 /**
- * Decode encoded URI string in place
- * \return nothing
+ * Decode an encoded URI component in place.
+ * <b>This function does NOT decode entire URIs.</b>
+ * It decodes components (e.g. host name, directory, file name).
+ * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
+ * Complete URIs are always "encoded" (or they are syntactically invalid).
+ *
+ * Note that URI encoding is different from Javascript escaping. Especially,
+ * white spaces and Unicode non-ASCII code points are encoded differently.
+ *
+ * \return psz on success, NULL if it was not properly encoded
  */
-void decode_URI( char *psz )
+char *decode_URI( char *psz )
 {
     unsigned char *in = (unsigned char *)psz, *out = in, c;
+
     if( psz == NULL )
-        return;
+        return NULL;
 
     while( ( c = *in++ ) != '\0' )
     {
@@ -156,14 +88,14 @@ void decode_URI( char *psz )
 
                 if( ( ( hex[0] = *in++ ) == 0 )
                  || ( ( hex[1] = *in++ ) == 0 ) )
-                    return;
+                    return NULL;
 
                 hex[2] = '\0';
                 *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
                 break;
             }
 
-            case '+':
+            case '+': /* This is HTTP forms, not URI decoding... */
                 *out++ = ' ';
                 break;
 
@@ -178,59 +110,200 @@ void decode_URI( char *psz )
     }
     *out = '\0';
     EnsureUTF8( psz );
+    return psz;
 }
 
-static inline int isurlsafe( int c )
+static inline bool isurisafe( int c )
 {
+    /* Unreserved URI characters (RFC3986 §2.3); a NUL byte is never safe */
     return ( (unsigned char)( c - 'a' ) < 26 )
             || ( (unsigned char)( c - 'A' ) < 26 )
             || ( (unsigned char)( c - '0' ) < 10 )
-        /* Hmm, we should not encode character that are allowed in URLs
-         * (even if they are not URL-safe), nor URL-safe characters.
-         * We still encode some of them because of Microsoft's crap browser.
-         */
-            || ( strchr( "-_.", c ) != NULL );
+            || ( c != '\0' && strchr( "-._~", c ) != NULL );
 }
 
-static inline char url_hexchar( int c )
+static char *encode_URI_bytes (const char *psz_uri, size_t len)
 {
-    return ( c < 10 ) ? c + '0' : c + 'A' - 10;
+    char *psz_enc = malloc (3 * len + 1), *out = psz_enc;
+    if (psz_enc == NULL)
+        return NULL;
+
+    for (size_t i = 0; i < len; i++)
+    {
+        static const char hex[16] = "0123456789ABCDEF";
+        uint8_t c = *psz_uri;
+
+        if( isurisafe( c ) )
+            *out++ = c;
+        /* This is URI encoding, not HTTP forms:
+         * Space is encoded as '%20', not '+'. */
+        else
+        {
+            *out++ = '%';
+            *out++ = hex[c >> 4];
+            *out++ = hex[c & 0xf];
+        }
+        psz_uri++;
+    }
+    *out++ = '\0';
+
+    out = realloc (psz_enc, out - psz_enc);
+    return out ? out : psz_enc; /* realloc() can fail (safe) */
 }
 
 /**
- * encode_URI_component
- * Encodes an URI component.
+ * Encodes a URI component (RFC3986 §2).
  *
- * @param psz_url nul-terminated UTF-8 representation of the component.
+ * @param psz_uri nul-terminated UTF-8 representation of the component.
  * Obviously, you can't pass an URI containing a nul character, but you don't
  * want to do that, do you?
  *
- * @return encoded string (must be free()'d)
+ * @return encoded string (must be free()'d), or NULL for ENOMEM.
  */
-char *encode_URI_component( const char *psz_url )
+char *encode_URI_component( const char *psz_uri )
 {
-    char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
-    const uint8_t *in;
+    return encode_URI_bytes (psz_uri, strlen (psz_uri));
+}
 
-    for( in = (const uint8_t *)psz_url; *in; in++ )
-    {
-        uint8_t c = *in;
 
-        if( isurlsafe( c ) )
-            *out++ = (char)c;
-        else
-        if ( c == ' ')
-            *out++ = '+';
-        else
-        {
-            *out++ = '%';
-            *out++ = url_hexchar( c >> 4 );
-            *out++ = url_hexchar( c & 0xf );
-        }
-    }
-    *out++ = '\0';
+static const struct xml_entity_s
+{
+    char    psz_entity[8];
+    char    psz_char[4];
+} xml_entities[] = {
+    /* Important: this list has to be in alphabetical order (psz_entity-wise) */
+    { "AElig;",  "Æ" },
+    { "Aacute;", "Á" },
+    { "Acirc;",  "Â" },
+    { "Agrave;", "À" },
+    { "Aring;",  "Å" },
+    { "Atilde;", "Ã" },
+    { "Auml;",   "Ä" },
+    { "Ccedil;", "Ç" },
+    { "Dagger;", "‡" },
+    { "ETH;",    "Ð" },
+    { "Eacute;", "É" },
+    { "Ecirc;",  "Ê" },
+    { "Egrave;", "È" },
+    { "Euml;",   "Ë" },
+    { "Iacute;", "Í" },
+    { "Icirc;",  "Î" },
+    { "Igrave;", "Ì" },
+    { "Iuml;",   "Ï" },
+    { "Ntilde;", "Ñ" },
+    { "OElig;",  "Œ" },
+    { "Oacute;", "Ó" },
+    { "Ocirc;",  "Ô" },
+    { "Ograve;", "Ò" },
+    { "Oslash;", "Ø" },
+    { "Otilde;", "Õ" },
+    { "Ouml;",   "Ö" },
+    { "Scaron;", "Š" },
+    { "THORN;",  "Þ" },
+    { "Uacute;", "Ú" },
+    { "Ucirc;",  "Û" },
+    { "Ugrave;", "Ù" },
+    { "Uuml;",   "Ü" },
+    { "Yacute;", "Ý" },
+    { "Yuml;",   "Ÿ" },
+    { "aacute;", "á" },
+    { "acirc;",  "â" },
+    { "acute;",  "´" },
+    { "aelig;",  "æ" },
+    { "agrave;", "à" },
+    { "amp;",    "&" },
+    { "apos;",   "'" },
+    { "aring;",  "å" },
+    { "atilde;", "ã" },
+    { "auml;",   "ä" },
+    { "bdquo;",  "„" },
+    { "brvbar;", "¦" },
+    { "ccedil;", "ç" },
+    { "cedil;",  "¸" },
+    { "cent;",   "¢" },
+    { "circ;",   "ˆ" },
+    { "copy;",   "©" },
+    { "curren;", "¤" },
+    { "dagger;", "†" },
+    { "deg;",    "°" },
+    { "divide;", "÷" },
+    { "eacute;", "é" },
+    { "ecirc;",  "ê" },
+    { "egrave;", "è" },
+    { "eth;",    "ð" },
+    { "euml;",   "ë" },
+    { "euro;",   "€" },
+    { "frac12;", "½" },
+    { "frac14;", "¼" },
+    { "frac34;", "¾" },
+    { "gt;",     ">" },
+    { "hellip;", "…" },
+    { "iacute;", "í" },
+    { "icirc;",  "î" },
+    { "iexcl;",  "¡" },
+    { "igrave;", "ì" },
+    { "iquest;", "¿" },
+    { "iuml;",   "ï" },
+    { "laquo;",  "«" },
+    { "ldquo;",  "“" },
+    { "lsaquo;", "‹" },
+    { "lsquo;",  "‘" },
+    { "lt;",     "<" },
+    { "macr;",   "¯" },
+    { "mdash;",  "—" },
+    { "micro;",  "µ" },
+    { "middot;", "·" },
+    { "nbsp;",   "\xc2\xa0" },
+    { "ndash;",  "–" },
+    { "not;",    "¬" },
+    { "ntilde;", "ñ" },
+    { "oacute;", "ó" },
+    { "ocirc;",  "ô" },
+    { "oelig;",  "œ" },
+    { "ograve;", "ò" },
+    { "ordf;",   "ª" },
+    { "ordm;",   "º" },
+    { "oslash;", "ø" },
+    { "otilde;", "õ" },
+    { "ouml;",   "ö" },
+    { "para;",   "¶" },
+    { "permil;", "‰" },
+    { "plusmn;", "±" },
+    { "pound;",  "£" },
+    { "quot;",   "\"" },
+    { "raquo;",  "»" },
+    { "rdquo;",  "”" },
+    { "reg;",    "®" },
+    { "rsaquo;", "›" },
+    { "rsquo;",  "’" },
+    { "sbquo;",  "‚" },
+    { "scaron;", "š" },
+    { "sect;",   "§" },
+    { "shy;",    "\xc2\xad" },
+    { "sup1;",   "¹" },
+    { "sup2;",   "²" },
+    { "sup3;",   "³" },
+    { "szlig;",  "ß" },
+    { "thorn;",  "þ" },
+    { "tilde;",  "˜" },
+    { "times;",  "×" },
+    { "trade;",  "™" },
+    { "uacute;", "ú" },
+    { "ucirc;",  "û" },
+    { "ugrave;", "ù" },
+    { "uml;",    "¨" },
+    { "uuml;",   "ü" },
+    { "yacute;", "ý" },
+    { "yen;",    "¥" },
+    { "yuml;",   "ÿ" },
+};
+
+static int cmp_entity (const void *key, const void *elem)
+{
+    const struct xml_entity_s *ent = elem;
+    const char *name = key;
 
-    return strdup( psz_enc );
+    return strncmp (name, ent->psz_entity, strlen (ent->psz_entity));
 }
 
 /**
@@ -245,40 +318,42 @@ void resolve_xml_special_chars( char *psz_value )
     {
         if( *psz_value == '&' )
         {
-#define TRY_CHAR( src, len, dst )                   \
-            if( !strncmp( psz_value, src, len ) )   \
-            {                                       \
-                *p_pos = dst;                       \
-                psz_value += len;                   \
-            }
-#define TRY_LONGCHAR( src, len, dst )                   \
-            if( !strncmp( psz_value, src, len ) )       \
-            {                                           \
-                strncpy( p_pos, dst, strlen( dst ) );   \
-                p_pos += strlen( dst ) - 1;             \
-                psz_value += len;                       \
-            }
-            TRY_CHAR( "&lt;", 4, '<' )
-            else TRY_CHAR( "&gt;", 4, '>' )
-            else TRY_CHAR( "&amp;", 5, '&' )
-            else TRY_CHAR( "&quot;", 6, '"' )
-            else TRY_CHAR( "&apos;", 6, '\'' )
-            else if( psz_value[1] == '#' )
-            {
+            if( psz_value[1] == '#' )
+            {   /* &#xxx; Unicode code point */
                 char *psz_end;
-                int i = strtol( psz_value+2, &psz_end, 10 );
+                unsigned long cp = strtoul( psz_value+2, &psz_end, 10 );
                 if( *psz_end == ';' )
                 {
-                    if( i >= 32 && i <= 126 )
+                    psz_value = psz_end + 1;
+                    if( cp == 0 )
+                        (void)0; /* skip nuls */
+                    else
+                    if( cp <= 0x7F )
                     {
-                        *p_pos = (char)i;
-                        psz_value = psz_end+1;
+                        *p_pos =            cp;
                     }
                     else
+                    /* Unicode code point outside ASCII.
+                     * &#xxx; representation is longer than UTF-8 :) */
+                    if( cp <= 0x7FF )
                     {
-                        /* Unhandled code, FIXME */
-                        *p_pos = *psz_value;
-                        psz_value++;
+                        *p_pos++ = 0xC0 |  (cp >>  6);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
+                    }
+                    else
+                    if( cp <= 0xFFFF )
+                    {
+                        *p_pos++ = 0xE0 |  (cp >> 12);
+                        *p_pos++ = 0x80 | ((cp >>  6) & 0x3F);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
+                    }
+                    else
+                    if( cp <= 0x1FFFFF ) /* Outside the BMP */
+                    {   /* Unicode stops at 10FFFF, but who cares? */
+                        *p_pos++ = 0xF0 |  (cp >> 18);
+                        *p_pos++ = 0x80 | ((cp >> 12) & 0x3F);
+                        *p_pos++ = 0x80 | ((cp >>  6) & 0x3F);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
                     }
                 }
                 else
@@ -288,128 +363,25 @@ void resolve_xml_special_chars( char *psz_value )
                     psz_value++;
                 }
             }
-            else TRY_LONGCHAR( "&Agrave;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Aacute;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Acirc;", 7, "Ã" )
-            else TRY_LONGCHAR( "&Atilde;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Auml;", 6, "Ã" )
-            else TRY_LONGCHAR( "&Aring;", 7, "Ã" )
-            else TRY_LONGCHAR( "&AElig;", 7, "Ã" )
-            else TRY_LONGCHAR( "&Ccedil;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Egrave;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Eacute;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Ecirc;", 7, "Ã" )
-            else TRY_LONGCHAR( "&Euml;", 6, "Ã" )
-            else TRY_LONGCHAR( "&Igrave;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Iacute;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Icirc;", 7, "Ã" )
-            else TRY_LONGCHAR( "&Iuml;", 6, "Ã" )
-            else TRY_LONGCHAR( "&ETH;", 5, "Ã" )
-            else TRY_LONGCHAR( "&Ntilde;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Ograve;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Oacute;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Ocirc;", 7, "Ã" )
-            else TRY_LONGCHAR( "&Otilde;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Ouml;", 6, "Ã" )
-            else TRY_LONGCHAR( "&Oslash;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Ugrave;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Uacute;", 8, "Ã" )
-            else TRY_LONGCHAR( "&Ucirc;", 7, "Ã" )
-            else TRY_LONGCHAR( "&Uuml;", 6, "Ã" )
-            else TRY_LONGCHAR( "&Yacute;", 8, "Ã" )
-            else TRY_LONGCHAR( "&THORN;", 7, "Ã" )
-            else TRY_LONGCHAR( "&szlig;", 7, "Ã" )
-            else TRY_LONGCHAR( "&agrave;", 8, "Ã " )
-            else TRY_LONGCHAR( "&aacute;", 8, "Ã¡" )
-            else TRY_LONGCHAR( "&acirc;", 7, "Ã¢" )
-            else TRY_LONGCHAR( "&atilde;", 8, "Ã£" )
-            else TRY_LONGCHAR( "&auml;", 6, "Ã¤" )
-            else TRY_LONGCHAR( "&aring;", 7, "Ã¥" )
-            else TRY_LONGCHAR( "&aelig;", 7, "Ã¦" )
-            else TRY_LONGCHAR( "&ccedil;", 8, "Ã§" )
-            else TRY_LONGCHAR( "&egrave;", 8, "Ã¨" )
-            else TRY_LONGCHAR( "&eacute;", 8, "Ã©" )
-            else TRY_LONGCHAR( "&ecirc;", 7, "Ãª" )
-            else TRY_LONGCHAR( "&euml;", 6, "Ã«" )
-            else TRY_LONGCHAR( "&igrave;", 8, "Ã¬" )
-            else TRY_LONGCHAR( "&iacute;", 8, "Ã­" )
-            else TRY_LONGCHAR( "&icirc;", 7, "Ã®" )
-            else TRY_LONGCHAR( "&iuml;", 6, "Ã¯" )
-            else TRY_LONGCHAR( "&eth;", 5, "Ã°" )
-            else TRY_LONGCHAR( "&ntilde;", 8, "Ã±" )
-            else TRY_LONGCHAR( "&ograve;", 8, "Ã²" )
-            else TRY_LONGCHAR( "&oacute;", 8, "Ã³" )
-            else TRY_LONGCHAR( "&ocirc;", 7, "Ã´" )
-            else TRY_LONGCHAR( "&otilde;", 8, "Ãµ" )
-            else TRY_LONGCHAR( "&ouml;", 6, "Ã¶" )
-            else TRY_LONGCHAR( "&oslash;", 8, "Ã¸" )
-            else TRY_LONGCHAR( "&ugrave;", 8, "Ã¹" )
-            else TRY_LONGCHAR( "&uacute;", 8, "Ãº" )
-            else TRY_LONGCHAR( "&ucirc;", 7, "Ã»" )
-            else TRY_LONGCHAR( "&uuml;", 6, "Ã¼" )
-            else TRY_LONGCHAR( "&yacute;", 8, "Ã½" )
-            else TRY_LONGCHAR( "&thorn;", 7, "Ã¾" )
-            else TRY_LONGCHAR( "&yuml;", 6, "Ã¿" )
-            else TRY_LONGCHAR( "&iexcl;", 7, "Â¡" )
-            else TRY_LONGCHAR( "&curren;", 8, "Â¤" )
-            else TRY_LONGCHAR( "&cent;", 6, "Â¢" )
-            else TRY_LONGCHAR( "&pound;", 7, "Â£" )
-            else TRY_LONGCHAR( "&yen;", 5, "Â¥" )
-            else TRY_LONGCHAR( "&brvbar;", 8, "Â¦" )
-            else TRY_LONGCHAR( "&sect;", 6, "Â§" )
-            else TRY_LONGCHAR( "&uml;", 5, "Â¨" )
-            else TRY_LONGCHAR( "&copy;", 6, "Â©" )
-            else TRY_LONGCHAR( "&ordf;", 6, "Âª" )
-            else TRY_LONGCHAR( "&laquo;", 7, "Â«" )
-            else TRY_LONGCHAR( "&not;", 5, "Â¬" )
-            else TRY_LONGCHAR( "&shy;", 5, "Â­" )
-            else TRY_LONGCHAR( "&reg;", 5, "Â®" )
-            else TRY_LONGCHAR( "&trade;", 7, "â¢" )
-            else TRY_LONGCHAR( "&macr;", 6, "Â¯" )
-            else TRY_LONGCHAR( "&deg;", 5, "Â°" )
-            else TRY_LONGCHAR( "&plusmn;", 8, "Â±" )
-            else TRY_LONGCHAR( "&sup2;", 6, "Â²" )
-            else TRY_LONGCHAR( "&sup3;", 6, "Â³" )
-            else TRY_LONGCHAR( "&acute;", 7, "Â´" )
-            else TRY_LONGCHAR( "&micro;", 7, "Âµ" )
-            else TRY_LONGCHAR( "&para;", 6, "Â¶" )
-            else TRY_LONGCHAR( "&middot;", 8, "Â·" )
-            else TRY_LONGCHAR( "&cedil;", 7, "Â¸" )
-            else TRY_LONGCHAR( "&sup1;", 6, "Â¹" )
-            else TRY_LONGCHAR( "&ordm;", 6, "Âº" )
-            else TRY_LONGCHAR( "&raquo;", 7, "Â»" )
-            else TRY_LONGCHAR( "&frac14;", 8, "Â¼" )
-            else TRY_LONGCHAR( "&frac12;", 8, "Â½" )
-            else TRY_LONGCHAR( "&frac34;", 8, "Â¾" )
-            else TRY_LONGCHAR( "&iquest;", 8, "Â¿" )
-            else TRY_LONGCHAR( "&times;", 7, "Ã" )
-            else TRY_LONGCHAR( "&divide;", 8, "Ã·" )
-            else TRY_LONGCHAR( "&OElig;", 7, "Å" )
-            else TRY_LONGCHAR( "&oelig;", 7, "Å" )
-            else TRY_LONGCHAR( "&Scaron;", 8, "Å " )
-            else TRY_LONGCHAR( "&scaron;", 8, "Å¡" )
-            else TRY_LONGCHAR( "&Yuml;", 6, "Å¸" )
-            else TRY_LONGCHAR( "&circ;", 6, "Ë" )
-            else TRY_LONGCHAR( "&tilde;", 7, "Ë" )
-            else TRY_LONGCHAR( "&ndash;", 7, "â" )
-            else TRY_LONGCHAR( "&mdash;", 7, "â" )
-            else TRY_LONGCHAR( "&lsquo;", 7, "â" )
-            else TRY_LONGCHAR( "&rsquo;", 7, "â" )
-            else TRY_LONGCHAR( "&sbquo;", 7, "â" )
-            else TRY_LONGCHAR( "&ldquo;", 7, "â" )
-            else TRY_LONGCHAR( "&rdquo;", 7, "â" )
-            else TRY_LONGCHAR( "&bdquo;", 7, "â" )
-            else TRY_LONGCHAR( "&dagger;", 8, "â " )
-            else TRY_LONGCHAR( "&Dagger;", 8, "â¡" )
-            else TRY_LONGCHAR( "&hellip;", 8, "â¦" )
-            else TRY_LONGCHAR( "&permil;", 8, "â°" )
-            else TRY_LONGCHAR( "&lsaquo;", 8, "â¹" )
-            else TRY_LONGCHAR( "&rsaquo;", 8, "âº" )
-            else TRY_LONGCHAR( "&euro;", 6, "â¬" )
             else
-            {
-                *p_pos = *psz_value;
-                psz_value++;
+            {   /* Well-known XML entity */
+                const struct xml_entity_s *ent;
+
+                ent = bsearch (psz_value + 1, xml_entities,
+                               sizeof (xml_entities) / sizeof (*ent),
+                               sizeof (*ent), cmp_entity);
+                if (ent != NULL)
+                {
+                    size_t olen = strlen (ent->psz_char);
+                    memcpy (p_pos, ent->psz_char, olen);
+                    p_pos += olen - 1;
+                    psz_value += strlen (ent->psz_entity) + 1;
+                }
+                else
+                {   /* No match */
+                    *p_pos = *psz_value;
+                    psz_value++;
+                }
             }
         }
         else
@@ -430,47 +402,36 @@ void resolve_xml_special_chars( char *psz_value )
  */
 char *convert_xml_special_chars( const char *psz_content )
 {
-    char *psz_temp = malloc( 6 * strlen( psz_content ) + 1 );
-    const char *p_from = psz_content;
+    assert( psz_content );
+
+    const size_t len = strlen( psz_content );
+    char *const psz_temp = malloc( 6 * len + 1 );
     char *p_to   = psz_temp;
 
-    while ( *p_from )
+    if( psz_temp == NULL )
+        return NULL;
+    for( size_t i = 0; i < len; i++ )
     {
-        if ( *p_from == '<' )
-        {
-            strcpy( p_to, "&lt;" );
-            p_to += 4;
-        }
-        else if ( *p_from == '>' )
-        {
-            strcpy( p_to, "&gt;" );
-            p_to += 4;
-        }
-        else if ( *p_from == '&' )
-        {
-            strcpy( p_to, "&amp;" );
-            p_to += 5;
-        }
-        else if( *p_from == '\"' )
-        {
-            strcpy( p_to, "&quot;" );
-            p_to += 6;
-        }
-        else if( *p_from == '\'' )
-        {
-            strcpy( p_to, "&#039;" );
-            p_to += 6;
-        }
-        else
+        const char *str;
+        char c = psz_content[i];
+
+        switch ( c )
         {
-            *p_to = *p_from;
-            p_to++;
+            case '\"': str = "quot"; break;
+            case '&':  str = "amp";  break;
+            case '\'': str = "#39";  break;
+            case '<':  str = "lt";   break;
+            case '>':  str = "gt";   break;
+            default:
+                *(p_to++) = c;
+                continue;
         }
-        p_from++;
+        p_to += sprintf( p_to, "&%s;", str );
     }
-    *p_to = '\0';
+    *(p_to++) = '\0';
 
-    return psz_temp;
+    p_to = realloc( psz_temp, p_to - psz_temp );
+    return p_to ? p_to : psz_temp; /* cannot fail */
 }
 
 /* Base64 encoding */
@@ -555,7 +516,7 @@ size_t vlc_b64_decode_binary_to_buffer( uint8_t *p_dst, size_t i_dst, const char
     int i_level;
     int i_last;
 
-    for( i_level = 0, i_last = 0; i_dst > 0 && *p != '\0'; i_dst--, p++ )
+    for( i_level = 0, i_last = 0; (size_t)( p_dst - p_start ) < i_dst && *p != '\0'; p++ )
     {
         const int c = b64[(unsigned int)*p];
         if( c == -1 )
@@ -607,143 +568,190 @@ char *vlc_b64_decode( const char *psz_src )
     return p_dst;
 }
 
-/****************************************************************************
- * String formating functions
- ****************************************************************************/
+/**
+ * Formats current time into a heap-allocated string.
+ * @param tformat time format (as with C strftime())
+ * @return an allocated string (must be free()'d), or NULL on memory error.
+ */
 char *str_format_time( const char *tformat )
 {
-    char buffer[255];
     time_t curtime;
-#if defined(HAVE_LOCALTIME_R)
     struct tm loctime;
-#else
-    struct tm *loctime;
-#endif
+
+    if (strcmp (tformat, "") == 0)
+        return strdup (""); /* corner case w.r.t. strftime() return value */
 
     /* Get the current time.  */
-    curtime = time( NULL );
+    time( &curtime );
 
     /* Convert it to local time representation.  */
-#if defined(HAVE_LOCALTIME_R)
     localtime_r( &curtime, &loctime );
-    strftime( buffer, 255, tformat, &loctime );
-#else
-    loctime = localtime( &curtime );
-    strftime( buffer, 255, tformat, loctime );
-#endif
-    return strdup( buffer );
+    for (size_t buflen = strlen (tformat) + 32;; buflen += 32)
+    {
+        char *str = malloc (buflen);
+        if (str == NULL)
+            return NULL;
+        size_t len = strftime (str, buflen, tformat, &loctime);
+        if (len > 0)
+        {
+            char *ret = realloc (str, len + 1);
+            return ret ? ret : str; /* <- this cannot fail */
+        }
+        free (str); /* buffer too small: release it before retrying larger */
+    }
+    assert (0);
 }
 
-#define INSERT_STRING( check, string )                              \
-                    if( check )                                     \
+#define INSERT_STRING( string )                                     \
+                    /* 'string' may allocate: evaluate it only once */\
                     {                                               \
-                        psz_meta = string;                          \
-                        if( psz_meta )                              \
-                        {                                           \
-                            int len = strlen( string );             \
-                            dst = realloc( dst,                     \
-                                   i_size = i_size + len + 1 );     \
-                            strncpy( d, psz_meta, len+1 );          \
-                            d += len;                               \
-                        }                                           \
-                        else                                        \
-                        {                                           \
-                                *d = '-';                           \
-                                d++;                                \
-                        }                                           \
-                    }
+                        char *psz_ins = (string);                   \
+                        if( psz_ins != NULL )                       \
+                        {                                           \
+                            int len = strlen( psz_ins );            \
+                            dst = realloc( dst, i_size = i_size + len );\
+                            memcpy( dst+d, psz_ins, len );          \
+                            d += len;                               \
+                            free( psz_ins );                        \
+                        }                                           \
+                        else if( !b_empty_if_na )                   \
+                            dst[d++] = '-';                         \
+                    }
 /* same than INSERT_STRING, except that string won't be freed */
 #define INSERT_STRING_NO_FREE( string )                             \
                     {                                               \
-                            int len = strlen( string );             \
-                            dst = realloc( dst,                     \
-                                   i_size = i_size + len + 1 );     \
-                            strncpy( d, string, len+1 );            \
-                            d += len;                               \
-                            free( string );                         \
-                    }                                               
+                        int len = strlen( string );                 \
+                        dst = realloc( dst, i_size = i_size + len );\
+                        memcpy( dst+d, string, len );               \
+                        d += len;                                   \
+                    }
 char *__str_format_meta( vlc_object_t *p_object, const char *string )
 {
     const char *s = string;
-    char *dst = malloc( 1000 );
-    char *d = dst;
-    int b_is_format = 0;
-    int b_empty_if_na = 0;
+    bool b_is_format = false;
+    bool b_empty_if_na = false;
     char buf[10];
-    int i_size = strlen( string );
+    int i_size = strlen( string ) + 1; /* +1 to store '\0' */
+    char *dst = strdup( string );
+    if( !dst ) return NULL;
+    int d = 0;
 
-    playlist_t *p_playlist = pl_Yield( p_object );
-    input_thread_t *p_input = p_playlist->p_input;
+    playlist_t *p_playlist = pl_Hold( p_object );
+    input_thread_t *p_input = playlist_CurrentInput( p_playlist );
     input_item_t *p_item = NULL;
     pl_Release( p_object );
     if( p_input )
     {
-        vlc_object_yield( p_input );
         p_item = input_GetItem(p_input);
     }
 
-    sprintf( dst, string );
-
     while( *s )
     {
         if( b_is_format )
         {
             switch( *s )
             {
-                char *psz_meta; /* used by INSERT_STRING */
                 case 'a':
-                    INSERT_STRING( p_item, input_item_GetArtist(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetArtist( p_item ) );
+                    }
                     break;
                 case 'b':
-                    INSERT_STRING( p_item, input_item_GetAlbum(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetAlbum( p_item ) );
+                    }
                     break;
                 case 'c':
-                    INSERT_STRING( p_item, input_item_GetCopyright(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetCopyright( p_item ) );
+                    }
                     break;
                 case 'd':
-                    INSERT_STRING( p_item, input_item_GetDescription(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetDescription( p_item ) );
+                    }
                     break;
                 case 'e':
-                    INSERT_STRING( p_item, input_item_GetEncodedBy(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetEncodedBy( p_item ) );
+                    }
+                    break;
+                case 'f':
+                    if( p_item && p_item->p_stats )
+                    {
+                        vlc_mutex_lock( &p_item->p_stats->lock );
+                        snprintf( buf, 10, "%d",
+                                  p_item->p_stats->i_displayed_pictures );
+                        vlc_mutex_unlock( &p_item->p_stats->lock );
+                    }
+                    else
+                    {
+                        sprintf( buf, b_empty_if_na ? "" : "-" );
+                    }
+                    INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'g':
-                    INSERT_STRING( p_item, input_item_GetGenre(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetGenre( p_item ) );
+                    }
                     break;
                 case 'l':
-                    INSERT_STRING( p_item, input_item_GetLanguage(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetLanguage( p_item ) );
+                    }
                     break;
                 case 'n':
-                    INSERT_STRING( p_item, input_item_GetTrackNum(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetTrackNum( p_item ) );
+                    }
                     break;
                 case 'p':
-                    INSERT_STRING( p_item, input_item_GetNowPlaying(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetNowPlaying( p_item ) );
+                    }
                     break;
                 case 'r':
-                    INSERT_STRING( p_item, input_item_GetRating(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetRating( p_item ) );
+                    }
                     break;
                 case 's':
                 {
-                    char *lang;
+                    char *lang = NULL;
                     if( p_input )
-                    {
-                        lang = var_GetString( p_input, "sub-language" );
-                    }
-                    else
-                    {
+                        lang = var_GetNonEmptyString( p_input, "sub-language" );
+                    if( lang == NULL )
                         lang = strdup( b_empty_if_na ? "" : "-" );
-                    }
-                    INSERT_STRING( 1, lang );
+                    INSERT_STRING( lang );
                     break;
                 }
                 case 't':
-                    INSERT_STRING( p_item, input_item_GetTitle(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetTitle( p_item ) );
+                    }
                     break;
                 case 'u':
-                    INSERT_STRING( p_item, input_item_GetURL(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetURL( p_item ) );
+                    }
                     break;
                 case 'A':
-                    INSERT_STRING( p_item, input_item_GetDate(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetDate( p_item ) );
+                    }
                     break;
                 case 'B':
                     if( p_input )
@@ -773,19 +781,22 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     if( p_item )
                     {
                         mtime_t i_duration = input_item_GetDuration( p_item );
-                        sprintf( buf, "%02d:%02d:%02d",
+                        snprintf( buf, 10, "%02d:%02d:%02d",
                                  (int)(i_duration/(3600000000)),
                                  (int)((i_duration/(60000000))%60),
                                  (int)((i_duration/1000000)%60) );
                     }
                     else
                     {
-                        sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
+                        snprintf( buf, 10, b_empty_if_na ? "" : "--:--:--" );
                     }
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'F':
-                    INSERT_STRING( p_item, input_item_GetURI( p_item ) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetURI( p_item ) );
+                    }
                     break;
                 case 'I':
                     if( p_input )
@@ -803,33 +814,33 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     if( p_item && p_input )
                     {
                         mtime_t i_duration = input_item_GetDuration( p_item );
-                        int64_t i_time = p_input->i_time;
-                        sprintf( buf, "%02d:%02d:%02d",
+                        int64_t i_time = var_GetTime( p_input, "time" );
+                        snprintf( buf, 10, "%02d:%02d:%02d",
                      (int)( ( i_duration - i_time ) / 3600000000 ),
                      (int)( ( ( i_duration - i_time ) / 60000000 ) % 60 ),
                      (int)( ( ( i_duration - i_time ) / 1000000 ) % 60 ) );
                     }
                     else
                     {
-                        sprintf( buf, b_empty_if_na ? "" : "--:--:--" );
+                        snprintf( buf, 10, b_empty_if_na ? "" : "--:--:--" );
                     }
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'N':
-                    INSERT_STRING( p_item, input_item_GetName( p_item ) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetName( p_item ) );
+                    }
                     break;
                 case 'O':
                 {
-                    char *lang;
+                    char *lang = NULL;
                     if( p_input )
-                    {
-                        lang = var_GetString( p_input, "audio-language" );
-                    }
-                    else
-                    {
+                        lang = var_GetNonEmptyString( p_input,
+                                                      "audio-language" );
+                    if( lang == NULL )
                         lang = strdup( b_empty_if_na ? "" : "-" );
-                    }
-                    INSERT_STRING( 1, lang );
+                    INSERT_STRING( lang );
                     break;
                 }
                 case 'P':
@@ -840,7 +851,7 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     }
                     else
                     {
-                        sprintf( buf, b_empty_if_na ? "" : "--.-%%" );
+                        snprintf( buf, 10, b_empty_if_na ? "" : "--.-%%" );
                     }
                     INSERT_STRING_NO_FREE( buf );
                     break;
@@ -871,19 +882,23 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                 case 'T':
                     if( p_input )
                     {
-                        sprintf( buf, "%02d:%02d:%02d",
-                            (int)( p_input->i_time / ( 3600000000 ) ),
-                            (int)( ( p_input->i_time / ( 60000000 ) ) % 60 ),
-                            (int)( ( p_input->i_time / 1000000 ) % 60 ) );
+                        int64_t i_time = var_GetTime( p_input, "time" );
+                        snprintf( buf, 10, "%02d:%02d:%02d",
+                            (int)( i_time / ( 3600000000 ) ),
+                            (int)( ( i_time / ( 60000000 ) ) % 60 ),
+                            (int)( ( i_time / 1000000 ) % 60 ) );
                     }
                     else
                     {
-                        sprintf( buf, b_empty_if_na ? "" :  "--:--:--" );
+                        snprintf( buf, 10, b_empty_if_na ? "" :  "--:--:--" );
                     }
                     INSERT_STRING_NO_FREE( buf );
                     break;
                 case 'U':
-                    INSERT_STRING( p_item, input_item_GetPublisher(p_item) );
+                    if( p_item )
+                    {
+                        INSERT_STRING( input_item_GetPublisher( p_item ) );
+                    }
                     break;
                 case 'V':
                 {
@@ -894,41 +909,43 @@ char *__str_format_meta( vlc_object_t *p_object, const char *string )
                     break;
                 }
                 case '_':
-                    *d = '\n';
+                    *(dst+d) = '\n';
                     d++;
                     break;
 
                 case ' ':
-                    b_empty_if_na = 1;
+                    b_empty_if_na = true;
                     break;
 
                 default:
-                    *d = *s;
+                    *(dst+d) = *s;
                     d++;
                     break;
             }
             if( *s != ' ' )
-                b_is_format = 0;
+                b_is_format = false;
         }
         else if( *s == '$' )
         {
-            b_is_format = 1;
-            b_empty_if_na = 0;
+            b_is_format = true;
+            b_empty_if_na = false;
         }
         else
         {
-            *d = *s;
+            *(dst+d) = *s;
             d++;
         }
         s++;
     }
-    *d = '\0';
+    *(dst+d) = '\0';
 
     if( p_input )
         vlc_object_release( p_input );
 
     return dst;
 }
+#undef INSERT_STRING
+#undef INSERT_STRING_NO_FREE
 
 /**
  * Apply str format time and str format meta
@@ -945,8 +962,10 @@ char *__str_format( vlc_object_t *p_this, const char *psz_src )
 /**
  * Remove forbidden characters from filenames (including slashes)
  */
-void filename_sanitize( char *str )
+char* filename_sanitize( const char *str_origin )
 {
+    char *str = strdup( str_origin );
+    char *str_base = str;
     if( *str == '.' && (str[1] == '\0' || (str[1] == '.' && str[2] == '\0' ) ) )
     {
         while( *str )
@@ -954,15 +973,23 @@ void filename_sanitize( char *str )
             *str = '_';
             str++;
         }
-        return;
+        return str_base;
     }
 
+#if defined( WIN32 )
+    // Change leading spaces into underscores
+    while( *str && *str == ' ' )
+        *str++ = '_';
+#endif
+
     while( *str )
     {
         switch( *str )
         {
             case '/':
-#ifdef WIN32
+#if defined( __APPLE__ )
+            case ':':
+#elif defined( WIN32 )
             case '\\':
             case '*':
             case '"':
@@ -976,6 +1003,19 @@ void filename_sanitize( char *str )
         }
         str++;
     }
+
+#if defined( WIN32 )
+    // Change trailing spaces into underscores
+    str--;
+    while( str != str_base )
+    {
+        if( *str != ' ' )
+            break;
+        *str-- = '_';
+    }
+#endif
+
+    return str_base;
 }
 
 /**
@@ -983,52 +1023,120 @@ void filename_sanitize( char *str )
  */
 void path_sanitize( char *str )
 {
-#if 0
-    /*
-     * Uncomment the two blocks to prevent /../ or /./, i'm not sure that we
-     * want to.
-     */
-    char *prev = str - 1;
-#endif
 #ifdef WIN32
     /* check drive prefix if path is absolute */
-    if( isalpha(*str) && (':' == *(str+1)) )
+    if( (((unsigned char)(str[0] - 'A') < 26)
+      || ((unsigned char)(str[0] - 'a') < 26)) && (':' == str[1]) )
         str += 2;
 #endif
     while( *str )
     {
-#ifdef WIN32
-        switch( *str )
-        {
-            case '*':
-            case '"':
-            case '?':
-            case ':':
-            case '|':
-            case '<':
-            case '>':
-                *str = '_';
-        }
+#if defined( __APPLE__ )
+        if( *str == ':' )
+            *str = '_';
+#elif defined( WIN32 )
+        if( strchr( "*\"?:|<>", *str ) )
+            *str = '_';
+        if( *str == '/' )
+            *str = DIR_SEP_CHAR;
 #endif
-#if 0
-        if( *str == '/'
+        str++;
+    }
+}
+
+#include <vlc_url.h>
+
+/**
+ * Convert a file path to a URI. If already a URI, return a copy of it.
+ */
+char *make_URI (const char *path)
+{
+    if (path == NULL)
+        return NULL;
+    if (strstr (path, "://") != NULL)
+        return strdup (path); /* Already an URI */
+    /* Note: VLC cannot handle URI schemes without double slash after the
+     * scheme name (such as mailto: or news:). */
+
+    char *buf;
 #ifdef WIN32
-            || *str == '\\'
+    if (isalpha (path[0]) && (path[1] == ':'))
+    {
+        if (asprintf (&buf, "file:///%c:", path[0]) == -1)
+            buf = NULL;
+        path += 2;
+    }
+    else
 #endif
-            )
-        {
-            if( str - prev == 2 && prev[1] == '.' )
-            {
-                prev[1] = '.';
-            }
-            else if( str - prev == 3 && prev[1] == '.' && prev[2] == '.' )
-            {
-                prev[1] = '_';
-                prev[2] = '_';
-            }
-            prev = str;
+    if (!strncmp (path, "\\\\", 2))
+    {   /* Windows UNC paths */
+#ifndef WIN32
+        /* \\host\share\path -> smb://host/share/path */
+        if (strchr (path + 2, '\\') != NULL)
+        {   /* Convert antislashes to slashes */
+            char *dup = strdup (path);
+            if (dup == NULL)
+                return NULL;
+            for (size_t i = 2; dup[i]; i++)
+                if (dup[i] == '\\')
+                    dup[i] = DIR_SEP_CHAR;
+
+            char *ret = make_URI (dup);
+            free (dup);
+            return ret;
         }
+# define SMB_SCHEME "smb"
+#else
+        /* \\host\share\path -> file://host/share/path */
+# define SMB_SCHEME "file"
 #endif
-        str++;
+        size_t hostlen = strcspn (path + 2, DIR_SEP);
+
+        buf = malloc (sizeof (SMB_SCHEME) + 3 + hostlen);
+        if (buf != NULL)
+            snprintf (buf, sizeof (SMB_SCHEME) + 3 + hostlen,
+                      SMB_SCHEME"://%s", path + 2);
+        path += 2 + hostlen;
+    }
+    else
+    if (path[0] != DIR_SEP_CHAR)
+    {   /* Relative path: prepend the current working directory */
+        char cwd[PATH_MAX];
+
+        if (getcwd (cwd, sizeof (cwd)) == NULL) /* FIXME: UTF8? */
+            return NULL;
+        if (asprintf (&buf, "%s/%s", cwd, path) == -1)
+            return NULL;
+        char *ret = make_URI (buf);
+        free (buf);
+        return ret;
+    }
+    else
+        buf = strdup ("file://");
+    if (buf == NULL)
+        return NULL;
+
+    assert (path[0] == DIR_SEP_CHAR);
+
+    /* Absolute file path */
+    for (const char *ptr = path + 1;; ptr++)
+    {
+        size_t len = strcspn (ptr, DIR_SEP);
+        char *component = encode_URI_bytes (ptr, len);
+        if (component == NULL)
+        {
+            free (buf);
+            return NULL;
+        }
+        char *uri;
+        int val = asprintf (&uri, "%s/%s", buf, component);
+        free (component);
+        free (buf);
+        if (val == -1)
+            return NULL;
+        buf = uri;
+        ptr += len;
+        if (*ptr == '\0')
+            return buf;
     }
 }