]> git.sesse.net Git - vlc/blobdiff - src/misc/unicode.c
Fix minor typo
[vlc] / src / misc / unicode.c
index a57852c7132ea8d64b08d349f968082aef2a2304..c7c2387ddf6db87ba41b086892ba18ce2e3a308a 100644 (file)
@@ -30,6 +30,7 @@
 #include <assert.h>
 
 #include <stdio.h>
+#include <stdarg.h>
 #include <errno.h>
 #include <sys/types.h>
 #ifdef HAVE_DIRENT_H
@@ -78,19 +79,25 @@ void LocaleInit( vlc_object_t *p_this )
     else
     {
         /* not UTF-8 */
-        char *psz_conv = psz_charset;
+        char psz_buf[strlen( psz_charset ) + sizeof( "//translit" )];
+        const char *psz_conv;
 
         /*
          * Still allow non-ASCII characters when the locale is not set.
          * Western Europeans are being favored for historical reasons.
          */
-        psz_conv = strcmp( psz_charset, "ASCII" )
-                ? psz_charset : "ISO-8859-1";
+        if( strcmp( psz_charset, "ASCII" ) )
+        {
+            sprintf( psz_buf, "%s//translit", psz_charset );
+            psz_conv = psz_buf;
+        }
+        else
+            psz_conv = "ISO-8859-1//translit";
 
         vlc_mutex_init( p_this, &from_locale.lock );
         vlc_mutex_init( p_this, &to_locale.lock );
-        from_locale.hd = vlc_iconv_open( "UTF-8", psz_charset );
-        to_locale.hd = vlc_iconv_open( psz_charset, "UTF-8" );
+        from_locale.hd = vlc_iconv_open( "UTF-8", psz_conv );
+        to_locale.hd = vlc_iconv_open( psz_conv, "UTF-8" );
     }
 
     free( psz_charset );
@@ -123,23 +130,23 @@ void LocaleDeinit( void )
 static char *MB2MB( const char *string, UINT fromCP, UINT toCP )
 {
     char *out;
-    int ilen = strlen( string ), olen = (4 / sizeof (wchar_t)) * ilen + 1;
-    wchar_t wide[olen];
+    wchar_t *wide;
+    int len;
 
-    ilen = MultiByteToWideChar( fromCP, 0, string, ilen + 1, wide, olen );
-    if( ilen == 0 )
+    len = MultiByteToWideChar( fromCP, 0, string, -1, NULL, 0 );
+    assert( len > 0 );
+    wide = (wchar_t *)malloc (len * sizeof (wchar_t));
+    if( wide == NULL )
         return NULL;
 
-    olen = 4 * ilen + 1;
-    out = malloc( olen );
+    MultiByteToWideChar( fromCP, 0, string, -1, wide, len );
+    len = WideCharToMultiByte( toCP, 0, wide, -1, NULL, 0, NULL, NULL );
+    assert( len > 0 );
+    out = malloc( len );
 
-    olen = WideCharToMultiByte( toCP, 0, wide, ilen, out, olen, NULL, NULL );
-    if( olen == 0 )
-    {
-        free( out );
-        return NULL;
-    }
-    return realloc( out, olen );
+    WideCharToMultiByte( toCP, 0, wide, -1, out, len, NULL, NULL );
+    free( wide );
+    return out;
 }
 #endif
 
@@ -266,6 +273,19 @@ char *ToLocale( const char *utf8 )
 #endif
 }
 
+/*
+ * ToLocaleDup: UTF-8 to local encoding, for callers that want their own
+ * copy of the result.
+ *
+ * When no conversion is performed (ASSUME_UTF8 builds, or an iconv build
+ * whose to_locale handle equals (vlc_iconv_t)(-1)) the input is duplicated
+ * with strdup(). In every other case the value returned by ToLocale() is
+ * passed through unchanged.
+ * NOTE(review): presumably ToLocale() returns a fresh allocation on that
+ * path - its body is not visible here; confirm.
+ */
+char *ToLocaleDup( const char *utf8 )
+{
+#if !defined (ASSUME_UTF8)
+# ifdef USE_ICONV
+    if( to_locale.hd == (vlc_iconv_t)(-1) )
+        return strdup( utf8 );
+# endif
+    return ToLocale( utf8 );
+#else
+    return strdup( utf8 );
+#endif
+}
+
 void LocaleFree( const char *str )
 {
 #ifdef USE_ICONV
@@ -297,15 +317,17 @@ FILE *utf8_fopen( const char *filename, const char *mode )
         errno = ENOENT;
     return NULL;
 #else
-    wchar_t wpath[MAX_PATH];
-    wchar_t wmode[4];
+    wchar_t wpath[MAX_PATH + 1];
+    size_t len = strlen( mode ) + 1;
+    wchar_t wmode[len];
 
-    if( !MultiByteToWideChar( CP_UTF8, 0, filename, -1, wpath, MAX_PATH - 1)
-     || !MultiByteToWideChar( CP_ACP, 0, mode, -1, wmode, 3 ) )
+    if( !MultiByteToWideChar( CP_UTF8, 0, filename, -1, wpath, MAX_PATH )
+     || !MultiByteToWideChar( CP_ACP, 0, mode, len, wmode, len ) )
     {
         errno = ENOENT;
         return NULL;
     }
+    wpath[MAX_PATH] = L'\0';
 
     return _wfopen( wpath, wmode );
 #endif
@@ -317,8 +339,8 @@ FILE *utf8_fopen( const char *filename, const char *mode )
 int utf8_mkdir( const char *dirname )
 {
 #if defined (UNDER_CE) || defined (WIN32)
-    wchar_t wname[MAX_PATH];
-    char mod[MAX_PATH];
+    wchar_t wname[MAX_PATH + 1];
+    char mod[MAX_PATH + 1];
     int i;
 
     /* Convert '/' into '\' */
@@ -341,6 +363,7 @@ int utf8_mkdir( const char *dirname )
         errno = ENOENT;
         return -1;
     }
+    wname[MAX_PATH] = L'\0';
 
     if( CreateDirectoryW( wname, NULL ) == 0 )
     {
@@ -397,7 +420,8 @@ const char *utf8_readdir( void *dir )
 static int utf8_statEx( const char *filename, void *buf,
                         vlc_bool_t deref )
 {
-#ifdef HAVE_SYS_STAT_H
+#if !(defined (WIN32) || defined (UNDER_CE))
+# ifdef HAVE_SYS_STAT_H
     const char *local_name = ToLocale( filename );
 
     if( local_name != NULL )
@@ -408,8 +432,21 @@ static int utf8_statEx( const char *filename, void *buf,
         return res;
     }
     errno = ENOENT;
-#endif
+# endif
     return -1;
+#else
+    wchar_t wpath[MAX_PATH + 1];
+
+    if( !MultiByteToWideChar( CP_UTF8, 0, filename, -1, wpath, MAX_PATH ) )
+    {
+        errno = ENOENT;
+        return -1;
+    }
+    wpath[MAX_PATH] = L'\0';
+
+    /* struct _stat is just a silly Microsoft alias for struct stat */
+    return _wstat( wpath, (struct _stat *)buf );
+#endif
 }
 
 
@@ -423,11 +460,50 @@ int utf8_lstat( const char *filename, void *buf)
     return utf8_statEx( filename, buf, VLC_FALSE );
 }
 
+/*****************************************************************************
+ * utf8_*printf: *printf with conversion from UTF-8 to local encoding
+ *****************************************************************************/
+/*
+ * utf8_vasprintf: formats into a newly allocated string, then converts the
+ * result from UTF-8 to the local encoding.
+ *
+ * str: receives the converted string on success; the callers in this file
+ *      release it with free().
+ * fmt, ap: printf-style format and arguments, passed to vasprintf().
+ *
+ * Returns the value reported by vasprintf() (the length of the UTF-8 text
+ * before conversion), or -1 if formatting or conversion fails. On a
+ * conversion failure *str is set to NULL.
+ */
+static int utf8_vasprintf( char **str, const char *fmt, va_list ap )
+{
+    char *utf8;
+    int res = vasprintf( &utf8, fmt, ap );
+    if( res == -1 )
+        return -1;
+
+    *str = ToLocaleDup( utf8 );
+    free( utf8 );
+
+    /* ToLocaleDup() can come back empty-handed (e.g. strdup() failure);
+     * do not report success with a NULL string */
+    if( *str == NULL )
+        return -1;
+    return res;
+}
+
+/*
+ * utf8_vfprintf: vfprintf() variant whose output is converted from UTF-8
+ * to the local encoding before being written to stream.
+ *
+ * Returns the value produced by utf8_vasprintf() on success, or -1 if
+ * formatting, conversion or the write itself fails.
+ */
+static int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap )
+{
+    char *str;
+    int res = utf8_vasprintf( &str, fmt, ap );
+    if( res == -1 )
+        return -1;
+
+    /* Never hand a NULL pointer to fputs() */
+    if( str == NULL )
+        return -1;
+
+    /* A failed write must not be reported as success */
+    if( fputs( str, stream ) == EOF )
+        res = -1;
+
+    free( str );
+    return res;
+}
+
+/*
+ * utf8_fprintf: fprintf() variant for UTF-8 text. Gathers the variadic
+ * arguments and delegates to utf8_vfprintf(), whose return value is
+ * passed back unchanged.
+ */
+int utf8_fprintf( FILE *stream, const char *fmt, ... )
+{
+    va_list args;
+    va_start( args, fmt );
+
+    const int ret = utf8_vfprintf( stream, fmt, args );
+
+    va_end( args );
+    return ret;
+}
+
 /*****************************************************************************
  * EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks
  *****************************************************************************
  * Not Todo : convert Latin1 to UTF-8 on the fly
  * It is not possible given UTF-8 needs more space
+ * Returns str if it was valid UTF-8, NULL if not.
  *****************************************************************************/
 #define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF)) 
 char *EnsureUTF8( char *str )
@@ -451,7 +527,10 @@ char *EnsureUTF8( char *str )
             if( isutf8cont( c ) )
                 ptr += 2; /* OK */
             else
+            {
                 *ptr++ = '?'; /* invalid */
+                str = NULL;
+            }
         }
         else
         /* 3 bytes */
@@ -464,10 +543,16 @@ char *EnsureUTF8( char *str )
                 if( isutf8cont( c ) )
                     ptr += 3; /* OK */
                 else
+                {
                     *ptr++ = '?';
+                    str = NULL;
+                }
             }
             else
+            {
                 *ptr++ = '?';
+                str = NULL;
+            }
         }
         else
         if( ( ( c >= 0xE1 ) && ( c <= 0xEC ) ) || ( c == 0xEC )
@@ -480,10 +565,16 @@ char *EnsureUTF8( char *str )
                 if( isutf8cont( c ) )
                     ptr += 3; /* OK */
                 else
+                {
                     *ptr++ = '?';
+                    str = NULL;
+                }
             }
             else
+            {
                 *ptr++ = '?';
+                str = NULL;
+            }
         }
         else
         if( c == 0xED )
@@ -495,10 +586,16 @@ char *EnsureUTF8( char *str )
                 if( isutf8cont( c ) )
                     ptr += 3; /* OK */
                 else
+                {
                     *ptr++ = '?';
+                    str = NULL;
+                }
             }
             else
+            {
                 *ptr++ = '?';
+                str = NULL;
+            }
         }
         else
         /* 4 bytes */
@@ -514,13 +611,22 @@ char *EnsureUTF8( char *str )
                     if( isutf8cont( c ) )
                         ptr += 4; /* OK */
                     else
+                    {
                         *ptr++ = '?';
+                        str = NULL;
+                    }
                 }
                 else
+                {
                     *ptr++ = '?';
+                    str = NULL;
+                }
             }
             else
+            {
                 *ptr++ = '?';
+                str = NULL;
+            }
         }
         else
         if( ( c >= 0xF1 ) && ( c <= 0xF3 ) )
@@ -535,13 +641,22 @@ char *EnsureUTF8( char *str )
                     if( isutf8cont( c ) )
                         ptr += 4; /* OK */
                     else
+                    {
                         *ptr++ = '?';
+                        str = NULL;
+                    }
                 }
                 else
+                {
                     *ptr++ = '?';
+                    str = NULL;
+                }
             }
             else
+            {
                 *ptr++ = '?';
+                str = NULL;
+            }
         }
         else
         if( c == 0xF4 )
@@ -556,16 +671,28 @@ char *EnsureUTF8( char *str )
                     if( isutf8cont( c ) )
                         ptr += 4; /* OK */
                     else
+                    {
                         *ptr++ = '?';
+                        str = NULL;
+                    }
                 }
                 else
+                {
                     *ptr++ = '?';
+                    str = NULL;
+                }
             }
             else
+            {
                 *ptr++ = '?';
+                str = NULL;
+            }
         }
         else
+        {
             *ptr++ = '?';
+            str = NULL;
+        }
     }
 
     return str;