X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fmisc%2Funicode.c;h=c7c2387ddf6db87ba41b086892ba18ce2e3a308a;hb=a69be9de1c84ac60979ca0010c243ea6f68570cd;hp=a57852c7132ea8d64b08d349f968082aef2a2304;hpb=2678c532b12e275ac2b601cc12d349ff00b7b6b8;p=vlc diff --git a/src/misc/unicode.c b/src/misc/unicode.c index a57852c713..c7c2387ddf 100644 --- a/src/misc/unicode.c +++ b/src/misc/unicode.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #ifdef HAVE_DIRENT_H @@ -78,19 +79,25 @@ void LocaleInit( vlc_object_t *p_this ) else { /* not UTF-8 */ - char *psz_conv = psz_charset; + char psz_buf[strlen( psz_charset ) + sizeof( "//translit" )]; + const char *psz_conv; /* * Still allow non-ASCII characters when the locale is not set. * Western Europeans are being favored for historical reasons. */ - psz_conv = strcmp( psz_charset, "ASCII" ) - ? psz_charset : "ISO-8859-1"; + if( strcmp( psz_charset, "ASCII" ) ) + { + sprintf( psz_buf, "%s//translit", psz_charset ); + psz_conv = psz_buf; + } + else + psz_conv = "ISO-8859-1//translit"; vlc_mutex_init( p_this, &from_locale.lock ); vlc_mutex_init( p_this, &to_locale.lock ); - from_locale.hd = vlc_iconv_open( "UTF-8", psz_charset ); - to_locale.hd = vlc_iconv_open( psz_charset, "UTF-8" ); + from_locale.hd = vlc_iconv_open( "UTF-8", psz_conv ); + to_locale.hd = vlc_iconv_open( psz_conv, "UTF-8" ); } free( psz_charset ); @@ -123,23 +130,23 @@ void LocaleDeinit( void ) static char *MB2MB( const char *string, UINT fromCP, UINT toCP ) { char *out; - int ilen = strlen( string ), olen = (4 / sizeof (wchar_t)) * ilen + 1; - wchar_t wide[olen]; + wchar_t *wide; + int len; - ilen = MultiByteToWideChar( fromCP, 0, string, ilen + 1, wide, olen ); - if( ilen == 0 ) + len = MultiByteToWideChar( fromCP, 0, string, -1, NULL, 0 ); + assert( len > 0 ); + wide = (wchar_t *)malloc (len * sizeof (wchar_t)); + if( wide == NULL ) return NULL; - olen = 4 * ilen + 1; - out = malloc( olen ); + MultiByteToWideChar( fromCP, 0, string, -1, wide, len ); + len = WideCharToMultiByte( toCP, 0, wide, -1, NULL, 0, NULL, NULL ); + assert( len > 0 ); + out = malloc( len ); - olen = WideCharToMultiByte( toCP, 0, wide, ilen, out, olen, NULL, NULL ); - if( olen == 0 ) - { - free( out ); - return NULL; - } - return realloc( out, olen ); + WideCharToMultiByte( toCP, 0, wide, -1, out, len, NULL, NULL ); + free( wide ); + return out; } #endif @@ -266,6 +273,19 @@ char *ToLocale( const char *utf8 ) #endif } +char *ToLocaleDup( const char *utf8 ) +{ +#if defined (ASSUME_UTF8) + return strdup( utf8 ); +#else +# ifdef USE_ICONV + if (to_locale.hd == (vlc_iconv_t)(-1)) + return strdup( utf8 ); +# endif + return ToLocale( utf8 ); +#endif +} + void LocaleFree( const char *str ) { #ifdef USE_ICONV @@ -297,15 +317,17 @@ FILE *utf8_fopen( const char *filename, const char *mode ) errno = ENOENT; return NULL; #else - wchar_t wpath[MAX_PATH]; - wchar_t wmode[4]; + wchar_t wpath[MAX_PATH + 1]; + size_t len = strlen( mode ) + 1; + wchar_t wmode[len]; - if( !MultiByteToWideChar( CP_UTF8, 0, filename, -1, wpath, MAX_PATH - 1) - || !MultiByteToWideChar( CP_ACP, 0, mode, -1, wmode, 3 ) ) + if( !MultiByteToWideChar( CP_UTF8, 0, filename, -1, wpath, MAX_PATH ) + || !MultiByteToWideChar( CP_ACP, 0, mode, len, wmode, len ) ) { errno = ENOENT; return NULL; } + wpath[MAX_PATH] = L'\0'; return _wfopen( wpath, wmode ); #endif @@ -317,8 +339,8 @@ FILE *utf8_fopen( const char *filename, const char *mode ) int utf8_mkdir( const char *dirname ) { #if defined (UNDER_CE) || defined (WIN32) - wchar_t wname[MAX_PATH]; - char mod[MAX_PATH]; + wchar_t wname[MAX_PATH + 1]; + char mod[MAX_PATH + 1]; int i; /* Convert '/' into '\' */ @@ -341,6 +363,7 @@ int utf8_mkdir( const char *dirname ) errno = ENOENT; return -1; } + wname[MAX_PATH] = L'\0'; if( CreateDirectoryW( wname, NULL ) == 0 ) { @@ -397,7 +420,8 @@ const char *utf8_readdir( void *dir ) static int utf8_statEx( const char *filename, void *buf, vlc_bool_t deref ) { -#ifdef HAVE_SYS_STAT_H +#if !(defined (WIN32) || defined (UNDER_CE)) +# ifdef HAVE_SYS_STAT_H const char *local_name = ToLocale( filename ); if( local_name != NULL ) @@ -408,8 +432,21 @@ static int utf8_statEx( const char *filename, void *buf, return res; } errno = ENOENT; -#endif +# endif return -1; +#else + wchar_t wpath[MAX_PATH + 1]; + + if( !MultiByteToWideChar( CP_UTF8, 0, filename, -1, wpath, MAX_PATH ) ) + { + errno = ENOENT; + return -1; + } + wpath[MAX_PATH] = L'\0'; + + /* struct _stat is just a silly Microsoft alias for struct stat */ + return _wstat( wpath, (struct _stat *)buf ); +#endif } @@ -423,11 +460,50 @@ int utf8_lstat( const char *filename, void *buf) return utf8_statEx( filename, buf, VLC_FALSE ); } +/***************************************************************************** + * utf8_*printf: *printf with conversion from UTF-8 to local encoding + *****************************************************************************/ +static int utf8_vasprintf( char **str, const char *fmt, va_list ap ) +{ + char *utf8; + int res = vasprintf( &utf8, fmt, ap ); + if( res == -1 ) + return -1; + + *str = ToLocaleDup( utf8 ); + free( utf8 ); + return res; +} + +static int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap ) +{ + char *str; + int res = utf8_vasprintf( &str, fmt, ap ); + if( res == -1 ) + return -1; + + fputs( str, stream ); + free( str ); + return res; +} + +int utf8_fprintf( FILE *stream, const char *fmt, ... ) +{ + va_list ap; + int res; + + va_start( ap, fmt ); + res = utf8_vfprintf( stream, fmt, ap ); + va_end( ap ); + return res; +} + /***************************************************************************** * EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks ***************************************************************************** * Not Todo : convert Latin1 to UTF-8 on the flu * It is not possible given UTF-8 needs more space + * Returns str if it was valid UTF-8, NULL if not. *****************************************************************************/ #define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF)) char *EnsureUTF8( char *str ) @@ -451,7 +527,10 @@ char *EnsureUTF8( char *str ) if( isutf8cont( c ) ) ptr += 2; /* OK */ else + { *ptr++ = '?'; /* invalid */ + str = NULL; + } } else /* 3 bytes */ @@ -464,10 +543,16 @@ char *EnsureUTF8( char *str ) if( isutf8cont( c ) ) ptr += 3; /* OK */ else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else if( ( ( c >= 0xE1 ) && ( c <= 0xEC ) ) || ( c == 0xEC ) @@ -480,10 +565,16 @@ char *EnsureUTF8( char *str ) if( isutf8cont( c ) ) ptr += 3; /* OK */ else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else if( c == 0xED ) @@ -495,10 +586,16 @@ char *EnsureUTF8( char *str ) if( isutf8cont( c ) ) ptr += 3; /* OK */ else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else /* 4 bytes */ @@ -514,13 +611,22 @@ char *EnsureUTF8( char *str ) if( isutf8cont( c ) ) ptr += 4; /* OK */ else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else if( ( c >= 0xF1 ) && ( c <= 0xF3 ) ) @@ -535,13 +641,22 @@ char *EnsureUTF8( char *str ) if( isutf8cont( c ) ) ptr += 4; /* OK */ else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else if( c == 0xF4 ) @@ -556,16 +671,28 @@ char *EnsureUTF8( char *str ) if( isutf8cont( c ) ) ptr += 4; /* OK */ else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } else + { *ptr++ = '?'; + str = NULL; + } } return str;