X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Ftext%2Funicode.c;h=036027cfdbf2dd5d02261d4c06b543eb50d9359a;hb=7b0dd84fff6c300cab5e0bdbf2a29fb1b3618e62;hp=499b0e7906393545911bf3d246928fede056ec15;hpb=5158c3af473f61e501440e1fb499271d69b6fe55;p=vlc diff --git a/src/text/unicode.c b/src/text/unicode.c index 499b0e7906..036027cfdb 100644 --- a/src/text/unicode.c +++ b/src/text/unicode.c @@ -2,8 +2,7 @@ * unicode.c: Unicode <-> locale functions ***************************************************************************** * Copyright (C) 2005-2006 the VideoLAN team - * Copyright © 2005-2006 Rémi Denis-Courmont - * $Id$ + * Copyright © 2005-2008 Rémi Denis-Courmont * * Authors: Rémi Denis-Courmont * @@ -25,7 +24,11 @@ /***************************************************************************** * Preamble *****************************************************************************/ -#include +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include #include #include "libvlc.h" /* utf8_mkdir */ @@ -74,31 +77,27 @@ #endif #if defined (USE_ICONV) -static char charset[sizeof ("CSISO11SWEDISHFORNAMES//translit")] = ""; +# include +static char charset[sizeof ("CSISO11SWEDISHFORNAMES")] = ""; static void find_charset_once (void) { - char *psz_charset; - if (vlc_current_charset (&psz_charset) - || (psz_charset == NULL) - || (strcmp (psz_charset, "ASCII") == 0) - || ((size_t)snprintf (charset, sizeof (charset), "%s//translit", - psz_charset) >= sizeof (charset))) - strcpy (charset, "UTF-8"); - - free (psz_charset); + strlcpy (charset, nl_langinfo (CODESET), sizeof (charset)); + if (!strcasecmp (charset, "ASCII") + || !strcasecmp (charset, "ANSI_X3.4-1968")) + strcpy (charset, "UTF-8"); /* superset... 
*/ } static int find_charset (void) { static pthread_once_t once = PTHREAD_ONCE_INIT; pthread_once (&once, find_charset_once); - return !strcmp (charset, "UTF-8"); + return !strcasecmp (charset, "UTF-8"); } #endif -static char *locale_fast (const char *string, vlc_bool_t from) +static char *locale_fast (const char *string, bool from) { #if defined (USE_ICONV) if (find_charset ()) @@ -107,7 +106,7 @@ static char *locale_fast (const char *string, vlc_bool_t from) vlc_iconv_t hd = vlc_iconv_open (from ? "UTF-8" : charset, from ? charset : "UTF-8"); if (hd == (vlc_iconv_t)(-1)) - return strdup (string); /* Uho! */ + return NULL; /* Uho! */ const char *iptr = string; size_t inb = strlen (string); @@ -123,7 +122,7 @@ static char *locale_fast (const char *string, vlc_bool_t from) outb--; iptr++; inb--; - vlc_iconv (hd, NULL, NULL, NULL, NULL); + vlc_iconv (hd, NULL, NULL, NULL, NULL); /* reset */ } *optr = '\0'; vlc_iconv_close (hd); @@ -145,21 +144,26 @@ static char *locale_fast (const char *string, vlc_bool_t from) wchar_t wide[len]; MultiByteToWideChar (from ? CP_ACP : CP_UTF8, 0, string, -1, wide, len); - len = 1 + WideCharToMultiByte (from ? CP_UTF8 : CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL); + len = 1 + WideCharToMultiByte (from ? CP_UTF8 : CP_ACP, 0, wide, -1, + NULL, 0, NULL, NULL); out = malloc (len); if (out == NULL) return NULL; - WideCharToMultiByte (from ? CP_UTF8 : CP_ACP, 0, wide, -1, out, len, NULL, NULL); + WideCharToMultiByte (from ? 
CP_UTF8 : CP_ACP, 0, wide, -1, out, len, + NULL, NULL); return out; #else + (void)from; return (char *)string; #endif } -static inline char *locale_dup (const char *string, vlc_bool_t from) +static inline char *locale_dup (const char *string, bool from) { + assert( string ); + #if defined (USE_ICONV) if (find_charset ()) return strdup (string); @@ -167,11 +171,15 @@ static inline char *locale_dup (const char *string, vlc_bool_t from) #elif defined (USE_MB2MB) return locale_fast (string, from); #else + (void)from; return strdup (string); #endif } - +/** + * Releases (if needed) a localized or uniformized string. + * @param str non-NULL return value from FromLocale() or ToLocale(). + */ void LocaleFree (const char *str) { #if defined (USE_ICONV) @@ -179,14 +187,16 @@ void LocaleFree (const char *str) free ((char *)str); #elif defined (USE_MB2MB) free ((char *)str); +#else + (void)str; #endif } /** - * FromLocale: converts a locale string to UTF-8 + * Converts a string from the system locale character encoding to UTF-8. * - * @param locale nul-terminated string to be converted + * @param locale nul-terminated string to convert * * @return a nul-terminated UTF-8 string, or NULL in case of error. * To avoid memory leak, you have to pass the result to LocaleFree() @@ -194,17 +204,26 @@ void LocaleFree (const char *str) */ char *FromLocale (const char *locale) { - return locale_fast (locale, VLC_TRUE); + return locale_fast (locale, true); } +/** + * converts a string from the system locale character encoding to utf-8, + * the result is always allocated on the heap. + * + * @param locale nul-terminated string to convert + * + * @return a nul-terminated utf-8 string, or null in case of error. + * The result must be freed using free() - as with the strdup() function. + */ char *FromLocaleDup (const char *locale) { - return locale_dup (locale, VLC_TRUE); + return locale_dup (locale, true); } /** - * ToLocale: converts a UTF-8 string to local system encoding. 
+ * ToLocale: converts a UTF-8 string to local system encoding. * * @param utf8 nul-terminated string to be converted * @@ -214,18 +233,32 @@ char *FromLocaleDup (const char *locale) */ char *ToLocale (const char *utf8) { - return locale_fast (utf8, VLC_FALSE); + return locale_fast (utf8, false); } -static char *ToLocaleDup (const char *utf8) +/** + * Converts a string from UTF-8 to the system locale character encoding, + * the result is always allocated on the heap. + * + * @param utf8 nul-terminated string to convert + * + * @return a nul-terminated string, or NULL in case of error. + * The result must be freed using free() - as with the strdup() function. + */ +char *ToLocaleDup (const char *utf8) { - return locale_dup (utf8, VLC_FALSE); + return locale_dup (utf8, false); } /** - * utf8_open: open() wrapper for UTF-8 filenames + * Opens a system file handle using UTF-8 paths. + * + * @param filename file path to open (with UTF-8 encoding) + * @param flags open() flags, see the C library open() documentation + * @param mode file permissions if creating a new file + * @return a file handle on success, -1 on error (see errno). */ int utf8_open (const char *filename, int flags, mode_t mode) { @@ -263,12 +296,15 @@ int utf8_open (const char *filename, int flags, mode_t mode) } /** - * utf8_fopen: fopen() wrapper for UTF-8 filenames + * Opens a FILE pointer using UTF-8 filenames. + * @param filename file path, using UTF-8 encoding + * @param mode fopen file open mode + * @return NULL on error, an open FILE pointer on success. 
*/ FILE *utf8_fopen (const char *filename, const char *mode) { int rwflags = 0, oflags = 0; - vlc_bool_t append = VLC_FALSE; + bool append = false; for (const char *ptr = mode; *ptr; ptr++) { @@ -281,7 +317,7 @@ FILE *utf8_fopen (const char *filename, const char *mode) case 'a': rwflags = O_WRONLY; oflags |= O_CREAT; - append = VLC_TRUE; + append = true; break; case 'w': @@ -319,16 +355,18 @@ FILE *utf8_fopen (const char *filename, const char *mode) } /** - * utf8_mkdir: Calls mkdir() after conversion of file name to OS locale + * Creates a directory using UTF-8 paths. * * @param dirname a UTF-8 string with the name of the directory that you * want to create. - * @return A 0 return value indicates success. A -1 return value indicates an - * error, and an error code is stored in errno + * @param mode directory permissions + * @return 0 on success, -1 on error (see errno). */ int utf8_mkdir( const char *dirname, mode_t mode ) { #if defined (UNDER_CE) || defined (WIN32) + VLC_UNUSED( mode ); + wchar_t wname[MAX_PATH + 1]; char mod[MAX_PATH + 1]; int i; @@ -381,11 +419,11 @@ int utf8_mkdir( const char *dirname, mode_t mode ) } /** - * utf8_opendir: wrapper that converts dirname to the locale in use by the OS + * Opens a DIR pointer using UTF-8 paths * * @param dirname UTF-8 representation of the directory name - * - * @return a pointer to the DIR struct. Release with closedir(). + * @return a pointer to the DIR struct, or NULL in case of error. + * Release with standard closedir(). */ DIR *utf8_opendir( const char *dirname ) { @@ -413,12 +451,12 @@ DIR *utf8_opendir( const char *dirname ) } /** - * utf8_readdir: a readdir wrapper that returns the name of the next entry - * in the directory as a UTF-8 string. + * Reads the next file name from an open directory. * * @param dir The directory that is being read * - * @return a UTF-8 string of the directory entry. Use free() to free this memory. + * @return a UTF-8 string of the directory entry. 
+ * Use free() to free this memory. */ char *utf8_readdir( DIR *dir ) { @@ -445,6 +483,10 @@ static int dummy_select( const char *str ) return 1; } +/** + * Does the same as utf8_scandir(), but takes an open directory pointer + * instead of a directory path. + */ int utf8_loaddir( DIR *dir, char ***namelist, int (*select)( const char * ), int (*compar)( const char **, const char ** ) ) @@ -501,6 +543,18 @@ int utf8_loaddir( DIR *dir, char ***namelist, return -1; } +/** + * Selects file entries from a directory, as GNU C scandir(), yet using + * UTF-8 file names. + * + * @param dirname UTF-8 directory path + * @param namelist [OUT] pointer set, on successful completion, to the address + * of a table of UTF-8 filenames. All filenames must be freed with free(). + * The table itself must be freed with free() as well. + * + * @return How many file names were selected (possibly 0), + * or -1 in case of error. + */ int utf8_scandir( const char *dirname, char ***namelist, int (*select)( const char * ), int (*compar)( const char **, const char ** ) ) @@ -517,7 +571,7 @@ int utf8_scandir( const char *dirname, char ***namelist, } static int utf8_statEx( const char *filename, struct stat *buf, - vlc_bool_t deref ) + bool deref ) { #if defined (WIN32) || defined (UNDER_CE) /* retrieve Windows OS version */ @@ -551,19 +605,30 @@ static int utf8_statEx( const char *filename, struct stat *buf, return -1; } - +/** + * Finds file/inode information, as stat(). + * Consider using fstat() instead, if possible. + * + * @param filename UTF-8 file path + */ int utf8_stat( const char *filename, struct stat *buf) { - return utf8_statEx( filename, buf, VLC_TRUE ); + return utf8_statEx( filename, buf, true ); } +/** + * Finds file/inode information, as lstat(). + * Consider using fstat() instead, if possible. 
+ * + * @param filename UTF-8 file path + */ int utf8_lstat( const char *filename, struct stat *buf) { - return utf8_statEx( filename, buf, VLC_FALSE ); + return utf8_statEx( filename, buf, false ); } /** - * utf8_unlink: Calls unlink() after conversion of file name to OS locale + * Removes a file. * * @param filename a UTF-8 string with the name of the file you want to delete. * @return A 0 return value indicates success. A -1 return value indicates an @@ -607,7 +672,8 @@ int utf8_unlink( const char *filename ) /** - * utf8_*printf: *printf with conversion from UTF-8 to local encoding + * Formats a UTF-8 string as vasprintf(), then prints it to stdout, with + * appropriate conversion to local encoding. */ static int utf8_vasprintf( char **str, const char *fmt, va_list ap ) { @@ -621,6 +687,10 @@ static int utf8_vasprintf( char **str, const char *fmt, va_list ap ) return res; } +/** + * Formats a UTF-8 string as vfprintf(), then prints it, with + * appropriate conversion to local encoding. + */ int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap ) { char *str; @@ -633,6 +703,10 @@ int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap ) return res; } +/** + * Formats a UTF-8 string as fprintf(), then prints it, with + * appropriate conversion to local encoding. + */ int utf8_fprintf( FILE *stream, const char *fmt, ... ) { va_list ap; @@ -710,7 +784,7 @@ static char *CheckUTF8( char *str, char rep ) } /** - * EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks + * Replaces invalid/overlong UTF-8 sequences with question marks. * Note that it is not possible to convert from Latin-1 to UTF-8 on the fly, * so we don't try that, even though it would be less disruptive. * @@ -723,7 +797,7 @@ char *EnsureUTF8( char *str ) /** - * IsUTF8: checks whether a string is a valid UTF-8 byte sequence. + * Checks whether a string is a valid UTF-8 byte sequence. * * @param str nul-terminated string to be checked *