]> git.sesse.net Git - vlc/commitdiff
* reimplement EnableUTF8 without iconv
authorRémi Denis-Courmont <rem@videolan.org>
Thu, 4 Aug 2005 16:38:57 +0000 (16:38 +0000)
committerRémi Denis-Courmont <rem@videolan.org>
Thu, 4 Aug 2005 16:38:57 +0000 (16:38 +0000)
 * move Unicode stuff to a separate file

Makefile.am
src/libvlc.c
src/misc/unicode.c [new file with mode: 0644]

index 64d64da596df492294a4b6804d9acf09b38aa39b..909cd4a9e00b810606221c29d2760480d3cd06dc 100644 (file)
@@ -384,6 +384,7 @@ SOURCES_libvlc_common = \
        src/misc/block.c \
        src/misc/modules.c \
        src/misc/threads.c \
+       src/misc/unicode.c \
        src/misc/cpu.c \
        src/misc/configuration.c \
        src/misc/image.c \
index 1d5d074dd2154ad356013f7f33a8aee0296a5246..79bab43ccbf06443a605cee6914b2fe55695061b 100644 (file)
@@ -1883,121 +1883,6 @@ static void SetLanguage ( char const *psz_lang )
 #endif
 }
 
-/*****************************************************************************
- * FromLocale: converts a locale string to UTF-8
- *****************************************************************************/
-/* FIXME FIXME: it really has to be made quicker */
-char *FromLocale( const char *locale )
-{
-    char *psz_charset;
-
-    if( locale == NULL )
-        return NULL;
-
-    if( !vlc_current_charset( &psz_charset ) )
-    {
-        char *iptr = (char *)locale, *output, *optr;
-        size_t inb, outb;
-
-        /* cannot fail (unless vlc_current_charset sucks) */
-        vlc_iconv_t hd = vlc_iconv_open( "UTF-8", psz_charset );
-        free( psz_charset );
-
-        /*
-         * We are not allowed to modify the locale pointer, even if we cast it to
-         * non-const.
-         */
-        inb = strlen( locale );
-        outb = inb * 6 + 1;
-
-        /* FIXME: I'm not sure about the value for the multiplication
-         * (for western people, multiplication by 3 (Latin9) is sufficient) */
-        optr = output = calloc( outb , 1);
-        while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
-            *iptr = '?'; /* should not happen, and yes, it sucks */
-
-        vlc_iconv_close( hd );
-        return realloc( output, strlen( output ) + 1 );
-    }
-    free( psz_charset );
-    return (char *)locale;
-}
-
-/*****************************************************************************
- * ToLocale: converts an UTF-8 string to locale
- *****************************************************************************/
-/* FIXME FIXME: it really has to be made quicker */
-char *ToLocale( const char *utf8 )
-{
-    char *psz_charset;
-
-    if( utf8 == NULL )
-        return NULL;
-
-    if( !vlc_current_charset( &psz_charset ) )
-    {
-        char *iptr = (char *)utf8, *output, *optr;
-        size_t inb, outb;
-
-        /* cannot fail (unless vlc_current_charset sucks) */
-        vlc_iconv_t hd = vlc_iconv_open( psz_charset, "UTF-8" );
-        free( psz_charset );
-
-        /*
-         * We are not allowed to modify the locale pointer, even if we cast it to
-         * non-const.
-         */
-        inb = strlen( utf8 );
-        /* FIXME: I'm not sure about the value for the multiplication
-         * (for western people, multiplication is not needed) */
-        outb = inb * 2 + 1;
-
-        optr = output = calloc( outb, 1 );
-        while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
-            *iptr = '?'; /* should not happen, and yes, it sucks */
-
-        vlc_iconv_close( hd );
-        return realloc( output, strlen( output ) + 1 );
-    }
-    free( psz_charset );
-    return (char *)utf8;
-}
-
-void LocaleFree( const char *str )
-{
-    if( str != NULL )
-    {
-        /* FIXME: this deserve a price for the most inefficient peice of code */
-        char *psz_charset;
-    
-        if( !vlc_current_charset( &psz_charset ) )
-            free( (char *)str );
-    
-        free( psz_charset );
-    }
-}
-
-/* FIXME: don't use iconv at all */
-char *EnsureUTF8( char *str )
-{
-    vlc_iconv_t hd;
-    size_t inb, outb;
-    char *ostr, *istr;
-
-    if( str == NULL )
-        return NULL;
-
-    ostr = istr = str;
-
-    inb = outb = strlen( str );
-    hd = vlc_iconv_open( "UTF-8", "UTF-8" );
-    while( vlc_iconv( hd, &istr, &inb, &ostr, &outb ) == (size_t)-1 )
-        *istr = '?';
-
-    vlc_iconv_close( hd );
-    return str;
-}
-
 /*****************************************************************************
  * GetFilenames: parse command line options which are not flags
  *****************************************************************************
diff --git a/src/misc/unicode.c b/src/misc/unicode.c
new file mode 100644 (file)
index 0000000..c1bcc51
--- /dev/null
@@ -0,0 +1,270 @@
+/*****************************************************************************
+ * unicode.c: UTF8 <-> locale functions
+ *****************************************************************************
+ * Copyright (C) 2005 the VideoLAN team
+ * $Id$
+ *
+ * Authors: Rémi Denis-Courmont <rem # videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include <vlc/vlc.h>
+#include "charset.h"
+
+/*****************************************************************************
+ * FromLocale: converts a locale string to UTF-8
+ *****************************************************************************/
+/* FIXME FIXME: it really has to be made quicker */
+char *FromLocale( const char *locale )
+{
+    char *psz_charset;
+
+    if( locale == NULL )
+        return NULL;
+
+    if( !vlc_current_charset( &psz_charset ) )
+    {
+        char *iptr = (char *)locale, *output, *optr;
+        size_t inb, outb;
+
+        /* cannot fail (unless vlc_current_charset sucks) */
+        vlc_iconv_t hd = vlc_iconv_open( "UTF-8", psz_charset );
+        free( psz_charset );
+
+        /*
+         * We are not allowed to modify the locale pointer, even if we cast it to
+         * non-const.
+         */
+        inb = strlen( locale );
+        outb = inb * 6 + 1;
+
+        /* FIXME: I'm not sure about the value for the multiplication
+         * (for western people, multiplication by 3 (Latin9) is sufficient) */
+        optr = output = calloc( outb , 1);
+        while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
+            *iptr = '?'; /* should not happen, and yes, it sucks */
+
+        vlc_iconv_close( hd );
+        return realloc( output, strlen( output ) + 1 );
+    }
+    free( psz_charset );
+    return (char *)locale;
+}
+
+/*****************************************************************************
+ * ToLocale: converts an UTF-8 string to locale
+ *****************************************************************************/
+/* FIXME FIXME: it really has to be made quicker */
+char *ToLocale( const char *utf8 )
+{
+    char *psz_charset;
+
+    if( utf8 == NULL )
+        return NULL;
+
+    if( !vlc_current_charset( &psz_charset ) )
+    {
+        char *iptr = (char *)utf8, *output, *optr;
+        size_t inb, outb;
+
+        /* cannot fail (unless vlc_current_charset sucks) */
+        vlc_iconv_t hd = vlc_iconv_open( psz_charset, "UTF-8" );
+        free( psz_charset );
+
+        /*
+         * We are not allowed to modify the locale pointer, even if we cast it to
+         * non-const.
+         */
+        inb = strlen( utf8 );
+        /* FIXME: I'm not sure about the value for the multiplication
+         * (for western people, multiplication is not needed) */
+        outb = inb * 2 + 1;
+
+        optr = output = calloc( outb, 1 );
+        while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
+            *iptr = '?'; /* should not happen, and yes, it sucks */
+
+        vlc_iconv_close( hd );
+        return realloc( output, strlen( output ) + 1 );
+    }
+    free( psz_charset );
+    return (char *)utf8;
+}
+
+void LocaleFree( const char *str )
+{
+    if( str != NULL )
+    {
+        /* FIXME: this deserve a price for the most inefficient peice of code */
+        char *psz_charset;
+    
+        if( !vlc_current_charset( &psz_charset ) )
+            free( (char *)str );
+    
+        free( psz_charset );
+    }
+}
+
+/*****************************************************************************
+ * EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks
+ *****************************************************************************
+ * Not Todo : convert Latin1 to UTF-8 on the flu
+ * It is not possible given UTF-8 needs more space
+ *****************************************************************************/
+#define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF)) 
+char *EnsureUTF8( char *str )
+{
+    unsigned char *ptr, c;
+
+    ptr = (unsigned char *)str;
+    while( (c = *ptr) != '\0' )
+    {
+        /* US-ASCII, 1 byte */
+        if( ( ( c >= 0x20 ) && ( c <= 0x7F ) )
+         || ( c == 0x09 ) || ( c == 0x0A ) || ( c == 0x0D ) )
+        {
+            ptr++; /* OK */
+        }
+        else
+        /* 2 bytes */
+        if( ( c >= 0xC2 ) && ( c <= 0xDF ) )
+        {
+            c = ptr[1];
+            if( isutf8cont( c ) )
+                ptr += 2; /* OK */
+            else
+                *ptr++ = '?'; /* invalid */
+        }
+        else
+        /* 3 bytes */
+        if( c == 0xE0 )
+        {
+            c = ptr[1];
+            if( ( c >= 0xA0 ) && ( c <= 0xBF ) )
+            {
+                c = ptr[2];
+                if( isutf8cont( c ) )
+                    ptr += 3; /* OK */
+                else
+                    *ptr++ = '?';
+            }
+            else
+                *ptr++ = '?';
+        }
+        else
+        if( ( ( c >= 0xE1 ) && ( c <= 0xEC ) ) || ( c == 0xEC )
+         || ( c == 0xEE ) || ( c == 0xEF ) )
+        {
+            c = ptr[1];
+            if( isutf8cont( c ) )
+            {
+                c = ptr[2];
+                if( isutf8cont( c ) )
+                    ptr += 3; /* OK */
+                else
+                    *ptr++ = '?';
+            }
+            else
+                *ptr++ = '?';
+        }
+        else
+        if( c == 0xED )
+        {
+            c = ptr[1];
+            if( ( c >= 0x80 ) && ( c <= 0x9F ) )
+            {
+                c = ptr[2];
+                if( isutf8cont( c ) )
+                    ptr += 3; /* OK */
+                else
+                    *ptr++ = '?';
+            }
+            else
+                *ptr++ = '?';
+        }
+        else
+        /* 4 bytes */
+        if( c == 0xF0 )
+        {
+            c = ptr[1];
+            if( ( c >= 0x90 ) && ( c <= 0xBF ) )
+            {
+                c = ptr[2];
+                if( isutf8cont( c ) )
+                {
+                    c = ptr[3];
+                    if( isutf8cont( c ) )
+                        ptr += 4; /* OK */
+                    else
+                        *ptr++ = '?';
+                }
+                else
+                    *ptr++ = '?';
+            }
+            else
+                *ptr++ = '?';
+        }
+        else
+        if( ( c >= 0xF1 ) && ( c <= 0xF3 ) )
+        {
+            c = ptr[1];
+            if( isutf8cont( c ) )
+            {
+                c = ptr[2];
+                if( isutf8cont( c ) )
+                {
+                    c = ptr[3];
+                    if( isutf8cont( c ) )
+                        ptr += 4; /* OK */
+                    else
+                        *ptr++ = '?';
+                }
+                else
+                    *ptr++ = '?';
+            }
+            else
+                *ptr++ = '?';
+        }
+        else
+        if( c == 0xF4 )
+        {
+            c = ptr[1];
+            if( ( c >= 0x80 ) && ( c <= 0x8F ) )
+            {
+                c = ptr[2];
+                if( isutf8cont( c ) )
+                {
+                    c = ptr[3];
+                    if( isutf8cont( c ) )
+                        ptr += 4; /* OK */
+                    else
+                        *ptr++ = '?';
+                }
+                else
+                    *ptr++ = '?';
+            }
+            else
+                *ptr++ = '?';
+        }
+        else
+            *ptr++ = '?';
+    }
+
+    return str;
+}