git.sesse.net Git - vlc/blob - src/misc/charset.c

   1 /*****************************************************************************
   2  * charset.c: Locale's character encoding stuff.
   3  *****************************************************************************
   4  * See also unicode.c for Unicode to locale conversion helpers.
   5  *
   6  * Copyright (C) 2003-2006 the VideoLAN team
   7  * $Id$
   8  *
   9  * Authors: Derk-Jan Hartman <thedj at users.sf.net>
  10  *          Christophe Massiot
  11  *          Rémi Denis-Courmont
  12  *
  13  * vlc_current_charset() an adaption of mp_locale_charset():
  14  *
  15  *  Copyright (C) 2001-2003 The Mape Project
  16  *  Written by Karel Zak  <zakkr@zf.jcu.cz>.
  17  *
  18  * This program is free software; you can redistribute it and/or modify
  19  * it under the terms of the GNU General Public License as published by
  20  * the Free Software Foundation; either version 2 of the License, or
  21  * (at your option) any later version.
  22  *
  23  * This program is distributed in the hope that it will be useful,
  24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  26  * GNU General Public License for more details.
  27  *
  28  * You should have received a copy of the GNU General Public License
  29  * along with this program; if not, write to the Free Software
  30  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  31  *****************************************************************************/
  32
  33 #include <stdlib.h>
  34 #include <stdio.h>
  35 #include <vlc/vlc.h>
  36
  37 #if !defined WIN32
  38 # if HAVE_LANGINFO_CODESET
  39 #  include <langinfo.h>
  40 # endif
  41 # if HAVE_LOCALE_H
  42 #  include <locale.h>
  43 # endif
  44 #else
  45 # include <windows.h>
  46 #endif
  47
  48 #ifdef __APPLE__
  49 #   include <errno.h>
  50 #   include <string.h>
  51 #endif
  52
  53 #include "charset.h"
  54
  55 typedef struct VLCCharsetAlias
  56 {
  57     char *psz_alias, *psz_name;
  58 } VLCCharsetAlias;
  59
  60 /*
  61  * libcharset loads everything from an external text file, which is a
  62  * strange and slow solution; we use array(s) compiled into the source
  63  * instead. With a "good" libc this is not needed -- for example on Linux.
  64  *
  65  * Please put only exotic aliases into this function. The libc 'iconv' knows
  66  * a lot of basic aliases (check it first with iconv -l).
  67  *
  68  */
  69 #if (defined OS2 || !HAVE_LANGINFO_CODESET) && !defined WIN32
  70 static const char* vlc_encoding_from_language( const char *l )
  71 {
  72     /* check for language (and perhaps country) codes */
  73     if (strstr(l, "zh_TW")) return "Big5";
  74     if (strstr(l, "zh_HK")) return "Big5HKSCS";   /* no MIME charset */
  75     if (strstr(l, "zh")) return "GB2312";
  76     if (strstr(l, "th")) return "TIS-620";
  77     if (strstr(l, "ja")) return "EUC-JP";
  78     if (strstr(l, "ko")) return "EUC-KR";
  79     if (strstr(l, "ru")) return "KOI8-R";
  80     if (strstr(l, "uk")) return "KOI8-U";
  81     if (strstr(l, "pl") || strstr(l, "hr") ||
  82         strstr(l, "hu") || strstr(l, "cs") ||
  83         strstr(l, "sk") || strstr(l, "sl")) return "ISO-8859-2";
  84     if (strstr(l, "eo") || strstr(l, "mt")) return "ISO-8859-3";
  85     if (strstr(l, "lt") || strstr(l, "la")) return "ISO-8859-4";
  86     if (strstr(l, "bg") || strstr(l, "be") ||
  87         strstr(l, "mk") || strstr(l, "uk")) return "ISO-8859-5";
  88     if (strstr(l, "ar")) return "ISO-8859-6";
  89     if (strstr(l, "el")) return "ISO-8859-7";
  90     if (strstr(l, "he") || strstr(l, "iw")) return "ISO-8859-8";
  91     if (strstr(l, "tr")) return "ISO-8859-9";
  92     if (strstr(l, "th")) return "ISO-8859-11";
  93     if (strstr(l, "lv")) return "ISO-8859-13";
  94     if (strstr(l, "cy")) return "ISO-8859-14";
  95     if (strstr(l, "et")) return "ISO-8859-15"; /* all latin1 could be iso15 as well */
  96     if (strstr(l, "ro")) return "ISO-8859-2";   /* or ISO-8859-16 */
  97     if (strstr(l, "am") || strstr(l, "vi")) return "UTF-8";
  98     /* We don't know. This ain't working go to default. */
  99     return "ISO-8859-1";
 100 }
 101 #endif
 102
 103 static const char* vlc_charset_aliases( const char *psz_name )
 104 {
 105     VLCCharsetAlias     *a;
 106
 107 #if defined WIN32
 108     VLCCharsetAlias aliases[] =
 109     {
 110         { "CP936",      "GBK" },
 111         { "CP1361",     "JOHAB" },
 112         { "CP20127",    "ASCII" },
 113         { "CP20866",    "KOI8-R" },
 114         { "CP21866",    "KOI8-RU" },
 115         { "CP28591",    "ISO-8859-1" },
 116         { "CP28592",    "ISO-8859-2" },
 117         { "CP28593",    "ISO-8859-3" },
 118         { "CP28594",    "ISO-8859-4" },
 119         { "CP28595",    "ISO-8859-5" },
 120         { "CP28596",    "ISO-8859-6" },
 121         { "CP28597",    "ISO-8859-7" },
 122         { "CP28598",    "ISO-8859-8" },
 123         { "CP28599",    "ISO-8859-9" },
 124         { "CP28605",    "ISO-8859-15" },
 125         { NULL,         NULL }
 126     };
 127 #elif SYS_AIX
 128     VLCCharsetAlias aliases[] =
 129     {
 130         { "IBM-850",    "CP850" },
 131         { "IBM-856",    "CP856" },
 132         { "IBM-921",    "ISO-8859-13" },
 133         { "IBM-922",    "CP922" },
 134         { "IBM-932",    "CP932" },
 135         { "IBM-943",    "CP943" },
 136         { "IBM-1046",   "CP1046" },
 137         { "IBM-1124",   "CP1124" },
 138         { "IBM-1129",   "CP1129" },
 139         { "IBM-1252",   "CP1252" },
 140         { "IBM-EUCCN",  "GB2312" },
 141         { "IBM-EUCJP",  "EUC-JP" },
 142         { "IBM-EUCKR",  "EUC-KR" },
 143         { "IBM-EUCTW",  "EUC-TW" },
 144         { NULL, NULL }
 145     };
 146 #elif SYS_HPUX
 147     VLCCharsetAlias aliases[] =
 148     {
 149         { "ROMAN8",     "HP-ROMAN8" },
 150         { "ARABIC8",    "HP-ARABIC8" },
 151         { "GREEK8",     "HP-GREEK8" },
 152         { "HEBREW8",    "HP-HEBREW8" },
 153         { "TURKISH8",   "HP-TURKISH8" },
 154         { "KANA8",      "HP-KANA8" },
 155         { "HP15CN",     "GB2312" },
 156         { NULL, NULL }
 157     };
 158 #elif SYS_IRIX
 159     VLCCharsetAlias aliases[] =
 160     {
 161         { "EUCCN",      "GB2312" },
 162         { NULL, NULL }
 163     };
 164 #elif SYS_OSF
 165     VLCCharsetAlias aliases[] =
 166     {
 167         { "KSC5601",    "CP949" },
 168         { "SDECKANJI",  "EUC-JP" },
 169         { "TACTIS",     "TIS-620" },
 170         { NULL, NULL }
 171     };
 172 #elif SYS_SOLARIS
 173     VLCCharsetAlias aliases[] =
 174     {
 175         { "646",        "ASCII" },
 176         { "CNS11643",   "EUC-TW" },
 177         { "5601",       "EUC-KR" },
 178         { "JOHAP92",    "JOHAB" },
 179         { "PCK",        "SHIFT_JIS" },
 180         { "2533",       "TIS-620" },
 181         { NULL, NULL }
 182     };
 183 #elif SYS_BSD
 184     VLCCharsetAlias aliases[] =
 185     {
 186         { "646", " ASCII" },
 187         { "EUCCN", "GB2312" },
 188         { NULL, NULL }
 189     };
 190 #else
 191     VLCCharsetAlias aliases[] = {{NULL, NULL}};
 192 #endif
 193
 194     for (a = aliases; a->psz_alias; a++)
 195         if (strcasecmp (a->psz_alias, psz_name) == 0)
 196             return a->psz_name;
 197
 198     /* we return original name beacuse iconv() probably will know
 199      * something better about name if we don't know it :-) */
 200     return psz_name;
 201 }
 202
 203 /* Returns charset from "language_COUNTRY.charset@modifier" string */
 204 #if (defined OS2 || !HAVE_LANGINFO_CODESET) && !defined WIN32
 205 static void vlc_encoding_from_locale( char *psz_locale, char *psz_charset )
 206 {
 207     char *psz_dot = strchr( psz_locale, '.' );
 208
 209     if( psz_dot != NULL )
 210     {
 211         const char *psz_modifier;
 212
 213         psz_dot++;
 214
 215         /* Look for the possible @... trailer and remove it, if any.  */
 216         psz_modifier = strchr( psz_dot, '@' );
 217
 218         if( psz_modifier == NULL )
 219         {
 220             strcpy( psz_charset, psz_dot );
 221             return;
 222         }
 223         if( 0 < ( psz_modifier - psz_dot )
 224              && ( psz_modifier - psz_dot ) < 2 + 10 + 1 )
 225         {
 226             memcpy( psz_charset, psz_dot, psz_modifier - psz_dot );
 227             psz_charset[ psz_modifier - psz_dot ] = '\0';
 228             return;
 229         }
 230     }
 231     /* try language mapping */
 232     strcpy( psz_charset, vlc_encoding_from_language( psz_locale ) );
 233 }
 234 #endif
 235
 236 vlc_bool_t vlc_current_charset( char **psz_charset )
 237 {
 238     const char *psz_codeset;
 239
 240 #if !(defined WIN32 || defined OS2 || defined __APPLE__)
 241
 242 # if HAVE_LANGINFO_CODESET
 243     /* Most systems support nl_langinfo( CODESET ) nowadays.  */
 244     psz_codeset = nl_langinfo( CODESET );
 245     if( !strcmp( psz_codeset, "ANSI_X3.4-1968" ) )
 246         psz_codeset = "ASCII";
 247 # else
 248     /* On old systems which lack it, use setlocale or getenv.  */
 249     const char *psz_locale = NULL;
 250     char buf[2 + 10 + 1];
 251
 252     /* But most old systems don't have a complete set of locales.  Some
 253      * (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
 254      * use setlocale here; it would return "C" when it doesn't support the
 255      * locale name the user has set. Darwin's setlocale is broken. */
 256 #  if HAVE_SETLOCALE && !__APPLE__
 257     psz_locale = setlocale( LC_ALL, NULL );
 258 #  endif
 259     if( psz_locale == NULL || psz_locale[0] == '\0' )
 260     {
 261         psz_locale = getenv( "LC_ALL" );
 262         if( psz_locale == NULL || psz_locale[0] == '\0' )
 263         {
 264             psz_locale = getenv( "LC_CTYPE" );
 265             if( psz_locale == NULL || psz_locale[0] == '\0')
 266                 psz_locale = getenv( "LANG" );
 267         }
 268     }
 269
 270     /* On some old systems, one used to set locale = "iso8859_1". On others,
 271      * you set it to "language_COUNTRY.charset". Darwin only has LANG :( */
 272     vlc_encoding_from_locale( (char *)psz_locale, buf );
 273     psz_codeset =  buf;
 274 # endif /* HAVE_LANGINFO_CODESET */
 275
 276 #elif defined __APPLE__
 277
 278     /* Darwin is always using UTF-8 internally. */
 279     psz_codeset = "UTF-8";
 280
 281 #elif defined WIN32
 282
 283     char buf[2 + 10 + 1];
 284
 285     /* Woe32 has a function returning the locale's codepage as a number.  */
 286     snprintf( buf, sizeof( buf ), "CP%u", GetACP() );
 287     psz_codeset = buf;
 288
 289 #elif defined OS2
 290
 291     const char *psz_locale;
 292     char buf[2 + 10 + 1];
 293     ULONG cp[3];
 294     ULONG cplen;
 295
 296     /* Allow user to override the codeset, as set in the operating system,
 297      * with standard language environment variables. */
 298     psz_locale = getenv( "LC_ALL" );
 299     if( psz_locale == NULL || psz_locale[0] == '\0' )
 300     {
 301         psz+locale = getenv( "LC_CTYPE" );
 302         if( psz_locale == NULL || locale[0] == '\0' )
 303             locale = getenv( "LANG" );
 304     }
 305     if( psz_locale != NULL && psz_locale[0] != '\0' )
 306         vlc_encoding_from_locale( psz_locale, buf );
 307         psz_codeset = buf;
 308     else
 309     {
 310         /* OS/2 has a function returning the locale's codepage as a number. */
 311         if( DosQueryCp( sizeof( cp ), cp, &cplen ) )
 312             psz_codeset = "";
 313         else
 314         {
 315             snprintf( buf, sizeof( buf ), "CP%u", cp[0] );
 316             psz_codeset = buf;
 317         }
 318     }
 319 #endif
 320     if( psz_codeset == NULL )
 321         /* The canonical name cannot be determined. */
 322         psz_codeset = "";
 323     else
 324         psz_codeset = vlc_charset_aliases( psz_codeset );
 325
 326     /* Don't return an empty string.  GNU libc and GNU libiconv interpret
 327      * the empty string as denoting "the locale's character encoding",
 328      * thus GNU libiconv would call this function a second time. */
 329     if( psz_codeset[0] == '\0' )
 330     {
 331         /* Last possibility is 'CHARSET' enviroment variable */
 332         if( !( psz_codeset = getenv( "CHARSET" ) ) )
 333             psz_codeset = "ISO-8859-1";
 334     }
 335
 336     if( psz_charset )
 337         *psz_charset = strdup(psz_codeset);
 338
 339     if( !strcasecmp(psz_codeset, "UTF8") || !strcasecmp(psz_codeset, "UTF-8") )
 340         return VLC_TRUE;
 341
 342     return VLC_FALSE;
 343 }
 344
 345 char *__vlc_fix_readdir_charset( vlc_object_t *p_this, const char *psz_string )
 346 {
 347 #ifdef __APPLE__
 348     if ( p_this->p_libvlc_global->iconv_macosx != (vlc_iconv_t)-1 )
 349     {
 350         const char *psz_in = psz_string;
 351         size_t i_in = strlen(psz_in);
 352         size_t i_out = i_in * 2;
 353         char *psz_utf8 = malloc(i_out + 1);
 354         char *psz_out = psz_utf8;
 355
 356         vlc_mutex_lock( &p_this->p_libvlc_global->iconv_lock );
 357         size_t i_ret = vlc_iconv( p_this->p_libvlc_global->iconv_macosx,
 358                                   &psz_in, &i_in, &psz_out, &i_out );
 359         vlc_mutex_unlock( &p_this->p_libvlc_global->iconv_lock );
 360         if( i_ret == (size_t)-1 || i_in )
 361         {
 362             msg_Warn( p_this,
 363                       "failed to convert \"%s\" from HFS+ charset (%s)",
 364                       psz_string, strerror(errno) );
 365             free( psz_utf8 );
 366             return strdup( psz_string );
 367         }
 368
 369         *psz_out = '\0';
 370         return psz_utf8;
 371     }
 372 #endif
 373
 374     (void)p_this;
 375     return strdup( psz_string );
 376 }
 377
 378 /**
 379  * @return a fallback characters encoding to be used, given a locale.
 380  */
 381 const char *FindFallbackEncoding( const char *locale )
 382 {
 383     if( ( locale == NULL ) || ( strlen( locale ) < 2 ) )
 384         return "ASCII";
 385
 386     switch( U16_AT( locale ) )
 387     {
 388         /*** The ISO-8859 series (anything but Asia) ***/
 389         /* Latin-1 Western-European languages (ISO-8859-1) */
 390         case 'aa':
 391         case 'af':
 392         case 'an':
 393         case 'br':
 394         case 'ca':
 395         case 'da':
 396         case 'de':
 397         case 'en':
 398         case 'es':
 399         case 'et':
 400         case 'eu':
 401         case 'fi':
 402         case 'fo':
 403         case 'fr':
 404         case 'ga':
 405         case 'gd':
 406         case 'gl':
 407         case 'gv':
 408         case 'id':
 409         case 'is':
 410         case 'it':
 411         case 'kl':
 412         case 'kw':
 413         case 'mg':
 414         case 'ms':
 415         case 'nb':
 416         case 'nl':
 417         case 'nn':
 418         case 'no':
 419         case 'oc':
 420         case 'om':
 421         case 'pt':
 422         case 'so':
 423         case 'sq':
 424         case 'st':
 425         case 'sv':
 426         case 'tl':
 427         case 'uz':
 428         case 'wa':
 429         case 'xh':
 430         case 'zu':
 431             /* Compatible Microsoft superset */
 432             return "CP1252";
 433
 434         /* Latin-2 Slavic languages (ISO-8859-2) */
 435         case 'bs':
 436         case 'cs':
 437         case 'hr':
 438         case 'hu':
 439         case 'pl':
 440         case 'ro':
 441         case 'sk':
 442         case 'sl':
 443             /* CP1250 is more common, but incompatible */
 444             return "CP1250";
 445
 446         /* Latin-3 Southern European languages (ISO-8859-3) */
 447         case 'eo':
 448         case 'mt':
 449         /*case 'tr': Turkish uses ISO-8859-9 instead */
 450             return "ISO-8859-3";
 451
 452         /* Latin-4 North-European languages (ISO-8859-4) */
 453         /* All use Latin-1 or Latin-6 instead */
 454
 455         /* Cyrillic alphabet languages (ISO-8859-5) */
 456         case 'be':
 457         case 'bg':
 458         case 'mk':
 459         case 'ru':
 460         case 'sr':
 461             /* KOI8, ISO-8859-5 and CP1251 are supposedly incompatible */
 462             return "CP1251";
 463
 464         /* Arabic (ISO-8859-6) */
 465         case 'ar':
 466             /* FIXME: someone check if we should return CP1256
 467              * or ISO-8859-6 */
 468             /* CP1256 is(?) more common, but incompatible(?) */
 469             return "CP1256";
 470
 471         /* Greek (ISO-8859-7) */
 472         case 'el':
 473             /* FIXME: someone check if we should return CP1253
 474             * or ISO-8859-7 */
 475             /* CP1253 is(?) more common and partially compatible */
 476             return "CP1253";
 477
 478         /* Hebrew (ISO-8859-8) */
 479         case 'he':
 480         case 'iw':
 481         case 'yi':
 482             /* Compatible Microsoft superset */
 483             return "CP1255";
 484
 485         /* Latin-5 Turkish (ISO-8859-9) */
 486         case 'tr':
 487         case 'ku':
 488             /* Compatible Microsoft superset */
 489             return "CP1254";
 490
 491         /* Latin-6 “North-European” languages (ISO-8859-10) */
 492         /* It is so much north European that glibc only uses that for Luganda
 493          * which is spoken in Uganda... unless someone complains, I'm not
 494          * using this one; let's fallback to CP1252 here. */
 495         /* ISO-8859-11 does arguably not exist. Thai is handled below. */
 496         /* ISO-8859-12 really doesn't exist. */
 497
 498         /* Latin-7 Baltic languages (ISO-8859-13) */
 499         case 'lt':
 500         case 'lv':
 501         case 'mi': /* FIXME: ??? that's in New Zealand, doesn't sound baltic */
 502             /* Compatible Microsoft superset */
 503             return "CP1257";
 504
 505         /* Latin-8 Celtic languages (ISO-8859-14) */
 506         case 'cy':
 507             return "ISO-8859-14";
 508
 509         /* Latin-9 (ISO-8859-15) -> see Latin-1 */
 510         /* Latin-10 (ISO-8859-16) does not seem to be used */
 511
 512         /* KOI series */
 513         /* For Russian, we use CP1251 */
 514         case 'uk':
 515             return "KOI8-U";
 516         case 'tg':
 517             return "KOI8-T";
 518
 519         /*** Asia ***/
 520         case 'jp': /* Japanese */
 521             /* Shift-JIS is way more common than EUC-JP */
 522             return "SHIFT-JIS";
 523         case 'ko': /* Korean */
 524             return "EUC-KR";
 525         case 'th': /* Thai */
 526             return "TIS-620";
 527         case 'vt': /* Vietnamese FIXME: infos needed */
 528             /* VISCII is probably a bad idea as it is not extended ASCII */
 529             /* glibc has TCVN5712-1, but I could find no infos on this one */
 530             return "CP1258";
 531
 532         case 'kk': /* Kazakh FIXME: infos needed */
 533             return "PT154";
 534
 535         case 'zh': /* Chinese, charset is country dependant */
 536             if( ( strlen( locale ) >= 5 ) && ( locale[2] != '_' ) )
 537                 switch( U16_AT( locale + 3 ) )
 538                 {
 539                     case 'HK': /* Hong Kong */
 540                         /* FIXME: use something else? */
 541                         return "BIG5-HKSCS";
 542
 543                     case 'TW': /* Taiwan */
 544                         return "BIG5";
 545                 }
 546             /* People's Republic of China */
 547             /* Singapore */
 548             /*
 549              * GB18030 can represent any Unicode code point
 550              * (like UTF-8), while remaining compatible with GBK
 551              * FIXME: is it compatible with GB2312? if not, should we
 552              * use GB2312 instead?
 553              */
 554             return "GB18030";
 555     }
 556
 557     return "ASCII";
 558 }
 559
 560 /**
 561  * GetFallbackEncoding() suggests an encoding to be used for non UTF-8
 562  * text files accord to the system's local settings. It is only a best
 563  * guess.
 564  */
 565 const char *GetFallbackEncoding( void )
 566 {
 567 #ifndef WIN32
 568     const char *psz_lang = NULL;
 569
 570     /* Some systems (like Darwin, SunOS 4 or DJGPP) have only the C locale.
 571      * Therefore we don't use setlocale here; it would return "C". */
 572 #  if defined (HAVE_SETLOCALE) && !defined ( __APPLE__)
 573     psz_lang = setlocale( LC_ALL, NULL );
 574 #  endif
 575     if( psz_lang == NULL || psz_lang[0] == '\0' )
 576     {
 577         psz_lang = getenv( "LC_ALL" );
 578         if( psz_lang == NULL || psz_lang == '\0' )
 579         {
 580             psz_lang = getenv( "LC_CTYPE" );
 581             if( psz_lang == NULL || psz_lang[0] == '\0')
 582                 psz_lang = getenv( "LANG" );
 583         }
 584     }
 585
 586     return FindFallbackEncoding( psz_lang );
 587 #else
 588     /*
 589      * This should be thread-safe given GetACP() should always return
 590      * the same result.
 591      */
 592     static char buf[2 + 10 + 1] = "";
 593
 594     if( buf[0] == 0 )
 595         snprintf( buf, sizeof( buf ), "CP%u", GetACP() );
 596     return buf;
 597 #endif
 598 }
 599
 600 /**
 601  * There are two decimal separators in the computer world-wide locales:
 602  * dot (which is the american default), and comma (which is used in France,
 603  * the country with the most VLC developers, among others).
 604  *
 605  * i18n_strtod() has the same prototype as ANSI C strtod() but it accepts
 606  * either decimal separator when deserializing the string to a float number,
 607  * independant of the local computer setting.
 608  */
 609 double i18n_strtod( const char *str, char **end )
 610 {
 611     char *end_buf, e;
 612     double d;
 613
 614     if( end == NULL )
 615         end = &end_buf;
 616     d = strtod( str, end );
 617
 618     e = **end;
 619     if(( e == ',' ) || ( e == '.' ))
 620     {
 621         char dup[strlen( str ) + 1];
 622         strcpy( dup, str );
 623
 624         if( dup == NULL )
 625             return d;
 626
 627         dup[*end - str] = ( e == ',' ) ? '.' : ',';
 628         d = strtod( dup, end );
 629     }
 630     return d;
 631 }
 632
 633 /**
 634  * i18n_atof() has the same prototype as ANSI C atof() but it accepts
 635  * either decimal separator when deserializing the string to a float number,
 636  * independant of the local computer setting.
 637  */
 638 double i18n_atof( const char *str )
 639 {
 640     return i18n_strtod( str, NULL );
 641 }
 642
 643
 644 /**
 645  * us_strtod() has the same prototype as ANSI C strtod() but it expects
 646  * a dot as decimal separator regardless of the system locale.
 647  */
 648 double us_strtod( const char *str, char **end )
 649 {
 650     char dup[strlen( str ) + 1], *ptr;
 651     double d;
 652     strcpy( dup, str );
 653
 654     ptr = strchr( dup, ',' );
 655     if( ptr != NULL )
 656         *ptr = '\0';
 657
 658     d = strtod( dup, &ptr );
 659     if( end != NULL )
 660         *end = (char *)&str[ptr - dup];
 661
 662     return d;
 663 }
 664
 665 /**
 666  * us_atof() has the same prototype as ANSI C atof() but it expects a dot
 667  * as decimal separator, regardless of the system locale.
 668  */
 669 double us_atof( const char *str )
 670 {
 671     return us_strtod( str, NULL );
 672 }
 673