git.sesse.net Git - casparcg/blob - SFML-1.6/include/SFML/System/Unicode.inl

   1 ////////////////////////////////////////////////////////////\r
   2 //\r
   3 // SFML - Simple and Fast Multimedia Library\r
   4 // Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)\r
   5 //\r
   6 // This software is provided 'as-is', without any express or implied warranty.\r
   7 // In no event will the authors be held liable for any damages arising from the use of this software.\r
   8 //\r
   9 // Permission is granted to anyone to use this software for any purpose,\r
  10 // including commercial applications, and to alter it and redistribute it freely,\r
  11 // subject to the following restrictions:\r
  12 //\r
  13 // 1. The origin of this software must not be misrepresented;\r
  14 //    you must not claim that you wrote the original software.\r
  15 //    If you use this software in a product, an acknowledgment\r
  16 //    in the product documentation would be appreciated but is not required.\r
  17 //\r
  18 // 2. Altered source versions must be plainly marked as such,\r
  19 //    and must not be misrepresented as being the original software.\r
  20 //\r
  21 // 3. This notice may not be removed or altered from any source distribution.\r
  22 //\r
  23 ////////////////////////////////////////////////////////////\r
  24 \r
  25 \r
  26 ////////////////////////////////////////////////////////////\r
  27 /// Generic function to convert an UTF-32 characters range\r
  28 /// to an ANSI characters range, using the given locale\r
  29 ////////////////////////////////////////////////////////////\r
  30 template <typename In, typename Out>\r
  31 inline Out Unicode::UTF32ToANSI(In Begin, In End, Out Output, char Replacement, const std::locale& Locale)\r
  32 {\r
  33     #ifdef __MINGW32__\r
  34 \r
  35         // MinGW has a almost no support for unicode stuff\r
  36         // As a consequence, the MinGW version of this function can only use the default locale\r
  37         // and ignores the one passed as parameter\r
  38         while (Begin < End)\r
  39         {\r
  40             char Char = 0;\r
  41             if (wctomb(&Char, static_cast<wchar_t>(*Begin++)) >= 0)\r
  42                 *Output++ = Char;\r
  43             else if (Replacement)\r
  44                 *Output++ = Replacement;\r
  45         }\r
  46 \r
  47     #else\r
  48 \r
  49         // Get the facet of the locale which deals with character conversion\r
  50         const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);\r
  51 \r
  52         // Use the facet to convert each character of the input string\r
  53         while (Begin < End)\r
  54             *Output++ = Facet.narrow(static_cast<wchar_t>(*Begin++), Replacement);\r
  55 \r
  56     #endif\r
  57 \r
  58     return Output;\r
  59 }\r
  60 \r
  61 \r
  62 ////////////////////////////////////////////////////////////\r
  63 /// Generic function to convert an ANSI characters range\r
  64 /// to an UTF-32 characters range, using the given locale\r
  65 ////////////////////////////////////////////////////////////\r
  66 template <typename In, typename Out>\r
  67 inline Out Unicode::ANSIToUTF32(In Begin, In End, Out Output, const std::locale& Locale)\r
  68 {\r
  69     #ifdef __MINGW32__\r
  70 \r
  71         // MinGW has a almost no support for unicode stuff\r
  72         // As a consequence, the MinGW version of this function can only use the default locale\r
  73         // and ignores the one passed as parameter\r
  74         while (Begin < End)\r
  75         {\r
  76             wchar_t Char = 0;\r
  77             mbtowc(&Char, &*Begin, 1);\r
  78             Begin++;\r
  79             *Output++ = static_cast<Uint32>(Char);\r
  80         }\r
  81 \r
  82     #else\r
  83 \r
  84         // Get the facet of the locale which deals with character conversion\r
  85         const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);\r
  86 \r
  87         // Use the facet to convert each character of the input string\r
  88         while (Begin < End)\r
  89             *Output++ = static_cast<Uint32>(Facet.widen(*Begin++));\r
  90 \r
  91     #endif\r
  92 \r
  93     return Output;\r
  94 }\r
  95 \r
  96 \r
  97 ////////////////////////////////////////////////////////////\r
  98 /// Generic function to convert an UTF-8 characters range\r
  99 /// to an UTF-16 characters range, using the given locale\r
 100 ////////////////////////////////////////////////////////////\r
 101 template <typename In, typename Out>\r
 102 inline Out Unicode::UTF8ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)\r
 103 {\r
 104     while (Begin < End)\r
 105     {\r
 106         Uint32 c = 0;\r
 107         int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];\r
 108         if (Begin + TrailingBytes < End)\r
 109         {\r
 110             // First decode the UTF-8 character\r
 111             switch (TrailingBytes)\r
 112             {\r
 113                 case 5 : c += *Begin++; c <<= 6;\r
 114                 case 4 : c += *Begin++; c <<= 6;\r
 115                 case 3 : c += *Begin++; c <<= 6;\r
 116                 case 2 : c += *Begin++; c <<= 6;\r
 117                 case 1 : c += *Begin++; c <<= 6;\r
 118                 case 0 : c += *Begin++;\r
 119             }\r
 120                 c -= UTF8Offsets[TrailingBytes];\r
 121 \r
 122             // Then encode it in UTF-16\r
 123             if (c < 0xFFFF)\r
 124             {\r
 125                 // Character can be converted directly to 16 bits, just need to check it's in the valid range\r
 126                 if ((c >= 0xD800) && (c <= 0xDFFF))\r
 127                 {\r
 128                     // Invalid character (this range is reserved)\r
 129                     if (Replacement)\r
 130                         *Output++ = Replacement;\r
 131                 }\r
 132                 else\r
 133                 {\r
 134                     // Valid character directly convertible to 16 bits\r
 135                     *Output++ = static_cast<Uint16>(c);\r
 136                 }\r
 137             }\r
 138             else if (c > 0x0010FFFF)\r
 139             {\r
 140                 // Invalid character (greater than the maximum unicode value)\r
 141                 if (Replacement)\r
 142                     *Output++ = Replacement;\r
 143             }\r
 144             else\r
 145             {\r
 146                 // Character will be converted to 2 UTF-16 elements\r
 147                 c -= 0x0010000;\r
 148                 *Output++ = static_cast<Uint16>((c >> 10)     + 0xD800);\r
 149                 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);\r
 150             }\r
 151         }\r
 152     }\r
 153 \r
 154     return Output;\r
 155 }\r
 156 \r
 157 \r
 158 ////////////////////////////////////////////////////////////\r
 159 /// Generic function to convert an UTF-8 characters range\r
 160 /// to an UTF-32 characters range, using the given locale\r
 161 ////////////////////////////////////////////////////////////\r
 162 template <typename In, typename Out>\r
 163 inline Out Unicode::UTF8ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)\r
 164 {\r
 165     while (Begin < End)\r
 166     {\r
 167         Uint32 c = 0;\r
 168         int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];\r
 169         if (Begin + TrailingBytes < End)\r
 170         {\r
 171             // First decode the UTF-8 character\r
 172             switch (TrailingBytes)\r
 173             {\r
 174                 case 5 : c += *Begin++; c <<= 6;\r
 175                 case 4 : c += *Begin++; c <<= 6;\r
 176                 case 3 : c += *Begin++; c <<= 6;\r
 177                 case 2 : c += *Begin++; c <<= 6;\r
 178                 case 1 : c += *Begin++; c <<= 6;\r
 179                 case 0 : c += *Begin++;\r
 180             }\r
 181                 c -= UTF8Offsets[TrailingBytes];\r
 182 \r
 183             // Then write it if valid\r
 184             if ((c < 0xD800) || (c > 0xDFFF))\r
 185             {\r
 186                 // Valid UTF-32 character\r
 187                 *Output++ = c;\r
 188             }\r
 189             else\r
 190             {\r
 191                 // Invalid UTF-32 character\r
 192                 if (Replacement)\r
 193                     *Output++ = Replacement;\r
 194             }\r
 195         }\r
 196     }\r
 197 \r
 198     return Output;\r
 199 }\r
 200 \r
 201 \r
 202 ////////////////////////////////////////////////////////////\r
 203 /// Generic function to convert an UTF-16 characters range\r
 204 /// to an UTF-8 characters range, using the given locale\r
 205 ////////////////////////////////////////////////////////////\r
 206 template <typename In, typename Out>\r
 207 inline Out Unicode::UTF16ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)\r
 208 {\r
 209     while (Begin < End)\r
 210     {\r
 211         Uint32 c = *Begin++;\r
 212 \r
 213         // If it's a surrogate pair, first convert to a single UTF-32 character\r
 214         if ((c >= 0xD800) && (c <= 0xDBFF))\r
 215         {\r
 216             if (Begin < End)\r
 217             {\r
 218                 // The second element is valid : convert the two elements to a UTF-32 character\r
 219                 Uint32 d = *Begin++;\r
 220                 if ((d >= 0xDC00) && (d <= 0xDFFF))\r
 221                     c = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);\r
 222             }\r
 223             else\r
 224             {\r
 225                 // Invalid second element\r
 226                 if (Replacement)\r
 227                     *Output++ = Replacement;\r
 228             }\r
 229         }\r
 230 \r
 231         // Then convert to UTF-8\r
 232         if (c > 0x0010FFFF)\r
 233         {\r
 234             // Invalid character (greater than the maximum unicode value)\r
 235             if (Replacement)\r
 236                 *Output++ = Replacement;\r
 237         }\r
 238         else\r
 239         {\r
 240             // Valid character\r
 241 \r
 242             // Get number of bytes to write\r
 243             int BytesToWrite = 1;\r
 244             if      (c <  0x80)       BytesToWrite = 1;\r
 245             else if (c <  0x800)      BytesToWrite = 2;\r
 246             else if (c <  0x10000)    BytesToWrite = 3;\r
 247             else if (c <= 0x0010FFFF) BytesToWrite = 4;\r
 248 \r
 249             // Extract bytes to write\r
 250             Uint8 Bytes[4];\r
 251             switch (BytesToWrite)\r
 252             {\r
 253                 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;\r
 254                 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;\r
 255                 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;\r
 256                 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);\r
 257             }\r
 258 \r
 259             // Add them to the output\r
 260             const Uint8* CurByte = Bytes;\r
 261             switch (BytesToWrite)\r
 262             {\r
 263                 case 4 : *Output++ = *CurByte++;\r
 264                 case 3 : *Output++ = *CurByte++;\r
 265                 case 2 : *Output++ = *CurByte++;\r
 266                 case 1 : *Output++ = *CurByte++;\r
 267             }\r
 268         }\r
 269     }\r
 270 \r
 271     return Output;\r
 272 }\r
 273 \r
 274 \r
 275 ////////////////////////////////////////////////////////////\r
 276 /// Generic function to convert an UTF-16 characters range\r
 277 /// to an UTF-32 characters range, using the given locale\r
 278 ////////////////////////////////////////////////////////////\r
 279 template <typename In, typename Out>\r
 280 inline Out Unicode::UTF16ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)\r
 281 {\r
 282     while (Begin < End)\r
 283     {\r
 284         Uint16 c = *Begin++;\r
 285         if ((c >= 0xD800) && (c <= 0xDBFF))\r
 286         {\r
 287             // We have a surrogate pair, ie. a character composed of two elements\r
 288             if (Begin < End)\r
 289             {\r
 290                 Uint16 d = *Begin++;\r
 291                 if ((d >= 0xDC00) && (d <= 0xDFFF))\r
 292                 {\r
 293                     // The second element is valid : convert the two elements to a UTF-32 character\r
 294                     *Output++ = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);\r
 295                 }\r
 296                 else\r
 297                 {\r
 298                     // Invalid second element\r
 299                     if (Replacement)\r
 300                         *Output++ = Replacement;\r
 301                 }\r
 302             }\r
 303         }\r
 304         else if ((c >= 0xDC00) && (c <= 0xDFFF))\r
 305         {\r
 306             // Invalid character\r
 307             if (Replacement)\r
 308                 *Output++ = Replacement;\r
 309         }\r
 310         else\r
 311         {\r
 312             // Valid character directly convertible to UTF-32\r
 313             *Output++ = static_cast<Uint32>(c);\r
 314         }\r
 315     }\r
 316 \r
 317     return Output;\r
 318 }\r
 319 \r
 320 \r
 321 ////////////////////////////////////////////////////////////\r
 322 /// Generic function to convert an UTF-32 characters range\r
 323 /// to an UTF-8 characters range, using the given locale\r
 324 ////////////////////////////////////////////////////////////\r
 325 template <typename In, typename Out>\r
 326 inline Out Unicode::UTF32ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)\r
 327 {\r
 328     while (Begin < End)\r
 329     {\r
 330         Uint32 c = *Begin++;\r
 331         if (c > 0x0010FFFF)\r
 332         {\r
 333             // Invalid character (greater than the maximum unicode value)\r
 334             if (Replacement)\r
 335                 *Output++ = Replacement;\r
 336         }\r
 337         else\r
 338         {\r
 339             // Valid character\r
 340 \r
 341             // Get number of bytes to write\r
 342             int BytesToWrite = 1;\r
 343             if      (c <  0x80)       BytesToWrite = 1;\r
 344             else if (c <  0x800)      BytesToWrite = 2;\r
 345             else if (c <  0x10000)    BytesToWrite = 3;\r
 346             else if (c <= 0x0010FFFF) BytesToWrite = 4;\r
 347 \r
 348             // Extract bytes to write\r
 349             Uint8 Bytes[4];\r
 350             switch (BytesToWrite)\r
 351             {\r
 352                 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;\r
 353                 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;\r
 354                 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;\r
 355                 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);\r
 356             }\r
 357 \r
 358             // Add them to the output\r
 359             const Uint8* CurByte = Bytes;\r
 360             switch (BytesToWrite)\r
 361             {\r
 362                 case 4 : *Output++ = *CurByte++;\r
 363                 case 3 : *Output++ = *CurByte++;\r
 364                 case 2 : *Output++ = *CurByte++;\r
 365                 case 1 : *Output++ = *CurByte++;\r
 366             }\r
 367         }\r
 368     }\r
 369 \r
 370     return Output;\r
 371 }\r
 372 \r
 373 \r
 374 ////////////////////////////////////////////////////////////\r
 375 /// Generic function to convert an UTF-32 characters range\r
 376 /// to an UTF-16 characters range, using the given locale\r
 377 ////////////////////////////////////////////////////////////\r
 378 template <typename In, typename Out>\r
 379 inline Out Unicode::UTF32ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)\r
 380 {\r
 381     while (Begin < End)\r
 382     {\r
 383         Uint32 c = *Begin++;\r
 384         if (c < 0xFFFF)\r
 385         {\r
 386             // Character can be converted directly to 16 bits, just need to check it's in the valid range\r
 387             if ((c >= 0xD800) && (c <= 0xDFFF))\r
 388             {\r
 389                 // Invalid character (this range is reserved)\r
 390                 if (Replacement)\r
 391                     *Output++ = Replacement;\r
 392             }\r
 393             else\r
 394             {\r
 395                 // Valid character directly convertible to 16 bits\r
 396                 *Output++ = static_cast<Uint16>(c);\r
 397             }\r
 398         }\r
 399         else if (c > 0x0010FFFF)\r
 400         {\r
 401             // Invalid character (greater than the maximum unicode value)\r
 402             if (Replacement)\r
 403                 *Output++ = Replacement;\r
 404         }\r
 405         else\r
 406         {\r
 407             // Character will be converted to 2 UTF-16 elements\r
 408             c -= 0x0010000;\r
 409             *Output++ = static_cast<Uint16>((c >> 10)     + 0xD800);\r
 410             *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);\r
 411         }\r
 412     }\r
 413 \r
 414     return Output;\r
 415 }\r
 416 \r
 417 \r
 418 ////////////////////////////////////////////////////////////\r
 419 /// Get the number of characters composing an UTF-8 string\r
 420 ////////////////////////////////////////////////////////////\r
 421 template <typename In>\r
 422 inline std::size_t Unicode::GetUTF8Length(In Begin, In End)\r
 423 {\r
 424     std::size_t Length = 0;\r
 425     while (Begin < End)\r
 426     {\r
 427         int NbBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];\r
 428         if (Begin + NbBytes < End)\r
 429             ++Length;\r
 430 \r
 431         Begin += NbBytes + 1;\r
 432     }\r
 433 \r
 434     return Length;\r
 435 }\r
 436 \r
 437 \r
 438 ////////////////////////////////////////////////////////////\r
 439 /// Get the number of characters composing an UTF-16 string\r
 440 ////////////////////////////////////////////////////////////\r
 441 template <typename In>\r
 442 inline std::size_t Unicode::GetUTF16Length(In Begin, In End)\r
 443 {\r
 444     std::size_t Length = 0;\r
 445     while (Begin < End)\r
 446     {\r
 447         if ((*Begin >= 0xD800) && (*Begin <= 0xDBFF))\r
 448         {\r
 449             ++Begin;\r
 450             if ((Begin < End) && ((*Begin >= 0xDC00) && (*Begin <= 0xDFFF)))\r
 451             {\r
 452                 ++Length;\r
 453             }\r
 454         }\r
 455         else\r
 456         {\r
 457             ++Length;\r
 458         }\r
 459 \r
 460         ++Begin;\r
 461     }\r
 462 \r
 463     return Length;\r
 464 }\r
 465 \r
 466 \r
 467 ////////////////////////////////////////////////////////////\r
 468 /// Get the number of characters composing an UTF-32 string\r
 469 ////////////////////////////////////////////////////////////\r
 470 template <typename In>\r
 471 inline std::size_t Unicode::GetUTF32Length(In Begin, In End)\r
 472 {\r
 473     return End - Begin;\r
 474 }\r