////////////////////////////////////////////////////////////
//
// SFML - Simple and Fast Multimedia Library
// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
//
// This software is provided 'as-is', without any express or implied warranty.
// In no event will the authors be held liable for any damages arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it freely,
// subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented;
//    you must not claim that you wrote the original software.
//    If you use this software in a product, an acknowledgment
//    in the product documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such,
//    and must not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source distribution.
//
////////////////////////////////////////////////////////////

26 ////////////////////////////////////////////////////////////
\r
27 /// Generic function to convert an UTF-32 characters range
\r
28 /// to an ANSI characters range, using the given locale
\r
29 ////////////////////////////////////////////////////////////
\r
30 template <typename In, typename Out>
\r
31 inline Out Unicode::UTF32ToANSI(In Begin, In End, Out Output, char Replacement, const std::locale& Locale)
\r
35 // MinGW has a almost no support for unicode stuff
\r
36 // As a consequence, the MinGW version of this function can only use the default locale
\r
37 // and ignores the one passed as parameter
\r
41 if (wctomb(&Char, static_cast<wchar_t>(*Begin++)) >= 0)
\r
43 else if (Replacement)
\r
44 *Output++ = Replacement;
\r
49 // Get the facet of the locale which deals with character conversion
\r
50 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);
\r
52 // Use the facet to convert each character of the input string
\r
54 *Output++ = Facet.narrow(static_cast<wchar_t>(*Begin++), Replacement);
\r
62 ////////////////////////////////////////////////////////////
\r
63 /// Generic function to convert an ANSI characters range
\r
64 /// to an UTF-32 characters range, using the given locale
\r
65 ////////////////////////////////////////////////////////////
\r
66 template <typename In, typename Out>
\r
67 inline Out Unicode::ANSIToUTF32(In Begin, In End, Out Output, const std::locale& Locale)
\r
71 // MinGW has a almost no support for unicode stuff
\r
72 // As a consequence, the MinGW version of this function can only use the default locale
\r
73 // and ignores the one passed as parameter
\r
77 mbtowc(&Char, &*Begin, 1);
\r
79 *Output++ = static_cast<Uint32>(Char);
\r
84 // Get the facet of the locale which deals with character conversion
\r
85 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);
\r
87 // Use the facet to convert each character of the input string
\r
89 *Output++ = static_cast<Uint32>(Facet.widen(*Begin++));
\r
97 ////////////////////////////////////////////////////////////
\r
98 /// Generic function to convert an UTF-8 characters range
\r
99 /// to an UTF-16 characters range, using the given locale
\r
100 ////////////////////////////////////////////////////////////
\r
101 template <typename In, typename Out>
\r
102 inline Out Unicode::UTF8ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)
\r
104 while (Begin < End)
\r
107 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];
\r
108 if (Begin + TrailingBytes < End)
\r
110 // First decode the UTF-8 character
\r
111 switch (TrailingBytes)
\r
113 case 5 : c += *Begin++; c <<= 6;
\r
114 case 4 : c += *Begin++; c <<= 6;
\r
115 case 3 : c += *Begin++; c <<= 6;
\r
116 case 2 : c += *Begin++; c <<= 6;
\r
117 case 1 : c += *Begin++; c <<= 6;
\r
118 case 0 : c += *Begin++;
\r
120 c -= UTF8Offsets[TrailingBytes];
\r
122 // Then encode it in UTF-16
\r
125 // Character can be converted directly to 16 bits, just need to check it's in the valid range
\r
126 if ((c >= 0xD800) && (c <= 0xDFFF))
\r
128 // Invalid character (this range is reserved)
\r
130 *Output++ = Replacement;
\r
134 // Valid character directly convertible to 16 bits
\r
135 *Output++ = static_cast<Uint16>(c);
\r
138 else if (c > 0x0010FFFF)
\r
140 // Invalid character (greater than the maximum unicode value)
\r
142 *Output++ = Replacement;
\r
146 // Character will be converted to 2 UTF-16 elements
\r
148 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800);
\r
149 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);
\r
158 ////////////////////////////////////////////////////////////
\r
159 /// Generic function to convert an UTF-8 characters range
\r
160 /// to an UTF-32 characters range, using the given locale
\r
161 ////////////////////////////////////////////////////////////
\r
162 template <typename In, typename Out>
\r
163 inline Out Unicode::UTF8ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)
\r
165 while (Begin < End)
\r
168 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];
\r
169 if (Begin + TrailingBytes < End)
\r
171 // First decode the UTF-8 character
\r
172 switch (TrailingBytes)
\r
174 case 5 : c += *Begin++; c <<= 6;
\r
175 case 4 : c += *Begin++; c <<= 6;
\r
176 case 3 : c += *Begin++; c <<= 6;
\r
177 case 2 : c += *Begin++; c <<= 6;
\r
178 case 1 : c += *Begin++; c <<= 6;
\r
179 case 0 : c += *Begin++;
\r
181 c -= UTF8Offsets[TrailingBytes];
\r
183 // Then write it if valid
\r
184 if ((c < 0xD800) || (c > 0xDFFF))
\r
186 // Valid UTF-32 character
\r
191 // Invalid UTF-32 character
\r
193 *Output++ = Replacement;
\r
202 ////////////////////////////////////////////////////////////
\r
203 /// Generic function to convert an UTF-16 characters range
\r
204 /// to an UTF-8 characters range, using the given locale
\r
205 ////////////////////////////////////////////////////////////
\r
206 template <typename In, typename Out>
\r
207 inline Out Unicode::UTF16ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)
\r
209 while (Begin < End)
\r
211 Uint32 c = *Begin++;
\r
213 // If it's a surrogate pair, first convert to a single UTF-32 character
\r
214 if ((c >= 0xD800) && (c <= 0xDBFF))
\r
218 // The second element is valid : convert the two elements to a UTF-32 character
\r
219 Uint32 d = *Begin++;
\r
220 if ((d >= 0xDC00) && (d <= 0xDFFF))
\r
221 c = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
\r
225 // Invalid second element
\r
227 *Output++ = Replacement;
\r
231 // Then convert to UTF-8
\r
232 if (c > 0x0010FFFF)
\r
234 // Invalid character (greater than the maximum unicode value)
\r
236 *Output++ = Replacement;
\r
242 // Get number of bytes to write
\r
243 int BytesToWrite = 1;
\r
244 if (c < 0x80) BytesToWrite = 1;
\r
245 else if (c < 0x800) BytesToWrite = 2;
\r
246 else if (c < 0x10000) BytesToWrite = 3;
\r
247 else if (c <= 0x0010FFFF) BytesToWrite = 4;
\r
249 // Extract bytes to write
\r
251 switch (BytesToWrite)
\r
253 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
\r
254 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
\r
255 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
\r
256 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);
\r
259 // Add them to the output
\r
260 const Uint8* CurByte = Bytes;
\r
261 switch (BytesToWrite)
\r
263 case 4 : *Output++ = *CurByte++;
\r
264 case 3 : *Output++ = *CurByte++;
\r
265 case 2 : *Output++ = *CurByte++;
\r
266 case 1 : *Output++ = *CurByte++;
\r
275 ////////////////////////////////////////////////////////////
\r
276 /// Generic function to convert an UTF-16 characters range
\r
277 /// to an UTF-32 characters range, using the given locale
\r
278 ////////////////////////////////////////////////////////////
\r
279 template <typename In, typename Out>
\r
280 inline Out Unicode::UTF16ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)
\r
282 while (Begin < End)
\r
284 Uint16 c = *Begin++;
\r
285 if ((c >= 0xD800) && (c <= 0xDBFF))
\r
287 // We have a surrogate pair, ie. a character composed of two elements
\r
290 Uint16 d = *Begin++;
\r
291 if ((d >= 0xDC00) && (d <= 0xDFFF))
\r
293 // The second element is valid : convert the two elements to a UTF-32 character
\r
294 *Output++ = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
\r
298 // Invalid second element
\r
300 *Output++ = Replacement;
\r
304 else if ((c >= 0xDC00) && (c <= 0xDFFF))
\r
306 // Invalid character
\r
308 *Output++ = Replacement;
\r
312 // Valid character directly convertible to UTF-32
\r
313 *Output++ = static_cast<Uint32>(c);
\r
321 ////////////////////////////////////////////////////////////
\r
322 /// Generic function to convert an UTF-32 characters range
\r
323 /// to an UTF-8 characters range, using the given locale
\r
324 ////////////////////////////////////////////////////////////
\r
325 template <typename In, typename Out>
\r
326 inline Out Unicode::UTF32ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)
\r
328 while (Begin < End)
\r
330 Uint32 c = *Begin++;
\r
331 if (c > 0x0010FFFF)
\r
333 // Invalid character (greater than the maximum unicode value)
\r
335 *Output++ = Replacement;
\r
341 // Get number of bytes to write
\r
342 int BytesToWrite = 1;
\r
343 if (c < 0x80) BytesToWrite = 1;
\r
344 else if (c < 0x800) BytesToWrite = 2;
\r
345 else if (c < 0x10000) BytesToWrite = 3;
\r
346 else if (c <= 0x0010FFFF) BytesToWrite = 4;
\r
348 // Extract bytes to write
\r
350 switch (BytesToWrite)
\r
352 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
\r
353 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
\r
354 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
\r
355 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);
\r
358 // Add them to the output
\r
359 const Uint8* CurByte = Bytes;
\r
360 switch (BytesToWrite)
\r
362 case 4 : *Output++ = *CurByte++;
\r
363 case 3 : *Output++ = *CurByte++;
\r
364 case 2 : *Output++ = *CurByte++;
\r
365 case 1 : *Output++ = *CurByte++;
\r
374 ////////////////////////////////////////////////////////////
\r
375 /// Generic function to convert an UTF-32 characters range
\r
376 /// to an UTF-16 characters range, using the given locale
\r
377 ////////////////////////////////////////////////////////////
\r
378 template <typename In, typename Out>
\r
379 inline Out Unicode::UTF32ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)
\r
381 while (Begin < End)
\r
383 Uint32 c = *Begin++;
\r
386 // Character can be converted directly to 16 bits, just need to check it's in the valid range
\r
387 if ((c >= 0xD800) && (c <= 0xDFFF))
\r
389 // Invalid character (this range is reserved)
\r
391 *Output++ = Replacement;
\r
395 // Valid character directly convertible to 16 bits
\r
396 *Output++ = static_cast<Uint16>(c);
\r
399 else if (c > 0x0010FFFF)
\r
401 // Invalid character (greater than the maximum unicode value)
\r
403 *Output++ = Replacement;
\r
407 // Character will be converted to 2 UTF-16 elements
\r
409 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800);
\r
410 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);
\r
418 ////////////////////////////////////////////////////////////
\r
419 /// Get the number of characters composing an UTF-8 string
\r
420 ////////////////////////////////////////////////////////////
\r
421 template <typename In>
\r
422 inline std::size_t Unicode::GetUTF8Length(In Begin, In End)
\r
424 std::size_t Length = 0;
\r
425 while (Begin < End)
\r
427 int NbBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];
\r
428 if (Begin + NbBytes < End)
\r
431 Begin += NbBytes + 1;
\r
438 ////////////////////////////////////////////////////////////
\r
439 /// Get the number of characters composing an UTF-16 string
\r
440 ////////////////////////////////////////////////////////////
\r
441 template <typename In>
\r
442 inline std::size_t Unicode::GetUTF16Length(In Begin, In End)
\r
444 std::size_t Length = 0;
\r
445 while (Begin < End)
\r
447 if ((*Begin >= 0xD800) && (*Begin <= 0xDBFF))
\r
450 if ((Begin < End) && ((*Begin >= 0xDC00) && (*Begin <= 0xDFFF)))
\r
467 ////////////////////////////////////////////////////////////
\r
468 /// Get the number of characters composing an UTF-32 string
\r
469 ////////////////////////////////////////////////////////////
\r
470 template <typename In>
\r
471 inline std::size_t Unicode::GetUTF32Length(In Begin, In End)
\r
473 return End - Begin;
\r