--- /dev/null
+//////////////////////////////////////////////////////////////////////////\r
+//\r
+// FILE: utf8conv.h\r
+//\r
+// Header file defining prototypes of helper functions for converting \r
+// strings between Unicode UTF-8 and UTF-16.\r
+// (The implementation file is "utf8conv_inl.h").\r
+//\r
+// UTF-8 text is stored in std::string; \r
+// UTF-16 text is stored in std::wstring.\r
+//\r
+// This code uses only the Win32 Platform SDK and the C++ standard library,\r
+// so it can also be used with the Express editions of Visual Studio.\r
+//\r
+//\r
+// Original code: February 4th, 2011\r
+// Last update: October 15th, 2011\r
+//\r
+// - Added more information to the utf8_conversion_error class\r
+// (like the return code of ::GetLastError());\r
+// moreover, the class now derives from std::runtime_error.\r
+//\r
+// - Added conversion function overloads taking raw C strings as input.\r
+// (This is more efficient when there are raw C strings already\r
+// available, because it avoids the creation of temporary\r
+// new std::[w]string's.)\r
+//\r
+// - UTF-8 conversion functions now detect invalid UTF-8 sequences\r
+// thanks to MB_ERR_INVALID_CHARS flag, and throw an exception\r
+// in this case.\r
+//\r
+//\r
+// by Giovanni Dicanio <gdicanio@mvps.org>\r
+//\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#pragma once\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// INCLUDES\r
+//------------------------------------------------------------------------\r
+\r
+#include <stdexcept> // std::runtime_error\r
+#include <string> // STL string classes\r
+\r
+\r
+\r
+namespace utf8util {\r
+\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Exception class representing an error that occurred during a conversion\r
+// between UTF-8 and UTF-16. In addition to the std::runtime_error message,\r
+// it stores the type of conversion that failed and an error code.\r
+//------------------------------------------------------------------------\r
+class utf8_conversion_error \r
+ : public std::runtime_error\r
+{\r
+public:\r
+ \r
+ //\r
+ // Naming convention note:\r
+ // -----------------------\r
+ //\r
+ // This exception class is derived from the std::runtime_error class,\r
+ // so it follows the same naming convention as the STL classes\r
+ // (e.g. do_something_interesting() instead of DoSomethingInteresting()).\r
+ //\r
+\r
+\r
+ // Error code type \r
+ // (a DWORD, as the return value type from ::GetLastError())\r
+ typedef unsigned long error_code_type;\r
+\r
+ // Type (direction) of conversion\r
+ enum conversion_type\r
+ {\r
+ conversion_utf8_from_utf16, // UTF-16 ---> UTF-8\r
+ conversion_utf16_from_utf8 // UTF-8 ---> UTF-16\r
+ };\r
+\r
+\r
+ // Constructs an UTF-8 conversion error exception \r
+ // with a raw C string message, conversion type and error code.\r
+ // The message is passed on to the std::runtime_error base class.\r
+ utf8_conversion_error(\r
+ const char * message, \r
+ conversion_type conversion, \r
+ error_code_type error_code\r
+ );\r
+\r
+\r
+ // Constructs an UTF-8 conversion error exception \r
+ // with a std::string message, conversion type and error code.\r
+ // The message is passed on to the std::runtime_error base class.\r
+ utf8_conversion_error(\r
+ const std::string & message, \r
+ conversion_type conversion, \r
+ error_code_type error_code\r
+ );\r
+\r
+\r
+ // Returns the type of conversion (UTF-8 from UTF-16, or vice versa)\r
+ // that was given to the constructor.\r
+ conversion_type conversion() const;\r
+\r
+\r
+ // Returns the error code that was given to the constructor\r
+ // (which is typically the return value of ::GetLastError()).\r
+ error_code_type error_code() const;\r
+\r
+\r
+\r
+ //\r
+ // IMPLEMENTATION\r
+ //\r
+private:\r
+ conversion_type m_conversion; // kind of conversion\r
+ error_code_type m_error_code; // error code\r
+};\r
+\r
+//------------------------------------------------------------------------\r
+\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a string from UTF-8 to UTF-16.\r
+// The source length is taken from the std::string itself (not from a\r
+// NUL terminator); an empty input gives an empty result.\r
+// On error (e.g. an invalid UTF-8 sequence in the input), throws an\r
+// utf8_conversion_error exception.\r
+//------------------------------------------------------------------------\r
+std::wstring UTF16FromUTF8(const std::string & utf8);\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a raw (NUL-terminated) C string from UTF-8 to UTF-16.\r
+// On error (e.g. an invalid UTF-8 sequence in the input), throws an\r
+// utf8_conversion_error exception.\r
+// If the input pointer is NULL, an empty string is returned.\r
+//------------------------------------------------------------------------\r
+std::wstring UTF16FromUTF8(const char * utf8);\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a string from UTF-16 to UTF-8.\r
+// The source length is taken from the std::wstring itself (not from a\r
+// NUL terminator); an empty input gives an empty result.\r
+// On error, throws an utf8_conversion_error exception.\r
+// NOTE(review): the implementation passes default flags (0) to\r
+// ::WideCharToMultiByte, so invalid UTF-16 input (e.g. unpaired\r
+// surrogates) is presumably not reported as an error - confirm.\r
+//------------------------------------------------------------------------\r
+std::string UTF8FromUTF16(const std::wstring & utf16);\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a raw (NUL-terminated) C string from UTF-16 to UTF-8.\r
+// On error, throws an utf8_conversion_error exception.\r
+// If the input pointer is NULL, an empty string is returned.\r
+//------------------------------------------------------------------------\r
+std::string UTF8FromUTF16(const wchar_t * utf16);\r
+\r
+\r
+} // namespace utf8util\r
+\r
+\r
+\r
+#include "utf8conv_inl.h" // inline implementations\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
--- /dev/null
+//////////////////////////////////////////////////////////////////////////\r
+//\r
+// FILE: utf8conv_inl.h\r
+//\r
+// by Giovanni Dicanio <gdicanio@mvps.org>\r
+//\r
+// Private header file containing implementations of inline functions.\r
+// The public header file for this module is "utf8conv.h"; \r
+// users should *not* #include this private header file directly.\r
+//\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
+#pragma once\r
+\r
+\r
+#include <limits.h> // INT_MAX\r
+#include <string.h> // strlen()\r
+#include <wchar.h> // wcslen()\r
+\r
+#include <Windows.h> // Win32 Platform SDK main header\r
+\r
+\r
+\r
+namespace utf8util {\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Implementation of utf8_conversion_error class methods\r
+//------------------------------------------------------------------------\r
+\r
+// Builds the exception from a raw C string message: the message goes to\r
+// the std::runtime_error base, the conversion type and error code are\r
+// stored in the object.\r
+inline utf8_conversion_error::utf8_conversion_error(\r
+        const char * message,\r
+        conversion_type conversion,\r
+        error_code_type error_code)\r
+    : std::runtime_error(message)\r
+    , m_conversion(conversion)\r
+    , m_error_code(error_code)\r
+{\r
+    // Nothing else to do: all state is set by the initializer list.\r
+}\r
+\r
+\r
+// Builds the exception from a std::string message: the message goes to\r
+// the std::runtime_error base, the conversion type and error code are\r
+// stored in the object.\r
+inline utf8_conversion_error::utf8_conversion_error(\r
+        const std::string & message,\r
+        conversion_type conversion,\r
+        error_code_type error_code)\r
+    : std::runtime_error(message)\r
+    , m_conversion(conversion)\r
+    , m_error_code(error_code)\r
+{\r
+    // Nothing else to do: all state is set by the initializer list.\r
+}\r
+\r
+\r
+// Accessor: returns the conversion type stored at construction.\r
+inline utf8_conversion_error::conversion_type\r
+utf8_conversion_error::conversion() const\r
+{\r
+    return this->m_conversion;\r
+}\r
+\r
+\r
+inline utf8_conversion_error::error_code_type utf8_conversion_error::error_code() const\r
+{\r
+ return m_error_code;\r
+}\r
+\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Implementation of module functions\r
+//------------------------------------------------------------------------\r
+\r
+\r
+inline std::wstring UTF16FromUTF8(const std::string & utf8)\r
+{\r
+ //\r
+ // Special case of empty input string\r
+ //\r
+ if (utf8.empty())\r
+ return std::wstring();\r
+\r
+\r
+ // Fail if an invalid input character is encountered\r
+ const DWORD conversionFlags = MB_ERR_INVALID_CHARS;\r
+\r
+\r
+ //\r
+ // Get length (in wchar_t's) of resulting UTF-16 string\r
+ //\r
+ const int utf16Length = ::MultiByteToWideChar(\r
+ CP_UTF8, // convert from UTF-8\r
+ conversionFlags, // flags\r
+ utf8.data(), // source UTF-8 string\r
+ utf8.length(), // length (in chars) of source UTF-8 string\r
+ NULL, // unused - no conversion done in this step\r
+ 0 // request size of destination buffer, in wchar_t's\r
+ );\r
+ if (utf16Length == 0)\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+\r
+ throw utf8_conversion_error(\r
+ (error == ERROR_NO_UNICODE_TRANSLATION) ? \r
+ "Invalid UTF-8 sequence found in input string." :\r
+ "Can't get length of UTF-16 string (MultiByteToWideChar failed).", \r
+ utf8_conversion_error::conversion_utf16_from_utf8,\r
+ error); \r
+ }\r
+\r
+\r
+ //\r
+ // Allocate destination buffer for UTF-16 string\r
+ //\r
+ std::wstring utf16;\r
+ utf16.resize(utf16Length);\r
+\r
+\r
+ //\r
+ // Do the conversion from UTF-8 to UTF-16\r
+ //\r
+ if ( ! ::MultiByteToWideChar(\r
+ CP_UTF8, // convert from UTF-8\r
+ 0, // validation was done in previous call, \r
+ // so speed up things with default flags\r
+ utf8.data(), // source UTF-8 string\r
+ utf8.length(), // length (in chars) of source UTF-8 string\r
+ &utf16[0], // destination buffer\r
+ utf16.length() // size of destination buffer, in wchar_t's\r
+ ) )\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+ throw utf8_conversion_error(\r
+ "Can't convert string from UTF-8 to UTF-16 (MultiByteToWideChar failed).", \r
+ utf8_conversion_error::conversion_utf16_from_utf8,\r
+ error);\r
+ }\r
+\r
+\r
+ //\r
+ // Return resulting UTF-16 string\r
+ //\r
+ return utf16;\r
+}\r
+\r
+\r
+\r
+inline std::wstring UTF16FromUTF8(const char * utf8)\r
+{\r
+ //\r
+ // Special case of empty input string\r
+ //\r
+ if (utf8 == NULL || *utf8 == '\0')\r
+ return std::wstring();\r
+\r
+\r
+ // Prefetch the length of the input UTF-8 string\r
+ const int utf8Length = static_cast<int>(strlen(utf8));\r
+\r
+ // Fail if an invalid input character is encountered\r
+ const DWORD conversionFlags = MB_ERR_INVALID_CHARS;\r
+\r
+ //\r
+ // Get length (in wchar_t's) of resulting UTF-16 string\r
+ //\r
+ const int utf16Length = ::MultiByteToWideChar(\r
+ CP_UTF8, // convert from UTF-8\r
+ conversionFlags, // flags\r
+ utf8, // source UTF-8 string\r
+ utf8Length, // length (in chars) of source UTF-8 string\r
+ NULL, // unused - no conversion done in this step\r
+ 0 // request size of destination buffer, in wchar_t's\r
+ );\r
+ if (utf16Length == 0)\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+ throw utf8_conversion_error(\r
+ (error == ERROR_NO_UNICODE_TRANSLATION) ? \r
+ "Invalid UTF-8 sequence found in input string." :\r
+ "Can't get length of UTF-16 string (MultiByteToWideChar failed).", \r
+ utf8_conversion_error::conversion_utf16_from_utf8,\r
+ error);\r
+ }\r
+\r
+\r
+ //\r
+ // Allocate destination buffer for UTF-16 string\r
+ //\r
+ std::wstring utf16;\r
+ utf16.resize(utf16Length);\r
+\r
+\r
+ //\r
+ // Do the conversion from UTF-8 to UTF-16\r
+ //\r
+ if ( ! ::MultiByteToWideChar(\r
+ CP_UTF8, // convert from UTF-8\r
+ 0, // validation was done in previous call, \r
+ // so speed up things with default flags\r
+ utf8, // source UTF-8 string\r
+ utf8Length, // length (in chars) of source UTF-8 string\r
+ &utf16[0], // destination buffer\r
+ utf16.length() // size of destination buffer, in wchar_t's\r
+ ) )\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+ throw utf8_conversion_error(\r
+ "Can't convert string from UTF-8 to UTF-16 (MultiByteToWideChar failed).", \r
+ utf8_conversion_error::conversion_utf16_from_utf8,\r
+ error);\r
+ }\r
+\r
+\r
+ //\r
+ // Return resulting UTF-16 string\r
+ //\r
+ return utf16;\r
+}\r
+\r
+\r
+\r
+inline std::string UTF8FromUTF16(const std::wstring & utf16)\r
+{\r
+ //\r
+ // Special case of empty input string\r
+ //\r
+ if (utf16.empty())\r
+ return std::string();\r
+\r
+\r
+ //\r
+ // Get length (in chars) of resulting UTF-8 string\r
+ //\r
+ const int utf8Length = ::WideCharToMultiByte(\r
+ CP_UTF8, // convert to UTF-8\r
+ 0, // default flags\r
+ utf16.data(), // source UTF-16 string\r
+ utf16.length(), // source string length, in wchar_t's,\r
+ NULL, // unused - no conversion required in this step\r
+ 0, // request buffer size\r
+ NULL, NULL // unused\r
+ );\r
+ if (utf8Length == 0)\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+ throw utf8_conversion_error(\r
+ "Can't get length of UTF-8 string (WideCharToMultiByte failed).", \r
+ utf8_conversion_error::conversion_utf8_from_utf16,\r
+ error);\r
+ }\r
+\r
+\r
+ //\r
+ // Allocate destination buffer for UTF-8 string\r
+ //\r
+ std::string utf8;\r
+ utf8.resize(utf8Length);\r
+\r
+\r
+ //\r
+ // Do the conversion from UTF-16 to UTF-8\r
+ //\r
+ if ( ! ::WideCharToMultiByte(\r
+ CP_UTF8, // convert to UTF-8\r
+ 0, // default flags\r
+ utf16.data(), // source UTF-16 string\r
+ utf16.length(), // source string length, in wchar_t's,\r
+ &utf8[0], // destination buffer\r
+ utf8.length(), // destination buffer size, in chars\r
+ NULL, NULL // unused\r
+ ) )\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+ throw utf8_conversion_error(\r
+ "Can't convert string from UTF-16 to UTF-8 (WideCharToMultiByte failed).", \r
+ utf8_conversion_error::conversion_utf8_from_utf16,\r
+ error);\r
+ }\r
+\r
+\r
+ //\r
+ // Return resulting UTF-8 string\r
+ //\r
+ return utf8;\r
+}\r
+\r
+\r
+\r
+inline std::string UTF8FromUTF16(const wchar_t * utf16)\r
+{\r
+ //\r
+ // Special case of empty input string\r
+ //\r
+ if (utf16 == NULL || *utf16 == L'\0')\r
+ return std::string();\r
+\r
+\r
+ // Prefetch the length of the input UTF-16 string\r
+ const int utf16Length = static_cast<int>(wcslen(utf16));\r
+ \r
+\r
+ //\r
+ // Get length (in chars) of resulting UTF-8 string\r
+ //\r
+ const int utf8Length = ::WideCharToMultiByte(\r
+ CP_UTF8, // convert to UTF-8\r
+ 0, // default flags\r
+ utf16, // source UTF-16 string\r
+ utf16Length, // source string length, in wchar_t's,\r
+ NULL, // unused - no conversion required in this step\r
+ 0, // request buffer size\r
+ NULL, NULL // unused\r
+ );\r
+ if (utf8Length == 0)\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+ throw utf8_conversion_error(\r
+ "Can't get length of UTF-8 string (WideCharToMultiByte failed).", \r
+ utf8_conversion_error::conversion_utf8_from_utf16,\r
+ error);\r
+ }\r
+\r
+\r
+ //\r
+ // Allocate destination buffer for UTF-8 string\r
+ //\r
+ std::string utf8;\r
+ utf8.resize(utf8Length);\r
+\r
+\r
+ //\r
+ // Do the conversion from UTF-16 to UTF-8\r
+ //\r
+ if ( ! ::WideCharToMultiByte(\r
+ CP_UTF8, // convert to UTF-8\r
+ 0, // default flags\r
+ utf16, // source UTF-16 string\r
+ utf16Length, // source string length, in wchar_t's,\r
+ &utf8[0], // destination buffer\r
+ utf8.length(), // destination buffer size, in chars\r
+ NULL, NULL // unused\r
+ ) )\r
+ {\r
+ // Error\r
+ DWORD error = ::GetLastError();\r
+ throw utf8_conversion_error(\r
+ "Can't convert string from UTF-16 to UTF-8 (WideCharToMultiByte failed).", \r
+ utf8_conversion_error::conversion_utf8_from_utf16,\r
+ error);\r
+ }\r
+\r
+\r
+ //\r
+ // Return resulting UTF-8 string\r
+ //\r
+ return utf8;\r
+}\r
+\r
+\r
+\r
+} // namespace utf8util\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////\r
+\r