]> git.sesse.net Git - casparcg/commitdiff
2.0.2: string: Properly convert between utf16 and utf8.
authorronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Mon, 5 Dec 2011 09:42:16 +0000 (09:42 +0000)
committerronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Mon, 5 Dec 2011 09:42:16 +0000 (09:42 +0000)
git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches/2.0.2@1790 362d55ac-95cf-4e76-9f9a-cbaa9c17b72d

common/common.vcxproj
common/common.vcxproj.filters
common/utility/string.cpp
common/utility/utf8conv.h [new file with mode: 0644]
common/utility/utf8conv_inl.h [new file with mode: 0644]
modules/decklink/interop/DeckLinkAPI_h.h
modules/decklink/interop/DeckLinkAPI_i.c

index 98c2b1750c157dae6ad5c5ab0e71dcb51f8b7848..fecb1cad9ce984363a1c6936e5d2206dbeb8bdef 100644 (file)
     <ClInclude Include="utility\string.h" />\r
     <ClInclude Include="utility\timer.h" />\r
     <ClInclude Include="utility\tweener.h" />\r
+    <ClInclude Include="utility\utf8conv.h" />\r
+    <ClInclude Include="utility\utf8conv_inl.h" />\r
   </ItemGroup>\r
   <ItemGroup>\r
     <ClCompile Include="diagnostics\graph.cpp">\r
index a6a80de9c8faffb049f4d819b75d5f771cda77f1..8d1362df984288324a4ac0535bb310584caf63d7 100644 (file)
     <ClInclude Include="utility\param.h">\r
       <Filter>source\utility</Filter>\r
     </ClInclude>\r
+    <ClInclude Include="utility\utf8conv.h">\r
+      <Filter>source\utility</Filter>\r
+    </ClInclude>\r
+    <ClInclude Include="utility\utf8conv_inl.h">\r
+      <Filter>source\utility</Filter>\r
+    </ClInclude>\r
   </ItemGroup>\r
 </Project>
\ No newline at end of file
index d0ab7503a585cfc8e8f37e636f761a78ff7531dd..76954b89b7c5c056c1fc6b23fdc752f5aa9434f3 100644 (file)
 \r
 #include "../stdafx.h"\r
 \r
+#include "utf8conv.h"\r
+\r
 namespace caspar {\r
        \r
 std::wstring widen(const std::string& str)\r
 {\r
-       return std::wstring(str.begin(), str.end());\r
+       return utf8util::UTF16FromUTF8(str);// str is decoded as UTF-8 (was: per-char copy std::wstring(str.begin(), str.end())); can throw utf8util::utf8_conversion_error\r
 }\r
 \r
 std::wstring widen(const std::wstring& str)\r
@@ -35,7 +37,7 @@ std::wstring widen(const std::wstring& str)
           \r
 std::string narrow(const std::wstring& str)\r
 {\r
-       return std::string(str.begin(), str.end());\r
+       return utf8util::UTF8FromUTF16(str);// str is encoded as UTF-8 (was: per-char copy std::string(str.begin(), str.end())); can throw utf8util::utf8_conversion_error\r
 }\r
           \r
 std::string narrow(const std::string& str)\r
diff --git a/common/utility/utf8conv.h b/common/utility/utf8conv.h
new file mode 100644 (file)
index 0000000..8194247
--- /dev/null
@@ -0,0 +1,162 @@
+//////////////////////////////////////////////////////////////////////////\r
+//\r
+// FILE: utf8conv.h\r
+//\r
+// Header file defining prototypes of helper functions for converting \r
+// strings between Unicode UTF-8 and UTF-16.\r
+// (The implementation file is "utf8conv_inl.h").\r
+//\r
+// UTF-8 text is stored in std::string; \r
+// UTF-16 text is stored in std::wstring.\r
+//\r
+// This code just uses Win32 Platform SDK and C++ standard library; \r
+// so it can be used also with the Express editions of Visual Studio.\r
+//\r
+//\r
+// Original code: February 4th, 2011\r
+// Last update:   October 15th, 2011\r
+//\r
+// - Added more information to the utf8_conversion_error class\r
+//   (like the return code of ::GetLastError());\r
+//   moreover, the class now derives from std::runtime_error.\r
+//\r
+// - Added conversion function overloads taking raw C strings as input.\r
+//   (This is more efficient when there are raw C strings already\r
+//   available, because it avoids the creation of temporary\r
+//   new std::[w]string's.)\r
+//\r
+// - UTF-8 conversion functions now detect invalid UTF-8 sequences\r
+//   thanks to MB_ERR_INVALID_CHARS flag, and throw an exception\r
+//   in this case.\r
+//\r
+//\r
+// by Giovanni Dicanio <gdicanio@mvps.org>\r
+//\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#pragma once\r
+\r
+\r
+//------------------------------------------------------------------------\r
+//                              INCLUDES\r
+//------------------------------------------------------------------------\r
+\r
+#include <stdexcept>    // std::runtime_error\r
+#include <string>       // STL string classes\r
+\r
+\r
+\r
+namespace utf8util {\r
+\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Exception class representing an error that occurred during a UTF-8 <-> UTF-16 conversion.\r
+//------------------------------------------------------------------------\r
+class utf8_conversion_error \r
+    : public std::runtime_error\r
+{\r
+public:\r
+  \r
+    //\r
+    // Naming convention note:\r
+    // -----------------------\r
+    //\r
+    // This exception class is derived from the std::runtime_error class,\r
+    // so it follows the naming convention of the STL classes\r
+    // (e.g. do_something_interesting() instead of DoSomethingInteresting()).\r
+    //\r
+\r
+\r
+    // Error code type \r
+    // (same width as a DWORD, the return value type of ::GetLastError())\r
+    typedef unsigned long error_code_type;\r
+\r
+    // Direction of the conversion that failed\r
+    enum conversion_type\r
+    {\r
+        conversion_utf8_from_utf16,     // UTF-16 ---> UTF-8\r
+        conversion_utf16_from_utf8      // UTF-8  ---> UTF-16\r
+    };\r
+\r
+\r
+    // Constructs a UTF-8 conversion error exception from a raw C string\r
+    // message (passed on to std::runtime_error), a conversion type and an error code.\r
+    utf8_conversion_error(\r
+        const char * message, \r
+        conversion_type conversion, \r
+        error_code_type error_code\r
+    );\r
+\r
+\r
+    // Constructs a UTF-8 conversion error exception from a std::string\r
+    // message (passed on to std::runtime_error), a conversion type and an error code.\r
+    utf8_conversion_error(\r
+        const std::string & message, \r
+        conversion_type conversion, \r
+        error_code_type error_code\r
+    );\r
+\r
+\r
+    // Returns the type of conversion (UTF-8 from UTF-16, or vice versa) given at construction\r
+    conversion_type conversion() const;\r
+\r
+\r
+    // Returns the error code given at construction\r
+    // (which is typically the return value of ::GetLastError()).\r
+    error_code_type error_code() const;\r
+\r
+\r
+\r
+    //\r
+    // IMPLEMENTATION (member functions are defined inline in "utf8conv_inl.h")\r
+    //\r
+private:\r
+    conversion_type m_conversion;   // kind of conversion\r
+    error_code_type m_error_code;   // error code\r
+};\r
+\r
+//------------------------------------------------------------------------\r
+\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a UTF-8 std::string (all utf8.length() chars) to UTF-16.\r
+// On error, can throw a utf8_conversion_error exception.\r
+//------------------------------------------------------------------------\r
+std::wstring UTF16FromUTF8(const std::string & utf8);\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a NUL-terminated raw C string from UTF-8 to UTF-16.\r
+// On error, can throw a utf8_conversion_error exception.\r
+// If the input pointer is NULL (or the string is empty), an empty string is returned.\r
+//------------------------------------------------------------------------\r
+std::wstring UTF16FromUTF8(const char * utf8);\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a UTF-16 std::wstring (all utf16.length() wchar_t's) to UTF-8.\r
+// On error, can throw a utf8_conversion_error exception.\r
+//------------------------------------------------------------------------\r
+std::string UTF8FromUTF16(const std::wstring & utf16);\r
+\r
+\r
+//------------------------------------------------------------------------\r
+// Converts a NUL-terminated raw C string from UTF-16 to UTF-8.\r
+// On error, can throw a utf8_conversion_error exception.\r
+// If the input pointer is NULL (or the string is empty), an empty string is returned.\r
+//------------------------------------------------------------------------\r
+std::string UTF8FromUTF16(const wchar_t * utf16);\r
+\r
+\r
+} // namespace utf8util\r
+\r
+\r
+\r
+#include "utf8conv_inl.h"     // inline implementations\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
diff --git a/common/utility/utf8conv_inl.h b/common/utility/utf8conv_inl.h
new file mode 100644 (file)
index 0000000..234c79d
--- /dev/null
@@ -0,0 +1,368 @@
+//////////////////////////////////////////////////////////////////////////\r
+//\r
+// FILE: utf8conv_inl.h\r
+//\r
+// by Giovanni Dicanio <gdicanio@mvps.org>\r
+//\r
+// Private header file containing implementations of inline functions.\r
+// The public header file for this module is "utf8conv.h"; \r
+// users should *not* #include this private header file directly.\r
+//\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
+#pragma once\r
+\r
+\r
+#include <limits.h>     // INT_MAX\r
+#include <string.h>     // strlen()\r
+\r
+#include <Windows.h>    // Win32 Platform SDK main header\r
+\r
+\r
+\r
+namespace utf8util {\r
+\r
+\r
+//------------------------------------------------------------------------\r
+//      Implementation of utf8_conversion_error class methods\r
+//------------------------------------------------------------------------\r
+\r
+// Builds the exception from a raw C string message: the message is handed\r
+// to the std::runtime_error base, the conversion kind and the error code\r
+// are stored as given.\r
+inline utf8_conversion_error::utf8_conversion_error(\r
+    const char * message,\r
+    conversion_type conversion,\r
+    error_code_type error_code)\r
+    : std::runtime_error(message)\r
+    , m_conversion(conversion)\r
+    , m_error_code(error_code)\r
+{\r
+}\r
+\r
+\r
+// Builds the exception from a std::string message: the message is handed\r
+// to the std::runtime_error base, the conversion kind and the error code\r
+// are stored as given.\r
+inline utf8_conversion_error::utf8_conversion_error(\r
+    const std::string & message,\r
+    conversion_type conversion,\r
+    error_code_type error_code)\r
+    : std::runtime_error(message)\r
+    , m_conversion(conversion)\r
+    , m_error_code(error_code)\r
+{\r
+}\r
+\r
+\r
+// Accessor: the conversion direction stored at construction.\r
+inline utf8_conversion_error::conversion_type\r
+utf8_conversion_error::conversion() const\r
+{\r
+    return this->m_conversion;\r
+}\r
+\r
+\r
+inline utf8_conversion_error::error_code_type utf8_conversion_error::error_code() const\r
+{\r
+    return m_error_code;\r
+}\r
+\r
+\r
+\r
+//------------------------------------------------------------------------\r
+//              Implementation of module functions\r
+//------------------------------------------------------------------------\r
+\r
+\r
+inline std::wstring UTF16FromUTF8(const std::string & utf8)\r
+{\r
+    //\r
+    // Special case of empty input string\r
+    //\r
+    if (utf8.empty())\r
+        return std::wstring();\r
+\r
+\r
+    // Fail if an invalid input character is encountered\r
+    const DWORD conversionFlags = MB_ERR_INVALID_CHARS;\r
+\r
+\r
+    //\r
+    // Get length (in wchar_t's) of resulting UTF-16 string\r
+    //\r
+    const int utf16Length = ::MultiByteToWideChar(\r
+        CP_UTF8,            // convert from UTF-8\r
+        conversionFlags,    // flags\r
+        utf8.data(),        // source UTF-8 string\r
+        utf8.length(),      // length (in chars) of source UTF-8 string\r
+        NULL,               // unused - no conversion done in this step\r
+        0                   // request size of destination buffer, in wchar_t's\r
+        );\r
+    if (utf16Length == 0)\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+\r
+        throw utf8_conversion_error(\r
+            (error == ERROR_NO_UNICODE_TRANSLATION) ? \r
+                "Invalid UTF-8 sequence found in input string." :\r
+                "Can't get length of UTF-16 string (MultiByteToWideChar failed).", \r
+            utf8_conversion_error::conversion_utf16_from_utf8,\r
+            error);      \r
+    }\r
+\r
+\r
+    //\r
+    // Allocate destination buffer for UTF-16 string\r
+    //\r
+    std::wstring utf16;\r
+    utf16.resize(utf16Length);\r
+\r
+\r
+    //\r
+    // Do the conversion from UTF-8 to UTF-16\r
+    //\r
+    if ( ! ::MultiByteToWideChar(\r
+        CP_UTF8,            // convert from UTF-8\r
+        0,                  // validation was done in previous call, \r
+                            // so speed up things with default flags\r
+        utf8.data(),        // source UTF-8 string\r
+        utf8.length(),      // length (in chars) of source UTF-8 string\r
+        &utf16[0],          // destination buffer\r
+        utf16.length()      // size of destination buffer, in wchar_t's\r
+        ) )\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+        throw utf8_conversion_error(\r
+            "Can't convert string from UTF-8 to UTF-16 (MultiByteToWideChar failed).", \r
+            utf8_conversion_error::conversion_utf16_from_utf8,\r
+            error);\r
+    }\r
+\r
+\r
+    //\r
+    // Return resulting UTF-16 string\r
+    //\r
+    return utf16;\r
+}\r
+\r
+\r
+\r
+inline std::wstring UTF16FromUTF8(const char * utf8)\r
+{\r
+    //\r
+    // Special case of empty input string\r
+    //\r
+    if (utf8 == NULL || *utf8 == '\0')\r
+        return std::wstring();\r
+\r
+\r
+    // Prefetch the length of the input UTF-8 string\r
+    const int utf8Length = static_cast<int>(strlen(utf8));\r
+\r
+    // Fail if an invalid input character is encountered\r
+    const DWORD conversionFlags = MB_ERR_INVALID_CHARS;\r
+\r
+    //\r
+    // Get length (in wchar_t's) of resulting UTF-16 string\r
+    //\r
+    const int utf16Length = ::MultiByteToWideChar(\r
+        CP_UTF8,            // convert from UTF-8\r
+        conversionFlags,    // flags\r
+        utf8,               // source UTF-8 string\r
+        utf8Length,         // length (in chars) of source UTF-8 string\r
+        NULL,               // unused - no conversion done in this step\r
+        0                   // request size of destination buffer, in wchar_t's\r
+        );\r
+    if (utf16Length == 0)\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+        throw utf8_conversion_error(\r
+            (error == ERROR_NO_UNICODE_TRANSLATION) ? \r
+            "Invalid UTF-8 sequence found in input string." :\r
+            "Can't get length of UTF-16 string (MultiByteToWideChar failed).", \r
+            utf8_conversion_error::conversion_utf16_from_utf8,\r
+            error);\r
+    }\r
+\r
+\r
+    //\r
+    // Allocate destination buffer for UTF-16 string\r
+    //\r
+    std::wstring utf16;\r
+    utf16.resize(utf16Length);\r
+\r
+\r
+    //\r
+    // Do the conversion from UTF-8 to UTF-16\r
+    //\r
+    if ( ! ::MultiByteToWideChar(\r
+        CP_UTF8,            // convert from UTF-8\r
+        0,                  // validation was done in previous call, \r
+                            // so speed up things with default flags\r
+        utf8,               // source UTF-8 string\r
+        utf8Length,         // length (in chars) of source UTF-8 string\r
+        &utf16[0],          // destination buffer\r
+        utf16.length()      // size of destination buffer, in wchar_t's\r
+        ) )\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+        throw utf8_conversion_error(\r
+            "Can't convert string from UTF-8 to UTF-16 (MultiByteToWideChar failed).", \r
+            utf8_conversion_error::conversion_utf16_from_utf8,\r
+            error);\r
+    }\r
+\r
+\r
+    //\r
+    // Return resulting UTF-16 string\r
+    //\r
+    return utf16;\r
+}\r
+\r
+\r
+\r
+inline std::string UTF8FromUTF16(const std::wstring & utf16)\r
+{\r
+    //\r
+    // Special case of empty input string\r
+    //\r
+    if (utf16.empty())\r
+        return std::string();\r
+\r
+\r
+    //\r
+    // Get length (in chars) of resulting UTF-8 string\r
+    //\r
+    const int utf8Length = ::WideCharToMultiByte(\r
+        CP_UTF8,            // convert to UTF-8\r
+        0,                  // default flags\r
+        utf16.data(),       // source UTF-16 string\r
+        utf16.length(),     // source string length, in wchar_t's,\r
+        NULL,               // unused - no conversion required in this step\r
+        0,                  // request buffer size\r
+        NULL, NULL          // unused\r
+        );\r
+    if (utf8Length == 0)\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+        throw utf8_conversion_error(\r
+            "Can't get length of UTF-8 string (WideCharToMultiByte failed).", \r
+            utf8_conversion_error::conversion_utf8_from_utf16,\r
+            error);\r
+    }\r
+\r
+\r
+    //\r
+    // Allocate destination buffer for UTF-8 string\r
+    //\r
+    std::string utf8;\r
+    utf8.resize(utf8Length);\r
+\r
+\r
+    //\r
+    // Do the conversion from UTF-16 to UTF-8\r
+    //\r
+    if ( ! ::WideCharToMultiByte(\r
+        CP_UTF8,                // convert to UTF-8\r
+        0,                      // default flags\r
+        utf16.data(),           // source UTF-16 string\r
+        utf16.length(),         // source string length, in wchar_t's,\r
+        &utf8[0],               // destination buffer\r
+        utf8.length(),          // destination buffer size, in chars\r
+        NULL, NULL              // unused\r
+        ) )\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+        throw utf8_conversion_error(\r
+            "Can't convert string from UTF-16 to UTF-8 (WideCharToMultiByte failed).", \r
+            utf8_conversion_error::conversion_utf8_from_utf16,\r
+            error);\r
+    }\r
+\r
+\r
+    //\r
+    // Return resulting UTF-8 string\r
+    //\r
+    return utf8;\r
+}\r
+\r
+\r
+\r
+inline std::string UTF8FromUTF16(const wchar_t * utf16)\r
+{\r
+    //\r
+    // Special case of empty input string\r
+    //\r
+    if (utf16 == NULL || *utf16 == L'\0')\r
+        return std::string();\r
+\r
+\r
+    // Prefetch the length of the input UTF-16 string\r
+    const int utf16Length = static_cast<int>(wcslen(utf16));\r
+  \r
+\r
+    //\r
+    // Get length (in chars) of resulting UTF-8 string\r
+    //\r
+    const int utf8Length = ::WideCharToMultiByte(\r
+        CP_UTF8,            // convert to UTF-8\r
+        0,                  // default flags\r
+        utf16,              // source UTF-16 string\r
+        utf16Length,        // source string length, in wchar_t's,\r
+        NULL,               // unused - no conversion required in this step\r
+        0,                  // request buffer size\r
+        NULL, NULL          // unused\r
+        );\r
+    if (utf8Length == 0)\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+        throw utf8_conversion_error(\r
+            "Can't get length of UTF-8 string (WideCharToMultiByte failed).", \r
+            utf8_conversion_error::conversion_utf8_from_utf16,\r
+            error);\r
+    }\r
+\r
+\r
+    //\r
+    // Allocate destination buffer for UTF-8 string\r
+    //\r
+    std::string utf8;\r
+    utf8.resize(utf8Length);\r
+\r
+\r
+    //\r
+    // Do the conversion from UTF-16 to UTF-8\r
+    //\r
+    if ( ! ::WideCharToMultiByte(\r
+        CP_UTF8,                // convert to UTF-8\r
+        0,                      // default flags\r
+        utf16,                  // source UTF-16 string\r
+        utf16Length,            // source string length, in wchar_t's,\r
+        &utf8[0],               // destination buffer\r
+        utf8.length(),          // destination buffer size, in chars\r
+        NULL, NULL              // unused\r
+        ) )\r
+    {\r
+        // Error\r
+        DWORD error = ::GetLastError();\r
+        throw utf8_conversion_error(\r
+            "Can't convert string from UTF-16 to UTF-8 (WideCharToMultiByte failed).", \r
+            utf8_conversion_error::conversion_utf8_from_utf16,\r
+            error);\r
+    }\r
+\r
+\r
+    //\r
+    // Return resulting UTF-8 string\r
+    //\r
+    return utf8;\r
+}\r
+\r
+\r
+\r
+} // namespace utf8util\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
index 6d98b32bc8314c51444630e5f85a8eed4f810f92..54ff2a8861fd068bf70c735c7d46035d941d228a 100644 (file)
@@ -4,7 +4,7 @@
 \r
 \r
  /* File created by MIDL compiler version 7.00.0555 */\r
-/* at Sun Dec 04 16:03:37 2011\r
+/* at Mon Dec 05 09:41:37 2011\r
  */\r
 /* Compiler settings for interop\DeckLinkAPI.idl:\r
     Oicf, W1, Zp8, env=Win32 (32b run), target_arch=X86 7.00.0555 \r
index 23d0474f0322a8f86c85a760fd797c0d8acb9810..6025889b8d19d15c5b8749d7172531c272bd503d 100644 (file)
@@ -6,7 +6,7 @@
 \r
 \r
  /* File created by MIDL compiler version 7.00.0555 */\r
-/* at Sun Dec 04 16:03:37 2011\r
+/* at Mon Dec 05 09:41:37 2011\r
  */\r
 /* Compiler settings for interop\DeckLinkAPI.idl:\r
     Oicf, W1, Zp8, env=Win32 (32b run), target_arch=X86 7.00.0555 \r