+// Reads a whole text file into a wide string, skipping a leading byte order
+// mark (U+FEFF) when one is present.
+//
+// The stream is not imbued here, so the byte -> wchar_t conversion is the one
+// supplied by the global locale in effect when the stream is constructed.
+// NOTE(review): presumably the application installs a UTF-8 aware global
+// locale before this is called -- confirm; nothing in this function does so.
+//
+// file:    path of the file to read.
+// returns: the file contents, or an empty string if the file cannot be opened.
+std::wstring read_utf8_file(const boost::filesystem::wpath& file)
+{
+    std::wstringstream result;
+    boost::filesystem::wifstream filestream(file);
+
+    if (filestream)
+    {
+        const wchar_t byte_order_mark = 0xFEFF;
+
+        // Only drop the first character when it really is the BOM; discarding
+        // it unconditionally would eat the first character of a BOM-less file.
+        if (filestream.peek() == std::char_traits<wchar_t>::to_int_type(byte_order_mark))
+        {
+            filestream.get();
+        }
+
+        // Read all remaining data.
+        result << filestream.rdbuf();
+    }
+
+    return result.str();
+}
+\r
+// Reads a whole file as ISO-8859-1 (Latin-1) text and returns it widened to a
+// std::wstring, producing exactly one wide character per input byte.
+//
+// file:    path of the file to read.
+// returns: the widened contents, or an empty string if the file cannot be opened.
+std::wstring read_latin1_file(const boost::filesystem::wpath& file)
+{
+    boost::locale::generator locale_factory;
+    locale_factory.locale_cache_enabled(true);
+    locale_factory.categories(boost::locale::codepage_facet);
+
+    boost::filesystem::ifstream input(file);
+    input.imbue(locale_factory("en_US.ISO8859-1"));
+
+    std::stringstream raw_bytes;
+
+    if (input)
+    {
+        // Pull the complete file contents across in one operation.
+        raw_bytes << input.rdbuf();
+    }
+
+    const std::string narrow = raw_bytes.str();
+    std::wstring widened;
+
+    // Latin-1 byte values 0x00-0xFF are the Unicode code points with the same
+    // numeric value, so every byte can be widened directly. Converting through
+    // unsigned char first keeps bytes >= 0x80 from being sign-extended on
+    // platforms where plain char is signed.
+    for (std::string::const_iterator it = narrow.begin(); it != narrow.end(); ++it)
+    {
+        widened.push_back(static_cast<unsigned char>(*it));
+    }
+
+    return widened;
+}
+\r
+// Reads a text file into a wide string, picking the decoder from the file's
+// first bytes.
+//
+// A file that starts with the UTF-8 byte order mark (EF BB BF) is handed to
+// read_utf8_file(); every other file is handed to read_latin1_file().
+//
+// file:    path of the file to read.
+// returns: the decoded contents, or an empty string if the path does not exist.
+std::wstring read_file(const boost::filesystem::wpath& file)
+{
+    static const uint8_t BOM[] = {0xef, 0xbb, 0xbf};
+
+    if (!boost::filesystem::exists(file))
+    {
+        return L"";
+    }
+
+    if (boost::filesystem::file_size(file) >= sizeof(BOM))
+    {
+        // Binary mode: the raw leading bytes are what gets inspected.
+        boost::filesystem::ifstream bom_stream(file, std::ios_base::in | std::ios_base::binary);
+
+        // Zero-initialised so a failed open or short read can never leave
+        // indeterminate bytes to be compared against the BOM.
+        char header[sizeof(BOM)] = {};
+        bom_stream.read(header, sizeof(BOM));
+        const bool header_complete =
+            bom_stream.gcount() == static_cast<std::streamsize>(sizeof(BOM));
+        bom_stream.close();
+
+        if (header_complete && std::memcmp(BOM, header, sizeof(BOM)) == 0)
+        {
+            return read_utf8_file(file);
+        }
+    }
+
+    return read_latin1_file(file);
+}
+\r