From 8efe1d91c4c3371831b6e01c2b315bfd97cb76d0 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Wed, 1 May 2019 05:01:30 +0800 Subject: [PATCH] Added: More comprehensive codepage conversion for tstring --- code/nel/include/nel/misc/common.h | 10 +- code/nel/include/nel/misc/string_common.h | 151 ++++++++---- code/nel/include/nel/misc/types_nl.h | 6 + code/nel/src/misc/string_common.cpp | 272 +++++++++++++++++++++- 4 files changed, 381 insertions(+), 58 deletions(-) diff --git a/code/nel/include/nel/misc/common.h b/code/nel/include/nel/misc/common.h index 8ad0b5dcc..48468d6fd 100644 --- a/code/nel/include/nel/misc/common.h +++ b/code/nel/include/nel/misc/common.h @@ -303,23 +303,23 @@ inline sint nlstricmp(const std::string &lhs, const std::string &rhs) { return s inline sint nlstricmp(const std::string &lhs, const char *rhs) { return stricmp(lhs.c_str(),rhs); } inline sint nlstricmp(const char *lhs, const std::string &rhs) { return stricmp(lhs,rhs.c_str()); } -// TODO: Can we prefix these with 'nl' like other methods? +// TODO: Can we prefix these with 'nl' like other macros? 
// Macros helper to convert UTF-8 std::string and wchar_t* -#define wideToUtf8(str) (ucstring((ucchar*)str).toUtf8()) -#define utf8ToWide(str) ((wchar_t*)ucstring::makeFromUtf8(str).c_str()) +// #define wideToUtf8(str) (ucstring((ucchar*)str).toUtf8()) +// #define utf8ToWide(str) ((wchar_t*)ucstring::makeFromUtf8(str).c_str()) // Macros helper to convert UTF-8 std::string and TCHAR* #ifdef _UNICODE #define tStrToUtf8(str) (ucstring((ucchar*)(LPCWSTR)str).toUtf8()) #define utf8ToTStr(str) ((const wchar_t *)ucstring::makeFromUtf8(str).c_str()) -#define tstring wstring +// #define tstring wstring #else // FIXME: This is not accurate, it should be a conversion between local charset and utf8 #define tStrToUtf8(str) (std::string((LPCSTR)str)) inline const char *nlutf8ToTStr(const char *str) { return str; } inline const char *nlutf8ToTStr(const std::string &str) { return str.c_str(); } #define utf8ToTStr(str) NLMISC::nlutf8ToTStr(str) -#define tstring string +// #define tstring string #endif #if (NL_COMP_VC_VERSION <= 90) diff --git a/code/nel/include/nel/misc/string_common.h b/code/nel/include/nel/misc/string_common.h index 02cafec92..4b757b989 100644 --- a/code/nel/include/nel/misc/string_common.h +++ b/code/nel/include/nel/misc/string_common.h @@ -246,55 +246,7 @@ inline bool fromString(const std::string &str, double &val) { bool ret = sscanf( // (str[0] == '1' || (str[0] & 0xD2) == 0x50) // - Kaetemi -inline bool fromString(const std::string &str, bool &val) -{ - if (str.length() == 1) - { - const char c = str[0]; - - switch(c) - { - case '1': - case 't': - case 'T': - case 'y': - case 'Y': - val = true; - break; - - case '0': - case 'f': - case 'F': - case 'n': - case 'N': - val = false; - break; - - default: - val = false; - return false; - } - } - else - { - std::string strl = toLower(str); - if (strl == "true" || strl == "yes") - { - val = true; - } - else if (strl == "false" || strl == "no") - { - val = false; - } - else - { - val = false; - return false; - } - } 
- - return true; -} +bool fromString(const std::string &str, bool &val); inline bool fromString(const std::string &str, std::string &val) { val = str; return true; } @@ -305,6 +257,107 @@ inline bool fromString(const std::string &str, uint &val) { return sscanf(str.c_ inline bool fromString(const std::string &str, sint &val) { return sscanf(str.c_str(), "%d", &val) == 1; } #endif // NL_COMP_VC6 +// Convert local codepage to UTF-8 +// On Windows, the local codepage is undetermined +// On Linux, the local codepage is always UTF-8 (no-op) +std::string mbcsToUtf8(const char *str, size_t len = 0); +std::string mbcsToUtf8(const std::string &str); + +// Convert wide codepage to UTF-8 +// On Windows, the wide codepage is UTF-16 +// On Linux, the wide codepage is UTF-32 +std::string wideToUtf8(const wchar_t *str, size_t len = 0); +std::string wideToUtf8(const std::wstring &str); + +// Convert UTF-8 to wide character set +std::wstring utf8ToWide(const char *str, size_t len = 0); +std::wstring utf8ToWide(const std::string &str); + +// Convert UTF-8 to local multibyte character set +std::string utf8ToMbcs(const char *str, size_t len = 0); +std::string utf8ToMbcs(const std::string &str); + +// Convert wide to local multibyte character set +std::string wideToMbcs(const wchar_t *str, size_t len = 0); +std::string wideToMbcs(const std::wstring &str); + +// Convert local multibyte to wide character set +std::wstring mbcsToWide(const char *str, size_t len = 0); +std::wstring mbcsToWide(const std::string &str); + +inline const char* asCStr(const char *str) { return str; } +inline const char* asCStr(const std::string &str) { return str.c_str(); } +inline const wchar_t* asCStr(const wchar_t *str) { return str; } +inline const wchar_t* asCStr(const std::wstring &str) { return str.c_str(); } + +#if defined(NL_OS_WINDOWS) +#define nlUtf8ToMbcs(str) (NLMISC::utf8ToMbcs(str).c_str()) +#define nlMbcsToUtf8(str) (NLMISC::mbcsToUtf8(str).c_str()) +#else +#define nlUtf8ToMbcs(str) 
(NLMISC::asCStr(str)) +#define nlMbcsToUtf8(str) (NLMISC::asCStr(str)) +#endif +#define nlWideToUtf8(str) (NLMISC::wideToUtf8(str).c_str()) +#define nlUtf8ToWide(str) (NLMISC::utf8ToWide(str).c_str()) +#define nlWideToMbcs(str) (NLMISC::wideToMbcs(str).c_str()) +#define nlMbcsToWide(str) (NLMISC::mbcsToWide(str).c_str()) + +// On Windows, tstring is either local multibyte or utf-16 wide +// On Linux, tstring is always utf-8 + +#if defined(NL_OS_WINDOWS) && (defined(UNICODE) || defined(_UNICODE)) +typedef std::wstring tstring; +typedef wchar_t tchar; +#define nltmain wmain +inline std::string tStrToUtf8(const tchar *str) { return wideToUtf8((const wchar_t *)str); } +inline std::string tStrToUtf8(const tstring &str) { return wideToUtf8((const std::wstring &)str); } +inline std::wstring tStrToWide(const tchar *str) { return (const wchar_t *)str; } +inline std::wstring tStrToWide(const tstring &str) { return (const std::wstring &)str; } +inline std::string tStrToMbcs(const tchar *str) { return wideToMbcs((const wchar_t *)str); } +inline std::string tStrToMbcs(const tstring &str) { return wideToMbcs((const std::wstring &)str); } +#define nlTStrToUtf8(str) (NLMISC::tStrToUtf8(str).c_str()) +#define nlTStrToWide(str) ((const wchar_t *)NLMISC::asCStr(str)) +#define nlTStrToMbcs(str) (NLMISC::tStrToMbcs(str).c_str()) +inline tstring utf8ToTStr(const char *str) { return (const tstring &)utf8ToWide(str); } +inline tstring utf8ToTStr(const std::string &str) { return (const tstring &)utf8ToWide(str); } +inline tstring wideToTStr(const wchar_t *str) { return (const tchar *)str; } +inline tstring wideToTStr(const std::wstring &str) { return (const tstring &)str; } +inline tstring mbcsToTStr(const char *str) { return (const tstring &)mbcsToWide(str); } +inline tstring mbcsToTStr(const std::string &str) { return (const tstring &)mbcsToWide(str); } +#define nlUtf8ToTStr(str) (NLMISC::utf8ToTStr(str).c_str()) +#define nlWideToTStr(str) ((const tchar *)NLMISC::asCStr(str)) +#define 
nlMbcsToTStr(str) (NLMISC::mbcsToTStr(str).c_str()) +#else +typedef std::string tstring; +typedef char tchar; +#define nltmain main +inline std::string tStrToUtf8(const tchar *str) { return mbcsToUtf8((const char *)str); } +inline std::string tStrToUtf8(const tstring &str) { return mbcsToUtf8((const std::string &)str); } +inline std::wstring tStrToWide(const tchar *str) { return mbcsToWide((const char *)str); } +inline std::wstring tStrToWide(const tstring &str) { return mbcsToWide((const std::string &)str); } +inline std::string tStrToMbcs(const tchar *str) { return (const char *)str; } +inline std::string tStrToMbcs(const tstring &str) { return (const std::string &)str; } +#if defined(NL_OS_WINDOWS) +#define nlTStrToUtf8(str) (NLMISC::tStrToUtf8(str).c_str()) +#else +#define nlTStrToUtf8(str) ((const char *)NLMISC::asCStr(str)) +#endif +#define nlTStrToWide(str) (NLMISC::tStrToWide(str).c_str()) +#define nlTStrToMbcs(str) ((const char *)NLMISC::asCStr(str)) +inline tstring utf8ToTStr(const char *str) { return (const tstring &)utf8ToMbcs(str); } +inline tstring utf8ToTStr(const std::string &str) { return (const tstring &)utf8ToMbcs(str); } +inline tstring wideToTStr(const wchar_t *str) { return (const tstring &)wideToMbcs(str); } +inline tstring wideToTStr(const std::wstring &str) { return (const tstring &)wideToMbcs(str); } +inline tstring mbcsToTStr(const char *str) { return (const tchar *)str; } +inline tstring mbcsToTStr(const std::string &str) { return (const tstring &)str; } +#if defined(NL_OS_WINDOWS) +#define nlUtf8ToTStr(str) (NLMISC::utf8ToTStr(str).c_str()) +#else +#define nlUtf8ToTStr(str) ((const tchar *)NLMISC::asCStr(str)) +#endif +#define nlWideToTStr(str) (NLMISC::wideToTStr(str).c_str()) +#define nlMbcsToTStr(str) ((const tchar *)NLMISC::asCStr(str)) +#endif } // NLMISC diff --git a/code/nel/include/nel/misc/types_nl.h b/code/nel/include/nel/misc/types_nl.h index 1a1da7be2..27d147ffd 100644 --- a/code/nel/include/nel/misc/types_nl.h +++ 
b/code/nel/include/nel/misc/types_nl.h @@ -173,6 +173,12 @@ # define NL_NO_EXCEPTION_SPECS #endif +#if defined(NL_COMP_VC) && (NL_COMP_VC_VERSION >= 140) +#define nlmove(v) std::move(v) +#else +#define nlmove(v) (v) +#endif + // gcc 3.4 introduced ISO C++ with tough template rules // // NL_ISO_SYNTAX can be used using #if NL_ISO_SYNTAX or #if !NL_ISO_SYNTAX diff --git a/code/nel/src/misc/string_common.cpp b/code/nel/src/misc/string_common.cpp index 9c9085e4e..3c8913ce1 100644 --- a/code/nel/src/misc/string_common.cpp +++ b/code/nel/src/misc/string_common.cpp @@ -17,23 +17,24 @@ #include "stdmisc.h" #include "nel/misc/string_common.h" +#include "nel/misc/sstring.h" using namespace std; #ifdef DEBUG_NEW - #define new DEBUG_NEW +#define new DEBUG_NEW #endif namespace NLMISC { -string addSlashR (const string &str) +string addSlashR(const string &str) { string formatedStr; // replace \n with \r\n for (uint i = 0; i < str.size(); i++) { - if (str[i] == '\n' && i > 0 && str[i-1] != '\r') + if (str[i] == '\n' && i > 0 && str[i - 1] != '\r') { formatedStr += '\r'; } @@ -42,7 +43,7 @@ string addSlashR (const string &str) return formatedStr; } -string removeSlashR (const string &str) +string removeSlashR(const string &str) { string formatedStr; // remove \r @@ -54,4 +55,267 @@ string removeSlashR (const string &str) return formatedStr; } +bool fromString(const std::string &str, bool &val) +{ + if (str.length() == 1) + { + const char c = str[0]; + + switch (c) + { + case '1': + case 't': + case 'T': + case 'y': + case 'Y': + val = true; + break; + + case '0': + case 'f': + case 'F': + case 'n': + case 'N': + val = false; + break; + + default: + val = false; + return false; + } + } + else + { + std::string strl = toLower(str); + if (strl == "true" || strl == "yes") + { + val = true; + } + else if (strl == "false" || strl == "no") + { + val = false; + } + else + { + val = false; + return false; + } + } + + return true; +} + +#if defined(NL_OS_WINDOWS) + +std::string 
winWideToCp(const wchar_t *str, size_t len, UINT cp) +{ + if (!len) + len = wcslen(str); + if (!len) + return std::string(); + + // Convert from wide to codepage + char *tmp = (char *)_malloca((len + 1) * 4); + if (!tmp) + return std::string(); + int tmpLen = WideCharToMultiByte(cp, 0, + str, (int)(len + 1), + tmp, (int)((len + 1) * 4), + NULL, NULL); + if (tmpLen <= 1) + { + _freea(tmp); + return std::string(); + } + + std::string res = tmp; + _freea(tmp); + return res; +} + +std::string winCpToCp(const char *str, size_t len, UINT srcCp, UINT dstCp) +{ + if (!len) + len = strlen(str); + if (!len) + return std::string(); + + // First convert from codepage to wide + wchar_t *tmp = (wchar_t *)_malloca((len + 1) * 4); + if (!tmp) + return std::string(); + int tmpLen = MultiByteToWideChar(srcCp, 0, /* flags must be 0 for CP_UTF8; precomposed is the default */ + str, (int)(len + 1), /* include null-termination */ + tmp, (int)((len + 1) * 4 / sizeof(wchar_t))); /* capacity in wchar_t, not bytes */ + if (tmpLen <= 1) + { + _freea(tmp); + return std::string(); + } + + // Then convert from wide to codepage + std::string res = winWideToCp(tmp, (size_t)tmpLen - 1, dstCp); /* tmpLen includes null-term */ + _freea(tmp); + return res; +} + +std::wstring winCpToWide(const char *str, size_t len, UINT cp) +{ + if (!len) + len = strlen(str); + if (!len) + return std::wstring(); + + // Convert from codepage to wide + wchar_t *tmp = (wchar_t *)_malloca((len + 1) * 4); + if (!tmp) + return std::wstring(); + int tmpLen = MultiByteToWideChar(cp, 0, /* flags must be 0 for CP_UTF8; precomposed is the default */ + str, (int)(len + 1), /* include null-termination */ + tmp, (int)((len + 1) * 4 / sizeof(wchar_t))); /* capacity in wchar_t, not bytes */ + if (tmpLen <= 1) + { + _freea(tmp); + return std::wstring(); + } + + std::wstring res = tmp; + _freea(tmp); + return res; +} + +#endif + +// Convert local codepage to UTF-8 +// On Windows, the local codepage is undetermined +// On Linux, the local codepage is always UTF-8 (no-op) +std::string mbcsToUtf8(const char *str, size_t len) +{ +#if defined(NL_OS_WINDOWS) + UINT codePage = GetACP(); + // Windows 10 allows setting the local codepage to UTF-8 
+ if (codePage == CP_UTF8) /* 65001 */ + return str; + return winCpToCp(str, len, CP_ACP, CP_UTF8); +#else + return str; /* no-op */ +#endif +} + +std::string mbcsToUtf8(const std::string &str) +{ +#if defined(NL_OS_WINDOWS) + if (str.empty()) + return str; + UINT codePage = GetACP(); + // Windows 10 allows setting the local codepage to UTF-8 + if (codePage == CP_UTF8) /* 65001 */ + return str; + return winCpToCp(str.c_str(), str.size(), CP_ACP, CP_UTF8); +#else + return str; /* no-op */ +#endif +} + +// Convert wide codepage to UTF-8 +// On Windows, the wide codepage is UTF-16 +// On Linux, the wide codepage is UTF-32 +std::string wideToUtf8(const wchar_t *str, size_t len) +{ +#if defined(NL_OS_WINDOWS) + return winWideToCp(str, len, CP_UTF8); +#else + // TODO: UTF-32 to UTF-8 + nlassert(false); return std::string(); /* keep a return so control never falls off the end of a non-void function */ +#endif +} + +std::string wideToUtf8(const std::wstring &str) +{ + return wideToUtf8(str.c_str(), str.size()); +} + +// Convert UTF-8 to wide character set +std::wstring utf8ToWide(const char *str, size_t len) +{ +#if defined(NL_OS_WINDOWS) + return winCpToWide(str, len, CP_UTF8); +#else + // TODO: UTF-8 to UTF-32 + nlassert(false); return std::wstring(); /* keep a return so control never falls off the end of a non-void function */ +#endif +} + +std::wstring utf8ToWide(const std::string &str) +{ + return utf8ToWide(str.c_str(), str.size()); +} + +// Convert UTF-8 to local multibyte character set +std::string utf8ToMbcs(const char *str, size_t len) +{ +#if defined(NL_OS_WINDOWS) + UINT codePage = GetACP(); + // Windows 10 allows setting the local codepage to UTF-8 + if (codePage == CP_UTF8) /* 65001 */ + return str; + return winCpToCp(str, len, CP_UTF8, CP_ACP); +#else + return str; /* no-op */ +#endif +} + +std::string utf8ToMbcs(const std::string &str) +{ +#if defined(NL_OS_WINDOWS) + if (str.empty()) + return str; + UINT codePage = GetACP(); + // Windows 10 allows setting the local codepage to UTF-8 + if (codePage == CP_UTF8) /* 65001 */ + return str; + return winCpToCp(str.c_str(), str.size(), CP_UTF8, CP_ACP); +#else + return str; /* no-op */ +#endif +} + +// Convert 
wide to local multibyte character set +std::string wideToMbcs(const wchar_t *str, size_t len) +{ +#if defined(NL_OS_WINDOWS) + return winWideToCp(str, len, CP_ACP); +#else + return wideToUtf8(str, len); +#endif +} + +std::string wideToMbcs(const std::wstring &str) +{ +#if defined(NL_OS_WINDOWS) + return winWideToCp(str.c_str(), str.size(), CP_ACP); +#else + return wideToUtf8(str); +#endif +} + +// Convert local multibyte to wide character set +std::wstring mbcsToWide(const char *str, size_t len) +{ +#if defined(NL_OS_WINDOWS) + return winCpToWide(str, len, CP_ACP); +#else + return utf8ToWide(str, len); +#endif +} + +std::wstring mbcsToWide(const std::string &str) +{ +#if defined(NL_OS_WINDOWS) + return winCpToWide(str.c_str(), str.size(), CP_ACP); +#else + return utf8ToWide(str); +#endif +} + }