From 25049eaead0d2cf2cb9e0d902923a7195e09663a Mon Sep 17 00:00:00 2001
From: kaetemi
Date: Wed, 28 Oct 2020 01:15:16 +0800
Subject: [PATCH] Complete some missing implementation for wide strings

---
Review amendments on top of the original commit:
- The wchar_t constructors store m_Size in bytes, like the ucstring constructor.
- toWide() calls this->end() so the loop-local 'end' is not used in its own initializer.
- toWide() reserves at most m_Size elements and uses byte-wise pointer arithmetic.

 nel/include/nel/misc/utf_string_view.h | 17 +++++++++++++
 nel/src/misc/string_common.cpp         |  9 ++++---
 nel/src/misc/utf_string_view.cpp       | 38 ++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/nel/include/nel/misc/utf_string_view.h b/nel/include/nel/misc/utf_string_view.h
index 503fc3737..e972ca7cf 100644
--- a/nel/include/nel/misc/utf_string_view.h
+++ b/nel/include/nel/misc/utf_string_view.h
@@ -35,6 +35,21 @@ public:
 	{
 		nlassert(len <= strlen(utf8Str));
 	}
+	// Wide strings are platform dependent: UTF-16 on Windows, UTF-32 elsewhere.
+	// len is in wchar_t units; m_Size is a byte count, hence the sizeof(wchar_t) scaling.
+#if defined(NL_OS_WINDOWS)
+	inline CUtfStringView(const wchar_t *utf16Str) : m_Str(utf16Str), m_Size(wcslen(utf16Str) * sizeof(wchar_t)), m_Iterator(utf16Iterator) {}
+	inline CUtfStringView(const wchar_t *utf16Str, size_t len): m_Str(utf16Str), m_Size(len * sizeof(wchar_t)), m_Iterator(utf16Iterator)
+	{
+		nlassert(len <= wcslen(utf16Str));
+	}
+#else
+	inline CUtfStringView(const wchar_t *utf32Str) : m_Str(utf32Str), m_Size(wcslen(utf32Str) * sizeof(wchar_t)), m_Iterator(utf32Iterator) {}
+	inline CUtfStringView(const wchar_t *utf32Str, size_t len): m_Str(utf32Str), m_Size(len * sizeof(wchar_t)), m_Iterator(utf32Iterator)
+	{
+		nlassert(len <= wcslen(utf32Str));
+	}
+#endif
 	inline CUtfStringView(const std::string &utf8Str) : m_Str(utf8Str.c_str()), m_Size(utf8Str.size()), m_Iterator(utf8Iterator) {}
 	inline CUtfStringView(const ucstring &utf16Str) : m_Str(utf16Str.c_str()), m_Size(utf16Str.size() << 1), m_Iterator(utf16Iterator) {}
 
@@ -44,6 +59,8 @@ public:
 	ucstring toUtf16(bool reEncode = false) const; // Makes a copy
 	u32string toUtf32() const; // Makes a copy
 
+	std::wstring toWide() const; // Platform dependent, UTF-16 or UTF-32. Makes a copy.
+
 	inline bool isUtf8() const { return m_Iterator == utf8Iterator; }
 	inline bool isUtf16() const { return m_Iterator == utf16Iterator; }
 	inline bool isUtf32() const { return m_Iterator == utf32Iterator; }
diff --git a/nel/src/misc/string_common.cpp b/nel/src/misc/string_common.cpp
index 93122cc70..f869a5d35 100644
--- a/nel/src/misc/string_common.cpp
+++ b/nel/src/misc/string_common.cpp
@@ -21,6 +21,7 @@
 
 #include "nel/misc/string_common.h"
 #include "nel/misc/sstring.h"
+#include "nel/misc/utf_string_view.h"
 
 using namespace std;
 
@@ -228,8 +229,7 @@ std::string wideToUtf8(const wchar_t *str, size_t len)
 #if defined(NL_OS_WINDOWS)
 	return winWideToCp(str, len, CP_UTF8);
 #else
-	// TODO: UTF-32 to UTF-8
-	nlassert(false);
+	return CUtfStringView(str, len).toUtf8();
 #endif
 }
 
@@ -242,10 +242,9 @@ std::string wideToUtf8(const std::wstring &str)
 std::wstring utf8ToWide(const char *str, size_t len)
 {
 #if defined(NL_OS_WINDOWS)
-	return winCpToWide(str, len, CP_UTF8);
+	return winCpToWide(str, len, CP_UTF8); // UTF-16
 #else
-	// TODO: UTF-8 to UTF-32
-	nlassert(false);
+	return CUtfStringView(str, len).toWide(); // UTF-32
 #endif
 }
 
diff --git a/nel/src/misc/utf_string_view.cpp b/nel/src/misc/utf_string_view.cpp
index ee5fdf089..db2ea7cec 100644
--- a/nel/src/misc/utf_string_view.cpp
+++ b/nel/src/misc/utf_string_view.cpp
@@ -106,6 +106,44 @@ u32string CUtfStringView::toUtf32() const
 	return res;
 }
 
+/// Returns a copy of the viewed string as a platform wide string
+/// (UTF-16 code units on Windows, UTF-32 code points elsewhere).
+std::wstring CUtfStringView::toWide() const
+{
+#ifdef NL_OS_WINDOWS
+	// Already UTF-16: copy the code units as-is, m_Size is a byte count
+	if (m_Iterator == utf16Iterator)
+		return std::wstring((const wchar_t *)m_Str, (const wchar_t *)((const char *)m_Str + m_Size));
+	std::wstring res;
+	res.reserve(m_Size); // Size hint: a code point never needs more UTF-16 units than source bytes
+	for (iterator it(begin()), end(this->end()); it != end; ++it)
+	{
+		u32char c = *it;
+		if (c < 0x10000)
+		{
+			res += (wchar_t)c;
+		}
+		else
+		{
+			// Outside the BMP: encode as a high/low surrogate pair
+			c -= 0x10000;
+			res += (wchar_t)((c >> 10) | 0xD800);
+			res += (wchar_t)((c & 0x3FF) | 0xDC00);
+		}
+	}
+	return res;
+#else
+	// Already UTF-32: copy the code points as-is, m_Size is a byte count
+	if (m_Iterator == utf32Iterator)
+		return std::wstring((const wchar_t *)m_Str, (const wchar_t *)((const char *)m_Str + m_Size));
+	std::wstring res;
+	res.reserve(m_Size); // Size hint: never more code points than source bytes
+	for (iterator it(begin()), end(this->end()); it != end; ++it)
+		res += (wchar_t)*it;
+	return res;
+#endif
+}
+
 u32char CUtfStringView::utf8Iterator(const void **addr)
 {
 	// Decode UTF-8