diff --git a/nel/src/misc/utf_string_view.cpp b/nel/src/misc/utf_string_view.cpp
index 222265e91..17dbaea58 100644
--- a/nel/src/misc/utf_string_view.cpp
+++ b/nel/src/misc/utf_string_view.cpp
@@ -49,7 +49,7 @@ NL_FORCE_INLINE void appendUtf8(std::string &str, u32char c)
 		str += (char)((c & 0x0FC0) >> 6) | 0x80;
 		str += (char)(c & 0x3F) | 0x80;
 	}
-	else
+	else if (c < 0x110000)
 	{
 		// Encode as 4 bytes
 		str += (char)((c & 0x1C0000) >> 18) | 0xF0;
@@ -57,6 +57,11 @@ NL_FORCE_INLINE void appendUtf8(std::string &str, u32char c)
 		str += (char)((c & 0x0FC0) >> 6) | 0x80;
 		str += (char)(c & 0x3F) | 0x80;
 	}
+	else
+	{
+		// Code point is beyond U+10FFFF, emit the replacement character U+FFFD (UTF-8: EF BF BD)
+		str += "\xEF\xBF\xBD";
+	}
 }
 
 void CUtfStringView::append(std::string &str, u32char c)
@@ -175,6 +180,7 @@ u32char CUtfStringView::utf8Iterator(const void **addr)
 {
 	// Decode UTF-8
 	// This implementation makes no attempt at fixing bad encoding, except for bad UTF-16 surrogate pairs
+	// Invalid characters are replaced with the replacement character
 	const uint8 **pp = reinterpret_cast<const uint8 **>(addr);
 	u32char c0 = **pp;
 	++(*pp);
@@ -209,7 +215,12 @@ u32char CUtfStringView::utf8Iterator(const void **addr)
 							++(*pp);
 							c0 &= 0xFFFF; // Drop first bit
 							c0 <<= 6;
-							c0 |= (cx & 0x3F); // 22 bits now (17 - 1 + 6), 3-byte encoding
+							c0 |= (cx & 0x3F); // 22 bits now (17 - 1 + 6), 4-byte encoding
+							if (c0 >= 0x110000)
+							{
+								// Decoded value is beyond U+10FFFF, return the replacement character U+FFFD
+								return 0xFFFD;
+							}
 						}
 						else
 						{