diff --git a/nel/include/nel/misc/common.h b/nel/include/nel/misc/common.h index 637c3c800..d9edc8e14 100644 --- a/nel/include/nel/misc/common.h +++ b/nel/include/nel/misc/common.h @@ -257,6 +257,8 @@ int compareCaseInsensitive(const char *a, const char *b); int compareCaseInsensitive(const char *a, size_t lenA, const char *b, size_t lenB); inline int compareCaseInsensitive(const std::string &a, const std::string &b) { return compareCaseInsensitive(&a[0], a.size(), &b[0], b.size()); } inline bool ltCaseInsensitive(const std::string &a, const std::string &b) { return compareCaseInsensitive(&a[0], a.size(), &b[0], b.size()) < 0; } +std::string toCaseInsensitive(const char *str); // UTF-8, case-insensitive toLower +std::string toCaseInsensitive(const std::string &str); // UTF-8, case-insensitive toLower /** ASCII to lowercase. Useful for internal identifiers. * Characters outside of the 7-bit ASCII space, and control characters, are replaced. diff --git a/nel/src/misc/string_to_ci.cpp b/nel/src/misc/string_to_ci.cpp index bdde3df23..1aeb36da2 100644 --- a/nel/src/misc/string_to_ci.cpp +++ b/nel/src/misc/string_to_ci.cpp @@ -2740,4 +2740,114 @@ int compareCaseInsensitive(const char *a, size_t lenA, const char *b, size_t len // *************************************************************************** +NL_FORCE_INLINE void appendToCaseInsensitiveAsUtf8(std::string &res, const char *str, ptrdiff_t &i) +{ + unsigned char c = str[i]; + unsigned char d, e, f; + if (c < 0x80) + { + if (c >= 'A' && c <= 'Z') + { + // 1-byte UTF-8 + c += 'a' - 'A'; + } + } + else if (c < 0xC0) + { + // non-starting byte + } + else if (c < 0xE0) + { + // 2-byte UTF-8 + if (((d = str[i + 1]) & 0xC0) == 0x80) + { + const char *table = s_StringToCaseInsensitiveMap[c & 0x1F]; + if (table) + { + unsigned char idx = (d & 0x3F) << 2; + if (table[idx]) + { + res += &table[idx]; + i += 2; + return; + } + } + } + } + else if (c < 0xF0) + { + // 3-byte UTF-8 + if (((d = str[i + 1]) & 0xC0) == 0x80 && ((e 
= str[i + 2]) & 0xC0) == 0x80) + { + const char **map = s_StringToCaseInsensitiveMapMap[c & 0x0F]; + if (map) + { + const char *table = map[d & 0x3F]; + if (table) + { + unsigned char idx = (e & 0x3F) << 2; + if (table[idx]) + { + res += &table[idx]; + i += 3; + return; + } + } + } + } + } + else if (c < 0xF8) + { + // 4-byte UTF-8 + if (((d = str[i + 1]) & 0xC0) == 0x80 && ((e = str[i + 2]) & 0xC0) == 0x80 && ((f = str[i + 3]) & 0xC0) == 0x80) + { + const char ***mapMap = s_StringToCaseInsensitiveMapMapMap[c & 0x07]; + if (mapMap) + { + const char **map = mapMap[d & 0x3F]; + if (map) + { + const char *table = map[e & 0x3F]; + if (table) + { + unsigned char idx = (f & 0x3F) << 2; + if (table[idx]) + { + res += &table[idx]; + i += 4; + return; + } + } + } + } + } + } + res += c; + ++i; +} + +// *************************************************************************** + +std::string toCaseInsensitive(const char *str) +{ + // UTF-8 toCaseInsensitive + std::string res; + for (ptrdiff_t i = 0; str[i];) + appendToCaseInsensitiveAsUtf8(res, str, i); + return res; +} + +// *************************************************************************** + +std::string toCaseInsensitive(const std::string &str) +{ + // UTF-8 toCaseInsensitive + std::string res; + res.reserve(str.size() + (str.size() >> 2)); + const char *cstr = &str[0]; + for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size();) + appendToCaseInsensitiveAsUtf8(res, cstr, i); + return res; +} + } // NLMISC diff --git a/nel/src/misc/string_to_lower.cpp b/nel/src/misc/string_to_lower.cpp index cd40abe88..d20dd015f 100644 --- a/nel/src/misc/string_to_lower.cpp +++ b/nel/src/misc/string_to_lower.cpp @@ -2530,7 +2530,7 @@ NL_FORCE_INLINE void appendToLowerAsUtf8(std::string &res, const char *str, ptrd std::string toLower(const char *str) { - // UTF-8 toLower, tables generated from UTF-16 tables + // UTF-8 toLower std::string res; for (ptrdiff_t i = 0; str[i];) appendToLowerAsUtf8(res, str, i); @@ -2541,7 +2541,7 @@ 
std::string toLower(const char *str) std::string toLower(const std::string &str) { - // UTF-8 toLower, tables generated from UTF-16 tables + // UTF-8 toLower std::string res; res.reserve(str.size() + (str.size() >> 2)); const char *cstr = &str[0]; diff --git a/nel/src/misc/string_to_upper.cpp b/nel/src/misc/string_to_upper.cpp index eb037969c..eefd0c6c6 100644 --- a/nel/src/misc/string_to_upper.cpp +++ b/nel/src/misc/string_to_upper.cpp @@ -2624,7 +2624,7 @@ static const char ***s_StringToUpperMapMapMap[8] = { std::string toUpper(const char *str) { - // UTF-8 toLower, tables generated from UTF-16 tables + // UTF-8 toUpper std::string res; for (ptrdiff_t i = 0; str[i];) appendToUpperAsUtf8(res, str, i); @@ -2635,7 +2635,7 @@ std::string toUpper(const char *str) std::string toUpper(const std::string &str) { - // UTF-8 toLower, tables generated from UTF-16 tables + // UTF-8 toUpper std::string res; res.reserve(str.size() + (str.size() >> 2)); const char *cstr = &str[0]; diff --git a/ryzom/tools/leveldesign/uni_conv/uni_conv.cpp b/ryzom/tools/leveldesign/uni_conv/uni_conv.cpp index d6afe9a57..554fc0f3d 100644 --- a/ryzom/tools/leveldesign/uni_conv/uni_conv.cpp +++ b/ryzom/tools/leveldesign/uni_conv/uni_conv.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -76,21 +77,10 @@ int main(int argc, char *argv[]) ucstring str; CI18N::readTextFile(inputFile, str, false, false); - if (outMode == ASCII) - { - // remove any outof ascii char - ucstring temp; - for (uint i=0; i