Merge branch 'develop' into ryzomclassic-develop

ryzomclassic-develop
kaetemi 4 years ago
commit 625d269c11

@ -257,6 +257,8 @@ int compareCaseInsensitive(const char *a, const char *b);
int compareCaseInsensitive(const char *a, size_t lenA, const char *b, size_t lenB); int compareCaseInsensitive(const char *a, size_t lenA, const char *b, size_t lenB);
inline int compareCaseInsensitive(const std::string &a, const std::string &b) { return compareCaseInsensitive(&a[0], a.size(), &b[0], b.size()); } inline int compareCaseInsensitive(const std::string &a, const std::string &b) { return compareCaseInsensitive(&a[0], a.size(), &b[0], b.size()); }
inline bool ltCaseInsensitive(const std::string &a, const std::string &b) { return compareCaseInsensitive(&a[0], a.size(), &b[0], b.size()) < 0; } inline bool ltCaseInsensitive(const std::string &a, const std::string &b) { return compareCaseInsensitive(&a[0], a.size(), &b[0], b.size()) < 0; }
std::string toCaseInsensitive(const char *str); // UTF-8, case-insensitive toLower
std::string toCaseInsensitive(const std::string &str); // UTF-8, case-insensitive toLower
/** ASCII to lowercase. Useful for internal identifiers. /** ASCII to lowercase. Useful for internal identifiers.
* Characters outside of the 7-bit ASCII space, and control characters, are replaced. * Characters outside of the 7-bit ASCII space, and control characters, are replaced.

@ -2740,4 +2740,114 @@ int compareCaseInsensitive(const char *a, size_t lenA, const char *b, size_t len
// *************************************************************************** // ***************************************************************************
/// Append the case-insensitive form of the UTF-8 sequence that starts at
/// str[i] to res, and move i past the bytes that were consumed.
///
/// ASCII 'A'..'Z' is folded to 'a'..'z'. Well-formed 2-, 3- and 4-byte
/// sequences are looked up in the s_StringToCaseInsensitiveMap* tables; when
/// an entry exists its NUL-terminated replacement is appended and the whole
/// sequence is consumed. In every other case (stray continuation byte,
/// truncated sequence, lead byte >= 0xF8, or no table entry) the single byte
/// at str[i] is copied unchanged and i advances by one.
///
/// \param res  Output string, appended to.
/// \param str  Input bytes; look-ahead stops at the first byte that is not a
///             continuation byte, so a NUL terminator bounds the reads.
/// \param i    In/out byte offset into str.
NL_FORCE_INLINE void appendToCaseInsensitiveAsUtf8(std::string &res, const char *str, ptrdiff_t &i)
{
	const unsigned char lead = str[i];

	if (lead < 0x80)
	{
		// 1-byte UTF-8: only the ASCII capitals change
		const bool isUpper = (lead >= 'A' && lead <= 'Z');
		res += (char)(isUpper ? lead + ('a' - 'A') : lead);
		++i;
		return;
	}

	// Decode the sequence prefix down to the leaf table addressed by the
	// final continuation byte. `leaf` stays NULL when there is no mapping.
	const char *leaf = NULL;
	unsigned char tail = 0; // last continuation byte of the sequence
	ptrdiff_t width = 0; // number of bytes in the sequence

	if (lead < 0xC0)
	{
		// non-starting byte, copied through below
	}
	else if (lead < 0xE0)
	{
		// 2-byte UTF-8
		const unsigned char b1 = str[i + 1];
		if ((b1 & 0xC0) == 0x80)
		{
			leaf = s_StringToCaseInsensitiveMap[lead & 0x1F];
			tail = b1;
			width = 2;
		}
	}
	else if (lead < 0xF0)
	{
		// 3-byte UTF-8
		const unsigned char b1 = str[i + 1];
		if ((b1 & 0xC0) == 0x80)
		{
			const unsigned char b2 = str[i + 2];
			if ((b2 & 0xC0) == 0x80)
			{
				const char **mid = s_StringToCaseInsensitiveMapMap[lead & 0x0F];
				if (mid)
					leaf = mid[b1 & 0x3F];
				tail = b2;
				width = 3;
			}
		}
	}
	else if (lead < 0xF8)
	{
		// 4-byte UTF-8
		const unsigned char b1 = str[i + 1];
		if ((b1 & 0xC0) == 0x80)
		{
			const unsigned char b2 = str[i + 2];
			if ((b2 & 0xC0) == 0x80)
			{
				const unsigned char b3 = str[i + 3];
				if ((b3 & 0xC0) == 0x80)
				{
					const char ***top = s_StringToCaseInsensitiveMapMapMap[lead & 0x07];
					if (top)
					{
						const char **mid = top[b1 & 0x3F];
						if (mid)
							leaf = mid[b2 & 0x3F];
					}
					tail = b3;
					width = 4;
				}
			}
		}
	}

	if (leaf)
	{
		// Leaf entries are addressed with a stride of 4 bytes; an entry
		// whose first byte is NUL means "no replacement".
		const char *entry = leaf + ((tail & 0x3F) << 2);
		if (*entry)
		{
			res += entry;
			i += width;
			return;
		}
	}

	// No mapping: pass the current byte through untouched
	res += (char)lead;
	++i;
}
// ***************************************************************************
/// UTF-8 toCaseInsensitive for a NUL-terminated C string.
/// Walks the input with appendToCaseInsensitiveAsUtf8 until the terminator
/// is reached and returns the accumulated result.
std::string toCaseInsensitive(const char *str)
{
	std::string folded;
	ptrdiff_t pos = 0;
	while (str[pos] != '\0')
	{
		// the helper advances pos by however many bytes it consumed
		appendToCaseInsensitiveAsUtf8(folded, str, pos);
	}
	return folded;
}
// ***************************************************************************
/// UTF-8 toCaseInsensitive for a std::string.
/// Iterates over str.size() bytes (so embedded NUL bytes are processed too)
/// and returns the result built by appendToCaseInsensitiveAsUtf8.
std::string toCaseInsensitive(const std::string &str)
{
	const ptrdiff_t length = (ptrdiff_t)str.size();
	const char *bytes = str.c_str();

	std::string folded;
	// reserve the input size plus a quarter, since replacements may be longer
	folded.reserve(str.size() + (str.size() >> 2));

	ptrdiff_t pos = 0;
	while (pos < length)
	{
		// the helper advances pos by however many bytes it consumed
		appendToCaseInsensitiveAsUtf8(folded, bytes, pos);
	}
	return folded;
}
} // NLMISC } // NLMISC

@ -2530,7 +2530,7 @@ NL_FORCE_INLINE void appendToLowerAsUtf8(std::string &res, const char *str, ptrd
std::string toLower(const char *str) std::string toLower(const char *str)
{ {
// UTF-8 toLower, tables generated from UTF-16 tables // UTF-8 toLower
std::string res; std::string res;
for (ptrdiff_t i = 0; str[i];) for (ptrdiff_t i = 0; str[i];)
appendToLowerAsUtf8(res, str, i); appendToLowerAsUtf8(res, str, i);
@ -2541,7 +2541,7 @@ std::string toLower(const char *str)
std::string toLower(const std::string &str) std::string toLower(const std::string &str)
{ {
// UTF-8 toLower, tables generated from UTF-16 tables // UTF-8 toLower
std::string res; std::string res;
res.reserve(str.size() + (str.size() >> 2)); res.reserve(str.size() + (str.size() >> 2));
const char *cstr = &str[0]; const char *cstr = &str[0];

@ -2624,7 +2624,7 @@ static const char ***s_StringToUpperMapMapMap[8] = {
std::string toUpper(const char *str) std::string toUpper(const char *str)
{ {
// UTF-8 toLower, tables generated from UTF-16 tables // UTF-8 toLower
std::string res; std::string res;
for (ptrdiff_t i = 0; str[i];) for (ptrdiff_t i = 0; str[i];)
appendToUpperAsUtf8(res, str, i); appendToUpperAsUtf8(res, str, i);
@ -2635,7 +2635,7 @@ std::string toUpper(const char *str)
std::string toUpper(const std::string &str) std::string toUpper(const std::string &str)
{ {
// UTF-8 toLower, tables generated from UTF-16 tables // UTF-8 toLower
std::string res; std::string res;
res.reserve(str.size() + (str.size() >> 2)); res.reserve(str.size() + (str.size() >> 2));
const char *cstr = &str[0]; const char *cstr = &str[0];

@ -17,6 +17,7 @@
#include <nel/misc/types_nl.h> #include <nel/misc/types_nl.h>
#include <nel/misc/ucstring.h> #include <nel/misc/ucstring.h>
#include <nel/misc/utf_string_view.h>
#include <nel/misc/common.h> #include <nel/misc/common.h>
#include <nel/misc/sstring.h> #include <nel/misc/sstring.h>
#include <nel/misc/i18n.h> #include <nel/misc/i18n.h>
@ -76,21 +77,10 @@ int main(int argc, char *argv[])
ucstring str; ucstring str;
CI18N::readTextFile(inputFile, str, false, false); CI18N::readTextFile(inputFile, str, false, false);
if (outMode == ASCII)
{
// remove any outof ascii char
ucstring temp;
for (uint i=0; i<str.size(); ++i)
{
if (str[i] < 256)
temp += str[i];
}
str = temp;
}
if (xmlSupport) if (xmlSupport)
{ {
ucstring temp; ucstring temp;
temp.reserve(str.size());
for (uint i=0; i<str.size(); ++i) for (uint i=0; i<str.size(); ++i)
{ {
switch(str[i]) switch(str[i])
@ -121,9 +111,16 @@ int main(int argc, char *argv[])
break; break;
case ASCII: case ASCII:
{ {
string s = str.toString(); std::string res;
res.reserve(str.size());
for (ucstring::const_iterator it(str.begin()), end(str.end()); it != end; ++it)
{
ucchar c = *it;
if (c < 0x80)
res += (char)c;
}
FILE *fp = nlfopen(outputFile, "wt"); FILE *fp = nlfopen(outputFile, "wt");
fwrite(s.data(), s.size(), 1, fp); fwrite(res.data(), res.size(), 1, fp);
fclose(fp); fclose(fp);
} }
break; break;

Loading…
Cancel
Save