Reimplement setCase in purely UTF-8, kaetemi/ryzomclassic#172

develop
kaetemi 4 years ago
parent c061f3df3a
commit 13f99b393f

@ -240,6 +240,16 @@ std::string toUpper ( const char *str ); // UTF-8
std::string toUpper ( const std::string &str); // UTF-8
void toUpper ( char *str); // Ascii only
/** Convert a single character in UTF-8 to upper or lowercase.
* The converted character is appended to `res` and `i` is moved past the
* bytes that were consumed, so callers must not advance `i` themselves.
* \param res Character is appended in UTF-8 into this string.
* \param str Character is sourced from this UTF-8 string.
* \param i Index in `str`, incremented by the number of bytes read.
*/
void appendToLower(std::string &res, const char *str, ptrdiff_t &i);
void appendToLower(std::string &res, const std::string &str, ptrdiff_t &i);
void appendToUpper(std::string &res, const char *str, ptrdiff_t &i);
void appendToUpper(std::string &res, const std::string &str, ptrdiff_t &i);
/**
* Convert to an hexadecimal std::string

@ -28,19 +28,19 @@
namespace NLGUI
{
/// Whitespace test used by setCase: true for space, tab, line feed and
/// carriage return. Works on a single UTF-8 byte; all four separators are
/// ASCII, so a byte belonging to a multi-byte sequence never matches.
inline bool isSeparator (char c)
{
	return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
}
/// Tell whether `c` ends a sentence: `c` must be a space or a newline AND the
/// previous byte `lastChar` must be one of '.', '!' or '?'.
/// \param c Current byte.
/// \param lastChar Byte that precedes `c` (0 when there is none).
inline bool isEndSentence (char c, char lastChar)
{
	// Ex: One sentence. Another sentence.
	//                  ^
	// Counterexample: nevrax.com
	//                       ^
	// The punctuation alternatives are grouped explicitly: `&&` binds tighter
	// than `||`, so without the parentheses any character following '!' or '?'
	// was reported as a sentence end, whatever `c` was.
	return ((c == ' ') || (c == '\n'))
		&& ((lastChar == '.') || (lastChar == '!') || (lastChar == '?'));
}
void setCase(std::string &str, TCaseMode mode)
@ -60,85 +60,83 @@ namespace NLGUI
break;
case CaseFirstStringLetterUp:
{
NLMISC::CUtfStringView sv(str);
std::string res;
res.reserve(sv.largestSize());
for (NLMISC::CUtfStringView::iterator it(sv.begin()), end(sv.end()); it != end; ++it)
res.reserve(str.size() + (str.size() >> 2));
for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size();)
{
u32char c = *it;
if (c < 0x10000)
char c = str[i];
if (!isSeparator(c))
{
if (!isSeparator(c))
{
if (newString)
c = NLMISC::toUpper((ucchar)c);
else
c = NLMISC::toLower((ucchar)c);
newString = false;
}
if (newString)
NLMISC::appendToUpper(res, str, i);
else
NLMISC::appendToLower(res, str, i);
newString = false;
}
else
{
res += c;
++i;
}
NLMISC::CUtfStringView::append(res, c);
}
str = nlmove(res);
str.swap(res);
break;
}
case CaseFirstSentenceLetterUp:
{
NLMISC::CUtfStringView sv(str);
std::string res;
res.reserve(sv.largestSize());
u32char lastChar = 0;
for (NLMISC::CUtfStringView::iterator it(sv.begin()), end(sv.end()); it != end; ++it)
res.reserve(str.size() + (str.size() >> 2));
char lastChar = 0;
for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size();)
{
u32char c = *it;
if (c < 0x10000)
char c = str[i];
if (isEndSentence(c, lastChar))
{
if (isEndSentence(c, lastChar))
newSentence = true;
newSentence = true;
res += c;
++i;
}
else
{
if (newSentence)
NLMISC::appendToUpper(res, str, i);
else
{
if (newSentence)
c = NLMISC::toUpper((ucchar)c);
else
c = NLMISC::toLower((ucchar)c);
NLMISC::appendToLower(res, str, i);
if (!isSeparator(c))
newSentence = false;
}
if (!isSeparator(c))
newSentence = false;
}
NLMISC::CUtfStringView::append(res, c);
lastChar = c;
}
str = nlmove(res);
str.swap(res);
break;
}
case CaseFirstWordLetterUp:
{
NLMISC::CUtfStringView sv(str);
std::string res;
res.reserve(sv.largestSize());
u32char lastChar = 0;
for (NLMISC::CUtfStringView::iterator it(sv.begin()), end(sv.end()); it != end; ++it)
res.reserve(str.size() + (str.size() >> 2));
char lastChar = 0;
for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size();)
{
u32char c = *it;
if (c < 0x10000)
char c = str[i];
if (isSeparator(c) || isEndSentence(c, lastChar))
{
newWord = true;
res += c;
++i;
}
else
{
if (isSeparator(c) || isEndSentence(c, lastChar))
newWord = true;
if (newWord)
NLMISC::appendToUpper(res, str, i);
else
{
if (newWord)
c = NLMISC::toUpper((ucchar)c);
else
c = NLMISC::toLower((ucchar)c);
NLMISC::appendToLower(res, str, i);
newWord = false;
}
newWord = false;
}
NLMISC::CUtfStringView::append(res, c);
lastChar = c;
}
str = nlmove(res);
str.swap(res);
break;
}
default:

@ -4752,7 +4752,7 @@ NL_FORCE_INLINE void appendToLowerAsUtf8(std::string &res, const char *str, ptrd
if (table[idx])
{
res += &table[idx];
++i;
i += 2;
return;
}
}
@ -4770,20 +4770,23 @@ NL_FORCE_INLINE void appendToLowerAsUtf8(std::string &res, const char *str, ptrd
if (table[idx])
{
res += &table[idx];
i += 2;
i += 3;
return;
}
}
}
}
res += c;
++i;
}
// ***************************************************************************
// Lowercase a NUL-terminated UTF-8 string and return the result as a new string.
std::string toLower(const char *str)
{
	// UTF-8 toLower, tables generated from UTF-16 tables
	std::string res;
	// No increment in the loop header: appendToLowerAsUtf8 moves `i` past the
	// bytes it consumed. Incrementing here as well would skip input and could
	// step over the terminating NUL.
	for (ptrdiff_t i = 0; str[i];)
		appendToLowerAsUtf8(res, str, i);
	return res;
}
@ -4796,11 +4799,25 @@ std::string toLower(const std::string &str)
std::string res;
res.reserve(str.size() + (str.size() >> 2));
const char *cstr = &str[0];
for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size(); ++i)
for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size();)
appendToLowerAsUtf8(res, cstr, i);
return res;
}
// ***************************************************************************
// Public entry point for the lowercase conversion of one UTF-8 character:
// forwards to appendToLowerAsUtf8, which appends to `res` and advances `i`.
void appendToLower(std::string &res, const char *str, ptrdiff_t &i)
{
appendToLowerAsUtf8(res, str, i);
}
// ***************************************************************************
// std::string overload: hands the string's character buffer to
// appendToLowerAsUtf8, which appends to `res` and advances `i`.
void appendToLower(std::string &res, const std::string &str, ptrdiff_t &i)
{
	const char *bytes = str.c_str();
	appendToLowerAsUtf8(res, bytes, i);
}
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
@ -4827,7 +4844,7 @@ NL_FORCE_INLINE void appendToUpperAsUtf8(std::string &res, const char *str, ptrd
if (table[idx])
{
res += &table[idx];
++i;
i += 2;
return;
}
}
@ -4845,13 +4862,14 @@ NL_FORCE_INLINE void appendToUpperAsUtf8(std::string &res, const char *str, ptrd
if (table[idx])
{
res += &table[idx];
i += 2;
i += 3;
return;
}
}
}
}
res += c;
++i;
}
// ***************************************************************************
@ -4860,7 +4878,7 @@ std::string toUpper(const char *str)
{
// UTF-8 toUpper, tables generated from UTF-16 tables
std::string res;
for (ptrdiff_t i = 0; str[i]; ++i)
for (ptrdiff_t i = 0; str[i];)
appendToUpperAsUtf8(res, str, i);
return res;
}
@ -4873,13 +4891,27 @@ std::string toUpper(const std::string &str)
std::string res;
res.reserve(str.size() + (str.size() >> 2));
const char *cstr = &str[0];
for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size(); ++i)
for (ptrdiff_t i = 0; i < (ptrdiff_t)str.size();)
appendToUpperAsUtf8(res, cstr, i);
return res;
}
// ***************************************************************************
// Public entry point for the uppercase conversion of one UTF-8 character:
// forwards to appendToUpperAsUtf8, which appends to `res` and advances `i`.
void appendToUpper(std::string &res, const char *str, ptrdiff_t &i)
{
appendToUpperAsUtf8(res, str, i);
}
// ***************************************************************************
// std::string overload: hands the string's character buffer to
// appendToUpperAsUtf8, which appends to `res` and advances `i`.
void appendToUpper(std::string &res, const std::string &str, ptrdiff_t &i)
{
	const char *bytes = str.c_str();
	appendToUpperAsUtf8(res, bytes, i);
}
// ***************************************************************************
#else
// ***************************************************************************

Loading…
Cancel
Save