Move ucstring utf8 implementation to header

develop
kaetemi 4 years ago
parent bbf4a0ac8a
commit 3dddc5ab40

@ -149,43 +149,7 @@ public:
}
/// Convert this ucstring (16bits char) into a utf8 string
std::string toUtf8() const
{
std::string res;
ucstring::const_iterator first(begin()), last(end());
for (; first != last; ++first)
{
//ucchar c = *first;
uint nbLoop = 0;
if (*first < 0x80)
res += char(*first);
else if (*first < 0x800)
{
ucchar c = *first;
c = c >> 6;
c = c & 0x1F;
res += char(c) | 0xC0;
nbLoop = 1;
}
else /*if (*first < 0x10000)*/
{
ucchar c = *first;
c = c >> 12;
c = c & 0x0F;
res += char(c) | 0xE0;
nbLoop = 2;
}
for (uint i=0; i<nbLoop; ++i)
{
ucchar c = *first;
c = c >> ((nbLoop - i - 1) * 6);
c = c & 0x3F;
res += char(c) | 0x80;
}
}
return res;
}
std::string toUtf8() const;
ucstring substr(size_type pos = 0, size_type n = npos) const
{
@ -199,86 +163,7 @@ public:
}
/// Convert the utf8 string into this ucstring (16 bits char)
void fromUtf8(const std::string &stringUtf8)
{
// clear the string
erase();
uint8 c;
ucchar code;
sint iterations = 0;
std::string::const_iterator first(stringUtf8.begin()), last(stringUtf8.end());
for (; first != last; )
{
c = *first++;
code = c;
if ((code & 0xFE) == 0xFC)
{
code &= 0x01;
iterations = 5;
}
else if ((code & 0xFC) == 0xF8)
{
code &= 0x03;
iterations = 4;
}
else if ((code & 0xF8) == 0xF0)
{
code &= 0x07;
iterations = 3;
}
else if ((code & 0xF0) == 0xE0)
{
code &= 0x0F;
iterations = 2;
}
else if ((code & 0xE0) == 0xC0)
{
code &= 0x1F;
iterations = 1;
}
else if ((code & 0x80) == 0x80)
{
// If it's not a valid UTF8 string, just copy the line without utf8 conversion
rawCopy(stringUtf8);
return;
}
else
{
push_back(code);
iterations = 0;
}
if (iterations)
{
for (sint i = 0; i < iterations; i++)
{
if (first == last)
{
// If it's not a valid UTF8 string, just copy the line without utf8 conversion
rawCopy(stringUtf8);
return;
}
uint8 ch;
ch = *first ++;
if ((ch & 0xC0) != 0x80)
{
// If it's not a valid UTF8 string, just copy the line without utf8 conversion
rawCopy(stringUtf8);
return;
}
code <<= 6;
code |= (ucchar)(ch & 0x3F);
}
push_back(code);
}
}
}
void fromUtf8(const std::string &stringUtf8);
static ucstring makeFromUtf8(const std::string &stringUtf8)
{
@ -289,19 +174,8 @@ public:
}
private:
void rawCopy(const std::string &str);
void rawCopy(const std::string &str)
{
// We need to convert the char into 8bits unsigned int before promotion to 16 bits
// otherwise, as char are signed on some compiler (MSCV for ex), the sign bit is extended to 16 bits.
resize(str.size());
std::string::const_iterator first(str.begin()), last(str.end());
iterator dest(begin());
for (;first != last; ++first, ++dest)
{
*dest = uint8(*first);
}
}
};
inline ucstring operator+(const ucstringbase &ucstr, ucchar c)

@ -0,0 +1,152 @@
// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "stdmisc.h"
#include "nel/misc/ucstring.h"
std::string ucstring::toUtf8() const
{
std::string res;
ucstring::const_iterator first(begin()), last(end());
for (; first != last; ++first)
{
//ucchar c = *first;
uint nbLoop = 0;
if (*first < 0x80)
res += char(*first);
else if (*first < 0x800)
{
ucchar c = *first;
c = c >> 6;
c = c & 0x1F;
res += char(c) | 0xC0;
nbLoop = 1;
}
else /*if (*first < 0x10000)*/
{
ucchar c = *first;
c = c >> 12;
c = c & 0x0F;
res += char(c) | 0xE0;
nbLoop = 2;
}
for (uint i=0; i<nbLoop; ++i)
{
ucchar c = *first;
c = c >> ((nbLoop - i - 1) * 6);
c = c & 0x3F;
res += char(c) | 0x80;
}
}
return res;
}
void ucstring::fromUtf8(const std::string &stringUtf8)
{
// clear the string
erase();
uint8 c;
ucchar code;
sint iterations = 0;
std::string::const_iterator first(stringUtf8.begin()), last(stringUtf8.end());
for (; first != last; )
{
c = *first++;
code = c;
if ((code & 0xFE) == 0xFC)
{
code &= 0x01;
iterations = 5;
}
else if ((code & 0xFC) == 0xF8)
{
code &= 0x03;
iterations = 4;
}
else if ((code & 0xF8) == 0xF0)
{
code &= 0x07;
iterations = 3;
}
else if ((code & 0xF0) == 0xE0)
{
code &= 0x0F;
iterations = 2;
}
else if ((code & 0xE0) == 0xC0)
{
code &= 0x1F;
iterations = 1;
}
else if ((code & 0x80) == 0x80)
{
// If it's not a valid UTF8 string, just copy the line without utf8 conversion
rawCopy(stringUtf8);
return;
}
else
{
push_back(code);
iterations = 0;
}
if (iterations)
{
for (sint i = 0; i < iterations; i++)
{
if (first == last)
{
// If it's not a valid UTF8 string, just copy the line without utf8 conversion
rawCopy(stringUtf8);
return;
}
uint8 ch;
ch = *first ++;
if ((ch & 0xC0) != 0x80)
{
// If it's not a valid UTF8 string, just copy the line without utf8 conversion
rawCopy(stringUtf8);
return;
}
code <<= 6;
code |= (ucchar)(ch & 0x3F);
}
push_back(code);
}
}
}
void ucstring::rawCopy(const std::string &str)
{
// We need to convert the char into 8bits unsigned int before promotion to 16 bits
// otherwise, as char are signed on some compiler (MSCV for ex), the sign bit is extended to 16 bits.
resize(str.size());
std::string::const_iterator first(str.begin()), last(str.end());
iterator dest(begin());
for (;first != last; ++first, ++dest)
{
*dest = uint8(*first);
}
}
/* end of file */
Loading…
Cancel
Save