From 3dddc5ab4062b89c55cfbf6c35eb456a7fc54ff6 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Sat, 24 Oct 2020 03:57:15 +0800 Subject: [PATCH] Move ucstring utf8 implementation to header --- nel/include/nel/misc/ucstring.h | 132 +-------------------------- nel/src/misc/ucstring.cpp | 152 ++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 129 deletions(-) create mode 100644 nel/src/misc/ucstring.cpp diff --git a/nel/include/nel/misc/ucstring.h b/nel/include/nel/misc/ucstring.h index b3dc1294f..8cedb04ec 100644 --- a/nel/include/nel/misc/ucstring.h +++ b/nel/include/nel/misc/ucstring.h @@ -149,43 +149,7 @@ public: } /// Convert this ucstring (16bits char) into a utf8 string - std::string toUtf8() const - { - std::string res; - ucstring::const_iterator first(begin()), last(end()); - for (; first != last; ++first) - { - //ucchar c = *first; - uint nbLoop = 0; - if (*first < 0x80) - res += char(*first); - else if (*first < 0x800) - { - ucchar c = *first; - c = c >> 6; - c = c & 0x1F; - res += char(c) | 0xC0; - nbLoop = 1; - } - else /*if (*first < 0x10000)*/ - { - ucchar c = *first; - c = c >> 12; - c = c & 0x0F; - res += char(c) | 0xE0; - nbLoop = 2; - } - - for (uint i=0; i<nbLoop; ++i) - { - ucchar c = *first; - c = c >> ((nbLoop - i - 1) * 6); - c = c & 0x3F; - res += char(c) | 0x80; - } - } - return res; - } + std::string toUtf8() const; ucstring substr(size_type pos = 0, size_type n = npos) const { @@ -199,86 +163,7 @@ public: } /// Convert the utf8 string into this ucstring (16 bits char) - void fromUtf8(const std::string &stringUtf8) - { - // clear the string - erase(); - - uint8 c; - ucchar code; - sint iterations = 0; - - std::string::const_iterator first(stringUtf8.begin()), last(stringUtf8.end()); - for (; first != last; ) - { - c = *first++; - code = c; - - if ((code & 0xFE) == 0xFC) - { - code &= 0x01; - iterations = 5; - } - else if ((code & 0xFC) == 0xF8) - { - code &= 0x03; - iterations = 4; - } - else if ((code & 0xF8) == 0xF0) - { - code &= 0x07; - iterations = 3; - } - else if 
((code & 0xF0) == 0xE0) - { - code &= 0x0F; - iterations = 2; - } - else if ((code & 0xE0) == 0xC0) - { - code &= 0x1F; - iterations = 1; - } - else if ((code & 0x80) == 0x80) - { - // If it's not a valid UTF8 string, just copy the line without utf8 conversion - rawCopy(stringUtf8); - return; - } - else - { - push_back(code); - iterations = 0; - } - - if (iterations) - { - for (sint i = 0; i < iterations; i++) - { - if (first == last) - { - // If it's not a valid UTF8 string, just copy the line without utf8 conversion - rawCopy(stringUtf8); - return; - } - - uint8 ch; - ch = *first ++; - - if ((ch & 0xC0) != 0x80) - { - // If it's not a valid UTF8 string, just copy the line without utf8 conversion - rawCopy(stringUtf8); - return; - } - - code <<= 6; - code |= (ucchar)(ch & 0x3F); - } - push_back(code); - } - } - } + void fromUtf8(const std::string &stringUtf8); static ucstring makeFromUtf8(const std::string &stringUtf8) { @@ -289,19 +174,8 @@ public: } private: + void rawCopy(const std::string &str); - void rawCopy(const std::string &str) - { - // We need to convert the char into 8bits unsigned int before promotion to 16 bits - // otherwise, as char are signed on some compiler (MSCV for ex), the sign bit is extended to 16 bits. 
- resize(str.size()); - std::string::const_iterator first(str.begin()), last(str.end()); - iterator dest(begin()); - for (;first != last; ++first, ++dest) - { - *dest = uint8(*first); - } - } }; inline ucstring operator+(const ucstringbase &ucstr, ucchar c) diff --git a/nel/src/misc/ucstring.cpp b/nel/src/misc/ucstring.cpp new file mode 100644 index 000000000..5d7112114 --- /dev/null +++ b/nel/src/misc/ucstring.cpp @@ -0,0 +1,152 @@ +// NeL - MMORPG Framework +// Copyright (C) 2010 Winch Gate Property Limited +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +#include "stdmisc.h" +#include "nel/misc/ucstring.h" + +std::string ucstring::toUtf8() const +{ + std::string res; + ucstring::const_iterator first(begin()), last(end()); + for (; first != last; ++first) + { + //ucchar c = *first; + uint nbLoop = 0; + if (*first < 0x80) + res += char(*first); + else if (*first < 0x800) + { + ucchar c = *first; + c = c >> 6; + c = c & 0x1F; + res += char(c) | 0xC0; + nbLoop = 1; + } + else /*if (*first < 0x10000)*/ + { + ucchar c = *first; + c = c >> 12; + c = c & 0x0F; + res += char(c) | 0xE0; + nbLoop = 2; + } + + for (uint i=0; i<nbLoop; ++i) + { + ucchar c = *first; + c = c >> ((nbLoop - i - 1) * 6); + c = c & 0x3F; + res += char(c) | 0x80; + } + } + return res; +} + +void ucstring::fromUtf8(const std::string &stringUtf8) +{ + // clear the string + erase(); + + uint8 c; + ucchar code; + sint iterations = 0; + + std::string::const_iterator first(stringUtf8.begin()), last(stringUtf8.end()); + for (; first != last; ) + { + c = *first++; + code = c; + + if ((code & 0xFE) == 0xFC) + { + code &= 0x01; + iterations = 5; + } + else if ((code & 0xFC) == 0xF8) + { + code &= 0x03; + iterations = 4; + } + else if ((code & 0xF8) == 0xF0) + { + code &= 0x07; + iterations = 3; + } + else if ((code & 0xF0) == 0xE0) + { + code &= 0x0F; + iterations = 2; + } + else if ((code & 0xE0) == 0xC0) + { + code &= 0x1F; + iterations = 1; + } + else if ((code & 0x80) == 0x80) + { + // If it's not a valid UTF8 string, just copy the line without utf8 conversion + rawCopy(stringUtf8); + return; + } + else + { + push_back(code); + iterations = 0; + } + + if (iterations) + { + for (sint i = 0; i < iterations; i++) + { + if (first == last) + { + // If it's not a valid UTF8 string, just copy the line without utf8 conversion + rawCopy(stringUtf8); + return; + } + + uint8 ch; + ch = *first ++; + + if ((ch & 0xC0) != 0x80) + { + // If it's not a valid UTF8 string, just copy the line without utf8 conversion + rawCopy(stringUtf8); + return; + } + + code <<= 6; + code |= (ucchar)(ch & 0x3F); + } + 
push_back(code); + } + } +} + +void ucstring::rawCopy(const std::string &str) +{ + // We need to convert the char into 8bits unsigned int before promotion to 16 bits + // otherwise, as char is signed on some compilers (MSVC for ex), the sign bit is extended to 16 bits. + resize(str.size()); + std::string::const_iterator first(str.begin()), last(str.end()); + iterator dest(begin()); + for (;first != last; ++first, ++dest) + { + *dest = uint8(*first); + } +} + +/* end of file */