UTF string view base, ryzom/ryzomcore#335

develop
kaetemi 4 years ago
parent a3780e307e
commit 973531f461

@ -552,6 +552,14 @@ typedef wchar_t ucchar;
typedef uint16 ucchar;
#endif
// 32-bit character and string types.
// When NL_CPP14 is defined the native char32_t / std::u32string are used.
#ifdef NL_CPP14
typedef char32_t u32char;
typedef std::u32string u32string;
#else
// Otherwise fall back to the uint32 type and a std::basic_string over it.
typedef uint32 u32char;
typedef std::basic_string<uint32> u32string;
#endif
// NL_OVERRIDE expands to the `override` specifier unless it was defined earlier.
#ifndef NL_OVERRIDE
#define NL_OVERRIDE override
#endif

@ -0,0 +1,95 @@
// NeL - MMORPG Framework <https://wiki.ryzom.dev/>
// Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef NLMISC_UTF_STRING_VIEW_H
#define NLMISC_UTF_STRING_VIEW_H
#include <nel/misc/types_nl.h>
#include <nel/misc/ucstring.h>
#include <nel/misc/utf32_string.h>
#include <string>
namespace NLMISC {
/// String view for UTF-8 and UTF-32 iteration as 32-bit codepoints.
/// This string view keeps the string as a reference, it does not make a copy.
/// Only use this for iterating a string's codepoints.
/// Strings are not necessarily NUL-terminated.
class CUtfStringView
{
public:
inline CUtfStringView(const char *utf8Str) : m_Str(utf8Str), m_Size(strlen(utf8Str)), m_Iterator(utf8Iterator) {}
inline CUtfStringView(const char *utf8Str, size_t len): m_Str(utf8Str), m_Size(len), m_Iterator(utf8Iterator)
{
nlassert(len <= strlen(utf8Str));
}
inline CUtfStringView(const std::string &utf8Str) : m_Str(utf8Str.c_str()), m_Size(utf8Str.size()), m_Iterator(utf8Iterator) {}
inline CUtfStringView(const ucstring &utf16Str) : m_Str(utf16Str.c_str()), m_Size(utf16Str.size() << 1), m_Iterator(utf16Iterator) {}
inline CUtfStringView(const u32string &utf32Str) : m_Str(utf32Str.c_str()), m_Size(utf32Str.size() << 2), m_Iterator(utf32Iterator) {}
std::string toUtf8(); // Makes a copy
u32string toUtf32(); // Makes a copy
inline bool isUtf8() { return m_Iterator == utf8Iterator; }
inline bool isUtf16() { return m_Iterator == utf16Iterator; }
inline bool isUtf32() { return m_Iterator == utf32Iterator; }
struct const_iterator
{
public:
inline void operator++()
{
m_Char = m_View.m_Iterator(&m_Addr);
if ((ptrdiff_t)m_Addr > ((ptrdiff_t)m_View.m_Str + m_View.m_Size))
{
m_Addr = 0;
m_Char = 0;
}
}
inline bool operator!=(const const_iterator &o) const { return m_Addr != o.m_Addr; }
inline bool operator==(const const_iterator &o) const { return m_Addr == o.m_Addr; }
inline const u32char &operator*() const { return m_Char; }
private:
friend class CUtfStringView;
inline const_iterator(const CUtfStringView &view, const void *addr) : m_View(view), m_Addr(addr), m_Char(addr ? view.m_Iterator(&m_Addr) : 0) { }
const CUtfStringView &m_View;
const void *m_Addr; // Next address
u32char m_Char;
};
typedef const_iterator iterator;
iterator begin() const { return iterator(*this, m_Str); }
inline iterator end() const { return iterator(*this, NULL); }
private:
typedef u32char (*TIterator)(const void **addr);
static u32char utf8Iterator(const void **addr);
static u32char utf16Iterator(const void **addr);
static u32char utf32Iterator(const void **addr);
const void *const m_Str;
const size_t m_Size;
const TIterator m_Iterator;
}; /* class CUtfStringView */
} /* namespace NLMISC */
#endif /* #ifndef NLMISC_UTF_STRING_VIEW_H */
/* end of file */

@ -0,0 +1,76 @@
// NeL - MMORPG Framework <https://wiki.ryzom.dev/>
// Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "stdmisc.h"
// Project includes
#include <nel/misc/utf_string_view.h>
namespace NLMISC
{
std::string CUtfStringView::toUtf8()
{
if (m_Iterator == utf8Iterator)
return std::string((const char *)m_Str, (const char *)((ptrdiff_t)m_Str + m_Size));
std::string res;
res.reserve((m_Size << 1) + 1);
for (iterator it(begin()), end(end()); it != end; ++it)
{
u32char c = *it;
res += (char)c; /* TODO: Encode UTF-8 */
}
}
/// Returns a UTF-32 copy of the viewed string.
/// A view that already holds UTF-32 is copied directly; any other view is
/// iterated and each decoded codepoint is appended to the result.
u32string CUtfStringView::toUtf32()
{
	if (m_Iterator == utf32Iterator)
	{
		// m_Size is a byte count, so the end pointer is computed on a byte pointer
		const char *const bytes = static_cast<const char *>(m_Str);
		return u32string(reinterpret_cast<const u32char *>(bytes), reinterpret_cast<const u32char *>(bytes + m_Size));
	}

	u32string res;
	res.reserve(m_Size + 1);
	// The end iterator needs its own name: a local called `end` would hide the
	// member function inside its own initializer.
	const iterator last(end());
	for (iterator it(begin()); it != last; ++it)
		res += *it;
	return res;
}
/// Decodes one UTF-8 sequence starting at *addr and advances *addr past it.
/// Returns the decoded codepoint, or U+FFFD when the sequence is malformed
/// (stray continuation byte, invalid lead byte, or missing continuation byte).
/// Overlong forms and out-of-range values are not rejected.
/// No end-of-buffer is passed in: after a lead byte the following bytes are
/// inspected one at a time and decoding stops at the first byte that is not a
/// continuation byte, which includes a NUL terminator.
u32char CUtfStringView::utf8Iterator(const void **addr)
{
	const unsigned char *p = static_cast<const unsigned char *>(*addr);
	u32char c = *p++;
	int extra; // Number of continuation bytes expected after the lead byte

	if (c < 0x80)
	{
		extra = 0; // 0xxxxxxx
	}
	else if ((c & 0xE0) == 0xC0)
	{
		c &= 0x1F; // 110xxxxx
		extra = 1;
	}
	else if ((c & 0xF0) == 0xE0)
	{
		c &= 0x0F; // 1110xxxx
		extra = 2;
	}
	else if ((c & 0xF8) == 0xF0)
	{
		c &= 0x07; // 11110xxx
		extra = 3;
	}
	else
	{
		// Stray continuation byte or invalid lead byte: consume it alone
		*addr = p;
		return 0xFFFD;
	}

	for (; extra > 0; --extra)
	{
		if ((*p & 0xC0) != 0x80)
		{
			// Truncated sequence: leave the offending byte for the next call
			*addr = p;
			return 0xFFFD;
		}
		c = (c << 6) | (u32char)(*p & 0x3F);
		++p;
	}

	*addr = p;
	return c;
}
/// Decodes one UTF-16 codepoint starting at *addr and advances *addr past it.
/// A high surrogate followed by a low surrogate is combined into a single
/// supplementary-plane codepoint and both units are consumed.
/// An unpaired surrogate is returned unchanged and only that unit is consumed.
/// No end-of-buffer is passed in: the unit after a high surrogate is inspected
/// to decide whether it forms a pair.
u32char CUtfStringView::utf16Iterator(const void **addr)
{
	const ucchar *p = static_cast<const ucchar *>(*addr);
	u32char c = *p;
	++p;

	if (c >= 0xD800 && c <= 0xDBFF)
	{
		const u32char low = *p;
		if (low >= 0xDC00 && low <= 0xDFFF)
		{
			++p;
			c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
		}
	}

	*addr = p;
	return c;
}
/// Reads the UTF-32 code unit at *addr, moves *addr to the following unit
/// and returns the value that was read.
u32char CUtfStringView::utf32Iterator(const void **addr)
{
	const u32char *cursor = static_cast<const u32char *>(*addr);
	const u32char codepoint = *cursor;
	*addr = cursor + 1;
	return codepoint;
}
} /* namespace NLMISC */
/* end of file */
Loading…
Cancel
Save