UTF string view base, ryzom/ryzomcore#335
parent
a3780e307e
commit
973531f461
@ -0,0 +1,95 @@
|
||||
// NeL - MMORPG Framework <https://wiki.ryzom.dev/>
|
||||
// Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef NLMISC_UTF_STRING_VIEW_H
|
||||
#define NLMISC_UTF_STRING_VIEW_H
|
||||
|
||||
#include <nel/misc/types_nl.h>
|
||||
#include <nel/misc/ucstring.h>
|
||||
#include <nel/misc/utf32_string.h>
|
||||
#include <string>
|
||||
|
||||
namespace NLMISC {
|
||||
|
||||
/// String view for UTF-8 and UTF-32 iteration as 32-bit codepoints.
|
||||
/// This string view keeps the string as a reference, it does not make a copy.
|
||||
/// Only use this for iterating a string's codepoints.
|
||||
/// Strings are not necessarily NUL-terminated.
|
||||
class CUtfStringView
|
||||
{
|
||||
public:
|
||||
inline CUtfStringView(const char *utf8Str) : m_Str(utf8Str), m_Size(strlen(utf8Str)), m_Iterator(utf8Iterator) {}
|
||||
inline CUtfStringView(const char *utf8Str, size_t len): m_Str(utf8Str), m_Size(len), m_Iterator(utf8Iterator)
|
||||
{
|
||||
nlassert(len <= strlen(utf8Str));
|
||||
}
|
||||
|
||||
inline CUtfStringView(const std::string &utf8Str) : m_Str(utf8Str.c_str()), m_Size(utf8Str.size()), m_Iterator(utf8Iterator) {}
|
||||
inline CUtfStringView(const ucstring &utf16Str) : m_Str(utf16Str.c_str()), m_Size(utf16Str.size() << 1), m_Iterator(utf16Iterator) {}
|
||||
inline CUtfStringView(const u32string &utf32Str) : m_Str(utf32Str.c_str()), m_Size(utf32Str.size() << 2), m_Iterator(utf32Iterator) {}
|
||||
|
||||
std::string toUtf8(); // Makes a copy
|
||||
u32string toUtf32(); // Makes a copy
|
||||
|
||||
inline bool isUtf8() { return m_Iterator == utf8Iterator; }
|
||||
inline bool isUtf16() { return m_Iterator == utf16Iterator; }
|
||||
inline bool isUtf32() { return m_Iterator == utf32Iterator; }
|
||||
|
||||
struct const_iterator
|
||||
{
|
||||
public:
|
||||
inline void operator++()
|
||||
{
|
||||
m_Char = m_View.m_Iterator(&m_Addr);
|
||||
if ((ptrdiff_t)m_Addr > ((ptrdiff_t)m_View.m_Str + m_View.m_Size))
|
||||
{
|
||||
m_Addr = 0;
|
||||
m_Char = 0;
|
||||
}
|
||||
}
|
||||
inline bool operator!=(const const_iterator &o) const { return m_Addr != o.m_Addr; }
|
||||
inline bool operator==(const const_iterator &o) const { return m_Addr == o.m_Addr; }
|
||||
inline const u32char &operator*() const { return m_Char; }
|
||||
private:
|
||||
friend class CUtfStringView;
|
||||
inline const_iterator(const CUtfStringView &view, const void *addr) : m_View(view), m_Addr(addr), m_Char(addr ? view.m_Iterator(&m_Addr) : 0) { }
|
||||
const CUtfStringView &m_View;
|
||||
const void *m_Addr; // Next address
|
||||
u32char m_Char;
|
||||
};
|
||||
|
||||
typedef const_iterator iterator;
|
||||
|
||||
iterator begin() const { return iterator(*this, m_Str); }
|
||||
inline iterator end() const { return iterator(*this, NULL); }
|
||||
|
||||
private:
|
||||
typedef u32char (*TIterator)(const void **addr);
|
||||
static u32char utf8Iterator(const void **addr);
|
||||
static u32char utf16Iterator(const void **addr);
|
||||
static u32char utf32Iterator(const void **addr);
|
||||
|
||||
const void *const m_Str;
|
||||
const size_t m_Size;
|
||||
const TIterator m_Iterator;
|
||||
|
||||
}; /* class CUtfStringView */
|
||||
|
||||
} /* namespace NLMISC */
|
||||
|
||||
#endif /* #ifndef NLMISC_UTF_STRING_VIEW_H */
|
||||
|
||||
/* end of file */
|
@ -0,0 +1,76 @@
|
||||
// NeL - MMORPG Framework <https://wiki.ryzom.dev/>
|
||||
// Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "stdmisc.h"
|
||||
|
||||
// Project includes
|
||||
#include <nel/misc/utf_string_view.h>
|
||||
|
||||
namespace NLMISC
|
||||
{
|
||||
|
||||
std::string CUtfStringView::toUtf8()
|
||||
{
|
||||
if (m_Iterator == utf8Iterator)
|
||||
return std::string((const char *)m_Str, (const char *)((ptrdiff_t)m_Str + m_Size));
|
||||
std::string res;
|
||||
res.reserve((m_Size << 1) + 1);
|
||||
for (iterator it(begin()), end(end()); it != end; ++it)
|
||||
{
|
||||
u32char c = *it;
|
||||
res += (char)c; /* TODO: Encode UTF-8 */
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a UTF-32 copy of the viewed string.
/// A view that is already UTF-32 is copied directly; any other encoding is
/// decoded codepoint by codepoint.
u32string CUtfStringView::toUtf32()
{
	if (m_Iterator == utf32Iterator)
	{
		// m_Size is in bytes, 4 per codepoint
		const u32char *const first = static_cast<const u32char *>(m_Str);
		return u32string(first, first + (m_Size >> 2));
	}

	u32string res;
	res.reserve(m_Size + 1); // Upper bound: every codepoint takes at least one source byte

	// Named `last` rather than `end`: a local called `end` would hide end() inside
	// its own initializer.
	const iterator last(end());
	for (iterator it(begin()); it != last; ++it)
		res += *it;
	return res;
}
|
||||
|
||||
/// Decodes one UTF-8 sequence at *addr and advances *addr past the bytes consumed.
/// Returns U+FFFD for an invalid lead byte or a sequence that is cut short.
/// Decoding stops at the first byte that is not a continuation byte, so it never
/// runs past a NUL terminator.
u32char CUtfStringView::utf8Iterator(const void **addr)
{
	// UTF-8 is read byte by byte
	const unsigned char *p = static_cast<const unsigned char *>(*addr);
	u32char c = *p;
	++p;

	int extra = 0; // Number of continuation bytes announced by the lead byte
	if (c < 0x80)
	{
		// ASCII, nothing more to read
	}
	else if ((c & 0xE0) == 0xC0)
	{
		c &= 0x1F;
		extra = 1;
	}
	else if ((c & 0xF0) == 0xE0)
	{
		c &= 0x0F;
		extra = 2;
	}
	else if ((c & 0xF8) == 0xF0)
	{
		c &= 0x07;
		extra = 3;
	}
	else
	{
		// Stray continuation byte or invalid lead byte
		c = 0xFFFD;
	}

	for (; extra > 0; --extra)
	{
		const unsigned char cx = *p;
		if ((cx & 0xC0) != 0x80)
		{
			// Sequence cut short: leave the offending byte for the next call
			c = 0xFFFD;
			break;
		}
		c = (c << 6) | (cx & 0x3F);
		++p;
	}

	*addr = p;
	return c;
}
|
||||
|
||||
/// Decodes one UTF-16 codepoint at *addr and advances *addr past the code units consumed.
/// A high surrogate followed by a low surrogate is combined into one supplementary-plane
/// codepoint. An unpaired surrogate is returned unchanged.
u32char CUtfStringView::utf16Iterator(const void **addr)
{
	const ucchar **pp = reinterpret_cast<const ucchar **>(addr);
	const u32char hi = **pp;
	++(*pp);

	if ((hi & 0xFC00) == 0xD800)
	{
		// High surrogate: peek at the next unit and consume it only if it completes the pair
		const u32char lo = **pp;
		if ((lo & 0xFC00) == 0xDC00)
		{
			++(*pp);
			return 0x10000 + ((hi & 0x03FF) << 10) + (lo & 0x03FF);
		}
	}

	return hi;
}
|
||||
|
||||
/// Reads the UTF-32 codepoint stored at *addr and moves *addr to the next element.
u32char CUtfStringView::utf32Iterator(const void **addr)
{
	const u32char *cursor = static_cast<const u32char *>(*addr);
	const u32char codepoint = *cursor;
	*addr = cursor + 1;
	return codepoint;
}
|
||||
|
||||
} /* namespace NLMISC */
|
||||
|
||||
/* end of file */
|
Loading…
Reference in New Issue