Stricter UTF-8

develop
kaetemi 4 years ago
parent 19e00522d9
commit 38e5dfe007

@ -178,8 +178,13 @@ u32char CUtfStringView::utf8Iterator(const void **addr)
const uint8 **pp = reinterpret_cast<const uint8 **>(addr);
u32char c0 = **pp;
++(*pp);
if ((c0 & 0xC0) == 0xC0)
if (c0 >= 0x80)
{
if (c0 < 0xC0)
{
// Replacement character U+FFFD
return 0xFFFD;
}
uint8 cx = **pp;
if ((cx & 0xC0) == 0x80)
{
@ -206,6 +211,11 @@ u32char CUtfStringView::utf8Iterator(const void **addr)
c0 <<= 6;
c0 |= (cx & 0x3F); // 22 bits now (17 - 1 + 6), 3-byte encoding
}
else
{
// Replacement character U+FFFD
return 0xFFFD;
}
}
else if ((c0 & 0xFC00) == 0xD800) // Higher bits of nutcase UTF-16 encoded as UTF-8
{
@ -222,10 +232,30 @@ u32char CUtfStringView::utf8Iterator(const void **addr)
c0 |= (c1 & 0x03FF);
c0 += 0x10000;
}
else
{
// Replacement character U+FFFD
return 0xFFFD;
}
}
else if ((c0 & 0xFC00) == 0xDC00) // Lower bits of nutcase UTF-16 encoded as UTF-8
{
// Replacement character U+FFFD
return 0xFFFD;
}
}
else
{
// Replacement character U+FFFD
return 0xFFFD;
}
}
}
else
{
// Replacement character U+FFFD
return 0xFFFD;
}
}
return c0;
}

Loading…
Cancel
Save