fix(util/utf8): UTF8 decoding
This commit is contained in:
parent
734ee66514
commit
1a80ea9c8e
3 changed files with 7 additions and 3 deletions
|
|
@@ -150,7 +150,7 @@ unsigned next_utf8(std::string const & str, size_t & i) {
|
|||
/* one continuation (128 to 2047) */
|
||||
if ((c & 0xe0) == 0xc0 && i + 1 < str.size()) {
|
||||
unsigned c1 = static_cast<unsigned char>(str[i+1]);
|
||||
- unsigned r = ((c & 0x1f) << 6) | c1;
|
||||
+ unsigned r = ((c & 0x1f) << 6) | (c1 & 0x3f);
|
||||
if (r >= 128) {
|
||||
i += 2;
|
||||
return r;
|
||||
|
|
@@ -161,7 +161,7 @@ unsigned next_utf8(std::string const & str, size_t & i) {
|
|||
if ((c & 0xf0) == 0xe0 && i + 2 < str.size()) {
|
||||
unsigned c1 = static_cast<unsigned char>(str[i+1]);
|
||||
unsigned c2 = static_cast<unsigned char>(str[i+2]);
|
||||
- unsigned r = ((c & 0x0f) << 12) | (c1 << 6) | c2;
|
||||
+ unsigned r = ((c & 0x0f) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f);
|
||||
if (r >= 2048 && (r < 55296 || r > 57343)) {
|
||||
i += 3;
|
||||
return r;
|
||||
|
|
@@ -173,7 +173,7 @@ unsigned next_utf8(std::string const & str, size_t & i) {
|
|||
unsigned c1 = static_cast<unsigned char>(str[i+1]);
|
||||
unsigned c2 = static_cast<unsigned char>(str[i+2]);
|
||||
unsigned c3 = static_cast<unsigned char>(str[i+3]);
|
||||
- unsigned r = ((c & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3;
|
||||
+ unsigned r = ((c & 0x07) << 18) | ((c1 & 0x3f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f);
|
||||
if (r >= 65536 && r <= 1114111) {
|
||||
i += 4;
|
||||
return r;
|
||||
|
|
|
|||
2
tests/lean/unicode_lit.lean
Normal file
2
tests/lean/unicode_lit.lean
Normal file
|
|
@@ -0,0 +1,2 @@
|
|||
#eval "≠∀Π∃⁻ʰℵ⬝"
|
||||
#eval "≠∀Π∃⁻ʰℵ⬝".to_list.map char.to_nat
|
||||
2
tests/lean/unicode_lit.lean.expected.out
Normal file
2
tests/lean/unicode_lit.lean.expected.out
Normal file
|
|
@@ -0,0 +1,2 @@
|
|||
"≠∀Π∃⁻ʰℵ⬝"
|
||||
[8800, 8704, 928, 8707, 8315, 688, 8501, 11037]
|
||||
Loading…
Add table
Reference in a new issue