diff --git a/src/util/utf8.cpp b/src/util/utf8.cpp index 1082ea25de..494b15f2f6 100644 --- a/src/util/utf8.cpp +++ b/src/util/utf8.cpp @@ -150,7 +150,7 @@ unsigned next_utf8(std::string const & str, size_t & i) { /* one continuation (128 to 2047) */ if ((c & 0xe0) == 0xc0 && i + 1 < str.size()) { unsigned c1 = static_cast<unsigned char>(str[i+1]); - unsigned r = ((c & 0x1f) << 6) | c1; + unsigned r = ((c & 0x1f) << 6) | (c1 & 0x3f); if (r >= 128) { i += 2; return r; @@ -161,7 +161,7 @@ unsigned next_utf8(std::string const & str, size_t & i) { if ((c & 0xf0) == 0xe0 && i + 2 < str.size()) { unsigned c1 = static_cast<unsigned char>(str[i+1]); unsigned c2 = static_cast<unsigned char>(str[i+2]); - unsigned r = ((c & 0x0f) << 12) | (c1 << 6) | c2; + unsigned r = ((c & 0x0f) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f); if (r >= 2048 && (r < 55296 || r > 57343)) { i += 3; return r; @@ -173,7 +173,7 @@ unsigned next_utf8(std::string const & str, size_t & i) { unsigned c1 = static_cast<unsigned char>(str[i+1]); unsigned c2 = static_cast<unsigned char>(str[i+2]); unsigned c3 = static_cast<unsigned char>(str[i+3]); - unsigned r = ((c & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3; + unsigned r = ((c & 0x07) << 18) | ((c1 & 0x3f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f); if (r >= 65536 && r <= 1114111) { i += 4; return r; diff --git a/tests/lean/unicode_lit.lean b/tests/lean/unicode_lit.lean new file mode 100644 index 0000000000..69438d5c55 --- /dev/null +++ b/tests/lean/unicode_lit.lean @@ -0,0 +1,2 @@ +#eval "≠∀Π∃⁻ʰℵ⬝" +#eval "≠∀Π∃⁻ʰℵ⬝".to_list.map char.to_nat diff --git a/tests/lean/unicode_lit.lean.expected.out b/tests/lean/unicode_lit.lean.expected.out new file mode 100644 index 0000000000..2ab9ad6608 --- /dev/null +++ b/tests/lean/unicode_lit.lean.expected.out @@ -0,0 +1,2 @@ +"≠∀Π∃⁻ʰℵ⬝" +[8800, 8704, 928, 8707, 8315, 688, 8501, 11037]