fix(runtime/object): string_utf8_extract
This commit is contained in:
parent
67944a6c81
commit
01b4983fa2
4 changed files with 31 additions and 5 deletions
|
|
@ -1593,11 +1593,23 @@ usize string_utf8_next(b_obj_arg s, usize i) {
|
|||
return i + 1;
|
||||
}
|
||||
|
||||
static inline bool is_utf8_first_byte(unsigned char c) {
|
||||
return (c & 0x80) == 0 || (c & 0xe0) == 0xc0 || (c & 0xf0) == 0xe0 || (c & 0xf8) == 0xf0;
|
||||
}
|
||||
|
||||
obj_res string_utf8_extract(b_obj_arg s, usize b, usize e) {
|
||||
char const * str = string_cstr(s);
|
||||
usize sz = string_size(s) - 1;
|
||||
if (b >= e || b >= sz) return mk_string("");
|
||||
/* In the reference implementation if `b` is not pointing to a valid UTF8
|
||||
character start position, the result is the empty string. */
|
||||
if (!is_utf8_first_byte(str[b])) return mk_string("");
|
||||
if (e > sz) e = sz;
|
||||
lean_assert(b < e);
|
||||
lean_assert(e > 0);
|
||||
/* In the reference implementation if `e` is not pointing to a valid UTF8
|
||||
character start position, it is assumed to be at the end. */
|
||||
if (e < sz && !is_utf8_first_byte(str[e])) e = sz;
|
||||
usize new_sz = e - b;
|
||||
lean_assert(new_sz > 0);
|
||||
obj_res r = alloc_string(new_sz+1, new_sz+1, 0);
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ unsigned get_utf8_size(unsigned char c) {
|
|||
else if (c == 0xFF)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
return 1; /* invalid */
|
||||
}
|
||||
|
||||
size_t utf8_strlen(char const * str) {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,15 @@ def show_chars : nat → string → string.utf8_pos → io unit
|
|||
show_chars n s (s.utf8_next idx)
|
||||
|
||||
def main : io uint32 :=
|
||||
let s₁ := "hello α world β" in
|
||||
io.println' (to_string s₁.utf8_byte_size) *>
|
||||
let s₁ := "hello α_world_β" in
|
||||
let b := string.utf8_begin in
|
||||
let e := s₁.utf8_byte_size in
|
||||
io.println' (s₁.utf8_extract b e) *>
|
||||
io.println' (s₁.utf8_extract (b+2) e) *>
|
||||
io.println' (s₁.utf8_extract (b+2) (e-1)) *>
|
||||
io.println' (s₁.utf8_extract (b+2) (e-2)) *>
|
||||
io.println' (s₁.utf8_extract (b+7) e) *>
|
||||
io.println' (s₁.utf8_extract (b+8) e) *>
|
||||
io.println' (to_string e) *>
|
||||
show_chars s₁.utf8_byte_size.to_nat s₁ 0 *>
|
||||
pure 0
|
||||
|
|
|
|||
|
|
@ -1,3 +1,9 @@
|
|||
hello α_world_β
|
||||
llo α_world_β
|
||||
llo α_world_β
|
||||
llo α_world_
|
||||
|
||||
_world_β
|
||||
17
|
||||
>> h
|
||||
>> e
|
||||
|
|
@ -6,11 +12,11 @@
|
|||
>> o
|
||||
>>
|
||||
>> α
|
||||
>>
|
||||
>> _
|
||||
>> w
|
||||
>> o
|
||||
>> r
|
||||
>> l
|
||||
>> d
|
||||
>>
|
||||
>> _
|
||||
>> β
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue