feat: String.Pos.isValid (#3959)
This adds a function that can be used to check whether a position is on a UTF-8 byte boundary.
This commit is contained in:
parent
291bb84c97
commit
aeacb7b69e
2 changed files with 23 additions and 0 deletions
|
|
@ -44,6 +44,16 @@ def append : String → (@& String) → String
|
|||
def toList (s : String) : List Char :=
|
||||
s.data
|
||||
|
||||
/-- Returns true if `p` is a valid UTF-8 position in the string `s`, meaning that `p ≤ s.endPos`
|
||||
and `p` lies on a UTF-8 character boundary. This has an O(1) implementation in the runtime. -/
|
||||
@[extern "lean_string_is_valid_pos"]
|
||||
def Pos.isValid (s : @&String) (p : @& Pos) : Bool :=
|
||||
go s.data 0
|
||||
where
|
||||
go : List Char → Pos → Bool
|
||||
| [], i => i = p
|
||||
| c::cs, i => if i = p then true else go cs (i + c)
|
||||
|
||||
def utf8GetAux : List Char → Pos → Pos → Char
|
||||
| [], _, _ => default
|
||||
| c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p
|
||||
|
|
|
|||
|
|
@ -1922,6 +1922,19 @@ static inline bool is_utf8_first_byte(unsigned char c) {
|
|||
return (c & 0x80) == 0 || (c & 0xe0) == 0xc0 || (c & 0xf0) == 0xe0 || (c & 0xf8) == 0xf0;
|
||||
}
|
||||
|
||||
extern "C" LEAN_EXPORT uint8 lean_string_is_valid_pos(b_obj_arg s, b_obj_arg i0) {
|
||||
if (!lean_is_scalar(i0)) {
|
||||
/* See comment at string_utf8_get */
|
||||
return false;
|
||||
}
|
||||
usize i = lean_unbox(i0);
|
||||
usize sz = lean_string_size(s) - 1;
|
||||
if (i > sz) return false;
|
||||
if (i == sz) return true;
|
||||
char const * str = lean_string_cstr(s);
|
||||
return is_utf8_first_byte(str[i]);
|
||||
}
|
||||
|
||||
extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s, b_obj_arg b0, b_obj_arg e0) {
|
||||
if (!lean_is_scalar(b0) || !lean_is_scalar(e0)) {
|
||||
/* See comment at string_utf8_get */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue