feat(library/init/string): add utf8_length
This commit is contained in:
parent
d8e6915366
commit
3dccaa8e39
4 changed files with 35 additions and 2 deletions
|
|
@ -119,7 +119,7 @@ meta_definition subtype.has_to_format [instance] {A : Type} {P : A → Prop} [ha
|
|||
has_to_format.mk (λ s, to_fmt (elt_of s))
|
||||
|
||||
meta_definition format.bracket : string → string → format → format
|
||||
| o c f := to_fmt o ++ nest (length o) f ++ to_fmt c
|
||||
| o c f := to_fmt o ++ nest (utf8_length o) f ++ to_fmt c
|
||||
|
||||
meta_definition format.paren (f : format) : format :=
|
||||
format.bracket "(" ")" f
|
||||
|
|
@ -131,5 +131,4 @@ meta_definition format.sbracket (f : format) : format :=
|
|||
format.bracket "[" "]" f
|
||||
|
||||
meta_definition format.dcbrace (f : format) : format :=
|
||||
-- TODO(Leo): backet uses length, but ⦃ is unicode, we need a function that computes the utf8 size of a string
|
||||
to_fmt "⦃" ++ nest 1 f ++ to_fmt "⦄"
|
||||
|
|
|
|||
|
|
@ -18,3 +18,21 @@ end string
|
|||
|
||||
definition string.has_append [instance] : has_append string :=
|
||||
has_append.mk string.concat
|
||||
|
||||
open list
|
||||
|
||||
private definition utf8_length_aux : nat → nat → string → nat
|
||||
| 0 r (c::s) :=
|
||||
let n := char.to_nat c in
|
||||
if n < 0x80 then utf8_length_aux 0 (r+1) s
|
||||
else if 0xC0 ≤ n ∧ n < 0xE0 then utf8_length_aux 1 (r+1) s
|
||||
else if 0xE0 ≤ n ∧ n < 0xF0 then utf8_length_aux 2 (r+1) s
|
||||
else if 0xF0 ≤ n ∧ n < 0xF8 then utf8_length_aux 3 (r+1) s
|
||||
else if 0xF8 ≤ n ∧ n < 0xFC then utf8_length_aux 4 (r+1) s
|
||||
else if 0xFC ≤ n ∧ n < 0xFE then utf8_length_aux 5 (r+1) s
|
||||
else utf8_length_aux 0 (r+1) s
|
||||
| (n+1) r (c::s) := utf8_length_aux n r s
|
||||
| _ r [] := r
|
||||
|
||||
definition utf8_length : string → nat
|
||||
| s := utf8_length_aux 0 0 (reverse s)
|
||||
|
|
|
|||
9
tests/lean/utf8.lean
Normal file
9
tests/lean/utf8.lean
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
open list
|
||||
|
||||
vm_eval length "α₁"
|
||||
vm_eval length "α₁ → β₁"
|
||||
vm_eval length "∀ α : nat → nat, α 0 ≥ 0"
|
||||
print "------------"
|
||||
vm_eval utf8_length "α₁"
|
||||
vm_eval utf8_length "α₁ → β₁"
|
||||
vm_eval utf8_length "∀ α : nat → nat, α 0 ≥ 0"
|
||||
7
tests/lean/utf8.lean.expected.out
Normal file
7
tests/lean/utf8.lean.expected.out
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
5
|
||||
15
|
||||
32
|
||||
------------
|
||||
2
|
||||
7
|
||||
24
|
||||
Loading…
Add table
Reference in a new issue