feat(library/init/string): add utf8_length

This commit is contained in:
Leonardo de Moura 2016-07-19 14:22:37 -04:00
parent d8e6915366
commit 3dccaa8e39
4 changed files with 35 additions and 2 deletions

View file

@ -119,7 +119,7 @@ meta_definition subtype.has_to_format [instance] {A : Type} {P : A → Prop} [ha
has_to_format.mk (λ s, to_fmt (elt_of s))
meta_definition format.bracket : string → string → format → format
| o c f := to_fmt o ++ nest (length o) f ++ to_fmt c
| o c f := to_fmt o ++ nest (utf8_length o) f ++ to_fmt c
meta_definition format.paren (f : format) : format :=
format.bracket "(" ")" f
@ -131,5 +131,4 @@ meta_definition format.sbracket (f : format) : format :=
format.bracket "[" "]" f
meta_definition format.dcbrace (f : format) : format :=
-- TODO(Leo): backet uses length, but ⦃ is unicode, we need a function that computes the utf8 size of a string
to_fmt "⦃" ++ nest 1 f ++ to_fmt "⦄"

View file

@ -18,3 +18,21 @@ end string
definition string.has_append [instance] : has_append string :=
has_append.mk string.concat
open list
private definition utf8_length_aux : nat → nat → string → nat
| 0 r (c::s) :=
let n := char.to_nat c in
if n < 0x80 then utf8_length_aux 0 (r+1) s
else if 0xC0 ≤ n ∧ n < 0xE0 then utf8_length_aux 1 (r+1) s
else if 0xE0 ≤ n ∧ n < 0xF0 then utf8_length_aux 2 (r+1) s
else if 0xF0 ≤ n ∧ n < 0xF8 then utf8_length_aux 3 (r+1) s
else if 0xF8 ≤ n ∧ n < 0xFC then utf8_length_aux 4 (r+1) s
else if 0xFC ≤ n ∧ n < 0xFE then utf8_length_aux 5 (r+1) s
else utf8_length_aux 0 (r+1) s
| (n+1) r (c::s) := utf8_length_aux n r s
| _ r [] := r
definition utf8_length : string → nat
| s := utf8_length_aux 0 0 (reverse s)

9
tests/lean/utf8.lean Normal file
View file

@ -0,0 +1,9 @@
open list
vm_eval length "α₁"
vm_eval length "α₁ → β₁"
vm_eval length "∀ α : nat → nat, α 0 ≥ 0"
print "------------"
vm_eval utf8_length "α₁"
vm_eval utf8_length "α₁ → β₁"
vm_eval utf8_length "∀ α : nat → nat, α 0 ≥ 0"

View file

@ -0,0 +1,7 @@
5
15
32
------------
2
7
24