diff --git a/library/init/meta/format.lean b/library/init/meta/format.lean index 68cd7f9bcf..e10df75dd6 100644 --- a/library/init/meta/format.lean +++ b/library/init/meta/format.lean @@ -119,7 +119,7 @@ meta_definition subtype.has_to_format [instance] {A : Type} {P : A → Prop} [ha has_to_format.mk (λ s, to_fmt (elt_of s)) meta_definition format.bracket : string → string → format → format -| o c f := to_fmt o ++ nest (length o) f ++ to_fmt c +| o c f := to_fmt o ++ nest (utf8_length o) f ++ to_fmt c meta_definition format.paren (f : format) : format := format.bracket "(" ")" f @@ -131,5 +131,4 @@ meta_definition format.sbracket (f : format) : format := format.bracket "[" "]" f meta_definition format.dcbrace (f : format) : format := --- TODO(Leo): backet uses length, but ⦃ is unicode, we need a function that computes the utf8 size of a string to_fmt "⦃" ++ nest 1 f ++ to_fmt "⦄" diff --git a/library/init/string.lean b/library/init/string.lean index 65549546e6..d71de1ceab 100644 --- a/library/init/string.lean +++ b/library/init/string.lean @@ -18,3 +18,21 @@ end string definition string.has_append [instance] : has_append string := has_append.mk string.concat + +open list + +private definition utf8_length_aux : nat → nat → string → nat +| 0 r (c::s) := + let n := char.to_nat c in + if n < 0x80 then utf8_length_aux 0 (r+1) s + else if 0xC0 ≤ n ∧ n < 0xE0 then utf8_length_aux 1 (r+1) s + else if 0xE0 ≤ n ∧ n < 0xF0 then utf8_length_aux 2 (r+1) s + else if 0xF0 ≤ n ∧ n < 0xF8 then utf8_length_aux 3 (r+1) s + else if 0xF8 ≤ n ∧ n < 0xFC then utf8_length_aux 4 (r+1) s + else if 0xFC ≤ n ∧ n < 0xFE then utf8_length_aux 5 (r+1) s + else utf8_length_aux 0 (r+1) s +| (n+1) r (c::s) := utf8_length_aux n r s +| _ r [] := r + +definition utf8_length : string → nat +| s := utf8_length_aux 0 0 (reverse s) diff --git a/tests/lean/utf8.lean b/tests/lean/utf8.lean new file mode 100644 index 0000000000..89c5f39565 --- /dev/null +++ b/tests/lean/utf8.lean @@ -0,0 +1,9 @@ +open list + +vm_eval length "α₁" +vm_eval length "α₁ → β₁" +vm_eval length "∀ α : nat → nat, α 0 ≥ 0" +print "------------" +vm_eval utf8_length "α₁" +vm_eval utf8_length "α₁ → β₁" +vm_eval utf8_length "∀ α : nat → nat, α 0 ≥ 0" diff --git a/tests/lean/utf8.lean.expected.out b/tests/lean/utf8.lean.expected.out new file mode 100644 index 0000000000..908d347ee2 --- /dev/null +++ b/tests/lean/utf8.lean.expected.out @@ -0,0 +1,7 @@ +5 +15 +32 +------------ +2 +7 +24