chore: use UTF8 instead of Utf8 in identifiers (#10636)

This PR renames `String.getUtf8Byte` to `String.getUTF8Byte` in order to
adhere to the standard library naming convention.
This commit is contained in:
Markus Himmel 2025-10-01 19:57:32 +02:00 committed by GitHub
parent 784a063092
commit 2cca32ccc3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 195 additions and 206 deletions

View file

@ -33,9 +33,9 @@ theorem List.utf8Encode_append {l l' : List Char} :
theorem List.utf8Encode_cons {c : Char} {l : List Char} : (c :: l).utf8Encode = [c].utf8Encode ++ l.utf8Encode := by
rw [← singleton_append, List.utf8Encode_append]
theorem List.isUtf8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} :
UInt8.IsUtf8FirstByte [c].utf8Encode[i] ↔ i = 0 := by
simp [List.utf8Encode_singleton, UInt8.isUtf8FirstByte_getElem_utf8EncodeChar]
theorem List.isUTF8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} :
UInt8.IsUTF8FirstByte [c].utf8Encode[i] ↔ i = 0 := by
simp [List.utf8Encode_singleton, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar]
@[simp]
theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] := by
@ -45,26 +45,26 @@ theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ []
theorem List.utf8Encode_eq_empty {l : List Char} : l.utf8Encode = ByteArray.empty ↔ l = [] := by
simp [utf8Encode, ← List.eq_nil_iff_forall_not_mem]
theorem ByteArray.isValidUtf8_utf8Encode {l : List Char} : IsValidUtf8 l.utf8Encode :=
theorem ByteArray.isValidUTF8_utf8Encode {l : List Char} : IsValidUTF8 l.utf8Encode :=
.intro l rfl
@[simp]
theorem ByteArray.isValidUtf8_empty : IsValidUtf8 ByteArray.empty :=
theorem ByteArray.isValidUTF8_empty : IsValidUTF8 ByteArray.empty :=
.intro [] (by simp)
theorem Char.isValidUtf8_toByteArray_utf8EncodeChar {c : Char} :
ByteArray.IsValidUtf8 (String.utf8EncodeChar c).toByteArray :=
theorem Char.isValidUTF8_toByteArray_utf8EncodeChar {c : Char} :
ByteArray.IsValidUTF8 (String.utf8EncodeChar c).toByteArray :=
.intro [c] (by simp [List.utf8Encode_singleton])
theorem ByteArray.IsValidUtf8.append {b b' : ByteArray} (h : IsValidUtf8 b) (h' : IsValidUtf8 b') :
IsValidUtf8 (b ++ b') := by
theorem ByteArray.IsValidUTF8.append {b b' : ByteArray} (h : IsValidUTF8 b) (h' : IsValidUTF8 b') :
IsValidUTF8 (b ++ b') := by
rcases h with ⟨m, rfl⟩
rcases h' with ⟨m', rfl⟩
exact .intro (m ++ m') (by simp)
theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
IsValidUtf8 ([c].utf8Encode ++ b) ↔ IsValidUtf8 b := by
refine ⟨?_, fun h => IsValidUtf8.append isValidUtf8_utf8Encode h⟩
theorem ByteArray.isValidUTF8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
IsValidUTF8 ([c].utf8Encode ++ b) ↔ IsValidUTF8 b := by
refine ⟨?_, fun h => IsValidUTF8.append isValidUTF8_utf8Encode h⟩
rintro ⟨l, hl⟩
match l with
| [] => simp at hl
@ -75,7 +75,7 @@ theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c
List.utf8DecodeChar?_utf8Encode_cons] using hl
rw [← List.singleton_append (l := l), List.utf8Encode_append,
ByteArray.append_right_inj] at hl
exact hl ▸ isValidUtf8_utf8Encode
exact hl ▸ isValidUTF8_utf8Encode
@[inline, expose]
def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) :=
@ -104,13 +104,13 @@ where
if hi : i = b.size then
true
else
match h : validateUtf8At b i with
match h : validateUTF8At b i with
| false => false
| true => go fuel (i + (b[i].utf8ByteSize (isUtf8FirstByte_of_validateUtf8At h)).byteIdx)
| true => go fuel (i + (b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h)).byteIdx)
?_ ?_
termination_by structural fuel
finally
all_goals rw [ByteArray.validateUtf8At_eq_isSome_utf8DecodeChar?] at h
all_goals rw [ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar?] at h
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
exact add_utf8Size_utf8DecodeChar_le_size
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
@ -118,7 +118,7 @@ finally
have := (b.utf8DecodeChar i h).utf8Size_pos
omega
theorem ByteArray.isSome_utf8Decode?Go_eq_validateUtf8Go {b : ByteArray} {fuel : Nat}
theorem ByteArray.isSome_utf8Decode?Go_eq_validateUTF8Go {b : ByteArray} {fuel : Nat}
{i : Nat} {acc : Array Char} {hi : i ≤ b.size} {hf : b.size - i < fuel} :
(utf8Decode?.go b fuel i acc hi hf).isSome = validateUTF8.go b fuel i hi hf := by
fun_induction utf8Decode?.go with
@ -128,20 +128,20 @@ theorem ByteArray.isSome_utf8Decode?Go_eq_validateUtf8Go {b : ByteArray} {fuel :
split
· rfl
· rename_i heq
simp [validateUtf8At_eq_isSome_utf8DecodeChar?, h₂] at heq
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
| case3 i acc hi fuel hf h₁ c h₂ ih =>
simp [validateUTF8.go, h₁]
split
· rename_i heq
simp [validateUtf8At_eq_isSome_utf8DecodeChar?, h₂] at heq
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
· rw [ih]
congr
rw [← ByteArray.utf8Size_utf8DecodeChar (h := by simp [h₂])]
simp [utf8DecodeChar, h₂]
theorem ByteArray.isSome_utf8Decode?_eq_validateUtf8 {b : ByteArray} :
theorem ByteArray.isSome_utf8Decode?_eq_validateUTF8 {b : ByteArray} :
b.utf8Decode?.isSome = b.validateUTF8 :=
b.isSome_utf8Decode?Go_eq_validateUtf8Go
b.isSome_utf8Decode?Go_eq_validateUTF8Go
theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'}
(hbb' : b = b') (hii' : i = i') (hacc : acc = acc') :
@ -172,7 +172,7 @@ theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] :=
simp [utf8Decode?, utf8Decode?.go]
private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
(ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUtf8 (b.extract i b.size) := by
(ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUTF8 (b.extract i b.size) := by
fun_induction ByteArray.utf8Decode?.go with
| case1 => simp
| case2 fuel i hi hf acc h₁ h₂ =>
@ -193,23 +193,23 @@ private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat
rw [ByteArray.extract_eq_extract_append_extract (i := i) (i + c.utf8Size) (by omega)
(le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢
rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega),
← List.utf8Encode_singleton, isValidUtf8_utf8Encode_singleton_append_iff]
← List.utf8Encode_singleton, isValidUTF8_utf8Encode_singleton_append_iff]
theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} :
b.utf8Decode?.isSome ↔ IsValidUtf8 b := by
b.utf8Decode?.isSome ↔ IsValidUTF8 b := by
rw [utf8Decode?, isSome_utf8Decode?go_iff, extract_zero_size]
@[simp]
theorem ByteArray.validateUTF8_eq_true_iff {b : ByteArray} :
b.validateUTF8 = true ↔ IsValidUtf8 b := by
rw [← isSome_utf8Decode?_eq_validateUtf8, isSome_utf8Decode?_iff]
b.validateUTF8 = true ↔ IsValidUTF8 b := by
rw [← isSome_utf8Decode?_eq_validateUTF8, isSome_utf8Decode?_iff]
@[simp]
theorem ByteArray.validateUTF8_eq_false_iff {b : ByteArray} :
b.validateUTF8 = false ↔ ¬ IsValidUtf8 b := by
b.validateUTF8 = false ↔ ¬ IsValidUTF8 b := by
simp [← Bool.not_eq_true]
instance {b : ByteArray} : Decidable b.IsValidUtf8 :=
instance {b : ByteArray} : Decidable b.IsValidUTF8 :=
decidable_of_iff (b.validateUTF8 = true) ByteArray.validateUTF8_eq_true_iff
/--
@ -217,7 +217,7 @@ Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.o
the corresponding string.
-/
@[inline, expose]
def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUtf8) : String :=
def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUTF8) : String :=
.ofByteArray a h
/--
@ -225,14 +225,14 @@ Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.o
the corresponding string, or returns `none` if the array is not a valid UTF-8 encoding of a string.
-/
@[inline, expose] def String.fromUTF8? (a : ByteArray) : Option String :=
if h : a.IsValidUtf8 then some (fromUTF8 a h) else none
if h : a.IsValidUTF8 then some (fromUTF8 a h) else none
/--
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
the corresponding string, or panics if the array is not a valid UTF-8 encoding of a string.
-/
@[inline, expose] def String.fromUTF8! (a : ByteArray) : String :=
if h : a.IsValidUtf8 then fromUTF8 a h else panic! "invalid UTF-8 string"
if h : a.IsValidUTF8 then fromUTF8 a h else panic! "invalid UTF-8 string"
/--
Encodes a string in UTF-8 as an array of bytes.
@ -258,7 +258,7 @@ Examples:
@[extern "lean_string_append", expose]
def String.append (s : String) (t : @& String) : String where
bytes := s.bytes ++ t.bytes
isValidUtf8 := s.isValidUtf8.append t.isValidUtf8
isValidUTF8 := s.isValidUTF8.append t.isValidUTF8
instance : Append String where
append s t := s.append t
@ -294,7 +294,7 @@ theorem List.asString_append {l₁ l₂ : List Char} : (l₁ ++ l₂).asString =
@[expose]
def String.Internal.toArray (b : String) : Array Char :=
b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUtf8)
b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUTF8)
@[simp]
theorem String.Internal.toArray_empty : String.Internal.toArray "" = #[] := by
@ -556,10 +556,10 @@ Predicate for validity of positions inside a `String`.
There are multiple equivalent definitions for validity.
We say that a position is valid if the string obtained by taking all of the bytes up to, but
excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUtf8_extract_zero`.
excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_zero`.
Similarly, a position is valid if the string obtained by taking all of the bytes starting at the
given position is valid UTF-8; see `Pos.isValid_iff_isValidUtf8_extract_utf8ByteSize`.
given position is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_utf8ByteSize`.
An equivalent condition is that the position is the length of the UTF-8 encoding of
some prefix of the characters of the string; see `Pos.isValid_iff_exists_append` and
@ -567,7 +567,7 @@ some prefix of the characters of the string; see `Pos.isValid_iff_exists_append`
Another equivalent condition that can be checked efficiently is that the position is either the
end position or strictly smaller than the end position and the byte at the position satisfies
`UInt8.IsUtf8FirstByte`; see `Pos.isValid_iff_isUtf8FirstByte`.
`UInt8.IsUTF8FirstByte`; see `Pos.isValid_iff_isUTF8FirstByte`.
Examples:
* `String.Pos.IsValid "abc" ⟨0⟩`
@ -582,7 +582,7 @@ Examples:
-/
structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where private mk ::
le_endPos : off ≤ s.endPos
isValidUtf8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUtf8
isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8
theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char} (b : ByteArray)
(h : l.utf8Encode ++ b = m.utf8Encode) : l <+: m := by
@ -604,8 +604,8 @@ theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char
open List in
theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
∃ m₁ m₂ : List Char, m₁.utf8Encode = s.bytes.extract 0 p.byteIdx ∧ (m₁ ++ m₂).asString = s := by
obtain ⟨l, hl⟩ := s.isValidUtf8
obtain ⟨m₁, hm₁⟩ := h.isValidUtf8_extract_zero
obtain ⟨l, hl⟩ := s.isValidUTF8
obtain ⟨m₁, hm₁⟩ := h.isValidUTF8_extract_zero
suffices m₁ <+: l by
obtain ⟨m₂, rfl⟩ := this
refine ⟨m₁, m₂, hm₁.symm, ?_⟩
@ -616,12 +616,12 @@ theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
ByteArray.extract_zero_size]
simpa using h.le_endPos
theorem Pos.Raw.IsValid.isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) :
ByteArray.IsValidUtf8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by
theorem Pos.Raw.IsValid.isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) :
ByteArray.IsValidUTF8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by
obtain ⟨m₁, m₂, hm, rfl⟩ := h.exists
simp only [List.asString_append, bytes_append, List.bytes_asString]
rw [ByteArray.extract_append_eq_right]
· exact ByteArray.isValidUtf8_utf8Encode
· exact ByteArray.isValidUTF8_utf8Encode
· rw [hm]
simp only [List.asString_append, bytes_append, List.bytes_asString, ByteArray.size_extract,
ByteArray.size_append, Nat.sub_zero]
@ -631,7 +631,7 @@ theorem Pos.Raw.IsValid.isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.R
theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} :
p.IsValid s ↔ ∃ s₁ s₂ : String, s = s₁ ++ s₂ ∧ p = s₁.endPos := by
refine ⟨fun h => ⟨⟨_, h.isValidUtf8_extract_zero⟩, ⟨_, h.isValidUtf8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩
refine ⟨fun h => ⟨⟨_, h.isValidUTF8_extract_zero⟩, ⟨_, h.isValidUTF8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩
· apply String.bytes_inj.1
have := Pos.Raw.le_iff.1 h.le_endPos
simp_all [← size_bytes]
@ -640,7 +640,7 @@ theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} :
simp [Nat.min_eq_left this]
· rintro ⟨s₁, s₂, rfl, rfl⟩
refine ⟨by simp [Pos.Raw.le_iff], ?_⟩
simpa [ByteArray.extract_append_eq_left] using s₁.isValidUtf8
simpa [ByteArray.extract_append_eq_left] using s₁.isValidUTF8
@[simp]
theorem Pos.Raw.byteIdx_zero : (0 : Pos.Raw).byteIdx = 0 := rfl
@ -648,12 +648,12 @@ theorem Pos.Raw.byteIdx_zero : (0 : Pos.Raw).byteIdx = 0 := rfl
@[simp]
theorem Pos.Raw.isValid_zero {s : String} : (0 : Pos.Raw).IsValid s where
le_endPos := by simp [Pos.Raw.le_iff]
isValidUtf8_extract_zero := by simp
isValidUTF8_extract_zero := by simp
@[simp]
theorem Pos.Raw.isValid_endPos {s : String} : s.endPos.IsValid s where
le_endPos := by simp [Pos.Raw.le_iff]
isValidUtf8_extract_zero := by simp [← size_bytes, s.isValidUtf8]
isValidUTF8_extract_zero := by simp [← size_bytes, s.isValidUTF8]
@[simp]
theorem Pos.Raw.isValid_empty_iff {p : Pos.Raw} : p.IsValid "" ↔ p = 0 := by
@ -798,26 +798,21 @@ where
/--
Accesses the indicated byte in the UTF-8 encoding of a string.
At runtime, this function is implemented by efficient, constant-time code.
-/
@[extern "lean_string_get_byte_fast", expose]
def getUtf8Byte (s : @& String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
s.bytes[p.byteIdx]
/--
Accesses the indicated byte in the UTF-8 encoding of a string.
At runtime, this function is implemented by efficient, constant-time code.
-/
@[extern "lean_string_get_byte_fast", expose]
def getUTF8Byte (s : @& String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
s.bytes[p.byteIdx]
@[deprecated getUTF8Byte (since := "2025-10-01"), extern "lean_string_get_byte_fast", expose]
abbrev getUtf8Byte (s : String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
s.getUTF8Byte p h
@[simp]
theorem endPos_empty : "".endPos = 0 := rfl
theorem Pos.Raw.isValid_iff_isUtf8FirstByte {s : String} {p : Pos.Raw} :
p.IsValid s ↔ p = s.endPos ∃ (h : p < s.endPos), (s.getUtf8Byte p h).IsUtf8FirstByte := by
theorem Pos.Raw.isValid_iff_isUTF8FirstByte {s : String} {p : Pos.Raw} :
p.IsValid s ↔ p = s.endPos ∃ (h : p < s.endPos), (s.getUTF8Byte p h).IsUTF8FirstByte := by
induction s using push_induction with
| empty => simp [Pos.Raw.lt_iff]
| push s c ih =>
@ -825,22 +820,22 @@ theorem Pos.Raw.isValid_iff_isUtf8FirstByte {s : String} {p : Pos.Raw} :
refine ⟨?_, ?_⟩
· rintro ((rfl|⟨h, hb⟩)|h)
· refine Or.inr ⟨by simp [Pos.Raw.lt_iff, Char.utf8Size_pos], ?_⟩
simp only [getUtf8Byte, bytes_push, byteIdx_endPos]
simp only [getUTF8Byte, bytes_push, byteIdx_endPos]
rw [ByteArray.getElem_append_right (by simp)]
simp [List.isUtf8FirstByte_getElem_utf8Encode_singleton]
simp [List.isUTF8FirstByte_getElem_utf8Encode_singleton]
· refine Or.inr ⟨by simp [lt_iff] at h ⊢; omega, ?_⟩
simp only [getUtf8Byte, bytes_push]
rwa [ByteArray.getElem_append_left, ← getUtf8Byte]
simp only [getUTF8Byte, bytes_push]
rwa [ByteArray.getElem_append_left, ← getUTF8Byte]
· exact Or.inl (by simpa [endPos_push])
· rintro (h|⟨h, hb⟩)
· exact Or.inr (by simpa [endPos_push] using h)
· simp only [getUtf8Byte, bytes_push] at hb
· simp only [getUTF8Byte, bytes_push] at hb
by_cases h' : p < s.endPos
· refine Or.inl (Or.inr ⟨h', ?_⟩)
rwa [ByteArray.getElem_append_left h', ← getUtf8Byte] at hb
rwa [ByteArray.getElem_append_left h', ← getUTF8Byte] at hb
· refine Or.inl (Or.inl ?_)
rw [ByteArray.getElem_append_right (by simp [lt_iff] at h' ⊢; omega)] at hb
simp only [size_bytes, List.isUtf8FirstByte_getElem_utf8Encode_singleton] at hb
simp only [size_bytes, List.isUTF8FirstByte_getElem_utf8Encode_singleton] at hb
ext
simp only [lt_iff, byteIdx_endPos, Nat.not_lt] at ⊢ h'
omega
@ -865,13 +860,13 @@ Examples:
@[extern "lean_string_is_valid_pos", expose]
def Pos.Raw.isValid (s : @&String) (p : @& Pos.Raw) : Bool :=
if h : p < s.endPos then
(s.getUtf8Byte p h).IsUtf8FirstByte
(s.getUTF8Byte p h).IsUTF8FirstByte
else
p = s.endPos
@[simp]
theorem Pos.Raw.isValid_eq_true_iff {s : String} {p : Pos.Raw} : p.isValid s = true ↔ p.IsValid s := by
rw [isValid_iff_isUtf8FirstByte]
rw [isValid_iff_isUTF8FirstByte]
fun_cases isValid s p with
| case1 h =>
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
@ -899,29 +894,29 @@ theorem Pos.Raw.isValid_iff_isSome_utf8DecodeChar? {s : String} {p : Pos.Raw} :
simp
· exact Or.inr (ByteArray.isSome_utf8DecodeChar?_append h _)
· simp [endPos_push]
· refine isValid_iff_isUtf8FirstByte.2 (Or.inr ?_)
· refine isValid_iff_isUTF8FirstByte.2 (Or.inr ?_)
obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
refine ⟨?_, ?_⟩
· have := ByteArray.le_size_of_utf8DecodeChar?_eq_some hc
have := c.utf8Size_pos
simp only [lt_iff, byteIdx_endPos, gt_iff_lt, ← size_bytes]
omega
· rw [getUtf8Byte]
exact ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? h
· rw [getUTF8Byte]
exact ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h
theorem _root_.ByteArray.IsValidUtf8.isUtf8FirstByte_getElem_zero {b : ByteArray}
(h : b.IsValidUtf8) (h₀ : 0 < b.size) : b[0].IsUtf8FirstByte := by
theorem _root_.ByteArray.IsValidUTF8.isUTF8FirstByte_getElem_zero {b : ByteArray}
(h : b.IsValidUTF8) (h₀ : 0 < b.size) : b[0].IsUTF8FirstByte := by
rcases h with ⟨m, rfl⟩
have : m ≠ [] := by
rintro rfl
simp at h₀
conv => congr; congr; rw [← List.cons_head_tail this, ← List.singleton_append, List.utf8Encode_append]
rw [ByteArray.getElem_append_left]
· exact List.isUtf8FirstByte_getElem_utf8Encode_singleton.2 rfl
· exact List.isUTF8FirstByte_getElem_utf8Encode_singleton.2 rfl
· simp [List.utf8Encode_singleton, Char.utf8Size_pos]
theorem isUtf8FirstByte_getUtf8Byte_zero {b : String} {h} : (b.getUtf8Byte 0 h).IsUtf8FirstByte :=
b.isValidUtf8.isUtf8FirstByte_getElem_zero _
theorem isUTF8FirstByte_getUTF8Byte_zero {b : String} {h} : (b.getUTF8Byte 0 h).IsUTF8FirstByte :=
b.isValidUTF8.isUTF8FirstByte_getElem_zero _
protected theorem Pos.Raw.le_trans {a b c : Pos.Raw} : a ≤ b → b ≤ c → a ≤ c := by
simpa [le_iff] using Nat.le_trans
@ -929,43 +924,43 @@ protected theorem Pos.Raw.le_trans {a b c : Pos.Raw} : a ≤ b → b ≤ c → a
protected theorem Pos.Raw.lt_of_lt_of_le {a b c : Pos.Raw} : a < b → b ≤ c → a < c := by
simpa [le_iff, lt_iff] using Nat.lt_of_lt_of_le
theorem Pos.Raw.isValidUtf8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.endPos) :
(s.bytes.extract p₁.byteIdx p₂.byteIdx).IsValidUtf8 ↔ p₁ = p₂ (p₁.IsValid s ∧ p₂.IsValid s) := by
theorem Pos.Raw.isValidUTF8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.endPos) :
(s.bytes.extract p₁.byteIdx p₂.byteIdx).IsValidUTF8 ↔ p₁ = p₂ (p₁.IsValid s ∧ p₂.IsValid s) := by
have hle'' : p₂.byteIdx ≤ s.bytes.size := by simpa [le_iff] using hle'
refine ⟨fun h => Classical.or_iff_not_imp_left.2 (fun h' => ?_), ?_⟩
· have hlt : p₁ < p₂ := by
simp_all [le_iff, lt_iff, Pos.Raw.ext_iff]
omega
have h₁ : p₁.IsValid s := by
rw [isValid_iff_isUtf8FirstByte]
rw [isValid_iff_isUTF8FirstByte]
refine Or.inr ⟨Pos.Raw.lt_of_lt_of_le hlt hle', ?_⟩
have hlt' : 0 < p₂.byteIdx - p₁.byteIdx := by
simp [lt_iff] at hlt
omega
have := h.isUtf8FirstByte_getElem_zero
have := h.isUTF8FirstByte_getElem_zero
simp only [ByteArray.size_extract, Nat.min_eq_left hle'', hlt', ByteArray.getElem_extract, Nat.add_zero] at this
simp [getUtf8Byte, this trivial]
simp [getUTF8Byte, this trivial]
refine ⟨h₁, ⟨hle', ?_⟩⟩
rw [ByteArray.extract_eq_extract_append_extract p₁.byteIdx (by simp) hle]
exact h₁.isValidUtf8_extract_zero.append h
exact h₁.isValidUTF8_extract_zero.append h
· refine fun h => h.elim (by rintro rfl; simp) (fun ⟨h₁, h₂⟩ => ?_)
let t : String := ⟨_, h₂.isValidUtf8_extract_zero⟩
let t : String := ⟨_, h₂.isValidUTF8_extract_zero⟩
have htb : t.bytes = s.bytes.extract 0 p₂.byteIdx := rfl
have ht : p₁.IsValid t := by
refine ⟨?_, ?_⟩
· simpa [le_iff, t, Nat.min_eq_left hle'', ← size_bytes]
· simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUtf8_extract_zero
· simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUTF8_extract_zero
simpa [htb, ByteArray.extract_extract, Nat.zero_add, Nat.min_eq_left hle'', ← size_bytes]
using ht.isValidUtf8_extract_utf8ByteSize
using ht.isValidUTF8_extract_utf8ByteSize
theorem Pos.Raw.isValid_iff_isValidUtf8_extract_zero {s : String} {p : Pos.Raw} :
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUtf8 :=
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} :
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 :=
⟨fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩
theorem Pos.Raw.isValid_iff_isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.Raw} :
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract p.byteIdx s.utf8ByteSize).IsValidUtf8 := by
refine ⟨fun h => ⟨h.le_endPos, h.isValidUtf8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩
rw [← byteIdx_endPos, isValidUtf8_extract_iff _ _ h₁ (by simp [le_iff])] at h₂
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} :
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract p.byteIdx s.utf8ByteSize).IsValidUTF8 := by
refine ⟨fun h => ⟨h.le_endPos, h.isValidUTF8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩
rw [← byteIdx_endPos, isValidUTF8_extract_iff _ _ h₁ (by simp [le_iff])] at h₂
obtain (rfl|h₂) := h₂
· simp
· exact h₂.1
@ -1000,12 +995,12 @@ def endValidPos (s : String) : s.ValidPos where
offset := s.endPos
isValid := by simp
theorem ValidPos.isValidUtf8_extract {s : String} (pos₁ pos₂ : s.ValidPos) :
(s.bytes.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUtf8 := by
theorem ValidPos.isValidUTF8_extract {s : String} (pos₁ pos₂ : s.ValidPos) :
(s.bytes.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUTF8 := by
by_cases h : pos₁.offset ≤ pos₂.offset
· exact (Pos.Raw.isValidUtf8_extract_iff _ _ h pos₂.isValid.le_endPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩)
· exact (Pos.Raw.isValidUTF8_extract_iff _ _ h pos₂.isValid.le_endPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩)
· rw [ByteArray.extract_eq_empty_iff.2]
· exact ByteArray.isValidUtf8_empty
· exact ByteArray.isValidUTF8_empty
· rw [Nat.min_eq_left]
· rw [Pos.Raw.le_iff] at h
omega
@ -1063,8 +1058,8 @@ Accesses the indicated byte in the UTF-8 encoding of a string slice.
At runtime, this function is implemented by efficient, constant-time code.
-/
@[inline, expose]
def Slice.getUtf8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 :=
s.str.getUtf8Byte (s.startInclusive.offset + p) (by
def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 :=
s.str.getUTF8Byte (s.startInclusive.offset + p) (by
have := s.endExclusive.isValid.le_endPos
simp only [Pos.Raw.lt_iff, byteIdx_utf8ByteSize, Pos.Raw.le_iff, byteIdx_endPos, Pos.Raw.byteIdx_add] at *
omega)
@ -1074,16 +1069,16 @@ Accesses the indicated byte in the UTF-8 encoding of the string slice, or panics
is out-of-bounds.
-/
@[expose]
def Slice.getUtf8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
def Slice.getUTF8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
if h : p < s.utf8ByteSize then
s.getUtf8Byte p h
s.getUTF8Byte p h
else
panic! "String slice access is out of bounds."
@[extern "lean_string_utf8_extract"]
def ValidPos.extract {s : @& String} (b e : @& s.ValidPos) : String where
bytes := s.bytes.extract b.offset.byteIdx e.offset.byteIdx
isValidUtf8 := b.isValidUtf8_extract e
isValidUTF8 := b.isValidUTF8_extract e
/-- Creates a `String` from a `String.Slice` by copying the bytes. -/
@[inline]
@ -1103,17 +1098,17 @@ theorem Slice.utf8ByteSize_copy {s : Slice} :
theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.utf8ByteSize := by
simp [Pos.Raw.ext_iff]
theorem Slice.getUtf8Byte_eq_getUtf8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} :
s.getUtf8Byte p h = s.copy.getUtf8Byte p (by simpa) := by
simp [getUtf8Byte, String.getUtf8Byte, bytes_copy, ByteArray.getElem_extract]
theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} :
s.getUTF8Byte p h = s.copy.getUTF8Byte p (by simpa) := by
simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]
theorem Slice.getUtf8Byte_copy {s : Slice} {p : Pos.Raw} {h} :
s.copy.getUtf8Byte p h = s.getUtf8Byte p (by simpa using h) := by
rw [getUtf8Byte_eq_getUtf8Byte_copy]
theorem Slice.getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h} :
s.copy.getUTF8Byte p h = s.getUTF8Byte p (by simpa using h) := by
rw [getUTF8Byte_eq_getUTF8Byte_copy]
theorem Slice.isUtf8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
(s.getUtf8Byte 0 h).IsUtf8FirstByte := by
simpa [getUtf8Byte_eq_getUtf8Byte_copy] using s.copy.isUtf8FirstByte_getUtf8Byte_zero
theorem Slice.isUTF8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
(s.getUTF8Byte 0 h).IsUTF8FirstByte := by
simpa [getUTF8Byte_eq_getUTF8Byte_copy] using s.copy.isUTF8FirstByte_getUTF8Byte_zero
@[simp]
theorem Pos.Raw.add_zero {p : Pos.Raw} : p + 0 = p := by
@ -1127,7 +1122,7 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
· have := s.startInclusive_le_endExclusive
simp_all only [Slice.endPos_copy, le_iff, Slice.byteIdx_utf8ByteSize]
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] at h₂
rw [← byteIdx_add, Pos.Raw.isValidUtf8_extract_iff] at h₂
rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff] at h₂
· rcases h₂ with (h₂|⟨-, h₂⟩)
· rw [← h₂]
exact s.startInclusive.isValid
@ -1140,7 +1135,7 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
· have := s.startInclusive_le_endExclusive
simp_all only [le_iff, Slice.byteIdx_utf8ByteSize]
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)]
rw [← byteIdx_add, Pos.Raw.isValidUtf8_extract_iff]
rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff]
· exact Or.inr ⟨s.startInclusive.isValid, h₂⟩
· simp [le_iff]
· have := s.endExclusive.isValid.le_endPos
@ -1186,22 +1181,22 @@ def Slice.endPos (s : Slice) : s.Pos where
@[simp]
theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.utf8ByteSize := (rfl)
theorem Pos.Raw.isValidForSlice_iff_isUtf8FirstByte {s : Slice} {p : Pos.Raw} :
p.IsValidForSlice s ↔ (p = s.utf8ByteSize (∃ (h : p < s.utf8ByteSize), (s.getUtf8Byte p h).IsUtf8FirstByte)) := by
simp [← isValid_copy_iff, isValid_iff_isUtf8FirstByte, Slice.getUtf8Byte_copy]
theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} :
p.IsValidForSlice s ↔ (p = s.utf8ByteSize (∃ (h : p < s.utf8ByteSize), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy]
/-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/
@[expose]
def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool :=
if h : p < s.utf8ByteSize then
(s.getUtf8Byte p h).IsUtf8FirstByte
(s.getUTF8Byte p h).IsUTF8FirstByte
else
p = s.utf8ByteSize
@[simp]
theorem Pos.Raw.isValidForSlice_eq_true_iff {s : Slice} {p : Pos.Raw} :
p.isValidForSlice s = true ↔ p.IsValidForSlice s := by
rw [isValidForSlice_iff_isUtf8FirstByte]
rw [isValidForSlice_iff_isUTF8FirstByte]
fun_cases isValidForSlice with
| case1 h =>
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
@ -1257,22 +1252,22 @@ theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar? {s : Slice} {p : Pos.
simp only [size_bytes, Slice.utf8ByteSize_copy, ge_iff_le]
omega
· simp
· rw [isValidForSlice_iff_isUtf8FirstByte]
· rw [isValidForSlice_iff_isUTF8FirstByte]
rintro (rfl|⟨h₁, h₂⟩)
· simp
· exact Or.inr ⟨h₁, ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? h₂⟩
· exact Or.inr ⟨h₁, ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h₂⟩
/-- Returns the byte at a position in a slice that is not the end position. -/
@[inline, expose]
def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=
s.getUtf8Byte pos.offset (by
s.getUTF8Byte pos.offset (by
have := pos.isValidForSlice.le_utf8ByteSize
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff]
omega)
theorem Slice.Pos.isUtf8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
(pos.byte h).IsUtf8FirstByte :=
((Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2))
theorem Slice.Pos.isUTF8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
(pos.byte h).IsUTF8FirstByte :=
((Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2))
/-- Given a valid position on a slice `s`, obtains the corresponding valid position on the
underlying string `s.str`. -/
@ -1687,7 +1682,7 @@ theorem Slice.Pos.get_eq_get_toCopy {s : Slice} {pos : s.Pos} {h} :
theorem Slice.Pos.byte_toCopy {s : Slice} {pos : s.Pos} (h) :
pos.toCopy.byte h = pos.byte (by rintro rfl; simp at h) := by
rw [ValidPos.byte, Slice.Pos.byte, Slice.Pos.byte]
simp [getUtf8Byte, String.getUtf8Byte, bytes_copy, ByteArray.getElem_extract]
simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]
theorem Slice.Pos.byte_eq_byte_toCopy {s : Slice} {pos : s.Pos} {h} :
pos.byte h = pos.toCopy.byte (ne_of_apply_ne ValidPos.ofCopy (by simp [h])) :=
@ -1745,14 +1740,14 @@ theorem Slice.Pos.copy_eq_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.en
rw [append_assoc, ← ht₂, ← copy_eq_copy_replaceEnd]
theorem Slice.Pos.utf8ByteSize_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
(pos.byte h).utf8ByteSize pos.isUtf8FirstByte_byte = ⟨(pos.get h).utf8Size⟩ := by
simp [getUtf8Byte, byte, String.getUtf8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = ⟨(pos.get h).utf8Size⟩ := by
simp [getUTF8Byte, byte, String.getUTF8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]
/-- Advances a valid position on a slice to the next valid position, given a proof that the
position is not the past-the-end position, which guarantees that such a position exists. -/
@[expose]
def Slice.Pos.next {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos where
offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUtf8FirstByte_byte
offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte
isValidForSlice := by
obtain ⟨t₁, t₂, ht, ht'⟩ := copy_eq_append_get h
replace ht' : pos.offset = ⟨t₁.utf8ByteSize⟩ := Eq.symm (String.Pos.Raw.ext ht')
@ -1802,13 +1797,13 @@ def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : Strin
omega)
where
go (off : Nat) (h₁ : ⟨off⟩ < s.utf8ByteSize) : String.Pos.Raw :=
if hbyte : (s.getUtf8Byte ⟨off⟩ h₁).IsUtf8FirstByte then
if hbyte : (s.getUTF8Byte ⟨off⟩ h₁).IsUTF8FirstByte then
⟨off⟩
else
have : 0 ≠ off := by
intro h
obtain hoff : (⟨off⟩ : String.Pos.Raw) = 0 := by simpa [String.Pos.Raw.ext_iff] using h.symm
simp [hoff, s.isUtf8FirstByte_utf8ByteAt_zero] at hbyte
simp [hoff, s.isUTF8FirstByte_utf8ByteAt_zero] at hbyte
match off with
| 0 => False.elim (by contradiction)
| off + 1 => go off (by simp [Pos.Raw.lt_iff] at ⊢ h₁; omega)
@ -1820,12 +1815,12 @@ theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : ⟨off
| zero =>
rw [Slice.Pos.prevAux.go]
split
· exact Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, _⟩)
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, _⟩)
· simpa using elim
| succ off ih =>
rw [Slice.Pos.prevAux.go]
split
· exact Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, _⟩)
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, _⟩)
· simpa using ih _
where
elim {P : Pos.Raw → Prop} {h : False} : P h.elim := h.elim
@ -1965,8 +1960,8 @@ def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.utf
where
go (offset : String.Pos.Raw) : s.Pos :=
if h : offset < s.utf8ByteSize then
if h' : (s.getUtf8Byte offset h).IsUtf8FirstByte then
s.pos offset (Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, h'⟩))
if h' : (s.getUTF8Byte offset h).IsUTF8FirstByte then
s.pos offset (Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, h'⟩))
else
go offset.inc
else

View file

@ -982,7 +982,7 @@ end ByteArray.utf8DecodeChar?
open ByteArray.utf8DecodeChar?
/- # `utf8DecodeChar?` and `validateUtf8At` -/
/- # `utf8DecodeChar?` and `validateUTF8At` -/
@[inline, expose]
public def ByteArray.utf8DecodeChar? (bytes : ByteArray) (i : Nat) : Option Char :=
@ -1006,7 +1006,7 @@ public def ByteArray.utf8DecodeChar? (bytes : ByteArray) (i : Nat) : Option Char
else none
@[inline, expose]
public def ByteArray.validateUtf8At (bytes : ByteArray) (i : Nat) : Bool :=
public def ByteArray.validateUTF8At (bytes : ByteArray) (i : Nat) : Bool :=
if h₀ : i < bytes.size then
match h : parseFirstByte bytes[i] with
| .invalid => false
@ -1212,9 +1212,9 @@ public theorem String.toByteArray_utf8EncodeChar_of_utf8DecodeChar?_eq_some {b :
rw [← assemble₄_eq_some_iff_utf8EncodeChar_eq]
exact ⟨this, h⟩
public theorem ByteArray.validateUtf8At_eq_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} :
b.validateUtf8At i = (b.utf8DecodeChar? i).isSome := by
simp only [validateUtf8At, utf8DecodeChar?]
public theorem ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} :
b.validateUTF8At i = (b.utf8DecodeChar? i).isSome := by
simp only [validateUTF8At, utf8DecodeChar?]
split
· split <;> (try split) <;> simp [verify₁_eq_isSome_assemble₁, verify₂_eq_isSome_assemble₂,
verify₃_eq_isSome_assemble₃, verify₄_eq_isSome_assemble₄]
@ -1285,9 +1285,9 @@ public theorem ByteArray.lt_size_of_isSome_utf8DecodeChar? {b : ByteArray} {i :
have := c.utf8Size_pos
omega
public theorem ByteArray.lt_size_of_validateUtf8At {b : ByteArray} {i : Nat} :
validateUtf8At b i = true → i < b.size :=
validateUtf8At_eq_isSome_utf8DecodeChar? ▸ lt_size_of_isSome_utf8DecodeChar?
public theorem ByteArray.lt_size_of_validateUTF8At {b : ByteArray} {i : Nat} :
validateUTF8At b i = true → i < b.size :=
validateUTF8At_eq_isSome_utf8DecodeChar? ▸ lt_size_of_isSome_utf8DecodeChar?
public theorem ByteArray.utf8DecodeChar?_append_eq_some {b : ByteArray} {i : Nat} {c : Char} (h : utf8DecodeChar? b i = some c)
(b' : ByteArray) : utf8DecodeChar? (b ++ b') i = some c := by
@ -1366,7 +1366,7 @@ public theorem List.utf8DecodeChar_utf8Encode_cons {l : List Char} {c : Char} {h
ByteArray.utf8DecodeChar (c::l).utf8Encode 0 h = c := by
simp [ByteArray.utf8DecodeChar]
/-! # `UInt8.IsUtf8FirstByte` -/
/-! # `UInt8.IsUTF8FirstByte` -/
namespace UInt8
@ -1375,27 +1375,27 @@ Predicate for whether a byte can appear as the first byte of the UTF-8 encoding
scalar value.
-/
@[expose]
public def IsUtf8FirstByte (c : UInt8) : Prop :=
public def IsUTF8FirstByte (c : UInt8) : Prop :=
c &&& 0x80 = 0 c &&& 0xe0 = 0xc0 c &&& 0xf0 = 0xe0 c &&& 0xf8 = 0xf0
public instance {c : UInt8} : Decidable c.IsUtf8FirstByte :=
public instance {c : UInt8} : Decidable c.IsUTF8FirstByte :=
inferInstanceAs <| Decidable (c &&& 0x80 = 0 c &&& 0xe0 = 0xc0 c &&& 0xf0 = 0xe0 c &&& 0xf8 = 0xf0)
theorem isUtf8FirstByte_iff_parseFirstByte_ne_invalid {c : UInt8} :
c.IsUtf8FirstByte ↔ parseFirstByte c ≠ FirstByte.invalid := by
fun_cases parseFirstByte with simp_all [IsUtf8FirstByte]
theorem isUTF8FirstByte_iff_parseFirstByte_ne_invalid {c : UInt8} :
c.IsUTF8FirstByte ↔ parseFirstByte c ≠ FirstByte.invalid := by
fun_cases parseFirstByte with simp_all [IsUTF8FirstByte]
@[simp]
public theorem isUtf8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi : i < (String.utf8EncodeChar c).length} :
(String.utf8EncodeChar c)[i].IsUtf8FirstByte ↔ i = 0 := by
public theorem isUTF8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi : i < (String.utf8EncodeChar c).length} :
(String.utf8EncodeChar c)[i].IsUTF8FirstByte ↔ i = 0 := by
obtain (rfl|hi₀) := Nat.eq_zero_or_pos i
· simp only [isUtf8FirstByte_iff_parseFirstByte_ne_invalid, iff_true]
· simp only [isUTF8FirstByte_iff_parseFirstByte_ne_invalid, iff_true]
match h : c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with
| 1, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_done h]
| 2, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_oneMore h]
| 3, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_twoMore h]
| 4, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_threeMore h]
· simp only [isUtf8FirstByte_iff_parseFirstByte_ne_invalid, ne_eq, Nat.ne_of_lt' hi₀, iff_false,
· simp only [isUTF8FirstByte_iff_parseFirstByte_ne_invalid, ne_eq, Nat.ne_of_lt' hi₀, iff_false,
Classical.not_not]
apply parseFirstByte_eq_invalid_of_isInvalidContinuationByte_eq_false
simp only [String.length_utf8EncodeChar] at hi
@ -1408,12 +1408,12 @@ public theorem isUtf8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi :
| 4, _, _, 2, _, _ => simp [isInvalidContinuationByte_getElem_utf8EncodeChar_two_of_utf8Size_eq_four h]
| 4, _, _, 3, _, _ => simp [isInvalidContinuationByte_getElem_utf8EncodeChar_three_of_utf8Size_eq_four h]
public theorem isUtf8FirstByte_getElem_zero_utf8EncodeChar {c : Char} :
((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).IsUtf8FirstByte := by
public theorem isUTF8FirstByte_getElem_zero_utf8EncodeChar {c : Char} :
((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).IsUTF8FirstByte := by
simp
@[expose]
public def utf8ByteSize (c : UInt8) (_h : c.IsUtf8FirstByte) : String.Pos.Raw :=
public def utf8ByteSize (c : UInt8) (_h : c.IsUTF8FirstByte) : String.Pos.Raw :=
if c &&& 0x80 = 0 then
⟨1⟩
else if c &&& 0xe0 = 0xc0 then
@ -1430,7 +1430,7 @@ def _root_.ByteArray.utf8DecodeChar?.FirstByte.utf8ByteSize : FirstByte → Stri
| .twoMore => ⟨3⟩
| .threeMore => ⟨4⟩
theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUtf8FirstByte} :
theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUTF8FirstByte} :
c.utf8ByteSize h = (parseFirstByte c).utf8ByteSize := by
simp only [utf8ByteSize, FirstByte.utf8ByteSize, parseFirstByte, beq_iff_eq]
split
@ -1447,28 +1447,28 @@ theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUtf8Fir
end UInt8
public theorem ByteArray.isUtf8FirstByte_getElem_zero_utf8EncodeChar_append {c : Char} {b : ByteArray} :
(((String.utf8EncodeChar c).toByteArray ++ b)[0]'(by simp; have := c.utf8Size_pos; omega)).IsUtf8FirstByte := by
public theorem ByteArray.isUTF8FirstByte_getElem_zero_utf8EncodeChar_append {c : Char} {b : ByteArray} :
(((String.utf8EncodeChar c).toByteArray ++ b)[0]'(by simp; have := c.utf8Size_pos; omega)).IsUTF8FirstByte := by
rw [ByteArray.getElem_append_left (by simp [c.utf8Size_pos]),
List.getElem_toByteArray, UInt8.isUtf8FirstByte_getElem_utf8EncodeChar]
List.getElem_toByteArray, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar]
public theorem ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat}
(h : (utf8DecodeChar? b i).isSome) : (b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUtf8FirstByte := by
public theorem ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat}
(h : (utf8DecodeChar? b i).isSome) : (b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUTF8FirstByte := by
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h
suffices ((b.extract i b.size)[0]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUtf8FirstByte by
suffices ((b.extract i b.size)[0]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUTF8FirstByte by
simpa [ByteArray.getElem_extract, Nat.add_zero] using this
obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
conv => congr; congr; rw [eq_of_utf8DecodeChar?_eq_some hc]
exact isUtf8FirstByte_getElem_zero_utf8EncodeChar_append
exact isUTF8FirstByte_getElem_zero_utf8EncodeChar_append
public theorem ByteArray.isUtf8FirstByte_of_validateUtf8At {b : ByteArray} {i : Nat} :
(h : validateUtf8At b i = true) → (b[i]'(lt_size_of_validateUtf8At h)).IsUtf8FirstByte := by
simp only [validateUtf8At_eq_isSome_utf8DecodeChar?]
exact isUtf8FirstByte_of_isSome_utf8DecodeChar?
public theorem ByteArray.isUTF8FirstByte_of_validateUTF8At {b : ByteArray} {i : Nat} :
(h : validateUTF8At b i = true) → (b[i]'(lt_size_of_validateUTF8At h)).IsUTF8FirstByte := by
simp only [validateUTF8At_eq_isSome_utf8DecodeChar?]
exact isUTF8FirstByte_of_isSome_utf8DecodeChar?
theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
(((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).utf8ByteSize
UInt8.isUtf8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by
UInt8.isUTF8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by
rw [UInt8.utf8ByteSize_eq_utf8ByteSize_parseFirstByte]
obtain (hc|hc|hc|hc) := c.utf8Size_eq
· rw [parseFirstByte_utf8EncodeChar_eq_done hc, FirstByte.utf8ByteSize, hc]
@ -1478,7 +1478,7 @@ theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
public theorem ByteArray.utf8Size_utf8DecodeChar {b : ByteArray} {i} {h} :
(utf8DecodeChar b i h).utf8Size =
((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUtf8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by
((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUTF8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by
rw [← Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar]
simp only [List.getElem_eq_getElem_toByteArray, utf8EncodeChar_utf8DecodeChar]
simp [ByteArray.getElem_extract]

View file

@ -88,7 +88,7 @@ where
have hr := by
simp [Pos.Raw.le_iff] at h h2 ⊢
omega
if lhs.getUtf8Byte (lstart + curr) hl == rhs.getUtf8Byte (rstart + curr) hr then
if lhs.getUTF8Byte (lstart + curr) hl == rhs.getUTF8Byte (rstart + curr) hr then
go curr.inc
else
false

View file

@ -39,16 +39,16 @@ partial def buildTable (pat : Slice) : Array String.Pos.Raw :=
where
go (pos : String.Pos.Raw) (table : Array String.Pos.Raw) :=
if h : pos < pat.utf8ByteSize then
let patByte := pat.getUtf8Byte pos h
let patByte := pat.getUTF8Byte pos h
let distance := computeDistance table[table.size - 1]! patByte table
let distance := if patByte = pat.getUtf8Byte! distance then distance.inc else distance
let distance := if patByte = pat.getUTF8Byte! distance then distance.inc else distance
go pos.inc (table.push distance)
else
table
computeDistance (distance : String.Pos.Raw) (patByte : UInt8) (table : Array String.Pos.Raw) :
String.Pos.Raw :=
if distance > 0 && patByte != pat.getUtf8Byte! distance then
if distance > 0 && patByte != pat.getUTF8Byte! distance then
computeDistance table[distance.byteIdx - 1]! patByte table
else
distance
@ -62,7 +62,7 @@ def iter (s : Slice) (pat : Slice) : Std.Iter (α := ForwardSliceSearcher s) (Se
partial def backtrackIfNecessary (pat : Slice) (table : Array String.Pos.Raw) (stackByte : UInt8)
(needlePos : String.Pos.Raw) : String.Pos.Raw :=
if needlePos != 0 && stackByte != pat.getUtf8Byte! needlePos then
if needlePos != 0 && stackByte != pat.getUTF8Byte! needlePos then
backtrackIfNecessary pat table stackByte table[needlePos.byteIdx - 1]!
else
needlePos
@ -95,9 +95,9 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
let rec findNext (startPos : String.Pos.Raw)
(currStackPos : String.Pos.Raw) (needlePos : String.Pos.Raw) (h : stackPos ≤ currStackPos) :=
if h1 : currStackPos < s.utf8ByteSize then
let stackByte := s.getUtf8Byte currStackPos h1
let stackByte := s.getUTF8Byte currStackPos h1
let needlePos := backtrackIfNecessary needle table stackByte needlePos
let patByte := needle.getUtf8Byte! needlePos
let patByte := needle.getUTF8Byte! needlePos
if stackByte != patByte then
let nextStackPos := s.findNextPos currStackPos h1 |>.offset
let res := .rejected (s.pos! startPos) (s.pos! nextStackPos)

View file

@ -688,8 +688,8 @@ def eqIgnoreAsciiCase (s1 s2 : Slice) : Bool :=
where
go (s1 : Slice) (s1Curr : String.Pos.Raw) (s2 : Slice) (s2Curr : String.Pos.Raw) : Bool :=
if h : s1Curr < s1.utf8ByteSize ∧ s2Curr < s2.utf8ByteSize then
let c1 := (s1.getUtf8Byte s1Curr h.left).toAsciiLower
let c2 := (s2.getUtf8Byte s2Curr h.right).toAsciiLower
let c1 := (s1.getUTF8Byte s1Curr h.left).toAsciiLower
let c2 := (s2.getUTF8Byte s2Curr h.right).toAsciiLower
if c1 == c2 then
go s1 s1Curr.inc s2 s2Curr.inc
else
@ -900,12 +900,12 @@ instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where
∃ h1 : it.internalState.offset < it.internalState.s.utf8ByteSize,
it.internalState.s = it'.internalState.s ∧
it'.internalState.offset = it.internalState.offset.inc ∧
it.internalState.s.getUtf8Byte it.internalState.offset h1 = out
it.internalState.s.getUTF8Byte it.internalState.offset h1 = out
| .skip _ => False
| .done => ¬ it.internalState.offset < it.internalState.s.utf8ByteSize
step := fun ⟨s, offset⟩ =>
if h : offset < s.utf8ByteSize then
pure ⟨.yield ⟨s, offset.inc⟩ (s.getUtf8Byte offset h), by simp [h]⟩
pure ⟨.yield ⟨s, offset.inc⟩ (s.getUTF8Byte offset h), by simp [h]⟩
else
pure ⟨.done, by simp [h]⟩
@ -981,7 +981,7 @@ instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where
it.internalState.s = it'.internalState.s ∧
it.internalState.offset ≠ 0 ∧
it'.internalState.offset = it.internalState.offset.dec ∧
it.internalState.s.getUtf8Byte it.internalState.offset.dec h1 = out
it.internalState.s.getUTF8Byte it.internalState.offset.dec h1 = out
| .skip _ => False
| .done => it.internalState.offset = 0
step := fun ⟨s, offset, hinv⟩ =>
@ -994,7 +994,7 @@ instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where
simp [String.Pos.Raw.le_iff, nextOffset] at hinv ⊢
omega
have hiter := by simp [nextOffset, hbound, h]
pure ⟨.yield ⟨s, nextOffset, hinv⟩ (s.getUtf8Byte nextOffset hbound), hiter⟩
pure ⟨.yield ⟨s, nextOffset, hinv⟩ (s.getUTF8Byte nextOffset hbound), hiter⟩
else
pure ⟨.done, by simpa using h⟩

View file

@ -26,14 +26,14 @@ namespace Lean.Name
-- If you change this, also change the corresponding function in `Init.Meta`.
private partial def needsNoEscapeAsciiRest (s : String) (i : Nat) : Bool :=
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
isIdRestAscii c && needsNoEscapeAsciiRest s (i + 1)
else
true
-- If you change this, also change the corresponding function in `Init.Meta`.
@[inline] private def needsNoEscapeAscii (s : String) (h : s.utf8ByteSize > 0) : Bool :=
let c := s.getUtf8Byte 0 h
let c := s.getUTF8Byte 0 h
isIdFirstAscii c && needsNoEscapeAsciiRest s 1
-- If you change this, also change the corresponding function in `Init.Meta`.

View file

@ -156,12 +156,6 @@ def isInaccessibleUserName : Name → Bool
| Name.num p _ => isInaccessibleUserName p
| _ => false
-- FIXME: `getUtf8Byte` is in `Init.Data.String.Extra`, which causes an import cycle with
-- `Init.Meta`. Moving `getUtf8Byte` up to `Init.Data.String.Basic` creates another import cycle.
-- Please replace this definition with `getUtf8Byte` when the string refactor is through.
@[extern "lean_string_get_byte_fast"]
private opaque getUtf8Byte' (s : @& String) (n : Nat) (h : n < s.utf8ByteSize) : UInt8
section ToString
/-!
@ -177,14 +171,14 @@ inner-loop function like `Name.toString`.
-- If you change this, also change the corresponding function in `Init.Data.ToString.Name`.
private partial def needsNoEscapeAsciiRest (s : String) (i : Nat) : Bool :=
if h : i < s.utf8ByteSize then
let c := getUtf8Byte' s i h
let c := String.Internal.getUTF8Byte s i h
isIdRestAscii c && needsNoEscapeAsciiRest s (i + 1)
else
true
-- If you change this, also change the corresponding function in `Init.Data.ToString.Name`.
@[inline] private def needsNoEscapeAscii (s : String) (h : s.utf8ByteSize > 0) : Bool :=
let c := getUtf8Byte' s 0 h
let c := String.Internal.getUTF8Byte s 0 h
isIdFirstAscii c && needsNoEscapeAsciiRest s 1
-- If you change this, also change the corresponding function in `Init.Data.ToString.Name`.

View file

@ -3378,7 +3378,7 @@ def List.utf8Encode (l : List Char) : ByteArray :=
Note that in order for this definition to be well-behaved it is necessary to know that this `m`
is unique. To show this, one defines UTF-8 decoding and shows that encoding and decoding are
mutually inverse. -/
inductive ByteArray.IsValidUtf8 (b : ByteArray) : Prop
inductive ByteArray.IsValidUTF8 (b : ByteArray) : Prop
/-- Show that a byte -/
| intro (m : List Char) (hm : Eq b (List.utf8Encode m))
@ -3393,10 +3393,10 @@ modifications when the reference to the string is unique.
structure String where ofByteArray ::
/-- The bytes of the UTF-8 encoding of the string. Since strings have a special representation in
the runtime, this function actually takes linear time and space at runtime. For efficient access
to the string's bytes, use `String.utf8ByteSize` and `String.getUtf8Byte`. -/
to the string's bytes, use `String.utf8ByteSize` and `String.getUTF8Byte`. -/
bytes : ByteArray
/-- The bytes of the string form valid UTF-8. -/
isValidUtf8 : ByteArray.IsValidUtf8 bytes
isValidUTF8 : ByteArray.IsValidUTF8 bytes
attribute [extern "lean_string_to_utf8"] String.bytes
attribute [extern "lean_string_from_utf8_unchecked"] String.ofByteArray

View file

@ -64,7 +64,7 @@ private def needEscape (s : String) : Bool :=
where
go (s : String) (i : Nat) : Bool :=
if h : i < s.utf8ByteSize then
let byte := s.getUtf8Byte ⟨i⟩ h
let byte := s.getUTF8Byte ⟨i⟩ h
have h1 : byte.toNat < 256 := UInt8.toNat_lt_size byte
have h2 : escapeTable.val.size = 256 := escapeTable.property
if escapeTable.val.get byte.toNat (Nat.lt_of_lt_of_eq h1 h2.symm) == 0 then

View file

@ -63,7 +63,7 @@ instance : Inhabited (Trie α) where
partial def upsert (t : Trie α) (s : String) (f : Option αα) : Trie α :=
let rec insertEmpty (i : Nat) : Trie α :=
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
let t := insertEmpty (i + 1)
node1 none c t
else
@ -71,14 +71,14 @@ partial def upsert (t : Trie α) (s : String) (f : Option αα) : Trie α :
let rec loop
| i, leaf v =>
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
let t := insertEmpty (i + 1)
node1 v c t
else
leaf (f v)
| i, node1 v c' t' =>
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
if c == c'
then node1 v c' (loop (i + 1) t')
else
@ -88,7 +88,7 @@ partial def upsert (t : Trie α) (s : String) (f : Option αα) : Trie α :
node1 (f v) c' t'
| i, node v cs ts =>
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
match cs.findIdx? (· == c) with
| none =>
let t := insertEmpty (i + 1)
@ -113,7 +113,7 @@ partial def find? (t : Trie α) (s : String) : Option α :=
val
| i, node1 val c' t' =>
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
if c == c'
then loop (i + 1) t'
else none
@ -121,7 +121,7 @@ partial def find? (t : Trie α) (s : String) : Option α :=
val
| i, node val cs ts =>
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
match cs.findIdx? (· == c) with
| none => none
| some idx => loop (i + 1) ts[idx]!
@ -150,7 +150,7 @@ partial def findPrefix (t : Trie α) (pre : String) : Array α := go t 0
where
go (t : Trie α) (i : Nat) : Array α :=
if h : i < pre.utf8ByteSize then
let c := pre.getUtf8Byte ⟨i⟩ h
let c := pre.getUTF8Byte ⟨i⟩ h
match t with
| leaf _val => .empty
| node1 _val c' t' =>
@ -175,7 +175,7 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos.Raw)
| node1 v c' t', i, res =>
let res := if v.isSome then v else res
if h : i < endByte then
let c := s.getUtf8Byte ⟨i⟩ (by simp; omega)
let c := s.getUTF8Byte ⟨i⟩ (by simp; omega)
if c == c'
then loop t' (i + 1) res
else res
@ -184,7 +184,7 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos.Raw)
| node v cs ts, i, res =>
let res := if v.isSome then v else res
if h : i < endByte then
let c := s.getUtf8Byte ⟨i⟩ (by simp; omega)
let c := s.getUTF8Byte ⟨i⟩ (by simp; omega)
match cs.findIdx? (· == c) with
| none => res
| some idx => loop ts[idx]! (i + 1) res

View file

@ -124,7 +124,7 @@ public def ofJsonNumber? (n : JsonNumber) : Except String Hash :=
/-- Parse a hash from a string of hexadecimal digits. Does no validation. -/
public def ofHex (s : String) : Hash :=
mk <| s.utf8ByteSize.fold (init := 0) fun i h n =>
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
if c ≤ 57 then n*16 + (c - 48).toUInt64
else if 97 ≤ c then n*16 + (c - 87).toUInt64 -- c - 'a' + 10 = (c - 87)
else n*16 + (c - 55).toUInt64 -- c - 'A' + 10 = (c - 55)

View file

@ -24,7 +24,7 @@ public def zpad (n : Nat) (len : Nat) : String :=
/-- Returns whether a string is composed of only hexadecimal digits. -/
public def isHex (s : String) : Bool :=
s.utf8ByteSize.all fun i h =>
let c := s.getUtf8Byte ⟨i⟩ h
let c := s.getUTF8Byte ⟨i⟩ h
if c ≤ 57 then -- '9'
48 ≤ c -- '0'
else if c ≤ 102 then -- 'f'

View file

@ -12,7 +12,7 @@ macro "test_extern'" t:term " => " v:term : command =>
def checkGet (s : String) (arr : Array UInt8) :=
(List.range s.utf8ByteSize).all fun i =>
let c := if h : _ then s.getUtf8Byte ⟨i⟩ h else unreachable!
let c := if h : _ then s.getUTF8Byte ⟨i⟩ h else unreachable!
c == arr[i]!
macro "validate" arr:term " => " "↯" : command =>

View file

@ -4,11 +4,11 @@ setLit.lean:22:19-22:21: error: overloaded, errors
Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command.
Fields missing: `bytes`, `isValidUtf8`
Fields missing: `bytes`, `isValidUTF8`
Hint: Add missing fields:
̲b̲y̲t̲e̲s̲ ̲:̲=̲ ̲_̲
̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲t̲f̲8̲ ̲:̲=̲ ̲_̲ ̲
̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲T̲F̲8̲ ̲:̲=̲ ̲_̲ ̲
setLit.lean:24:31-24:38: error: overloaded, errors
failed to synthesize
Singleton Nat String