chore: use UTF8 instead of Utf8 in identifiers (#10636)
This PR renames `String.getUtf8Byte` to `String.getUTF8Byte` in order to adhere to the standard library naming convention.
This commit is contained in:
parent
784a063092
commit
2cca32ccc3
14 changed files with 195 additions and 206 deletions
|
|
@ -33,9 +33,9 @@ theorem List.utf8Encode_append {l l' : List Char} :
|
|||
theorem List.utf8Encode_cons {c : Char} {l : List Char} : (c :: l).utf8Encode = [c].utf8Encode ++ l.utf8Encode := by
|
||||
rw [← singleton_append, List.utf8Encode_append]
|
||||
|
||||
theorem List.isUtf8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} :
|
||||
UInt8.IsUtf8FirstByte [c].utf8Encode[i] ↔ i = 0 := by
|
||||
simp [List.utf8Encode_singleton, UInt8.isUtf8FirstByte_getElem_utf8EncodeChar]
|
||||
theorem List.isUTF8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} :
|
||||
UInt8.IsUTF8FirstByte [c].utf8Encode[i] ↔ i = 0 := by
|
||||
simp [List.utf8Encode_singleton, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar]
|
||||
|
||||
@[simp]
|
||||
theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] := by
|
||||
|
|
@ -45,26 +45,26 @@ theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ []
|
|||
theorem List.utf8Encode_eq_empty {l : List Char} : l.utf8Encode = ByteArray.empty ↔ l = [] := by
|
||||
simp [utf8Encode, ← List.eq_nil_iff_forall_not_mem]
|
||||
|
||||
theorem ByteArray.isValidUtf8_utf8Encode {l : List Char} : IsValidUtf8 l.utf8Encode :=
|
||||
theorem ByteArray.isValidUTF8_utf8Encode {l : List Char} : IsValidUTF8 l.utf8Encode :=
|
||||
.intro l rfl
|
||||
|
||||
@[simp]
|
||||
theorem ByteArray.isValidUtf8_empty : IsValidUtf8 ByteArray.empty :=
|
||||
theorem ByteArray.isValidUTF8_empty : IsValidUTF8 ByteArray.empty :=
|
||||
.intro [] (by simp)
|
||||
|
||||
theorem Char.isValidUtf8_toByteArray_utf8EncodeChar {c : Char} :
|
||||
ByteArray.IsValidUtf8 (String.utf8EncodeChar c).toByteArray :=
|
||||
theorem Char.isValidUTF8_toByteArray_utf8EncodeChar {c : Char} :
|
||||
ByteArray.IsValidUTF8 (String.utf8EncodeChar c).toByteArray :=
|
||||
.intro [c] (by simp [List.utf8Encode_singleton])
|
||||
|
||||
theorem ByteArray.IsValidUtf8.append {b b' : ByteArray} (h : IsValidUtf8 b) (h' : IsValidUtf8 b') :
|
||||
IsValidUtf8 (b ++ b') := by
|
||||
theorem ByteArray.IsValidUTF8.append {b b' : ByteArray} (h : IsValidUTF8 b) (h' : IsValidUTF8 b') :
|
||||
IsValidUTF8 (b ++ b') := by
|
||||
rcases h with ⟨m, rfl⟩
|
||||
rcases h' with ⟨m', rfl⟩
|
||||
exact .intro (m ++ m') (by simp)
|
||||
|
||||
theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
|
||||
IsValidUtf8 ([c].utf8Encode ++ b) ↔ IsValidUtf8 b := by
|
||||
refine ⟨?_, fun h => IsValidUtf8.append isValidUtf8_utf8Encode h⟩
|
||||
theorem ByteArray.isValidUTF8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
|
||||
IsValidUTF8 ([c].utf8Encode ++ b) ↔ IsValidUTF8 b := by
|
||||
refine ⟨?_, fun h => IsValidUTF8.append isValidUTF8_utf8Encode h⟩
|
||||
rintro ⟨l, hl⟩
|
||||
match l with
|
||||
| [] => simp at hl
|
||||
|
|
@ -75,7 +75,7 @@ theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c
|
|||
List.utf8DecodeChar?_utf8Encode_cons] using hl
|
||||
rw [← List.singleton_append (l := l), List.utf8Encode_append,
|
||||
ByteArray.append_right_inj] at hl
|
||||
exact hl ▸ isValidUtf8_utf8Encode
|
||||
exact hl ▸ isValidUTF8_utf8Encode
|
||||
|
||||
@[inline, expose]
|
||||
def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) :=
|
||||
|
|
@ -104,13 +104,13 @@ where
|
|||
if hi : i = b.size then
|
||||
true
|
||||
else
|
||||
match h : validateUtf8At b i with
|
||||
match h : validateUTF8At b i with
|
||||
| false => false
|
||||
| true => go fuel (i + (b[i].utf8ByteSize (isUtf8FirstByte_of_validateUtf8At h)).byteIdx)
|
||||
| true => go fuel (i + (b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h)).byteIdx)
|
||||
?_ ?_
|
||||
termination_by structural fuel
|
||||
finally
|
||||
all_goals rw [ByteArray.validateUtf8At_eq_isSome_utf8DecodeChar?] at h
|
||||
all_goals rw [ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar?] at h
|
||||
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
|
||||
exact add_utf8Size_utf8DecodeChar_le_size
|
||||
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
|
||||
|
|
@ -118,7 +118,7 @@ finally
|
|||
have := (b.utf8DecodeChar i h).utf8Size_pos
|
||||
omega
|
||||
|
||||
theorem ByteArray.isSome_utf8Decode?Go_eq_validateUtf8Go {b : ByteArray} {fuel : Nat}
|
||||
theorem ByteArray.isSome_utf8Decode?Go_eq_validateUTF8Go {b : ByteArray} {fuel : Nat}
|
||||
{i : Nat} {acc : Array Char} {hi : i ≤ b.size} {hf : b.size - i < fuel} :
|
||||
(utf8Decode?.go b fuel i acc hi hf).isSome = validateUTF8.go b fuel i hi hf := by
|
||||
fun_induction utf8Decode?.go with
|
||||
|
|
@ -128,20 +128,20 @@ theorem ByteArray.isSome_utf8Decode?Go_eq_validateUtf8Go {b : ByteArray} {fuel :
|
|||
split
|
||||
· rfl
|
||||
· rename_i heq
|
||||
simp [validateUtf8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||||
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||||
| case3 i acc hi fuel hf h₁ c h₂ ih =>
|
||||
simp [validateUTF8.go, h₁]
|
||||
split
|
||||
· rename_i heq
|
||||
simp [validateUtf8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||||
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||||
· rw [ih]
|
||||
congr
|
||||
rw [← ByteArray.utf8Size_utf8DecodeChar (h := by simp [h₂])]
|
||||
simp [utf8DecodeChar, h₂]
|
||||
|
||||
theorem ByteArray.isSome_utf8Decode?_eq_validateUtf8 {b : ByteArray} :
|
||||
theorem ByteArray.isSome_utf8Decode?_eq_validateUTF8 {b : ByteArray} :
|
||||
b.utf8Decode?.isSome = b.validateUTF8 :=
|
||||
b.isSome_utf8Decode?Go_eq_validateUtf8Go
|
||||
b.isSome_utf8Decode?Go_eq_validateUTF8Go
|
||||
|
||||
theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'}
|
||||
(hbb' : b = b') (hii' : i = i') (hacc : acc = acc') :
|
||||
|
|
@ -172,7 +172,7 @@ theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] :=
|
|||
simp [utf8Decode?, utf8Decode?.go]
|
||||
|
||||
private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
|
||||
(ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUtf8 (b.extract i b.size) := by
|
||||
(ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUTF8 (b.extract i b.size) := by
|
||||
fun_induction ByteArray.utf8Decode?.go with
|
||||
| case1 => simp
|
||||
| case2 fuel i hi hf acc h₁ h₂ =>
|
||||
|
|
@ -193,23 +193,23 @@ private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat
|
|||
rw [ByteArray.extract_eq_extract_append_extract (i := i) (i + c.utf8Size) (by omega)
|
||||
(le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢
|
||||
rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega),
|
||||
← List.utf8Encode_singleton, isValidUtf8_utf8Encode_singleton_append_iff]
|
||||
← List.utf8Encode_singleton, isValidUTF8_utf8Encode_singleton_append_iff]
|
||||
|
||||
theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} :
|
||||
b.utf8Decode?.isSome ↔ IsValidUtf8 b := by
|
||||
b.utf8Decode?.isSome ↔ IsValidUTF8 b := by
|
||||
rw [utf8Decode?, isSome_utf8Decode?go_iff, extract_zero_size]
|
||||
|
||||
@[simp]
|
||||
theorem ByteArray.validateUTF8_eq_true_iff {b : ByteArray} :
|
||||
b.validateUTF8 = true ↔ IsValidUtf8 b := by
|
||||
rw [← isSome_utf8Decode?_eq_validateUtf8, isSome_utf8Decode?_iff]
|
||||
b.validateUTF8 = true ↔ IsValidUTF8 b := by
|
||||
rw [← isSome_utf8Decode?_eq_validateUTF8, isSome_utf8Decode?_iff]
|
||||
|
||||
@[simp]
|
||||
theorem ByteArray.validateUTF8_eq_false_iff {b : ByteArray} :
|
||||
b.validateUTF8 = false ↔ ¬ IsValidUtf8 b := by
|
||||
b.validateUTF8 = false ↔ ¬ IsValidUTF8 b := by
|
||||
simp [← Bool.not_eq_true]
|
||||
|
||||
instance {b : ByteArray} : Decidable b.IsValidUtf8 :=
|
||||
instance {b : ByteArray} : Decidable b.IsValidUTF8 :=
|
||||
decidable_of_iff (b.validateUTF8 = true) ByteArray.validateUTF8_eq_true_iff
|
||||
|
||||
/--
|
||||
|
|
@ -217,7 +217,7 @@ Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.o
|
|||
the corresponding string.
|
||||
-/
|
||||
@[inline, expose]
|
||||
def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUtf8) : String :=
|
||||
def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUTF8) : String :=
|
||||
.ofByteArray a h
|
||||
|
||||
/--
|
||||
|
|
@ -225,14 +225,14 @@ Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.o
|
|||
the corresponding string, or returns `none` if the array is not a valid UTF-8 encoding of a string.
|
||||
-/
|
||||
@[inline, expose] def String.fromUTF8? (a : ByteArray) : Option String :=
|
||||
if h : a.IsValidUtf8 then some (fromUTF8 a h) else none
|
||||
if h : a.IsValidUTF8 then some (fromUTF8 a h) else none
|
||||
|
||||
/--
|
||||
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
|
||||
the corresponding string, or panics if the array is not a valid UTF-8 encoding of a string.
|
||||
-/
|
||||
@[inline, expose] def String.fromUTF8! (a : ByteArray) : String :=
|
||||
if h : a.IsValidUtf8 then fromUTF8 a h else panic! "invalid UTF-8 string"
|
||||
if h : a.IsValidUTF8 then fromUTF8 a h else panic! "invalid UTF-8 string"
|
||||
|
||||
/--
|
||||
Encodes a string in UTF-8 as an array of bytes.
|
||||
|
|
@ -258,7 +258,7 @@ Examples:
|
|||
@[extern "lean_string_append", expose]
|
||||
def String.append (s : String) (t : @& String) : String where
|
||||
bytes := s.bytes ++ t.bytes
|
||||
isValidUtf8 := s.isValidUtf8.append t.isValidUtf8
|
||||
isValidUTF8 := s.isValidUTF8.append t.isValidUTF8
|
||||
|
||||
instance : Append String where
|
||||
append s t := s.append t
|
||||
|
|
@ -294,7 +294,7 @@ theorem List.asString_append {l₁ l₂ : List Char} : (l₁ ++ l₂).asString =
|
|||
|
||||
@[expose]
|
||||
def String.Internal.toArray (b : String) : Array Char :=
|
||||
b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUtf8)
|
||||
b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUTF8)
|
||||
|
||||
@[simp]
|
||||
theorem String.Internal.toArray_empty : String.Internal.toArray "" = #[] := by
|
||||
|
|
@ -556,10 +556,10 @@ Predicate for validity of positions inside a `String`.
|
|||
There are multiple equivalent definitions for validity.
|
||||
|
||||
We say that a position is valid if the string obtained by taking all of the bytes up to, but
|
||||
excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUtf8_extract_zero`.
|
||||
excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_zero`.
|
||||
|
||||
Similarly, a position is valid if the string obtained by taking all of the bytes starting at the
|
||||
given position is valid UTF-8; see `Pos.isValid_iff_isValidUtf8_extract_utf8ByteSize`.
|
||||
given position is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_utf8ByteSize`.
|
||||
|
||||
An equivalent condition is that the position is the length of the UTF-8 encoding of
|
||||
some prefix of the characters of the string; see `Pos.isValid_iff_exists_append` and
|
||||
|
|
@ -567,7 +567,7 @@ some prefix of the characters of the string; see `Pos.isValid_iff_exists_append`
|
|||
|
||||
Another equivalent condition that can be checked efficiently is that the position is either the
|
||||
end position or strictly smaller than the end position and the byte at the position satisfies
|
||||
`UInt8.IsUtf8FirstByte`; see `Pos.isValid_iff_isUtf8FirstByte`.
|
||||
`UInt8.IsUTF8FirstByte`; see `Pos.isValid_iff_isUTF8FirstByte`.
|
||||
|
||||
Examples:
|
||||
* `String.Pos.IsValid "abc" ⟨0⟩`
|
||||
|
|
@ -582,7 +582,7 @@ Examples:
|
|||
-/
|
||||
structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where private mk ::
|
||||
le_endPos : off ≤ s.endPos
|
||||
isValidUtf8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUtf8
|
||||
isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8
|
||||
|
||||
theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char} (b : ByteArray)
|
||||
(h : l.utf8Encode ++ b = m.utf8Encode) : l <+: m := by
|
||||
|
|
@ -604,8 +604,8 @@ theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char
|
|||
open List in
|
||||
theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||||
∃ m₁ m₂ : List Char, m₁.utf8Encode = s.bytes.extract 0 p.byteIdx ∧ (m₁ ++ m₂).asString = s := by
|
||||
obtain ⟨l, hl⟩ := s.isValidUtf8
|
||||
obtain ⟨m₁, hm₁⟩ := h.isValidUtf8_extract_zero
|
||||
obtain ⟨l, hl⟩ := s.isValidUTF8
|
||||
obtain ⟨m₁, hm₁⟩ := h.isValidUTF8_extract_zero
|
||||
suffices m₁ <+: l by
|
||||
obtain ⟨m₂, rfl⟩ := this
|
||||
refine ⟨m₁, m₂, hm₁.symm, ?_⟩
|
||||
|
|
@ -616,12 +616,12 @@ theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
|||
ByteArray.extract_zero_size]
|
||||
simpa using h.le_endPos
|
||||
|
||||
theorem Pos.Raw.IsValid.isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||||
ByteArray.IsValidUtf8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by
|
||||
theorem Pos.Raw.IsValid.isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||||
ByteArray.IsValidUTF8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by
|
||||
obtain ⟨m₁, m₂, hm, rfl⟩ := h.exists
|
||||
simp only [List.asString_append, bytes_append, List.bytes_asString]
|
||||
rw [ByteArray.extract_append_eq_right]
|
||||
· exact ByteArray.isValidUtf8_utf8Encode
|
||||
· exact ByteArray.isValidUTF8_utf8Encode
|
||||
· rw [hm]
|
||||
simp only [List.asString_append, bytes_append, List.bytes_asString, ByteArray.size_extract,
|
||||
ByteArray.size_append, Nat.sub_zero]
|
||||
|
|
@ -631,7 +631,7 @@ theorem Pos.Raw.IsValid.isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.R
|
|||
|
||||
theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} :
|
||||
p.IsValid s ↔ ∃ s₁ s₂ : String, s = s₁ ++ s₂ ∧ p = s₁.endPos := by
|
||||
refine ⟨fun h => ⟨⟨_, h.isValidUtf8_extract_zero⟩, ⟨_, h.isValidUtf8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩
|
||||
refine ⟨fun h => ⟨⟨_, h.isValidUTF8_extract_zero⟩, ⟨_, h.isValidUTF8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩
|
||||
· apply String.bytes_inj.1
|
||||
have := Pos.Raw.le_iff.1 h.le_endPos
|
||||
simp_all [← size_bytes]
|
||||
|
|
@ -640,7 +640,7 @@ theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} :
|
|||
simp [Nat.min_eq_left this]
|
||||
· rintro ⟨s₁, s₂, rfl, rfl⟩
|
||||
refine ⟨by simp [Pos.Raw.le_iff], ?_⟩
|
||||
simpa [ByteArray.extract_append_eq_left] using s₁.isValidUtf8
|
||||
simpa [ByteArray.extract_append_eq_left] using s₁.isValidUTF8
|
||||
|
||||
@[simp]
|
||||
theorem Pos.Raw.byteIdx_zero : (0 : Pos.Raw).byteIdx = 0 := rfl
|
||||
|
|
@ -648,12 +648,12 @@ theorem Pos.Raw.byteIdx_zero : (0 : Pos.Raw).byteIdx = 0 := rfl
|
|||
@[simp]
|
||||
theorem Pos.Raw.isValid_zero {s : String} : (0 : Pos.Raw).IsValid s where
|
||||
le_endPos := by simp [Pos.Raw.le_iff]
|
||||
isValidUtf8_extract_zero := by simp
|
||||
isValidUTF8_extract_zero := by simp
|
||||
|
||||
@[simp]
|
||||
theorem Pos.Raw.isValid_endPos {s : String} : s.endPos.IsValid s where
|
||||
le_endPos := by simp [Pos.Raw.le_iff]
|
||||
isValidUtf8_extract_zero := by simp [← size_bytes, s.isValidUtf8]
|
||||
isValidUTF8_extract_zero := by simp [← size_bytes, s.isValidUTF8]
|
||||
|
||||
@[simp]
|
||||
theorem Pos.Raw.isValid_empty_iff {p : Pos.Raw} : p.IsValid "" ↔ p = 0 := by
|
||||
|
|
@ -798,26 +798,21 @@ where
|
|||
/--
|
||||
Accesses the indicated byte in the UTF-8 encoding of a string.
|
||||
|
||||
At runtime, this function is implemented by efficient, constant-time code.
|
||||
-/
|
||||
@[extern "lean_string_get_byte_fast", expose]
|
||||
def getUtf8Byte (s : @& String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
|
||||
s.bytes[p.byteIdx]
|
||||
|
||||
/--
|
||||
Accesses the indicated byte in the UTF-8 encoding of a string.
|
||||
|
||||
At runtime, this function is implemented by efficient, constant-time code.
|
||||
-/
|
||||
@[extern "lean_string_get_byte_fast", expose]
|
||||
def getUTF8Byte (s : @& String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
|
||||
s.bytes[p.byteIdx]
|
||||
|
||||
@[deprecated getUTF8Byte (since := "2025-10-01"), extern "lean_string_get_byte_fast", expose]
|
||||
abbrev getUtf8Byte (s : String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
|
||||
s.getUTF8Byte p h
|
||||
|
||||
@[simp]
|
||||
theorem endPos_empty : "".endPos = 0 := rfl
|
||||
|
||||
theorem Pos.Raw.isValid_iff_isUtf8FirstByte {s : String} {p : Pos.Raw} :
|
||||
p.IsValid s ↔ p = s.endPos ∨ ∃ (h : p < s.endPos), (s.getUtf8Byte p h).IsUtf8FirstByte := by
|
||||
theorem Pos.Raw.isValid_iff_isUTF8FirstByte {s : String} {p : Pos.Raw} :
|
||||
p.IsValid s ↔ p = s.endPos ∨ ∃ (h : p < s.endPos), (s.getUTF8Byte p h).IsUTF8FirstByte := by
|
||||
induction s using push_induction with
|
||||
| empty => simp [Pos.Raw.lt_iff]
|
||||
| push s c ih =>
|
||||
|
|
@ -825,22 +820,22 @@ theorem Pos.Raw.isValid_iff_isUtf8FirstByte {s : String} {p : Pos.Raw} :
|
|||
refine ⟨?_, ?_⟩
|
||||
· rintro ((rfl|⟨h, hb⟩)|h)
|
||||
· refine Or.inr ⟨by simp [Pos.Raw.lt_iff, Char.utf8Size_pos], ?_⟩
|
||||
simp only [getUtf8Byte, bytes_push, byteIdx_endPos]
|
||||
simp only [getUTF8Byte, bytes_push, byteIdx_endPos]
|
||||
rw [ByteArray.getElem_append_right (by simp)]
|
||||
simp [List.isUtf8FirstByte_getElem_utf8Encode_singleton]
|
||||
simp [List.isUTF8FirstByte_getElem_utf8Encode_singleton]
|
||||
· refine Or.inr ⟨by simp [lt_iff] at h ⊢; omega, ?_⟩
|
||||
simp only [getUtf8Byte, bytes_push]
|
||||
rwa [ByteArray.getElem_append_left, ← getUtf8Byte]
|
||||
simp only [getUTF8Byte, bytes_push]
|
||||
rwa [ByteArray.getElem_append_left, ← getUTF8Byte]
|
||||
· exact Or.inl (by simpa [endPos_push])
|
||||
· rintro (h|⟨h, hb⟩)
|
||||
· exact Or.inr (by simpa [endPos_push] using h)
|
||||
· simp only [getUtf8Byte, bytes_push] at hb
|
||||
· simp only [getUTF8Byte, bytes_push] at hb
|
||||
by_cases h' : p < s.endPos
|
||||
· refine Or.inl (Or.inr ⟨h', ?_⟩)
|
||||
rwa [ByteArray.getElem_append_left h', ← getUtf8Byte] at hb
|
||||
rwa [ByteArray.getElem_append_left h', ← getUTF8Byte] at hb
|
||||
· refine Or.inl (Or.inl ?_)
|
||||
rw [ByteArray.getElem_append_right (by simp [lt_iff] at h' ⊢; omega)] at hb
|
||||
simp only [size_bytes, List.isUtf8FirstByte_getElem_utf8Encode_singleton] at hb
|
||||
simp only [size_bytes, List.isUTF8FirstByte_getElem_utf8Encode_singleton] at hb
|
||||
ext
|
||||
simp only [lt_iff, byteIdx_endPos, Nat.not_lt] at ⊢ h'
|
||||
omega
|
||||
|
|
@ -865,13 +860,13 @@ Examples:
|
|||
@[extern "lean_string_is_valid_pos", expose]
|
||||
def Pos.Raw.isValid (s : @&String) (p : @& Pos.Raw) : Bool :=
|
||||
if h : p < s.endPos then
|
||||
(s.getUtf8Byte p h).IsUtf8FirstByte
|
||||
(s.getUTF8Byte p h).IsUTF8FirstByte
|
||||
else
|
||||
p = s.endPos
|
||||
|
||||
@[simp]
|
||||
theorem Pos.Raw.isValid_eq_true_iff {s : String} {p : Pos.Raw} : p.isValid s = true ↔ p.IsValid s := by
|
||||
rw [isValid_iff_isUtf8FirstByte]
|
||||
rw [isValid_iff_isUTF8FirstByte]
|
||||
fun_cases isValid s p with
|
||||
| case1 h =>
|
||||
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
|
||||
|
|
@ -899,29 +894,29 @@ theorem Pos.Raw.isValid_iff_isSome_utf8DecodeChar? {s : String} {p : Pos.Raw} :
|
|||
simp
|
||||
· exact Or.inr (ByteArray.isSome_utf8DecodeChar?_append h _)
|
||||
· simp [endPos_push]
|
||||
· refine isValid_iff_isUtf8FirstByte.2 (Or.inr ?_)
|
||||
· refine isValid_iff_isUTF8FirstByte.2 (Or.inr ?_)
|
||||
obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
|
||||
refine ⟨?_, ?_⟩
|
||||
· have := ByteArray.le_size_of_utf8DecodeChar?_eq_some hc
|
||||
have := c.utf8Size_pos
|
||||
simp only [lt_iff, byteIdx_endPos, gt_iff_lt, ← size_bytes]
|
||||
omega
|
||||
· rw [getUtf8Byte]
|
||||
exact ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? h
|
||||
· rw [getUTF8Byte]
|
||||
exact ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h
|
||||
|
||||
theorem _root_.ByteArray.IsValidUtf8.isUtf8FirstByte_getElem_zero {b : ByteArray}
|
||||
(h : b.IsValidUtf8) (h₀ : 0 < b.size) : b[0].IsUtf8FirstByte := by
|
||||
theorem _root_.ByteArray.IsValidUTF8.isUTF8FirstByte_getElem_zero {b : ByteArray}
|
||||
(h : b.IsValidUTF8) (h₀ : 0 < b.size) : b[0].IsUTF8FirstByte := by
|
||||
rcases h with ⟨m, rfl⟩
|
||||
have : m ≠ [] := by
|
||||
rintro rfl
|
||||
simp at h₀
|
||||
conv => congr; congr; rw [← List.cons_head_tail this, ← List.singleton_append, List.utf8Encode_append]
|
||||
rw [ByteArray.getElem_append_left]
|
||||
· exact List.isUtf8FirstByte_getElem_utf8Encode_singleton.2 rfl
|
||||
· exact List.isUTF8FirstByte_getElem_utf8Encode_singleton.2 rfl
|
||||
· simp [List.utf8Encode_singleton, Char.utf8Size_pos]
|
||||
|
||||
theorem isUtf8FirstByte_getUtf8Byte_zero {b : String} {h} : (b.getUtf8Byte 0 h).IsUtf8FirstByte :=
|
||||
b.isValidUtf8.isUtf8FirstByte_getElem_zero _
|
||||
theorem isUTF8FirstByte_getUTF8Byte_zero {b : String} {h} : (b.getUTF8Byte 0 h).IsUTF8FirstByte :=
|
||||
b.isValidUTF8.isUTF8FirstByte_getElem_zero _
|
||||
|
||||
protected theorem Pos.Raw.le_trans {a b c : Pos.Raw} : a ≤ b → b ≤ c → a ≤ c := by
|
||||
simpa [le_iff] using Nat.le_trans
|
||||
|
|
@ -929,43 +924,43 @@ protected theorem Pos.Raw.le_trans {a b c : Pos.Raw} : a ≤ b → b ≤ c → a
|
|||
protected theorem Pos.Raw.lt_of_lt_of_le {a b c : Pos.Raw} : a < b → b ≤ c → a < c := by
|
||||
simpa [le_iff, lt_iff] using Nat.lt_of_lt_of_le
|
||||
|
||||
theorem Pos.Raw.isValidUtf8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.endPos) :
|
||||
(s.bytes.extract p₁.byteIdx p₂.byteIdx).IsValidUtf8 ↔ p₁ = p₂ ∨ (p₁.IsValid s ∧ p₂.IsValid s) := by
|
||||
theorem Pos.Raw.isValidUTF8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.endPos) :
|
||||
(s.bytes.extract p₁.byteIdx p₂.byteIdx).IsValidUTF8 ↔ p₁ = p₂ ∨ (p₁.IsValid s ∧ p₂.IsValid s) := by
|
||||
have hle'' : p₂.byteIdx ≤ s.bytes.size := by simpa [le_iff] using hle'
|
||||
refine ⟨fun h => Classical.or_iff_not_imp_left.2 (fun h' => ?_), ?_⟩
|
||||
· have hlt : p₁ < p₂ := by
|
||||
simp_all [le_iff, lt_iff, Pos.Raw.ext_iff]
|
||||
omega
|
||||
have h₁ : p₁.IsValid s := by
|
||||
rw [isValid_iff_isUtf8FirstByte]
|
||||
rw [isValid_iff_isUTF8FirstByte]
|
||||
refine Or.inr ⟨Pos.Raw.lt_of_lt_of_le hlt hle', ?_⟩
|
||||
have hlt' : 0 < p₂.byteIdx - p₁.byteIdx := by
|
||||
simp [lt_iff] at hlt
|
||||
omega
|
||||
have := h.isUtf8FirstByte_getElem_zero
|
||||
have := h.isUTF8FirstByte_getElem_zero
|
||||
simp only [ByteArray.size_extract, Nat.min_eq_left hle'', hlt', ByteArray.getElem_extract, Nat.add_zero] at this
|
||||
simp [getUtf8Byte, this trivial]
|
||||
simp [getUTF8Byte, this trivial]
|
||||
refine ⟨h₁, ⟨hle', ?_⟩⟩
|
||||
rw [ByteArray.extract_eq_extract_append_extract p₁.byteIdx (by simp) hle]
|
||||
exact h₁.isValidUtf8_extract_zero.append h
|
||||
exact h₁.isValidUTF8_extract_zero.append h
|
||||
· refine fun h => h.elim (by rintro rfl; simp) (fun ⟨h₁, h₂⟩ => ?_)
|
||||
let t : String := ⟨_, h₂.isValidUtf8_extract_zero⟩
|
||||
let t : String := ⟨_, h₂.isValidUTF8_extract_zero⟩
|
||||
have htb : t.bytes = s.bytes.extract 0 p₂.byteIdx := rfl
|
||||
have ht : p₁.IsValid t := by
|
||||
refine ⟨?_, ?_⟩
|
||||
· simpa [le_iff, t, Nat.min_eq_left hle'', ← size_bytes]
|
||||
· simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUtf8_extract_zero
|
||||
· simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUTF8_extract_zero
|
||||
simpa [htb, ByteArray.extract_extract, Nat.zero_add, Nat.min_eq_left hle'', ← size_bytes]
|
||||
using ht.isValidUtf8_extract_utf8ByteSize
|
||||
using ht.isValidUTF8_extract_utf8ByteSize
|
||||
|
||||
theorem Pos.Raw.isValid_iff_isValidUtf8_extract_zero {s : String} {p : Pos.Raw} :
|
||||
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUtf8 :=
|
||||
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} :
|
||||
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 :=
|
||||
⟨fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩
|
||||
|
||||
theorem Pos.Raw.isValid_iff_isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.Raw} :
|
||||
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract p.byteIdx s.utf8ByteSize).IsValidUtf8 := by
|
||||
refine ⟨fun h => ⟨h.le_endPos, h.isValidUtf8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩
|
||||
rw [← byteIdx_endPos, isValidUtf8_extract_iff _ _ h₁ (by simp [le_iff])] at h₂
|
||||
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} :
|
||||
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract p.byteIdx s.utf8ByteSize).IsValidUTF8 := by
|
||||
refine ⟨fun h => ⟨h.le_endPos, h.isValidUTF8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩
|
||||
rw [← byteIdx_endPos, isValidUTF8_extract_iff _ _ h₁ (by simp [le_iff])] at h₂
|
||||
obtain (rfl|h₂) := h₂
|
||||
· simp
|
||||
· exact h₂.1
|
||||
|
|
@ -1000,12 +995,12 @@ def endValidPos (s : String) : s.ValidPos where
|
|||
offset := s.endPos
|
||||
isValid := by simp
|
||||
|
||||
theorem ValidPos.isValidUtf8_extract {s : String} (pos₁ pos₂ : s.ValidPos) :
|
||||
(s.bytes.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUtf8 := by
|
||||
theorem ValidPos.isValidUTF8_extract {s : String} (pos₁ pos₂ : s.ValidPos) :
|
||||
(s.bytes.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUTF8 := by
|
||||
by_cases h : pos₁.offset ≤ pos₂.offset
|
||||
· exact (Pos.Raw.isValidUtf8_extract_iff _ _ h pos₂.isValid.le_endPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩)
|
||||
· exact (Pos.Raw.isValidUTF8_extract_iff _ _ h pos₂.isValid.le_endPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩)
|
||||
· rw [ByteArray.extract_eq_empty_iff.2]
|
||||
· exact ByteArray.isValidUtf8_empty
|
||||
· exact ByteArray.isValidUTF8_empty
|
||||
· rw [Nat.min_eq_left]
|
||||
· rw [Pos.Raw.le_iff] at h
|
||||
omega
|
||||
|
|
@ -1063,8 +1058,8 @@ Accesses the indicated byte in the UTF-8 encoding of a string slice.
|
|||
At runtime, this function is implemented by efficient, constant-time code.
|
||||
-/
|
||||
@[inline, expose]
|
||||
def Slice.getUtf8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 :=
|
||||
s.str.getUtf8Byte (s.startInclusive.offset + p) (by
|
||||
def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 :=
|
||||
s.str.getUTF8Byte (s.startInclusive.offset + p) (by
|
||||
have := s.endExclusive.isValid.le_endPos
|
||||
simp only [Pos.Raw.lt_iff, byteIdx_utf8ByteSize, Pos.Raw.le_iff, byteIdx_endPos, Pos.Raw.byteIdx_add] at *
|
||||
omega)
|
||||
|
|
@ -1074,16 +1069,16 @@ Accesses the indicated byte in the UTF-8 encoding of the string slice, or panics
|
|||
is out-of-bounds.
|
||||
-/
|
||||
@[expose]
|
||||
def Slice.getUtf8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
|
||||
def Slice.getUTF8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
|
||||
if h : p < s.utf8ByteSize then
|
||||
s.getUtf8Byte p h
|
||||
s.getUTF8Byte p h
|
||||
else
|
||||
panic! "String slice access is out of bounds."
|
||||
|
||||
@[extern "lean_string_utf8_extract"]
|
||||
def ValidPos.extract {s : @& String} (b e : @& s.ValidPos) : String where
|
||||
bytes := s.bytes.extract b.offset.byteIdx e.offset.byteIdx
|
||||
isValidUtf8 := b.isValidUtf8_extract e
|
||||
isValidUTF8 := b.isValidUTF8_extract e
|
||||
|
||||
/-- Creates a `String` from a `String.Slice` by copying the bytes. -/
|
||||
@[inline]
|
||||
|
|
@ -1103,17 +1098,17 @@ theorem Slice.utf8ByteSize_copy {s : Slice} :
|
|||
theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.utf8ByteSize := by
|
||||
simp [Pos.Raw.ext_iff]
|
||||
|
||||
theorem Slice.getUtf8Byte_eq_getUtf8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} :
|
||||
s.getUtf8Byte p h = s.copy.getUtf8Byte p (by simpa) := by
|
||||
simp [getUtf8Byte, String.getUtf8Byte, bytes_copy, ByteArray.getElem_extract]
|
||||
theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} :
|
||||
s.getUTF8Byte p h = s.copy.getUTF8Byte p (by simpa) := by
|
||||
simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]
|
||||
|
||||
theorem Slice.getUtf8Byte_copy {s : Slice} {p : Pos.Raw} {h} :
|
||||
s.copy.getUtf8Byte p h = s.getUtf8Byte p (by simpa using h) := by
|
||||
rw [getUtf8Byte_eq_getUtf8Byte_copy]
|
||||
theorem Slice.getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h} :
|
||||
s.copy.getUTF8Byte p h = s.getUTF8Byte p (by simpa using h) := by
|
||||
rw [getUTF8Byte_eq_getUTF8Byte_copy]
|
||||
|
||||
theorem Slice.isUtf8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
|
||||
(s.getUtf8Byte 0 h).IsUtf8FirstByte := by
|
||||
simpa [getUtf8Byte_eq_getUtf8Byte_copy] using s.copy.isUtf8FirstByte_getUtf8Byte_zero
|
||||
theorem Slice.isUTF8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
|
||||
(s.getUTF8Byte 0 h).IsUTF8FirstByte := by
|
||||
simpa [getUTF8Byte_eq_getUTF8Byte_copy] using s.copy.isUTF8FirstByte_getUTF8Byte_zero
|
||||
|
||||
@[simp]
|
||||
theorem Pos.Raw.add_zero {p : Pos.Raw} : p + 0 = p := by
|
||||
|
|
@ -1127,7 +1122,7 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
|
|||
· have := s.startInclusive_le_endExclusive
|
||||
simp_all only [Slice.endPos_copy, le_iff, Slice.byteIdx_utf8ByteSize]
|
||||
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] at h₂
|
||||
rw [← byteIdx_add, Pos.Raw.isValidUtf8_extract_iff] at h₂
|
||||
rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff] at h₂
|
||||
· rcases h₂ with (h₂|⟨-, h₂⟩)
|
||||
· rw [← h₂]
|
||||
exact s.startInclusive.isValid
|
||||
|
|
@ -1140,7 +1135,7 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
|
|||
· have := s.startInclusive_le_endExclusive
|
||||
simp_all only [le_iff, Slice.byteIdx_utf8ByteSize]
|
||||
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)]
|
||||
rw [← byteIdx_add, Pos.Raw.isValidUtf8_extract_iff]
|
||||
rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff]
|
||||
· exact Or.inr ⟨s.startInclusive.isValid, h₂⟩
|
||||
· simp [le_iff]
|
||||
· have := s.endExclusive.isValid.le_endPos
|
||||
|
|
@ -1186,22 +1181,22 @@ def Slice.endPos (s : Slice) : s.Pos where
|
|||
@[simp]
|
||||
theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.utf8ByteSize := (rfl)
|
||||
|
||||
theorem Pos.Raw.isValidForSlice_iff_isUtf8FirstByte {s : Slice} {p : Pos.Raw} :
|
||||
p.IsValidForSlice s ↔ (p = s.utf8ByteSize ∨ (∃ (h : p < s.utf8ByteSize), (s.getUtf8Byte p h).IsUtf8FirstByte)) := by
|
||||
simp [← isValid_copy_iff, isValid_iff_isUtf8FirstByte, Slice.getUtf8Byte_copy]
|
||||
theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} :
|
||||
p.IsValidForSlice s ↔ (p = s.utf8ByteSize ∨ (∃ (h : p < s.utf8ByteSize), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
|
||||
simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy]
|
||||
|
||||
/-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/
|
||||
@[expose]
|
||||
def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool :=
|
||||
if h : p < s.utf8ByteSize then
|
||||
(s.getUtf8Byte p h).IsUtf8FirstByte
|
||||
(s.getUTF8Byte p h).IsUTF8FirstByte
|
||||
else
|
||||
p = s.utf8ByteSize
|
||||
|
||||
@[simp]
|
||||
theorem Pos.Raw.isValidForSlice_eq_true_iff {s : Slice} {p : Pos.Raw} :
|
||||
p.isValidForSlice s = true ↔ p.IsValidForSlice s := by
|
||||
rw [isValidForSlice_iff_isUtf8FirstByte]
|
||||
rw [isValidForSlice_iff_isUTF8FirstByte]
|
||||
fun_cases isValidForSlice with
|
||||
| case1 h =>
|
||||
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
|
||||
|
|
@ -1257,22 +1252,22 @@ theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar? {s : Slice} {p : Pos.
|
|||
simp only [size_bytes, Slice.utf8ByteSize_copy, ge_iff_le]
|
||||
omega
|
||||
· simp
|
||||
· rw [isValidForSlice_iff_isUtf8FirstByte]
|
||||
· rw [isValidForSlice_iff_isUTF8FirstByte]
|
||||
rintro (rfl|⟨h₁, h₂⟩)
|
||||
· simp
|
||||
· exact Or.inr ⟨h₁, ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? h₂⟩
|
||||
· exact Or.inr ⟨h₁, ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h₂⟩
|
||||
|
||||
/-- Returns the byte at a position in a slice that is not the end position. -/
|
||||
@[inline, expose]
|
||||
def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=
|
||||
s.getUtf8Byte pos.offset (by
|
||||
s.getUTF8Byte pos.offset (by
|
||||
have := pos.isValidForSlice.le_utf8ByteSize
|
||||
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff]
|
||||
omega)
|
||||
|
||||
theorem Slice.Pos.isUtf8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||||
(pos.byte h).IsUtf8FirstByte :=
|
||||
((Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2))
|
||||
theorem Slice.Pos.isUTF8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||||
(pos.byte h).IsUTF8FirstByte :=
|
||||
((Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2))
|
||||
|
||||
/-- Given a valid position on a slice `s`, obtains the corresponding valid position on the
|
||||
underlying string `s.str`. -/
|
||||
|
|
@ -1687,7 +1682,7 @@ theorem Slice.Pos.get_eq_get_toCopy {s : Slice} {pos : s.Pos} {h} :
|
|||
theorem Slice.Pos.byte_toCopy {s : Slice} {pos : s.Pos} (h) :
|
||||
pos.toCopy.byte h = pos.byte (by rintro rfl; simp at h) := by
|
||||
rw [ValidPos.byte, Slice.Pos.byte, Slice.Pos.byte]
|
||||
simp [getUtf8Byte, String.getUtf8Byte, bytes_copy, ByteArray.getElem_extract]
|
||||
simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]
|
||||
|
||||
theorem Slice.Pos.byte_eq_byte_toCopy {s : Slice} {pos : s.Pos} {h} :
|
||||
pos.byte h = pos.toCopy.byte (ne_of_apply_ne ValidPos.ofCopy (by simp [h])) :=
|
||||
|
|
@ -1745,14 +1740,14 @@ theorem Slice.Pos.copy_eq_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.en
|
|||
rw [append_assoc, ← ht₂, ← copy_eq_copy_replaceEnd]
|
||||
|
||||
theorem Slice.Pos.utf8ByteSize_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||||
(pos.byte h).utf8ByteSize pos.isUtf8FirstByte_byte = ⟨(pos.get h).utf8Size⟩ := by
|
||||
simp [getUtf8Byte, byte, String.getUtf8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]
|
||||
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = ⟨(pos.get h).utf8Size⟩ := by
|
||||
simp [getUTF8Byte, byte, String.getUTF8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]
|
||||
|
||||
/-- Advances a valid position on a slice to the next valid position, given a proof that the
|
||||
position is not the past-the-end position, which guarantees that such a position exists. -/
|
||||
@[expose]
|
||||
def Slice.Pos.next {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos where
|
||||
offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUtf8FirstByte_byte
|
||||
offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte
|
||||
isValidForSlice := by
|
||||
obtain ⟨t₁, t₂, ht, ht'⟩ := copy_eq_append_get h
|
||||
replace ht' : pos.offset = ⟨t₁.utf8ByteSize⟩ := Eq.symm (String.Pos.Raw.ext ht')
|
||||
|
|
@ -1802,13 +1797,13 @@ def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : Strin
|
|||
omega)
|
||||
where
|
||||
go (off : Nat) (h₁ : ⟨off⟩ < s.utf8ByteSize) : String.Pos.Raw :=
|
||||
if hbyte : (s.getUtf8Byte ⟨off⟩ h₁).IsUtf8FirstByte then
|
||||
if hbyte : (s.getUTF8Byte ⟨off⟩ h₁).IsUTF8FirstByte then
|
||||
⟨off⟩
|
||||
else
|
||||
have : 0 ≠ off := by
|
||||
intro h
|
||||
obtain hoff : (⟨off⟩ : String.Pos.Raw) = 0 := by simpa [String.Pos.Raw.ext_iff] using h.symm
|
||||
simp [hoff, s.isUtf8FirstByte_utf8ByteAt_zero] at hbyte
|
||||
simp [hoff, s.isUTF8FirstByte_utf8ByteAt_zero] at hbyte
|
||||
match off with
|
||||
| 0 => False.elim (by contradiction)
|
||||
| off + 1 => go off (by simp [Pos.Raw.lt_iff] at ⊢ h₁; omega)
|
||||
|
|
@ -1820,12 +1815,12 @@ theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : ⟨off
|
|||
| zero =>
|
||||
rw [Slice.Pos.prevAux.go]
|
||||
split
|
||||
· exact Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||||
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||||
· simpa using elim
|
||||
| succ off ih =>
|
||||
rw [Slice.Pos.prevAux.go]
|
||||
split
|
||||
· exact Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||||
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||||
· simpa using ih _
|
||||
where
|
||||
elim {P : Pos.Raw → Prop} {h : False} : P h.elim := h.elim
|
||||
|
|
@ -1965,8 +1960,8 @@ def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.utf
|
|||
where
|
||||
go (offset : String.Pos.Raw) : s.Pos :=
|
||||
if h : offset < s.utf8ByteSize then
|
||||
if h' : (s.getUtf8Byte offset h).IsUtf8FirstByte then
|
||||
s.pos offset (Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, h'⟩))
|
||||
if h' : (s.getUTF8Byte offset h).IsUTF8FirstByte then
|
||||
s.pos offset (Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, h'⟩))
|
||||
else
|
||||
go offset.inc
|
||||
else
|
||||
|
|
|
|||
|
|
@ -982,7 +982,7 @@ end ByteArray.utf8DecodeChar?
|
|||
|
||||
open ByteArray.utf8DecodeChar?
|
||||
|
||||
/- # `utf8DecodeChar?` and `validateUtf8At` -/
|
||||
/- # `utf8DecodeChar?` and `validateUTF8At` -/
|
||||
|
||||
@[inline, expose]
|
||||
public def ByteArray.utf8DecodeChar? (bytes : ByteArray) (i : Nat) : Option Char :=
|
||||
|
|
@ -1006,7 +1006,7 @@ public def ByteArray.utf8DecodeChar? (bytes : ByteArray) (i : Nat) : Option Char
|
|||
else none
|
||||
|
||||
@[inline, expose]
|
||||
public def ByteArray.validateUtf8At (bytes : ByteArray) (i : Nat) : Bool :=
|
||||
public def ByteArray.validateUTF8At (bytes : ByteArray) (i : Nat) : Bool :=
|
||||
if h₀ : i < bytes.size then
|
||||
match h : parseFirstByte bytes[i] with
|
||||
| .invalid => false
|
||||
|
|
@ -1212,9 +1212,9 @@ public theorem String.toByteArray_utf8EncodeChar_of_utf8DecodeChar?_eq_some {b :
|
|||
rw [← assemble₄_eq_some_iff_utf8EncodeChar_eq]
|
||||
exact ⟨this, h⟩
|
||||
|
||||
public theorem ByteArray.validateUtf8At_eq_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} :
|
||||
b.validateUtf8At i = (b.utf8DecodeChar? i).isSome := by
|
||||
simp only [validateUtf8At, utf8DecodeChar?]
|
||||
public theorem ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} :
|
||||
b.validateUTF8At i = (b.utf8DecodeChar? i).isSome := by
|
||||
simp only [validateUTF8At, utf8DecodeChar?]
|
||||
split
|
||||
· split <;> (try split) <;> simp [verify₁_eq_isSome_assemble₁, verify₂_eq_isSome_assemble₂,
|
||||
verify₃_eq_isSome_assemble₃, verify₄_eq_isSome_assemble₄]
|
||||
|
|
@ -1285,9 +1285,9 @@ public theorem ByteArray.lt_size_of_isSome_utf8DecodeChar? {b : ByteArray} {i :
|
|||
have := c.utf8Size_pos
|
||||
omega
|
||||
|
||||
public theorem ByteArray.lt_size_of_validateUtf8At {b : ByteArray} {i : Nat} :
|
||||
validateUtf8At b i = true → i < b.size :=
|
||||
validateUtf8At_eq_isSome_utf8DecodeChar? ▸ lt_size_of_isSome_utf8DecodeChar?
|
||||
public theorem ByteArray.lt_size_of_validateUTF8At {b : ByteArray} {i : Nat} :
|
||||
validateUTF8At b i = true → i < b.size :=
|
||||
validateUTF8At_eq_isSome_utf8DecodeChar? ▸ lt_size_of_isSome_utf8DecodeChar?
|
||||
|
||||
public theorem ByteArray.utf8DecodeChar?_append_eq_some {b : ByteArray} {i : Nat} {c : Char} (h : utf8DecodeChar? b i = some c)
|
||||
(b' : ByteArray) : utf8DecodeChar? (b ++ b') i = some c := by
|
||||
|
|
@ -1366,7 +1366,7 @@ public theorem List.utf8DecodeChar_utf8Encode_cons {l : List Char} {c : Char} {h
|
|||
ByteArray.utf8DecodeChar (c::l).utf8Encode 0 h = c := by
|
||||
simp [ByteArray.utf8DecodeChar]
|
||||
|
||||
/-! # `UInt8.IsUtf8FirstByte` -/
|
||||
/-! # `UInt8.IsUTF8FirstByte` -/
|
||||
|
||||
namespace UInt8
|
||||
|
||||
|
|
@ -1375,27 +1375,27 @@ Predicate for whether a byte can appear as the first byte of the UTF-8 encoding
|
|||
scalar value.
|
||||
-/
|
||||
@[expose]
|
||||
public def IsUtf8FirstByte (c : UInt8) : Prop :=
|
||||
public def IsUTF8FirstByte (c : UInt8) : Prop :=
|
||||
c &&& 0x80 = 0 ∨ c &&& 0xe0 = 0xc0 ∨ c &&& 0xf0 = 0xe0 ∨ c &&& 0xf8 = 0xf0
|
||||
|
||||
public instance {c : UInt8} : Decidable c.IsUtf8FirstByte :=
|
||||
public instance {c : UInt8} : Decidable c.IsUTF8FirstByte :=
|
||||
inferInstanceAs <| Decidable (c &&& 0x80 = 0 ∨ c &&& 0xe0 = 0xc0 ∨ c &&& 0xf0 = 0xe0 ∨ c &&& 0xf8 = 0xf0)
|
||||
|
||||
theorem isUtf8FirstByte_iff_parseFirstByte_ne_invalid {c : UInt8} :
|
||||
c.IsUtf8FirstByte ↔ parseFirstByte c ≠ FirstByte.invalid := by
|
||||
fun_cases parseFirstByte with simp_all [IsUtf8FirstByte]
|
||||
theorem isUTF8FirstByte_iff_parseFirstByte_ne_invalid {c : UInt8} :
|
||||
c.IsUTF8FirstByte ↔ parseFirstByte c ≠ FirstByte.invalid := by
|
||||
fun_cases parseFirstByte with simp_all [IsUTF8FirstByte]
|
||||
|
||||
@[simp]
|
||||
public theorem isUtf8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi : i < (String.utf8EncodeChar c).length} :
|
||||
(String.utf8EncodeChar c)[i].IsUtf8FirstByte ↔ i = 0 := by
|
||||
public theorem isUTF8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi : i < (String.utf8EncodeChar c).length} :
|
||||
(String.utf8EncodeChar c)[i].IsUTF8FirstByte ↔ i = 0 := by
|
||||
obtain (rfl|hi₀) := Nat.eq_zero_or_pos i
|
||||
· simp only [isUtf8FirstByte_iff_parseFirstByte_ne_invalid, iff_true]
|
||||
· simp only [isUTF8FirstByte_iff_parseFirstByte_ne_invalid, iff_true]
|
||||
match h : c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with
|
||||
| 1, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_done h]
|
||||
| 2, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_oneMore h]
|
||||
| 3, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_twoMore h]
|
||||
| 4, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_threeMore h]
|
||||
· simp only [isUtf8FirstByte_iff_parseFirstByte_ne_invalid, ne_eq, Nat.ne_of_lt' hi₀, iff_false,
|
||||
· simp only [isUTF8FirstByte_iff_parseFirstByte_ne_invalid, ne_eq, Nat.ne_of_lt' hi₀, iff_false,
|
||||
Classical.not_not]
|
||||
apply parseFirstByte_eq_invalid_of_isInvalidContinuationByte_eq_false
|
||||
simp only [String.length_utf8EncodeChar] at hi
|
||||
|
|
@ -1408,12 +1408,12 @@ public theorem isUtf8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi :
|
|||
| 4, _, _, 2, _, _ => simp [isInvalidContinuationByte_getElem_utf8EncodeChar_two_of_utf8Size_eq_four h]
|
||||
| 4, _, _, 3, _, _ => simp [isInvalidContinuationByte_getElem_utf8EncodeChar_three_of_utf8Size_eq_four h]
|
||||
|
||||
public theorem isUtf8FirstByte_getElem_zero_utf8EncodeChar {c : Char} :
|
||||
((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).IsUtf8FirstByte := by
|
||||
public theorem isUTF8FirstByte_getElem_zero_utf8EncodeChar {c : Char} :
|
||||
((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).IsUTF8FirstByte := by
|
||||
simp
|
||||
|
||||
@[expose]
|
||||
public def utf8ByteSize (c : UInt8) (_h : c.IsUtf8FirstByte) : String.Pos.Raw :=
|
||||
public def utf8ByteSize (c : UInt8) (_h : c.IsUTF8FirstByte) : String.Pos.Raw :=
|
||||
if c &&& 0x80 = 0 then
|
||||
⟨1⟩
|
||||
else if c &&& 0xe0 = 0xc0 then
|
||||
|
|
@ -1430,7 +1430,7 @@ def _root_.ByteArray.utf8DecodeChar?.FirstByte.utf8ByteSize : FirstByte → Stri
|
|||
| .twoMore => ⟨3⟩
|
||||
| .threeMore => ⟨4⟩
|
||||
|
||||
theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUtf8FirstByte} :
|
||||
theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUTF8FirstByte} :
|
||||
c.utf8ByteSize h = (parseFirstByte c).utf8ByteSize := by
|
||||
simp only [utf8ByteSize, FirstByte.utf8ByteSize, parseFirstByte, beq_iff_eq]
|
||||
split
|
||||
|
|
@ -1447,28 +1447,28 @@ theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUtf8Fir
|
|||
|
||||
end UInt8
|
||||
|
||||
public theorem ByteArray.isUtf8FirstByte_getElem_zero_utf8EncodeChar_append {c : Char} {b : ByteArray} :
|
||||
(((String.utf8EncodeChar c).toByteArray ++ b)[0]'(by simp; have := c.utf8Size_pos; omega)).IsUtf8FirstByte := by
|
||||
public theorem ByteArray.isUTF8FirstByte_getElem_zero_utf8EncodeChar_append {c : Char} {b : ByteArray} :
|
||||
(((String.utf8EncodeChar c).toByteArray ++ b)[0]'(by simp; have := c.utf8Size_pos; omega)).IsUTF8FirstByte := by
|
||||
rw [ByteArray.getElem_append_left (by simp [c.utf8Size_pos]),
|
||||
List.getElem_toByteArray, UInt8.isUtf8FirstByte_getElem_utf8EncodeChar]
|
||||
List.getElem_toByteArray, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar]
|
||||
|
||||
public theorem ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat}
|
||||
(h : (utf8DecodeChar? b i).isSome) : (b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUtf8FirstByte := by
|
||||
public theorem ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat}
|
||||
(h : (utf8DecodeChar? b i).isSome) : (b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUTF8FirstByte := by
|
||||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h
|
||||
suffices ((b.extract i b.size)[0]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUtf8FirstByte by
|
||||
suffices ((b.extract i b.size)[0]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUTF8FirstByte by
|
||||
simpa [ByteArray.getElem_extract, Nat.add_zero] using this
|
||||
obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
|
||||
conv => congr; congr; rw [eq_of_utf8DecodeChar?_eq_some hc]
|
||||
exact isUtf8FirstByte_getElem_zero_utf8EncodeChar_append
|
||||
exact isUTF8FirstByte_getElem_zero_utf8EncodeChar_append
|
||||
|
||||
public theorem ByteArray.isUtf8FirstByte_of_validateUtf8At {b : ByteArray} {i : Nat} :
|
||||
(h : validateUtf8At b i = true) → (b[i]'(lt_size_of_validateUtf8At h)).IsUtf8FirstByte := by
|
||||
simp only [validateUtf8At_eq_isSome_utf8DecodeChar?]
|
||||
exact isUtf8FirstByte_of_isSome_utf8DecodeChar?
|
||||
public theorem ByteArray.isUTF8FirstByte_of_validateUTF8At {b : ByteArray} {i : Nat} :
|
||||
(h : validateUTF8At b i = true) → (b[i]'(lt_size_of_validateUTF8At h)).IsUTF8FirstByte := by
|
||||
simp only [validateUTF8At_eq_isSome_utf8DecodeChar?]
|
||||
exact isUTF8FirstByte_of_isSome_utf8DecodeChar?
|
||||
|
||||
theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
|
||||
(((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).utf8ByteSize
|
||||
UInt8.isUtf8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by
|
||||
UInt8.isUTF8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by
|
||||
rw [UInt8.utf8ByteSize_eq_utf8ByteSize_parseFirstByte]
|
||||
obtain (hc|hc|hc|hc) := c.utf8Size_eq
|
||||
· rw [parseFirstByte_utf8EncodeChar_eq_done hc, FirstByte.utf8ByteSize, hc]
|
||||
|
|
@ -1478,7 +1478,7 @@ theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
|
|||
|
||||
public theorem ByteArray.utf8Size_utf8DecodeChar {b : ByteArray} {i} {h} :
|
||||
(utf8DecodeChar b i h).utf8Size =
|
||||
((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUtf8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by
|
||||
((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUTF8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by
|
||||
rw [← Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar]
|
||||
simp only [List.getElem_eq_getElem_toByteArray, utf8EncodeChar_utf8DecodeChar]
|
||||
simp [ByteArray.getElem_extract]
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ where
|
|||
have hr := by
|
||||
simp [Pos.Raw.le_iff] at h h2 ⊢
|
||||
omega
|
||||
if lhs.getUtf8Byte (lstart + curr) hl == rhs.getUtf8Byte (rstart + curr) hr then
|
||||
if lhs.getUTF8Byte (lstart + curr) hl == rhs.getUTF8Byte (rstart + curr) hr then
|
||||
go curr.inc
|
||||
else
|
||||
false
|
||||
|
|
|
|||
|
|
@ -39,16 +39,16 @@ partial def buildTable (pat : Slice) : Array String.Pos.Raw :=
|
|||
where
|
||||
go (pos : String.Pos.Raw) (table : Array String.Pos.Raw) :=
|
||||
if h : pos < pat.utf8ByteSize then
|
||||
let patByte := pat.getUtf8Byte pos h
|
||||
let patByte := pat.getUTF8Byte pos h
|
||||
let distance := computeDistance table[table.size - 1]! patByte table
|
||||
let distance := if patByte = pat.getUtf8Byte! distance then distance.inc else distance
|
||||
let distance := if patByte = pat.getUTF8Byte! distance then distance.inc else distance
|
||||
go pos.inc (table.push distance)
|
||||
else
|
||||
table
|
||||
|
||||
computeDistance (distance : String.Pos.Raw) (patByte : UInt8) (table : Array String.Pos.Raw) :
|
||||
String.Pos.Raw :=
|
||||
if distance > 0 && patByte != pat.getUtf8Byte! distance then
|
||||
if distance > 0 && patByte != pat.getUTF8Byte! distance then
|
||||
computeDistance table[distance.byteIdx - 1]! patByte table
|
||||
else
|
||||
distance
|
||||
|
|
@ -62,7 +62,7 @@ def iter (s : Slice) (pat : Slice) : Std.Iter (α := ForwardSliceSearcher s) (Se
|
|||
|
||||
partial def backtrackIfNecessary (pat : Slice) (table : Array String.Pos.Raw) (stackByte : UInt8)
|
||||
(needlePos : String.Pos.Raw) : String.Pos.Raw :=
|
||||
if needlePos != 0 && stackByte != pat.getUtf8Byte! needlePos then
|
||||
if needlePos != 0 && stackByte != pat.getUTF8Byte! needlePos then
|
||||
backtrackIfNecessary pat table stackByte table[needlePos.byteIdx - 1]!
|
||||
else
|
||||
needlePos
|
||||
|
|
@ -95,9 +95,9 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
|
|||
let rec findNext (startPos : String.Pos.Raw)
|
||||
(currStackPos : String.Pos.Raw) (needlePos : String.Pos.Raw) (h : stackPos ≤ currStackPos) :=
|
||||
if h1 : currStackPos < s.utf8ByteSize then
|
||||
let stackByte := s.getUtf8Byte currStackPos h1
|
||||
let stackByte := s.getUTF8Byte currStackPos h1
|
||||
let needlePos := backtrackIfNecessary needle table stackByte needlePos
|
||||
let patByte := needle.getUtf8Byte! needlePos
|
||||
let patByte := needle.getUTF8Byte! needlePos
|
||||
if stackByte != patByte then
|
||||
let nextStackPos := s.findNextPos currStackPos h1 |>.offset
|
||||
let res := .rejected (s.pos! startPos) (s.pos! nextStackPos)
|
||||
|
|
|
|||
|
|
@ -688,8 +688,8 @@ def eqIgnoreAsciiCase (s1 s2 : Slice) : Bool :=
|
|||
where
|
||||
go (s1 : Slice) (s1Curr : String.Pos.Raw) (s2 : Slice) (s2Curr : String.Pos.Raw) : Bool :=
|
||||
if h : s1Curr < s1.utf8ByteSize ∧ s2Curr < s2.utf8ByteSize then
|
||||
let c1 := (s1.getUtf8Byte s1Curr h.left).toAsciiLower
|
||||
let c2 := (s2.getUtf8Byte s2Curr h.right).toAsciiLower
|
||||
let c1 := (s1.getUTF8Byte s1Curr h.left).toAsciiLower
|
||||
let c2 := (s2.getUTF8Byte s2Curr h.right).toAsciiLower
|
||||
if c1 == c2 then
|
||||
go s1 s1Curr.inc s2 s2Curr.inc
|
||||
else
|
||||
|
|
@ -900,12 +900,12 @@ instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where
|
|||
∃ h1 : it.internalState.offset < it.internalState.s.utf8ByteSize,
|
||||
it.internalState.s = it'.internalState.s ∧
|
||||
it'.internalState.offset = it.internalState.offset.inc ∧
|
||||
it.internalState.s.getUtf8Byte it.internalState.offset h1 = out
|
||||
it.internalState.s.getUTF8Byte it.internalState.offset h1 = out
|
||||
| .skip _ => False
|
||||
| .done => ¬ it.internalState.offset < it.internalState.s.utf8ByteSize
|
||||
step := fun ⟨s, offset⟩ =>
|
||||
if h : offset < s.utf8ByteSize then
|
||||
pure ⟨.yield ⟨s, offset.inc⟩ (s.getUtf8Byte offset h), by simp [h]⟩
|
||||
pure ⟨.yield ⟨s, offset.inc⟩ (s.getUTF8Byte offset h), by simp [h]⟩
|
||||
else
|
||||
pure ⟨.done, by simp [h]⟩
|
||||
|
||||
|
|
@ -981,7 +981,7 @@ instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where
|
|||
it.internalState.s = it'.internalState.s ∧
|
||||
it.internalState.offset ≠ 0 ∧
|
||||
it'.internalState.offset = it.internalState.offset.dec ∧
|
||||
it.internalState.s.getUtf8Byte it.internalState.offset.dec h1 = out
|
||||
it.internalState.s.getUTF8Byte it.internalState.offset.dec h1 = out
|
||||
| .skip _ => False
|
||||
| .done => it.internalState.offset = 0
|
||||
step := fun ⟨s, offset, hinv⟩ =>
|
||||
|
|
@ -994,7 +994,7 @@ instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where
|
|||
simp [String.Pos.Raw.le_iff, nextOffset] at hinv ⊢
|
||||
omega
|
||||
have hiter := by simp [nextOffset, hbound, h]
|
||||
pure ⟨.yield ⟨s, nextOffset, hinv⟩ (s.getUtf8Byte nextOffset hbound), hiter⟩
|
||||
pure ⟨.yield ⟨s, nextOffset, hinv⟩ (s.getUTF8Byte nextOffset hbound), hiter⟩
|
||||
else
|
||||
pure ⟨.done, by simpa using h⟩
|
||||
|
||||
|
|
|
|||
|
|
@ -26,14 +26,14 @@ namespace Lean.Name
|
|||
-- If you change this, also change the corresponding function in `Init.Meta`.
|
||||
private partial def needsNoEscapeAsciiRest (s : String) (i : Nat) : Bool :=
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
isIdRestAscii c && needsNoEscapeAsciiRest s (i + 1)
|
||||
else
|
||||
true
|
||||
|
||||
-- If you change this, also change the corresponding function in `Init.Meta`.
|
||||
@[inline] private def needsNoEscapeAscii (s : String) (h : s.utf8ByteSize > 0) : Bool :=
|
||||
let c := s.getUtf8Byte 0 h
|
||||
let c := s.getUTF8Byte 0 h
|
||||
isIdFirstAscii c && needsNoEscapeAsciiRest s 1
|
||||
|
||||
-- If you change this, also change the corresponding function in `Init.Meta`.
|
||||
|
|
|
|||
|
|
@ -156,12 +156,6 @@ def isInaccessibleUserName : Name → Bool
|
|||
| Name.num p _ => isInaccessibleUserName p
|
||||
| _ => false
|
||||
|
||||
-- FIXME: `getUtf8Byte` is in `Init.Data.String.Extra`, which causes an import cycle with
|
||||
-- `Init.Meta`. Moving `getUtf8Byte` up to `Init.Data.String.Basic` creates another import cycle.
|
||||
-- Please replace this definition with `getUtf8Byte` when the string refactor is through.
|
||||
@[extern "lean_string_get_byte_fast"]
|
||||
private opaque getUtf8Byte' (s : @& String) (n : Nat) (h : n < s.utf8ByteSize) : UInt8
|
||||
|
||||
section ToString
|
||||
|
||||
/-!
|
||||
|
|
@ -177,14 +171,14 @@ inner-loop function like `Name.toString`.
|
|||
-- If you change this, also change the corresponding function in `Init.Data.ToString.Name`.
|
||||
private partial def needsNoEscapeAsciiRest (s : String) (i : Nat) : Bool :=
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := getUtf8Byte' s i h
|
||||
let c := String.Internal.getUTF8Byte s i h
|
||||
isIdRestAscii c && needsNoEscapeAsciiRest s (i + 1)
|
||||
else
|
||||
true
|
||||
|
||||
-- If you change this, also change the corresponding function in `Init.Data.ToString.Name`.
|
||||
@[inline] private def needsNoEscapeAscii (s : String) (h : s.utf8ByteSize > 0) : Bool :=
|
||||
let c := getUtf8Byte' s 0 h
|
||||
let c := String.Internal.getUTF8Byte s 0 h
|
||||
isIdFirstAscii c && needsNoEscapeAsciiRest s 1
|
||||
|
||||
-- If you change this, also change the corresponding function in `Init.Data.ToString.Name`.
|
||||
|
|
|
|||
|
|
@ -3378,7 +3378,7 @@ def List.utf8Encode (l : List Char) : ByteArray :=
|
|||
Note that in order for this definition to be well-behaved it is necessary to know that this `m`
|
||||
is unique. To show this, one defines UTF-8 decoding and shows that encoding and decoding are
|
||||
mutually inverse. -/
|
||||
inductive ByteArray.IsValidUtf8 (b : ByteArray) : Prop
|
||||
inductive ByteArray.IsValidUTF8 (b : ByteArray) : Prop
|
||||
/-- Show that a byte -/
|
||||
| intro (m : List Char) (hm : Eq b (List.utf8Encode m))
|
||||
|
||||
|
|
@ -3393,10 +3393,10 @@ modifications when the reference to the string is unique.
|
|||
structure String where ofByteArray ::
|
||||
/-- The bytes of the UTF-8 encoding of the string. Since strings have a special representation in
|
||||
the runtime, this function actually takes linear time and space at runtime. For efficient access
|
||||
to the string's bytes, use `String.utf8ByteSize` and `String.getUtf8Byte`. -/
|
||||
to the string's bytes, use `String.utf8ByteSize` and `String.getUTF8Byte`. -/
|
||||
bytes : ByteArray
|
||||
/-- The bytes of the string form valid UTF-8. -/
|
||||
isValidUtf8 : ByteArray.IsValidUtf8 bytes
|
||||
isValidUTF8 : ByteArray.IsValidUTF8 bytes
|
||||
|
||||
attribute [extern "lean_string_to_utf8"] String.bytes
|
||||
attribute [extern "lean_string_from_utf8_unchecked"] String.ofByteArray
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ private def needEscape (s : String) : Bool :=
|
|||
where
|
||||
go (s : String) (i : Nat) : Bool :=
|
||||
if h : i < s.utf8ByteSize then
|
||||
let byte := s.getUtf8Byte ⟨i⟩ h
|
||||
let byte := s.getUTF8Byte ⟨i⟩ h
|
||||
have h1 : byte.toNat < 256 := UInt8.toNat_lt_size byte
|
||||
have h2 : escapeTable.val.size = 256 := escapeTable.property
|
||||
if escapeTable.val.get byte.toNat (Nat.lt_of_lt_of_eq h1 h2.symm) == 0 then
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ instance : Inhabited (Trie α) where
|
|||
partial def upsert (t : Trie α) (s : String) (f : Option α → α) : Trie α :=
|
||||
let rec insertEmpty (i : Nat) : Trie α :=
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
let t := insertEmpty (i + 1)
|
||||
node1 none c t
|
||||
else
|
||||
|
|
@ -71,14 +71,14 @@ partial def upsert (t : Trie α) (s : String) (f : Option α → α) : Trie α :
|
|||
let rec loop
|
||||
| i, leaf v =>
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
let t := insertEmpty (i + 1)
|
||||
node1 v c t
|
||||
else
|
||||
leaf (f v)
|
||||
| i, node1 v c' t' =>
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
if c == c'
|
||||
then node1 v c' (loop (i + 1) t')
|
||||
else
|
||||
|
|
@ -88,7 +88,7 @@ partial def upsert (t : Trie α) (s : String) (f : Option α → α) : Trie α :
|
|||
node1 (f v) c' t'
|
||||
| i, node v cs ts =>
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
match cs.findIdx? (· == c) with
|
||||
| none =>
|
||||
let t := insertEmpty (i + 1)
|
||||
|
|
@ -113,7 +113,7 @@ partial def find? (t : Trie α) (s : String) : Option α :=
|
|||
val
|
||||
| i, node1 val c' t' =>
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
if c == c'
|
||||
then loop (i + 1) t'
|
||||
else none
|
||||
|
|
@ -121,7 +121,7 @@ partial def find? (t : Trie α) (s : String) : Option α :=
|
|||
val
|
||||
| i, node val cs ts =>
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
match cs.findIdx? (· == c) with
|
||||
| none => none
|
||||
| some idx => loop (i + 1) ts[idx]!
|
||||
|
|
@ -150,7 +150,7 @@ partial def findPrefix (t : Trie α) (pre : String) : Array α := go t 0
|
|||
where
|
||||
go (t : Trie α) (i : Nat) : Array α :=
|
||||
if h : i < pre.utf8ByteSize then
|
||||
let c := pre.getUtf8Byte ⟨i⟩ h
|
||||
let c := pre.getUTF8Byte ⟨i⟩ h
|
||||
match t with
|
||||
| leaf _val => .empty
|
||||
| node1 _val c' t' =>
|
||||
|
|
@ -175,7 +175,7 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos.Raw)
|
|||
| node1 v c' t', i, res =>
|
||||
let res := if v.isSome then v else res
|
||||
if h : i < endByte then
|
||||
let c := s.getUtf8Byte ⟨i⟩ (by simp; omega)
|
||||
let c := s.getUTF8Byte ⟨i⟩ (by simp; omega)
|
||||
if c == c'
|
||||
then loop t' (i + 1) res
|
||||
else res
|
||||
|
|
@ -184,7 +184,7 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos.Raw)
|
|||
| node v cs ts, i, res =>
|
||||
let res := if v.isSome then v else res
|
||||
if h : i < endByte then
|
||||
let c := s.getUtf8Byte ⟨i⟩ (by simp; omega)
|
||||
let c := s.getUTF8Byte ⟨i⟩ (by simp; omega)
|
||||
match cs.findIdx? (· == c) with
|
||||
| none => res
|
||||
| some idx => loop ts[idx]! (i + 1) res
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ public def ofJsonNumber? (n : JsonNumber) : Except String Hash :=
|
|||
/-- Parse a hash from a string of hexadecimal digits. Does no validation. -/
|
||||
public def ofHex (s : String) : Hash :=
|
||||
mk <| s.utf8ByteSize.fold (init := 0) fun i h n =>
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
if c ≤ 57 then n*16 + (c - 48).toUInt64
|
||||
else if 97 ≤ c then n*16 + (c - 87).toUInt64 -- c - 'a' + 10 = (c - 87)
|
||||
else n*16 + (c - 55).toUInt64 -- c - 'A' + 10 = (c - 55)
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ public def zpad (n : Nat) (len : Nat) : String :=
|
|||
/-- Returns whether a string is composed of only hexadecimal digits. -/
|
||||
public def isHex (s : String) : Bool :=
|
||||
s.utf8ByteSize.all fun i h =>
|
||||
let c := s.getUtf8Byte ⟨i⟩ h
|
||||
let c := s.getUTF8Byte ⟨i⟩ h
|
||||
if c ≤ 57 then -- '9'
|
||||
48 ≤ c -- '0'
|
||||
else if c ≤ 102 then -- 'f'
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ macro "test_extern'" t:term " => " v:term : command =>
|
|||
|
||||
def checkGet (s : String) (arr : Array UInt8) :=
|
||||
(List.range s.utf8ByteSize).all fun i =>
|
||||
let c := if h : _ then s.getUtf8Byte ⟨i⟩ h else unreachable!
|
||||
let c := if h : _ then s.getUTF8Byte ⟨i⟩ h else unreachable!
|
||||
c == arr[i]!
|
||||
|
||||
macro "validate" arr:term " => " "↯" : command =>
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@ setLit.lean:22:19-22:21: error: overloaded, errors
|
|||
|
||||
Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command.
|
||||
|
||||
Fields missing: `bytes`, `isValidUtf8`
|
||||
Fields missing: `bytes`, `isValidUTF8`
|
||||
|
||||
Hint: Add missing fields:
|
||||
̲b̲y̲t̲e̲s̲ ̲:̲=̲ ̲_̲
|
||||
̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲t̲f̲8̲ ̲:̲=̲ ̲_̲ ̲
|
||||
̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲T̲F̲8̲ ̲:̲=̲ ̲_̲ ̲
|
||||
setLit.lean:24:31-24:38: error: overloaded, errors
|
||||
failed to synthesize
|
||||
Singleton Nat String
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue