This PR renames `String.bytes` to `String.toByteArray`. This is for two reasons: first, `toByteArray` is a better name, and second, we have something else that wants to use the name `bytes`, namely the function that returns in iterator over the string's bytes.
2842 lines
114 KiB
Text
2842 lines
114 KiB
Text
/-
|
||
Copyright (c) 2016 Microsoft Corporation. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
Author: Leonardo de Moura, Mario Carneiro, Markus Himmel
|
||
-/
|
||
module
|
||
|
||
prelude
|
||
public import Init.Data.String.Decode
|
||
public import Init.Data.String.Defs
|
||
public import Init.Data.String.PosRaw
|
||
import Init.Data.ByteArray.Lemmas
|
||
import Init.Data.Char.Lemmas
|
||
|
||
/-!
|
||
# Strings
|
||
|
||
This file builds on the UTF-8 verification in `Init.Data.String.Decode` and the preliminary
|
||
material in `Init.Data.String.Defs` to get the theory of strings off the ground. In particular,
|
||
in this file we construct the decoding function `String.data : String → List Char` and show that
|
||
it is a two-sided inverse to `List.asString : List Char → String`. This in turn enables us to
|
||
understand the validity predicate on positions in terms of lists of characters, which forms the
|
||
basis for all further verification for strings.
|
||
-/
|
||
|
||
public section
|
||
|
||
universe u
|
||
|
||
section
|
||
|
||
@[simp]
|
||
theorem String.utf8ByteSize_singleton {c : Char} : (String.singleton c).utf8ByteSize = c.utf8Size := by
|
||
simp [← size_toByteArray, List.utf8Encode_singleton]
|
||
|
||
theorem List.isUTF8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} :
|
||
UInt8.IsUTF8FirstByte [c].utf8Encode[i] ↔ i = 0 := by
|
||
simp [List.utf8Encode_singleton, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar]
|
||
|
||
|
||
theorem ByteArray.IsValidUTF8.push {b : ByteArray} (h : IsValidUTF8 b) {c : Char} (hc : c.utf8Size = 1) :
|
||
IsValidUTF8 (b.push c.toUInt8) := by
|
||
rcases h with ⟨m, rfl⟩
|
||
refine ⟨m ++ [c], by simp [List.utf8Encode_singleton, String.utf8EncodeChar_eq_singleton hc]⟩
|
||
|
||
theorem ByteArray.isValidUTF8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
|
||
IsValidUTF8 ([c].utf8Encode ++ b) ↔ IsValidUTF8 b := by
|
||
refine ⟨?_, fun h => IsValidUTF8.append isValidUTF8_utf8Encode h⟩
|
||
rintro ⟨l, hl⟩
|
||
match l with
|
||
| [] => simp at hl
|
||
| d::l =>
|
||
obtain rfl : c = d := by
|
||
replace hl := congrArg (fun l => utf8DecodeChar? l 0) hl
|
||
simpa [List.utf8DecodeChar?_utf8Encode_singleton_append,
|
||
List.utf8DecodeChar?_utf8Encode_cons] using hl
|
||
rw [← List.singleton_append (l := l), List.utf8Encode_append,
|
||
ByteArray.append_right_inj] at hl
|
||
exact hl ▸ isValidUTF8_utf8Encode
|
||
|
||
/--
|
||
Decodes a sequence of characters from their UTF-8 representation. Returns `none` if the bytes are
|
||
not a sequence of Unicode scalar values.
|
||
-/
|
||
@[inline, expose]
|
||
def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) :=
|
||
go (b.size + 1) 0 #[] (by simp) (by simp)
|
||
where
|
||
go (fuel : Nat) (i : Nat) (acc : Array Char) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Option (Array Char) :=
|
||
match fuel, hf with
|
||
| fuel + 1, _ =>
|
||
if i = b.size then
|
||
some acc
|
||
else
|
||
match h : utf8DecodeChar? b i with
|
||
| none => none
|
||
| some c => go fuel (i + c.utf8Size) (acc.push c)
|
||
(le_size_of_utf8DecodeChar?_eq_some h)
|
||
(have := c.utf8Size_pos; have := le_size_of_utf8DecodeChar?_eq_some h; by omega)
|
||
termination_by structural fuel
|
||
|
||
@[expose, extern "lean_string_validate_utf8"]
|
||
def ByteArray.validateUTF8 (b : @& ByteArray) : Bool :=
|
||
go (b.size + 1) 0 (by simp) (by simp)
|
||
where
|
||
go (fuel : Nat) (i : Nat) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Bool :=
|
||
match fuel, hf with
|
||
| fuel + 1, _ =>
|
||
if hi : i = b.size then
|
||
true
|
||
else
|
||
match h : validateUTF8At b i with
|
||
| false => false
|
||
| true => go fuel (i + b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h))
|
||
?_ ?_
|
||
termination_by structural fuel
|
||
finally
|
||
all_goals rw [ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar?] at h
|
||
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
|
||
exact add_utf8Size_utf8DecodeChar_le_size
|
||
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
|
||
have := add_utf8Size_utf8DecodeChar_le_size (h := h)
|
||
have := (b.utf8DecodeChar i h).utf8Size_pos
|
||
omega
|
||
|
||
theorem ByteArray.isSome_utf8Decode?Go_eq_validateUTF8Go {b : ByteArray} {fuel : Nat}
|
||
{i : Nat} {acc : Array Char} {hi : i ≤ b.size} {hf : b.size - i < fuel} :
|
||
(utf8Decode?.go b fuel i acc hi hf).isSome = validateUTF8.go b fuel i hi hf := by
|
||
fun_induction utf8Decode?.go with
|
||
| case1 => simp [validateUTF8.go]
|
||
| case2 i acc hi fuel hf h₁ h₂ =>
|
||
simp only [Option.isSome_none, validateUTF8.go, h₁, ↓reduceDIte, Bool.false_eq]
|
||
split
|
||
· rfl
|
||
· rename_i heq
|
||
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||
| case3 i acc hi fuel hf h₁ c h₂ ih =>
|
||
simp [validateUTF8.go, h₁]
|
||
split
|
||
· rename_i heq
|
||
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||
· rw [ih]
|
||
congr
|
||
rw [← ByteArray.utf8Size_utf8DecodeChar (h := by simp [h₂])]
|
||
simp [utf8DecodeChar, h₂]
|
||
|
||
theorem ByteArray.isSome_utf8Decode?_eq_validateUTF8 {b : ByteArray} :
|
||
b.utf8Decode?.isSome = b.validateUTF8 :=
|
||
b.isSome_utf8Decode?Go_eq_validateUTF8Go
|
||
|
||
theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'}
|
||
(hbb' : b = b') (hii' : i = i') (hacc : acc = acc') :
|
||
ByteArray.utf8Decode?.go b fuel i acc hi hf = ByteArray.utf8Decode?.go b' fuel' i' acc' hi' hf' := by
|
||
subst hbb' hii' hacc
|
||
fun_induction ByteArray.utf8Decode?.go b fuel i acc hi hf generalizing fuel' with
|
||
| case1 =>
|
||
rw [go.eq_def]
|
||
split
|
||
simp
|
||
| case2 =>
|
||
rw [go.eq_def]
|
||
split <;> split
|
||
· simp_all
|
||
· split <;> simp_all
|
||
| case3 =>
|
||
conv => rhs; rw [go.eq_def]
|
||
split <;> split
|
||
· simp_all
|
||
· split
|
||
· simp_all
|
||
· rename_i c₁ hc₁ ih _ _ _ _ _ c₂ hc₂
|
||
obtain rfl : c₁ = c₂ := by rw [← Option.some_inj, ← hc₁, ← hc₂]
|
||
apply ih
|
||
|
||
@[simp]
|
||
theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] := by
|
||
simp [utf8Decode?, utf8Decode?.go]
|
||
|
||
private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
|
||
(ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUTF8 (b.extract i b.size) := by
|
||
fun_induction ByteArray.utf8Decode?.go with
|
||
| case1 => simp
|
||
| case2 fuel i hi hf acc h₁ h₂ =>
|
||
simp only [Option.isSome_none, Bool.false_eq_true, false_iff]
|
||
rintro ⟨l, hl⟩
|
||
have : l ≠ [] := by
|
||
rintro rfl
|
||
simp at hl
|
||
omega
|
||
rw [← l.cons_head_tail this] at hl
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract, hl, List.utf8DecodeChar?_utf8Encode_cons] at h₂
|
||
simp at h₂
|
||
| case3 i acc hi fuel hf h₁ c h₂ ih =>
|
||
rw [ih]
|
||
have h₂' := h₂
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂'
|
||
obtain ⟨l, hl⟩ := exists_of_utf8DecodeChar?_eq_some h₂'
|
||
rw [ByteArray.extract_eq_extract_append_extract (i := i) (i + c.utf8Size) (by omega)
|
||
(le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢
|
||
rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega),
|
||
← List.utf8Encode_singleton, isValidUTF8_utf8Encode_singleton_append_iff]
|
||
|
||
theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} :
|
||
b.utf8Decode?.isSome ↔ IsValidUTF8 b := by
|
||
rw [utf8Decode?, isSome_utf8Decode?go_iff, extract_zero_size]
|
||
|
||
@[simp]
|
||
theorem ByteArray.validateUTF8_eq_true_iff {b : ByteArray} :
|
||
b.validateUTF8 = true ↔ IsValidUTF8 b := by
|
||
rw [← isSome_utf8Decode?_eq_validateUTF8, isSome_utf8Decode?_iff]
|
||
|
||
@[simp]
|
||
theorem ByteArray.validateUTF8_eq_false_iff {b : ByteArray} :
|
||
b.validateUTF8 = false ↔ ¬ IsValidUTF8 b := by
|
||
simp [← Bool.not_eq_true]
|
||
|
||
instance {b : ByteArray} : Decidable b.IsValidUTF8 :=
|
||
decidable_of_iff (b.validateUTF8 = true) ByteArray.validateUTF8_eq_true_iff
|
||
|
||
/--
|
||
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
|
||
the corresponding string, or returns `none` if the array is not a valid UTF-8 encoding of a string.
|
||
-/
|
||
@[inline, expose] def String.fromUTF8? (a : ByteArray) : Option String :=
|
||
if h : a.IsValidUTF8 then some (fromUTF8 a h) else none
|
||
|
||
/--
|
||
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
|
||
the corresponding string, or panics if the array is not a valid UTF-8 encoding of a string.
|
||
-/
|
||
@[inline, expose] def String.fromUTF8! (a : ByteArray) : String :=
|
||
if h : a.IsValidUTF8 then fromUTF8 a h else panic! "invalid UTF-8 string"
|
||
|
||
@[simp]
|
||
theorem String.empty_append {s : String} : "" ++ s = s := by
|
||
simp [← String.toByteArray_inj]
|
||
|
||
@[simp]
|
||
theorem String.append_empty {s : String} : s ++ "" = s := by
|
||
simp [← String.toByteArray_inj]
|
||
|
||
@[simp]
|
||
theorem String.ofList_nil : String.ofList [] = "" :=
|
||
rfl
|
||
|
||
@[deprecated String.ofList_nil (since := "2025-10-30")]
|
||
theorem List.asString_nil : String.ofList [] = "" :=
|
||
String.ofList_nil
|
||
|
||
@[simp]
|
||
theorem String.ofList_append {l₁ l₂ : List Char} :
|
||
String.ofList (l₁ ++ l₂) = String.ofList l₁ ++ String.ofList l₂ := by
|
||
simp [← String.toByteArray_inj]
|
||
|
||
@[deprecated String.ofList_append (since := "2025-10-30")]
|
||
theorem List.asString_append {l₁ l₂ : List Char} :
|
||
String.ofList (l₁ ++ l₂) = String.ofList l₁ ++ String.ofList l₂ :=
|
||
String.ofList_append
|
||
|
||
@[expose]
|
||
def String.Internal.toArray (b : String) : Array Char :=
|
||
b.toByteArray.utf8Decode?.get (b.toByteArray.isSome_utf8Decode?_iff.2 b.isValidUTF8)
|
||
|
||
@[simp]
|
||
theorem String.Internal.toArray_empty : String.Internal.toArray "" = #[] := by
|
||
simp [toArray]
|
||
|
||
/--
|
||
Converts a string to a list of characters.
|
||
|
||
Since strings are represented as dynamic arrays of bytes containing the string encoded using
|
||
UTF-8, this operation takes time and space linear in the length of the string.
|
||
|
||
Examples:
|
||
* `"abc".toList = ['a', 'b', 'c']`
|
||
* `"".toList = []`
|
||
* `"\n".toList = ['\n']`
|
||
-/
|
||
@[extern "lean_string_data", expose]
|
||
def String.toList (s : String) : List Char :=
|
||
(String.Internal.toArray s).toList
|
||
|
||
/--
|
||
Converts a string to a list of characters.
|
||
|
||
Since strings are represented as dynamic arrays of bytes containing the string encoded using
|
||
UTF-8, this operation takes time and space linear in the length of the string.
|
||
|
||
Examples:
|
||
* `"abc".toList = ['a', 'b', 'c']`
|
||
* `"".toList = []`
|
||
* `"\n".toList = ['\n']`
|
||
-/
|
||
@[extern "lean_string_data", expose, deprecated String.toList (since := "2025-10-30")]
|
||
def String.data (b : String) : List Char :=
|
||
(String.Internal.toArray b).toList
|
||
|
||
@[simp]
|
||
theorem String.toList_empty : "".toList = [] := by
|
||
simp [toList]
|
||
|
||
@[deprecated String.toList_empty (since := "2025-10-30")]
|
||
theorem String.data_empty : "".toList = [] :=
|
||
toList_empty
|
||
|
||
/--
|
||
Returns the length of a string in Unicode code points.
|
||
|
||
Examples:
|
||
* `"".length = 0`
|
||
* `"abc".length = 3`
|
||
* `"L∃∀N".length = 4`
|
||
-/
|
||
@[extern "lean_string_length", expose]
|
||
def String.length (b : @& String) : Nat :=
|
||
b.toList.length
|
||
|
||
@[simp]
|
||
theorem String.Internal.size_toArray {b : String} : (String.Internal.toArray b).size = b.length :=
|
||
(rfl)
|
||
|
||
@[simp]
|
||
theorem String.length_toList {s : String} : s.toList.length = s.length := (rfl)
|
||
|
||
@[deprecated String.length_toList (since := "2025-10-30")]
|
||
theorem String.length_data {b : String} : b.toList.length = b.length := (rfl)
|
||
|
||
private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
|
||
utf8Decode?.go b fuel i acc hi hf = (utf8Decode?.go (b.extract i b.size) fuel 0 #[] (by simp) (by simp [hf])).map (acc ++ ·) := by
|
||
fun_cases utf8Decode?.go b fuel i acc hi hf with
|
||
| case1 =>
|
||
rw [utf8Decode?.go]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
|
||
List.nil_append]
|
||
rw [if_pos (by omega)]
|
||
simp
|
||
| case2 fuel hf₁ h₁ h₂ hf₂ =>
|
||
rw [utf8Decode?.go]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
|
||
List.nil_append]
|
||
rw [if_neg (by omega)]
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
|
||
split <;> simp_all
|
||
| case3 fuel hf₁ h₁ c h₂ hf₂ =>
|
||
conv => rhs; rw [utf8Decode?.go]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
|
||
List.nil_append]
|
||
rw [if_neg (by omega)]
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
|
||
split
|
||
· simp_all
|
||
· rename_i c' hc'
|
||
obtain rfl : c = c' := by
|
||
rw [← Option.some_inj, ← h₂, hc']
|
||
have := c.utf8Size_pos
|
||
conv => lhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
|
||
conv => rhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Option.map_map, ByteArray.extract_extract]
|
||
have : (fun x => acc ++ x) ∘ (fun x => #[c] ++ x) = fun x => acc.push c ++ x := by funext; simp
|
||
simp [(by omega : i + (b.size - i) = b.size), this]
|
||
termination_by fuel
|
||
|
||
theorem ByteArray.utf8Decode?_utf8Encode_singleton_append {l : ByteArray} {c : Char} :
|
||
([c].utf8Encode ++ l).utf8Decode? = l.utf8Decode?.map (#[c] ++ ·) := by
|
||
rw [utf8Decode?, utf8Decode?.go,
|
||
if_neg (by simp [List.utf8Encode_singleton]; have := c.utf8Size_pos; omega)]
|
||
split
|
||
· simp_all [List.utf8DecodeChar?_utf8Encode_singleton_append]
|
||
· rename_i d h
|
||
obtain rfl : c = d := by simpa [List.utf8DecodeChar?_utf8Encode_singleton_append] using h
|
||
rw [utf8Decode?go_eq_utf8Decode?go_extract, utf8Decode?]
|
||
simp only [List.push_toArray, List.nil_append, Nat.zero_add]
|
||
congr 1
|
||
apply ByteArray.utf8Decode?.go.congr _ rfl rfl
|
||
apply extract_append_eq_right _ (by simp)
|
||
simp [List.utf8Encode_singleton]
|
||
|
||
@[simp]
|
||
theorem List.utf8Decode?_utf8Encode {l : List Char} :
|
||
l.utf8Encode.utf8Decode? = some l.toArray := by
|
||
induction l with
|
||
| nil => simp
|
||
| cons c l ih =>
|
||
rw [← List.singleton_append, List.utf8Encode_append]
|
||
simp only [ByteArray.utf8Decode?_utf8Encode_singleton_append, cons_append, nil_append,
|
||
Option.map_eq_some_iff, Array.append_eq_toArray_iff, cons.injEq, true_and]
|
||
refine ⟨l.toArray, ih, by simp⟩
|
||
|
||
@[simp]
|
||
theorem ByteArray.utf8Encode_get_utf8Decode? {b : ByteArray} {h} :
|
||
(b.utf8Decode?.get h).toList.utf8Encode = b := by
|
||
obtain ⟨l, rfl⟩ := isSome_utf8Decode?_iff.1 h
|
||
simp
|
||
|
||
@[simp]
|
||
theorem String.toList_ofList {l : List Char} : (String.ofList l).toList = l := by
|
||
simp [String.toList, String.Internal.toArray]
|
||
|
||
@[deprecated String.toList_ofList (since := "2025-10-30")]
|
||
theorem List.data_asString {l : List Char} : (String.ofList l).toList = l :=
|
||
String.toList_ofList
|
||
|
||
@[simp]
|
||
theorem String.ofList_toList {s : String} : String.ofList s.toList = s := by
|
||
obtain ⟨l, rfl⟩ := s.exists_eq_ofList
|
||
simp
|
||
|
||
@[deprecated String.ofList_toList (since := "2025-10-30")]
|
||
theorem String.asString_data {b : String} : String.ofList b.toList = b :=
|
||
String.ofList_toList
|
||
|
||
theorem String.ofList_injective {l₁ l₂ : List Char} (h : String.ofList l₁ = String.ofList l₂) : l₁ = l₂ := by
|
||
simpa using congrArg String.toList h
|
||
|
||
@[deprecated String.ofList_injective (since := "2025-10-30")]
|
||
theorem List.asString_injective {l₁ l₂ : List Char} (h : String.ofList l₁ = String.ofList l₂) : l₁ = l₂ :=
|
||
String.ofList_injective h
|
||
|
||
theorem String.ofList_inj {l₁ l₂ : List Char} : String.ofList l₁ = String.ofList l₂ ↔ l₁ = l₂ :=
|
||
⟨ofList_injective, (· ▸ rfl)⟩
|
||
|
||
@[deprecated String.ofList_inj (since := "2025-10-30")]
|
||
theorem List.asString_inj {l₁ l₂ : List Char} : String.ofList l₁ = String.ofList l₂ ↔ l₁ = l₂ :=
|
||
String.ofList_inj
|
||
|
||
theorem String.toList_injective {s₁ s₂ : String} (h : s₁.toList = s₂.toList) : s₁ = s₂ := by
|
||
simpa using congrArg String.ofList h
|
||
|
||
@[deprecated String.toList_injective (since := "2025-10-30")]
|
||
theorem String.data_injective {s₁ s₂ : String} (h : s₁.toList = s₂.toList) : s₁ = s₂ :=
|
||
String.toList_injective h
|
||
|
||
theorem String.toList_inj {s₁ s₂ : String} : s₁.toList = s₂.toList ↔ s₁ = s₂ :=
|
||
⟨toList_injective, (· ▸ rfl)⟩
|
||
|
||
@[deprecated String.toList_inj (since := "2025-10-30")]
|
||
theorem String.data_inj {s₁ s₂ : String} : s₁.toList = s₂.toList ↔ s₁ = s₂ :=
|
||
String.toList_inj
|
||
|
||
@[simp]
|
||
theorem String.toList_append {s t : String} : (s ++ t).toList = s.toList ++ t.toList := by
|
||
simp [← String.ofList_inj]
|
||
|
||
@[deprecated String.toList_append (since := "2025-10-30")]
|
||
theorem String.data_append {l₁ l₂ : String} : (l₁ ++ l₂).toList = l₁.toList ++ l₂.toList :=
|
||
String.toList_append
|
||
|
||
@[simp]
|
||
theorem String.utf8Encode_toList {b : String} : b.toList.utf8Encode = b.toByteArray := by
|
||
have := congrArg String.toByteArray (String.ofList_toList (s := b))
|
||
rwa [← String.toByteArray_ofList]
|
||
|
||
@[deprecated String.utf8Encode_toList (since := "2025-10-30")]
|
||
theorem String.utf8encode_data {b : String} : b.toList.utf8Encode = b.toByteArray :=
|
||
String.utf8Encode_toList
|
||
|
||
@[simp]
|
||
theorem String.toList_eq_nil_iff {b : String} : b.toList = [] ↔ b = "" := by
|
||
rw [← String.ofList_inj, ofList_toList, String.ofList_nil]
|
||
|
||
@[deprecated String.toList_eq_nil_iff (since := "2025-10-30")]
|
||
theorem String.data_eq_nil_iff {b : String} : b.toList = [] ↔ b = "" :=
|
||
String.toList_eq_nil_iff
|
||
|
||
@[simp]
|
||
theorem String.ofList_eq_empty_iff {l : List Char} : String.ofList l = "" ↔ l = [] := by
|
||
rw [← String.toList_inj, String.toList_ofList, String.toList_empty]
|
||
|
||
@[deprecated String.ofList_eq_empty_iff (since := "2025-10-30")]
|
||
theorem List.asString_eq_empty_iff {l : List Char} : String.ofList l = "" ↔ l = [] :=
|
||
String.ofList_eq_empty_iff
|
||
|
||
@[simp]
|
||
theorem String.length_ofList {l : List Char} : (String.ofList l).length = l.length := by
|
||
rw [← String.length_toList, String.toList_ofList]
|
||
|
||
@[deprecated String.length_ofList (since := "2025-10-30")]
|
||
theorem List.length_asString {l : List Char} : (String.ofList l).length = l.length :=
|
||
String.length_ofList
|
||
|
||
end
|
||
|
||
namespace String
|
||
|
||
instance : LT String :=
|
||
⟨fun s₁ s₂ => s₁.toList < s₂.toList⟩
|
||
|
||
@[extern "lean_string_dec_lt"]
|
||
instance decidableLT (s₁ s₂ : @& String) : Decidable (s₁ < s₂) :=
|
||
List.decidableLT s₁.toList s₂.toList
|
||
|
||
/--
|
||
Non-strict inequality on strings, typically used via the `≤` operator.
|
||
|
||
`a ≤ b` is defined to mean `¬ b < a`.
|
||
-/
|
||
@[expose, reducible] protected def le (a b : String) : Prop := ¬ b < a
|
||
|
||
instance : LE String :=
|
||
⟨String.le⟩
|
||
|
||
instance decLE (s₁ s₂ : String) : Decidable (s₁ ≤ s₂) :=
|
||
inferInstanceAs (Decidable (Not _))
|
||
|
||
theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char} (b : ByteArray)
|
||
(h : l.utf8Encode ++ b = m.utf8Encode) : l <+: m := by
|
||
induction l generalizing m with
|
||
| nil => simp
|
||
| cons c l ih =>
|
||
replace h := congrArg ByteArray.utf8Decode? h
|
||
rw [List.utf8Decode?_utf8Encode] at h
|
||
rw [← List.singleton_append, List.utf8Encode_append, ByteArray.append_assoc,
|
||
ByteArray.utf8Decode?_utf8Encode_singleton_append] at h
|
||
suffices ∃ m', m = [c] ++ m' ∧ l.utf8Encode ++ b = m'.utf8Encode by
|
||
obtain ⟨m', rfl, hm'⟩ := this
|
||
simpa using ih hm'
|
||
have hx : (l.utf8Encode ++ b).utf8Decode?.isSome := by
|
||
exact Option.isSome_map ▸ Option.isSome_of_eq_some h
|
||
refine ⟨(l.utf8Encode ++ b).utf8Decode?.get hx |>.toList, ?_, by simp⟩
|
||
exact List.toArray_inj (Option.some_inj.1 (by simp [← h]))
|
||
|
||
open List in
|
||
theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||
∃ m₁ m₂ : List Char, m₁.utf8Encode = s.toByteArray.extract 0 p.byteIdx ∧ String.ofList (m₁ ++ m₂) = s := by
|
||
obtain ⟨l, hl⟩ := s.isValidUTF8
|
||
obtain ⟨m₁, hm₁⟩ := h.isValidUTF8_extract_zero
|
||
suffices m₁ <+: l by
|
||
obtain ⟨m₂, rfl⟩ := this
|
||
refine ⟨m₁, m₂, hm₁.symm, ?_⟩
|
||
apply String.toByteArray_inj.1
|
||
simpa using hl.symm
|
||
apply List.isPrefix_of_utf8Encode_append_eq_utf8Encode (s.toByteArray.extract p.byteIdx s.toByteArray.size)
|
||
rw [← hl, ← hm₁, ← ByteArray.extract_eq_extract_append_extract _ (by simp),
|
||
ByteArray.extract_zero_size]
|
||
simpa using h.le_rawEndPos
|
||
|
||
theorem Pos.Raw.IsValid.isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||
ByteArray.IsValidUTF8 (s.toByteArray.extract p.byteIdx s.utf8ByteSize) := by
|
||
obtain ⟨m₁, m₂, hm, rfl⟩ := h.exists
|
||
simp only [String.ofList_append, toByteArray_append, String.toByteArray_ofList]
|
||
rw [ByteArray.extract_append_eq_right]
|
||
· exact ByteArray.isValidUTF8_utf8Encode
|
||
· rw [hm]
|
||
simp only [String.ofList_append, toByteArray_append, String.toByteArray_ofList, ByteArray.size_extract,
|
||
ByteArray.size_append, Nat.sub_zero]
|
||
refine (Nat.min_eq_left ?_).symm
|
||
simpa [utf8ByteSize, Pos.Raw.le_iff] using h.le_rawEndPos
|
||
· simp [utf8ByteSize]
|
||
|
||
theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ ∃ s₁ s₂ : String, s = s₁ ++ s₂ ∧ p = s₁.rawEndPos := by
|
||
refine ⟨fun h => ⟨⟨_, h.isValidUTF8_extract_zero⟩, ⟨_, h.isValidUTF8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩
|
||
· apply String.toByteArray_inj.1
|
||
have := Pos.Raw.le_iff.1 h.le_rawEndPos
|
||
simp_all [← size_toByteArray]
|
||
· have := byteIdx_rawEndPos ▸ Pos.Raw.le_iff.1 h.le_rawEndPos
|
||
apply String.Pos.Raw.ext
|
||
simp [Nat.min_eq_left this]
|
||
· rintro ⟨s₁, s₂, rfl, rfl⟩
|
||
refine isValid_iff_isValidUTF8_extract_zero.2 ⟨by simp [Pos.Raw.le_iff], ?_⟩
|
||
simpa [ByteArray.extract_append_eq_left] using s₁.isValidUTF8
|
||
|
||
theorem Pos.Raw.isValid_ofList {l : List Char} {p : Pos.Raw} :
|
||
p.IsValid (ofList l) ↔ ∃ i, p.byteIdx = (ofList (l.take i)).utf8ByteSize := by
|
||
rw [isValid_iff_exists_append]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨t₁, t₂, ht, rfl⟩
|
||
refine ⟨t₁.length, ?_⟩
|
||
have := congrArg String.toList ht
|
||
simp only [String.toList_ofList, String.toList_append] at this
|
||
simp [this]
|
||
· rintro ⟨i, hi⟩
|
||
refine ⟨ofList (l.take i), ofList (l.drop i), ?_, ?_⟩
|
||
· simp [← String.ofList_append]
|
||
· simpa [Pos.Raw.ext_iff]
|
||
|
||
@[deprecated Pos.Raw.isValid_ofList (since := "2025-10-30")]
|
||
theorem Pos.Raw.isValid_asString {l : List Char} {p : Pos.Raw} :
|
||
p.IsValid (ofList l) ↔ ∃ i, p.byteIdx = (ofList (l.take i)).utf8ByteSize :=
|
||
Pos.Raw.isValid_ofList
|
||
|
||
theorem Pos.Raw.isValid_iff_exists_take_toList {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ ∃ i, p.byteIdx = (ofList (s.toList.take i)).utf8ByteSize := by
|
||
obtain ⟨l, rfl⟩ := s.exists_eq_ofList
|
||
simp [isValid_ofList]
|
||
|
||
@[deprecated Pos.Raw.isValid_iff_exists_take_toList (since := "2025-10-30")]
|
||
theorem Pos.Raw.isValid_iff_exists_take_data {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ ∃ i, p.byteIdx = (ofList (s.toList.take i)).utf8ByteSize :=
|
||
Pos.Raw.isValid_iff_exists_take_toList
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_singleton {c : Char} {p : Pos.Raw} :
|
||
p.IsValid (String.singleton c) ↔ p = 0 ∨ p.byteIdx = c.utf8Size := by
|
||
rw [singleton_eq_ofList, Pos.Raw.isValid_ofList]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨i, hi'⟩
|
||
obtain ⟨rfl, hi⟩ : i = 0 ∨ 1 ≤ i := by omega
|
||
· simp [Pos.Raw.ext_iff, hi']
|
||
· rw [hi', List.take_of_length_le (by simpa)]
|
||
simp [← singleton_eq_ofList]
|
||
· rintro (rfl|hi)
|
||
· exact ⟨0, by simp⟩
|
||
· exact ⟨1, by simp [hi, ← singleton_eq_ofList]⟩
|
||
|
||
theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
|
||
p.IsValid (s ++ t) ↔ p.IsValid s ∨ (s.rawEndPos ≤ p ∧ (p - s).IsValid t) := by
|
||
obtain ⟨s, rfl⟩ := exists_eq_ofList s
|
||
obtain ⟨t, rfl⟩ := exists_eq_ofList t
|
||
rw [← String.ofList_append, Pos.Raw.isValid_ofList, Pos.Raw.isValid_ofList, Pos.Raw.isValid_ofList]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨j, hj⟩
|
||
by_cases h : j ≤ s.length
|
||
· exact Or.inl ⟨j, by simp [hj, List.take_append_of_le_length h]⟩
|
||
· refine Or.inr ⟨?_, ⟨j - s.length, ?_⟩⟩
|
||
· simp [Pos.Raw.le_iff, hj, List.take_append, List.take_of_length_le (i := j) (l := s) (by omega)]
|
||
· simp [hj, List.take_append, List.take_of_length_le (i := j) (l := s) (by omega)]
|
||
· rintro (⟨j, hj⟩|⟨h, ⟨j, hj⟩⟩)
|
||
· refine ⟨min j s.length, ?_⟩
|
||
rw [List.take_append_of_le_length (Nat.min_le_right ..), ← List.take_eq_take_min, hj]
|
||
· refine ⟨s.length + j, ?_⟩
|
||
simp only [Pos.Raw.byteIdx_sub_string, byteIdx_rawEndPos, Pos.Raw.le_iff] at hj h
|
||
simp only [List.take_append, List.take_of_length_le (i := s.length + j) (l := s) (by omega),
|
||
Nat.add_sub_cancel_left, String.ofList_append, utf8ByteSize_append]
|
||
omega
|
||
|
||
theorem Pos.Raw.IsValid.append_left {t : String} {p : Pos.Raw} (h : p.IsValid t) (s : String) :
|
||
(s + p).IsValid (s ++ t) :=
|
||
isValid_append.2 (Or.inr ⟨by simp [Pos.Raw.le_iff], by
|
||
suffices p = s + p - s by simp [← this, h]
|
||
simp [Pos.Raw.ext_iff]⟩)
|
||
|
||
theorem Pos.Raw.IsValid.append_right {s : String} {p : Pos.Raw} (h : p.IsValid s) (t : String) :
|
||
p.IsValid (s ++ t) :=
|
||
isValid_append.2 (Or.inl h)
|
||
|
||
theorem Pos.Raw.isValid_push {s : String} {c : Char} {p : Pos.Raw} :
|
||
p.IsValid (s.push c) ↔ p.IsValid s ∨ p = s.rawEndPos + c := by
|
||
rw [← append_singleton, isValid_append, isValid_singleton]
|
||
simp only [le_iff, byteIdx_rawEndPos, Pos.Raw.ext_iff, byteIdx_sub_string, byteIdx_zero, byteIdx_add_char]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro (h|⟨h₁,(h₂|h₂)⟩)
|
||
· exact Or.inl h
|
||
· suffices p = s.rawEndPos by simp [this]
|
||
simp only [Pos.Raw.ext_iff, byteIdx_rawEndPos]
|
||
omega
|
||
· omega
|
||
· rintro (h|h)
|
||
· exact Or.inl h
|
||
· omega
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_push {s : String} {c : Char} :
|
||
(s.push c).utf8ByteSize = s.utf8ByteSize + c.utf8Size := by
|
||
simp [← size_toByteArray, List.utf8Encode_singleton]
|
||
|
||
@[simp]
|
||
theorem rawEndPos_push {s : String} {c : Char} : (s.push c).rawEndPos = s.rawEndPos + c := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
@[deprecated rawEndPos_push (since := "2025-10-20")]
|
||
theorem endPos_push {s : String} {c : Char} : (s.push c).rawEndPos = s.rawEndPos + c :=
|
||
rawEndPos_push
|
||
|
||
theorem push_induction (s : String) (motive : String → Prop) (empty : motive "")
|
||
(push : ∀ b c, motive b → motive (b.push c)) : motive s := by
|
||
obtain ⟨m, rfl⟩ := s.exists_eq_ofList
|
||
apply append_singleton_induction m (motive <| ofList ·)
|
||
· simpa
|
||
· intro l c hl
|
||
rw [String.ofList_append, ← singleton_eq_ofList, append_singleton]
|
||
exact push _ _ hl
|
||
where
|
||
append_singleton_induction (l : List Char) (motive : List Char → Prop) (nil : motive [])
|
||
(append_singleton : ∀ l a, motive l → motive (l ++ [a])) : motive l := by
|
||
rw [← l.reverse_reverse]
|
||
generalize l.reverse = m
|
||
induction m with
|
||
| nil => simpa
|
||
| cons a m ih =>
|
||
rw [List.reverse_cons]
|
||
exact append_singleton _ _ ih
|
||
|
||
theorem Pos.Raw.isValid_iff_isUTF8FirstByte {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p = s.rawEndPos ∨ ∃ (h : p < s.rawEndPos), (s.getUTF8Byte p h).IsUTF8FirstByte := by
|
||
induction s using push_induction with
|
||
| empty => simp [Pos.Raw.lt_iff]
|
||
| push s c ih =>
|
||
rw [isValid_push, ih]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ((rfl|⟨h, hb⟩)|h)
|
||
· refine Or.inr ⟨by simp [Pos.Raw.lt_iff, Char.utf8Size_pos], ?_⟩
|
||
simp only [getUTF8Byte, toByteArray_push, byteIdx_rawEndPos]
|
||
rw [ByteArray.getElem_append_right (by simp)]
|
||
simp [List.isUTF8FirstByte_getElem_utf8Encode_singleton]
|
||
· refine Or.inr ⟨by simp [lt_iff] at h ⊢; omega, ?_⟩
|
||
simp only [getUTF8Byte, toByteArray_push]
|
||
rwa [ByteArray.getElem_append_left, ← getUTF8Byte]
|
||
· exact Or.inl (by simpa [rawEndPos_push])
|
||
· rintro (h|⟨h, hb⟩)
|
||
· exact Or.inr (by simpa [rawEndPos_push] using h)
|
||
· simp only [getUTF8Byte, toByteArray_push] at hb
|
||
by_cases h' : p < s.rawEndPos
|
||
· refine Or.inl (Or.inr ⟨h', ?_⟩)
|
||
rwa [ByteArray.getElem_append_left h', ← getUTF8Byte] at hb
|
||
· refine Or.inl (Or.inl ?_)
|
||
rw [ByteArray.getElem_append_right (by simp [lt_iff] at h' ⊢; omega)] at hb
|
||
simp only [size_toByteArray, List.isUTF8FirstByte_getElem_utf8Encode_singleton] at hb
|
||
ext
|
||
simp only [lt_iff, byteIdx_rawEndPos, Nat.not_lt] at ⊢ h'
|
||
omega
|
||
|
||
/--
|
||
Returns `true` if `p` is a valid UTF-8 position in the string `s`.
|
||
|
||
This means that `p ≤ s.rawEndPos` and `p` lies on a UTF-8 character boundary. At runtime, this
|
||
operation takes constant time.
|
||
|
||
Examples:
|
||
* `String.Pos.isValid "abc" ⟨0⟩ = true`
|
||
* `String.Pos.isValid "abc" ⟨1⟩ = true`
|
||
* `String.Pos.isValid "abc" ⟨3⟩ = true`
|
||
* `String.Pos.isValid "abc" ⟨4⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨0⟩ = true`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨1⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨2⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨3⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨4⟩ = true`
|
||
-/
|
||
@[extern "lean_string_is_valid_pos", expose]
|
||
def Pos.Raw.isValid (s : @&String) (p : @& Pos.Raw) : Bool :=
|
||
if h : p < s.rawEndPos then
|
||
(s.getUTF8Byte p h).IsUTF8FirstByte
|
||
else
|
||
p = s.rawEndPos
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_eq_true_iff {s : String} {p : Pos.Raw} : p.isValid s = true ↔ p.IsValid s := by
|
||
rw [isValid_iff_isUTF8FirstByte]
|
||
fun_cases isValid s p with
|
||
| case1 h =>
|
||
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
|
||
rintro rfl
|
||
simp [lt_iff] at h
|
||
| case2 => simp_all
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_eq_false_iff {s : String} {p : Pos.Raw} : p.isValid s = false ↔ ¬ p.IsValid s := by
|
||
rw [← Bool.not_eq_true, Pos.Raw.isValid_eq_true_iff]
|
||
|
||
instance {s : String} {p : Pos.Raw} : Decidable (p.IsValid s) :=
|
||
decidable_of_iff (p.isValid s = true) Pos.Raw.isValid_eq_true_iff
|
||
|
||
theorem Pos.Raw.isValid_iff_isSome_utf8DecodeChar? {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p = s.rawEndPos ∨ (s.toByteArray.utf8DecodeChar? p.byteIdx).isSome := by
|
||
refine ⟨?_, fun h => h.elim (by rintro rfl; simp) (fun h => ?_)⟩
|
||
· induction s using push_induction with
|
||
| empty => simp [ByteArray.utf8DecodeChar?]
|
||
| push s c ih =>
|
||
simp only [isValid_push, toByteArray_push]
|
||
refine ?_ ∘ Or.imp_left ih
|
||
rintro ((rfl|h)|rfl)
|
||
· rw [ByteArray.utf8DecodeChar?_eq_utf8DecodeChar?_extract, ByteArray.extract_append_eq_right (by simp) (by simp)]
|
||
simp
|
||
· exact Or.inr (ByteArray.isSome_utf8DecodeChar?_append h _)
|
||
· simp [rawEndPos_push]
|
||
· refine isValid_iff_isUTF8FirstByte.2 (Or.inr ?_)
|
||
obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
|
||
refine ⟨?_, ?_⟩
|
||
· have := ByteArray.le_size_of_utf8DecodeChar?_eq_some hc
|
||
have := c.utf8Size_pos
|
||
simp only [lt_iff, byteIdx_rawEndPos, gt_iff_lt, ← size_toByteArray]
|
||
omega
|
||
· rw [getUTF8Byte]
|
||
exact ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h
|
||
|
||
theorem _root_.ByteArray.IsValidUTF8.isUTF8FirstByte_getElem_zero {b : ByteArray}
|
||
(h : b.IsValidUTF8) (h₀ : 0 < b.size) : b[0].IsUTF8FirstByte := by
|
||
rcases h with ⟨m, rfl⟩
|
||
have : m ≠ [] := by
|
||
rintro rfl
|
||
simp at h₀
|
||
conv => congr; congr; rw [← List.cons_head_tail this, ← List.singleton_append, List.utf8Encode_append]
|
||
rw [ByteArray.getElem_append_left]
|
||
· exact List.isUTF8FirstByte_getElem_utf8Encode_singleton.2 rfl
|
||
· simp [List.utf8Encode_singleton, Char.utf8Size_pos]
|
||
|
||
theorem isUTF8FirstByte_getUTF8Byte_zero {b : String} {h} : (b.getUTF8Byte 0 h).IsUTF8FirstByte :=
|
||
b.isValidUTF8.isUTF8FirstByte_getElem_zero _
|
||
|
||
theorem Pos.Raw.isValidUTF8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.rawEndPos) :
|
||
(s.toByteArray.extract p₁.byteIdx p₂.byteIdx).IsValidUTF8 ↔ p₁ = p₂ ∨ (p₁.IsValid s ∧ p₂.IsValid s) := by
|
||
have hle'' : p₂.byteIdx ≤ s.toByteArray.size := by simpa [le_iff] using hle'
|
||
refine ⟨fun h => Classical.or_iff_not_imp_left.2 (fun h' => ?_), ?_⟩
|
||
· have hlt : p₁ < p₂ := by
|
||
simp_all [le_iff, lt_iff, Pos.Raw.ext_iff]
|
||
omega
|
||
have h₁ : p₁.IsValid s := by
|
||
rw [isValid_iff_isUTF8FirstByte]
|
||
refine Or.inr ⟨Pos.Raw.lt_of_lt_of_le hlt hle', ?_⟩
|
||
have hlt' : 0 < p₂.byteIdx - p₁.byteIdx := by
|
||
simp [lt_iff] at hlt
|
||
omega
|
||
have := h.isUTF8FirstByte_getElem_zero
|
||
simp only [ByteArray.size_extract, Nat.min_eq_left hle'', hlt', ByteArray.getElem_extract, Nat.add_zero] at this
|
||
simp [getUTF8Byte, this trivial]
|
||
refine ⟨h₁, isValid_iff_isValidUTF8_extract_zero.2 ⟨hle', ?_⟩⟩
|
||
rw [ByteArray.extract_eq_extract_append_extract p₁.byteIdx (by simp) hle]
|
||
exact h₁.isValidUTF8_extract_zero.append h
|
||
· refine fun h => h.elim (by rintro rfl; simp) (fun ⟨h₁, h₂⟩ => ?_)
|
||
let t : String := ⟨_, h₂.isValidUTF8_extract_zero⟩
|
||
have htb : t.toByteArray = s.toByteArray.extract 0 p₂.byteIdx := rfl
|
||
have ht : p₁.IsValid t := by
|
||
refine isValid_iff_isValidUTF8_extract_zero.2 ⟨?_, ?_⟩
|
||
· simpa [le_iff, t, Nat.min_eq_left hle'', ← size_toByteArray]
|
||
· simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUTF8_extract_zero
|
||
simpa [htb, ByteArray.extract_extract, Nat.zero_add, Nat.min_eq_left hle'', ← size_toByteArray]
|
||
using ht.isValidUTF8_extract_utf8ByteSize
|
||
|
||
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.toByteArray.extract p.byteIdx s.utf8ByteSize).IsValidUTF8 := by
|
||
refine ⟨fun h => ⟨h.le_rawEndPos, h.isValidUTF8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩
|
||
rw [← byteIdx_rawEndPos, isValidUTF8_extract_iff _ _ h₁ (by simp)] at h₂
|
||
obtain (rfl|h₂) := h₂
|
||
· simp
|
||
· exact h₂.1
|
||
|
||
theorem Pos.isValidUTF8_extract {s : String} (pos₁ pos₂ : s.Pos) :
|
||
(s.toByteArray.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUTF8 := by
|
||
by_cases h : pos₁ ≤ pos₂
|
||
· exact (Pos.Raw.isValidUTF8_extract_iff _ _ h pos₂.isValid.le_rawEndPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩)
|
||
· rw [ByteArray.extract_eq_empty_iff.2]
|
||
· exact ByteArray.isValidUTF8_empty
|
||
· rw [Nat.min_eq_left]
|
||
· rw [Pos.le_iff, Pos.Raw.le_iff] at h
|
||
omega
|
||
· have := Pos.Raw.le_iff.1 pos₂.isValid.le_rawEndPos
|
||
rwa [size_toByteArray, ← byteIdx_rawEndPos]
|
||
|
||
@[extern "lean_string_utf8_extract"]
|
||
def Pos.extract {s : @& String} (b e : @& s.Pos) : String where
|
||
toByteArray := s.toByteArray.extract b.offset.byteIdx e.offset.byteIdx
|
||
isValidUTF8 := b.isValidUTF8_extract e
|
||
|
||
/-- Creates a `String` from a `String.Slice` by copying the bytes. -/
|
||
@[inline]
|
||
def Slice.copy (s : Slice) : String :=
|
||
s.startInclusive.extract s.endExclusive
|
||
|
||
theorem Slice.toByteArray_copy {s : Slice} :
|
||
s.copy.toByteArray = s.str.toByteArray.extract s.startInclusive.offset.byteIdx s.endExclusive.offset.byteIdx := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_copy {s : Slice} :
|
||
s.copy.utf8ByteSize = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx:= by
|
||
simp [← size_toByteArray, toByteArray_copy]
|
||
rw [Nat.min_eq_left (by simpa [Pos.Raw.le_iff] using s.endExclusive.isValid.le_rawEndPos)]
|
||
|
||
@[simp]
|
||
theorem Slice.rawEndPos_copy {s : Slice} : s.copy.rawEndPos = s.rawEndPos := by
|
||
simp [Pos.Raw.ext_iff, utf8ByteSize_eq]
|
||
|
||
@[simp]
|
||
theorem copy_toSlice {s : String} : s.toSlice.copy = s := by
|
||
simp [← toByteArray_inj, Slice.toByteArray_copy, ← size_toByteArray]
|
||
|
||
theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.rawEndPos} :
|
||
s.getUTF8Byte p h = s.copy.getUTF8Byte p (by simpa) := by
|
||
simp [getUTF8Byte, String.getUTF8Byte, toByteArray_copy, ByteArray.getElem_extract]
|
||
|
||
theorem Slice.getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h} :
|
||
s.copy.getUTF8Byte p h = s.getUTF8Byte p (by simpa using h) := by
|
||
rw [getUTF8Byte_eq_getUTF8Byte_copy]
|
||
|
||
theorem Slice.isUTF8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
|
||
(s.getUTF8Byte 0 h).IsUTF8FirstByte := by
|
||
simpa [getUTF8Byte_eq_getUTF8Byte_copy] using s.copy.isUTF8FirstByte_getUTF8Byte_zero
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
|
||
p.IsValid s.copy ↔ p.IsValidForSlice s := by
|
||
rw [isValid_iff_isValidUTF8_extract_zero]
|
||
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
|
||
· simpa using h₁
|
||
· have := s.startInclusive_le_endExclusive
|
||
simp_all only [Slice.rawEndPos_copy, le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq,
|
||
Pos.le_iff]
|
||
rw [Slice.toByteArray_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] at h₂
|
||
rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff] at h₂
|
||
· rcases h₂ with (h₂|⟨-, h₂⟩)
|
||
· rw [← h₂]
|
||
exact s.startInclusive.isValid
|
||
· exact h₂
|
||
· simp [le_iff]
|
||
· have := s.endExclusive.isValid.le_rawEndPos
|
||
simp_all [le_iff]
|
||
omega
|
||
· simpa using h₁
|
||
· have := s.startInclusive_le_endExclusive
|
||
simp_all only [le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq, Pos.le_iff]
|
||
rw [Slice.toByteArray_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)]
|
||
rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff]
|
||
· exact Or.inr ⟨s.startInclusive.isValid, h₂⟩
|
||
· simp [le_iff]
|
||
· have := s.endExclusive.isValid.le_rawEndPos
|
||
simp_all [le_iff]
|
||
omega
|
||
|
||
theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} :
|
||
p.IsValidForSlice s ↔ (p = s.rawEndPos ∨ (∃ (h : p < s.rawEndPos), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
|
||
simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy]
|
||
|
||
/-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/
|
||
@[expose]
|
||
def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool :=
|
||
if h : p < s.rawEndPos then
|
||
(s.getUTF8Byte p h).IsUTF8FirstByte
|
||
else
|
||
p = s.rawEndPos
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValidForSlice_eq_true_iff {s : Slice} {p : Pos.Raw} :
|
||
p.isValidForSlice s = true ↔ p.IsValidForSlice s := by
|
||
rw [isValidForSlice_iff_isUTF8FirstByte]
|
||
fun_cases isValidForSlice with
|
||
| case1 h =>
|
||
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
|
||
rintro rfl
|
||
simp [lt_iff] at h
|
||
| case2 => simp_all
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValidForSlice_eq_false_iff {s : Slice} {p : Pos.Raw} :
|
||
p.isValidForSlice s = false ↔ ¬ p.IsValidForSlice s := by
|
||
rw [← Bool.not_eq_true, isValidForSlice_eq_true_iff]
|
||
|
||
instance {s : Slice} {p : Pos.Raw} : Decidable (p.IsValidForSlice s) :=
|
||
decidable_of_iff _ Pos.Raw.isValidForSlice_eq_true_iff
|
||
|
||
theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?_copy {s : Slice} {p : Pos.Raw} :
|
||
p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (s.copy.toByteArray.utf8DecodeChar? p.byteIdx).isSome := by
|
||
rw [← isValid_copy_iff, isValid_iff_isSome_utf8DecodeChar?, Slice.rawEndPos_copy]
|
||
|
||
theorem Slice.toByteArray_str_eq {s : Slice} :
|
||
s.str.toByteArray = s.str.toByteArray.extract 0 s.startInclusive.offset.byteIdx ++
|
||
s.copy.toByteArray ++ s.str.toByteArray.extract s.endExclusive.offset.byteIdx s.str.toByteArray.size := by
|
||
rw [toByteArray_copy, ← ByteArray.extract_eq_extract_append_extract, ← ByteArray.extract_eq_extract_append_extract,
|
||
ByteArray.extract_zero_size]
|
||
· simp
|
||
· simpa [Pos.Raw.le_iff] using s.endExclusive.isValid.le_rawEndPos
|
||
· simp
|
||
· simpa [Pos.Raw.le_iff] using s.startInclusive_le_endExclusive
|
||
|
||
theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar? {s : Slice} {p : Pos.Raw} :
|
||
p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (p < s.rawEndPos ∧ (s.str.toByteArray.utf8DecodeChar? (s.startInclusive.offset.byteIdx + p.byteIdx)).isSome) := by
|
||
refine ⟨?_, ?_⟩
|
||
· rw [isValidForSlice_iff_isSome_utf8DecodeChar?_copy]
|
||
rintro (rfl|h)
|
||
· simp
|
||
· refine Or.inr ⟨?_, ?_⟩
|
||
· have := ByteArray.lt_size_of_isSome_utf8DecodeChar? h
|
||
simpa [Pos.Raw.lt_iff] using this
|
||
· rw [ByteArray.utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h
|
||
rw [Slice.toByteArray_str_eq, ByteArray.append_assoc, ByteArray.utf8DecodeChar?_eq_utf8DecodeChar?_extract]
|
||
simp only [ByteArray.size_append, ByteArray.size_extract, Nat.sub_zero, Nat.le_refl,
|
||
Nat.min_eq_left]
|
||
have h' : s.startInclusive.offset.byteIdx ≤ s.str.toByteArray.size := by
|
||
simpa [le_iff] using s.startInclusive.isValid.le_rawEndPos
|
||
rw [Nat.min_eq_left h', ByteArray.extract_append_size_add' (by simp [size_toByteArray ▸ h']),
|
||
ByteArray.extract_append, Nat.add_sub_cancel_left]
|
||
rw [ByteArray.extract_eq_extract_append_extract s.copy.toByteArray.size]
|
||
· rw [ByteArray.append_assoc]
|
||
apply ByteArray.isSome_utf8DecodeChar?_append h
|
||
· have := ByteArray.lt_size_of_isSome_utf8DecodeChar? h
|
||
simp only [size_toByteArray, Slice.utf8ByteSize_copy, ByteArray.size_extract, Nat.le_refl,
|
||
Nat.min_eq_left] at this
|
||
simp only [size_toByteArray, Slice.utf8ByteSize_copy, ge_iff_le]
|
||
omega
|
||
· simp
|
||
· rw [isValidForSlice_iff_isUTF8FirstByte]
|
||
rintro (rfl|⟨h₁, h₂⟩)
|
||
· simp
|
||
· exact Or.inr ⟨h₁, ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h₂⟩
|
||
|
||
theorem Slice.Pos.isUTF8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.byte h).IsUTF8FirstByte :=
|
||
((Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2))
|
||
|
||
/-- Given a valid position on a slice `s`, obtains the corresponding valid position on the
|
||
underlying string `s.str`. -/
|
||
@[inline]
|
||
def Slice.Pos.str {s : Slice} (pos : s.Pos) : s.str.Pos where
|
||
offset := pos.offset.offsetBy s.startInclusive.offset
|
||
isValid := pos.isValidForSlice.isValid_offsetBy
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_str {s : Slice} {pos : s.Pos} :
|
||
pos.str.offset = pos.offset.offsetBy s.startInclusive.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_str_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
|
||
pos.str.offset ≤ s.endExclusive.offset := by
|
||
have := pos.isValidForSlice.le_rawEndPos
|
||
have := s.startInclusive_le_endExclusive
|
||
simp only [Pos.Raw.le_iff, byteIdx_rawEndPos, utf8ByteSize_eq, offset_str,
|
||
Pos.Raw.byteIdx_offsetBy, String.Pos.le_iff] at *
|
||
omega
|
||
|
||
theorem Slice.Pos.offset_le_offset_str {s : Slice} {pos : s.Pos} :
|
||
pos.offset ≤ pos.str.offset := by
|
||
simp [String.Pos.Raw.le_iff]
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
|
||
pos.offset ≤ s.endExclusive.offset :=
|
||
Pos.Raw.le_trans offset_le_offset_str offset_str_le_offset_endExclusive
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.startInclusive_le_str {s : Slice} {pos : s.Pos} :
|
||
s.startInclusive ≤ pos.str := by
|
||
simp [String.Pos.le_iff, Pos.Raw.le_iff]
|
||
|
||
/--
|
||
Given a valid position on `s.str` which is within the bounds of the slice `s`, obtains the
|
||
corresponding valid position on `s`.
|
||
-/
|
||
@[inline]
|
||
def Slice.Pos.ofStr {s : Slice} (pos : s.str.Pos) (h₁ : s.startInclusive ≤ pos)
|
||
(h₂ : pos ≤ s.endExclusive) : s.Pos where
|
||
offset := pos.offset.unoffsetBy s.startInclusive.offset
|
||
isValidForSlice := by
|
||
refine ⟨?_, Pos.Raw.offsetBy_unoffsetBy_of_le h₁ |>.symm ▸ pos.isValid⟩
|
||
simp [String.Pos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
|
||
omega
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_ofStr {s : Slice} {pos : s.str.Pos} {h₁ h₂} :
|
||
(ofStr pos h₁ h₂).offset = pos.offset.unoffsetBy s.startInclusive.offset := (rfl)
|
||
|
||
/-- Given a slice and a valid position within the slice, obtain a new slice on the same underlying
|
||
string by replacing the start of the slice with the given position. -/
|
||
@[inline, expose] -- for the defeq `(s.sliceFrom pos).str = s.str`
|
||
def Slice.sliceFrom (s : Slice) (pos : s.Pos) : Slice where
|
||
str := s.str
|
||
startInclusive := pos.str
|
||
endExclusive := s.endExclusive
|
||
startInclusive_le_endExclusive := Pos.offset_str_le_offset_endExclusive
|
||
|
||
@[deprecated Slice.sliceFrom (since := "2025-11-20")]
|
||
def Slice.replaceStart (s : Slice) (pos : s.Pos) : Slice :=
|
||
s.sliceFrom pos
|
||
|
||
@[simp]
|
||
theorem Slice.str_sliceFrom {s : Slice} {pos : s.Pos} :
|
||
(s.sliceFrom pos).str = s.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.startInclusive_sliceFrom {s : Slice} {pos : s.Pos} :
|
||
(s.sliceFrom pos).startInclusive = pos.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.endExclusive_sliceFrom {s : Slice} {pos : s.Pos} :
|
||
(s.sliceFrom pos).endExclusive = s.endExclusive := rfl
|
||
|
||
/-- Given a slice and a valid position within the slice, obtain a new slice on the same underlying
|
||
string by replacing the end of the slice with the given position. -/
|
||
@[inline, expose] -- for the defeq `(s.sliceTo pos).str = s.str`
|
||
def Slice.sliceTo (s : Slice) (pos : s.Pos) : Slice where
|
||
str := s.str
|
||
startInclusive := s.startInclusive
|
||
endExclusive := pos.str
|
||
startInclusive_le_endExclusive := by simp [String.Pos.le_iff, String.Pos.Raw.le_iff]
|
||
|
||
@[deprecated Slice.sliceTo (since := "2025-11-20")]
|
||
def Slice.replaceEnd (s : Slice) (pos : s.Pos) : Slice :=
|
||
s.sliceTo pos
|
||
|
||
@[simp]
|
||
theorem Slice.str_sliceTo {s : Slice} {pos : s.Pos} :
|
||
(s.sliceTo pos).str = s.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.startInclusive_sliceTo {s : Slice} {pos : s.Pos} :
|
||
(s.sliceTo pos).startInclusive = s.startInclusive := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.endExclusive_sliceTo {s : Slice} {pos : s.Pos} :
|
||
(s.sliceTo pos).endExclusive = pos.str := rfl
|
||
|
||
/-- Given a slice and two valid positions within the slice, obtain a new slice on the same underlying
|
||
string formed by the new bounds. -/
|
||
@[inline, expose] -- for the defeq `(s.slice newStart newEnd).str = s.str`
|
||
def Slice.slice (s : Slice) (newStart newEnd : s.Pos)
|
||
(h : newStart ≤ newEnd) : Slice where
|
||
str := s.str
|
||
startInclusive := newStart.str
|
||
endExclusive := newEnd.str
|
||
startInclusive_le_endExclusive := by simpa [String.Pos.le_iff, Pos.Raw.le_iff] using h
|
||
|
||
@[deprecated Slice.slice (since := "2025-11-20")]
|
||
def Slice.replaceStartEnd (s : Slice) (newStart newEnd : s.Pos) (h : newStart ≤ newEnd) : Slice :=
|
||
s.slice newStart newEnd h
|
||
|
||
@[simp]
|
||
theorem Slice.str_slice {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.slice newStart newEnd h).str = s.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.startInclusive_slice {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.slice newStart newEnd h).startInclusive = newStart.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.endExclusive_slice {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.slice newStart newEnd h).endExclusive = newEnd.str := rfl
|
||
|
||
/-- Given a slice and two valid positions within the slice, obtain a new slice on the same underlying
|
||
string formed by the new bounds, or `none` if the given end is strictly less than the given start. -/
|
||
def Slice.slice? (s : Slice) (newStart newEnd : s.Pos) : Option Slice :=
|
||
if h : newStart.offset ≤ newEnd.offset then
|
||
some (s.slice newStart newEnd h)
|
||
else
|
||
none
|
||
|
||
/-- Given a slice and two valid positions within the slice, obtain a new slice on the same underlying
|
||
string formed by the new bounds, or panic if the given end is strictly less than the given start. -/
|
||
def Slice.slice! (s : Slice) (newStart newEnd : s.Pos) : Slice :=
|
||
if h : newStart.offset ≤ newEnd.offset then
|
||
s.slice newStart newEnd h
|
||
else
|
||
panic! "Starting position must be less than or equal to end position."
|
||
|
||
@[deprecated Slice.slice! (since := "2025-11-20")]
|
||
def Slice.replaceStartEnd! (s : Slice) (newStart newEnd : s.Pos) : Slice :=
|
||
s.slice! newStart newEnd
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_sliceFrom {s : Slice} {pos : s.Pos} :
|
||
(s.sliceFrom pos).utf8ByteSize = s.utf8ByteSize - pos.offset.byteIdx := by
|
||
simp only [utf8ByteSize_eq, str_sliceFrom, endExclusive_sliceFrom,
|
||
startInclusive_sliceFrom, Pos.offset_str, Pos.Raw.byteIdx_offsetBy]
|
||
omega
|
||
|
||
theorem Slice.rawEndPos_sliceFrom {s : Slice} {pos : s.Pos} :
|
||
(s.sliceFrom pos).rawEndPos = s.rawEndPos.unoffsetBy pos.offset := by
|
||
ext
|
||
simp
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_sliceTo {s : Slice} {pos : s.Pos} :
|
||
(s.sliceTo pos).utf8ByteSize = pos.offset.byteIdx := by
|
||
simp [utf8ByteSize_eq]
|
||
|
||
@[simp]
|
||
theorem Slice.rawEndPos_sliceTo {s : Slice} {pos : s.Pos} :
|
||
(s.sliceTo pos).rawEndPos = pos.offset := by
|
||
ext
|
||
simp
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_slice {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.slice newStart newEnd h).utf8ByteSize = newStart.offset.byteDistance newEnd.offset := by
|
||
simp [utf8ByteSize_eq, Pos.Raw.byteDistance_eq]
|
||
omega
|
||
|
||
theorem Pos.Raw.isValidForSlice_sliceFrom {s : Slice} {p : s.Pos} {off : Pos.Raw} :
|
||
off.IsValidForSlice (s.sliceFrom p) ↔ (off.offsetBy p.offset).IsValidForSlice s := by
|
||
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
|
||
· have := p.isValidForSlice.le_rawEndPos
|
||
simp_all [le_iff]
|
||
omega
|
||
· simpa [Pos.Raw.offsetBy_assoc] using h₂
|
||
· simp_all [Pos.Raw.le_iff]
|
||
omega
|
||
· simpa [Pos.Raw.offsetBy_assoc] using h₂
|
||
|
||
theorem Pos.Raw.isValidForSlice_sliceTo {s : Slice} {p : s.Pos} {off : Pos.Raw} :
|
||
off.IsValidForSlice (s.sliceTo p) ↔ off ≤ p.offset ∧ off.IsValidForSlice s := by
|
||
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_, ?_⟩, fun ⟨h₁, ⟨h₂, h₃⟩⟩ => ⟨?_, ?_⟩⟩
|
||
· simpa using h₁
|
||
· simp only [Slice.rawEndPos_sliceTo] at h₁
|
||
exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_rawEndPos
|
||
· simpa using h₂
|
||
· simpa using h₁
|
||
· simpa using h₃
|
||
|
||
@[extern "lean_string_utf8_get_fast", expose]
|
||
def decodeChar (s : @& String) (byteIdx : @& Nat) (h : (s.toByteArray.utf8DecodeChar? byteIdx).isSome) : Char :=
|
||
s.toByteArray.utf8DecodeChar byteIdx h
|
||
|
||
/-- Obtains the character at the given position in the string. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.get {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : Char :=
|
||
s.str.decodeChar (s.startInclusive.offset.byteIdx + pos.offset.byteIdx)
|
||
((Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?.1 pos.isValidForSlice).elim (by simp_all [Pos.ext_iff]) (·.2))
|
||
|
||
theorem Slice.Pos.get_eq_utf8DecodeChar {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) :
|
||
pos.get h = s.str.toByteArray.utf8DecodeChar (s.startInclusive.offset.byteIdx + pos.offset.byteIdx)
|
||
((Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?.1 pos.isValidForSlice).elim (by simp_all [Pos.ext_iff]) (·.2)) := (rfl)
|
||
|
||
theorem Slice.Pos.utf8Encode_get_eq_extract {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) :
|
||
List.utf8Encode [pos.get h] = s.str.toByteArray.extract (s.startInclusive.offset.byteIdx + pos.offset.byteIdx)
|
||
(s.startInclusive.offset.byteIdx + pos.offset.byteIdx + (pos.get h).utf8Size) := by
|
||
rw [get_eq_utf8DecodeChar pos h, List.utf8Encode_singleton, ByteArray.utf8EncodeChar_utf8DecodeChar]
|
||
|
||
/-- Returns the byte at the given position in the string, or `none` if the position is the end
|
||
position. -/
|
||
@[expose]
|
||
def Slice.Pos.get? {s : Slice} (pos : s.Pos) : Option Char :=
|
||
if h : pos = s.endPos then none else some (pos.get h)
|
||
|
||
/-- Returns the byte at the given position in the string, or panicks if the position is the end
|
||
position. -/
|
||
@[expose]
|
||
def Slice.Pos.get! {s : Slice} (pos : s.Pos) : Char :=
|
||
if h : pos = s.endPos then panic! "Cannot retrieve character at end position" else pos.get h
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValidForSlice_toSlice_iff {s : String} {p : Pos.Raw} :
|
||
p.IsValidForSlice s.toSlice ↔ p.IsValid s := by
|
||
rw [← isValid_copy_iff, copy_toSlice]
|
||
|
||
theorem Pos.Raw.IsValid.toSlice {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||
p.IsValidForSlice s.toSlice :=
|
||
isValidForSlice_toSlice_iff.2 h
|
||
|
||
theorem Pos.Raw.IsValidForSlice.ofSlice {s : String} {p : Pos.Raw} (h : p.IsValidForSlice s.toSlice) :
|
||
p.IsValid s :=
|
||
isValidForSlice_toSlice_iff.1 h
|
||
|
||
/-- Turns a valid position on the string `s` into a valid position on the slice `s.toSlice`. -/
|
||
@[inline, expose]
|
||
def Pos.toSlice {s : String} (pos : s.Pos) : s.toSlice.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := pos.isValid.toSlice
|
||
|
||
@[simp]
|
||
theorem Pos.offset_toSlice {s : String} {pos : s.Pos} : pos.toSlice.offset = pos.offset := (rfl)
|
||
|
||
/-- Given a string `s`, turns a valid position on the slice `s.toSlice` into a valid position on the
|
||
string `s`. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.ofSlice {s : String} (pos : s.toSlice.Pos) : s.Pos where
|
||
offset := pos.offset
|
||
isValid := pos.isValidForSlice.ofSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_ofSlice {s : String} {pos : s.toSlice.Pos} : pos.ofSlice.offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem rawEndPos_toSlice {s : String} : s.toSlice.rawEndPos = s.rawEndPos := by
|
||
rw [← Slice.rawEndPos_copy, copy_toSlice]
|
||
|
||
@[simp]
|
||
theorem endPos_toSlice {s : String} : s.toSlice.endPos = s.endPos.toSlice :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem startPos_toSlice {s : String} : s.toSlice.startPos = s.startPos.toSlice :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Pos.ofSlice_toSlice {s : String} (pos : s.Pos) : pos.toSlice.ofSlice = pos :=
|
||
Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.toSlice_ofSlice {s : String} (pos : s.toSlice.Pos) : pos.ofSlice.toSlice = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.toSlice_comp_ofSlice {s : String} :
|
||
Pos.toSlice ∘ (ofSlice (s := s)) = id := by ext; simp
|
||
|
||
@[simp]
|
||
theorem Pos.ofSlice_comp_toSlice {s : String} :
|
||
Slice.Pos.ofSlice ∘ (toSlice (s := s)) = id := by ext; simp
|
||
|
||
theorem Pos.toSlice_inj {s : String} {p q : s.Pos} : p.toSlice = q.toSlice ↔ p = q :=
|
||
⟨fun h => by simpa using congrArg Slice.Pos.ofSlice h, (· ▸ rfl)⟩
|
||
|
||
theorem Slice.Pos.ofSlice_inj {s : String} {p q : s.toSlice.Pos} : p.ofSlice = q.ofSlice ↔ p = q :=
|
||
⟨fun h => by simpa using congrArg Pos.toSlice h, (· ▸ rfl)⟩
|
||
|
||
@[simp]
|
||
theorem Pos.toSlice_le {s : String} {p q : s.Pos} : p.toSlice ≤ q.toSlice ↔ p ≤ q := by
|
||
simp [le_iff, Slice.Pos.le_iff]
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofSlice_le {s : String} {p q : s.toSlice.Pos} :
|
||
p.ofSlice ≤ q.ofSlice ↔ p ≤ q := by
|
||
simp [String.Pos.le_iff, le_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.toSlice_lt {s : String} {p q : s.Pos} : p.toSlice < q.toSlice ↔ p < q := by
|
||
simp [lt_iff, Slice.Pos.lt_iff]
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofSlice_lt {s : String} {p q : s.toSlice.Pos} :
|
||
p.ofSlice < q.ofSlice ↔ p < q := by
|
||
simp [String.Pos.lt_iff, lt_iff]
|
||
|
||
/--
|
||
Returns the character at the position `pos` of a string, taking a proof that `p` is not the
|
||
past-the-end position.
|
||
|
||
This function is overridden with an efficient implementation in runtime code.
|
||
|
||
Examples:
|
||
* `("abc".pos ⟨1⟩ (by decide)).get (by decide) = 'b'`
|
||
* `("L∃∀N".pos ⟨1⟩ (by decide)).get (by decide) = '∃'`
|
||
-/
|
||
@[inline, expose]
|
||
def Pos.get {s : String} (pos : s.Pos) (h : pos ≠ s.endPos) : Char :=
|
||
pos.toSlice.get (ne_of_apply_ne Slice.Pos.ofSlice (by simp [h]))
|
||
|
||
/--
|
||
Returns the character at the position `pos` of a string, or `none` if the position is the
|
||
past-the-end position.
|
||
|
||
This function is overridden with an efficient implementation in runtime code.
|
||
-/
|
||
@[inline, expose]
|
||
def Pos.get? {s : String} (pos : s.Pos) : Option Char :=
|
||
pos.toSlice.get?
|
||
|
||
/--
|
||
Returns the character at the position `pos` of a string, or panics if the position is the
|
||
past-the-end position.
|
||
|
||
This function is overridden with an efficient implementation in runtime code.
|
||
-/
|
||
@[inline, expose]
|
||
def Pos.get! {s : String} (pos : s.Pos) : Char :=
|
||
pos.toSlice.get!
|
||
|
||
/--
|
||
Returns the byte at the position `pos` of a string.
|
||
-/
|
||
@[inline, expose]
|
||
def Pos.byte {s : String} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=
|
||
pos.toSlice.byte (ne_of_apply_ne Slice.Pos.ofSlice (by simp [h]))
|
||
|
||
theorem Pos.isUTF8FirstByte_byte {s : String} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.byte h).IsUTF8FirstByte :=
|
||
Slice.Pos.isUTF8FirstByte_byte
|
||
|
||
@[simp]
|
||
theorem startPos_eq_endPos_iff {b : String} : b.startPos = b.endPos ↔ b = "" := by
|
||
simp [← utf8ByteSize_eq_zero_iff, Pos.ext_iff, Eq.comm (b := b.rawEndPos)]
|
||
|
||
theorem isSome_utf8DecodeChar?_zero {b : String} (hb : b ≠ "") : (b.toByteArray.utf8DecodeChar? 0).isSome := by
|
||
refine (((Pos.Raw.isValid_iff_isSome_utf8DecodeChar? (s := b)).1 Pos.Raw.isValid_zero).elim ?_ id)
|
||
rw [eq_comm, rawEndPos_eq_zero_iff]
|
||
exact fun h => (hb h).elim
|
||
|
||
theorem head_toList {b : String} {h} :
|
||
b.toList.head h = b.toByteArray.utf8DecodeChar 0 (isSome_utf8DecodeChar?_zero (by simpa using h)) := by
|
||
obtain ⟨l, rfl⟩ := b.exists_eq_ofList
|
||
match l with
|
||
| [] => simp at h
|
||
| c::cs => simp
|
||
|
||
@[deprecated head_toList (since := "2025-10-30")]
|
||
theorem head_data {b : String} {h} :
|
||
b.toList.head h = b.toByteArray.utf8DecodeChar 0 (isSome_utf8DecodeChar?_zero (by simpa using h)) :=
|
||
head_toList
|
||
|
||
theorem get_startPos {b : String} (h) :
|
||
b.startPos.get h = b.toList.head (by rwa [ne_eq, toList_eq_nil_iff, ← startPos_eq_endPos_iff]) :=
|
||
head_toList.symm
|
||
|
||
theorem eq_singleton_append {s : String} (h : s.startPos ≠ s.endPos) :
|
||
∃ t, s = singleton (s.startPos.get h) ++ t := by
|
||
obtain ⟨m, rfl⟩ := s.exists_eq_ofList
|
||
have hm : m ≠ [] := by
|
||
rwa [ne_eq, ← String.ofList_eq_empty_iff, ← startPos_eq_endPos_iff]
|
||
refine ⟨ofList m.tail, ?_⟩
|
||
rw (occs := [1]) [← List.cons_head_tail hm]
|
||
rw [← List.singleton_append, String.ofList_append, append_left_inj, ← singleton_eq_ofList,
|
||
get_startPos]
|
||
simp
|
||
|
||
theorem Slice.copy_eq_copy_sliceTo {s : Slice} {pos : s.Pos} :
|
||
s.copy = (s.sliceTo pos).copy ++ (s.sliceFrom pos).copy := by
|
||
rw [← String.toByteArray_inj, toByteArray_copy, toByteArray_append, toByteArray_copy, toByteArray_copy]
|
||
simp only [str_sliceTo, startInclusive_sliceTo, endExclusive_sliceTo, Pos.offset_str,
|
||
Pos.Raw.byteIdx_offsetBy, str_sliceFrom, startInclusive_sliceFrom,
|
||
endExclusive_sliceFrom, ByteArray.extract_append_extract, Nat.le_add_right, Nat.min_eq_left]
|
||
rw [Nat.max_eq_right]
|
||
exact pos.offset_str_le_offset_endExclusive
|
||
|
||
/-- Given a slice `s` and a position on `s.copy`, obtain the corresponding position on `s`. -/
|
||
@[inline]
|
||
def Pos.ofCopy {s : Slice} (pos : s.copy.Pos) : s.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := Pos.Raw.isValid_copy_iff.1 pos.isValid
|
||
|
||
@[simp]
|
||
theorem Pos.offset_ofCopy {s : Slice} {pos : s.copy.Pos} : pos.ofCopy.offset = pos.offset := (rfl)
|
||
|
||
/-- Given a slice `s` and a position on `s`, obtain the corresponding position on `s.copy.` -/
|
||
@[inline]
|
||
def Slice.Pos.toCopy {s : Slice} (pos : s.Pos) : s.copy.Pos where
|
||
offset := pos.offset
|
||
isValid := Pos.Raw.isValid_copy_iff.2 pos.isValidForSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_toCopy {s : Slice} {pos : s.Pos} : pos.toCopy.offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofCopy_toCopy {s : Slice} {pos : s.Pos} : pos.toCopy.ofCopy = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Pos.toCopy_ofCopy {s : Slice} {pos : s.copy.Pos} : pos.ofCopy.toCopy = pos :=
|
||
Pos.ext (by simp)
|
||
|
||
theorem Pos.ofCopy_inj {s : Slice} {pos pos' : s.copy.Pos} : pos.ofCopy = pos'.ofCopy ↔ pos = pos' :=
|
||
⟨fun h => by simpa using congrArg Slice.Pos.toCopy h, (· ▸ rfl)⟩
|
||
|
||
@[simp]
|
||
theorem Slice.startPos_copy {s : Slice} : s.copy.startPos = s.startPos.toCopy :=
|
||
String.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.endPos.toCopy :=
|
||
String.Pos.ext (by simp)
|
||
|
||
theorem Slice.Pos.get_toCopy {s : Slice} {pos : s.Pos} (h) :
|
||
pos.toCopy.get h = pos.get (by rintro rfl; simp at h) := by
|
||
rw [String.Pos.get, Slice.Pos.get_eq_utf8DecodeChar, Slice.Pos.get_eq_utf8DecodeChar]
|
||
simp only [str_toSlice, toByteArray_copy, startInclusive_toSlice, startPos_copy, offset_toCopy,
|
||
Slice.offset_startPos, Pos.Raw.byteIdx_zero, Pos.offset_toSlice, Nat.zero_add]
|
||
rw [ByteArray.utf8DecodeChar_eq_utf8DecodeChar_extract]
|
||
conv => lhs; congr; rw [ByteArray.extract_extract]
|
||
conv => rhs; rw [ByteArray.utf8DecodeChar_eq_utf8DecodeChar_extract]
|
||
exact ByteArray.utf8DecodeChar_extract_congr _ _ _
|
||
|
||
theorem Slice.Pos.get_eq_get_toCopy {s : Slice} {pos : s.Pos} {h} :
|
||
pos.get h = pos.toCopy.get (ne_of_apply_ne Pos.ofCopy (by simp [h])) :=
|
||
(get_toCopy _).symm
|
||
|
||
theorem Slice.Pos.byte_toCopy {s : Slice} {pos : s.Pos} (h) :
|
||
pos.toCopy.byte h = pos.byte (by rintro rfl; simp at h) := by
|
||
rw [String.Pos.byte, Slice.Pos.byte, Slice.Pos.byte]
|
||
simp [getUTF8Byte, String.getUTF8Byte, toByteArray_copy, ByteArray.getElem_extract]
|
||
|
||
theorem Slice.Pos.byte_eq_byte_toCopy {s : Slice} {pos : s.Pos} {h} :
|
||
pos.byte h = pos.toCopy.byte (ne_of_apply_ne Pos.ofCopy (by simp [h])) :=
|
||
(byte_toCopy _).symm
|
||
|
||
/-- Given a position in `s.sliceFrom p₀`, obtain the corresponding position in `s`. -/
|
||
@[inline]
|
||
def Slice.Pos.ofSliceFrom {s : Slice} {p₀ : s.Pos} (pos : (s.sliceFrom p₀).Pos) : s.Pos where
|
||
offset := pos.offset.offsetBy p₀.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_sliceFrom.1 pos.isValidForSlice
|
||
|
||
@[deprecated Slice.Pos.ofSliceFrom (since := "2025-11-20")]
|
||
def Slice.Pos.ofReplaceStart {s : Slice} {p₀ : s.Pos} (pos : (s.sliceFrom p₀).Pos) : s.Pos :=
|
||
ofSliceFrom pos
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_ofSliceFrom {s : Slice} {p₀ : s.Pos} {pos : (s.sliceFrom p₀).Pos} :
|
||
(ofSliceFrom pos).offset = pos.offset.offsetBy p₀.offset := (rfl)
|
||
|
||
/-- Given a position in `s` that is at least `p₀`, obtain the corresponding position in
|
||
`s.sliceFrom p₀`. -/
|
||
@[inline]
|
||
def Slice.Pos.sliceFrom {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : p₀ ≤ pos) :
|
||
(s.sliceFrom p₀).Pos where
|
||
offset := pos.offset.unoffsetBy p₀.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_sliceFrom.2 (by
|
||
simpa [Pos.Raw.offsetBy_unoffsetBy_of_le (Pos.Raw.le_iff.1 h)] using pos.isValidForSlice)
|
||
|
||
@[deprecated Slice.Pos.sliceFrom (since := "2025-11-20")]
|
||
def Slice.Pos.toReplaceStart {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : p₀ ≤ pos) :
|
||
(s.sliceFrom p₀).Pos :=
|
||
sliceFrom p₀ pos h
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_sliceFrom {s : Slice} {p₀ : s.Pos} {pos : s.Pos} {h} :
|
||
(sliceFrom p₀ pos h).offset = pos.offset.unoffsetBy p₀.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofSliceFrom_startPos {s : Slice} {pos : s.Pos} :
|
||
ofSliceFrom (s.sliceFrom pos).startPos = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofSliceFrom_endPos {s : Slice} {pos : s.Pos} :
|
||
ofSliceFrom (s.sliceFrom pos).endPos = s.endPos := by
|
||
have := pos.isValidForSlice.le_rawEndPos
|
||
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff]
|
||
|
||
theorem Slice.Pos.ofSliceFrom_inj {s : Slice} {p₀ : s.Pos} {pos pos' : (s.sliceFrom p₀).Pos} :
|
||
ofSliceFrom pos = ofSliceFrom pos' ↔ pos = pos' := by
|
||
simp [Pos.ext_iff, String.Pos.Raw.ext_iff]
|
||
|
||
theorem Slice.Pos.get_eq_get_ofSliceFrom {s : Slice} {p₀ : s.Pos} {pos : (s.sliceFrom p₀).Pos} {h} :
|
||
pos.get h = (ofSliceFrom pos).get (by rwa [← ofSliceFrom_endPos, ne_eq, ofSliceFrom_inj]) := by
|
||
simp [Slice.Pos.get, Nat.add_assoc]
|
||
|
||
/-- Given a position in `s.sliceTo p₀`, obtain the corresponding position in `s`. -/
|
||
@[inline]
|
||
def Slice.Pos.ofSliceTo {s : Slice} {p₀ : s.Pos} (pos : (s.sliceTo p₀).Pos) : s.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := (Pos.Raw.isValidForSlice_sliceTo.1 pos.isValidForSlice).2
|
||
|
||
@[deprecated Slice.Pos.ofSliceTo (since := "2025-11-20")]
|
||
def Slice.Pos.ofReplaceEnd {s : Slice} {p₀ : s.Pos} (pos : (s.sliceTo p₀).Pos) : s.Pos :=
|
||
ofSliceTo pos
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_ofSliceTo {s : Slice} {p₀ : s.Pos} {pos : (s.sliceTo p₀).Pos} :
|
||
(ofSliceTo pos).offset = pos.offset := (rfl)
|
||
|
||
/-- Given a position in `s` that is at most `p₀`, obtain the corresponding position in `s.sliceTo p₀`. -/
|
||
@[inline]
|
||
def Slice.Pos.sliceTo {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : pos ≤ p₀) :
|
||
(s.sliceTo p₀).Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_sliceTo.2 ⟨h, pos.isValidForSlice⟩
|
||
|
||
@[deprecated Slice.Pos.sliceTo (since := "2025-11-20")]
|
||
def Slice.Pos.toReplaceEnd {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : pos ≤ p₀) :
|
||
(s.sliceTo p₀).Pos :=
|
||
sliceTo p₀ pos h
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_sliceTo {s : Slice} {p₀ : s.Pos} {pos : s.Pos} {h : pos ≤ p₀} :
|
||
(sliceTo p₀ pos h).offset = pos.offset := (rfl)
|
||
|
||
theorem Slice.Pos.copy_eq_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.endPos) :
|
||
∃ t₁ t₂ : String, s.copy = t₁ ++ singleton (pos.get h) ++ t₂ ∧ t₁.utf8ByteSize = pos.offset.byteIdx := by
|
||
obtain ⟨t₂, ht₂⟩ := (s.sliceFrom pos).copy.eq_singleton_append (by simpa [← Pos.ofCopy_inj, ← ofSliceFrom_inj])
|
||
refine ⟨(s.sliceTo pos).copy, t₂, ?_, by simp⟩
|
||
simp only [Slice.startPos_copy, get_toCopy, get_eq_get_ofSliceFrom, ofSliceFrom_startPos] at ht₂
|
||
rw [append_assoc, ← ht₂, ← copy_eq_copy_sliceTo]
|
||
|
||
theorem Slice.Pos.utf8ByteSize_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = (pos.get h).utf8Size := by
|
||
simp [getUTF8Byte, byte, String.getUTF8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]
|
||
|
||
theorem Pos.utf8ByteSize_byte {s : String} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = (pos.get h).utf8Size :=
|
||
Slice.Pos.utf8ByteSize_byte
|
||
|
||
/-- Advances a valid position on a slice to the next valid position, given a proof that the
|
||
position is not the past-the-end position, which guarantees that such a position exists. -/
|
||
@[expose]
|
||
def Slice.Pos.next {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos where
|
||
offset := pos.offset.increaseBy ((pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte)
|
||
isValidForSlice := by
|
||
obtain ⟨t₁, t₂, ht, ht'⟩ := copy_eq_append_get h
|
||
replace ht' : pos.offset = t₁.rawEndPos := Eq.symm (String.Pos.Raw.ext ht')
|
||
rw [utf8ByteSize_byte, ← Pos.Raw.isValid_copy_iff, ht, ht']
|
||
refine Pos.Raw.IsValid.append_right ?_ t₂
|
||
rw [Pos.Raw.increaseBy_charUtf8Size]
|
||
refine Pos.Raw.IsValid.append_left ?_ t₁
|
||
exact Pos.Raw.isValid_singleton.2 (Or.inr rfl)
|
||
|
||
/-- Advances a valid position on a slice to the next valid position, or returns `none` if the
|
||
given position is the past-the-end position. -/
|
||
@[expose]
|
||
def Slice.Pos.next? {s : Slice} (pos : s.Pos) : Option s.Pos :=
|
||
if h : pos = s.endPos then none else some (pos.next h)
|
||
|
||
/-- Advances a valid position on a slice to the next valid position, or panics if the given
|
||
position is the past-the-end position. -/
|
||
@[expose]
|
||
def Slice.Pos.next! {s : Slice} (pos : s.Pos) : s.Pos :=
|
||
if h : pos = s.endPos then panic! "Cannot advance the end position" else pos.next h
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.next h).offset = pos.offset + pos.get h := by
|
||
ext
|
||
simp [next, utf8ByteSize_byte]
|
||
|
||
theorem Slice.Pos.byteIdx_offset_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.next h).offset.byteIdx = pos.offset.byteIdx + (pos.get h).utf8Size := by
|
||
simp
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.lt_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
pos < pos.next h := by
|
||
simp [Pos.lt_iff, Pos.Raw.lt_iff, Char.utf8Size_pos]
|
||
|
||
theorem Slice.Pos.copy_eq_copy_sliceTo_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.endPos) :
|
||
s.copy = (s.sliceTo pos).copy ++ singleton (pos.get h) ++ (s.sliceFrom (pos.next h)).copy := by
|
||
suffices (max (s.startInclusive.offset.byteIdx + (pos.offset.byteIdx + (pos.get h).utf8Size)) s.endExclusive.offset.byteIdx)
|
||
= s.endExclusive.offset.byteIdx by
|
||
simp [← toByteArray_inj, toByteArray_copy, utf8Encode_get_eq_extract, Nat.add_assoc, this]
|
||
rw [Nat.max_eq_right]
|
||
simpa [Pos.Raw.le_iff] using (pos.next h).offset_str_le_offset_endExclusive
|
||
|
||
@[inline, expose]
|
||
def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : String.Pos.Raw :=
|
||
go (pos.offset.byteIdx - 1) (by
|
||
have := pos.isValidForSlice.le_rawEndPos
|
||
simp [Pos.Raw.le_iff, Pos.ext_iff] at ⊢ this h
|
||
omega)
|
||
where
|
||
go (off : Nat) (h₁ : off < s.utf8ByteSize) : String.Pos.Raw :=
|
||
if hbyte : (s.getUTF8Byte ⟨off⟩ h₁).IsUTF8FirstByte then
|
||
⟨off⟩
|
||
else
|
||
have : 0 ≠ off := by
|
||
intro h
|
||
obtain hoff : (⟨off⟩ : String.Pos.Raw) = 0 := by simpa [String.Pos.Raw.ext_iff] using h.symm
|
||
simp [hoff, s.isUTF8FirstByte_utf8ByteAt_zero] at hbyte
|
||
match off with
|
||
| 0 => False.elim (by contradiction)
|
||
| off + 1 => go off (by omega)
|
||
termination_by structural off
|
||
|
||
theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : off < s.utf8ByteSize) :
|
||
(Slice.Pos.prevAux.go off h₁).IsValidForSlice s := by
|
||
induction off with
|
||
| zero =>
|
||
rw [Slice.Pos.prevAux.go]
|
||
split
|
||
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||
· simpa using elim
|
||
| succ off ih =>
|
||
rw [Slice.Pos.prevAux.go]
|
||
split
|
||
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||
· simpa using ih _
|
||
where
|
||
elim {P : Pos.Raw → Prop} {h : False} : P h.elim := h.elim
|
||
|
||
theorem Pos.Raw.isValidForSlice_prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) :
|
||
(pos.prevAux h).IsValidForSlice s :=
|
||
isValidForSlice_prevAuxGo ..
|
||
|
||
/-- Returns the previous valid position before the given position, given a proof that the position
|
||
is not the start position, which guarantees that such a position exists. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.prev {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : s.Pos where
|
||
offset := prevAux pos h
|
||
isValidForSlice := Pos.Raw.isValidForSlice_prevAux _ _
|
||
|
||
/-- Returns the previous valid position before the given position, or `none` if the position is
|
||
the start position. -/
|
||
@[expose]
|
||
def Slice.Pos.prev? {s : Slice} (pos : s.Pos) : Option s.Pos :=
|
||
if h : pos = s.startPos then none else some (pos.prev h)
|
||
|
||
/-- Returns the previous valid position before the given position, or panics if the position is
|
||
the start position. -/
|
||
@[expose]
|
||
def Slice.Pos.prev! {s : Slice} (pos : s.Pos) : s.Pos :=
|
||
if h : pos = s.startPos then panic! "The start position has no previous position" else pos.prev h
|
||
|
||
/-- Constructs a valid position on `s` from a position and a proof that it is valid. -/
|
||
@[inline, expose]
|
||
def Slice.pos (s : Slice) (off : String.Pos.Raw) (h : off.IsValidForSlice s) : s.Pos where
|
||
offset := off
|
||
isValidForSlice := h
|
||
|
||
@[simp]
|
||
theorem Slice.offset_pos {s : Slice} {off h} : (s.pos off h).offset = off := rfl
|
||
|
||
/-- Constructs a valid position on `s` from a position, returning `none` if the position is not valid. -/
|
||
@[expose]
|
||
def Slice.pos? (s : Slice) (off : String.Pos.Raw) : Option s.Pos :=
|
||
if h : off.isValidForSlice s then
|
||
some (s.pos off (Pos.Raw.isValidForSlice_eq_true_iff.1 h))
|
||
else
|
||
none
|
||
|
||
/-- Constructs a valid position `s` from a position, panicking if the position is not valid. -/
|
||
@[expose]
|
||
def Slice.pos! (s : Slice) (off : String.Pos.Raw) : s.Pos :=
|
||
if h : off.isValidForSlice s then
|
||
s.pos off (Pos.Raw.isValidForSlice_eq_true_iff.1 h)
|
||
else
|
||
panic! "Offset is not at a valid UTF-8 character boundary"
|
||
|
||
/-- Advances a valid position on a string to the next valid position, given a proof that the
|
||
position is not the past-the-end position, which guarantees that such a position exists. -/
|
||
@[expose, extern "lean_string_utf8_next_fast"]
|
||
def Pos.next {s : @& String} (pos : @& s.Pos) (h : pos ≠ s.endPos) : s.Pos :=
|
||
(Slice.Pos.next pos.toSlice (ne_of_apply_ne Slice.Pos.ofSlice (by simpa))).ofSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.str_inj {s : Slice} (p₁ p₂ : s.Pos) : p₁.str = p₂.str ↔ p₁ = p₂ := by
|
||
simp [Slice.Pos.ext_iff, String.Pos.ext_iff, Pos.Raw.ext_iff]
|
||
|
||
@[expose, extern "lean_string_utf8_next_fast"]
|
||
def String.Pos.next {s : @& String} (pos : @& s.Pos) (h : pos ≠ s.endPos) : s.Pos :=
|
||
(Slice.Pos.next pos.toSlice (ne_of_apply_ne Slice.Pos.ofSlice (by simpa))).ofSlice
|
||
|
||
/-- Advances a valid position on a string to the next valid position, or returns `none` if the
|
||
given position is the past-the-end position. -/
|
||
@[inline, expose]
|
||
def Pos.next? {s : String} (pos : s.Pos) : Option s.Pos :=
|
||
pos.toSlice.next?.map Slice.Pos.ofSlice
|
||
|
||
/-- Advances a valid position on a string to the next valid position, or panics if the given
|
||
position is the past-the-end position. -/
|
||
@[inline, expose]
|
||
def Pos.next! {s : String} (pos : s.Pos) : s.Pos :=
|
||
pos.toSlice.next!.ofSlice
|
||
|
||
/-- Returns the previous valid position before the given position, given a proof that the position
|
||
is not the start position, which guarantees that such a position exists. -/
|
||
@[inline, expose]
|
||
def Pos.prev {s : String} (pos : s.Pos) (h : pos ≠ s.startPos) : s.Pos :=
|
||
(pos.toSlice.prev (ne_of_apply_ne Slice.Pos.ofSlice (by simpa))).ofSlice
|
||
|
||
/-- Returns the previous valid position before the given position, or `none` if the position is
|
||
the start position. -/
|
||
@[inline, expose]
|
||
def Pos.prev? {s : String} (pos : s.Pos) : Option s.Pos :=
|
||
pos.toSlice.prev?.map Slice.Pos.ofSlice
|
||
|
||
/-- Returns the previous valid position before the given position, or panics if the position is
|
||
the start position. -/
|
||
@[inline, expose]
|
||
def Pos.prev! {s : String} (pos : s.Pos) : s.Pos :=
|
||
pos.toSlice.prev!.ofSlice
|
||
|
||
/-- Constructs a valid position on `s` from a position and a proof that it is valid. -/
|
||
@[inline, expose]
|
||
def pos (s : String) (off : Pos.Raw) (h : off.IsValid s) : s.Pos :=
|
||
(s.toSlice.pos off h.toSlice).ofSlice
|
||
|
||
/-- Constructs a valid position on `s` from a position, returning `none` if the position is not valid. -/
|
||
@[inline, expose]
|
||
def pos? (s : String) (off : Pos.Raw) : Option s.Pos :=
|
||
(s.toSlice.pos? off).map Slice.Pos.ofSlice
|
||
|
||
/-- Constructs a valid position `s` from a position, panicking if the position is not valid. -/
|
||
@[inline, expose]
|
||
def pos! (s : String) (off : Pos.Raw) : s.Pos :=
|
||
(s.toSlice.pos! off).ofSlice
|
||
|
||
@[simp]
|
||
theorem offset_pos {s : String} {off : Pos.Raw} {h} : (s.pos off h).offset = off := rfl
|
||
|
||
/-- Constructs a valid position on `t` from a valid position on `s` and a proof that `s = t`. -/
|
||
@[inline]
|
||
def Slice.Pos.cast {s t : Slice} (pos : s.Pos) (h : s = t) : t.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := h ▸ pos.isValidForSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_cast {s t : Slice} {pos : s.Pos} {h : s = t} :
|
||
(pos.cast h).offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.cast_rfl {s : Slice} {pos : s.Pos} : pos.cast rfl = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
/-- Constructs a valid position on `t` from a valid position on `s` and a proof that `s = t`. -/
|
||
@[inline]
|
||
def Pos.cast {s t : String} (pos : s.Pos) (h : s = t) : t.Pos where
|
||
offset := pos.offset
|
||
isValid := h ▸ pos.isValid
|
||
|
||
@[simp]
|
||
theorem Pos.offset_cast {s t : String} {pos : s.Pos} {h : s = t} :
|
||
(pos.cast h).offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Pos.cast_rfl {s : String} {pos : s.Pos} : pos.cast rfl = pos :=
|
||
Pos.ext (by simp)
|
||
|
||
theorem Pos.toCopy_toSlice_eq_cast {s : String} (p : s.Pos) :
|
||
p.toSlice.toCopy = p.cast copy_toSlice.symm :=
|
||
Pos.ext (by simp)
|
||
|
||
/-- Given a byte position within a string slice, obtains the smallest valid position that is
|
||
strictly greater than the given byte position. -/
|
||
@[inline]
|
||
def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.rawEndPos) : s.Pos :=
|
||
go offset.inc
|
||
where
|
||
go (offset : String.Pos.Raw) : s.Pos :=
|
||
if h : offset < s.rawEndPos then
|
||
if h' : (s.getUTF8Byte offset h).IsUTF8FirstByte then
|
||
s.pos offset (Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, h'⟩))
|
||
else
|
||
go offset.inc
|
||
else
|
||
s.endPos
|
||
termination_by s.utf8ByteSize - offset.byteIdx
|
||
decreasing_by
|
||
simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.byteIdx_inc] at h ⊢
|
||
omega
|
||
|
||
private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (h : o ≤ s.rawEndPos) :
|
||
o ≤ (findNextPos.go s o).offset := by
|
||
fun_induction findNextPos.go with
|
||
| case1 => simp
|
||
| case2 x h₁ h₂ ih =>
|
||
refine Pos.Raw.le_of_lt (Pos.Raw.lt_of_lt_of_le Pos.Raw.lt_inc (ih ?_))
|
||
rw [Pos.Raw.le_iff, Pos.Raw.byteIdx_inc]
|
||
exact Nat.succ_le_iff.2 h₁
|
||
| case3 x h => exact h
|
||
|
||
theorem Slice.lt_offset_findNextPos {s : Slice} {o : String.Pos.Raw} (h) : o < (s.findNextPos o h).offset :=
|
||
Pos.Raw.lt_of_lt_of_le Pos.Raw.lt_inc (le_offset_findNextPosGo (Pos.Raw.inc_le.2 h))
|
||
|
||
theorem Slice.Pos.prevAuxGo_le_self {s : Slice} {p : Nat} {h : p < s.utf8ByteSize} :
|
||
prevAux.go p h ≤ ⟨p⟩ := by
|
||
induction p with
|
||
| zero =>
|
||
rw [prevAux.go]
|
||
split
|
||
· simp
|
||
· simpa using elim (· ≤ { })
|
||
| succ p ih =>
|
||
rw [prevAux.go]
|
||
split
|
||
· simp
|
||
· simpa using Nat.le_trans ih (by simp)
|
||
where
|
||
elim (P : String.Pos.Raw → Prop) {h : False} : P h.elim := h.elim
|
||
|
||
theorem Slice.Pos.prevAux_lt_self {s : Slice} {p : s.Pos} {h} : p.prevAux h < p.offset := by
|
||
rw [prevAux]
|
||
refine Pos.Raw.lt_of_le_of_lt prevAuxGo_le_self ?_
|
||
simp [Pos.ext_iff, Pos.Raw.lt_iff] at *
|
||
omega
|
||
|
||
theorem Slice.Pos.prevAux_lt_rawEndPos {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.rawEndPos :=
|
||
Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_rawEndPos
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.prev_ne_endPos {s : Slice} {p : s.Pos} {h} : p.prev h ≠ s.endPos := by
|
||
simpa [Pos.ext_iff, prev] using Pos.Raw.ne_of_lt prevAux_lt_rawEndPos
|
||
|
||
@[simp]
|
||
theorem Pos.prev_ne_endPos {s : String} {p : s.Pos} {h} : p.prev h ≠ s.endPos :=
|
||
mt (congrArg (·.toSlice)) (Slice.Pos.prev_ne_endPos (h := mt (congrArg (·.ofSlice)) h))
|
||
|
||
theorem Pos.toSlice_prev {s : String} {p : s.Pos} {h} :
|
||
(p.prev h).toSlice = p.toSlice.prev (ne_of_apply_ne Slice.Pos.ofSlice (by simpa)) := by
|
||
simp [prev]
|
||
|
||
theorem Slice.Pos.offset_prev_lt_offset {s : Slice} {p : s.Pos} {h} : (p.prev h).offset < p.offset := by
|
||
simpa [prev] using prevAux_lt_self
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.prev_lt {s : Slice} {p : s.Pos} {h} : p.prev h < p :=
|
||
lt_iff.2 offset_prev_lt_offset
|
||
|
||
@[simp]
|
||
theorem Pos.prev_lt {s : String} {p : s.Pos} {h} : p.prev h < p := by
|
||
simp [← toSlice_lt, toSlice_prev]
|
||
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8GetAux : List Char → Pos.Raw → Pos.Raw → Char
|
||
| [], _, _ => default
|
||
| c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p
|
||
|
||
@[deprecated Pos.Raw.utf8GetAux (since := "2025-10-09")]
|
||
abbrev utf8GetAux : List Char → Pos.Raw → Pos.Raw → Char :=
|
||
Pos.Raw.utf8GetAux
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. If `p` is not a valid position, returns the
|
||
fallback value `(default : Char)`, which is `'A'`, but does not panic.
|
||
|
||
This function is overridden with an efficient implementation in runtime code. See
|
||
`String.Pos.Raw.utf8GetAux` for the reference implementation.
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.get`, combined with
|
||
`String.pos` or another means of obtaining a `String.Pos`.
|
||
|
||
Examples:
|
||
* `"abc".get ⟨1⟩ = 'b'`
|
||
* `"abc".get ⟨3⟩ = (default : Char)` because byte `3` is at the end of the string.
|
||
* `"L∃∀N".get ⟨2⟩ = (default : Char)` because byte `2` is in the middle of `'∃'`.
|
||
-/
|
||
@[extern "lean_string_utf8_get", expose]
|
||
def Pos.Raw.get (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
utf8GetAux s.toList 0 p
|
||
|
||
@[extern "lean_string_utf8_get", expose, deprecated Pos.Raw.get (since := "2025-10-14")]
|
||
def get (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
Pos.Raw.utf8GetAux s.toList 0 p
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8GetAux? : List Char → Pos.Raw → Pos.Raw → Option Char
|
||
| [], _, _ => none
|
||
| c::cs, i, p => if i = p then some c else utf8GetAux? cs (i + c) p
|
||
|
||
@[deprecated Pos.Raw.utf8GetAux (since := "2025-10-09")]
|
||
abbrev utf8GetAux? : List Char → Pos.Raw → Pos.Raw → Option Char :=
|
||
Pos.Raw.utf8GetAux?
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. If `p` is not a valid position, returns `none`.
|
||
|
||
This function is overridden with an efficient implementation in runtime code. See
|
||
`String.utf8GetAux?` for the reference implementation.
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.get`, combined with
|
||
`String.pos?` or another means of obtaining a `String.Pos`.
|
||
|
||
Examples:
|
||
* `"abc".get? ⟨1⟩ = some 'b'`
|
||
* `"abc".get? ⟨3⟩ = none`
|
||
* `"L∃∀N".get? ⟨1⟩ = some '∃'`
|
||
* `"L∃∀N".get? ⟨2⟩ = none`
|
||
-/
|
||
@[extern "lean_string_utf8_get_opt", expose]
|
||
def Pos.Raw.get? : (@& String) → (@& Pos.Raw) → Option Char
|
||
| s, p => utf8GetAux? s.toList 0 p
|
||
|
||
@[extern "lean_string_utf8_get_opt", expose, deprecated Pos.Raw.get? (since := "2025-10-14")]
|
||
def get? : (@& String) → (@& Pos.Raw) → Option Char
|
||
| s, p => Pos.Raw.utf8GetAux? s.toList 0 p
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. Panics if `p` is not a valid position.
|
||
|
||
See `String.pos?` and `String.Pos.get` for a safer alternative.
|
||
|
||
This function is overridden with an efficient implementation in runtime code. See
|
||
`String.utf8GetAux` for the reference implementation.
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.get`, combined with
|
||
`String.pos!` or another means of obtaining a `String.Pos`.
|
||
|
||
Examples
|
||
* `"abc".get! ⟨1⟩ = 'b'`
|
||
-/
|
||
@[extern "lean_string_utf8_get_bang", expose]
|
||
def Pos.Raw.get! (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.toList 0 p
|
||
|
||
@[extern "lean_string_utf8_get_bang", expose, deprecated Pos.Raw.get! (since := "2025-10-14")]
|
||
def get! (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.toList 0 p
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8SetAux (c' : Char) : List Char → Pos.Raw → Pos.Raw → List Char
|
||
| [], _, _ => []
|
||
| c::cs, i, p =>
|
||
if i = p then (c'::cs) else c::(utf8SetAux c' cs (i + c) p)
|
||
|
||
@[deprecated Pos.Raw.utf8SetAux (since := "2025-10-09")]
|
||
abbrev utf8SetAux (c' : Char) : List Char → Pos.Raw → Pos.Raw → List Char :=
|
||
Pos.Raw.utf8SetAux c'
|
||
|
||
@[simp]
|
||
theorem Pos.get_toSlice {s : String} {p : s.Pos} {h} :
|
||
p.toSlice.get h = p.get (ne_of_apply_ne (·.toSlice) (by simp_all)) := by
|
||
rfl
|
||
|
||
theorem Pos.get_eq_get_toSlice {s : String} {p : s.Pos} {h} :
|
||
p.get h = p.toSlice.get (ne_of_apply_ne Slice.Pos.ofSlice (by simp [h])) := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.offset_next {s : String} (p : s.Pos) (h : p ≠ s.endPos) :
|
||
(p.next h).offset = p.offset + p.get h := by
|
||
simp [next]
|
||
|
||
theorem Pos.byteIdx_offset_next {s : String} (p : s.Pos) (h : p ≠ s.endPos) :
|
||
(p.next h).offset.byteIdx = p.offset.byteIdx + (p.get h).utf8Size := by
|
||
simp
|
||
|
||
theorem Pos.toSlice_next {s : String} {p : s.Pos} {h} :
|
||
(p.next h).toSlice = p.toSlice.next (ne_of_apply_ne Slice.Pos.ofSlice (by simpa)) := by
|
||
simp [next]
|
||
|
||
theorem Pos.byteIdx_lt_utf8ByteSize {s : String} (p : s.Pos) (h : p ≠ s.endPos) :
|
||
p.offset.byteIdx < s.utf8ByteSize := by
|
||
have := byteIdx_rawEndPos ▸ Pos.Raw.le_iff.1 p.isValid.le_rawEndPos
|
||
simp only [ne_eq, Pos.ext_iff, offset_endPos, Pos.Raw.ext_iff, byteIdx_rawEndPos] at h
|
||
omega
|
||
|
||
@[simp]
|
||
theorem Pos.lt_next {s : String} {p : s.Pos} {h} : p < p.next h := by
|
||
simp [← Pos.toSlice_lt, toSlice_next]
|
||
|
||
theorem Pos.ne_startPos_of_lt {s : String} {p q : s.Pos} :
|
||
p < q → q ≠ s.startPos := by
|
||
simp only [lt_iff, Pos.Raw.lt_iff, ne_eq, Pos.ext_iff, offset_startPos, Pos.Raw.ext_iff,
|
||
Pos.Raw.byteIdx_zero]
|
||
omega
|
||
|
||
theorem Pos.next_ne_startPos {s : String} {p : s.Pos} {h} :
|
||
p.next h ≠ s.startPos :=
|
||
ne_startPos_of_lt p.lt_next
|
||
|
||
@[simp]
|
||
theorem Pos.str_toSlice {s : String} {p : s.Pos} : p.toSlice.str = p := by
|
||
ext
|
||
simp
|
||
|
||
theorem Slice.Pos.str_le_endExclusive {s : Slice} (p : s.Pos) :
|
||
p.str ≤ s.endExclusive := by
|
||
have := p.isValidForSlice.le_utf8ByteSize
|
||
have := s.startInclusive_le_endExclusive
|
||
simp [String.Pos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
|
||
omega
|
||
|
||
theorem Pos.lt_of_le_of_ne {s : String} {p q : s.Pos} :
|
||
p ≤ q → p ≠ q → p < q := by
|
||
simp [Pos.le_iff, Pos.lt_iff, Pos.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff,
|
||
Pos.Raw.ext_iff]
|
||
omega
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.str_endPos {s : Slice} : s.endPos.str = s.endExclusive := by
|
||
simp [String.Pos.ext_iff]
|
||
|
||
theorem Slice.Pos.str_lt_endExclusive {s : Slice} (p : s.Pos) (h : p ≠ s.endPos) :
|
||
p.str < s.endExclusive :=
|
||
Pos.lt_of_le_of_ne p.str_le_endExclusive (by rwa [← str_endPos, ne_eq, str_inj])
|
||
|
||
theorem Pos.ne_of_lt {s : String} {p q : s.Pos} : p < q → p ≠ q := by
|
||
simpa [Pos.lt_iff, Pos.ext_iff] using Pos.Raw.ne_of_lt
|
||
|
||
theorem Pos.lt_of_lt_of_le {s : String} {p q r : s.Pos} : p < q → q ≤ r → p < r := by
|
||
simpa [Pos.lt_iff, Pos.le_iff] using Pos.Raw.lt_of_lt_of_le
|
||
|
||
theorem Pos.le_endPos {s : String} (p : s.Pos) : p ≤ s.endPos := by
|
||
simpa [Pos.le_iff] using p.isValid.le_rawEndPos
|
||
|
||
theorem Slice.Pos.str_ne_endPos {s : Slice} (p : s.Pos) (h : p ≠ s.endPos) :
|
||
p.str ≠ s.str.endPos :=
|
||
Pos.ne_of_lt (Pos.lt_of_lt_of_le (p.str_lt_endExclusive h) (Pos.le_endPos _))
|
||
|
||
theorem Pos.le_trans {s : String} {p q r : s.Pos} : p ≤ q → q ≤ r → p ≤ r := by
|
||
simpa [Pos.le_iff] using Pos.Raw.le_trans
|
||
|
||
theorem Pos.le_of_lt {s : String} {p q : s.Pos} : p < q → p ≤ q := by
|
||
simpa [Pos.le_iff, Pos.lt_iff] using Pos.Raw.le_of_lt
|
||
|
||
theorem Slice.Pos.le_of_not_lt {s : Slice} {p q : s.Pos} : ¬q < p → p ≤ q := by
|
||
simp [Slice.Pos.le_iff, Slice.Pos.lt_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff]
|
||
|
||
theorem Slice.Pos.ne_endPos_of_lt {s : Slice} {p q : s.Pos} : p < q → p ≠ s.endPos := by
|
||
have := q.isValidForSlice.le_utf8ByteSize
|
||
simp [lt_iff, Pos.ext_iff, Pos.Raw.lt_iff, Pos.Raw.ext_iff]
|
||
omega
|
||
|
||
theorem Slice.Pos.next_le_of_lt {s : Slice} {p q : s.Pos} {h} : p < q → p.next h ≤ q := by
|
||
-- Things like this will become a lot simpler once we have the `Splits` machinery developed,
|
||
-- but this is `String.Basic`, so we have to suffer a little.
|
||
refine fun hpq => le_of_not_lt (fun hq => ?_)
|
||
have := q.isUTF8FirstByte_byte (h := ne_endPos_of_lt hq)
|
||
rw [byte, getUTF8Byte, String.getUTF8Byte] at this
|
||
simp only [Pos.Raw.byteIdx_offsetBy] at this
|
||
have h₁ : q.offset.byteIdx = p.offset.byteIdx + (q.offset.byteIdx - p.offset.byteIdx) := by
|
||
simp [lt_iff, Pos.Raw.lt_iff] at hpq
|
||
omega
|
||
have h₂ : q.offset.byteIdx - p.offset.byteIdx < (p.get h).utf8Size := by
|
||
simp [lt_iff, Pos.Raw.lt_iff] at hq
|
||
omega
|
||
conv at this => congr; arg 2; rw [h₁, ← Nat.add_assoc]
|
||
rw [← ByteArray.getElem_extract (start := s.startInclusive.offset.byteIdx + p.offset.byteIdx)
|
||
(stop := s.startInclusive.offset.byteIdx + p.offset.byteIdx + (p.get h).utf8Size)] at this
|
||
· simp only [← utf8Encode_get_eq_extract, List.utf8Encode_singleton] at this
|
||
have h₃ := List.getElem_toByteArray (l := utf8EncodeChar (p.get h))
|
||
(i := q.offset.byteIdx - p.offset.byteIdx) (h := by simpa)
|
||
rw [h₃, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar] at this
|
||
simp only [lt_iff, Pos.Raw.lt_iff] at hpq
|
||
omega
|
||
· simp only [ByteArray.size_extract, size_toByteArray]
|
||
rw [Nat.min_eq_left]
|
||
· omega
|
||
· have := (p.next h).str.isValid.le_utf8ByteSize
|
||
simpa [Nat.add_assoc] using this
|
||
|
||
theorem Slice.Pos.ofSlice_le_iff {s : String} {p : s.toSlice.Pos} {q : s.Pos} :
|
||
p.ofSlice ≤ q ↔ p ≤ q.toSlice := Iff.rfl
|
||
|
||
@[simp]
|
||
theorem Pos.toSlice_lt_toSlice_iff {s : String} {p q : s.Pos} :
|
||
p.toSlice < q.toSlice ↔ p < q := Iff.rfl
|
||
|
||
theorem Pos.next_le_of_lt {s : String} {p q : s.Pos} {h} : p < q → p.next h ≤ q := by
|
||
rw [next, Slice.Pos.ofSlice_le_iff, ← Pos.toSlice_lt_toSlice_iff]
|
||
exact Slice.Pos.next_le_of_lt
|
||
|
||
theorem Slice.Pos.get_eq_get_str {s : Slice} {p : s.Pos} {h} :
|
||
p.get h = p.str.get (str_ne_endPos _ h) := by
|
||
simp [String.Pos.get, Slice.Pos.get]
|
||
|
||
@[inline]
|
||
def Slice.Pos.nextFast {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos :=
|
||
ofStr (pos.str.next (str_ne_endPos _ h))
|
||
(Pos.le_trans Slice.Pos.startInclusive_le_str (Pos.le_of_lt String.Pos.lt_next))
|
||
(String.Pos.next_le_of_lt (Slice.Pos.str_lt_endExclusive _ h))
|
||
|
||
@[csimp]
|
||
theorem Slice.Pos.next_eq_nextFast : @Slice.Pos.next = @Slice.Pos.nextFast := by
|
||
funext s pos h
|
||
simp [Slice.Pos.ext_iff, Slice.Pos.nextFast, Pos.Raw.ext_iff, Slice.Pos.get_eq_get_str]
|
||
omega
|
||
|
||
/-- The slice from the beginning of `s` up to `p` (exclusive). -/
|
||
@[inline, expose]
|
||
def sliceTo (s : String) (p : s.Pos) : Slice :=
|
||
s.toSlice.sliceTo p.toSlice
|
||
|
||
@[deprecated sliceTo (since :="2025-11-20")]
|
||
def replaceEnd (s : String) (p : s.Pos) : Slice :=
|
||
s.sliceTo p
|
||
|
||
@[simp]
|
||
theorem str_sliceTo {s : String} {p : s.Pos} : (s.sliceTo p).str = s := rfl
|
||
|
||
@[simp]
|
||
theorem startInclusive_sliceTo {s : String} {p : s.Pos} :
|
||
(s.sliceTo p).startInclusive = s.startPos := by
|
||
simp [sliceTo]
|
||
|
||
@[simp]
|
||
theorem endExclusive_sliceTo {s : String} {p : s.Pos} :
|
||
(s.sliceTo p).endExclusive = p := by
|
||
simp [sliceTo]
|
||
|
||
@[simp]
|
||
theorem rawEndPos_sliceTo {s : String} {p : s.Pos} :
|
||
(s.sliceTo p).rawEndPos = p.offset := by
|
||
simp [sliceTo]
|
||
|
||
theorem Pos.Raw.isValidForSlice_stringSliceTo {s : String} {p : s.Pos} {q : Pos.Raw} :
|
||
q.IsValidForSlice (s.sliceTo p) ↔ q ≤ p.offset ∧ q.IsValid s := by
|
||
rw [sliceTo, isValidForSlice_sliceTo, Pos.offset_toSlice, isValidForSlice_toSlice_iff]
|
||
|
||
/-- The slice from `p` (inclusive) up to the end of `s`. -/
|
||
@[inline, expose]
|
||
def sliceFrom (s : String) (p : s.Pos) : Slice :=
|
||
s.toSlice.sliceFrom p.toSlice
|
||
|
||
@[deprecated sliceFrom (since := "2025-11-20")]
|
||
def replaceStart (s : String) (p : s.Pos) : Slice :=
|
||
s.sliceFrom p
|
||
|
||
@[simp]
|
||
theorem str_sliceFrom {s : String} {p : s.Pos} : (s.sliceFrom p).str = s := rfl
|
||
|
||
@[simp]
|
||
theorem startInclusive_sliceFrom {s : String} {p : s.Pos} :
|
||
(s.sliceFrom p).startInclusive = p := by
|
||
simp [sliceFrom]
|
||
|
||
@[simp]
|
||
theorem endExclusive_sliceFrom {s : String} {p : s.Pos} :
|
||
(s.sliceFrom p).endExclusive = s.endPos := by
|
||
simp [sliceFrom]
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_toSlice {s : String} : s.toSlice.utf8ByteSize = s.utf8ByteSize := by
|
||
simp [Slice.utf8ByteSize_eq]
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_sliceFrom {s : String} {p : s.Pos} :
|
||
(s.sliceFrom p).utf8ByteSize = s.utf8ByteSize - p.offset.byteIdx := by
|
||
simp [sliceFrom]
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_sliceTo {s : String} {p : s.Pos} :
|
||
(s.sliceTo p).utf8ByteSize = p.offset.byteIdx := by
|
||
simp [sliceTo]
|
||
|
||
theorem Pos.Raw.isValidForSlice_stringSliceFrom {s : String} {p : s.Pos} {q : Pos.Raw} :
|
||
q.IsValidForSlice (s.sliceFrom p) ↔ (q.offsetBy p.offset).IsValid s := by
|
||
rw [sliceFrom, isValidForSlice_sliceFrom, isValidForSlice_toSlice_iff,
|
||
Pos.offset_toSlice]
|
||
|
||
/--
|
||
Given a string and two valid positions within the string, obtain a slice on the string formed by
|
||
the two positions.
|
||
|
||
This happens to be equivalent to the constructor of `String.Slice`.
|
||
-/
|
||
@[inline, expose] -- For the defeq `(s.slice p₁ p₂).str = s`
|
||
def slice (s : String) (startInclusive endExclusive : s.Pos)
|
||
(h : startInclusive ≤ endExclusive) : String.Slice where
|
||
str := s
|
||
startInclusive
|
||
endExclusive
|
||
startInclusive_le_endExclusive := h
|
||
|
||
@[simp]
|
||
theorem str_slice {s : String} {startInclusive endExclusive h} :
|
||
(s.slice startInclusive endExclusive h).str = s := rfl
|
||
|
||
@[simp]
|
||
theorem startInclusive_slice {s : String} {startInclusive endExclusive h} :
|
||
(s.slice startInclusive endExclusive h).startInclusive = startInclusive := rfl
|
||
|
||
@[simp]
|
||
theorem endExclusive_slice {s : String} {startInclusive endExclusive h} :
|
||
(s.slice startInclusive endExclusive h).endExclusive = endExclusive := rfl
|
||
|
||
/-- Given a string and two valid positions within the string, obtain a slice on the string formed
|
||
by the new bounds, or return `none` if the given end is strictly less then the given start. -/
|
||
def slice? (s : String) (startInclusive endExclusive : s.Pos) :=
|
||
if h : startInclusive ≤ endExclusive then
|
||
some (s.slice startInclusive endExclusive h)
|
||
else
|
||
none
|
||
|
||
/--
|
||
Given a string and two valid positions within the string, obtain a slice on the string formed by
|
||
the new bounds, or panic if the given end is strictly less than the given start.
|
||
-/
|
||
def slice! (s : String) (p₁ p₂ : s.Pos) : Slice :=
|
||
s.toSlice.slice! p₁.toSlice p₂.toSlice
|
||
|
||
@[deprecated slice! (since := "2025-11-20")]
|
||
def replaceStartEnd! (s : String) (p₁ p₂ : s.Pos) : Slice :=
|
||
s.slice! p₁ p₂
|
||
|
||
theorem Pos.utf8Encode_get_eq_extract {s : String} (pos : s.Pos) (h : pos ≠ s.endPos) :
|
||
List.utf8Encode [pos.get h] = s.toByteArray.extract pos.offset.byteIdx (pos.offset.byteIdx + (pos.get h).utf8Size) := by
|
||
rw [get_eq_get_toSlice, Slice.Pos.utf8Encode_get_eq_extract]
|
||
simp
|
||
|
||
theorem Pos.eq_copy_sliceTo_append_get {s : String} {pos : s.Pos} (h : pos ≠ s.endPos) :
|
||
s = (s.sliceTo pos).copy ++ singleton (pos.get h) ++ (s.sliceFrom (pos.next h)).copy := by
|
||
simp [← toByteArray_inj, utf8Encode_get_eq_extract pos h, Slice.toByteArray_copy, ← size_toByteArray]
|
||
|
||
/-- Given a position in `s.sliceFrom p₀`, obtain the corresponding position in `s`. -/
|
||
@[inline]
|
||
def Pos.ofSliceFrom {s : String} {p₀ : s.Pos} (pos : (s.sliceFrom p₀).Pos) :
|
||
s.Pos where
|
||
offset := pos.offset.offsetBy p₀.offset
|
||
isValid := Pos.Raw.isValidForSlice_stringSliceFrom.1 pos.isValidForSlice
|
||
|
||
@[deprecated Pos.ofSliceFrom (since := "2025-11-20")]
|
||
def Pos.ofReplaceStart {s : String} {p₀ : s.Pos} (pos : (s.sliceFrom p₀).Pos) :
|
||
s.Pos :=
|
||
ofSliceFrom pos
|
||
|
||
@[simp]
|
||
theorem Pos.offset_ofSliceFrom {s : String} {p₀ : s.Pos}
|
||
{pos : (s.sliceFrom p₀).Pos} : (ofSliceFrom pos).offset = pos.offset.offsetBy p₀.offset :=
|
||
(rfl)
|
||
|
||
/-- Given a position in `s` that is at least `p₀`, obtain the corresponding position in
|
||
`s.sliceFrom p₀`. -/
|
||
@[inline]
|
||
def Pos.sliceFrom {s : String} (p₀ : s.Pos) (pos : s.Pos) (h : p₀ ≤ pos) :
|
||
(s.sliceFrom p₀).Pos where
|
||
offset := pos.offset.unoffsetBy p₀.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_stringSliceFrom.2 (by
|
||
simpa [Pos.Raw.offsetBy_unoffsetBy_of_le (Pos.Raw.le_iff.1 h)] using pos.isValid)
|
||
|
||
@[deprecated Pos.sliceFrom (since := "2025-11-20")]
|
||
def Pos.toReplaceStart {s : String} (p₀ : s.Pos) (pos : s.Pos) (h : p₀ ≤ pos) :
|
||
(s.sliceFrom p₀).Pos :=
|
||
sliceFrom p₀ pos h
|
||
|
||
@[simp]
|
||
theorem Pos.offset_sliceFrom {s : String} {p₀ : s.Pos} {pos : s.Pos} {h} :
|
||
(sliceFrom p₀ pos h).offset = pos.offset.unoffsetBy p₀.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Pos.ofSliceFrom_startPos {s : String} {pos : s.Pos} :
|
||
ofSliceFrom (s.sliceFrom pos).startPos = pos :=
|
||
Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Pos.ofSliceFrom_endPos {s : String} {pos : s.Pos} :
|
||
ofSliceFrom (s.sliceFrom pos).endPos = s.endPos := by
|
||
have := pos.isValid.le_rawEndPos
|
||
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff]
|
||
|
||
theorem Pos.ofSliceFrom_inj {s : String} {p₀ : s.Pos}
|
||
{pos pos' : (s.sliceFrom p₀).Pos} :
|
||
ofSliceFrom pos = ofSliceFrom pos' ↔ pos = pos' := by
|
||
simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Slice.Pos.ext_iff]
|
||
|
||
theorem Pos.get_eq_get_ofSliceFrom {s : String} {p₀ : s.Pos}
|
||
{pos : (s.sliceFrom p₀).Pos} {h} :
|
||
pos.get h = (ofSliceFrom pos).get (by rwa [← ofSliceFrom_endPos, ne_eq, ofSliceFrom_inj]) := by
|
||
simp [Pos.get, Slice.Pos.get]
|
||
|
||
/-- Given a position in `s.sliceTo p₀`, obtain the corresponding position in `s`. -/
|
||
@[inline]
|
||
def Pos.ofSliceTo {s : String} {p₀ : s.Pos} (pos : (s.sliceTo p₀).Pos) : s.Pos where
|
||
offset := pos.offset
|
||
isValid := (Pos.Raw.isValidForSlice_stringSliceTo.1 pos.isValidForSlice).2
|
||
|
||
@[deprecated Pos.ofSliceTo (since := "2025-11-20")]
|
||
def Pos.ofReplaceEnd {s : String} {p₀ : s.Pos} (pos : (s.sliceTo p₀).Pos) : s.Pos :=
|
||
ofSliceTo pos
|
||
|
||
@[simp]
|
||
theorem Pos.offset_ofSliceTo {s : String} {p₀ : s.Pos} {pos : (s.sliceTo p₀).Pos} :
|
||
(ofSliceTo pos).offset = pos.offset := (rfl)
|
||
|
||
/-- Given a position in `s` that is at most `p₀`, obtain the corresponding position in `s.sliceTo p₀`. -/
|
||
@[inline]
|
||
def Pos.sliceTo {s : String} (p₀ : s.Pos) (pos : s.Pos) (h : pos ≤ p₀) :
|
||
(s.sliceTo p₀).Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_stringSliceTo.2 ⟨h, pos.isValid⟩
|
||
|
||
@[deprecated Pos.sliceTo (since := "2025-11-20")]
|
||
def Pos.toReplaceEnd {s : String} (p₀ : s.Pos) (pos : s.Pos) (h : pos ≤ p₀) :
|
||
(s.sliceTo p₀).Pos :=
|
||
sliceTo p₀ pos h
|
||
|
||
@[simp]
|
||
theorem Pos.offset_sliceTo {s : String} {p₀ : s.Pos} {pos : s.Pos} {h : pos ≤ p₀} :
|
||
(sliceTo p₀ pos h).offset = pos.offset := (rfl)
|
||
|
||
/--
|
||
Advances the position `p` `n` times.
|
||
|
||
If this would move `p` past the end of `s`, the result is `s.endPos`.
|
||
-/
|
||
def Slice.Pos.nextn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
|
||
match n with
|
||
| 0 => p
|
||
| n + 1 =>
|
||
if h : p ≠ s.endPos then
|
||
nextn (p.next h) n
|
||
else
|
||
p
|
||
|
||
/--
|
||
Iterates `p.prev` `n` times.
|
||
|
||
If this would move `p` past the start of `s`, the result is `s.endPos`.
|
||
-/
|
||
def Slice.Pos.prevn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
|
||
match n with
|
||
| 0 => p
|
||
| n + 1 =>
|
||
if h : p ≠ s.startPos then
|
||
prevn (p.prev h) n
|
||
else
|
||
p
|
||
|
||
/--
|
||
Advances the position `p` `n` times.
|
||
|
||
If this would move `p` past the end of `s`, the result is `s.endPos`.
|
||
-/
|
||
@[inline]
|
||
def Pos.nextn {s : String} (p : s.Pos) (n : Nat) : s.Pos :=
|
||
(p.toSlice.nextn n).ofSlice
|
||
|
||
/--
|
||
Iterates `p.prev` `n` times.
|
||
|
||
If this would move `p` past the start of `s`, the result is `s.startPos`.
|
||
-/
|
||
@[inline]
|
||
def Pos.prevn {s : String} (p : s.Pos) (n : Nat) : s.Pos :=
|
||
(p.toSlice.prevn n).ofSlice
|
||
|
||
theorem Slice.Pos.le_nextn {s : Slice} {p : s.Pos} {n : Nat} : p ≤ p.nextn n := by
|
||
fun_induction nextn with
|
||
| case1 => simp [Slice.Pos.le_iff]
|
||
| case2 p n h ih =>
|
||
simp only [Pos.le_iff] at *
|
||
exact Pos.Raw.le_of_lt (Pos.Raw.lt_of_lt_of_le lt_next ih)
|
||
| case3 => simp [Slice.Pos.le_iff]
|
||
|
||
theorem Pos.le_nextn {s : String} {p : s.Pos} {n : Nat} :
|
||
p ≤ p.nextn n := by
|
||
simpa [nextn, Pos.le_iff, ← offset_toSlice] using Slice.Pos.le_nextn
|
||
|
||
theorem Slice.Pos.prevn_le {s : Slice} {p : s.Pos} {n : Nat} : p.prevn n ≤ p := by
|
||
fun_induction prevn with
|
||
| case1 => simp [le_iff]
|
||
| case2 p n h ih =>
|
||
simp only [Pos.le_iff] at *
|
||
exact Pos.Raw.le_of_lt (Pos.Raw.lt_of_le_of_lt ih prev_lt)
|
||
| case3 => simp [le_iff]
|
||
|
||
theorem Pos.prevn_le {s : String} {p : s.Pos} {n : Nat} :
|
||
p.prevn n ≤ p := by
|
||
simpa [nextn, Pos.le_iff, ← offset_toSlice] using Slice.Pos.prevn_le
|
||
/--
|
||
Returns the next position in a string after position `p`. If `p` is not a valid position or
|
||
`p = s.endPos`, returns the position one byte after `p`.
|
||
|
||
A run-time bounds check is performed to determine whether `p` is at the end of the string. If a
|
||
bounds check has already been performed, use `String.next'` to avoid a repeated check.
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.next` or one of its
|
||
variants like `String.Pos.next?`, combined with `String.pos` or another means of obtaining
|
||
a `String.ValisPos`.
|
||
|
||
Some examples of edge cases:
|
||
* `"abc".next ⟨3⟩ = ⟨4⟩`, since `3 = "abc".endPos`
|
||
* `"L∃∀N".next ⟨2⟩ = ⟨3⟩`, since `2` points into the middle of a multi-byte UTF-8 character
|
||
|
||
Examples:
|
||
* `"abc".get ("abc".next 0) = 'b'`
|
||
* `"L∃∀N".get (0 |> "L∃∀N".next |> "L∃∀N".next) = '∀'`
|
||
-/
|
||
@[extern "lean_string_utf8_next", expose]
|
||
def Pos.Raw.next (s : @& String) (p : @& Pos.Raw) : Pos.Raw :=
|
||
let c := get s p
|
||
p + c
|
||
|
||
@[extern "lean_string_utf8_next", expose, deprecated Pos.Raw.next (since := "2025-10-14")]
|
||
def next (s : @& String) (p : @& Pos.Raw) : Pos.Raw :=
|
||
let c := p.get s
|
||
p + c
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8PrevAux : List Char → Pos.Raw → Pos.Raw → Pos.Raw
|
||
| [], _, p => ⟨p.byteIdx - 1⟩
|
||
| c::cs, i, p =>
|
||
let i' := i + c
|
||
if p ≤ i' then i else utf8PrevAux cs i' p
|
||
|
||
@[deprecated Pos.Raw.utf8PrevAux (since := "2025-10-10")]
|
||
abbrev utf8PrevAux : List Char → Pos.Raw → Pos.Raw → Pos.Raw :=
|
||
Pos.Raw.utf8PrevAux
|
||
|
||
/--
|
||
Returns the position in a string before a specified position, `p`. If `p = ⟨0⟩`, returns `0`. If `p`
|
||
is greater than `rawEndPos`, returns the position one byte before `p`. Otherwise, if `p` occurs in the
|
||
middle of a multi-byte character, returns the beginning position of that character.
|
||
|
||
For example, `"L∃∀N".prev ⟨3⟩` is `⟨1⟩`, since byte 3 occurs in the middle of the multi-byte
|
||
character `'∃'` that starts at byte 1.
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.prev` or one of its
|
||
variants like `String.Pos.prev?`, combined with `String.pos` or another means of obtaining
|
||
a `String.Pos`.
|
||
|
||
Examples:
|
||
* `"abc".get ("abc".rawEndPos |> "abc".prev) = 'c'`
|
||
* `"L∃∀N".get ("L∃∀N".rawEndPos |> "L∃∀N".prev |> "L∃∀N".prev |> "L∃∀N".prev) = '∃'`
|
||
-/
|
||
@[extern "lean_string_utf8_prev", expose]
|
||
def Pos.Raw.prev : (@& String) → (@& Pos.Raw) → Pos.Raw
|
||
| s, p => utf8PrevAux s.toList 0 p
|
||
|
||
@[extern "lean_string_utf8_prev", expose, deprecated Pos.Raw.prev (since := "2025-10-14")]
|
||
def prev : (@& String) → (@& Pos.Raw) → Pos.Raw
|
||
| s, p => Pos.Raw.utf8PrevAux s.toList 0 p
|
||
|
||
/--
|
||
Returns `true` if a specified byte position is greater than or equal to the position which points to
|
||
the end of a string. Otherwise, returns `false`.
|
||
|
||
Examples:
|
||
* `(0 |> "abc".next |> "abc".next |> "abc".atEnd) = false`
|
||
* `(0 |> "abc".next |> "abc".next |> "abc".next |> "abc".next |> "abc".atEnd) = true`
|
||
* `(0 |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".atEnd) = false`
|
||
* `(0 |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".atEnd) = true`
|
||
* `"abc".atEnd ⟨4⟩ = true`
|
||
* `"L∃∀N".atEnd ⟨7⟩ = false`
|
||
* `"L∃∀N".atEnd ⟨8⟩ = true`
|
||
-/
|
||
@[extern "lean_string_utf8_at_end", expose]
|
||
def Pos.Raw.atEnd : (@& String) → (@& Pos.Raw) → Bool
|
||
| s, p => p.byteIdx ≥ utf8ByteSize s
|
||
|
||
@[extern "lean_string_utf8_at_end", expose, deprecated Pos.Raw.atEnd (since := "2025-10-14")]
|
||
def atEnd : (@& String) → (@& Pos.Raw) → Bool
|
||
| s, p => p.byteIdx ≥ utf8ByteSize s
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. Returns `(default : Char)`, which is `'A'`, if
|
||
`p` is not a valid position.
|
||
|
||
Requires evidence, `h`, that `p` is within bounds instead of performing a run-time bounds check as
|
||
in `String.get`.
|
||
|
||
A typical pattern combines `get'` with a dependent `if`-expression to avoid the overhead of an
|
||
additional bounds check. For example:
|
||
```
|
||
def getInBounds? (s : String) (p : String.Pos) : Option Char :=
|
||
if h : s.atEnd p then none else some (s.get' p h)
|
||
```
|
||
Even with evidence of `¬ s.atEnd p`, `p` may be invalid if a byte index points into the middle of a
|
||
multi-byte UTF-8 character. For example, `"L∃∀N".get' ⟨2⟩ (by decide) = (default : Char)`.
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.get`, combined with
|
||
`String.pos` or another means of obtaining a `String.Pos`.
|
||
|
||
Examples:
|
||
* `"abc".get' 0 (by decide) = 'a'`
|
||
* `let lean := "L∃∀N"; lean.get' (0 |> lean.next |> lean.next) (by decide) = '∀'`
|
||
-/
|
||
@[extern "lean_string_utf8_get_fast", expose]
|
||
def Pos.Raw.get' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.toList 0 p
|
||
|
||
@[extern "lean_string_utf8_get_fast", expose, deprecated Pos.Raw.get' (since := "2025-10-14")]
|
||
def get' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.toList 0 p
|
||
|
||
/--
|
||
Returns the next position in a string after position `p`. The result is unspecified if `p` is not a
|
||
valid position.
|
||
|
||
Requires evidence, `h`, that `p` is within bounds. No run-time bounds check is performed, as in
|
||
`String.next`.
|
||
|
||
A typical pattern combines `String.next'` with a dependent `if`-expression to avoid the overhead of
|
||
an additional bounds check. For example:
|
||
```
|
||
def next? (s : String) (p : String.Pos) : Option Char :=
|
||
if h : s.atEnd p then none else s.get (s.next' p h)
|
||
```
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.next`, combined with
|
||
`String.pos` or another means of obtaining a `String.Pos`.
|
||
|
||
Example:
|
||
* `let abc := "abc"; abc.get (abc.next' 0 (by decide)) = 'b'`
|
||
-/
|
||
@[extern "lean_string_utf8_next_fast", expose]
|
||
def Pos.Raw.next' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Pos.Raw :=
|
||
let c := p.get s
|
||
p + c
|
||
|
||
@[extern "lean_string_utf8_next_fast", expose, deprecated Pos.Raw.next' (since := "2025-10-14")]
|
||
def next' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Pos.Raw :=
|
||
let c := p.get s
|
||
p + c
|
||
|
||
theorem Pos.Raw.lt_next (s : String) (i : Pos.Raw) : i < i.next s :=
|
||
Nat.add_lt_add_left (Char.utf8Size_pos _) _
|
||
|
||
theorem Pos.Raw.byteIdx_lt_byteIdx_next (s : String) (i : Pos.Raw) : i.1 < (i.next s).1 :=
|
||
lt_iff.1 (i.lt_next s)
|
||
|
||
@[deprecated Pos.Raw.byteIdx_lt_byteIdx_next (since := "2025-10-10")]
|
||
theorem lt_next (s : String) (i : Pos.Raw) : i.1 < (i.next s).1 :=
|
||
Pos.Raw.lt_next s i
|
||
|
||
theorem Pos.Raw.utf8PrevAux_lt_of_pos : ∀ (cs : List Char) (i p : Pos.Raw), i < p → p ≠ 0 →
|
||
(Pos.Raw.utf8PrevAux cs i p).1 < p.1
|
||
| [], _, _, _, h => Nat.sub_one_lt (mt (congrArg Pos.Raw.mk) h)
|
||
| c::cs, i, p, h, h' => by
|
||
simp [utf8PrevAux]
|
||
apply iteInduction (motive := (Pos.Raw.byteIdx · < _)) <;> intro h''
|
||
next => exact h
|
||
next => exact utf8PrevAux_lt_of_pos _ _ _ (Nat.lt_of_not_le h'') h'
|
||
|
||
theorem Pos.Raw.prev_lt_of_pos (s : String) (i : Pos.Raw) (h : i ≠ 0) : (i.prev s).1 < i.1 :=
|
||
utf8PrevAux_lt_of_pos _ _ _ (Nat.zero_lt_of_ne_zero (mt (congrArg Pos.Raw.mk) h)) h
|
||
|
||
@[deprecated Pos.Raw.prev_lt_of_pos (since := "2025-10-10")]
|
||
theorem prev_lt_of_pos (s : String) (i : Pos.Raw) (h : i ≠ 0) : (i.prev s).1 < i.1 :=
|
||
Pos.Raw.prev_lt_of_pos s i h
|
||
|
||
/--
|
||
Returns the first position where the two strings differ.
|
||
|
||
If one string is a prefix of the other, then the returned position is the end position of the
|
||
shorter string. If the strings are identical, then their end position is returned.
|
||
|
||
Examples:
|
||
* `"tea".firstDiffPos "ten" = ⟨2⟩`
|
||
* `"tea".firstDiffPos "tea" = ⟨3⟩`
|
||
* `"tea".firstDiffPos "teas" = ⟨3⟩`
|
||
* `"teas".firstDiffPos "tea" = ⟨3⟩`
|
||
-/
|
||
@[expose]
|
||
def firstDiffPos (a b : String) : Pos.Raw :=
|
||
let stopPos := a.rawEndPos.min b.rawEndPos
|
||
let rec loop (i : Pos.Raw) : Pos.Raw :=
|
||
if h : i < stopPos then
|
||
if i.get a != i.get b then i
|
||
else
|
||
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next a i)
|
||
loop (i.next a)
|
||
else i
|
||
termination_by stopPos.1 - i.1
|
||
loop 0
|
||
|
||
/--
|
||
Creates a new string that consists of the region of the input string delimited by the two positions.
|
||
|
||
The result is `""` if the start position is greater than or equal to the end position or if the
|
||
start position is at the end of the string. If either position is invalid (that is, if either points
|
||
at the middle of a multi-byte UTF-8 character) then the result is unspecified.
|
||
|
||
This is a legacy function. The recommended alternative is `String.Pos.extract`, but usually
|
||
it is even better to operate on `String.Slice` instead and call `String.Slice.copy` (only) if
|
||
required.
|
||
|
||
Examples:
|
||
* `"red green blue".extract ⟨0⟩ ⟨3⟩ = "red"`
|
||
* `"red green blue".extract ⟨3⟩ ⟨0⟩ = ""`
|
||
* `"red green blue".extract ⟨0⟩ ⟨100⟩ = "red green blue"`
|
||
* `"red green blue".extract ⟨4⟩ ⟨100⟩ = "green blue"`
|
||
* `"L∃∀N".extract ⟨1⟩ ⟨2⟩ = "∃∀N"`
|
||
* `"L∃∀N".extract ⟨2⟩ ⟨100⟩ = ""`
|
||
-/
|
||
@[extern "lean_string_utf8_extract", expose]
|
||
def Pos.Raw.extract : (@& String) → (@& Pos.Raw) → (@& Pos.Raw) → String
|
||
| s, b, e => if b.byteIdx ≥ e.byteIdx then "" else ofList (go₁ s.toList 0 b e)
|
||
where
|
||
go₁ : List Char → Pos.Raw → Pos.Raw → Pos.Raw → List Char
|
||
| [], _, _, _ => []
|
||
| s@(c::cs), i, b, e => if i = b then go₂ s i e else go₁ cs (i + c) b e
|
||
|
||
go₂ : List Char → Pos.Raw → Pos.Raw → List Char
|
||
| [], _, _ => []
|
||
| c::cs, i, e => if i = e then [] else c :: go₂ cs (i + c) e
|
||
|
||
@[extern "lean_string_utf8_extract", expose, deprecated Pos.Raw.extract (since := "2025-10-14")]
|
||
def extract : (@& String) → (@& Pos.Raw) → (@& Pos.Raw) → String
|
||
| s, b, e => Pos.Raw.extract s b e
|
||
|
||
|
||
|
||
def Pos.Raw.offsetOfPosAux (s : String) (pos : Pos.Raw) (i : Pos.Raw) (offset : Nat) : Nat :=
|
||
if i >= pos then offset
|
||
else if h : i.atEnd s then
|
||
offset
|
||
else
|
||
have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (Pos.Raw.lt_next s _)
|
||
offsetOfPosAux s pos (i.next s) (offset+1)
|
||
termination_by s.rawEndPos.1 - i.1
|
||
|
||
/--
|
||
Returns the character index that corresponds to the provided position (i.e. UTF-8 byte index) in a
|
||
string.
|
||
|
||
If the position is at the end of the string, then the string's length in characters is returned. If
|
||
the position is invalid due to pointing at the middle of a UTF-8 byte sequence, then the character
|
||
index of the next character after the position is returned.
|
||
|
||
Examples:
|
||
* `"L∃∀N".offsetOfPos ⟨0⟩ = 0`
|
||
* `"L∃∀N".offsetOfPos ⟨1⟩ = 1`
|
||
* `"L∃∀N".offsetOfPos ⟨2⟩ = 2`
|
||
* `"L∃∀N".offsetOfPos ⟨4⟩ = 2`
|
||
* `"L∃∀N".offsetOfPos ⟨5⟩ = 3`
|
||
* `"L∃∀N".offsetOfPos ⟨50⟩ = 4`
|
||
-/
|
||
@[inline] def Pos.Raw.offsetOfPos (s : String) (pos : Pos.Raw) : Nat :=
|
||
offsetOfPosAux s pos 0 0
|
||
|
||
@[deprecated String.Pos.Raw.offsetOfPos (since := "2025-11-17")]
|
||
def offsetOfPos (s : String) (pos : Pos.Raw) : Nat :=
|
||
pos.offsetOfPos s
|
||
|
||
@[export lean_string_offsetofpos]
|
||
def Internal.offsetOfPosImpl (s : String) (pos : Pos.Raw) : Nat :=
|
||
String.Pos.Raw.offsetOfPos s pos
|
||
|
||
@[specialize] def foldrAux {α : Type u} (f : Char → α → α) (a : α) (s : String) (i begPos : Pos.Raw) : α :=
|
||
if h : begPos < i then
|
||
have := Pos.Raw.prev_lt_of_pos s i <| mt (congrArg String.Pos.Raw.byteIdx) <|
|
||
Ne.symm <| Nat.ne_of_lt <| Nat.lt_of_le_of_lt (Nat.zero_le _) h
|
||
let i := i.prev s
|
||
let a := f (i.get s) a
|
||
foldrAux f a s i begPos
|
||
else a
|
||
termination_by i.1
|
||
|
||
/--
|
||
Folds a function over a string from the right, accumulating a value starting with `init`. The
|
||
accumulated value is combined with each character in reverse order, using `f`.
|
||
|
||
Examples:
|
||
* `"coffee tea water".foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 2`
|
||
* `"coffee tea and water".foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 3`
|
||
* `"coffee tea water".foldr (fun c s => c.push s) "" = "retaw dna aet eeffoc"`
|
||
-/
|
||
@[inline] def foldr {α : Type u} (f : Char → α → α) (init : α) (s : String) : α :=
|
||
foldrAux f init s s.rawEndPos 0
|
||
|
||
@[specialize] def anyAux (s : String) (stopPos : Pos.Raw) (p : Char → Bool) (i : Pos.Raw) : Bool :=
|
||
if h : i < stopPos then
|
||
if p (i.get s) then true
|
||
else
|
||
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next s i)
|
||
anyAux s stopPos p (i.next s)
|
||
else false
|
||
termination_by stopPos.1 - i.1
|
||
|
||
/--
|
||
Checks whether there is a character in a string for which the Boolean predicate `p` returns `true`.
|
||
|
||
Short-circuits at the first character for which `p` returns `true`.
|
||
|
||
Examples:
|
||
* `"brown".any (·.isLetter) = true`
|
||
* `"brown".any (·.isWhitespace) = false`
|
||
* `"brown and orange".any (·.isLetter) = true`
|
||
* `"".any (fun _ => false) = false`
|
||
-/
|
||
@[inline] def any (s : String) (p : Char → Bool) : Bool :=
|
||
anyAux s s.rawEndPos p 0
|
||
|
||
@[export lean_string_any]
|
||
def Internal.anyImpl (s : String) (p : Char → Bool) :=
|
||
String.any s p
|
||
|
||
/--
|
||
Checks whether the Boolean predicate `p` returns `true` for every character in a string.
|
||
|
||
Short-circuits at the first character for which `p` returns `false`.
|
||
|
||
Examples:
|
||
* `"brown".all (·.isLetter) = true`
|
||
* `"brown and orange".all (·.isLetter) = false`
|
||
* `"".all (fun _ => false) = true`
|
||
-/
|
||
@[inline] def all (s : String) (p : Char → Bool) : Bool :=
|
||
!s.any (fun c => !p c)
|
||
|
||
/--
|
||
Checks whether a string contains the specified character.
|
||
|
||
Examples:
|
||
* `"green".contains 'e' = true`
|
||
* `"green".contains 'x' = false`
|
||
* `"".contains 'x' = false`
|
||
-/
|
||
@[inline] def contains (s : String) (c : Char) : Bool :=
|
||
s.any (fun a => a == c)
|
||
|
||
@[export lean_string_contains]
|
||
def Internal.containsImpl (s : String) (c : Char) : Bool :=
|
||
String.contains s c
|
||
|
||
theorem Pos.Raw.utf8SetAux_of_gt (c' : Char) : ∀ (cs : List Char) {i p : Pos.Raw}, i > p → utf8SetAux c' cs i p = cs
|
||
| [], _, _, _ => rfl
|
||
| c::cs, i, p, h => by
|
||
rw [utf8SetAux, if_neg (mt (congrArg (·.1)) (Ne.symm <| Nat.ne_of_lt h)), utf8SetAux_of_gt c' cs]
|
||
exact Nat.lt_of_lt_of_le h (Nat.le_add_right ..)
|
||
|
||
/--
|
||
Checks whether substrings of two strings are equal. Substrings are indicated by their starting
|
||
positions and a size in _UTF-8 bytes_. Returns `false` if the indicated substring does not exist in
|
||
either string.
|
||
|
||
This is a legacy function. The recommended alternative is to construct slices representing the
|
||
strings to be compared and use the `BEq` instance of `String.Slice`.
|
||
-/
|
||
def Pos.Raw.substrEq (s1 : String) (pos1 : String.Pos.Raw) (s2 : String) (pos2 : String.Pos.Raw) (sz : Nat) : Bool :=
|
||
pos1.byteIdx + sz ≤ s1.rawEndPos.byteIdx && pos2.byteIdx + sz ≤ s2.rawEndPos.byteIdx && loop pos1 pos2 { byteIdx := pos1.byteIdx + sz }
|
||
where
|
||
loop (off1 off2 stop1 : Pos.Raw) :=
|
||
if _h : off1.byteIdx < stop1.byteIdx then
|
||
let c₁ := off1.get s1
|
||
let c₂ := off2.get s2
|
||
c₁ == c₂ && loop (off1 + c₁) (off2 + c₂) stop1
|
||
else true
|
||
termination_by stop1.1 - off1.1
|
||
decreasing_by
|
||
have := Nat.sub_lt_sub_left _h (Nat.add_lt_add_left c₁.utf8Size_pos off1.1)
|
||
decreasing_tactic
|
||
|
||
@[deprecated Pos.Raw.substrEq (since := "2025-10-10")]
|
||
def substrEq (s1 : String) (pos1 : String.Pos.Raw) (s2 : String) (pos2 : String.Pos.Raw) (sz : Nat) : Bool :=
|
||
Pos.Raw.substrEq s1 pos1 s2 pos2 sz
|
||
|
||
end String
|
||
|
||
namespace String
|
||
|
||
@[ext]
|
||
theorem ext {s₁ s₂ : String} (h : s₁.toList = s₂.toList) : s₁ = s₂ :=
|
||
toList_injective h
|
||
|
||
@[simp] theorem length_empty : "".length = 0 := by simp [← length_toList, toList_empty]
|
||
|
||
@[deprecated singleton_eq_ofList (since := "2025-10-30")]
|
||
theorem singleton_eq {c : Char} : String.singleton c = ofList [c] :=
|
||
singleton_eq_ofList
|
||
|
||
@[simp] theorem toList_singleton (c : Char) : (String.singleton c).toList = [c] := by
|
||
simp [singleton_eq_ofList]
|
||
|
||
@[deprecated toList_singleton (since := "2025-10-30")]
|
||
theorem data_singleton (c : Char) : (String.singleton c).toList = [c] :=
|
||
toList_singleton c
|
||
|
||
@[simp]
|
||
theorem length_singleton {c : Char} : (String.singleton c).length = 1 := by
|
||
simp [← length_toList]
|
||
|
||
@[simp]
|
||
theorem toList_push (c : Char) : (String.push s c).toList = s.toList ++ [c] := by
|
||
simp [← append_singleton]
|
||
|
||
@[deprecated toList_push (since := "2025-10-30")]
|
||
theorem data_push (c : Char) : (String.push s c).toList = s.toList ++ [c] :=
|
||
toList_push c
|
||
|
||
@[simp] theorem length_push (c : Char) : (String.push s c).length = s.length + 1 := by
|
||
simp [← length_toList]
|
||
|
||
@[simp] theorem length_pushn (c : Char) (n : Nat) : (pushn s c n).length = s.length + n := by
|
||
rw [pushn_eq_repeat_push]; induction n <;> simp [Nat.repeat, Nat.add_assoc, *]
|
||
|
||
@[simp] theorem length_append (s t : String) : (s ++ t).length = s.length + t.length := by
|
||
simp [← length_toList]
|
||
|
||
theorem lt_iff {s t : String} : s < t ↔ s.toList < t.toList := .rfl
|
||
|
||
namespace Pos.Raw
|
||
|
||
@[simp] theorem get!_eq_get (s : String) (p : Pos.Raw) : p.get! s = p.get s := rfl
|
||
|
||
@[simp] theorem get'_eq (s : String) (p : Pos.Raw) (h) : get' s p h = get s p := rfl
|
||
|
||
@[simp] theorem next'_eq (s : String) (p : Pos.Raw) (h) : next' s p h = next s p := rfl
|
||
|
||
end Pos.Raw
|
||
|
||
@[deprecated Pos.Raw.get!_eq_get (since := "2025-10-14")]
|
||
theorem get!_eq_get (s : String) (p : Pos.Raw) : p.get! s = p.get s := rfl
|
||
|
||
@[deprecated Pos.Raw.lt_next (since := "2025-10-10")]
|
||
theorem lt_next' (s : String) (p : Pos.Raw) : p < p.next s :=
|
||
Pos.Raw.lt_next s p
|
||
|
||
@[simp] theorem Pos.Raw.prev_zero (s : String) : Pos.Raw.prev s 0 = 0 := by
|
||
rw [Pos.Raw.prev]
|
||
cases s.toList <;> simp [utf8PrevAux, Pos.Raw.le_iff]
|
||
|
||
@[deprecated Pos.Raw.prev_zero (since := "2025-10-10")]
|
||
theorem prev_zero (s : String) : (0 : Pos.Raw).prev s = 0 := by
|
||
exact Pos.Raw.prev_zero s
|
||
|
||
@[deprecated Pos.Raw.get'_eq (since := "2025-10-14")]
|
||
theorem get'_eq (s : String) (p : Pos.Raw) (h) : p.get' s h = p.get s := rfl
|
||
|
||
@[deprecated Pos.Raw.next'_eq (since := "2025-10-14")]
|
||
theorem next'_eq (s : String) (p : Pos.Raw) (h) : p.next' s h = p.next s := rfl
|
||
|
||
-- `toRawSubstring'` is just a synonym for `toRawSubstring` without the `@[inline]` attribute
|
||
-- so for proving can be unfolded.
|
||
attribute [simp] toRawSubstring'
|
||
|
||
@[deprecated String.size_toByteArray (since := "2025-11-24")]
|
||
theorem size_bytes {s : String} : s.toByteArray.size = s.utf8ByteSize :=
|
||
size_toByteArray
|
||
|
||
@[deprecated String.toByteArray_ofList (since := "2025-11-24")]
|
||
theorem bytes_ofList {l : List Char} : (ofList l).toByteArray = l.utf8Encode :=
|
||
toByteArray_ofList
|
||
|
||
@[deprecated String.toByteArray_inj (since := "2025-11-24")]
|
||
theorem bytes_inj {s t : String} : s.toByteArray = t.toByteArray ↔ s = t :=
|
||
toByteArray_inj
|
||
|
||
end String
|
||
|
||
namespace Char
|
||
|
||
@[simp] theorem length_toString (c : Char) : c.toString.length = 1 := by
|
||
simp [toString_eq_singleton]
|
||
|
||
end Char
|