This PR fixes a bug in `String.Slice.takeWhile` which caused it to get its bookkeeping wrong and panic. The new version only uses safe operations on `String.Slice.Pos`.
4667 lines
176 KiB
Text
4667 lines
176 KiB
Text
/-
|
||
Copyright (c) 2016 Microsoft Corporation. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
Author: Leonardo de Moura, Mario Carneiro
|
||
-/
|
||
module
|
||
|
||
prelude
|
||
public import Init.Data.String.Decode
|
||
import Init.Data.ByteArray.Lemmas
|
||
|
||
public section
|
||
|
||
universe u
|
||
|
||
section
|
||
|
||
@[simp]
|
||
theorem List.utf8Encode_nil : List.utf8Encode [] = ByteArray.empty := by simp [utf8Encode]
|
||
|
||
theorem List.utf8Encode_singleton {c : Char} : [c].utf8Encode = (String.utf8EncodeChar c).toByteArray := by
|
||
simp [utf8Encode]
|
||
|
||
@[simp]
|
||
theorem List.utf8Encode_append {l l' : List Char} :
|
||
(l ++ l').utf8Encode = l.utf8Encode ++ l'.utf8Encode := by
|
||
simp [utf8Encode]
|
||
|
||
theorem List.utf8Encode_cons {c : Char} {l : List Char} : (c :: l).utf8Encode = [c].utf8Encode ++ l.utf8Encode := by
|
||
rw [← singleton_append, List.utf8Encode_append]
|
||
|
||
theorem List.isUTF8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} :
|
||
UInt8.IsUTF8FirstByte [c].utf8Encode[i] ↔ i = 0 := by
|
||
simp [List.utf8Encode_singleton, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar]
|
||
|
||
@[simp]
|
||
theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] := by
|
||
fun_cases String.utf8EncodeChar with simp
|
||
|
||
@[simp]
|
||
theorem List.utf8Encode_eq_empty {l : List Char} : l.utf8Encode = ByteArray.empty ↔ l = [] := by
|
||
simp [utf8Encode, ← List.eq_nil_iff_forall_not_mem]
|
||
|
||
theorem ByteArray.isValidUTF8_utf8Encode {l : List Char} : IsValidUTF8 l.utf8Encode :=
|
||
.intro l rfl
|
||
|
||
@[simp]
|
||
theorem ByteArray.isValidUTF8_empty : IsValidUTF8 ByteArray.empty :=
|
||
.intro [] (by simp)
|
||
|
||
theorem Char.isValidUTF8_toByteArray_utf8EncodeChar {c : Char} :
|
||
ByteArray.IsValidUTF8 (String.utf8EncodeChar c).toByteArray :=
|
||
.intro [c] (by simp [List.utf8Encode_singleton])
|
||
|
||
theorem ByteArray.IsValidUTF8.append {b b' : ByteArray} (h : IsValidUTF8 b) (h' : IsValidUTF8 b') :
|
||
IsValidUTF8 (b ++ b') := by
|
||
rcases h with ⟨m, rfl⟩
|
||
rcases h' with ⟨m', rfl⟩
|
||
exact .intro (m ++ m') (by simp)
|
||
|
||
@[simp]
|
||
theorem Char.toUInt8_val {c : Char} : c.val.toUInt8 = c.toUInt8 := rfl
|
||
|
||
theorem ByteArray.IsValidUTF8.push {b : ByteArray} (h : IsValidUTF8 b) {c : Char} (hc : c.utf8Size = 1) :
|
||
IsValidUTF8 (b.push c.toUInt8) := by
|
||
rcases h with ⟨m, rfl⟩
|
||
refine ⟨m ++ [c], by simp [List.utf8Encode_singleton, String.utf8EncodeChar_eq_singleton hc]⟩
|
||
|
||
theorem ByteArray.isValidUTF8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
|
||
IsValidUTF8 ([c].utf8Encode ++ b) ↔ IsValidUTF8 b := by
|
||
refine ⟨?_, fun h => IsValidUTF8.append isValidUTF8_utf8Encode h⟩
|
||
rintro ⟨l, hl⟩
|
||
match l with
|
||
| [] => simp at hl
|
||
| d::l =>
|
||
obtain rfl : c = d := by
|
||
replace hl := congrArg (fun l => utf8DecodeChar? l 0) hl
|
||
simpa [List.utf8DecodeChar?_utf8Encode_singleton_append,
|
||
List.utf8DecodeChar?_utf8Encode_cons] using hl
|
||
rw [← List.singleton_append (l := l), List.utf8Encode_append,
|
||
ByteArray.append_right_inj] at hl
|
||
exact hl ▸ isValidUTF8_utf8Encode
|
||
|
||
/--
|
||
Decodes a sequence of characters from their UTF-8 representation. Returns `none` if the bytes are
|
||
not a sequence of Unicode scalar values.
|
||
-/
|
||
@[inline, expose]
|
||
def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) :=
|
||
go (b.size + 1) 0 #[] (by simp) (by simp)
|
||
where
|
||
go (fuel : Nat) (i : Nat) (acc : Array Char) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Option (Array Char) :=
|
||
match fuel, hf with
|
||
| fuel + 1, _ =>
|
||
if i = b.size then
|
||
some acc
|
||
else
|
||
match h : utf8DecodeChar? b i with
|
||
| none => none
|
||
| some c => go fuel (i + c.utf8Size) (acc.push c)
|
||
(le_size_of_utf8DecodeChar?_eq_some h)
|
||
(have := c.utf8Size_pos; have := le_size_of_utf8DecodeChar?_eq_some h; by omega)
|
||
termination_by structural fuel
|
||
|
||
@[expose, extern "lean_string_validate_utf8"]
|
||
def ByteArray.validateUTF8 (b : ByteArray) : Bool :=
|
||
go (b.size + 1) 0 (by simp) (by simp)
|
||
where
|
||
go (fuel : Nat) (i : Nat) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Bool :=
|
||
match fuel, hf with
|
||
| fuel + 1, _ =>
|
||
if hi : i = b.size then
|
||
true
|
||
else
|
||
match h : validateUTF8At b i with
|
||
| false => false
|
||
| true => go fuel (i + b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h))
|
||
?_ ?_
|
||
termination_by structural fuel
|
||
finally
|
||
all_goals rw [ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar?] at h
|
||
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
|
||
exact add_utf8Size_utf8DecodeChar_le_size
|
||
· rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)]
|
||
have := add_utf8Size_utf8DecodeChar_le_size (h := h)
|
||
have := (b.utf8DecodeChar i h).utf8Size_pos
|
||
omega
|
||
|
||
theorem ByteArray.isSome_utf8Decode?Go_eq_validateUTF8Go {b : ByteArray} {fuel : Nat}
|
||
{i : Nat} {acc : Array Char} {hi : i ≤ b.size} {hf : b.size - i < fuel} :
|
||
(utf8Decode?.go b fuel i acc hi hf).isSome = validateUTF8.go b fuel i hi hf := by
|
||
fun_induction utf8Decode?.go with
|
||
| case1 => simp [validateUTF8.go]
|
||
| case2 i acc hi fuel hf h₁ h₂ =>
|
||
simp only [Option.isSome_none, validateUTF8.go, h₁, ↓reduceDIte, Bool.false_eq]
|
||
split
|
||
· rfl
|
||
· rename_i heq
|
||
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||
| case3 i acc hi fuel hf h₁ c h₂ ih =>
|
||
simp [validateUTF8.go, h₁]
|
||
split
|
||
· rename_i heq
|
||
simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq
|
||
· rw [ih]
|
||
congr
|
||
rw [← ByteArray.utf8Size_utf8DecodeChar (h := by simp [h₂])]
|
||
simp [utf8DecodeChar, h₂]
|
||
|
||
theorem ByteArray.isSome_utf8Decode?_eq_validateUTF8 {b : ByteArray} :
|
||
b.utf8Decode?.isSome = b.validateUTF8 :=
|
||
b.isSome_utf8Decode?Go_eq_validateUTF8Go
|
||
|
||
theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'}
|
||
(hbb' : b = b') (hii' : i = i') (hacc : acc = acc') :
|
||
ByteArray.utf8Decode?.go b fuel i acc hi hf = ByteArray.utf8Decode?.go b' fuel' i' acc' hi' hf' := by
|
||
subst hbb' hii' hacc
|
||
fun_induction ByteArray.utf8Decode?.go b fuel i acc hi hf generalizing fuel' with
|
||
| case1 =>
|
||
rw [go.eq_def]
|
||
split
|
||
simp
|
||
| case2 =>
|
||
rw [go.eq_def]
|
||
split <;> split
|
||
· simp_all
|
||
· split <;> simp_all
|
||
| case3 =>
|
||
conv => rhs; rw [go.eq_def]
|
||
split <;> split
|
||
· simp_all
|
||
· split
|
||
· simp_all
|
||
· rename_i c₁ hc₁ ih _ _ _ _ _ c₂ hc₂
|
||
obtain rfl : c₁ = c₂ := by rw [← Option.some_inj, ← hc₁, ← hc₂]
|
||
apply ih
|
||
|
||
@[simp]
|
||
theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] := by
|
||
simp [utf8Decode?, utf8Decode?.go]
|
||
|
||
private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
|
||
(ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUTF8 (b.extract i b.size) := by
|
||
fun_induction ByteArray.utf8Decode?.go with
|
||
| case1 => simp
|
||
| case2 fuel i hi hf acc h₁ h₂ =>
|
||
simp only [Option.isSome_none, Bool.false_eq_true, false_iff]
|
||
rintro ⟨l, hl⟩
|
||
have : l ≠ [] := by
|
||
rintro rfl
|
||
simp at hl
|
||
omega
|
||
rw [← l.cons_head_tail this] at hl
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract, hl, List.utf8DecodeChar?_utf8Encode_cons] at h₂
|
||
simp at h₂
|
||
| case3 i acc hi fuel hf h₁ c h₂ ih =>
|
||
rw [ih]
|
||
have h₂' := h₂
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂'
|
||
obtain ⟨l, hl⟩ := exists_of_utf8DecodeChar?_eq_some h₂'
|
||
rw [ByteArray.extract_eq_extract_append_extract (i := i) (i + c.utf8Size) (by omega)
|
||
(le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢
|
||
rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega),
|
||
← List.utf8Encode_singleton, isValidUTF8_utf8Encode_singleton_append_iff]
|
||
|
||
theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} :
|
||
b.utf8Decode?.isSome ↔ IsValidUTF8 b := by
|
||
rw [utf8Decode?, isSome_utf8Decode?go_iff, extract_zero_size]
|
||
|
||
@[simp]
|
||
theorem ByteArray.validateUTF8_eq_true_iff {b : ByteArray} :
|
||
b.validateUTF8 = true ↔ IsValidUTF8 b := by
|
||
rw [← isSome_utf8Decode?_eq_validateUTF8, isSome_utf8Decode?_iff]
|
||
|
||
@[simp]
|
||
theorem ByteArray.validateUTF8_eq_false_iff {b : ByteArray} :
|
||
b.validateUTF8 = false ↔ ¬ IsValidUTF8 b := by
|
||
simp [← Bool.not_eq_true]
|
||
|
||
instance {b : ByteArray} : Decidable b.IsValidUTF8 :=
|
||
decidable_of_iff (b.validateUTF8 = true) ByteArray.validateUTF8_eq_true_iff
|
||
|
||
/--
|
||
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
|
||
the corresponding string.
|
||
-/
|
||
@[inline, expose]
|
||
def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUTF8) : String :=
|
||
.ofByteArray a h
|
||
|
||
/--
|
||
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
|
||
the corresponding string, or returns `none` if the array is not a valid UTF-8 encoding of a string.
|
||
-/
|
||
@[inline, expose] def String.fromUTF8? (a : ByteArray) : Option String :=
|
||
if h : a.IsValidUTF8 then some (fromUTF8 a h) else none
|
||
|
||
/--
|
||
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
|
||
the corresponding string, or panics if the array is not a valid UTF-8 encoding of a string.
|
||
-/
|
||
@[inline, expose] def String.fromUTF8! (a : ByteArray) : String :=
|
||
if h : a.IsValidUTF8 then fromUTF8 a h else panic! "invalid UTF-8 string"
|
||
|
||
/--
|
||
Encodes a string in UTF-8 as an array of bytes.
|
||
-/
|
||
@[extern "lean_string_to_utf8"]
|
||
def String.toUTF8 (a : @& String) : ByteArray :=
|
||
a.bytes
|
||
|
||
@[simp] theorem String.toUTF8_eq_bytes {s : String} : s.toUTF8 = s.bytes := (rfl)
|
||
|
||
@[simp] theorem String.bytes_empty : "".bytes = ByteArray.empty := (rfl)
|
||
|
||
/--
|
||
Appends two strings. Usually accessed via the `++` operator.
|
||
|
||
The internal implementation will perform destructive updates if the string is not shared.
|
||
|
||
Examples:
|
||
* `"abc".append "def" = "abcdef"`
|
||
* `"abc" ++ "def" = "abcdef"`
|
||
* `"" ++ "" = ""`
|
||
-/
|
||
@[extern "lean_string_append", expose]
|
||
def String.append (s : String) (t : @& String) : String where
|
||
bytes := s.bytes ++ t.bytes
|
||
isValidUTF8 := s.isValidUTF8.append t.isValidUTF8
|
||
|
||
instance : Append String where
|
||
append s t := s.append t
|
||
|
||
@[simp]
|
||
theorem String.bytes_append {s t : String} : (s ++ t).bytes = s.bytes ++ t.bytes := (rfl)
|
||
|
||
theorem String.bytes_inj {s t : String} : s.bytes = t.bytes ↔ s = t := by
|
||
refine ⟨fun h => ?_, (· ▸ rfl)⟩
|
||
rcases s with ⟨s⟩
|
||
rcases t with ⟨t⟩
|
||
subst h
|
||
rfl
|
||
|
||
@[simp]
|
||
theorem String.empty_append {s : String} : "" ++ s = s := by
|
||
simp [← String.bytes_inj]
|
||
|
||
@[simp]
|
||
theorem String.append_empty {s : String} : s ++ "" = s := by
|
||
simp [← String.bytes_inj]
|
||
|
||
@[simp] theorem List.bytes_asString {l : List Char} : l.asString.bytes = l.utf8Encode := by
|
||
simp [List.asString, String.mk]
|
||
|
||
@[simp]
|
||
theorem List.asString_nil : List.asString [] = "" := by
|
||
simp [← String.bytes_inj]
|
||
|
||
@[simp]
|
||
theorem List.asString_append {l₁ l₂ : List Char} : (l₁ ++ l₂).asString = l₁.asString ++ l₂.asString := by
|
||
simp [← String.bytes_inj]
|
||
|
||
@[expose]
|
||
def String.Internal.toArray (b : String) : Array Char :=
|
||
b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUTF8)
|
||
|
||
@[simp]
|
||
theorem String.Internal.toArray_empty : String.Internal.toArray "" = #[] := by
|
||
simp [toArray]
|
||
|
||
@[extern "lean_string_data", expose]
|
||
def String.data (b : String) : List Char :=
|
||
(String.Internal.toArray b).toList
|
||
|
||
@[simp]
|
||
theorem String.data_empty : "".data = [] := by
|
||
simp [data]
|
||
|
||
/--
|
||
Returns the length of a string in Unicode code points.
|
||
|
||
Examples:
|
||
* `"".length = 0`
|
||
* `"abc".length = 3`
|
||
* `"L∃∀N".length = 4`
|
||
-/
|
||
@[extern "lean_string_length", expose]
|
||
def String.length (b : @& String) : Nat :=
|
||
b.data.length
|
||
|
||
@[simp]
|
||
theorem String.Internal.size_toArray {b : String} : (String.Internal.toArray b).size = b.length :=
|
||
(rfl)
|
||
|
||
@[simp]
|
||
theorem String.length_data {b : String} : b.data.length = b.length := (rfl)
|
||
|
||
theorem String.exists_eq_asString (s : String) :
|
||
∃ l : List Char, s = l.asString := by
|
||
rcases s with ⟨_, ⟨l, rfl⟩⟩
|
||
refine ⟨l, by simp [← String.bytes_inj]⟩
|
||
|
||
private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
|
||
utf8Decode?.go b fuel i acc hi hf = (utf8Decode?.go (b.extract i b.size) fuel 0 #[] (by simp) (by simp [hf])).map (acc ++ ·) := by
|
||
fun_cases utf8Decode?.go b fuel i acc hi hf with
|
||
| case1 =>
|
||
rw [utf8Decode?.go]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
|
||
List.nil_append]
|
||
rw [if_pos (by omega)]
|
||
simp
|
||
| case2 fuel hf₁ h₁ h₂ hf₂ =>
|
||
rw [utf8Decode?.go]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
|
||
List.nil_append]
|
||
rw [if_neg (by omega)]
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
|
||
split <;> simp_all
|
||
| case3 fuel hf₁ h₁ c h₂ hf₂ =>
|
||
conv => rhs; rw [utf8Decode?.go]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
|
||
List.nil_append]
|
||
rw [if_neg (by omega)]
|
||
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
|
||
split
|
||
· simp_all
|
||
· rename_i c' hc'
|
||
obtain rfl : c = c' := by
|
||
rw [← Option.some_inj, ← h₂, hc']
|
||
have := c.utf8Size_pos
|
||
conv => lhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
|
||
conv => rhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
|
||
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Option.map_map, ByteArray.extract_extract]
|
||
have : (fun x => acc ++ x) ∘ (fun x => #[c] ++ x) = fun x => acc.push c ++ x := by funext; simp
|
||
simp [(by omega : i + (b.size - i) = b.size), this]
|
||
|
||
theorem ByteArray.utf8Decode?_utf8Encode_singleton_append {l : ByteArray} {c : Char} :
|
||
([c].utf8Encode ++ l).utf8Decode? = l.utf8Decode?.map (#[c] ++ ·) := by
|
||
rw [utf8Decode?, utf8Decode?.go,
|
||
if_neg (by simp [List.utf8Encode_singleton]; have := c.utf8Size_pos; omega)]
|
||
split
|
||
· simp_all [List.utf8DecodeChar?_utf8Encode_singleton_append]
|
||
· rename_i d h
|
||
obtain rfl : c = d := by simpa [List.utf8DecodeChar?_utf8Encode_singleton_append] using h
|
||
rw [utf8Decode?go_eq_utf8Decode?go_extract, utf8Decode?]
|
||
simp only [List.push_toArray, List.nil_append, Nat.zero_add]
|
||
congr 1
|
||
apply ByteArray.utf8Decode?.go.congr _ rfl rfl
|
||
apply extract_append_eq_right _ (by simp)
|
||
simp [List.utf8Encode_singleton]
|
||
|
||
@[simp]
|
||
theorem List.utf8Decode?_utf8Encode {l : List Char} :
|
||
l.utf8Encode.utf8Decode? = some l.toArray := by
|
||
induction l with
|
||
| nil => simp
|
||
| cons c l ih =>
|
||
rw [← List.singleton_append, List.utf8Encode_append]
|
||
simp only [ByteArray.utf8Decode?_utf8Encode_singleton_append, cons_append, nil_append,
|
||
Option.map_eq_some_iff, Array.append_eq_toArray_iff, cons.injEq, true_and]
|
||
refine ⟨l.toArray, ih, by simp⟩
|
||
|
||
@[simp]
|
||
theorem ByteArray.utf8Encode_get_utf8Decode? {b : ByteArray} {h} :
|
||
(b.utf8Decode?.get h).toList.utf8Encode = b := by
|
||
obtain ⟨l, rfl⟩ := isSome_utf8Decode?_iff.1 h
|
||
simp
|
||
|
||
@[simp]
|
||
theorem List.data_asString {l : List Char} : l.asString.data = l := by
|
||
simp [String.data, String.Internal.toArray]
|
||
|
||
@[simp]
|
||
theorem String.asString_data {b : String} : b.data.asString = b := by
|
||
obtain ⟨l, rfl⟩ := String.exists_eq_asString b
|
||
rw [List.data_asString]
|
||
|
||
theorem List.asString_injective {l₁ l₂ : List Char} (h : l₁.asString = l₂.asString) : l₁ = l₂ := by
|
||
simpa using congrArg String.data h
|
||
|
||
theorem List.asString_inj {l₁ l₂ : List Char} : l₁.asString = l₂.asString ↔ l₁ = l₂ :=
|
||
⟨asString_injective, (· ▸ rfl)⟩
|
||
|
||
theorem String.data_injective {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ := by
|
||
simpa using congrArg List.asString h
|
||
|
||
theorem String.data_inj {s₁ s₂ : String} : s₁.data = s₂.data ↔ s₁ = s₂ :=
|
||
⟨data_injective, (· ▸ rfl)⟩
|
||
|
||
@[simp]
|
||
theorem String.data_append {l₁ l₂ : String} : (l₁ ++ l₂).data = l₁.data ++ l₂.data := by
|
||
apply List.asString_injective
|
||
simp
|
||
|
||
@[simp]
|
||
theorem String.utf8encode_data {b : String} : b.data.utf8Encode = b.bytes := by
|
||
have := congrArg String.bytes (String.asString_data (b := b))
|
||
rwa [← List.bytes_asString]
|
||
|
||
@[simp]
|
||
theorem String.utf8ByteSize_empty : "".utf8ByteSize = 0 := (rfl)
|
||
|
||
@[simp]
|
||
theorem String.utf8ByteSize_append {s t : String} :
|
||
(s ++ t).utf8ByteSize = s.utf8ByteSize + t.utf8ByteSize := by
|
||
simp [utf8ByteSize]
|
||
|
||
@[simp]
|
||
theorem String.size_bytes {s : String} : s.bytes.size = s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem String.bytes_push {s : String} {c : Char} : (s.push c).bytes = s.bytes ++ [c].utf8Encode := by
|
||
simp [push]
|
||
|
||
-- This is just to keep the proof of `set_next_add` below from breaking; if that lemma goes away
|
||
-- or the proof is rewritten, it can be removed.
|
||
private noncomputable def String.utf8ByteSize' : String → Nat
|
||
| s => go s.data
|
||
where
|
||
go : List Char → Nat
|
||
| [] => 0
|
||
| c::cs => go cs + c.utf8Size
|
||
|
||
private theorem String.utf8ByteSize'_eq (s : String) : s.utf8ByteSize' = s.utf8ByteSize := by
|
||
suffices ∀ l, utf8ByteSize'.go l = l.asString.utf8ByteSize by
|
||
obtain ⟨m, rfl⟩ := s.exists_eq_asString
|
||
rw [utf8ByteSize', this, asString_data]
|
||
intro l
|
||
induction l with
|
||
| nil => simp [utf8ByteSize'.go]
|
||
| cons c cs ih =>
|
||
rw [utf8ByteSize'.go, ih, ← List.singleton_append, List.asString_append,
|
||
utf8ByteSize_append, Nat.add_comm]
|
||
congr
|
||
rw [← size_bytes, List.bytes_asString, List.utf8Encode_singleton,
|
||
List.size_toByteArray, length_utf8EncodeChar]
|
||
|
||
end
|
||
|
||
namespace String
|
||
|
||
instance : HSub String.Pos.Raw String String.Pos.Raw where
|
||
hSub p s := { byteIdx := p.byteIdx - s.utf8ByteSize }
|
||
|
||
instance : HSub String.Pos.Raw Char String.Pos.Raw where
|
||
hSub p c := { byteIdx := p.byteIdx - c.utf8Size }
|
||
|
||
@[export lean_string_pos_sub]
|
||
def Pos.Internal.subImpl : String.Pos.Raw → String.Pos.Raw → String.Pos.Raw :=
|
||
fun p₁ p₂ => ⟨p₁.byteIdx - p₂.byteIdx⟩
|
||
|
||
instance : HAdd String.Pos.Raw Char String.Pos.Raw where
|
||
hAdd p c := { byteIdx := p.byteIdx + c.utf8Size }
|
||
|
||
instance : HAdd Char String.Pos.Raw String.Pos.Raw where
|
||
hAdd c p := { byteIdx := c.utf8Size + p.byteIdx }
|
||
|
||
instance : HAdd String String.Pos.Raw String.Pos.Raw where
|
||
hAdd s p := { byteIdx := s.utf8ByteSize + p.byteIdx }
|
||
|
||
instance : HAdd String.Pos.Raw String String.Pos.Raw where
|
||
hAdd p s := { byteIdx := p.byteIdx + s.utf8ByteSize }
|
||
|
||
instance : LE String.Pos.Raw where
|
||
le p₁ p₂ := p₁.byteIdx ≤ p₂.byteIdx
|
||
|
||
instance : LT String.Pos.Raw where
|
||
lt p₁ p₂ := p₁.byteIdx < p₂.byteIdx
|
||
|
||
instance (p₁ p₂ : String.Pos.Raw) : Decidable (p₁ ≤ p₂) :=
|
||
inferInstanceAs (Decidable (p₁.byteIdx ≤ p₂.byteIdx))
|
||
|
||
instance (p₁ p₂ : String.Pos.Raw) : Decidable (p₁ < p₂) :=
|
||
inferInstanceAs (Decidable (p₁.byteIdx < p₂.byteIdx))
|
||
|
||
instance : Min String.Pos.Raw := minOfLe
|
||
instance : Max String.Pos.Raw := maxOfLe
|
||
|
||
theorem Pos.Raw.le_iff {i₁ i₂ : Pos.Raw} : i₁ ≤ i₂ ↔ i₁.byteIdx ≤ i₂.byteIdx := .rfl
|
||
|
||
theorem Pos.Raw.lt_iff {i₁ i₂ : Pos.Raw} : i₁ < i₂ ↔ i₁.byteIdx < i₂.byteIdx := .rfl
|
||
|
||
@[simp]
|
||
theorem byteIdx_endPos {s : String} : s.endPos.byteIdx = s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_ofByteArray {b : ByteArray} {h} :
|
||
(String.ofByteArray b h).utf8ByteSize = b.size := rfl
|
||
|
||
attribute [ext] String.Pos.Raw
|
||
|
||
instance : LT String :=
|
||
⟨fun s₁ s₂ => s₁.data < s₂.data⟩
|
||
|
||
@[extern "lean_string_dec_lt"]
|
||
instance decidableLT (s₁ s₂ : @& String) : Decidable (s₁ < s₂) :=
|
||
List.decidableLT s₁.data s₂.data
|
||
|
||
/--
|
||
Non-strict inequality on strings, typically used via the `≤` operator.
|
||
|
||
`a ≤ b` is defined to mean `¬ b < a`.
|
||
-/
|
||
@[expose, reducible] protected def le (a b : String) : Prop := ¬ b < a
|
||
|
||
instance : LE String :=
|
||
⟨String.le⟩
|
||
|
||
instance decLE (s₁ s₂ : String) : Decidable (s₁ ≤ s₂) :=
|
||
inferInstanceAs (Decidable (Not _))
|
||
|
||
/--
|
||
Converts a string to a list of characters.
|
||
|
||
Since strings are represented as dynamic arrays of bytes containing the string encoded using
|
||
UTF-8, this operation takes time and space linear in the length of the string.
|
||
|
||
Examples:
|
||
* `"abc".toList = ['a', 'b', 'c']`
|
||
* `"".toList = []`
|
||
* `"\n".toList = ['\n']`
|
||
-/
|
||
@[inline, expose]
|
||
def toList (s : String) : List Char :=
|
||
s.data
|
||
|
||
/--
|
||
Predicate for validity of positions inside a `String`.
|
||
|
||
There are multiple equivalent definitions for validity.
|
||
|
||
We say that a position is valid if the string obtained by taking all of the bytes up to, but
|
||
excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_zero`.
|
||
|
||
Similarly, a position is valid if the string obtained by taking all of the bytes starting at the
|
||
given position is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_utf8ByteSize`.
|
||
|
||
An equivalent condition is that the position is the length of the UTF-8 encoding of
|
||
some prefix of the characters of the string; see `Pos.isValid_iff_exists_append` and
|
||
`Pos.isValid_iff_exists_take_data`.
|
||
|
||
Another equivalent condition that can be checked efficiently is that the position is either the
|
||
end position or strictly smaller than the end position and the byte at the position satisfies
|
||
`UInt8.IsUTF8FirstByte`; see `Pos.isValid_iff_isUTF8FirstByte`.
|
||
|
||
Examples:
|
||
* `String.Pos.IsValid "abc" ⟨0⟩`
|
||
* `String.Pos.IsValid "abc" ⟨1⟩`
|
||
* `String.Pos.IsValid "abc" ⟨3⟩`
|
||
* `¬ String.Pos.IsValid "abc" ⟨4⟩`
|
||
* `String.Pos.IsValid "𝒫(A)" ⟨0⟩`
|
||
* `¬ String.Pos.IsValid "𝒫(A)" ⟨1⟩`
|
||
* `¬ String.Pos.IsValid "𝒫(A)" ⟨2⟩`
|
||
* `¬ String.Pos.IsValid "𝒫(A)" ⟨3⟩`
|
||
* `String.Pos.IsValid "𝒫(A)" ⟨4⟩`
|
||
-/
|
||
structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where private mk ::
|
||
le_endPos : off ≤ s.endPos
|
||
isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8
|
||
|
||
theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char} (b : ByteArray)
|
||
(h : l.utf8Encode ++ b = m.utf8Encode) : l <+: m := by
|
||
induction l generalizing m with
|
||
| nil => simp
|
||
| cons c l ih =>
|
||
replace h := congrArg ByteArray.utf8Decode? h
|
||
rw [List.utf8Decode?_utf8Encode] at h
|
||
rw [← List.singleton_append, List.utf8Encode_append, ByteArray.append_assoc,
|
||
ByteArray.utf8Decode?_utf8Encode_singleton_append] at h
|
||
suffices ∃ m', m = [c] ++ m' ∧ l.utf8Encode ++ b = m'.utf8Encode by
|
||
obtain ⟨m', rfl, hm'⟩ := this
|
||
simpa using ih hm'
|
||
have hx : (l.utf8Encode ++ b).utf8Decode?.isSome := by
|
||
exact Option.isSome_map ▸ Option.isSome_of_eq_some h
|
||
refine ⟨(l.utf8Encode ++ b).utf8Decode?.get hx |>.toList, ?_, by simp⟩
|
||
exact List.toArray_inj (Option.some_inj.1 (by simp [← h]))
|
||
|
||
open List in
|
||
theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||
∃ m₁ m₂ : List Char, m₁.utf8Encode = s.bytes.extract 0 p.byteIdx ∧ (m₁ ++ m₂).asString = s := by
|
||
obtain ⟨l, hl⟩ := s.isValidUTF8
|
||
obtain ⟨m₁, hm₁⟩ := h.isValidUTF8_extract_zero
|
||
suffices m₁ <+: l by
|
||
obtain ⟨m₂, rfl⟩ := this
|
||
refine ⟨m₁, m₂, hm₁.symm, ?_⟩
|
||
apply String.bytes_inj.1
|
||
simpa using hl.symm
|
||
apply List.isPrefix_of_utf8Encode_append_eq_utf8Encode (s.bytes.extract p.byteIdx s.bytes.size)
|
||
rw [← hl, ← hm₁, ← ByteArray.extract_eq_extract_append_extract _ (by simp),
|
||
ByteArray.extract_zero_size]
|
||
simpa using h.le_endPos
|
||
|
||
theorem Pos.Raw.IsValid.isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||
ByteArray.IsValidUTF8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by
|
||
obtain ⟨m₁, m₂, hm, rfl⟩ := h.exists
|
||
simp only [List.asString_append, bytes_append, List.bytes_asString]
|
||
rw [ByteArray.extract_append_eq_right]
|
||
· exact ByteArray.isValidUTF8_utf8Encode
|
||
· rw [hm]
|
||
simp only [List.asString_append, bytes_append, List.bytes_asString, ByteArray.size_extract,
|
||
ByteArray.size_append, Nat.sub_zero]
|
||
refine (Nat.min_eq_left ?_).symm
|
||
simpa [utf8ByteSize, Pos.Raw.le_iff] using h.le_endPos
|
||
· simp [utf8ByteSize]
|
||
|
||
theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ ∃ s₁ s₂ : String, s = s₁ ++ s₂ ∧ p = s₁.endPos := by
|
||
refine ⟨fun h => ⟨⟨_, h.isValidUTF8_extract_zero⟩, ⟨_, h.isValidUTF8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩
|
||
· apply String.bytes_inj.1
|
||
have := Pos.Raw.le_iff.1 h.le_endPos
|
||
simp_all [← size_bytes]
|
||
· have := byteIdx_endPos ▸ Pos.Raw.le_iff.1 h.le_endPos
|
||
apply String.Pos.Raw.ext
|
||
simp [Nat.min_eq_left this]
|
||
· rintro ⟨s₁, s₂, rfl, rfl⟩
|
||
refine ⟨by simp [Pos.Raw.le_iff], ?_⟩
|
||
simpa [ByteArray.extract_append_eq_left] using s₁.isValidUTF8
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_zero : (0 : Pos.Raw).byteIdx = 0 := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_zero {s : String} : (0 : Pos.Raw).IsValid s where
|
||
le_endPos := by simp [Pos.Raw.le_iff]
|
||
isValidUTF8_extract_zero := by simp
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_endPos {s : String} : s.endPos.IsValid s where
|
||
le_endPos := by simp [Pos.Raw.le_iff]
|
||
isValidUTF8_extract_zero := by simp [← size_bytes, s.isValidUTF8]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_empty_iff {p : Pos.Raw} : p.IsValid "" ↔ p = 0 := by
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨h₁, h₂⟩
|
||
simp only [le_iff, byteIdx_endPos, utf8ByteSize_empty, Nat.le_zero_eq] at h₁
|
||
ext
|
||
omega
|
||
· rintro rfl
|
||
simp
|
||
|
||
theorem Pos.Raw.isValid_asString {l : List Char} {p : Pos.Raw} :
|
||
p.IsValid l.asString ↔ ∃ i, p.byteIdx = (l.take i).asString.utf8ByteSize := by
|
||
rw [isValid_iff_exists_append]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨t₁, t₂, ht, rfl⟩
|
||
refine ⟨t₁.length, ?_⟩
|
||
have := congrArg String.data ht
|
||
simp only [List.data_asString, String.data_append] at this
|
||
simp [this]
|
||
· rintro ⟨i, hi⟩
|
||
refine ⟨(l.take i).asString, (l.drop i).asString, ?_, ?_⟩
|
||
· simp [← List.asString_append]
|
||
· simpa [Pos.Raw.ext_iff]
|
||
|
||
theorem Pos.Raw.isValid_iff_exists_take_data {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ ∃ i, p.byteIdx = (s.data.take i).asString.utf8ByteSize := by
|
||
obtain ⟨l, rfl⟩ := s.exists_eq_asString
|
||
simp [isValid_asString]
|
||
|
||
@[simp]
|
||
theorem bytes_singleton {c : Char} : (String.singleton c).bytes = [c].utf8Encode := by
|
||
simp [singleton]
|
||
|
||
theorem singleton_eq_asString {c : Char} : String.singleton c = [c].asString := by
|
||
simp [← String.bytes_inj]
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_singleton {c : Char} : (String.singleton c).utf8ByteSize = c.utf8Size := by
|
||
simp [← size_bytes, List.utf8Encode_singleton]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_singleton {c : Char} {p : Pos.Raw} :
|
||
p.IsValid (String.singleton c) ↔ p = 0 ∨ p.byteIdx = c.utf8Size := by
|
||
rw [singleton_eq_asString, Pos.Raw.isValid_asString]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨i, hi'⟩
|
||
obtain ⟨rfl, hi⟩ : i = 0 ∨ 1 ≤ i := by omega
|
||
· simp [Pos.Raw.ext_iff, hi']
|
||
· rw [hi', List.take_of_length_le (by simpa)]
|
||
simp [← singleton_eq_asString]
|
||
· rintro (rfl|hi)
|
||
· exact ⟨0, by simp⟩
|
||
· exact ⟨1, by simp [hi, ← singleton_eq_asString]⟩
|
||
|
||
/--
|
||
Returns the size of the byte slice delineated by the positions `lo` and `hi`.
|
||
-/
|
||
@[expose, inline]
|
||
def Pos.Raw.byteDistance (lo hi : Pos.Raw) : Nat :=
|
||
hi.byteIdx - lo.byteIdx
|
||
|
||
theorem Pos.Raw.byteDistance_eq {lo hi : Pos.Raw} : lo.byteDistance hi = hi.byteIdx - lo.byteIdx :=
|
||
(rfl)
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_sub_char {p : Pos.Raw} {c : Char} : (p - c).byteIdx = p.byteIdx - c.utf8Size := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_sub_string {p : Pos.Raw} {s : String} : (p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_add_string {p : Pos.Raw} {s : String} : (p + s).byteIdx = p.byteIdx + s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_string_add {s : String} {p : Pos.Raw} : (s + p).byteIdx = s.utf8ByteSize + p.byteIdx := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_add_char {p : Pos.Raw} {c : Char} : (p + c).byteIdx = p.byteIdx + c.utf8Size := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_char_add {c : Char} {p : Pos.Raw} : (c + p).byteIdx = c.utf8Size + p.byteIdx := rfl
|
||
|
||
theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
|
||
p.IsValid (s ++ t) ↔ p.IsValid s ∨ (s.endPos ≤ p ∧ (p - s).IsValid t) := by
|
||
obtain ⟨s, rfl⟩ := exists_eq_asString s
|
||
obtain ⟨t, rfl⟩ := exists_eq_asString t
|
||
rw [← List.asString_append, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨j, hj⟩
|
||
by_cases h : j ≤ s.length
|
||
· exact Or.inl ⟨j, by simp [hj, List.take_append_of_le_length h]⟩
|
||
· refine Or.inr ⟨?_, ⟨j - s.length, ?_⟩⟩
|
||
· simp [Pos.Raw.le_iff, hj, List.take_append, List.take_of_length_le (i := j) (l := s) (by omega)]
|
||
· simp [hj, List.take_append, List.take_of_length_le (i := j) (l := s) (by omega)]
|
||
· rintro (⟨j, hj⟩|⟨h, ⟨j, hj⟩⟩)
|
||
· refine ⟨min j s.length, ?_⟩
|
||
rw [List.take_append_of_le_length (Nat.min_le_right ..), ← List.take_eq_take_min, hj]
|
||
· refine ⟨s.length + j, ?_⟩
|
||
simp only [Pos.Raw.byteIdx_sub_string, byteIdx_endPos, Pos.Raw.le_iff] at hj h
|
||
simp only [List.take_append, List.take_of_length_le (i := s.length + j) (l := s) (by omega),
|
||
Nat.add_sub_cancel_left, List.asString_append, utf8ByteSize_append]
|
||
omega
|
||
|
||
theorem Pos.Raw.IsValid.append_left {t : String} {p : Pos.Raw} (h : p.IsValid t) (s : String) :
|
||
(s + p).IsValid (s ++ t) :=
|
||
isValid_append.2 (Or.inr ⟨by simp [Pos.Raw.le_iff], by
|
||
suffices p = s + p - s by simp [← this, h]
|
||
simp [Pos.Raw.ext_iff]⟩)
|
||
|
||
theorem Pos.Raw.IsValid.append_right {s : String} {p : Pos.Raw} (h : p.IsValid s) (t : String) :
|
||
p.IsValid (s ++ t) :=
|
||
isValid_append.2 (Or.inl h)
|
||
|
||
@[simp]
|
||
theorem append_singleton {s : String} {c : Char} : s ++ singleton c = s.push c := by
|
||
simp [← bytes_inj]
|
||
|
||
theorem Pos.Raw.isValid_push {s : String} {c : Char} {p : Pos.Raw} :
|
||
p.IsValid (s.push c) ↔ p.IsValid s ∨ p = s.endPos + c := by
|
||
rw [← append_singleton, isValid_append, isValid_singleton]
|
||
simp only [le_iff, byteIdx_endPos, Pos.Raw.ext_iff, byteIdx_sub_string, byteIdx_zero, byteIdx_add_char]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro (h|⟨h₁,(h₂|h₂)⟩)
|
||
· exact Or.inl h
|
||
· suffices p = s.endPos by simp [this]
|
||
simp only [Pos.Raw.ext_iff, byteIdx_endPos]
|
||
omega
|
||
· omega
|
||
· rintro (h|h)
|
||
· exact Or.inl h
|
||
· omega
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_push {s : String} {c : Char} :
|
||
(s.push c).utf8ByteSize = s.utf8ByteSize + c.utf8Size := by
|
||
simp [← size_bytes, List.utf8Encode_singleton]
|
||
|
||
theorem endPos_push {s : String} {c : Char} : (s.push c).endPos = s.endPos + c := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
theorem push_induction (s : String) (motive : String → Prop) (empty : motive "")
|
||
(push : ∀ b c, motive b → motive (b.push c)) : motive s := by
|
||
obtain ⟨m, rfl⟩ := s.exists_eq_asString
|
||
apply append_singleton_induction m (motive ·.asString)
|
||
· simpa
|
||
· intro l c hl
|
||
rw [List.asString_append, ← singleton_eq_asString, append_singleton]
|
||
exact push _ _ hl
|
||
where
|
||
append_singleton_induction (l : List Char) (motive : List Char → Prop) (nil : motive [])
|
||
(append_singleton : ∀ l a, motive l → motive (l ++ [a])) : motive l := by
|
||
rw [← l.reverse_reverse]
|
||
generalize l.reverse = m
|
||
induction m with
|
||
| nil => simpa
|
||
| cons a m ih =>
|
||
rw [List.reverse_cons]
|
||
exact append_singleton _ _ ih
|
||
|
||
/--
|
||
Accesses the indicated byte in the UTF-8 encoding of a string.
|
||
|
||
At runtime, this function is implemented by efficient, constant-time code.
|
||
-/
|
||
@[extern "lean_string_get_byte_fast", expose]
|
||
def getUTF8Byte (s : @& String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
|
||
s.bytes[p.byteIdx]
|
||
|
||
@[deprecated getUTF8Byte (since := "2025-10-01"), extern "lean_string_get_byte_fast", expose]
|
||
abbrev getUtf8Byte (s : String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 :=
|
||
s.getUTF8Byte p h
|
||
|
||
@[simp]
|
||
theorem endPos_empty : "".endPos = 0 := rfl
|
||
|
||
theorem Pos.Raw.isValid_iff_isUTF8FirstByte {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p = s.endPos ∨ ∃ (h : p < s.endPos), (s.getUTF8Byte p h).IsUTF8FirstByte := by
|
||
induction s using push_induction with
|
||
| empty => simp [Pos.Raw.lt_iff]
|
||
| push s c ih =>
|
||
rw [isValid_push, ih]
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ((rfl|⟨h, hb⟩)|h)
|
||
· refine Or.inr ⟨by simp [Pos.Raw.lt_iff, Char.utf8Size_pos], ?_⟩
|
||
simp only [getUTF8Byte, bytes_push, byteIdx_endPos]
|
||
rw [ByteArray.getElem_append_right (by simp)]
|
||
simp [List.isUTF8FirstByte_getElem_utf8Encode_singleton]
|
||
· refine Or.inr ⟨by simp [lt_iff] at h ⊢; omega, ?_⟩
|
||
simp only [getUTF8Byte, bytes_push]
|
||
rwa [ByteArray.getElem_append_left, ← getUTF8Byte]
|
||
· exact Or.inl (by simpa [endPos_push])
|
||
· rintro (h|⟨h, hb⟩)
|
||
· exact Or.inr (by simpa [endPos_push] using h)
|
||
· simp only [getUTF8Byte, bytes_push] at hb
|
||
by_cases h' : p < s.endPos
|
||
· refine Or.inl (Or.inr ⟨h', ?_⟩)
|
||
rwa [ByteArray.getElem_append_left h', ← getUTF8Byte] at hb
|
||
· refine Or.inl (Or.inl ?_)
|
||
rw [ByteArray.getElem_append_right (by simp [lt_iff] at h' ⊢; omega)] at hb
|
||
simp only [size_bytes, List.isUTF8FirstByte_getElem_utf8Encode_singleton] at hb
|
||
ext
|
||
simp only [lt_iff, byteIdx_endPos, Nat.not_lt] at ⊢ h'
|
||
omega
|
||
|
||
/--
|
||
Returns `true` if `p` is a valid UTF-8 position in the string `s`.
|
||
|
||
This means that `p ≤ s.endPos` and `p` lies on a UTF-8 character boundary. At runtime, this
|
||
operation takes constant time.
|
||
|
||
Examples:
|
||
* `String.Pos.isValid "abc" ⟨0⟩ = true`
|
||
* `String.Pos.isValid "abc" ⟨1⟩ = true`
|
||
* `String.Pos.isValid "abc" ⟨3⟩ = true`
|
||
* `String.Pos.isValid "abc" ⟨4⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨0⟩ = true`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨1⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨2⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨3⟩ = false`
|
||
* `String.Pos.isValid "𝒫(A)" ⟨4⟩ = true`
|
||
-/
|
||
@[extern "lean_string_is_valid_pos", expose]
|
||
def Pos.Raw.isValid (s : @&String) (p : @& Pos.Raw) : Bool :=
|
||
if h : p < s.endPos then
|
||
(s.getUTF8Byte p h).IsUTF8FirstByte
|
||
else
|
||
p = s.endPos
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_eq_true_iff {s : String} {p : Pos.Raw} : p.isValid s = true ↔ p.IsValid s := by
|
||
rw [isValid_iff_isUTF8FirstByte]
|
||
fun_cases isValid s p with
|
||
| case1 h =>
|
||
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
|
||
rintro rfl
|
||
simp [lt_iff] at h
|
||
| case2 => simp_all
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_eq_false_iff {s : String} {p : Pos.Raw} : p.isValid s = false ↔ ¬ p.IsValid s := by
|
||
rw [← Bool.not_eq_true, Pos.Raw.isValid_eq_true_iff]
|
||
|
||
instance {s : String} {p : Pos.Raw} : Decidable (p.IsValid s) :=
|
||
decidable_of_iff (p.isValid s = true) Pos.Raw.isValid_eq_true_iff
|
||
|
||
theorem Pos.Raw.isValid_iff_isSome_utf8DecodeChar? {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p = s.endPos ∨ (s.bytes.utf8DecodeChar? p.byteIdx).isSome := by
|
||
refine ⟨?_, fun h => h.elim (by rintro rfl; simp) (fun h => ?_)⟩
|
||
· induction s using push_induction with
|
||
| empty => simp [ByteArray.utf8DecodeChar?]
|
||
| push s c ih =>
|
||
simp only [isValid_push, bytes_push]
|
||
refine ?_ ∘ Or.imp_left ih
|
||
rintro ((rfl|h)|rfl)
|
||
· rw [ByteArray.utf8DecodeChar?_eq_utf8DecodeChar?_extract, ByteArray.extract_append_eq_right (by simp) (by simp)]
|
||
simp
|
||
· exact Or.inr (ByteArray.isSome_utf8DecodeChar?_append h _)
|
||
· simp [endPos_push]
|
||
· refine isValid_iff_isUTF8FirstByte.2 (Or.inr ?_)
|
||
obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
|
||
refine ⟨?_, ?_⟩
|
||
· have := ByteArray.le_size_of_utf8DecodeChar?_eq_some hc
|
||
have := c.utf8Size_pos
|
||
simp only [lt_iff, byteIdx_endPos, gt_iff_lt, ← size_bytes]
|
||
omega
|
||
· rw [getUTF8Byte]
|
||
exact ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h
|
||
|
||
theorem _root_.ByteArray.IsValidUTF8.isUTF8FirstByte_getElem_zero {b : ByteArray}
|
||
(h : b.IsValidUTF8) (h₀ : 0 < b.size) : b[0].IsUTF8FirstByte := by
|
||
rcases h with ⟨m, rfl⟩
|
||
have : m ≠ [] := by
|
||
rintro rfl
|
||
simp at h₀
|
||
conv => congr; congr; rw [← List.cons_head_tail this, ← List.singleton_append, List.utf8Encode_append]
|
||
rw [ByteArray.getElem_append_left]
|
||
· exact List.isUTF8FirstByte_getElem_utf8Encode_singleton.2 rfl
|
||
· simp [List.utf8Encode_singleton, Char.utf8Size_pos]
|
||
|
||
theorem isUTF8FirstByte_getUTF8Byte_zero {b : String} {h} : (b.getUTF8Byte 0 h).IsUTF8FirstByte :=
|
||
b.isValidUTF8.isUTF8FirstByte_getElem_zero _
|
||
|
||
protected theorem Pos.Raw.le_trans {a b c : Pos.Raw} : a ≤ b → b ≤ c → a ≤ c := by
|
||
simpa [le_iff] using Nat.le_trans
|
||
|
||
protected theorem Pos.Raw.lt_of_lt_of_le {a b c : Pos.Raw} : a < b → b ≤ c → a < c := by
|
||
simpa [le_iff, lt_iff] using Nat.lt_of_lt_of_le
|
||
|
||
theorem Pos.Raw.isValidUTF8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.endPos) :
|
||
(s.bytes.extract p₁.byteIdx p₂.byteIdx).IsValidUTF8 ↔ p₁ = p₂ ∨ (p₁.IsValid s ∧ p₂.IsValid s) := by
|
||
have hle'' : p₂.byteIdx ≤ s.bytes.size := by simpa [le_iff] using hle'
|
||
refine ⟨fun h => Classical.or_iff_not_imp_left.2 (fun h' => ?_), ?_⟩
|
||
· have hlt : p₁ < p₂ := by
|
||
simp_all [le_iff, lt_iff, Pos.Raw.ext_iff]
|
||
omega
|
||
have h₁ : p₁.IsValid s := by
|
||
rw [isValid_iff_isUTF8FirstByte]
|
||
refine Or.inr ⟨Pos.Raw.lt_of_lt_of_le hlt hle', ?_⟩
|
||
have hlt' : 0 < p₂.byteIdx - p₁.byteIdx := by
|
||
simp [lt_iff] at hlt
|
||
omega
|
||
have := h.isUTF8FirstByte_getElem_zero
|
||
simp only [ByteArray.size_extract, Nat.min_eq_left hle'', hlt', ByteArray.getElem_extract, Nat.add_zero] at this
|
||
simp [getUTF8Byte, this trivial]
|
||
refine ⟨h₁, ⟨hle', ?_⟩⟩
|
||
rw [ByteArray.extract_eq_extract_append_extract p₁.byteIdx (by simp) hle]
|
||
exact h₁.isValidUTF8_extract_zero.append h
|
||
· refine fun h => h.elim (by rintro rfl; simp) (fun ⟨h₁, h₂⟩ => ?_)
|
||
let t : String := ⟨_, h₂.isValidUTF8_extract_zero⟩
|
||
have htb : t.bytes = s.bytes.extract 0 p₂.byteIdx := rfl
|
||
have ht : p₁.IsValid t := by
|
||
refine ⟨?_, ?_⟩
|
||
· simpa [le_iff, t, Nat.min_eq_left hle'', ← size_bytes]
|
||
· simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUTF8_extract_zero
|
||
simpa [htb, ByteArray.extract_extract, Nat.zero_add, Nat.min_eq_left hle'', ← size_bytes]
|
||
using ht.isValidUTF8_extract_utf8ByteSize
|
||
|
||
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 :=
|
||
⟨fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩
|
||
|
||
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract p.byteIdx s.utf8ByteSize).IsValidUTF8 := by
|
||
refine ⟨fun h => ⟨h.le_endPos, h.isValidUTF8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩
|
||
rw [← byteIdx_endPos, isValidUTF8_extract_iff _ _ h₁ (by simp [le_iff])] at h₂
|
||
obtain (rfl|h₂) := h₂
|
||
· simp
|
||
· exact h₂.1
|
||
|
||
/--
|
||
A `ValidPos s` is a byte offset in `s` together with a proof that this position is at a UTF-8
|
||
character boundary.
|
||
-/
|
||
@[ext]
|
||
structure ValidPos (s : String) where
|
||
/-- The underlying byte offset of the `ValidPos`. -/
|
||
offset : Pos.Raw
|
||
/-- The proof that `offset` is valid for the string `s`. -/
|
||
isValid : offset.IsValid s
|
||
deriving @[expose] DecidableEq
|
||
|
||
/-- The start position of `s`, as an `s.ValidPos`. -/
|
||
@[inline, expose]
|
||
def startValidPos (s : String) : s.ValidPos where
|
||
offset := 0
|
||
isValid := by simp
|
||
|
||
instance {s : String} : Inhabited s.ValidPos where
|
||
default := s.startValidPos
|
||
|
||
@[simp]
|
||
theorem offset_startValidPos {s : String} : s.startValidPos.offset = 0 := (rfl)
|
||
|
||
/-- The past-the-end position of `s`, as an `s.ValidPos`. -/
|
||
@[inline, expose]
|
||
def endValidPos (s : String) : s.ValidPos where
|
||
offset := s.endPos
|
||
isValid := by simp
|
||
|
||
instance {s : String} : LE s.ValidPos where
|
||
le l r := l.offset ≤ r.offset
|
||
|
||
instance {s : String} : LT s.ValidPos where
|
||
lt l r := l.offset < r.offset
|
||
|
||
theorem ValidPos.le_iff {s : String} {l r : s.ValidPos} : l ≤ r ↔ l.offset ≤ r.offset :=
|
||
Iff.rfl
|
||
|
||
theorem ValidPos.lt_iff {s : String} {l r : s.ValidPos} : l < r ↔ l.offset < r.offset :=
|
||
Iff.rfl
|
||
|
||
instance {s : String} (l r : s.ValidPos) : Decidable (l ≤ r) :=
|
||
decidable_of_iff' _ ValidPos.le_iff
|
||
|
||
instance {s : String} (l r : s.ValidPos) : Decidable (l < r) :=
|
||
decidable_of_iff' _ ValidPos.lt_iff
|
||
|
||
theorem ValidPos.isValidUTF8_extract {s : String} (pos₁ pos₂ : s.ValidPos) :
|
||
(s.bytes.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUTF8 := by
|
||
by_cases h : pos₁ ≤ pos₂
|
||
· exact (Pos.Raw.isValidUTF8_extract_iff _ _ h pos₂.isValid.le_endPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩)
|
||
· rw [ByteArray.extract_eq_empty_iff.2]
|
||
· exact ByteArray.isValidUTF8_empty
|
||
· rw [Nat.min_eq_left]
|
||
· rw [ValidPos.le_iff, Pos.Raw.le_iff] at h
|
||
omega
|
||
· have := Pos.Raw.le_iff.1 pos₂.isValid.le_endPos
|
||
rwa [size_bytes, ← byteIdx_endPos]
|
||
|
||
/--
|
||
A region or slice of some underlying string.
|
||
|
||
A slice consists of a string together with the start and end byte positions of a region of
|
||
interest. Actually extracting a substring requires copying and memory allocation, while many
|
||
slices of the same underlying string may exist with very little overhead. While this could be achieved by tracking the bounds by hand, the slice API is much more convenient.
|
||
|
||
`String.Slice` bundles proofs to ensure that the start and end positions always delineate a valid
|
||
string. For this reason, it should be preferred over `Substring`.
|
||
-/
|
||
structure Slice where
|
||
/-- The underlying strings. -/
|
||
str : String
|
||
/-- The byte position of the start of the string slice. -/
|
||
startInclusive : str.ValidPos
|
||
/-- The byte position of the end of the string slice. -/
|
||
endExclusive : str.ValidPos
|
||
/-- The slice is not degenerate (but it may be empty). -/
|
||
startInclusive_le_endExclusive : startInclusive ≤ endExclusive
|
||
|
||
instance : Inhabited Slice where
|
||
default := ⟨"", "".startValidPos, "".startValidPos, by simp [ValidPos.le_iff, Pos.Raw.le_iff]⟩
|
||
|
||
/--
|
||
Returns a slice that contains the entire string.
|
||
-/
|
||
@[inline, expose] -- expose for the defeq `s.toSlice.str = s`.
|
||
def toSlice (s : String) : Slice where
|
||
str := s
|
||
startInclusive := s.startValidPos
|
||
endExclusive := s.endValidPos
|
||
startInclusive_le_endExclusive := by simp [ValidPos.le_iff, Pos.Raw.le_iff]
|
||
|
||
/-- The number of bytes of the UTF-8 encoding of the string slice. -/
|
||
@[expose]
|
||
def Slice.utf8ByteSize (s : Slice) : Nat :=
|
||
s.startInclusive.offset.byteDistance s.endExclusive.offset
|
||
|
||
theorem Slice.utf8ByteSize_eq {s : Slice} :
|
||
s.utf8ByteSize = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx := (rfl)
|
||
|
||
instance : HAdd Pos.Raw Slice Pos.Raw where
|
||
hAdd p s := { byteIdx := p.byteIdx + s.utf8ByteSize }
|
||
|
||
instance : HAdd Slice Pos.Raw Pos.Raw where
|
||
hAdd s p := { byteIdx := s.utf8ByteSize + p.byteIdx }
|
||
|
||
instance : HSub Pos.Raw Slice Pos.Raw where
|
||
hSub p s := { byteIdx := p.byteIdx - s.utf8ByteSize }
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_add_slide {p : Pos.Raw} {s : Slice} :
|
||
(p + s).byteIdx = p.byteIdx + s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_slice_add {s : Slice} {p : Pos.Raw} :
|
||
(s + p).byteIdx = s.utf8ByteSize + p.byteIdx := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_sub_slice {p : Pos.Raw} {s : Slice} :
|
||
(p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
|
||
|
||
/-- The end position of a slice, as a `Pos.Raw`. -/
|
||
@[expose]
|
||
def Slice.rawEndPos (s : Slice) : Pos.Raw where
|
||
byteIdx := s.utf8ByteSize
|
||
|
||
@[simp]
|
||
theorem Slice.byteIdx_rawEndPos {s : Slice} : s.rawEndPos.byteIdx = s.utf8ByteSize := (rfl)
|
||
|
||
/--
|
||
Offsets `p` by `offset` on the left. This is not an `HAdd` instance because it should be a
|
||
relatively rare operation, so we use a name to make accidental use less likely. To offset a position
|
||
by the size of a character character `c` or string `s`, you can use `c + p` resp. `s + p`.
|
||
|
||
This should be seen as an operation that converts relative positions into absolute positions.
|
||
|
||
See also `Pos.Raw.increaseBy`, which is an "advancing" operation.
|
||
-/
|
||
@[expose, inline]
|
||
def Pos.Raw.offsetBy (p : Pos.Raw) (offset : Pos.Raw) : Pos.Raw where
|
||
byteIdx := offset.byteIdx + p.byteIdx
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_offsetBy {p : Pos.Raw} {offset : Pos.Raw} :
|
||
(p.offsetBy offset).byteIdx = offset.byteIdx + p.byteIdx := (rfl)
|
||
|
||
/--
|
||
Decreases `p` by `offset`. This is not an `HSub` instance because it should be a relatively
|
||
rare operation, so we use a name to make accidental use less likely. To unoffset a position
|
||
by the size of a character `c` or string `s`, you can use `p - c` resp. `p - s`.
|
||
|
||
This should be seen as an operation that converts absolute positions into relative positions.
|
||
|
||
See also `Pos.Raw.decreaseBy`, which is an "unadvancing" operation.
|
||
-/
|
||
@[expose, inline]
|
||
def Pos.Raw.unoffsetBy (p : Pos.Raw) (offset : Pos.Raw) : Pos.Raw where
|
||
byteIdx := p.byteIdx - offset.byteIdx
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_unoffsetBy {p : Pos.Raw} {offset : Pos.Raw} :
|
||
(p.unoffsetBy offset).byteIdx = p.byteIdx - offset.byteIdx := (rfl)
|
||
|
||
/-- Criterion for validity of positions in string slices. -/
|
||
structure Pos.Raw.IsValidForSlice (s : Slice) (p : Pos.Raw) : Prop where
|
||
le_utf8ByteSize : p ≤ s.rawEndPos
|
||
isValid_offsetBy : (p.offsetBy s.startInclusive.offset).IsValid s.str
|
||
|
||
/--
|
||
Accesses the indicated byte in the UTF-8 encoding of a string slice.
|
||
|
||
At runtime, this function is implemented by efficient, constant-time code.
|
||
-/
|
||
@[inline, expose]
|
||
def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=
|
||
s.str.getUTF8Byte (p.offsetBy s.startInclusive.offset) (by
|
||
have := s.endExclusive.isValid.le_endPos
|
||
simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.le_iff, byteIdx_endPos,
|
||
Pos.Raw.byteIdx_offsetBy] at *
|
||
omega)
|
||
|
||
/--
|
||
Accesses the indicated byte in the UTF-8 encoding of the string slice, or panics if the position
|
||
is out-of-bounds.
|
||
-/
|
||
@[expose]
|
||
def Slice.getUTF8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
|
||
if h : p < s.rawEndPos then
|
||
s.getUTF8Byte p h
|
||
else
|
||
panic! "String slice access is out of bounds."
|
||
|
||
@[extern "lean_string_utf8_extract"]
|
||
def ValidPos.extract {s : @& String} (b e : @& s.ValidPos) : String where
|
||
bytes := s.bytes.extract b.offset.byteIdx e.offset.byteIdx
|
||
isValidUTF8 := b.isValidUTF8_extract e
|
||
|
||
/-- Creates a `String` from a `String.Slice` by copying the bytes. -/
|
||
@[inline]
|
||
def Slice.copy (s : Slice) : String :=
|
||
s.startInclusive.extract s.endExclusive
|
||
|
||
theorem Slice.bytes_copy {s : Slice} :
|
||
s.copy.bytes = s.str.bytes.extract s.startInclusive.offset.byteIdx s.endExclusive.offset.byteIdx := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_copy {s : Slice} :
|
||
s.copy.utf8ByteSize = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx:= by
|
||
simp [← size_bytes, bytes_copy]
|
||
rw [Nat.min_eq_left (by simpa [Pos.Raw.le_iff] using s.endExclusive.isValid.le_endPos)]
|
||
|
||
@[simp]
|
||
theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.rawEndPos := by
|
||
simp [Pos.Raw.ext_iff, utf8ByteSize_eq]
|
||
|
||
theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.rawEndPos} :
|
||
s.getUTF8Byte p h = s.copy.getUTF8Byte p (by simpa) := by
|
||
simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]
|
||
|
||
theorem Slice.getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h} :
|
||
s.copy.getUTF8Byte p h = s.getUTF8Byte p (by simpa using h) := by
|
||
rw [getUTF8Byte_eq_getUTF8Byte_copy]
|
||
|
||
theorem Slice.isUTF8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
|
||
(s.getUTF8Byte 0 h).IsUTF8FirstByte := by
|
||
simpa [getUTF8Byte_eq_getUTF8Byte_copy] using s.copy.isUTF8FirstByte_getUTF8Byte_zero
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
|
||
p.IsValid s.copy ↔ p.IsValidForSlice s := by
|
||
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
|
||
· simpa using h₁
|
||
· have := s.startInclusive_le_endExclusive
|
||
simp_all only [Slice.endPos_copy, le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq,
|
||
ValidPos.le_iff]
|
||
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] at h₂
|
||
rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff] at h₂
|
||
· rcases h₂ with (h₂|⟨-, h₂⟩)
|
||
· rw [← h₂]
|
||
exact s.startInclusive.isValid
|
||
· exact h₂
|
||
· simp [le_iff]
|
||
· have := s.endExclusive.isValid.le_endPos
|
||
simp_all [le_iff]
|
||
omega
|
||
· simpa using h₁
|
||
· have := s.startInclusive_le_endExclusive
|
||
simp_all only [le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq, ValidPos.le_iff]
|
||
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)]
|
||
rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff]
|
||
· exact Or.inr ⟨s.startInclusive.isValid, h₂⟩
|
||
· simp [le_iff]
|
||
· have := s.endExclusive.isValid.le_endPos
|
||
simp_all [le_iff]
|
||
omega
|
||
|
||
/--
|
||
A `Slice.Pos s` is a byte offset in `s` together with a proof that this position is at a UTF-8
|
||
character boundary.
|
||
-/
|
||
@[ext]
|
||
structure Slice.Pos (s : Slice) where
|
||
/-- The underlying byte offset of the `Slice.Pos`. -/
|
||
offset : String.Pos.Raw
|
||
/-- The proof that `offset` is valid for the string slice `s`. -/
|
||
isValidForSlice : offset.IsValidForSlice s
|
||
deriving @[expose] DecidableEq
|
||
|
||
/-- The start position of `s`, as an `s.Pos`. -/
|
||
@[inline, expose]
|
||
def Slice.startPos (s : Slice) : s.Pos where
|
||
offset := 0
|
||
isValidForSlice := ⟨by simp [Pos.Raw.le_iff], by simpa using s.startInclusive.isValid⟩
|
||
|
||
@[simp]
|
||
theorem ByteString.Slice.offset_startPos {s : Slice} : s.startPos.offset = 0 := (rfl)
|
||
|
||
instance {s : Slice} : Inhabited s.Pos where
|
||
default := s.startPos
|
||
|
||
@[simp]
|
||
theorem Slice.offset_startInclusive_add_self {s : Slice} :
|
||
s.startInclusive.offset + s = s.endExclusive.offset := by
|
||
have := s.startInclusive_le_endExclusive
|
||
simp_all [String.Pos.Raw.ext_iff, ValidPos.le_iff, Pos.Raw.le_iff, utf8ByteSize_eq]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_endPos_left {p : Pos.Raw} {s : String} :
|
||
s.endPos.offsetBy p = p + s := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_endPos_right {p : Pos.Raw} {s : String} :
|
||
p.offsetBy s.endPos = s + p := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_sliceRawEndPos_left {p : Pos.Raw} {s : Slice} :
|
||
s.rawEndPos.offsetBy p = p + s := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_sliceRawEndPos_right {p : Pos.Raw} {s : Slice} :
|
||
p.offsetBy s.rawEndPos = s + p := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
/-- The past-the-end position of `s`, as an `s.Pos`. -/
|
||
@[inline, expose]
|
||
def Slice.endPos (s : Slice) : s.Pos where
|
||
offset := s.rawEndPos
|
||
isValidForSlice := ⟨by simp [Pos.Raw.le_iff], by simpa using s.endExclusive.isValid⟩
|
||
|
||
@[simp]
|
||
theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.rawEndPos := (rfl)
|
||
|
||
instance {s : Slice} : LE s.Pos where
|
||
le l r := l.offset ≤ r.offset
|
||
|
||
instance {s : Slice} : LT s.Pos where
|
||
lt l r := l.offset < r.offset
|
||
|
||
theorem Slice.Pos.le_iff {s : Slice} {l r : s.Pos} : l ≤ r ↔ l.offset ≤ r.offset :=
|
||
Iff.rfl
|
||
|
||
theorem Slice.Pos.lt_iff {s : Slice} {l r : s.Pos} : l < r ↔ l.offset < r.offset :=
|
||
Iff.rfl
|
||
|
||
instance {s : Slice} (l r : s.Pos) : Decidable (l ≤ r) :=
|
||
decidable_of_iff' _ Slice.Pos.le_iff
|
||
|
||
instance {s : Slice} (l r : s.Pos) : Decidable (l < r) :=
|
||
decidable_of_iff' _ Slice.Pos.lt_iff
|
||
|
||
theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} :
|
||
p.IsValidForSlice s ↔ (p = s.rawEndPos ∨ (∃ (h : p < s.rawEndPos), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
|
||
simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy]
|
||
|
||
/-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/
|
||
@[expose]
|
||
def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool :=
|
||
if h : p < s.rawEndPos then
|
||
(s.getUTF8Byte p h).IsUTF8FirstByte
|
||
else
|
||
p = s.rawEndPos
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValidForSlice_eq_true_iff {s : Slice} {p : Pos.Raw} :
|
||
p.isValidForSlice s = true ↔ p.IsValidForSlice s := by
|
||
rw [isValidForSlice_iff_isUTF8FirstByte]
|
||
fun_cases isValidForSlice with
|
||
| case1 h =>
|
||
simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self]
|
||
rintro rfl
|
||
simp [lt_iff] at h
|
||
| case2 => simp_all
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValidForSlice_eq_false_iff {s : Slice} {p : Pos.Raw} :
|
||
p.isValidForSlice s = false ↔ ¬ p.IsValidForSlice s := by
|
||
rw [← Bool.not_eq_true, isValidForSlice_eq_true_iff]
|
||
|
||
instance {s : Slice} {p : Pos.Raw} : Decidable (p.IsValidForSlice s) :=
|
||
decidable_of_iff _ Pos.Raw.isValidForSlice_eq_true_iff
|
||
|
||
theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?_copy {s : Slice} {p : Pos.Raw} :
|
||
p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (s.copy.bytes.utf8DecodeChar? p.byteIdx).isSome := by
|
||
rw [← isValid_copy_iff, isValid_iff_isSome_utf8DecodeChar?, Slice.endPos_copy]
|
||
|
||
theorem Slice.bytes_str_eq {s : Slice} :
|
||
s.str.bytes = s.str.bytes.extract 0 s.startInclusive.offset.byteIdx ++
|
||
s.copy.bytes ++ s.str.bytes.extract s.endExclusive.offset.byteIdx s.str.bytes.size := by
|
||
rw [bytes_copy, ← ByteArray.extract_eq_extract_append_extract, ← ByteArray.extract_eq_extract_append_extract,
|
||
ByteArray.extract_zero_size]
|
||
· simp
|
||
· simpa [Pos.Raw.le_iff] using s.endExclusive.isValid.le_endPos
|
||
· simp
|
||
· simpa [Pos.Raw.le_iff] using s.startInclusive_le_endExclusive
|
||
|
||
theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar? {s : Slice} {p : Pos.Raw} :
|
||
p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (p < s.rawEndPos ∧ (s.str.bytes.utf8DecodeChar? (s.startInclusive.offset.byteIdx + p.byteIdx)).isSome) := by
|
||
refine ⟨?_, ?_⟩
|
||
· rw [isValidForSlice_iff_isSome_utf8DecodeChar?_copy]
|
||
rintro (rfl|h)
|
||
· simp
|
||
· refine Or.inr ⟨?_, ?_⟩
|
||
· have := ByteArray.lt_size_of_isSome_utf8DecodeChar? h
|
||
simpa [Pos.Raw.lt_iff] using this
|
||
· rw [ByteArray.utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h
|
||
rw [Slice.bytes_str_eq, ByteArray.append_assoc, ByteArray.utf8DecodeChar?_eq_utf8DecodeChar?_extract]
|
||
simp only [ByteArray.size_append, ByteArray.size_extract, Nat.sub_zero, Nat.le_refl,
|
||
Nat.min_eq_left]
|
||
have h' : s.startInclusive.offset.byteIdx ≤ s.str.bytes.size := by
|
||
simpa [le_iff] using s.startInclusive.isValid.le_endPos
|
||
rw [Nat.min_eq_left h', ByteArray.extract_append_size_add' (by simp [size_bytes ▸ h']),
|
||
ByteArray.extract_append, Nat.add_sub_cancel_left]
|
||
rw [ByteArray.extract_eq_extract_append_extract s.copy.bytes.size]
|
||
· rw [ByteArray.append_assoc]
|
||
apply ByteArray.isSome_utf8DecodeChar?_append h
|
||
· have := ByteArray.lt_size_of_isSome_utf8DecodeChar? h
|
||
simp only [size_bytes, Slice.utf8ByteSize_copy, ByteArray.size_extract, Nat.le_refl,
|
||
Nat.min_eq_left] at this
|
||
simp only [size_bytes, Slice.utf8ByteSize_copy, ge_iff_le]
|
||
omega
|
||
· simp
|
||
· rw [isValidForSlice_iff_isUTF8FirstByte]
|
||
rintro (rfl|⟨h₁, h₂⟩)
|
||
· simp
|
||
· exact Or.inr ⟨h₁, ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h₂⟩
|
||
|
||
/-- Returns the byte at a position in a slice that is not the end position. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=
|
||
s.getUTF8Byte pos.offset (by
|
||
have := pos.isValidForSlice.le_utf8ByteSize
|
||
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff]
|
||
omega)
|
||
|
||
theorem Slice.Pos.isUTF8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.byte h).IsUTF8FirstByte :=
|
||
((Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2))
|
||
|
||
/-- Given a valid position on a slice `s`, obtains the corresponding valid position on the
|
||
underlying string `s.str`. -/
|
||
@[inline]
|
||
def Slice.Pos.str {s : Slice} (pos : s.Pos) : s.str.ValidPos where
|
||
offset := pos.offset.offsetBy s.startInclusive.offset
|
||
isValid := pos.isValidForSlice.isValid_offsetBy
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_str {s : Slice} {pos : s.Pos} :
|
||
pos.str.offset = pos.offset.offsetBy s.startInclusive.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_str_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
|
||
pos.str.offset ≤ s.endExclusive.offset := by
|
||
have := pos.isValidForSlice.le_utf8ByteSize
|
||
have := s.startInclusive_le_endExclusive
|
||
simp only [Pos.Raw.le_iff, byteIdx_rawEndPos, utf8ByteSize_eq, offset_str,
|
||
Pos.Raw.byteIdx_offsetBy, ValidPos.le_iff] at *
|
||
omega
|
||
|
||
theorem Slice.Pos.offset_le_offset_str {s : Slice} {pos : s.Pos} :
|
||
pos.offset ≤ pos.str.offset := by
|
||
simp [String.Pos.Raw.le_iff]
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
|
||
pos.offset ≤ s.endExclusive.offset :=
|
||
Pos.Raw.le_trans offset_le_offset_str offset_str_le_offset_endExclusive
|
||
|
||
/-- Given a slice and a valid position within the slice, obtain a new slice on the same underlying
|
||
string by replacing the start of the slice with the given position. -/
|
||
@[inline, expose] -- for the defeq `(s.replaceStart pos).str = s.str`
|
||
def Slice.replaceStart (s : Slice) (pos : s.Pos) : Slice where
|
||
str := s.str
|
||
startInclusive := pos.str
|
||
endExclusive := s.endExclusive
|
||
startInclusive_le_endExclusive := Pos.offset_str_le_offset_endExclusive
|
||
|
||
@[simp]
|
||
theorem Slice.str_replaceStart {s : Slice} {pos : s.Pos} :
|
||
(s.replaceStart pos).str = s.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.startInclusive_replaceStart {s : Slice} {pos : s.Pos} :
|
||
(s.replaceStart pos).startInclusive = pos.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.endExclusive_replaceStart {s : Slice} {pos : s.Pos} :
|
||
(s.replaceStart pos).endExclusive = s.endExclusive := rfl
|
||
|
||
/-- Given a slice and a valid position within the slice, obtain a new slice on the same underlying
|
||
string by replacing the end of the slice with the given position. -/
|
||
@[inline, expose] -- for the defeq `(s.replaceEnd pos).str = s.str`
|
||
def Slice.replaceEnd (s : Slice) (pos : s.Pos) : Slice where
|
||
str := s.str
|
||
startInclusive := s.startInclusive
|
||
endExclusive := pos.str
|
||
startInclusive_le_endExclusive := by simp [ValidPos.le_iff, String.Pos.Raw.le_iff]
|
||
|
||
@[simp]
|
||
theorem Slice.str_replaceEnd {s : Slice} {pos : s.Pos} :
|
||
(s.replaceEnd pos).str = s.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.startInclusive_replaceEnd {s : Slice} {pos : s.Pos} :
|
||
(s.replaceEnd pos).startInclusive = s.startInclusive := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.endExclusive_replaceEnd {s : Slice} {pos : s.Pos} :
|
||
(s.replaceEnd pos).endExclusive = pos.str := rfl
|
||
|
||
/-- Given a slice and two valid positions within the slice, obtain a new slice on the same underlying
|
||
string formed by the new bounds. -/
|
||
@[inline, expose] -- for the defeq `(s.replaceStartEnd newStart newEnd).str = s.str`
|
||
def Slice.replaceStartEnd (s : Slice) (newStart newEnd : s.Pos)
|
||
(h : newStart.offset ≤ newEnd.offset) : Slice where
|
||
str := s.str
|
||
startInclusive := newStart.str
|
||
endExclusive := newEnd.str
|
||
startInclusive_le_endExclusive := by simpa [ValidPos.le_iff, Pos.Raw.le_iff] using h
|
||
|
||
@[simp]
|
||
theorem Slice.str_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.replaceStartEnd newStart newEnd h).str = s.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.startInclusive_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.replaceStartEnd newStart newEnd h).startInclusive = newStart.str := rfl
|
||
|
||
@[simp]
|
||
theorem Slice.endExclusive_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.replaceStartEnd newStart newEnd h).endExclusive = newEnd.str := rfl
|
||
|
||
/-- Given a slice and two valid positions within the slice, obtain a new slice on the same underlying
|
||
string formed by the new bounds, or panic if the given end is strictly less than the given start. -/
|
||
def Slice.replaceStartEnd! (s : Slice) (newStart newEnd : s.Pos) : Slice :=
|
||
if h : newStart.offset ≤ newEnd.offset then
|
||
s.replaceStartEnd newStart newEnd h
|
||
else
|
||
panic! "Starting position must be less than or equal to end position."
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_replaceStart {s : Slice} {pos : s.Pos} :
|
||
(s.replaceStart pos).utf8ByteSize = s.utf8ByteSize - pos.offset.byteIdx := by
|
||
simp only [utf8ByteSize_eq, str_replaceStart, endExclusive_replaceStart,
|
||
startInclusive_replaceStart, Pos.offset_str, Pos.Raw.byteIdx_offsetBy]
|
||
omega
|
||
|
||
theorem Slice.rawEndPos_replaceStart {s : Slice} {pos : s.Pos} :
|
||
(s.replaceStart pos).rawEndPos = s.rawEndPos.unoffsetBy pos.offset := by
|
||
ext
|
||
simp
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_replaceEnd {s : Slice} {pos : s.Pos} :
|
||
(s.replaceEnd pos).utf8ByteSize = pos.offset.byteIdx := by
|
||
simp [utf8ByteSize_eq]
|
||
|
||
@[simp]
|
||
theorem Slice.rawEndPos_replaceEnd {s : Slice} {pos : s.Pos} :
|
||
(s.replaceEnd pos).rawEndPos = pos.offset := by
|
||
ext
|
||
simp
|
||
|
||
@[simp]
|
||
theorem Slice.utf8ByteSize_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos} {h} :
|
||
(s.replaceStartEnd newStart newEnd h).utf8ByteSize = newStart.offset.byteDistance newEnd.offset := by
|
||
simp [utf8ByteSize_eq, Pos.Raw.byteDistance_eq]
|
||
omega
|
||
|
||
theorem Pos.Raw.offsetBy_assoc {p q r : Pos.Raw} :
|
||
(p.offsetBy q).offsetBy r = p.offsetBy (q.offsetBy r) := by
|
||
ext
|
||
simp [Nat.add_assoc]
|
||
|
||
theorem Pos.Raw.isValidForSlice_replaceStart {s : Slice} {p : s.Pos} {off : Pos.Raw} :
|
||
off.IsValidForSlice (s.replaceStart p) ↔ (off.offsetBy p.offset).IsValidForSlice s := by
|
||
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
|
||
· have := p.isValidForSlice.le_utf8ByteSize
|
||
simp_all [le_iff]
|
||
omega
|
||
· simpa [Pos.Raw.offsetBy_assoc] using h₂
|
||
· simp_all [Pos.Raw.le_iff]
|
||
omega
|
||
· simpa [Pos.Raw.offsetBy_assoc] using h₂
|
||
|
||
theorem Pos.Raw.isValidForSlice_replaceEnd {s : Slice} {p : s.Pos} {off : Pos.Raw} :
|
||
off.IsValidForSlice (s.replaceEnd p) ↔ off ≤ p.offset ∧ off.IsValidForSlice s := by
|
||
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_, ?_⟩, fun ⟨h₁, ⟨h₂, h₃⟩⟩ => ⟨?_, ?_⟩⟩
|
||
· simpa using h₁
|
||
· simp only [Slice.rawEndPos_replaceEnd] at h₁
|
||
exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_utf8ByteSize
|
||
· simpa using h₂
|
||
· simpa using h₁
|
||
· simpa using h₃
|
||
|
||
@[extern "lean_string_utf8_get", expose]
|
||
def decodeChar (s : @& String) (byteIdx : @& Nat) (h : (s.bytes.utf8DecodeChar? byteIdx).isSome) : Char :=
|
||
s.bytes.utf8DecodeChar byteIdx h
|
||
|
||
/-- Obtains the character at the given position in the string. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.get {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : Char :=
|
||
s.str.decodeChar (s.startInclusive.offset.byteIdx + pos.offset.byteIdx)
|
||
((Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?.1 pos.isValidForSlice).elim (by simp_all [Pos.ext_iff]) (·.2))
|
||
|
||
theorem Slice.Pos.get_eq_utf8DecodeChar {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) :
|
||
pos.get h = s.str.bytes.utf8DecodeChar (s.startInclusive.offset.byteIdx + pos.offset.byteIdx)
|
||
((Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?.1 pos.isValidForSlice).elim (by simp_all [Pos.ext_iff]) (·.2)) := (rfl)
|
||
|
||
/-- Returns the byte at the given position in the string, or `none` if the position is the end
|
||
position. -/
|
||
@[expose]
|
||
def Slice.Pos.get? {s : Slice} (pos : s.Pos) : Option Char :=
|
||
if h : pos = s.endPos then none else some (pos.get h)
|
||
|
||
/-- Returns the byte at the given position in the string, or panicks if the position is the end
|
||
position. -/
|
||
@[expose]
|
||
def Slice.Pos.get! {s : Slice} (pos : s.Pos) : Char :=
|
||
if h : pos = s.endPos then panic! "Cannot retrieve character at end position" else pos.get h
|
||
|
||
@[simp]
|
||
theorem startInclusive_toSlice {s : String} : s.toSlice.startInclusive = s.startValidPos := rfl
|
||
|
||
@[simp]
|
||
theorem endExclusive_toSlice {s : String} : s.toSlice.endExclusive = s.endValidPos := rfl
|
||
|
||
@[simp]
|
||
theorem str_toSlice {s : String} : s.toSlice.str = s := rfl
|
||
|
||
@[simp]
|
||
theorem offset_endValidPos {s : String} : s.endValidPos.offset = s.endPos := (rfl)
|
||
|
||
@[simp]
|
||
theorem copy_toSlice {s : String} : s.toSlice.copy = s := by
|
||
simp [← bytes_inj, Slice.bytes_copy, ← size_bytes]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValidForSlice_toSlice_iff {s : String} {p : Pos.Raw} :
|
||
p.IsValidForSlice s.toSlice ↔ p.IsValid s := by
|
||
rw [← isValid_copy_iff, copy_toSlice]
|
||
|
||
theorem Pos.Raw.IsValid.toSlice {s : String} {p : Pos.Raw} (h : p.IsValid s) :
|
||
p.IsValidForSlice s.toSlice :=
|
||
isValidForSlice_toSlice_iff.2 h
|
||
|
||
theorem Pos.Raw.IsValidForSlice.ofSlice {s : String} {p : Pos.Raw} (h : p.IsValidForSlice s.toSlice) :
|
||
p.IsValid s :=
|
||
isValidForSlice_toSlice_iff.1 h
|
||
|
||
/-- Turns a valid position on the string `s` into a valid position on the slice `s.toSlice`. -/
|
||
@[inline, expose]
|
||
def ValidPos.toSlice {s : String} (pos : s.ValidPos) : s.toSlice.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := pos.isValid.toSlice
|
||
|
||
@[simp]
|
||
theorem ValidPos.offset_toSlice {s : String} {pos : s.ValidPos} : pos.toSlice.offset = pos.offset := (rfl)
|
||
|
||
/-- Given a string `s`, turns a valid position on the slice `s.toSlice` into a valid position on the
|
||
string `s`. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.ofSlice {s : String} (pos : s.toSlice.Pos) : s.ValidPos where
|
||
offset := pos.offset
|
||
isValid := pos.isValidForSlice.ofSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofset_ofSlice {s : String} {pos : s.toSlice.Pos} : pos.ofSlice.offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem rawEndPos_toSlice {s : String} : s.toSlice.rawEndPos = s.endPos := by
|
||
rw [← Slice.endPos_copy, copy_toSlice]
|
||
|
||
@[simp]
|
||
theorem endPos_toSlice {s : String} : s.toSlice.endPos = s.endValidPos.toSlice :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem startPos_toSlice {s : String} : s.toSlice.startPos = s.startValidPos.toSlice :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem ValidPos.ofSlice_toSlice {s : String} (pos : s.ValidPos) : pos.toSlice.ofSlice = pos :=
|
||
ValidPos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.toSlice_ofSlice {s : String} (pos : s.toSlice.Pos) : pos.ofSlice.toSlice = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
/--
|
||
Returns the character at the position `pos` of a string, taking a proof that `p` is not the
|
||
past-the-end position.
|
||
|
||
This function is overridden with an efficient implementation in runtime code.
|
||
|
||
Examples:
|
||
* `("abc".pos ⟨1⟩ (by decide)).get (by decide) = 'b'`
|
||
* `("L∃∀N".pos ⟨1⟩ (by decide)).get (by decide) = '∃'`
|
||
-/
|
||
@[inline, expose]
|
||
def ValidPos.get {s : String} (pos : s.ValidPos) (h : pos ≠ s.endValidPos) : Char :=
|
||
pos.toSlice.get (ne_of_apply_ne Slice.Pos.ofSlice (by simp [h]))
|
||
|
||
/--
|
||
Returns the character at the position `pos` of a string, or `none` if the position is the
|
||
past-the-end position.
|
||
|
||
This function is overridden with an efficient implementation in runtime code.
|
||
-/
|
||
@[inline, expose]
|
||
def ValidPos.get? {s : String} (pos : s.ValidPos) : Option Char :=
|
||
pos.toSlice.get?
|
||
|
||
/--
|
||
Returns the character at the position `pos` of a string, or panics if the position is the
|
||
past-the-end position.
|
||
|
||
This function is overridden with an efficient implementation in runtime code.
|
||
-/
|
||
@[inline, expose]
|
||
def ValidPos.get! {s : String} (pos : s.ValidPos) : Char :=
|
||
pos.toSlice.get!
|
||
|
||
/--
|
||
Returns the byte at the position `pos` of a string.
|
||
-/
|
||
@[inline, expose]
|
||
def ValidPos.byte {s : String} (pos : s.ValidPos) (h : pos ≠ s.endValidPos) : UInt8 :=
|
||
pos.toSlice.byte (ne_of_apply_ne Slice.Pos.ofSlice (by simp [h]))
|
||
|
||
theorem ValidPos.isUTF8FirstByte_byte {s : String} {pos : s.ValidPos} {h : pos ≠ s.endValidPos} :
|
||
(pos.byte h).IsUTF8FirstByte :=
|
||
Slice.Pos.isUTF8FirstByte_byte
|
||
|
||
@[simp]
|
||
theorem append_left_inj {s₁ s₂ : String} (t : String) :
|
||
s₁ ++ t = s₂ ++ t ↔ s₁ = s₂ := by
|
||
simp [← String.data_inj]
|
||
|
||
theorem append_assoc {s₁ s₂ s₃ : String} : s₁ ++ s₂ ++ s₃ = s₁ ++ (s₂ ++ s₃) := by
|
||
simp [← String.data_inj]
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_eq_zero_iff {s : String} : s.utf8ByteSize = 0 ↔ s = "" := by
|
||
refine ⟨fun h => ?_, fun h => h ▸ utf8ByteSize_empty⟩
|
||
simpa [← bytes_inj, ← ByteArray.size_eq_zero_iff] using h
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.eq_zero_iff {p : Pos.Raw} : p = 0 ↔ p.byteIdx = 0 :=
|
||
Pos.Raw.ext_iff
|
||
|
||
theorem endPos_eq_zero_iff {b : String} : b.endPos = 0 ↔ b = "" := by
|
||
simp
|
||
|
||
@[simp]
|
||
theorem startValidPos_eq_endValidPos_iff {b : String} : b.startValidPos = b.endValidPos ↔ b = "" := by
|
||
simp [← utf8ByteSize_eq_zero_iff, ValidPos.ext_iff, Eq.comm (b := b.endPos)]
|
||
|
||
@[simp]
|
||
theorem data_eq_nil_iff {b : String} : b.data = [] ↔ b = "" := by
|
||
rw [← List.asString_inj, asString_data, List.asString_nil]
|
||
|
||
@[simp]
|
||
theorem _root_.List.asString_eq_empty_iff {l : List Char} : l.asString = "" ↔ l = [] := by
|
||
rw [← data_inj, List.data_asString, data_empty]
|
||
|
||
@[simp]
|
||
theorem _root_.List.length_asString {l : List Char} : l.asString.length = l.length := by
|
||
rw [← String.length_data, List.data_asString]
|
||
|
||
theorem isSome_utf8DecodeChar?_zero {b : String} (hb : b ≠ "") : (b.bytes.utf8DecodeChar? 0).isSome := by
|
||
refine (((Pos.Raw.isValid_iff_isSome_utf8DecodeChar? (s := b)).1 Pos.Raw.isValid_zero).elim ?_ id)
|
||
rw [eq_comm, endPos_eq_zero_iff]
|
||
exact fun h => (hb h).elim
|
||
|
||
theorem head_data {b : String} {h} :
|
||
b.data.head h = b.bytes.utf8DecodeChar 0 (isSome_utf8DecodeChar?_zero (by simpa using h)) := by
|
||
obtain ⟨l, rfl⟩ := b.exists_eq_asString
|
||
match l with
|
||
| [] => simp at h
|
||
| c::cs => simp
|
||
|
||
theorem get_startValidPos {b : String} (h) :
|
||
b.startValidPos.get h = b.data.head (by rwa [ne_eq, data_eq_nil_iff, ← startValidPos_eq_endValidPos_iff]) :=
|
||
head_data.symm
|
||
|
||
theorem eq_singleton_append {s : String} (h : s.startValidPos ≠ s.endValidPos) :
|
||
∃ t, s = singleton (s.startValidPos.get h) ++ t := by
|
||
obtain ⟨m, rfl⟩ := s.exists_eq_asString
|
||
have hm : m ≠ [] := by
|
||
rwa [ne_eq, ← List.asString_eq_empty_iff, ← startValidPos_eq_endValidPos_iff]
|
||
refine ⟨m.tail.asString, ?_⟩
|
||
rw (occs := [1]) [← List.cons_head_tail hm]
|
||
rw [← List.singleton_append, List.asString_append, append_left_inj, ← singleton_eq_asString,
|
||
get_startValidPos]
|
||
simp
|
||
|
||
theorem Slice.copy_eq_copy_replaceEnd {s : Slice} {pos : s.Pos} :
|
||
s.copy = (s.replaceEnd pos).copy ++ (s.replaceStart pos).copy := by
|
||
rw [← String.bytes_inj, bytes_copy, bytes_append, bytes_copy, bytes_copy]
|
||
simp only [str_replaceEnd, startInclusive_replaceEnd, endExclusive_replaceEnd, Pos.offset_str,
|
||
Pos.Raw.byteIdx_offsetBy, str_replaceStart, startInclusive_replaceStart,
|
||
endExclusive_replaceStart, ByteArray.extract_append_extract, Nat.le_add_right, Nat.min_eq_left]
|
||
rw [Nat.max_eq_right]
|
||
exact pos.offset_str_le_offset_endExclusive
|
||
|
||
/-- Given a slice `s` and a position on `s.copy`, obtain the corresponding position on `s`. -/
|
||
@[inline]
|
||
def ValidPos.ofCopy {s : Slice} (pos : s.copy.ValidPos) : s.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := Pos.Raw.isValid_copy_iff.1 pos.isValid
|
||
|
||
@[simp]
|
||
theorem ValidPos.offset_ofCopy {s : Slice} {pos : s.copy.ValidPos} : pos.ofCopy.offset = pos.offset := (rfl)
|
||
|
||
/-- Given a slice `s` and a position on `s`, obtain the corresponding position on `s.copy.` -/
|
||
@[inline]
|
||
def Slice.Pos.toCopy {s : Slice} (pos : s.Pos) : s.copy.ValidPos where
|
||
offset := pos.offset
|
||
isValid := Pos.Raw.isValid_copy_iff.2 pos.isValidForSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_toCopy {s : Slice} {pos : s.Pos} : pos.toCopy.offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofCopy_toCopy {s : Slice} {pos : s.Pos} : pos.toCopy.ofCopy = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem ValidPos.toCopy_ofCopy {s : Slice} {pos : s.copy.ValidPos} : pos.ofCopy.toCopy = pos :=
|
||
ValidPos.ext (by simp)
|
||
|
||
theorem ValidPos.ofCopy_inj {s : Slice} {pos pos' : s.copy.ValidPos} : pos.ofCopy = pos'.ofCopy ↔ pos = pos' :=
|
||
⟨fun h => by simpa using congrArg Slice.Pos.toCopy h, (· ▸ rfl)⟩
|
||
|
||
@[simp]
|
||
theorem Slice.startValidPos_copy {s : Slice} : s.copy.startValidPos = s.startPos.toCopy :=
|
||
ValidPos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Slice.endValidPos_copy {s : Slice} : s.copy.endValidPos = s.endPos.toCopy :=
|
||
ValidPos.ext (by simp)
|
||
|
||
theorem Slice.Pos.get_toCopy {s : Slice} {pos : s.Pos} (h) :
|
||
pos.toCopy.get h = pos.get (by rintro rfl; simp at h) := by
|
||
rw [ValidPos.get, Slice.Pos.get_eq_utf8DecodeChar, Slice.Pos.get_eq_utf8DecodeChar]
|
||
simp only [str_toSlice, bytes_copy, startInclusive_toSlice, startValidPos_copy, offset_toCopy,
|
||
ByteString.Slice.offset_startPos, Pos.Raw.byteIdx_zero, ValidPos.offset_toSlice, Nat.zero_add]
|
||
rw [ByteArray.utf8DecodeChar_eq_utf8DecodeChar_extract]
|
||
conv => lhs; congr; rw [ByteArray.extract_extract]
|
||
conv => rhs; rw [ByteArray.utf8DecodeChar_eq_utf8DecodeChar_extract]
|
||
exact ByteArray.utf8DecodeChar_extract_congr _ _ _
|
||
|
||
theorem Slice.Pos.get_eq_get_toCopy {s : Slice} {pos : s.Pos} {h} :
|
||
pos.get h = pos.toCopy.get (ne_of_apply_ne ValidPos.ofCopy (by simp [h])) :=
|
||
(get_toCopy _).symm
|
||
|
||
theorem Slice.Pos.byte_toCopy {s : Slice} {pos : s.Pos} (h) :
|
||
pos.toCopy.byte h = pos.byte (by rintro rfl; simp at h) := by
|
||
rw [ValidPos.byte, Slice.Pos.byte, Slice.Pos.byte]
|
||
simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]
|
||
|
||
theorem Slice.Pos.byte_eq_byte_toCopy {s : Slice} {pos : s.Pos} {h} :
|
||
pos.byte h = pos.toCopy.byte (ne_of_apply_ne ValidPos.ofCopy (by simp [h])) :=
|
||
(byte_toCopy _).symm
|
||
|
||
/-- Given a position in `s.replaceStart p₀`, obtain the corresponding position in `s`. -/
|
||
@[inline]
|
||
def Slice.Pos.ofReplaceStart {s : Slice} {p₀ : s.Pos} (pos : (s.replaceStart p₀).Pos) : s.Pos where
|
||
offset := pos.offset.offsetBy p₀.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_replaceStart.1 pos.isValidForSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_ofReplaceStart {s : Slice} {p₀ : s.Pos} {pos : (s.replaceStart p₀).Pos} :
|
||
(ofReplaceStart pos).offset = pos.offset.offsetBy p₀.offset := (rfl)
|
||
|
||
theorem Pos.Raw.offsetBy_unoffsetBy_of_le {p : Pos.Raw} {q : Pos.Raw} (h : q ≤ p) :
|
||
(p.unoffsetBy q).offsetBy q = p := by
|
||
ext
|
||
simp_all [le_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.unoffsetBy_offsetBy {p q : Pos.Raw} : (p.offsetBy q).unoffsetBy q = p := by
|
||
ext
|
||
simp
|
||
|
||
/-- Given a position in `s` that is at least `p₀`, obtain the corresponding position in
|
||
`s.replaceStart p₀`. -/
|
||
@[inline]
|
||
def Slice.Pos.toReplaceStart {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : p₀ ≤ pos) :
|
||
(s.replaceStart p₀).Pos where
|
||
offset := pos.offset.unoffsetBy p₀.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_replaceStart.2 (by
|
||
simpa [Pos.Raw.offsetBy_unoffsetBy_of_le (Pos.Raw.le_iff.1 h)] using pos.isValidForSlice)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_toReplaceStart {s : Slice} {p₀ : s.Pos} {pos : s.Pos} {h} :
|
||
(toReplaceStart p₀ pos h).offset = pos.offset.unoffsetBy p₀.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_zero_left {p : Pos.Raw} : (0 : Pos.Raw).offsetBy p = p := by
|
||
ext
|
||
simp
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofReplaceStart_startPos {s : Slice} {pos : s.Pos} :
|
||
ofReplaceStart (s.replaceStart pos).startPos = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.ofReplaceStart_endPos {s : Slice} {pos : s.Pos} :
|
||
ofReplaceStart (s.replaceStart pos).endPos = s.endPos := by
|
||
have := pos.isValidForSlice.le_utf8ByteSize
|
||
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff]
|
||
|
||
theorem Slice.Pos.ofReplaceStart_inj {s : Slice} {p₀ : s.Pos} {pos pos' : (s.replaceStart p₀).Pos} :
|
||
ofReplaceStart pos = ofReplaceStart pos' ↔ pos = pos' := by
|
||
simp [Pos.ext_iff, String.Pos.Raw.ext_iff]
|
||
|
||
theorem Slice.Pos.get_eq_get_ofReplaceStart {s : Slice} {p₀ : s.Pos} {pos : (s.replaceStart p₀).Pos} {h} :
|
||
pos.get h = (ofReplaceStart pos).get (by rwa [← ofReplaceStart_endPos, ne_eq, ofReplaceStart_inj]) := by
|
||
simp [Slice.Pos.get, Nat.add_assoc]
|
||
|
||
/-- Given a position in `s.replaceEnd p₀`, obtain the corresponding position in `s`. -/
|
||
@[inline]
|
||
def Slice.Pos.ofReplaceEnd {s : Slice} {p₀ : s.Pos} (pos : (s.replaceEnd p₀).Pos) : s.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := (Pos.Raw.isValidForSlice_replaceEnd.1 pos.isValidForSlice).2
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_ofReplaceEnd {s : Slice} {p₀ : s.Pos} {pos : (s.replaceEnd p₀).Pos} :
|
||
(ofReplaceEnd pos).offset = pos.offset := (rfl)
|
||
|
||
/-- Given a position in `s` that is at most `p₀`, obtain the corresponding position in `s.replaceEnd p₀`. -/
|
||
@[inline]
|
||
def Slice.Pos.toReplaceEnd {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : pos ≤ p₀) :
|
||
(s.replaceEnd p₀).Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := Pos.Raw.isValidForSlice_replaceEnd.2 ⟨h, pos.isValidForSlice⟩
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_toReplaceEnd {s : Slice} {p₀ : s.Pos} {pos : s.Pos} {h : pos ≤ p₀} :
|
||
(toReplaceEnd p₀ pos h).offset = pos.offset := (rfl)
|
||
|
||
theorem Slice.Pos.copy_eq_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.endPos) :
|
||
∃ t₁ t₂ : String, s.copy = t₁ ++ singleton (pos.get h) ++ t₂ ∧ t₁.utf8ByteSize = pos.offset.byteIdx := by
|
||
obtain ⟨t₂, ht₂⟩ := (s.replaceStart pos).copy.eq_singleton_append (by simpa [← ValidPos.ofCopy_inj, ← ofReplaceStart_inj])
|
||
refine ⟨(s.replaceEnd pos).copy, t₂, ?_, by simp⟩
|
||
simp only [Slice.startValidPos_copy, get_toCopy, get_eq_get_ofReplaceStart, ofReplaceStart_startPos] at ht₂
|
||
rw [append_assoc, ← ht₂, ← copy_eq_copy_replaceEnd]
|
||
|
||
theorem Slice.Pos.utf8ByteSize_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = (pos.get h).utf8Size := by
|
||
simp [getUTF8Byte, byte, String.getUTF8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]
|
||
|
||
theorem ValidPos.utf8ByteSize_byte {s : String} {pos : s.ValidPos} {h : pos ≠ s.endValidPos} :
|
||
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = (pos.get h).utf8Size :=
|
||
Slice.Pos.utf8ByteSize_byte
|
||
|
||
/--
|
||
Advances `p` by `n` bytes. This is not an `HAdd` instance because it should be a relatively
|
||
rare operation, so we use a name to make accidental use less likely. To add the size of a
|
||
character `c` or string `s` to a raw position `p`, you can use `p + c` resp. `p + s`.
|
||
|
||
This should be seen as an "advance" or "skip".
|
||
|
||
See also `Pos.Raw.offsetBy`, which turns relative positions into absolute positions.
|
||
-/
|
||
@[expose, inline]
|
||
def Pos.Raw.increaseBy (p : Pos.Raw) (n : Nat) : Pos.Raw where
|
||
byteIdx := p.byteIdx + n
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_increaseBy {p : Pos.Raw} {n : Nat} :
|
||
(p.increaseBy n).byteIdx = p.byteIdx + n := (rfl)
|
||
|
||
/--
|
||
Move the position `p` back by `n` bytes. This is not an `HSub` instance because it should be a
|
||
relatively rare operation, so we use a name to make accidental use less likely. To remove the size
|
||
of a character `c` or string `s` from a raw position `p`, you can use `p - c` resp. `p - s`.
|
||
|
||
This should be seen as the inverse of an "advance" or "skip".
|
||
|
||
See also `Pos.Raw.unoffsetBy`, which turns absolute positions into relative positions.
|
||
-/
|
||
@[expose, inline]
|
||
def Pos.Raw.decreaseBy (p : Pos.Raw) (n : Nat) : Pos.Raw where
|
||
byteIdx := p.byteIdx - n
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_decreaseBy {p : Pos.Raw} {n : Nat} :
|
||
(p.decreaseBy n).byteIdx = p.byteIdx - n := (rfl)
|
||
|
||
theorem Pos.Raw.increaseBy_charUtf8Size {p : Pos.Raw} {c : Char} :
|
||
p.increaseBy c.utf8Size = p + c := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
/-- Advances a valid position on a slice to the next valid position, given a proof that the
|
||
position is not the past-the-end position, which guarantees that such a position exists. -/
|
||
@[expose]
|
||
def Slice.Pos.next {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos where
|
||
offset := pos.offset.increaseBy ((pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte)
|
||
isValidForSlice := by
|
||
obtain ⟨t₁, t₂, ht, ht'⟩ := copy_eq_append_get h
|
||
replace ht' : pos.offset = t₁.endPos := Eq.symm (String.Pos.Raw.ext ht')
|
||
rw [utf8ByteSize_byte, ← Pos.Raw.isValid_copy_iff, ht, ht']
|
||
refine Pos.Raw.IsValid.append_right ?_ t₂
|
||
rw [Pos.Raw.increaseBy_charUtf8Size]
|
||
refine Pos.Raw.IsValid.append_left ?_ t₁
|
||
exact Pos.Raw.isValid_singleton.2 (Or.inr rfl)
|
||
|
||
/-- Advances a valid position on a slice to the next valid position, or returns `none` if the
|
||
given position is the past-the-end position. -/
|
||
@[expose]
|
||
def Slice.Pos.next? {s : Slice} (pos : s.Pos) : Option s.Pos :=
|
||
if h : pos = s.endPos then none else some (pos.next h)
|
||
|
||
/-- Advances a valid position on a slice to the next valid position, or panics if the given
|
||
position is the past-the-end position. -/
|
||
@[expose]
|
||
def Slice.Pos.next! {s : Slice} (pos : s.Pos) : s.Pos :=
|
||
if h : pos = s.endPos then panic! "Cannot advance the end position" else pos.next h
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.next h).offset = pos.offset + pos.get h := by
|
||
ext
|
||
simp [next, utf8ByteSize_byte]
|
||
|
||
theorem Slice.Pos.byteIdx_offset_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
(pos.next h).offset.byteIdx = pos.offset.byteIdx + (pos.get h).utf8Size := by
|
||
simp
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.lt_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
|
||
pos < pos.next h := by
|
||
simp [Pos.lt_iff, Pos.Raw.lt_iff, Char.utf8Size_pos]
|
||
|
||
/-- Increases the byte offset of the position by `1`. Not to be confused with `ValidPos.next`. -/
|
||
@[inline, expose]
|
||
def Pos.Raw.inc (p : Pos.Raw) : Pos.Raw :=
|
||
⟨p.byteIdx + 1⟩
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_inc {p : Pos.Raw} : p.inc.byteIdx = p.byteIdx + 1 := (rfl)
|
||
|
||
/-- Decreases the byte offset of the position by `1`. Not to be confused with `ValidPos.prev`. -/
|
||
@[inline, expose]
|
||
def Pos.Raw.dec (p : Pos.Raw) : Pos.Raw :=
|
||
⟨p.byteIdx - 1⟩
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_dec {p : Pos.Raw} : p.dec.byteIdx = p.byteIdx - 1 := (rfl)
|
||
|
||
@[inline, expose]
|
||
def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : String.Pos.Raw :=
|
||
go (pos.offset.byteIdx - 1) (by
|
||
have := pos.isValidForSlice.le_utf8ByteSize
|
||
simp [Pos.Raw.le_iff, Pos.ext_iff] at ⊢ this h
|
||
omega)
|
||
where
|
||
go (off : Nat) (h₁ : off < s.utf8ByteSize) : String.Pos.Raw :=
|
||
if hbyte : (s.getUTF8Byte ⟨off⟩ h₁).IsUTF8FirstByte then
|
||
⟨off⟩
|
||
else
|
||
have : 0 ≠ off := by
|
||
intro h
|
||
obtain hoff : (⟨off⟩ : String.Pos.Raw) = 0 := by simpa [String.Pos.Raw.ext_iff] using h.symm
|
||
simp [hoff, s.isUTF8FirstByte_utf8ByteAt_zero] at hbyte
|
||
match off with
|
||
| 0 => False.elim (by contradiction)
|
||
| off + 1 => go off (by omega)
|
||
termination_by structural off
|
||
|
||
theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : off < s.utf8ByteSize) :
|
||
(Slice.Pos.prevAux.go off h₁).IsValidForSlice s := by
|
||
induction off with
|
||
| zero =>
|
||
rw [Slice.Pos.prevAux.go]
|
||
split
|
||
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||
· simpa using elim
|
||
| succ off ih =>
|
||
rw [Slice.Pos.prevAux.go]
|
||
split
|
||
· exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩)
|
||
· simpa using ih _
|
||
where
|
||
elim {P : Pos.Raw → Prop} {h : False} : P h.elim := h.elim
|
||
|
||
theorem Pos.Raw.isValidForSlice_prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) :
|
||
(pos.prevAux h).IsValidForSlice s :=
|
||
isValidForSlice_prevAuxGo ..
|
||
|
||
/-- Returns the previous valid position before the given position, given a proof that the position
|
||
is not the start position, which guarantees that such a position exists. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.prev {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : s.Pos where
|
||
offset := prevAux pos h
|
||
isValidForSlice := Pos.Raw.isValidForSlice_prevAux _ _
|
||
|
||
/-- Returns the previous valid position before the given position, or `none` if the position is
|
||
the start position. -/
|
||
@[expose]
|
||
def Slice.Pos.prev? {s : Slice} (pos : s.Pos) : Option s.Pos :=
|
||
if h : pos = s.startPos then none else some (pos.prev h)
|
||
|
||
/-- Returns the previous valid position before the given position, or panics if the position is
|
||
the start position. -/
|
||
@[expose]
|
||
def Slice.Pos.prev! {s : Slice} (pos : s.Pos) : s.Pos :=
|
||
if h : pos = s.startPos then panic! "The start position has no previous position" else pos.prev h
|
||
|
||
/-- Constructs a valid position on `s` from a position and a proof that it is valid. -/
|
||
@[inline, expose]
|
||
def Slice.pos (s : Slice) (off : String.Pos.Raw) (h : off.IsValidForSlice s) : s.Pos where
|
||
offset := off
|
||
isValidForSlice := h
|
||
|
||
@[simp]
|
||
theorem Slice.offset_pos {s : Slice} {off h} : (s.pos off h).offset = off := rfl
|
||
|
||
/-- Constructs a valid position on `s` from a position, returning `none` if the position is not valid. -/
|
||
@[expose]
|
||
def Slice.pos? (s : Slice) (off : String.Pos.Raw) : Option s.Pos :=
|
||
if h : off.isValidForSlice s then
|
||
some (s.pos off (Pos.Raw.isValidForSlice_eq_true_iff.1 h))
|
||
else
|
||
none
|
||
|
||
/-- Constructs a valid position `s` from a position, panicking if the position is not valid. -/
|
||
@[expose]
|
||
def Slice.pos! (s : Slice) (off : String.Pos.Raw) : s.Pos :=
|
||
if h : off.isValidForSlice s then
|
||
s.pos off (Pos.Raw.isValidForSlice_eq_true_iff.1 h)
|
||
else
|
||
panic! "Offset is not at a valid UTF-8 character boundary"
|
||
|
||
/-- Advances a valid position on a string to the next valid position, given a proof that the
|
||
position is not the past-the-end position, which guarantees that such a position exists. -/
|
||
@[inline, expose]
|
||
def ValidPos.next {s : String} (pos : s.ValidPos) (h : pos ≠ s.endValidPos) : s.ValidPos :=
|
||
(pos.toSlice.next (ne_of_apply_ne Slice.Pos.ofSlice (by simpa))).ofSlice
|
||
|
||
/-- Advances a valid position on a string to the next valid position, or returns `none` if the
|
||
given position is the past-the-end position. -/
|
||
@[inline, expose]
|
||
def ValidPos.next? {s : String} (pos : s.ValidPos) : Option s.ValidPos :=
|
||
pos.toSlice.next?.map Slice.Pos.ofSlice
|
||
|
||
/-- Advances a valid position on a string to the next valid position, or panics if the given
|
||
position is the past-the-end position. -/
|
||
@[inline, expose]
|
||
def ValidPos.next! {s : String} (pos : s.ValidPos) : s.ValidPos :=
|
||
pos.toSlice.next!.ofSlice
|
||
|
||
/-- Returns the previous valid position before the given position, given a proof that the position
|
||
is not the start position, which guarantees that such a position exists. -/
|
||
@[inline, expose]
|
||
def ValidPos.prev {s : String} (pos : s.ValidPos) (h : pos ≠ s.startValidPos) : s.ValidPos :=
|
||
(pos.toSlice.prev (ne_of_apply_ne Slice.Pos.ofSlice (by simpa))).ofSlice
|
||
|
||
/-- Returns the previous valid position before the given position, or `none` if the position is
|
||
the start position. -/
|
||
@[inline, expose]
|
||
def ValidPos.prev? {s : String} (pos : s.ValidPos) : Option s.ValidPos :=
|
||
pos.toSlice.prev?.map Slice.Pos.ofSlice
|
||
|
||
/-- Returns the previous valid position before the given position, or panics if the position is
|
||
the start position. -/
|
||
@[inline, expose]
|
||
def ValidPos.prev! {s : String} (pos : s.ValidPos) : s.ValidPos :=
|
||
pos.toSlice.prev!.ofSlice
|
||
|
||
/-- Constructs a valid position on `s` from a position and a proof that it is valid. -/
|
||
@[inline, expose]
|
||
def pos (s : String) (off : Pos.Raw) (h : off.IsValid s) : s.ValidPos :=
|
||
(s.toSlice.pos off h.toSlice).ofSlice
|
||
|
||
/-- Constructs a valid position on `s` from a position, returning `none` if the position is not valid. -/
|
||
@[inline, expose]
|
||
def pos? (s : String) (off : Pos.Raw) : Option s.ValidPos :=
|
||
(s.toSlice.pos? off).map Slice.Pos.ofSlice
|
||
|
||
/-- Constructs a valid position `s` from a position, panicking if the position is not valid. -/
|
||
@[inline, expose]
|
||
def pos! (s : String) (off : Pos.Raw) : s.ValidPos :=
|
||
(s.toSlice.pos! off).ofSlice
|
||
|
||
/-- Constructs a valid position on `t` from a valid position on `s` and a proof that `s = t`. -/
|
||
@[inline]
|
||
def Slice.Pos.cast {s t : Slice} (pos : s.Pos) (h : s = t) : t.Pos where
|
||
offset := pos.offset
|
||
isValidForSlice := h ▸ pos.isValidForSlice
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.offset_cast {s t : Slice} {pos : s.Pos} {h : s = t} :
|
||
(pos.cast h).offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.cast_rfl {s : Slice} {pos : s.Pos} : pos.cast rfl = pos :=
|
||
Slice.Pos.ext (by simp)
|
||
|
||
/-- Constructs a valid position on `t` from a valid position on `s` and a proof that `s = t`. -/
|
||
@[inline]
|
||
def ValidPos.cast {s t : String} (pos : s.ValidPos) (h : s = t) : t.ValidPos where
|
||
offset := pos.offset
|
||
isValid := h ▸ pos.isValid
|
||
|
||
@[simp]
|
||
theorem ValidPos.offset_cast {s t : String} {pos : s.ValidPos} {h : s = t} :
|
||
(pos.cast h).offset = pos.offset := (rfl)
|
||
|
||
@[simp]
|
||
theorem ValidPos.cast_rfl {s : String} {pos : s.ValidPos} : pos.cast rfl = pos :=
|
||
ValidPos.ext (by simp)
|
||
|
||
/-- Given a byte position within a string slice, obtains the smallest valid position that is
|
||
strictly greater than the given byte position. -/
|
||
@[inline]
|
||
def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.rawEndPos) : s.Pos :=
|
||
go offset.inc
|
||
where
|
||
go (offset : String.Pos.Raw) : s.Pos :=
|
||
if h : offset < s.rawEndPos then
|
||
if h' : (s.getUTF8Byte offset h).IsUTF8FirstByte then
|
||
s.pos offset (Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, h'⟩))
|
||
else
|
||
go offset.inc
|
||
else
|
||
s.endPos
|
||
termination_by s.utf8ByteSize - offset.byteIdx
|
||
decreasing_by
|
||
simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.byteIdx_inc] at h ⊢
|
||
omega
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.le_refl {p : Pos.Raw} : p ≤ p := by simp [le_iff]
|
||
|
||
theorem Pos.Raw.lt_inc {p : Pos.Raw} : p < p.inc := by simp [lt_iff]
|
||
|
||
theorem Pos.Raw.le_of_lt {p q : Pos.Raw} : p < q → p ≤ q := by simpa [lt_iff, le_iff] using Nat.le_of_lt
|
||
|
||
theorem Pos.Raw.inc_le {p q : Pos.Raw} : p.inc ≤ q ↔ p < q := by simpa [lt_iff, le_iff] using Nat.succ_le
|
||
|
||
private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (h : o ≤ s.rawEndPos) :
|
||
o ≤ (findNextPos.go s o).offset := by
|
||
fun_induction findNextPos.go with
|
||
| case1 => simp
|
||
| case2 x h₁ h₂ ih =>
|
||
refine Pos.Raw.le_of_lt (Pos.Raw.lt_of_lt_of_le Pos.Raw.lt_inc (ih ?_))
|
||
rw [Pos.Raw.le_iff, Pos.Raw.byteIdx_inc]
|
||
exact Nat.succ_le.2 h₁
|
||
| case3 x h => exact h
|
||
|
||
theorem Slice.lt_offset_findNextPos {s : Slice} {o : String.Pos.Raw} (h) : o < (s.findNextPos o h).offset :=
|
||
Pos.Raw.lt_of_lt_of_le Pos.Raw.lt_inc (le_offset_findNextPosGo (Pos.Raw.inc_le.2 h))
|
||
|
||
theorem Slice.Pos.prevAuxGo_le_self {s : Slice} {p : Nat} {h : p < s.utf8ByteSize} :
|
||
prevAux.go p h ≤ ⟨p⟩ := by
|
||
induction p with
|
||
| zero =>
|
||
rw [prevAux.go]
|
||
split
|
||
· simp
|
||
· simpa using elim (· ≤ { })
|
||
| succ p ih =>
|
||
rw [prevAux.go]
|
||
split
|
||
· simp
|
||
· simpa using Nat.le_trans ih (by simp)
|
||
where
|
||
elim (P : String.Pos.Raw → Prop) {h : False} : P h.elim := h.elim
|
||
|
||
theorem Pos.Raw.lt_of_le_of_lt {a b c : Pos.Raw} : a ≤ b → b < c → a < c := by
|
||
simpa [le_iff, lt_iff] using Nat.lt_of_le_of_lt
|
||
|
||
theorem Slice.Pos.prevAux_lt_self {s : Slice} {p : s.Pos} {h} : p.prevAux h < p.offset := by
|
||
rw [prevAux]
|
||
refine Pos.Raw.lt_of_le_of_lt prevAuxGo_le_self ?_
|
||
simp [Pos.ext_iff, Pos.Raw.lt_iff] at *
|
||
omega
|
||
|
||
theorem Slice.Pos.prevAux_lt_rawEndPos {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.rawEndPos :=
|
||
Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_utf8ByteSize
|
||
|
||
theorem Pos.Raw.ne_of_lt {a b : Pos.Raw} : a < b → a ≠ b := by
|
||
simpa [lt_iff, Pos.Raw.ext_iff] using Nat.ne_of_lt
|
||
|
||
theorem Slice.Pos.prev_ne_endPos {s : Slice} {p : s.Pos} {h} : p.prev h ≠ s.endPos := by
|
||
simpa [Pos.ext_iff, prev] using Pos.Raw.ne_of_lt prevAux_lt_rawEndPos
|
||
|
||
theorem Slice.Pos.offset_prev_lt_offset {s : Slice} {p : s.Pos} {h} : (p.prev h).offset < p.offset := by
|
||
simpa [prev] using prevAux_lt_self
|
||
|
||
@[simp]
|
||
theorem Slice.Pos.prev_lt {s : Slice} {p : s.Pos} {h} : p.prev h < p :=
|
||
lt_iff.2 offset_prev_lt_offset
|
||
|
||
/-- Advances the position `p` `n` times, saturating at `s.endPos` if necessary. -/
|
||
def Slice.Pos.nextn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
|
||
match n with
|
||
| 0 => p
|
||
| n + 1 =>
|
||
if h : p ≠ s.endPos then
|
||
nextn (p.next h) n
|
||
else
|
||
p
|
||
|
||
/-- Iterates `p.prev` `n` times, saturating at `s.startPos` if necessary. -/
|
||
def Slice.Pos.prevn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
|
||
match n with
|
||
| 0 => p
|
||
| n + 1 =>
|
||
if h : p ≠ s.startPos then
|
||
prevn (p.prev h) n
|
||
else
|
||
p
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8GetAux : List Char → Pos.Raw → Pos.Raw → Char
|
||
| [], _, _ => default
|
||
| c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p
|
||
|
||
@[deprecated Pos.Raw.utf8GetAux (since := "2025-10-09")]
|
||
abbrev utf8GetAux : List Char → Pos.Raw → Pos.Raw → Char :=
|
||
Pos.Raw.utf8GetAux
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. If `p` is not a valid position, returns the
|
||
fallback value `(default : Char)`, which is `'A'`, but does not panic.
|
||
|
||
This function is overridden with an efficient implementation in runtime code. See
|
||
`String.Pos.Raw.utf8GetAux` for the reference implementation.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.get`, combined with
|
||
`String.pos` or another means of obtaining a `String.ValidPos`.
|
||
|
||
Examples:
|
||
* `"abc".get ⟨1⟩ = 'b'`
|
||
* `"abc".get ⟨3⟩ = (default : Char)` because byte `3` is at the end of the string.
|
||
* `"L∃∀N".get ⟨2⟩ = (default : Char)` because byte `2` is in the middle of `'∃'`.
|
||
-/
|
||
@[extern "lean_string_utf8_get", expose]
|
||
def Pos.Raw.get (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
utf8GetAux s.data 0 p
|
||
|
||
@[extern "lean_string_utf8_get", expose, deprecated Pos.Raw.get (since := "2025-10-14")]
|
||
def get (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
Pos.Raw.utf8GetAux s.data 0 p
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8GetAux? : List Char → Pos.Raw → Pos.Raw → Option Char
|
||
| [], _, _ => none
|
||
| c::cs, i, p => if i = p then some c else utf8GetAux? cs (i + c) p
|
||
|
||
@[deprecated Pos.Raw.utf8GetAux (since := "2025-10-09")]
|
||
abbrev utf8GetAux? : List Char → Pos.Raw → Pos.Raw → Option Char :=
|
||
Pos.Raw.utf8GetAux?
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. If `p` is not a valid position, returns `none`.
|
||
|
||
This function is overridden with an efficient implementation in runtime code. See
|
||
`String.utf8GetAux?` for the reference implementation.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.get`, combined with
|
||
`String.pos?` or another means of obtaining a `String.ValidPos`.
|
||
|
||
Examples:
|
||
* `"abc".get? ⟨1⟩ = some 'b'`
|
||
* `"abc".get? ⟨3⟩ = none`
|
||
* `"L∃∀N".get? ⟨1⟩ = some '∃'`
|
||
* `"L∃∀N".get? ⟨2⟩ = none`
|
||
-/
|
||
@[extern "lean_string_utf8_get_opt", expose]
|
||
def Pos.Raw.get? : (@& String) → (@& Pos.Raw) → Option Char
|
||
| s, p => utf8GetAux? s.data 0 p
|
||
|
||
@[extern "lean_string_utf8_get_opt", expose, deprecated Pos.Raw.get? (since := "2025-10-14")]
|
||
def get? : (@& String) → (@& Pos.Raw) → Option Char
|
||
| s, p => Pos.Raw.utf8GetAux? s.data 0 p
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. Panics if `p` is not a valid position.
|
||
|
||
See `String.pos?` and `String.ValidPos.get` for a safer alternative.
|
||
|
||
This function is overridden with an efficient implementation in runtime code. See
|
||
`String.utf8GetAux` for the reference implementation.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.get`, combined with
|
||
`String.pos!` or another means of obtaining a `String.ValidPos`.
|
||
|
||
Examples
|
||
* `"abc".get! ⟨1⟩ = 'b'`
|
||
-/
|
||
@[extern "lean_string_utf8_get_bang", expose]
|
||
def Pos.Raw.get! (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.data 0 p
|
||
|
||
@[extern "lean_string_utf8_get_bang", expose, deprecated Pos.Raw.get! (since := "2025-10-14")]
|
||
def get! (s : @& String) (p : @& Pos.Raw) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.data 0 p
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8SetAux (c' : Char) : List Char → Pos.Raw → Pos.Raw → List Char
|
||
| [], _, _ => []
|
||
| c::cs, i, p =>
|
||
if i = p then (c'::cs) else c::(utf8SetAux c' cs (i + c) p)
|
||
|
||
@[deprecated Pos.Raw.utf8SetAux (since := "2025-10-09")]
|
||
abbrev utf8SetAux (c' : Char) : List Char → Pos.Raw → Pos.Raw → List Char :=
|
||
Pos.Raw.utf8SetAux c'
|
||
|
||
@[simp]
|
||
theorem ValidPos.toSlice_get {s : String} {p : s.ValidPos} {h} :
|
||
p.toSlice.get h = p.get (ne_of_apply_ne (·.toSlice) (by simp_all)) := by
|
||
rfl
|
||
|
||
@[simp]
|
||
theorem ValidPos.offset_next {s : String} (p : s.ValidPos) (h : p ≠ s.endValidPos) :
|
||
(p.next h).offset = p.offset + p.get h := by
|
||
simp [next]
|
||
|
||
theorem ValidPos.byteIdx_offset_next {s : String} (p : s.ValidPos) (h : p ≠ s.endValidPos) :
|
||
(p.next h).offset.byteIdx = p.offset.byteIdx + (p.get h).utf8Size := by
|
||
simp
|
||
|
||
theorem ValidPos.byteIdx_lt_utf8ByteSize {s : String} (p : s.ValidPos) (h : p ≠ s.endValidPos) :
|
||
p.offset.byteIdx < s.utf8ByteSize := by
|
||
have := byteIdx_endPos ▸ Pos.Raw.le_iff.1 p.isValid.le_endPos
|
||
simp only [ne_eq, ValidPos.ext_iff, offset_endValidPos, Pos.Raw.ext_iff, byteIdx_endPos] at h
|
||
omega
|
||
|
||
@[simp]
|
||
theorem ValidPos.str_toSlice {s : String} {p : s.ValidPos} : p.toSlice.str = p := by
|
||
ext
|
||
simp
|
||
|
||
/-- The slice from the beginning of `s` up to `p` (exclusive). -/
|
||
@[inline, expose]
|
||
def replaceEnd (s : String) (p : s.ValidPos) : Slice :=
|
||
s.toSlice.replaceEnd p.toSlice
|
||
|
||
@[simp]
|
||
theorem str_replaceEnd {s : String} {p : s.ValidPos} : (s.replaceEnd p).str = s := rfl
|
||
|
||
@[simp]
|
||
theorem startInclusive_replaceEnd {s : String} {p : s.ValidPos} :
|
||
(s.replaceEnd p).startInclusive = s.startValidPos := by
|
||
simp [replaceEnd]
|
||
|
||
@[simp]
|
||
theorem endExclusive_replaceEnd {s : String} {p : s.ValidPos} :
|
||
(s.replaceEnd p).endExclusive = p := by
|
||
simp [replaceEnd]
|
||
|
||
theorem Pos.Raw.isValidForSlice_stringReplaceEnd {s : String} {p : s.ValidPos} {q : Pos.Raw} :
|
||
q.IsValidForSlice (s.replaceEnd p) ↔ q ≤ p.offset ∧ q.IsValid s := by
|
||
rw [replaceEnd, isValidForSlice_replaceEnd, ValidPos.offset_toSlice, isValidForSlice_toSlice_iff]
|
||
|
||
/-- The slice from `p` (inclusive) up to the end of `s`. -/
|
||
@[inline, expose]
|
||
def replaceStart (s : String) (p : s.ValidPos) : Slice :=
|
||
s.toSlice.replaceStart p.toSlice
|
||
|
||
@[simp]
|
||
theorem str_replaceStart {s : String} {p : s.ValidPos} : (s.replaceStart p).str = s := rfl
|
||
|
||
@[simp]
|
||
theorem startInclusive_replaceStart {s : String} {p : s.ValidPos} :
|
||
(s.replaceStart p).startInclusive = p := by
|
||
simp [replaceStart]
|
||
|
||
@[simp]
|
||
theorem endExclusive_replaceStart {s : String} {p : s.ValidPos} :
|
||
(s.replaceStart p).endExclusive = s.endValidPos := by
|
||
simp [replaceStart]
|
||
|
||
theorem Pos.Raw.isValidForSlice_stringReplaceStart {s : String} {p : s.ValidPos} {q : Pos.Raw} :
|
||
q.IsValidForSlice (s.replaceStart p) ↔ (q.offsetBy p.offset).IsValid s := by
|
||
rw [replaceStart, isValidForSlice_replaceStart, isValidForSlice_toSlice_iff,
|
||
ValidPos.offset_toSlice]
|
||
|
||
/--
|
||
Replaces the character at a specified position in a string with a new character.
|
||
|
||
If both the replacement character and the replaced character are 7-bit ASCII characters and the
|
||
string is not shared, then it is updated in-place and not copied.
|
||
|
||
Examples:
|
||
* `("abc".pos ⟨1⟩ (by decide)).set 'B' (by decide) = "aBc"`
|
||
* `("L∃∀N".pos ⟨4⟩ (by decide)).set 'X' (by decide) = "L∃XN"`
|
||
-/
|
||
@[extern "lean_string_utf8_set", expose]
|
||
def ValidPos.set {s : String} (p : s.ValidPos) (c : Char) (hp : p ≠ s.endValidPos) : String :=
|
||
if hc : c.utf8Size = 1 ∧ (p.byte hp).utf8ByteSize isUTF8FirstByte_byte = 1 then
|
||
.ofByteArray (s.bytes.set p.offset.byteIdx c.toUInt8 (p.byteIdx_lt_utf8ByteSize hp)) (by
|
||
rw [ByteArray.set_eq_push_extract_append_extract, ← hc.2, utf8ByteSize_byte,
|
||
← ValidPos.byteIdx_offset_next]
|
||
refine ByteArray.IsValidUTF8.append ?_ (p.next hp).isValid.isValidUTF8_extract_utf8ByteSize
|
||
exact p.isValid.isValidUTF8_extract_zero.push hc.1)
|
||
else
|
||
(s.replaceEnd p).copy ++ singleton c ++ (s.replaceStart (p.next hp)).copy
|
||
|
||
theorem ValidPos.set_eq_append {s : String} {p : s.ValidPos} {c : Char} {hp} :
|
||
p.set c hp = (s.replaceEnd p).copy ++ singleton c ++ (s.replaceStart (p.next hp)).copy := by
|
||
rw [set]
|
||
split
|
||
· rename_i h
|
||
simp [← bytes_inj, ByteArray.set_eq_push_extract_append_extract, Slice.bytes_copy,
|
||
List.utf8Encode_singleton, String.utf8EncodeChar_eq_singleton h.1, utf8ByteSize_byte ▸ h.2]
|
||
· rfl
|
||
|
||
theorem Pos.Raw.IsValid.set_of_le {s : String} {p : s.ValidPos} {c : Char} {hp : p ≠ s.endValidPos}
|
||
{q : Pos.Raw} (hq : q.IsValid s) (hpq : q ≤ p.offset) : q.IsValid (p.set c hp) := by
|
||
rw [ValidPos.set_eq_append, String.append_assoc]
|
||
apply append_right
|
||
rw [isValid_copy_iff, isValidForSlice_stringReplaceEnd]
|
||
exact ⟨hpq, hq⟩
|
||
|
||
/-- Given a valid position in a string, obtain the corresponding position after setting a character on
|
||
that string, provided that the position was before the changed position. -/
|
||
@[inline]
|
||
def ValidPos.setOfLE {s : String} (q p : s.ValidPos) (c : Char) (hp : p ≠ s.endValidPos)
|
||
(hpq : q ≤ p) : (p.set c hp).ValidPos where
|
||
offset := q.offset
|
||
isValid := q.isValid.set_of_le hpq
|
||
|
||
@[simp]
|
||
theorem ValidPos.offset_setOfLE {s : String} {q p : s.ValidPos} {c : Char} {hp : p ≠ s.endValidPos}
|
||
{hpq : q ≤ p} : (q.setOfLE p c hp hpq).offset = q.offset := (rfl)
|
||
|
||
/--
|
||
Replaces the character at position `p` in the string `s` with the result of applying `f` to that
|
||
character.
|
||
|
||
If both the replacement character and the replaced character are 7-bit ASCII characters and the
|
||
string is not shared, then it is updated in-place and not copied.
|
||
|
||
Examples:
|
||
* `("abc".pos ⟨1⟩ (by decide)).modify Char.toUpper (by decide) = "aBc"`
|
||
-/
|
||
def ValidPos.modify {s : String} (p : s.ValidPos) (f : Char → Char) (hp : p ≠ s.endValidPos) :
|
||
String :=
|
||
p.set (f <| p.get hp) hp
|
||
|
||
theorem Pos.Raw.IsValid.modify_of_le {s : String} {p : s.ValidPos} {f : Char → Char}
|
||
{hp : p ≠ s.endValidPos} {q : Pos.Raw} (hq : q.IsValid s) (hpq : q ≤ p.offset) :
|
||
q.IsValid (p.modify f hp) :=
|
||
set_of_le hq hpq
|
||
|
||
/-- Given a valid position in a string, obtain the corresponding position after modifying a character
|
||
in that string, provided that the position was before the changed position. -/
|
||
def ValidPos.modifyOfLE {s : String} (q p : s.ValidPos) (f : Char → Char)
|
||
(hp : p ≠ s.endValidPos) (hpq : q ≤ p) : (p.modify f hp).ValidPos where
|
||
offset := q.offset
|
||
isValid := q.isValid.modify_of_le hpq
|
||
|
||
@[simp]
|
||
theorem ValidPos.offset_modifyOfLE {s : String} {q p : s.ValidPos} {f : Char → Char}
|
||
{hp : p ≠ s.endValidPos} {hpq : q ≤ p} : (q.modifyOfLE p f hp hpq).offset = q.offset := (rfl)
|
||
|
||
/--
|
||
Replaces the character at a specified position in a string with a new character. If the position is
|
||
invalid, the string is returned unchanged.
|
||
|
||
If both the replacement character and the replaced character are 7-bit ASCII characters and the
|
||
string is not shared, then it is updated in-place and not copied.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.set`, combined with
|
||
`String.pos` or another means of obtaining a `String.ValidPos`.
|
||
|
||
Examples:
|
||
* `"abc".set ⟨1⟩ 'B' = "aBc"`
|
||
* `"abc".set ⟨3⟩ 'D' = "abc"`
|
||
* `"L∃∀N".set ⟨4⟩ 'X' = "L∃XN"`
|
||
* `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"` because `'∃'` is a multi-byte character, so the byte index `2` is an
|
||
invalid position.
|
||
-/
|
||
@[extern "lean_string_utf8_set", expose]
|
||
def Pos.Raw.set : String → (@& Pos.Raw) → Char → String
|
||
| s, i, c => (Pos.Raw.utf8SetAux c s.data 0 i).asString
|
||
|
||
@[extern "lean_string_utf8_set", expose, deprecated Pos.Raw.set (since := "2025-10-14")]
|
||
def set : String → (@& Pos.Raw) → Char → String
|
||
| s, i, c => (Pos.Raw.utf8SetAux c s.data 0 i).asString
|
||
|
||
/--
|
||
Replaces the character at position `p` in the string `s` with the result of applying `f` to that
|
||
character. If `p` is an invalid position, the string is returned unchanged.
|
||
|
||
If both the replacement character and the replaced character are 7-bit ASCII characters and the
|
||
string is not shared, then it is updated in-place and not copied.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.set`, combined with
|
||
`String.pos` or another means of obtaining a `String.ValidPos`.
|
||
|
||
Examples:
|
||
* `"abc".modify ⟨1⟩ Char.toUpper = "aBc"`
|
||
* `"abc".modify ⟨3⟩ Char.toUpper = "abc"`
|
||
-/
|
||
@[expose]
|
||
def Pos.Raw.modify (s : String) (i : Pos.Raw) (f : Char → Char) : String :=
|
||
i.set s (f (i.get s))
|
||
|
||
@[expose, inherit_doc Pos.Raw.modify, deprecated Pos.Raw.modify (since := "2025-10-10")]
|
||
def modify (s : String) (i : Pos.Raw) (f : Char → Char) : String :=
|
||
i.set s (f (i.get s))
|
||
|
||
/--
|
||
Returns the next position in a string after position `p`. If `p` is not a valid position or
|
||
`p = s.endPos`, returns the position one byte after `p`.
|
||
|
||
A run-time bounds check is performed to determine whether `p` is at the end of the string. If a
|
||
bounds check has already been performed, use `String.next'` to avoid a repeated check.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.next` or one of its
|
||
variants like `String.ValidPos.next?`, combined with `String.pos` or another means of obtaining
|
||
a `String.ValisPos`.
|
||
|
||
Some examples of edge cases:
|
||
* `"abc".next ⟨3⟩ = ⟨4⟩`, since `3 = "abc".endPos`
|
||
* `"L∃∀N".next ⟨2⟩ = ⟨3⟩`, since `2` points into the middle of a multi-byte UTF-8 character
|
||
|
||
Examples:
|
||
* `"abc".get ("abc".next 0) = 'b'`
|
||
* `"L∃∀N".get (0 |> "L∃∀N".next |> "L∃∀N".next) = '∀'`
|
||
-/
|
||
@[extern "lean_string_utf8_next", expose]
|
||
def Pos.Raw.next (s : @& String) (p : @& Pos.Raw) : Pos.Raw :=
|
||
let c := get s p
|
||
p + c
|
||
|
||
@[extern "lean_string_utf8_next", expose, deprecated Pos.Raw.next (since := "2025-10-14")]
|
||
def next (s : @& String) (p : @& Pos.Raw) : Pos.Raw :=
|
||
let c := p.get s
|
||
p + c
|
||
|
||
@[expose]
|
||
def Pos.Raw.utf8PrevAux : List Char → Pos.Raw → Pos.Raw → Pos.Raw
|
||
| [], _, p => ⟨p.byteIdx - 1⟩
|
||
| c::cs, i, p =>
|
||
let i' := i + c
|
||
if p ≤ i' then i else utf8PrevAux cs i' p
|
||
|
||
@[deprecated Pos.Raw.utf8PrevAux (since := "2025-10-10")]
|
||
abbrev utf8PrevAux : List Char → Pos.Raw → Pos.Raw → Pos.Raw :=
|
||
Pos.Raw.utf8PrevAux
|
||
|
||
/--
|
||
Returns the position in a string before a specified position, `p`. If `p = ⟨0⟩`, returns `0`. If `p`
|
||
is greater than `endPos`, returns the position one byte before `p`. Otherwise, if `p` occurs in the
|
||
middle of a multi-byte character, returns the beginning position of that character.
|
||
|
||
For example, `"L∃∀N".prev ⟨3⟩` is `⟨1⟩`, since byte 3 occurs in the middle of the multi-byte
|
||
character `'∃'` that starts at byte 1.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.prev` or one of its
|
||
variants like `String.ValidPos.prev?`, combined with `String.pos` or another means of obtaining
|
||
a `String.ValidPos`.
|
||
|
||
Examples:
|
||
* `"abc".get ("abc".endPos |> "abc".prev) = 'c'`
|
||
* `"L∃∀N".get ("L∃∀N".endPos |> "L∃∀N".prev |> "L∃∀N".prev |> "L∃∀N".prev) = '∃'`
|
||
-/
|
||
@[extern "lean_string_utf8_prev", expose]
|
||
def Pos.Raw.prev : (@& String) → (@& Pos.Raw) → Pos.Raw
|
||
| s, p => utf8PrevAux s.data 0 p
|
||
|
||
@[extern "lean_string_utf8_prev", expose, deprecated Pos.Raw.prev (since := "2025-10-14")]
|
||
def prev : (@& String) → (@& Pos.Raw) → Pos.Raw
|
||
| s, p => Pos.Raw.utf8PrevAux s.data 0 p
|
||
|
||
/--
|
||
Returns the first character in `s`. If `s = ""`, returns `(default : Char)`.
|
||
|
||
Examples:
|
||
* `"abc".front = 'a'`
|
||
* `"".front = (default : Char)`
|
||
-/
|
||
@[inline, expose] def front (s : String) : Char :=
|
||
Pos.Raw.get s 0
|
||
|
||
@[export lean_string_front]
|
||
def Internal.frontImpl (s : String) : Char :=
|
||
String.front s
|
||
|
||
theorem front_eq_get {s : String} : s.front = (0 : Pos.Raw).get s := rfl
|
||
|
||
/--
|
||
Returns the last character in `s`. If `s = ""`, returns `(default : Char)`.
|
||
|
||
Examples:
|
||
* `"abc".back = 'c'`
|
||
* `"".back = (default : Char)`
|
||
-/
|
||
@[inline, expose] def back (s : String) : Char :=
|
||
(s.endPos.prev s).get s
|
||
|
||
theorem back_eq_get_prev_endPos {s : String} : s.back = (s.endPos.prev s).get s := rfl
|
||
|
||
/--
|
||
Returns `true` if a specified byte position is greater than or equal to the position which points to
|
||
the end of a string. Otherwise, returns `false`.
|
||
|
||
Examples:
|
||
* `(0 |> "abc".next |> "abc".next |> "abc".atEnd) = false`
|
||
* `(0 |> "abc".next |> "abc".next |> "abc".next |> "abc".next |> "abc".atEnd) = true`
|
||
* `(0 |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".atEnd) = false`
|
||
* `(0 |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".next |> "L∃∀N".atEnd) = true`
|
||
* `"abc".atEnd ⟨4⟩ = true`
|
||
* `"L∃∀N".atEnd ⟨7⟩ = false`
|
||
* `"L∃∀N".atEnd ⟨8⟩ = true`
|
||
-/
|
||
@[extern "lean_string_utf8_at_end", expose]
|
||
def Pos.Raw.atEnd : (@& String) → (@& Pos.Raw) → Bool
|
||
| s, p => p.byteIdx ≥ utf8ByteSize s
|
||
|
||
@[extern "lean_string_utf8_at_end", expose, deprecated Pos.Raw.atEnd (since := "2025-10-14")]
|
||
def atEnd : (@& String) → (@& Pos.Raw) → Bool
|
||
| s, p => p.byteIdx ≥ utf8ByteSize s
|
||
|
||
/--
|
||
Returns the character at position `p` of a string. Returns `(default : Char)`, which is `'A'`, if
|
||
`p` is not a valid position.
|
||
|
||
Requires evidence, `h`, that `p` is within bounds instead of performing a run-time bounds check as
|
||
in `String.get`.
|
||
|
||
A typical pattern combines `get'` with a dependent `if`-expression to avoid the overhead of an
|
||
additional bounds check. For example:
|
||
```
|
||
def getInBounds? (s : String) (p : String.Pos) : Option Char :=
|
||
if h : s.atEnd p then none else some (s.get' p h)
|
||
```
|
||
Even with evidence of `¬ s.atEnd p`, `p` may be invalid if a byte index points into the middle of a
|
||
multi-byte UTF-8 character. For example, `"L∃∀N".get' ⟨2⟩ (by decide) = (default : Char)`.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.get`, combined with
|
||
`String.pos` or another means of obtaining a `String.ValidPos`.
|
||
|
||
Examples:
|
||
* `"abc".get' 0 (by decide) = 'a'`
|
||
* `let lean := "L∃∀N"; lean.get' (0 |> lean.next |> lean.next) (by decide) = '∀'`
|
||
-/
|
||
@[extern "lean_string_utf8_get_fast", expose]
|
||
def Pos.Raw.get' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.data 0 p
|
||
|
||
@[extern "lean_string_utf8_get_fast", expose, deprecated Pos.Raw.get' (since := "2025-10-14")]
|
||
def get' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Char :=
|
||
match s with
|
||
| s => Pos.Raw.utf8GetAux s.data 0 p
|
||
|
||
/--
|
||
Returns the next position in a string after position `p`. The result is unspecified if `p` is not a
|
||
valid position.
|
||
|
||
Requires evidence, `h`, that `p` is within bounds. No run-time bounds check is performed, as in
|
||
`String.next`.
|
||
|
||
A typical pattern combines `String.next'` with a dependent `if`-expression to avoid the overhead of
|
||
an additional bounds check. For example:
|
||
```
|
||
def next? (s : String) (p : String.Pos) : Option Char :=
|
||
if h : s.atEnd p then none else s.get (s.next' p h)
|
||
```
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.next`, combined with
|
||
`String.pos` or another means of obtaining a `String.ValidPos`.
|
||
|
||
Example:
|
||
* `let abc := "abc"; abc.get (abc.next' 0 (by decide)) = 'b'`
|
||
-/
|
||
@[extern "lean_string_utf8_next_fast", expose]
|
||
def Pos.Raw.next' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Pos.Raw :=
|
||
let c := p.get s
|
||
p + c
|
||
|
||
@[extern "lean_string_utf8_next_fast", expose, deprecated Pos.Raw.next' (since := "2025-10-14")]
|
||
def next' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Pos.Raw :=
|
||
let c := p.get s
|
||
p + c
|
||
|
||
@[deprecated Pos.Raw.lt_iff (since := "2025-10-10")]
|
||
theorem pos_lt_eq (p₁ p₂ : Pos.Raw) : (p₁ < p₂) = (p₁.1 < p₂.1) :=
|
||
propext Pos.Raw.lt_iff
|
||
|
||
@[deprecated Pos.Raw.byteIdx_add_char (since := "2025-10-10")]
|
||
theorem pos_add_char (p : Pos.Raw) (c : Char) : (p + c).byteIdx = p.byteIdx + c.utf8Size :=
|
||
Pos.Raw.byteIdx_add_char
|
||
|
||
protected theorem Pos.Raw.ne_zero_of_lt : {a b : Pos.Raw} → a < b → b ≠ 0
|
||
| _, _, hlt, rfl => Nat.not_lt_zero _ hlt
|
||
|
||
theorem Pos.Raw.lt_next (s : String) (i : Pos.Raw) : i < i.next s :=
|
||
Nat.add_lt_add_left (Char.utf8Size_pos _) _
|
||
|
||
theorem Pos.Raw.byteIdx_lt_byteIdx_next (s : String) (i : Pos.Raw) : i.1 < (i.next s).1 :=
|
||
lt_iff.1 (i.lt_next s)
|
||
|
||
@[deprecated Pos.Raw.byteIdx_lt_byteIdx_next (since := "2025-10-10")]
|
||
theorem lt_next (s : String) (i : Pos.Raw) : i.1 < (i.next s).1 :=
|
||
Pos.Raw.lt_next s i
|
||
|
||
theorem Pos.Raw.utf8PrevAux_lt_of_pos : ∀ (cs : List Char) (i p : Pos.Raw), i < p → p ≠ 0 →
|
||
(Pos.Raw.utf8PrevAux cs i p).1 < p.1
|
||
| [], _, _, _, h => Nat.sub_one_lt (mt (congrArg Pos.Raw.mk) h)
|
||
| c::cs, i, p, h, h' => by
|
||
simp [utf8PrevAux]
|
||
apply iteInduction (motive := (Pos.Raw.byteIdx · < _)) <;> intro h''
|
||
next => exact h
|
||
next => exact utf8PrevAux_lt_of_pos _ _ _ (Nat.lt_of_not_le h'') h'
|
||
|
||
theorem Pos.Raw.prev_lt_of_pos (s : String) (i : Pos.Raw) (h : i ≠ 0) : (i.prev s).1 < i.1 :=
|
||
utf8PrevAux_lt_of_pos _ _ _ (Nat.zero_lt_of_ne_zero (mt (congrArg Pos.Raw.mk) h)) h
|
||
|
||
@[deprecated Pos.Raw.prev_lt_of_pos (since := "2025-10-10")]
|
||
theorem prev_lt_of_pos (s : String) (i : Pos.Raw) (h : i ≠ 0) : (i.prev s).1 < i.1 :=
|
||
Pos.Raw.prev_lt_of_pos s i h
|
||
|
||
def posOfAux (s : String) (c : Char) (stopPos : Pos.Raw) (pos : Pos.Raw) : Pos.Raw :=
|
||
if h : pos < stopPos then
|
||
if pos.get s == c then pos
|
||
else
|
||
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next s pos)
|
||
posOfAux s c stopPos (pos.next s)
|
||
else pos
|
||
termination_by stopPos.1 - pos.1
|
||
|
||
/--
|
||
Returns the position of the first occurrence of a character, `c`, in a string `s`. If `s` does not
|
||
contain `c`, returns `s.endPos`.
|
||
|
||
Examples:
|
||
* `"abcba".posOf 'a' = ⟨0⟩`
|
||
* `"abcba".posOf 'z' = ⟨5⟩`
|
||
* `"L∃∀N".posOf '∀' = ⟨4⟩`
|
||
-/
|
||
@[inline] def posOf (s : String) (c : Char) : Pos.Raw :=
|
||
posOfAux s c s.endPos 0
|
||
|
||
@[export lean_string_posof]
|
||
def Internal.posOfImpl (s : String) (c : Char) : Pos.Raw :=
|
||
String.posOf s c
|
||
|
||
def revPosOfAux (s : String) (c : Char) (pos : Pos.Raw) : Option Pos.Raw :=
|
||
if h : pos = 0 then none
|
||
else
|
||
have := Pos.Raw.prev_lt_of_pos s pos h
|
||
let pos := pos.prev s
|
||
if pos.get s == c then some pos
|
||
else revPosOfAux s c pos
|
||
termination_by pos.1
|
||
|
||
/--
|
||
Returns the position of the last occurrence of a character, `c`, in a string `s`. If `s` does not
|
||
contain `c`, returns `none`.
|
||
|
||
Examples:
|
||
* `"abcabc".revPosOf 'a' = some ⟨3⟩`
|
||
* `"abcabc".revPosOf 'z' = none`
|
||
* `"L∃∀N".revPosOf '∀' = some ⟨4⟩`
|
||
-/
|
||
@[inline] def revPosOf (s : String) (c : Char) : Option Pos.Raw :=
|
||
revPosOfAux s c s.endPos
|
||
|
||
def findAux (s : String) (p : Char → Bool) (stopPos : Pos.Raw) (pos : Pos.Raw) : Pos.Raw :=
|
||
if h : pos < stopPos then
|
||
if p (pos.get s) then pos
|
||
else
|
||
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next s pos)
|
||
findAux s p stopPos (pos.next s)
|
||
else pos
|
||
termination_by stopPos.1 - pos.1
|
||
|
||
/--
|
||
Finds the position of the first character in a string for which the Boolean predicate `p` returns
|
||
`true`. If there is no such character in the string, then the end position of the string is
|
||
returned.
|
||
|
||
Examples:
|
||
* `"coffee tea water".find (·.isWhitespace) = ⟨6⟩`
|
||
* `"tea".find (· == 'X') = ⟨3⟩`
|
||
* `"".find (· == 'X') = ⟨0⟩`
|
||
-/
|
||
@[inline] def find (s : String) (p : Char → Bool) : Pos.Raw :=
|
||
findAux s p s.endPos 0
|
||
|
||
def revFindAux (s : String) (p : Char → Bool) (pos : Pos.Raw) : Option Pos.Raw :=
|
||
if h : pos = 0 then none
|
||
else
|
||
have := Pos.Raw.prev_lt_of_pos s pos h
|
||
let pos := pos.prev s
|
||
if p (pos.get s) then some pos
|
||
else revFindAux s p pos
|
||
termination_by pos.1
|
||
|
||
/--
|
||
Finds the position of the last character in a string for which the Boolean predicate `p` returns
|
||
`true`. If there is no such character in the string, then `none` is returned.
|
||
|
||
Examples:
|
||
* `"coffee tea water".revFind (·.isWhitespace) = some ⟨10⟩`
|
||
* `"tea".revFind (· == 'X') = none`
|
||
* `"".revFind (· == 'X') = none`
|
||
-/
|
||
@[inline] def revFind (s : String) (p : Char → Bool) : Option Pos.Raw :=
|
||
revFindAux s p s.endPos
|
||
|
||
/--
|
||
Returns either `p₁` or `p₂`, whichever has the least byte index.
|
||
-/
|
||
abbrev Pos.Raw.min (p₁ p₂ : Pos.Raw) : Pos.Raw :=
|
||
{ byteIdx := p₁.byteIdx.min p₂.byteIdx }
|
||
|
||
@[export lean_string_pos_min]
|
||
def Pos.Raw.Internal.minImpl (p₁ p₂ : Pos.Raw) : Pos.Raw :=
|
||
Pos.Raw.min p₁ p₂
|
||
|
||
/--
|
||
Returns the first position where the two strings differ.
|
||
|
||
If one string is a prefix of the other, then the returned position is the end position of the
|
||
shorter string. If the strings are identical, then their end position is returned.
|
||
|
||
Examples:
|
||
* `"tea".firstDiffPos "ten" = ⟨2⟩`
|
||
* `"tea".firstDiffPos "tea" = ⟨3⟩`
|
||
* `"tea".firstDiffPos "teas" = ⟨3⟩`
|
||
* `"teas".firstDiffPos "tea" = ⟨3⟩`
|
||
-/
|
||
@[expose]
|
||
def firstDiffPos (a b : String) : Pos.Raw :=
|
||
let stopPos := a.endPos.min b.endPos
|
||
let rec loop (i : Pos.Raw) : Pos.Raw :=
|
||
if h : i < stopPos then
|
||
if i.get a != i.get b then i
|
||
else
|
||
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next a i)
|
||
loop (i.next a)
|
||
else i
|
||
termination_by stopPos.1 - i.1
|
||
loop 0
|
||
|
||
/--
|
||
Creates a new string that consists of the region of the input string delimited by the two positions.
|
||
|
||
The result is `""` if the start position is greater than or equal to the end position or if the
|
||
start position is at the end of the string. If either position is invalid (that is, if either points
|
||
at the middle of a multi-byte UTF-8 character) then the result is unspecified.
|
||
|
||
This is a legacy function. The recommended alternative is `String.ValidPos.extract`, but usually
|
||
it is even better to operate on `String.Slice` instead and call `String.Slice.copy` (only) if
|
||
required.
|
||
|
||
Examples:
|
||
* `"red green blue".extract ⟨0⟩ ⟨3⟩ = "red"`
|
||
* `"red green blue".extract ⟨3⟩ ⟨0⟩ = ""`
|
||
* `"red green blue".extract ⟨0⟩ ⟨100⟩ = "red green blue"`
|
||
* `"red green blue".extract ⟨4⟩ ⟨100⟩ = "green blue"`
|
||
* `"L∃∀N".extract ⟨1⟩ ⟨2⟩ = "∃∀N"`
|
||
* `"L∃∀N".extract ⟨2⟩ ⟨100⟩ = ""`
|
||
-/
|
||
@[extern "lean_string_utf8_extract", expose]
|
||
def Pos.Raw.extract : (@& String) → (@& Pos.Raw) → (@& Pos.Raw) → String
|
||
| s, b, e => if b.byteIdx ≥ e.byteIdx then "" else (go₁ s.data 0 b e).asString
|
||
where
|
||
go₁ : List Char → Pos.Raw → Pos.Raw → Pos.Raw → List Char
|
||
| [], _, _, _ => []
|
||
| s@(c::cs), i, b, e => if i = b then go₂ s i e else go₁ cs (i + c) b e
|
||
|
||
go₂ : List Char → Pos.Raw → Pos.Raw → List Char
|
||
| [], _, _ => []
|
||
| c::cs, i, e => if i = e then [] else c :: go₂ cs (i + c) e
|
||
|
||
@[extern "lean_string_utf8_extract", expose, deprecated Pos.Raw.extract (since := "2025-10-14")]
|
||
def extract : (@& String) → (@& Pos.Raw) → (@& Pos.Raw) → String
|
||
| s, b, e => Pos.Raw.extract s b e
|
||
|
||
@[specialize] def splitAux (s : String) (p : Char → Bool) (b : Pos.Raw) (i : Pos.Raw) (r : List String) : List String :=
|
||
if h : i.atEnd s then
|
||
let r := (b.extract s i)::r
|
||
r.reverse
|
||
else
|
||
have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (Pos.Raw.lt_next s _)
|
||
if p (i.get s) then
|
||
let i' := i.next s
|
||
splitAux s p i' i' (b.extract s i :: r)
|
||
else
|
||
splitAux s p b (i.next s) r
|
||
termination_by s.endPos.1 - i.1
|
||
|
||
/--
|
||
Splits a string at each character for which `p` returns `true`.
|
||
|
||
The characters that satisfy `p` are not included in any of the resulting strings. If multiple
|
||
characters in a row satisfy `p`, then the resulting list will contain empty strings.
|
||
|
||
Examples:
|
||
* `"coffee tea water".split (·.isWhitespace) = ["coffee", "tea", "water"]`
|
||
* `"coffee tea water".split (·.isWhitespace) = ["coffee", "", "tea", "", "water"]`
|
||
* `"fun x =>\n x + 1\n".split (· == '\n') = ["fun x =>", " x + 1", ""]`
|
||
-/
|
||
@[inline] def splitToList (s : String) (p : Char → Bool) : List String :=
|
||
splitAux s p 0 0 []
|
||
|
||
@[inline, deprecated splitToList (since := "2025-10-17")]
|
||
def split (s : String) (p : Char → Bool) : List String :=
|
||
splitToList s p
|
||
|
||
/--
|
||
Auxiliary for `splitOn`. Preconditions:
|
||
* `sep` is not empty
|
||
* `b <= i` are indexes into `s`
|
||
* `j` is an index into `sep`, and not at the end
|
||
|
||
It represents the state where we have currently parsed some split parts into `r` (in reverse order),
|
||
`b` is the beginning of the string / the end of the previous match of `sep`, and the first `j` bytes
|
||
of `sep` match the bytes `i-j .. i` of `s`.
|
||
-/
|
||
def splitOnAux (s sep : String) (b : Pos.Raw) (i : Pos.Raw) (j : Pos.Raw) (r : List String) : List String :=
|
||
if i.atEnd s then
|
||
let r := (b.extract s i)::r
|
||
r.reverse
|
||
else
|
||
if i.get s == j.get sep then
|
||
let i := i.next s
|
||
let j := j.next sep
|
||
if j.atEnd sep then
|
||
splitOnAux s sep i i 0 (b.extract s (i.unoffsetBy j)::r)
|
||
else
|
||
splitOnAux s sep b i j r
|
||
else
|
||
splitOnAux s sep b ((i.unoffsetBy j).next s) 0 r
|
||
termination_by (s.endPos.1 - (j.byteDistance i), sep.endPos.1 - j.1)
|
||
decreasing_by
|
||
focus
|
||
rename_i h _ _
|
||
left; exact Nat.sub_lt_sub_left
|
||
(Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h)))
|
||
(Nat.lt_of_le_of_lt (Nat.sub_le ..) (Pos.Raw.lt_next s _))
|
||
focus
|
||
rename_i i₀ j₀ _ eq h'
|
||
rw [show (j₀.next sep).byteDistance (i₀.next s) = j₀.byteDistance i₀ by
|
||
change (_ + Char.utf8Size _) - (_ + Char.utf8Size _) = _
|
||
rw [(beq_iff_eq ..).1 eq, Nat.add_sub_add_right]; rfl]
|
||
right; exact Nat.sub_lt_sub_left
|
||
(Nat.lt_of_le_of_lt (Nat.le_add_right ..) (Nat.gt_of_not_le (mt decide_eq_true h')))
|
||
(Pos.Raw.lt_next sep _)
|
||
focus
|
||
rename_i h _
|
||
left; exact Nat.sub_lt_sub_left
|
||
(Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h)))
|
||
(Pos.Raw.lt_next s _)
|
||
|
||
/--
|
||
Splits a string `s` on occurrences of the separator string `sep`. The default separator is `" "`.
|
||
|
||
When `sep` is empty, the result is `[s]`. When `sep` occurs in overlapping patterns, the first match
|
||
is taken. There will always be exactly `n+1` elements in the returned list if there were `n`
|
||
non-overlapping matches of `sep` in the string. The separators are not included in the returned
|
||
substrings.
|
||
|
||
Examples:
|
||
* `"here is some text ".splitOn = ["here", "is", "some", "text", ""]`
|
||
* `"here is some text ".splitOn "some" = ["here is ", " text "]`
|
||
* `"here is some text ".splitOn "" = ["here is some text "]`
|
||
* `"ababacabac".splitOn "aba" = ["", "bac", "c"]`
|
||
-/
|
||
@[inline] def splitOn (s : String) (sep : String := " ") : List String :=
|
||
if sep == "" then [s] else splitOnAux s sep 0 0 0 []
|
||
|
||
instance : Inhabited String := ⟨""⟩
|
||
|
||
instance : Append String := ⟨String.append⟩
|
||
|
||
/--
|
||
Adds multiple repetitions of a character to the end of a string.
|
||
|
||
Returns `s`, with `n` repetitions of `c` at the end. Internally, the implementation repeatedly calls
|
||
`String.push`, so the string is modified in-place if there is a unique reference to it.
|
||
|
||
Examples:
|
||
* `"indeed".pushn '!' 2 = "indeed!!"`
|
||
* `"indeed".pushn '!' 0 = "indeed"`
|
||
* `"".pushn ' ' 4 = " "`
|
||
-/
|
||
@[inline] def pushn (s : String) (c : Char) (n : Nat) : String :=
|
||
n.repeat (fun s => s.push c) s
|
||
|
||
@[export lean_string_pushn]
|
||
def Internal.pushnImpl (s : String) (c : Char) (n : Nat) : String :=
|
||
String.pushn s c n
|
||
|
||
/--
|
||
Checks whether a string is empty.
|
||
|
||
Empty strings are equal to `""` and have length and end position `0`.
|
||
|
||
Examples:
|
||
* `"".isEmpty = true`
|
||
* `"empty".isEmpty = false`
|
||
* `" ".isEmpty = false`
|
||
-/
|
||
@[inline] def isEmpty (s : String) : Bool :=
|
||
s.endPos == 0
|
||
|
||
@[export lean_string_isempty]
|
||
def Internal.isEmptyImpl (s : String) : Bool :=
|
||
String.isEmpty s
|
||
|
||
/--
|
||
Appends all the strings in a list of strings, in order.
|
||
|
||
Use `String.intercalate` to place a separator string between the strings in a list.
|
||
|
||
Examples:
|
||
* `String.join ["gr", "ee", "n"] = "green"`
|
||
* `String.join ["b", "", "l", "", "ue"] = "blue"`
|
||
* `String.join [] = ""`
|
||
-/
|
||
@[inline] def join (l : List String) : String :=
|
||
l.foldl (fun r s => r ++ s) ""
|
||
|
||
/--
|
||
Appends the strings in a list of strings, placing the separator `s` between each pair.
|
||
|
||
Examples:
|
||
* `", ".intercalate ["red", "green", "blue"] = "red, green, blue"`
|
||
* `" and ".intercalate ["tea", "coffee"] = "tea and coffee"`
|
||
* `" | ".intercalate ["M", "", "N"] = "M | | N"`
|
||
-/
|
||
def intercalate (s : String) : List String → String
|
||
| [] => ""
|
||
| a :: as => go a s as
|
||
where go (acc : String) (s : String) : List String → String
|
||
| a :: as => go (acc ++ s ++ a) s as
|
||
| [] => acc
|
||
|
||
@[export lean_string_intercalate]
|
||
def Internal.intercalateImpl (s : String) : List String → String :=
|
||
String.intercalate s
|
||
|
||
/--
|
||
An iterator over the characters (Unicode code points) in a `String`. Typically created by
|
||
`String.iter`.
|
||
|
||
String iterators pair a string with a valid byte index. This allows efficient character-by-character
|
||
processing of strings while avoiding the need to manually ensure that byte indices are used with the
|
||
correct strings.
|
||
|
||
An iterator is *valid* if the position `i` is *valid* for the string `s`, meaning `0 ≤ i ≤ s.endPos`
|
||
and `i` lies on a UTF8 byte boundary. If `i = s.endPos`, the iterator is at the end of the string.
|
||
|
||
Most operations on iterators return unspecified values if the iterator is not valid. The functions
|
||
in the `String.Iterator` API rule out the creation of invalid iterators, with two exceptions:
|
||
- `Iterator.next iter` is invalid if `iter` is already at the end of the string (`iter.atEnd` is
|
||
`true`), and
|
||
- `Iterator.forward iter n`/`Iterator.nextn iter n` is invalid if `n` is strictly greater than the
|
||
number of remaining characters.
|
||
-/
|
||
structure Iterator where
|
||
/-- The string being iterated over. -/
|
||
s : String
|
||
/-- The current UTF-8 byte position in the string `s`.
|
||
|
||
This position is not guaranteed to be valid for the string. If the position is not valid, then the
|
||
current character is `(default : Char)`, similar to `String.get` on an invalid position.
|
||
-/
|
||
i : Pos.Raw
|
||
deriving DecidableEq, Inhabited
|
||
|
||
/-- Creates an iterator at the beginning of the string. -/
|
||
@[inline] def mkIterator (s : String) : Iterator :=
|
||
⟨s, 0⟩
|
||
|
||
@[inherit_doc mkIterator]
|
||
abbrev iter := mkIterator
|
||
|
||
/--
|
||
The size of a string iterator is the number of bytes remaining.
|
||
|
||
Recursive functions that iterate towards the end of a string will typically decrease this measure.
|
||
-/
|
||
instance : SizeOf String.Iterator where
|
||
sizeOf i := i.1.utf8ByteSize - i.2.byteIdx
|
||
|
||
theorem Iterator.sizeOf_eq (i : String.Iterator) : sizeOf i = i.1.utf8ByteSize - i.2.byteIdx :=
|
||
rfl
|
||
|
||
namespace Iterator
|
||
@[inline, inherit_doc Iterator.s]
|
||
def toString := Iterator.s
|
||
|
||
/--
|
||
The number of UTF-8 bytes remaining in the iterator.
|
||
-/
|
||
@[inline] def remainingBytes : Iterator → Nat
|
||
| ⟨s, i⟩ => s.endPos.byteIdx - i.byteIdx
|
||
|
||
@[inline, inherit_doc Iterator.i]
|
||
def pos := Iterator.i
|
||
|
||
/--
|
||
Gets the character at the iterator's current position.
|
||
|
||
A run-time bounds check is performed. Use `String.Iterator.curr'` to avoid redundant bounds checks.
|
||
|
||
If the position is invalid, returns `(default : Char)`.
|
||
-/
|
||
@[inline] def curr : Iterator → Char
|
||
| ⟨s, i⟩ => i.get s
|
||
|
||
/--
|
||
Moves the iterator's position forward by one character, unconditionally.
|
||
|
||
It is only valid to call this function if the iterator is not at the end of the string (i.e.
|
||
if `Iterator.atEnd` is `false`); otherwise, the resulting iterator will be invalid.
|
||
-/
|
||
@[inline] def next : Iterator → Iterator
|
||
| ⟨s, i⟩ => ⟨s, i.next s⟩
|
||
|
||
/--
|
||
Moves the iterator's position backward by one character, unconditionally.
|
||
|
||
The position is not changed if the iterator is at the beginning of the string.
|
||
-/
|
||
@[inline] def prev : Iterator → Iterator
|
||
| ⟨s, i⟩ => ⟨s, i.prev s⟩
|
||
|
||
/--
|
||
Checks whether the iterator is past its string's last character.
|
||
-/
|
||
@[inline] def atEnd : Iterator → Bool
|
||
| ⟨s, i⟩ => i.byteIdx ≥ s.endPos.byteIdx
|
||
|
||
/--
|
||
Checks whether the iterator is at or before the string's last character.
|
||
-/
|
||
@[inline] def hasNext : Iterator → Bool
|
||
| ⟨s, i⟩ => i.byteIdx < s.endPos.byteIdx
|
||
|
||
/--
|
||
Checks whether the iterator is after the beginning of the string.
|
||
-/
|
||
@[inline] def hasPrev : Iterator → Bool
|
||
| ⟨_, i⟩ => i.byteIdx > 0
|
||
|
||
/--
|
||
Gets the character at the iterator's current position.
|
||
|
||
The proof of `it.hasNext` ensures that there is, in fact, a character at the current position. This
|
||
function is faster that `String.Iterator.curr` due to avoiding a run-time bounds check.
|
||
-/
|
||
@[inline] def curr' (it : Iterator) (h : it.hasNext) : Char :=
|
||
match it with
|
||
| ⟨s, i⟩ => i.get' s (by simpa only [hasNext, endPos, decide_eq_true_eq, Pos.Raw.atEnd, ge_iff_le, Nat.not_le] using h)
|
||
|
||
/--
|
||
Moves the iterator's position forward by one character, unconditionally.
|
||
|
||
The proof of `it.hasNext` ensures that there is, in fact, a position that's one character forwards.
|
||
This function is faster that `String.Iterator.next` due to avoiding a run-time bounds check.
|
||
-/
|
||
@[inline] def next' (it : Iterator) (h : it.hasNext) : Iterator :=
|
||
match it with
|
||
| ⟨s, i⟩ => ⟨s, i.next' s (by simpa only [hasNext, endPos, decide_eq_true_eq, Pos.Raw.atEnd, ge_iff_le, Nat.not_le] using h)⟩
|
||
|
||
/--
|
||
Replaces the current character in the string.
|
||
|
||
Does nothing if the iterator is at the end of the string. If both the replacement character and the
|
||
replaced character are 7-bit ASCII characters and the string is not shared, then it is updated
|
||
in-place and not copied.
|
||
-/
|
||
@[inline] def setCurr : Iterator → Char → Iterator
|
||
| ⟨s, i⟩, c => ⟨i.set s c, i⟩
|
||
|
||
/--
|
||
Moves the iterator's position to the end of the string, just past the last character.
|
||
-/
|
||
@[inline] def toEnd : Iterator → Iterator
|
||
| ⟨s, _⟩ => ⟨s, s.endPos⟩
|
||
|
||
/--
|
||
Extracts the substring between the positions of two iterators. The first iterator's position is the
|
||
start of the substring, and the second iterator's position is the end.
|
||
|
||
Returns the empty string if the iterators are for different strings, or if the position of the first
|
||
iterator is past the position of the second iterator.
|
||
-/
|
||
@[inline] def extract : Iterator → Iterator → String
|
||
| ⟨s₁, b⟩, ⟨s₂, e⟩ =>
|
||
if s₁ ≠ s₂ || b > e then ""
|
||
else b.extract s₁ e
|
||
|
||
/--
|
||
Moves the iterator's position forward by the specified number of characters.
|
||
|
||
The resulting iterator is only valid if the number of characters to skip is less than or equal
|
||
to the number of characters left in the iterator.
|
||
-/
|
||
def forward : Iterator → Nat → Iterator
|
||
| it, 0 => it
|
||
| it, n+1 => forward it.next n
|
||
|
||
/--
|
||
The remaining characters in an iterator, as a string.
|
||
-/
|
||
@[inline] def remainingToString : Iterator → String
|
||
| ⟨s, i⟩ => i.extract s s.endPos
|
||
|
||
@[inherit_doc forward]
|
||
def nextn : Iterator → Nat → Iterator
|
||
| it, 0 => it
|
||
| it, i+1 => nextn it.next i
|
||
|
||
/--
|
||
Moves the iterator's position back by the specified number of characters, stopping at the beginning
|
||
of the string.
|
||
-/
|
||
def prevn : Iterator → Nat → Iterator
|
||
| it, 0 => it
|
||
| it, i+1 => prevn it.prev i
|
||
|
||
end Iterator
|
||
|
||
def offsetOfPosAux (s : String) (pos : Pos.Raw) (i : Pos.Raw) (offset : Nat) : Nat :=
|
||
if i >= pos then offset
|
||
else if h : i.atEnd s then
|
||
offset
|
||
else
|
||
have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (Pos.Raw.lt_next s _)
|
||
offsetOfPosAux s pos (i.next s) (offset+1)
|
||
termination_by s.endPos.1 - i.1
|
||
|
||
/--
|
||
Returns the character index that corresponds to the provided position (i.e. UTF-8 byte index) in a
|
||
string.
|
||
|
||
If the position is at the end of the string, then the string's length in characters is returned. If
|
||
the position is invalid due to pointing at the middle of a UTF-8 byte sequence, then the character
|
||
index of the next character after the position is returned.
|
||
|
||
Examples:
|
||
* `"L∃∀N".offsetOfPos ⟨0⟩ = 0`
|
||
* `"L∃∀N".offsetOfPos ⟨1⟩ = 1`
|
||
* `"L∃∀N".offsetOfPos ⟨2⟩ = 2`
|
||
* `"L∃∀N".offsetOfPos ⟨4⟩ = 2`
|
||
* `"L∃∀N".offsetOfPos ⟨5⟩ = 3`
|
||
* `"L∃∀N".offsetOfPos ⟨50⟩ = 4`
|
||
-/
|
||
@[inline] def offsetOfPos (s : String) (pos : Pos.Raw) : Nat :=
|
||
offsetOfPosAux s pos 0 0
|
||
|
||
@[export lean_string_offsetofpos]
|
||
def Internal.offsetOfPosImpl (s : String) (pos : Pos.Raw) : Nat :=
|
||
String.offsetOfPos s pos
|
||
|
||
@[specialize] def foldlAux {α : Type u} (f : α → Char → α) (s : String) (stopPos : Pos.Raw) (i : Pos.Raw) (a : α) : α :=
|
||
if h : i < stopPos then
|
||
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next s i)
|
||
foldlAux f s stopPos (i.next s) (f a (i.get s))
|
||
else a
|
||
termination_by stopPos.1 - i.1
|
||
|
||
/--
|
||
Folds a function over a string from the left, accumulating a value starting with `init`. The
|
||
accumulated value is combined with each character in order, using `f`.
|
||
|
||
Examples:
|
||
* `"coffee tea water".foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 2`
|
||
* `"coffee tea and water".foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 3`
|
||
* `"coffee tea water".foldl (·.push ·) "" = "coffee tea water"`
|
||
-/
|
||
@[inline] def foldl {α : Type u} (f : α → Char → α) (init : α) (s : String) : α :=
|
||
foldlAux f s s.endPos 0 init
|
||
|
||
@[export lean_string_foldl]
|
||
def Internal.foldlImpl (f : String → Char → String) (init : String) (s : String) : String :=
|
||
String.foldl f init s
|
||
|
||
@[specialize] def foldrAux {α : Type u} (f : Char → α → α) (a : α) (s : String) (i begPos : Pos.Raw) : α :=
|
||
if h : begPos < i then
|
||
have := Pos.Raw.prev_lt_of_pos s i <| mt (congrArg String.Pos.Raw.byteIdx) <|
|
||
Ne.symm <| Nat.ne_of_lt <| Nat.lt_of_le_of_lt (Nat.zero_le _) h
|
||
let i := i.prev s
|
||
let a := f (i.get s) a
|
||
foldrAux f a s i begPos
|
||
else a
|
||
termination_by i.1
|
||
|
||
/--
|
||
Folds a function over a string from the right, accumulating a value starting with `init`. The
|
||
accumulated value is combined with each character in reverse order, using `f`.
|
||
|
||
Examples:
|
||
* `"coffee tea water".foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 2`
|
||
* `"coffee tea and water".foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 3`
|
||
* `"coffee tea water".foldr (fun c s => c.push s) "" = "retaw dna aet eeffoc"`
|
||
-/
|
||
@[inline] def foldr {α : Type u} (f : Char → α → α) (init : α) (s : String) : α :=
|
||
foldrAux f init s s.endPos 0
|
||
|
||
@[specialize] def anyAux (s : String) (stopPos : Pos.Raw) (p : Char → Bool) (i : Pos.Raw) : Bool :=
|
||
if h : i < stopPos then
|
||
if p (i.get s) then true
|
||
else
|
||
have := Nat.sub_lt_sub_left h (Pos.Raw.lt_next s i)
|
||
anyAux s stopPos p (i.next s)
|
||
else false
|
||
termination_by stopPos.1 - i.1
|
||
|
||
/--
|
||
Checks whether there is a character in a string for which the Boolean predicate `p` returns `true`.
|
||
|
||
Short-circuits at the first character for which `p` returns `true`.
|
||
|
||
Examples:
|
||
* `"brown".any (·.isLetter) = true`
|
||
* `"brown".any (·.isWhitespace) = false`
|
||
* `"brown and orange".any (·.isLetter) = true`
|
||
* `"".any (fun _ => false) = false`
|
||
-/
|
||
@[inline] def any (s : String) (p : Char → Bool) : Bool :=
|
||
anyAux s s.endPos p 0
|
||
|
||
@[export lean_string_any]
|
||
def Internal.anyImpl (s : String) (p : Char → Bool) :=
|
||
String.any s p
|
||
|
||
/--
|
||
Checks whether the Boolean predicate `p` returns `true` for every character in a string.
|
||
|
||
Short-circuits at the first character for which `p` returns `false`.
|
||
|
||
Examples:
|
||
* `"brown".all (·.isLetter) = true`
|
||
* `"brown and orange".all (·.isLetter) = false`
|
||
* `"".all (fun _ => false) = true`
|
||
-/
|
||
@[inline] def all (s : String) (p : Char → Bool) : Bool :=
|
||
!s.any (fun c => !p c)
|
||
|
||
/--
|
||
Checks whether a string contains the specified character.
|
||
|
||
Examples:
|
||
* `"green".contains 'e' = true`
|
||
* `"green".contains 'x' = false`
|
||
* `"".contains 'x' = false`
|
||
-/
|
||
@[inline] def contains (s : String) (c : Char) : Bool :=
|
||
s.any (fun a => a == c)
|
||
|
||
@[export lean_string_contains]
|
||
def Internal.containsImpl (s : String) (c : Char) : Bool :=
|
||
String.contains s c
|
||
|
||
theorem Pos.Raw.utf8SetAux_of_gt (c' : Char) : ∀ (cs : List Char) {i p : Pos.Raw}, i > p → utf8SetAux c' cs i p = cs
|
||
| [], _, _, _ => rfl
|
||
| c::cs, i, p, h => by
|
||
rw [utf8SetAux, if_neg (mt (congrArg (·.1)) (Ne.symm <| Nat.ne_of_lt h)), utf8SetAux_of_gt c' cs]
|
||
exact Nat.lt_of_lt_of_le h (Nat.le_add_right ..)
|
||
|
||
theorem set_next_add (s : String) (i : Pos.Raw) (c : Char) (b₁ b₂)
|
||
(h : (i.next s).1 + b₁ = s.endPos.1 + b₂) :
|
||
(i.next (i.set s c)).1 + b₁ = (i.set s c).endPos.1 + b₂ := by
|
||
simp [Pos.Raw.next, Pos.Raw.get, Pos.Raw.set, endPos, ← utf8ByteSize'_eq, utf8ByteSize'] at h ⊢
|
||
rw [Nat.add_comm i.1, Nat.add_assoc] at h ⊢
|
||
let rec foo : ∀ cs a b₁ b₂,
|
||
(Pos.Raw.utf8GetAux cs a i).utf8Size + b₁ = utf8ByteSize'.go cs + b₂ →
|
||
(Pos.Raw.utf8GetAux (Pos.Raw.utf8SetAux c cs a i) a i).utf8Size + b₁ = utf8ByteSize'.go (Pos.Raw.utf8SetAux c cs a i) + b₂
|
||
| [], _, _, _, h => h
|
||
| c'::cs, a, b₁, b₂, h => by
|
||
unfold Pos.Raw.utf8SetAux
|
||
apply iteInduction (motive := fun p => (Pos.Raw.utf8GetAux p a i).utf8Size + b₁ = utf8ByteSize'.go p + b₂) <;>
|
||
intro h' <;> simp [Pos.Raw.utf8GetAux, h', utf8ByteSize'.go] at h ⊢
|
||
next =>
|
||
rw [Nat.add_assoc, Nat.add_left_comm] at h ⊢; rw [Nat.add_left_cancel h]
|
||
next =>
|
||
rw [Nat.add_assoc] at h ⊢
|
||
refine foo cs (a + c') b₁ (c'.utf8Size + b₂) h
|
||
exact foo s.data 0 _ _ h
|
||
|
||
theorem mapAux_lemma (s : String) (i : Pos.Raw) (c : Char) (h : ¬i.atEnd s) :
|
||
(i.set s c).endPos.1 - (i.next (i.set s c)).1 < s.endPos.1 - i.1 := by
|
||
suffices (i.set s c).endPos.1 - (i.next (i.set s c)).1 = s.endPos.1 - (i.next s).1 by
|
||
rw [this]
|
||
apply Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (Pos.Raw.lt_next ..)
|
||
have := set_next_add s i c (s.endPos.byteIdx - (i.next s).byteIdx) 0
|
||
have := set_next_add s i c 0 ((i.next s).byteIdx - s.endPos.byteIdx)
|
||
omega
|
||
|
||
@[specialize] def mapAux (f : Char → Char) (i : Pos.Raw) (s : String) : String :=
|
||
if h : i.atEnd s then s
|
||
else
|
||
let c := f (i.get s)
|
||
have := mapAux_lemma s i c h
|
||
let s := i.set s c
|
||
mapAux f (i.next s) s
|
||
termination_by s.endPos.1 - i.1
|
||
|
||
/--
|
||
Applies the function `f` to every character in a string, returning a string that contains the
|
||
resulting characters.
|
||
|
||
Examples:
|
||
* `"abc123".map Char.toUpper = "ABC123"`
|
||
* `"".map Char.toUpper = ""`
|
||
-/
|
||
@[inline] def map (f : Char → Char) (s : String) : String :=
|
||
mapAux f 0 s
|
||
|
||
/--
|
||
Checks whether the string can be interpreted as the decimal representation of a natural number.
|
||
|
||
A string can be interpreted as a decimal natural number if it is not empty and all the characters in
|
||
it are digits.
|
||
|
||
Use `String.toNat?` or `String.toNat!` to convert such a string to a natural number.
|
||
|
||
Examples:
|
||
* `"".isNat = false`
|
||
* `"0".isNat = true`
|
||
* `"5".isNat = true`
|
||
* `"05".isNat = true`
|
||
* `"587".isNat = true`
|
||
* `"-587".isNat = false`
|
||
* `" 5".isNat = false`
|
||
* `"2+3".isNat = false`
|
||
* `"0xff".isNat = false`
|
||
-/
|
||
@[inline] def isNat (s : String) : Bool :=
|
||
!s.isEmpty && s.all (·.isDigit)
|
||
|
||
/--
|
||
Interprets a string as the decimal representation of a natural number, returning it. Returns `none`
|
||
if the string does not contain a decimal natural number.
|
||
|
||
A string can be interpreted as a decimal natural number if it is not empty and all the characters in
|
||
it are digits.
|
||
|
||
Use `String.isNat` to check whether `String.toNat?` would return `some`. `String.toNat!` is an
|
||
alternative that panics instead of returning `none` when the string is not a natural number.
|
||
|
||
Examples:
|
||
* `"".toNat? = none`
|
||
* `"0".toNat? = some 0`
|
||
* `"5".toNat? = some 5`
|
||
* `"587".toNat? = some 587`
|
||
* `"-587".toNat? = none`
|
||
* `" 5".toNat? = none`
|
||
* `"2+3".toNat? = none`
|
||
* `"0xff".toNat? = none`
|
||
-/
|
||
def toNat? (s : String) : Option Nat :=
|
||
if s.isNat then
|
||
some <| s.foldl (fun n c => n*10 + (c.toNat - '0'.toNat)) 0
|
||
else
|
||
none
|
||
|
||
/--
|
||
Checks whether substrings of two strings are equal. Substrings are indicated by their starting
|
||
positions and a size in _UTF-8 bytes_. Returns `false` if the indicated substring does not exist in
|
||
either string.
|
||
|
||
This is a legacy function. The recommended alternative is to construct slices representing the
|
||
strings to be compared and use the `BEq` instance of `String.Slice`.
|
||
-/
|
||
def Pos.Raw.substrEq (s1 : String) (pos1 : String.Pos.Raw) (s2 : String) (pos2 : String.Pos.Raw) (sz : Nat) : Bool :=
|
||
pos1.byteIdx + sz ≤ s1.endPos.byteIdx && pos2.byteIdx + sz ≤ s2.endPos.byteIdx && loop pos1 pos2 { byteIdx := pos1.byteIdx + sz }
|
||
where
|
||
loop (off1 off2 stop1 : Pos.Raw) :=
|
||
if _h : off1.byteIdx < stop1.byteIdx then
|
||
let c₁ := off1.get s1
|
||
let c₂ := off2.get s2
|
||
c₁ == c₂ && loop (off1 + c₁) (off2 + c₂) stop1
|
||
else true
|
||
termination_by stop1.1 - off1.1
|
||
decreasing_by
|
||
have := Nat.sub_lt_sub_left _h (Nat.add_lt_add_left c₁.utf8Size_pos off1.1)
|
||
decreasing_tactic
|
||
|
||
@[deprecated Pos.Raw.substrEq (since := "2025-10-10")]
|
||
def substrEq (s1 : String) (pos1 : String.Pos.Raw) (s2 : String) (pos2 : String.Pos.Raw) (sz : Nat) : Bool :=
|
||
Pos.Raw.substrEq s1 pos1 s2 pos2 sz
|
||
|
||
/--
|
||
Checks whether the first string (`p`) is a prefix of the second (`s`).
|
||
|
||
`String.startsWith` is a version that takes the potential prefix after the string.
|
||
|
||
Examples:
|
||
* `"red".isPrefixOf "red green blue" = true`
|
||
* `"green".isPrefixOf "red green blue" = false`
|
||
* `"".isPrefixOf "red green blue" = true`
|
||
-/
|
||
def isPrefixOf (p : String) (s : String) : Bool :=
|
||
Pos.Raw.substrEq p 0 s 0 p.endPos.byteIdx
|
||
|
||
@[export lean_string_isprefixof]
|
||
def Internal.isPrefixOfImpl (p : String) (s : String) : Bool :=
|
||
String.isPrefixOf p s
|
||
|
||
/--
|
||
In the string `s`, replaces all occurrences of `pattern` with `replacement`.
|
||
|
||
Examples:
|
||
* `"red green blue".replace "e" "" = "rd grn blu"`
|
||
* `"red green blue".replace "ee" "E" = "red grEn blue"`
|
||
* `"red green blue".replace "e" "E" = "rEd grEEn bluE"`
|
||
-/
|
||
def replace (s pattern replacement : String) : String :=
|
||
if h : pattern.endPos.1 = 0 then s
|
||
else
|
||
have hPatt := Nat.zero_lt_of_ne_zero h
|
||
let rec loop (acc : String) (accStop pos : String.Pos.Raw) :=
|
||
if h : pos.byteIdx + pattern.endPos.byteIdx > s.endPos.byteIdx then
|
||
acc ++ accStop.extract s s.endPos
|
||
else
|
||
have := Nat.lt_of_lt_of_le (Nat.add_lt_add_left hPatt _) (Nat.ge_of_not_lt h)
|
||
if Pos.Raw.substrEq s pos pattern 0 pattern.endPos.byteIdx then
|
||
have := Nat.sub_lt_sub_left this (Nat.add_lt_add_left hPatt _)
|
||
loop (acc ++ accStop.extract s pos ++ replacement) (pos + pattern) (pos + pattern)
|
||
else
|
||
have := Nat.sub_lt_sub_left this (Pos.Raw.lt_next s pos)
|
||
loop acc accStop (pos.next s)
|
||
termination_by s.endPos.1 - pos.1
|
||
loop "" 0 0
|
||
|
||
/--
|
||
Returns the position of the beginning of the line that contains the position `pos`.
|
||
|
||
Lines are ended by `'\n'`, and the returned position is either `0 : String.Pos` or immediately after
|
||
a `'\n'` character.
|
||
-/
|
||
def findLineStart (s : String) (pos : String.Pos.Raw) : String.Pos.Raw :=
|
||
match s.revFindAux (· = '\n') pos with
|
||
| none => 0
|
||
| some n => ⟨n.byteIdx + 1⟩
|
||
|
||
end String
|
||
|
||
namespace Substring
|
||
|
||
/--
|
||
Checks whether a substring is empty.
|
||
|
||
A substring is empty if its start and end positions are the same.
|
||
-/
|
||
@[inline] def isEmpty (ss : Substring) : Bool :=
|
||
ss.bsize == 0
|
||
|
||
@[export lean_substring_isempty]
|
||
def Internal.isEmptyImpl (ss : Substring) : Bool :=
|
||
Substring.isEmpty ss
|
||
|
||
/--
|
||
Copies the region of the underlying string pointed to by a substring into a fresh string.
|
||
-/
|
||
@[inline] def toString : Substring → String
|
||
| ⟨s, b, e⟩ => b.extract s e
|
||
|
||
@[export lean_substring_tostring]
|
||
def Internal.toStringImpl : Substring → String :=
|
||
Substring.toString
|
||
|
||
/--
|
||
Returns an iterator into the underlying string, at the substring's starting position. The ending
|
||
position is discarded, so the iterator alone cannot be used to determine whether its current
|
||
position is within the original substring.
|
||
-/
|
||
@[inline] def toIterator : Substring → String.Iterator
|
||
| ⟨s, b, _⟩ => ⟨s, b⟩
|
||
|
||
/--
|
||
Returns the character at the given position in the substring.
|
||
|
||
The position is relative to the substring, rather than the underlying string, and no bounds checking
|
||
is performed with respect to the substring's end position. If the relative position is not a valid
|
||
position in the underlying string, the fallback value `(default : Char)`, which is `'A'`, is
|
||
returned. Does not panic.
|
||
-/
|
||
@[inline] def get : Substring → String.Pos.Raw → Char
|
||
| ⟨s, b, _⟩, p => (p.offsetBy b).get s
|
||
|
||
@[export lean_substring_get]
|
||
def Internal.getImpl : Substring → String.Pos.Raw → Char :=
|
||
Substring.get
|
||
|
||
/--
|
||
Returns the next position in a substring after the given position. If the position is at the end of
|
||
the substring, it is returned unmodified.
|
||
|
||
Both the input position and the returned position are interpreted relative to the substring's start
|
||
position, not the underlying string.
|
||
-/
|
||
@[inline] def next : Substring → String.Pos.Raw → String.Pos.Raw
|
||
| ⟨s, b, e⟩, p =>
|
||
let absP := p.offsetBy b
|
||
if absP = e then p else { byteIdx := (absP.next s).byteIdx - b.byteIdx }
|
||
|
||
theorem lt_next (s : Substring) (i : String.Pos.Raw) (h : i.1 < s.bsize) :
|
||
i.1 < (s.next i).1 := by
|
||
simp [next]; rw [if_neg ?a]
|
||
case a =>
|
||
refine mt (congrArg String.Pos.Raw.byteIdx) (Nat.ne_of_lt ?_)
|
||
exact (Nat.add_comm .. ▸ Nat.add_lt_of_lt_sub h :)
|
||
apply Nat.lt_sub_of_add_lt
|
||
rw [Nat.add_comm]; apply String.Pos.Raw.lt_next
|
||
|
||
/--
|
||
Returns the previous position in a substring, just prior to the given position. If the position is
|
||
at the beginning of the substring, it is returned unmodified.
|
||
|
||
Both the input position and the returned position are interpreted relative to the substring's start
|
||
position, not the underlying string.
|
||
-/
|
||
@[inline] def prev : Substring → String.Pos.Raw → String.Pos.Raw
|
||
| ⟨s, b, _⟩, p =>
|
||
let absP := p.offsetBy b
|
||
if absP = b then p else { byteIdx := (absP.prev s).byteIdx - b.byteIdx }
|
||
|
||
@[export lean_substring_prev]
|
||
def Internal.prevImpl : Substring → String.Pos.Raw → String.Pos.Raw :=
|
||
Substring.prev
|
||
|
||
/--
|
||
Returns the position that's the specified number of characters forward from the given position in a
|
||
substring. If the end position of the substring is reached, it is returned.
|
||
|
||
Both the input position and the returned position are interpreted relative to the substring's start
|
||
position, not the underlying string.
|
||
-/
|
||
def nextn : Substring → Nat → String.Pos.Raw → String.Pos.Raw
|
||
| _, 0, p => p
|
||
| ss, i+1, p => ss.nextn i (ss.next p)
|
||
|
||
/--
|
||
Returns the position that's the specified number of characters prior to the given position in a
|
||
substring. If the start position of the substring is reached, it is returned.
|
||
|
||
Both the input position and the returned position are interpreted relative to the substring's start
|
||
position, not the underlying string.
|
||
-/
|
||
def prevn : Substring → Nat → String.Pos.Raw → String.Pos.Raw
|
||
| _, 0, p => p
|
||
| ss, i+1, p => ss.prevn i (ss.prev p)
|
||
|
||
/--
|
||
Returns the first character in the substring.
|
||
|
||
If the substring is empty, but the substring's start position is a valid position in the underlying
|
||
string, then the character at the start position is returned. If the substring's start position is
|
||
not a valid position in the string, the fallback value `(default : Char)`, which is `'A'`, is
|
||
returned. Does not panic.
|
||
-/
|
||
@[inline, expose] def front (s : Substring) : Char :=
|
||
s.get 0
|
||
|
||
@[export lean_substring_front]
|
||
def Internal.frontImpl : Substring → Char :=
|
||
Substring.front
|
||
|
||
/--
|
||
Returns the substring-relative position of the first occurrence of `c` in `s`, or `s.bsize` if `c`
|
||
doesn't occur.
|
||
-/
|
||
@[inline] def posOf (s : Substring) (c : Char) : String.Pos.Raw :=
|
||
match s with
|
||
| ⟨s, b, e⟩ => { byteIdx := (String.posOfAux s c e b).byteIdx - b.byteIdx }
|
||
|
||
/--
|
||
Removes the specified number of characters (Unicode code points) from the beginning of a substring
|
||
by advancing its start position.
|
||
|
||
If the substring's end position is reached, the start position is not advanced past it.
|
||
-/
|
||
@[inline] def drop : Substring → Nat → Substring
|
||
| ss@⟨s, b, e⟩, n => ⟨s, (ss.nextn n 0).offsetBy b, e⟩
|
||
|
||
@[export lean_substring_drop]
|
||
def Internal.dropImpl : Substring → Nat → Substring :=
|
||
Substring.drop
|
||
|
||
/--
|
||
Removes the specified number of characters (Unicode code points) from the end of a substring
|
||
by moving its end position towards its start position.
|
||
|
||
If the substring's start position is reached, the end position is not retracted past it.
|
||
-/
|
||
@[inline] def dropRight : Substring → Nat → Substring
|
||
| ss@⟨s, b, _⟩, n => ⟨s, b, (ss.prevn n ⟨ss.bsize⟩).offsetBy b⟩
|
||
|
||
/--
|
||
Retains only the specified number of characters (Unicode code points) at the beginning of a
|
||
substring, by moving its end position towards its start position.
|
||
|
||
If the substring's start position is reached, the end position is not retracted past it.
|
||
-/
|
||
@[inline] def take : Substring → Nat → Substring
|
||
| ss@⟨s, b, _⟩, n => ⟨s, b, (ss.nextn n 0).offsetBy b⟩
|
||
|
||
/--
|
||
Retains only the specified number of characters (Unicode code points) at the end of a substring, by
|
||
moving its start position towards its end position.
|
||
|
||
If the substring's end position is reached, the start position is not advanced past it.
|
||
-/
|
||
@[inline] def takeRight : Substring → Nat → Substring
|
||
| ss@⟨s, b, e⟩, n => ⟨s, (ss.prevn n ⟨ss.bsize⟩).offsetBy b, e⟩
|
||
|
||
/--
|
||
Checks whether a position in a substring is precisely equal to its ending position.
|
||
|
||
The position is understood relative to the substring's starting position, rather than the underlying
|
||
string's starting position.
|
||
-/
|
||
@[inline] def atEnd : Substring → String.Pos.Raw → Bool
|
||
| ⟨_, b, e⟩, p => p.offsetBy b == e
|
||
|
||
/--
|
||
Returns the region of the substring delimited by the provided start and stop positions, as a
|
||
substring. The positions are interpreted with respect to the substring's start position, rather than
|
||
the underlying string.
|
||
|
||
If the resulting substring is empty, then the resulting substring is a substring of the empty string
|
||
`""`. Otherwise, the underlying string is that of the input substring with the beginning and end
|
||
positions adjusted.
|
||
-/
|
||
@[inline] def extract : Substring → String.Pos.Raw → String.Pos.Raw → Substring
|
||
| ⟨s, b, e⟩, b', e' => if b' ≥ e' then ⟨"", 0, 0⟩ else ⟨s, e.min (b'.offsetBy b), e.min (e'.offsetBy b)⟩
|
||
|
||
@[export lean_substring_extract]
|
||
def Internal.extractImpl : Substring → String.Pos.Raw → String.Pos.Raw → Substring :=
|
||
Substring.extract
|
||
|
||
/--
|
||
Splits a substring `s` on occurrences of the separator string `sep`. The default separator is `" "`.
|
||
|
||
When `sep` is empty, the result is `[s]`. When `sep` occurs in overlapping patterns, the first match
|
||
is taken. There will always be exactly `n+1` elements in the returned list if there were `n`
|
||
non-overlapping matches of `sep` in the string. The separators are not included in the returned
|
||
substrings, which are all substrings of `s`'s string.
|
||
-/
|
||
def splitOn (s : Substring) (sep : String := " ") : List Substring :=
|
||
if sep == "" then
|
||
[s]
|
||
else
|
||
let rec loop (b i j : String.Pos.Raw) (r : List Substring) : List Substring :=
|
||
if h : i.byteIdx < s.bsize then
|
||
have := Nat.sub_lt_sub_left h (lt_next s i h)
|
||
if s.get i == j.get sep then
|
||
let i := s.next i
|
||
let j := j.next sep
|
||
if j.atEnd sep then
|
||
loop i i 0 (s.extract b (i.unoffsetBy j) :: r)
|
||
else
|
||
loop b i j r
|
||
else
|
||
loop b (s.next i) 0 r
|
||
else
|
||
let r := if j.atEnd sep then
|
||
"".toSubstring :: s.extract b (i.unoffsetBy j) :: r
|
||
else
|
||
s.extract b i :: r
|
||
r.reverse
|
||
termination_by s.bsize - i.1
|
||
loop 0 0 0 []
|
||
|
||
/--
|
||
Folds a function over a substring from the left, accumulating a value starting with `init`. The
|
||
accumulated value is combined with each character in order, using `f`.
|
||
-/
|
||
@[inline] def foldl {α : Type u} (f : α → Char → α) (init : α) (s : Substring) : α :=
|
||
match s with
|
||
| ⟨s, b, e⟩ => String.foldlAux f s e b init
|
||
|
||
/--
|
||
Folds a function over a substring from the right, accumulating a value starting with `init`. The
|
||
accumulated value is combined with each character in reverse order, using `f`.
|
||
-/
|
||
@[inline] def foldr {α : Type u} (f : Char → α → α) (init : α) (s : Substring) : α :=
|
||
match s with
|
||
| ⟨s, b, e⟩ => String.foldrAux f init s e b
|
||
|
||
/--
|
||
Checks whether the Boolean predicate `p` returns `true` for any character in a substring.
|
||
|
||
Short-circuits at the first character for which `p` returns `true`.
|
||
-/
|
||
@[inline] def any (s : Substring) (p : Char → Bool) : Bool :=
|
||
match s with
|
||
| ⟨s, b, e⟩ => String.anyAux s e p b
|
||
|
||
/--
|
||
Checks whether the Boolean predicate `p` returns `true` for every character in a substring.
|
||
|
||
Short-circuits at the first character for which `p` returns `false`.
|
||
-/
|
||
@[inline] def all (s : Substring) (p : Char → Bool) : Bool :=
|
||
!s.any (fun c => !p c)
|
||
|
||
@[export lean_substring_all]
|
||
def Internal.allImpl (s : Substring) (p : Char → Bool) : Bool :=
|
||
Substring.all s p
|
||
|
||
/--
|
||
Checks whether a substring contains the specified character.
|
||
-/
|
||
@[inline] def contains (s : Substring) (c : Char) : Bool :=
|
||
s.any (fun a => a == c)
|
||
|
||
@[specialize] def takeWhileAux (s : String) (stopPos : String.Pos.Raw) (p : Char → Bool) (i : String.Pos.Raw) : String.Pos.Raw :=
|
||
if h : i < stopPos then
|
||
if p (i.get s) then
|
||
have := Nat.sub_lt_sub_left h (String.Pos.Raw.lt_next s i)
|
||
takeWhileAux s stopPos p (i.next s)
|
||
else i
|
||
else i
|
||
termination_by stopPos.1 - i.1
|
||
|
||
/--
|
||
Retains only the longest prefix of a substring in which a Boolean predicate returns `true` for all
|
||
characters by moving the substring's end position towards its start position.
|
||
-/
|
||
@[inline] def takeWhile : Substring → (Char → Bool) → Substring
|
||
| ⟨s, b, e⟩, p =>
|
||
let e := takeWhileAux s e p b;
|
||
⟨s, b, e⟩
|
||
|
||
@[export lean_substring_takewhile]
|
||
def Internal.takeWhileImpl : Substring → (Char → Bool) → Substring :=
|
||
Substring.takeWhile
|
||
|
||
/--
|
||
Removes the longest prefix of a substring in which a Boolean predicate returns `true` for all
|
||
characters by moving the substring's start position. The start position is moved to the position of
|
||
the first character for which the predicate returns `false`, or to the substring's end position if
|
||
the predicate always returns `true`.
|
||
-/
|
||
@[inline] def dropWhile : Substring → (Char → Bool) → Substring
|
||
| ⟨s, b, e⟩, p =>
|
||
let b := takeWhileAux s e p b;
|
||
⟨s, b, e⟩
|
||
|
||
@[specialize] def takeRightWhileAux (s : String) (begPos : String.Pos.Raw) (p : Char → Bool) (i : String.Pos.Raw) : String.Pos.Raw :=
|
||
if h : begPos < i then
|
||
have := String.Pos.Raw.prev_lt_of_pos s i <| mt (congrArg String.Pos.Raw.byteIdx) <|
|
||
Ne.symm <| Nat.ne_of_lt <| Nat.lt_of_le_of_lt (Nat.zero_le _) h
|
||
let i' := i.prev s
|
||
let c := i'.get s
|
||
if !p c then i
|
||
else takeRightWhileAux s begPos p i'
|
||
else i
|
||
termination_by i.1
|
||
|
||
/--
|
||
Retains only the longest suffix of a substring in which a Boolean predicate returns `true` for all
|
||
characters by moving the substring's start position towards its end position.
|
||
-/
|
||
@[inline] def takeRightWhile : Substring → (Char → Bool) → Substring
|
||
| ⟨s, b, e⟩, p =>
|
||
let b := takeRightWhileAux s b p e
|
||
⟨s, b, e⟩
|
||
|
||
/--
|
||
Removes the longest suffix of a substring in which a Boolean predicate returns `true` for all
|
||
characters by moving the substring's end position. The end position is moved just after the position
|
||
of the last character for which the predicate returns `false`, or to the substring's start position
|
||
if the predicate always returns `true`.
|
||
-/
|
||
@[inline] def dropRightWhile : Substring → (Char → Bool) → Substring
|
||
| ⟨s, b, e⟩, p =>
|
||
let e := takeRightWhileAux s b p e
|
||
⟨s, b, e⟩
|
||
|
||
/--
|
||
Removes leading whitespace from a substring by moving its start position to the first non-whitespace
|
||
character, or to its end position if there is no non-whitespace character.
|
||
|
||
“Whitespace” is defined as characters for which `Char.isWhitespace` returns `true`.
|
||
-/
|
||
@[inline] def trimLeft (s : Substring) : Substring :=
|
||
s.dropWhile Char.isWhitespace
|
||
|
||
/--
|
||
Removes trailing whitespace from a substring by moving its end position to the last non-whitespace
|
||
character, or to its start position if there is no non-whitespace character.
|
||
|
||
“Whitespace” is defined as characters for which `Char.isWhitespace` returns `true`.
|
||
-/
|
||
@[inline] def trimRight (s : Substring) : Substring :=
|
||
s.dropRightWhile Char.isWhitespace
|
||
|
||
/--
|
||
Removes leading and trailing whitespace from a substring by first moving its start position to the
|
||
first non-whitespace character, and then moving its end position to the last non-whitespace
|
||
character.
|
||
|
||
If the substring consists only of whitespace, then the resulting substring's start position is moved
|
||
to its end position.
|
||
|
||
“Whitespace” is defined as characters for which `Char.isWhitespace` returns `true`.
|
||
|
||
Examples:
|
||
* `" red green blue ".toSubstring.trim.toString = "red green blue"`
|
||
* `" red green blue ".toSubstring.trim.startPos = ⟨1⟩`
|
||
* `" red green blue ".toSubstring.trim.stopPos = ⟨15⟩`
|
||
* `" ".toSubstring.trim.startPos = ⟨5⟩`
|
||
-/
|
||
@[inline] def trim : Substring → Substring
|
||
| ⟨s, b, e⟩ =>
|
||
let b := takeWhileAux s e Char.isWhitespace b
|
||
let e := takeRightWhileAux s b Char.isWhitespace e
|
||
⟨s, b, e⟩
|
||
|
||
/--
|
||
Checks whether the substring can be interpreted as the decimal representation of a natural number.
|
||
|
||
A substring can be interpreted as a decimal natural number if it is not empty and all the characters
|
||
in it are digits.
|
||
|
||
Use `Substring.toNat?` to convert such a substring to a natural number.
|
||
-/
|
||
@[inline] def isNat (s : Substring) : Bool :=
|
||
!s.isEmpty && s.all fun c => c.isDigit
|
||
|
||
/--
|
||
Checks whether the substring can be interpreted as the decimal representation of a natural number,
|
||
returning the number if it can.
|
||
|
||
A substring can be interpreted as a decimal natural number if it is not empty and all the characters
|
||
in it are digits.
|
||
|
||
Use `Substring.isNat` to check whether the substring is such a substring.
|
||
-/
|
||
def toNat? (s : Substring) : Option Nat :=
|
||
if s.isNat then
|
||
some <| s.foldl (fun n c => n*10 + (c.toNat - '0'.toNat)) 0
|
||
else
|
||
none
|
||
|
||
/--
|
||
Given a `Substring`, returns another one which has valid endpoints
|
||
and represents the same substring according to `Substring.toString`.
|
||
(Note, the substring may still be inverted, i.e. beginning greater than end.)
|
||
-/
|
||
def repair : Substring → Substring
|
||
| ⟨s, b, e⟩ => ⟨s, if b.isValid s then b else s.endPos, if e.isValid s then e else s.endPos⟩
|
||
|
||
/--
|
||
Checks whether two substrings represent equal strings. Usually accessed via the `==` operator.
|
||
|
||
Two substrings do not need to have the same underlying string or the same start and end positions;
|
||
instead, they are equal if they contain the same sequence of characters.
|
||
-/
|
||
def beq (ss1 ss2 : Substring) : Bool :=
|
||
let ss1 := ss1.repair
|
||
let ss2 := ss2.repair
|
||
ss1.bsize == ss2.bsize && String.Pos.Raw.substrEq ss1.str ss1.startPos ss2.str ss2.startPos ss1.bsize
|
||
|
||
@[export lean_substring_beq]
|
||
def Internal.beqImpl (ss1 ss2 : Substring) : Bool :=
|
||
Substring.beq ss1 ss2
|
||
|
||
instance hasBeq : BEq Substring := ⟨beq⟩
|
||
|
||
/--
|
||
Checks whether two substrings have the same position and content.
|
||
|
||
The two substrings do not need to have the same underlying string for this check to succeed.
|
||
-/
|
||
def sameAs (ss1 ss2 : Substring) : Bool :=
|
||
ss1.startPos == ss2.startPos && ss1 == ss2
|
||
|
||
/--
|
||
Returns the longest common prefix of two substrings.
|
||
|
||
The returned substring uses the same underlying string as `s`.
|
||
-/
|
||
def commonPrefix (s t : Substring) : Substring :=
|
||
{ s with stopPos := loop s.startPos t.startPos }
|
||
where
|
||
/-- Returns the ending position of the common prefix, working up from `spos, tpos`. -/
|
||
loop spos tpos :=
|
||
if h : spos < s.stopPos ∧ tpos < t.stopPos then
|
||
if spos.get s.str == tpos.get t.str then
|
||
have := Nat.sub_lt_sub_left h.1 (String.Pos.Raw.lt_next s.str spos)
|
||
loop (spos.next s.str) (tpos.next t.str)
|
||
else
|
||
spos
|
||
else
|
||
spos
|
||
termination_by s.stopPos.byteIdx - spos.byteIdx
|
||
|
||
/--
|
||
Returns the longest common suffix of two substrings.
|
||
|
||
The returned substring uses the same underlying string as `s`.
|
||
-/
|
||
def commonSuffix (s t : Substring) : Substring :=
|
||
{ s with startPos := loop s.stopPos t.stopPos }
|
||
where
|
||
/-- Returns the starting position of the common prefix, working down from `spos, tpos`. -/
|
||
loop spos tpos :=
|
||
if h : s.startPos < spos ∧ t.startPos < tpos then
|
||
let spos' := spos.prev s.str
|
||
let tpos' := tpos.prev t.str
|
||
if spos'.get s.str == tpos'.get t.str then
|
||
have : spos' < spos := String.Pos.Raw.prev_lt_of_pos s.str spos (String.Pos.Raw.ne_zero_of_lt h.1)
|
||
loop spos' tpos'
|
||
else
|
||
spos
|
||
else
|
||
spos
|
||
termination_by spos.byteIdx
|
||
|
||
/--
|
||
If `pre` is a prefix of `s`, returns the remainder. Returns `none` otherwise.
|
||
|
||
The substring `pre` is a prefix of `s` if there exists a `t : Substring` such that
|
||
`s.toString = pre.toString ++ t.toString`. If so, the result is the substring of `s` without the
|
||
prefix.
|
||
-/
|
||
def dropPrefix? (s : Substring) (pre : Substring) : Option Substring :=
|
||
let t := s.commonPrefix pre
|
||
if t.bsize = pre.bsize then
|
||
some { s with startPos := t.stopPos }
|
||
else
|
||
none
|
||
|
||
/--
|
||
If `suff` is a suffix of `s`, returns the remainder. Returns `none` otherwise.
|
||
|
||
The substring `suff` is a suffix of `s` if there exists a `t : Substring` such that
|
||
`s.toString = t.toString ++ suff.toString`. If so, the result the substring of `s` without the
|
||
suffix.
|
||
-/
|
||
def dropSuffix? (s : Substring) (suff : Substring) : Option Substring :=
|
||
let t := s.commonSuffix suff
|
||
if t.bsize = suff.bsize then
|
||
some { s with stopPos := t.startPos }
|
||
else
|
||
none
|
||
|
||
end Substring
|
||
|
||
namespace String
|
||
|
||
/--
|
||
Removes the specified number of characters (Unicode code points) from the start of the string.
|
||
|
||
If `n` is greater than `s.length`, returns `""`.
|
||
|
||
Examples:
|
||
* `"red green blue".drop 4 = "green blue"`
|
||
* `"red green blue".drop 10 = "blue"`
|
||
* `"red green blue".drop 50 = ""`
|
||
-/
|
||
@[inline] def drop (s : String) (n : Nat) : String :=
|
||
(s.toSubstring.drop n).toString
|
||
|
||
@[export lean_string_drop]
|
||
def Internal.dropImpl (s : String) (n : Nat) : String :=
|
||
String.drop s n
|
||
|
||
/--
|
||
Removes the specified number of characters (Unicode code points) from the end of the string.
|
||
|
||
If `n` is greater than `s.length`, returns `""`.
|
||
|
||
Examples:
|
||
* `"red green blue".dropRight 5 = "red green"`
|
||
* `"red green blue".dropRight 11 = "red"`
|
||
* `"red green blue".dropRight 50 = ""`
|
||
-/
|
||
@[inline] def dropRight (s : String) (n : Nat) : String :=
|
||
(s.toSubstring.dropRight n).toString
|
||
|
||
@[export lean_string_dropright]
|
||
def Internal.dropRightImpl (s : String) (n : Nat) : String :=
|
||
String.dropRight s n
|
||
|
||
/--
|
||
Creates a new string that contains the first `n` characters (Unicode code points) of `s`.
|
||
|
||
If `n` is greater than `s.length`, returns `s`.
|
||
|
||
Examples:
|
||
* `"red green blue".take 3 = "red"`
|
||
* `"red green blue".take 1 = "r"`
|
||
* `"red green blue".take 0 = ""`
|
||
* `"red green blue".take 100 = "red green blue"`
|
||
-/
|
||
@[inline] def take (s : String) (n : Nat) : String :=
|
||
(s.toSubstring.take n).toString
|
||
|
||
/--
|
||
Creates a new string that contains the last `n` characters (Unicode code points) of `s`.
|
||
|
||
If `n` is greater than `s.length`, returns `s`.
|
||
|
||
Examples:
|
||
* `"red green blue".takeRight 4 = "blue"`
|
||
* `"red green blue".takeRight 1 = "e"`
|
||
* `"red green blue".takeRight 0 = ""`
|
||
* `"red green blue".takeRight 100 = "red green blue"`
|
||
-/
|
||
@[inline] def takeRight (s : String) (n : Nat) : String :=
|
||
(s.toSubstring.takeRight n).toString
|
||
|
||
/--
|
||
Creates a new string that contains the longest prefix of `s` in which `p` returns `true` for all
|
||
characters.
|
||
|
||
Examples:
|
||
* `"red green blue".takeWhile (·.isLetter) = "red"`
|
||
* `"red green blue".takeWhile (· == 'r') = "r"`
|
||
* `"red green blue".takeWhile (· != 'n') = "red gree"`
|
||
* `"red green blue".takeWhile (fun _ => true) = "red green blue"`
|
||
-/
|
||
@[inline] def takeWhile (s : String) (p : Char → Bool) : String :=
|
||
(s.toSubstring.takeWhile p).toString
|
||
|
||
/--
|
||
Creates a new string by removing the longest prefix from `s` in which `p` returns `true` for all
|
||
characters.
|
||
|
||
Examples:
|
||
* `"red green blue".dropWhile (·.isLetter) = " green blue"`
|
||
* `"red green blue".dropWhile (· == 'r') = "ed green blue"`
|
||
* `"red green blue".dropWhile (· != 'n') = "n blue"`
|
||
* `"red green blue".dropWhile (fun _ => true) = ""`
|
||
-/
|
||
@[inline] def dropWhile (s : String) (p : Char → Bool) : String :=
|
||
(s.toSubstring.dropWhile p).toString
|
||
|
||
/--
|
||
Creates a new string that contains the longest suffix of `s` in which `p` returns `true` for all
|
||
characters.
|
||
|
||
Examples:
|
||
* `"red green blue".takeRightWhile (·.isLetter) = "blue"`
|
||
* `"red green blue".takeRightWhile (· == 'e') = "e"`
|
||
* `"red green blue".takeRightWhile (· != 'n') = " blue"`
|
||
* `"red green blue".takeRightWhile (fun _ => true) = "red green blue"`
|
||
-/
|
||
@[inline] def takeRightWhile (s : String) (p : Char → Bool) : String :=
|
||
(s.toSubstring.takeRightWhile p).toString
|
||
|
||
/--
|
||
Creates a new string by removing the longest suffix from `s` in which `p` returns `true` for all
|
||
characters.
|
||
|
||
Examples:
|
||
* `"red green blue".dropRightWhile (·.isLetter) = "red green "`
|
||
* `"red green blue".dropRightWhile (· == 'e') = "red green blu"`
|
||
* `"red green blue".dropRightWhile (· != 'n') = "red green"`
|
||
* `"red green blue".dropRightWhile (fun _ => true) = ""`
|
||
-/
|
||
@[inline] def dropRightWhile (s : String) (p : Char → Bool) : String :=
|
||
(s.toSubstring.dropRightWhile p).toString
|
||
|
||
/--
|
||
Checks whether the first string (`s`) begins with the second (`pre`).
|
||
|
||
`String.isPrefix` is a version that takes the potential prefix before the string.
|
||
|
||
Examples:
|
||
* `"red green blue".startsWith "red" = true`
|
||
* `"red green blue".startsWith "green" = false`
|
||
* `"red green blue".startsWith "" = true`
|
||
* `"red".startsWith "red" = true`
|
||
-/
|
||
@[inline] def startsWith (s pre : String) : Bool :=
|
||
s.toSubstring.take pre.length == pre.toSubstring
|
||
|
||
/--
|
||
Checks whether the first string (`s`) ends with the second (`post`).
|
||
|
||
Examples:
|
||
* `"red green blue".endsWith "blue" = true`
|
||
* `"red green blue".endsWith "green" = false`
|
||
* `"red green blue".endsWith "" = true`
|
||
* `"red".endsWith "red" = true`
|
||
-/
|
||
@[inline] def endsWith (s post : String) : Bool :=
|
||
s.toSubstring.takeRight post.length == post.toSubstring
|
||
|
||
/--
|
||
Removes trailing whitespace from a string.
|
||
|
||
“Whitespace” is defined as characters for which `Char.isWhitespace` returns `true`.
|
||
|
||
Examples:
|
||
* `"abc".trimRight = "abc"`
|
||
* `" abc".trimRight = " abc"`
|
||
* `"abc \t ".trimRight = "abc"`
|
||
* `" abc ".trimRight = " abc"`
|
||
* `"abc\ndef\n".trimRight = "abc\ndef"`
|
||
-/
|
||
@[inline] def trimRight (s : String) : String :=
|
||
s.toSubstring.trimRight.toString
|
||
|
||
/--
|
||
Removes leading whitespace from a string.
|
||
|
||
“Whitespace” is defined as characters for which `Char.isWhitespace` returns `true`.
|
||
|
||
Examples:
|
||
* `"abc".trimLeft = "abc"`
|
||
* `" abc".trimLeft = " abc"`
|
||
* `"abc \t ".trimLeft = "abc \t "`
|
||
* `" abc ".trimLeft = "abc "`
|
||
* `"abc\ndef\n".trimLeft = "abc\ndef\n"`
|
||
-/
|
||
@[inline] def trimLeft (s : String) : String :=
|
||
s.toSubstring.trimLeft.toString
|
||
|
||
/--
|
||
Removes leading and trailing whitespace from a string.
|
||
|
||
“Whitespace” is defined as characters for which `Char.isWhitespace` returns `true`.
|
||
|
||
Examples:
|
||
* `"abc".trim = "abc"`
|
||
* `" abc".trim = "abc"`
|
||
* `"abc \t ".trim = "abc"`
|
||
* `" abc ".trim = "abc"`
|
||
* `"abc\ndef\n".trim = "abc\ndef"`
|
||
-/
|
||
@[inline] def trim (s : String) : String :=
|
||
s.toSubstring.trim.toString
|
||
|
||
@[export lean_string_trim]
|
||
def Internal.trimImpl (s : String) : String :=
|
||
String.trim s
|
||
|
||
/--
|
||
Repeatedly increments a position in a string, as if by `String.next`, while the predicate `p`
|
||
returns `true` for the character at the position. Stops incrementing at the end of the string or
|
||
when `p` returns `false` for the current character.
|
||
|
||
Examples:
|
||
* `let s := " a "; s.get (s.nextWhile Char.isWhitespace 0) = 'a'`
|
||
* `let s := "a "; s.get (s.nextWhile Char.isWhitespace 0) = 'a'`
|
||
* `let s := "ba "; s.get (s.nextWhile Char.isWhitespace 0) = 'b'`
|
||
-/
|
||
@[inline] def Pos.Raw.nextWhile (s : String) (p : Char → Bool) (i : String.Pos.Raw) : String.Pos.Raw :=
|
||
Substring.takeWhileAux s s.endPos p i
|
||
|
||
@[deprecated Pos.Raw.nextWhile (since := "2025-10-10")]
|
||
abbrev nextWhile (s : String) (p : Char → Bool) (i : String.Pos.Raw) : String.Pos.Raw :=
|
||
Pos.Raw.nextWhile s p i
|
||
|
||
@[export lean_string_nextwhile]
|
||
def Internal.nextWhileImpl (s : String) (p : Char → Bool) (i : String.Pos.Raw) : String.Pos.Raw :=
|
||
i.nextWhile s p
|
||
|
||
/--
|
||
Repeatedly increments a position in a string, as if by `String.next`, while the predicate `p`
|
||
returns `false` for the character at the position. Stops incrementing at the end of the string or
|
||
when `p` returns `true` for the current character.
|
||
|
||
Examples:
|
||
* `let s := " a "; s.get (s.nextUntil Char.isWhitespace 0) = ' '`
|
||
* `let s := " a "; s.get (s.nextUntil Char.isLetter 0) = 'a'`
|
||
* `let s := "a "; s.get (s.nextUntil Char.isWhitespace 0) = ' '`
|
||
-/
|
||
@[inline] def Pos.Raw.nextUntil (s : String) (p : Char → Bool) (i : String.Pos.Raw) : String.Pos.Raw :=
|
||
nextWhile s (fun c => !p c) i
|
||
|
||
@[deprecated Pos.Raw.nextUntil (since := "2025-10-10")]
|
||
def nextUntil (s : String) (p : Char → Bool) (i : String.Pos.Raw) : String.Pos.Raw :=
|
||
i.nextUntil s p
|
||
|
||
/--
|
||
Replaces each character in `s` with the result of applying `Char.toUpper` to it.
|
||
|
||
`Char.toUpper` has no effect on characters outside of the range `'a'`–`'z'`.
|
||
|
||
Examples:
|
||
* `"orange".toUpper = "ORANGE"`
|
||
* `"abc123".toUpper = "ABC123"`
|
||
-/
|
||
@[inline] def toUpper (s : String) : String :=
|
||
s.map Char.toUpper
|
||
|
||
/--
|
||
Replaces each character in `s` with the result of applying `Char.toLower` to it.
|
||
|
||
`Char.toLower` has no effect on characters outside of the range `'A'`–`'Z'`.
|
||
|
||
Examples:
|
||
* `"ORANGE".toLower = "orange"`
|
||
* `"Orange".toLower = "orange"`
|
||
* `"ABc123".toLower = "abc123"`
|
||
-/
|
||
@[inline] def toLower (s : String) : String :=
|
||
s.map Char.toLower
|
||
|
||
/--
|
||
Replaces the first character in `s` with the result of applying `Char.toUpper` to it. Returns the
|
||
empty string if the string is empty.
|
||
|
||
`Char.toUpper` has no effect on characters outside of the range `'a'`–`'z'`.
|
||
|
||
Examples:
|
||
* `"orange".capitalize = "Orange"`
|
||
* `"ORANGE".capitalize = "ORANGE"`
|
||
* `"".capitalize = ""`
|
||
-/
|
||
@[inline] def capitalize (s : String) : String :=
|
||
(0 : Pos.Raw).set s <| (0 : Pos.Raw).get s |>.toUpper
|
||
|
||
@[export lean_string_capitalize]
|
||
def Internal.capitalizeImpl (s : String) : String :=
|
||
String.capitalize s
|
||
|
||
/--
|
||
Replaces the first character in `s` with the result of applying `Char.toLower` to it. Returns the
|
||
empty string if the string is empty.
|
||
|
||
`Char.toLower` has no effect on characters outside of the range `'A'`–`'Z'`.
|
||
|
||
Examples:
|
||
* `"Orange".decapitalize = "orange"`
|
||
* `"ORANGE".decapitalize = "oRANGE"`
|
||
* `"".decapitalize = ""`
|
||
-/
|
||
@[inline] def decapitalize (s : String) :=
|
||
(0 : Pos.Raw).set s <| (0 : Pos.Raw).get s |>.toLower
|
||
|
||
/--
|
||
If `pre` is a prefix of `s`, returns the remainder. Returns `none` otherwise.
|
||
|
||
The string `pre` is a prefix of `s` if there exists a `t : String` such that `s = pre ++ t`. If so,
|
||
the result is `some t`.
|
||
|
||
Use `String.stripPrefix` to return the string unchanged when `pre` is not a prefix.
|
||
|
||
Examples:
|
||
* `"red green blue".dropPrefix? "red " = some "green blue"`
|
||
* `"red green blue".dropPrefix? "reed " = none`
|
||
* `"red green blue".dropPrefix? "" = some "red green blue"`
|
||
-/
|
||
def dropPrefix? (s : String) (pre : String) : Option Substring :=
|
||
s.toSubstring.dropPrefix? pre.toSubstring
|
||
|
||
/--
|
||
If `suff` is a suffix of `s`, returns the remainder. Returns `none` otherwise.
|
||
|
||
The string `suff` is a suffix of `s` if there exists a `t : String` such that `s = t ++ suff`. If so,
|
||
the result is `some t`.
|
||
|
||
Use `String.stripSuffix` to return the string unchanged when `suff` is not a suffix.
|
||
|
||
Examples:
|
||
* `"red green blue".dropSuffix? " blue" = some "red green"`
|
||
* `"red green blue".dropSuffix? " blu " = none`
|
||
* `"red green blue".dropSuffix? "" = some "red green blue"`
|
||
-/
|
||
def dropSuffix? (s : String) (suff : String) : Option Substring :=
|
||
s.toSubstring.dropSuffix? suff.toSubstring
|
||
|
||
/--
|
||
If `pre` is a prefix of `s`, returns the remainder. Returns `s` unmodified otherwise.
|
||
|
||
The string `pre` is a prefix of `s` if there exists a `t : String` such that `s = pre ++ t`. If so,
|
||
the result is `t`. Otherwise, it is `s`.
|
||
|
||
Use `String.dropPrefix?` to return `none` when `pre` is not a prefix.
|
||
|
||
Examples:
|
||
* `"red green blue".stripPrefix "red " = "green blue"`
|
||
* `"red green blue".stripPrefix "reed " = "red green blue"`
|
||
* `"red green blue".stripPrefix "" = "red green blue"`
|
||
-/
|
||
def stripPrefix (s : String) (pre : String) : String :=
|
||
s.dropPrefix? pre |>.map Substring.toString |>.getD s
|
||
|
||
/--
|
||
If `suff` is a suffix of `s`, returns the remainder. Returns `s` unmodified otherwise.
|
||
|
||
The string `suff` is a suffix of `s` if there exists a `t : String` such that `s = t ++ suff`. If so,
|
||
the result is `t`. Otherwise, it is `s`.
|
||
|
||
Use `String.dropSuffix?` to return `none` when `suff` is not a suffix.
|
||
|
||
Examples:
|
||
* `"red green blue".stripSuffix " blue" = "red green"`
|
||
* `"red green blue".stripSuffix " blu " = "red green blue"`
|
||
* `"red green blue".stripSuffix "" = "red green blue"`
|
||
-/
|
||
def stripSuffix (s : String) (suff : String) : String :=
|
||
s.dropSuffix? suff |>.map Substring.toString |>.getD s
|
||
|
||
end String
|
||
|
||
namespace String
|
||
|
||
@[ext]
|
||
theorem ext {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ :=
|
||
data_injective h
|
||
|
||
@[simp] theorem default_eq : default = "" := rfl
|
||
|
||
@[simp]
|
||
theorem mk_eq_asString (s : List Char) : String.mk s = List.asString s := rfl
|
||
|
||
@[simp] theorem length_empty : "".length = 0 := by simp [← length_data, data_empty]
|
||
|
||
theorem singleton_eq {c : Char} : String.singleton c = [c].asString := by
|
||
simp [← bytes_inj]
|
||
|
||
@[simp] theorem data_singleton (c : Char) : (String.singleton c).data = [c] := by
|
||
simp [singleton_eq]
|
||
|
||
@[simp]
|
||
theorem length_singleton {c : Char} : (String.singleton c).length = 1 := by
|
||
simp [← length_data]
|
||
|
||
theorem push_eq_append (c : Char) : String.push s c = s ++ singleton c := by
|
||
simp
|
||
|
||
@[simp] theorem data_push (c : Char) : (String.push s c).data = s.data ++ [c] := by
|
||
simp [← append_singleton]
|
||
|
||
@[simp] theorem length_push (c : Char) : (String.push s c).length = s.length + 1 := by
|
||
simp [← length_data]
|
||
|
||
@[simp] theorem length_pushn (c : Char) (n : Nat) : (pushn s c n).length = s.length + n := by
|
||
unfold pushn; induction n <;> simp [Nat.repeat, Nat.add_assoc, *]
|
||
|
||
@[simp] theorem length_append (s t : String) : (s ++ t).length = s.length + t.length := by
|
||
simp [← length_data]
|
||
|
||
attribute [simp] toList -- prefer `String.data` over `String.toList` in lemmas
|
||
|
||
theorem lt_iff {s t : String} : s < t ↔ s.data < t.data := .rfl
|
||
|
||
namespace Pos.Raw
|
||
|
||
theorem byteIdx_mk (n : Nat) : byteIdx ⟨n⟩ = n := rfl
|
||
|
||
@[simp] theorem mk_zero : ⟨0⟩ = (0 : Pos.Raw) := rfl
|
||
|
||
@[simp] theorem mk_byteIdx (p : Pos.Raw) : ⟨p.byteIdx⟩ = p := rfl
|
||
|
||
@[deprecated byteIdx_offsetBy (since := "2025-10-08")]
|
||
theorem add_byteIdx {p₁ p₂ : Pos.Raw} : (p₂.offsetBy p₁).byteIdx = p₁.byteIdx + p₂.byteIdx := by
|
||
simp
|
||
|
||
@[deprecated byteIdx_offsetBy (since := "2025-10-08")]
|
||
theorem add_eq {p₁ p₂ : Pos.Raw} : p₂.offsetBy p₁ = ⟨p₁.byteIdx + p₂.byteIdx⟩ := rfl
|
||
|
||
@[deprecated byteIdx_unoffsetBy (since := "2025-10-08")]
|
||
theorem sub_byteIdx (p₁ p₂ : Pos.Raw) : (p₁.unoffsetBy p₂).byteIdx = p₁.byteIdx - p₂.byteIdx := rfl
|
||
|
||
@[deprecated byteIdx_add_char (since := "2025-10-10")]
|
||
theorem addChar_byteIdx (p : Pos.Raw) (c : Char) : (p + c).byteIdx = p.byteIdx + c.utf8Size :=
|
||
byteIdx_add_char
|
||
|
||
theorem add_char_eq (p : Pos.Raw) (c : Char) : p + c = ⟨p.byteIdx + c.utf8Size⟩ := rfl
|
||
|
||
@[deprecated add_char_eq (since := "2025-10-10")]
|
||
theorem addChar_eq (p : Pos.Raw) (c : Char) : p + c = ⟨p.byteIdx + c.utf8Size⟩ :=
|
||
add_char_eq p c
|
||
|
||
theorem byteIdx_zero_add_char (c : Char) : ((0 : Pos.Raw) + c).byteIdx = c.utf8Size := by
|
||
simp only [byteIdx_add_char, byteIdx_zero, Nat.zero_add]
|
||
|
||
@[deprecated byteIdx_zero_add_char (since := "2025-10-10")]
|
||
theorem zero_addChar_byteIdx (c : Char) : ((0 : Pos.Raw) + c).byteIdx = c.utf8Size :=
|
||
byteIdx_zero_add_char c
|
||
|
||
theorem zero_add_char_eq (c : Char) : (0 : Pos.Raw) + c = ⟨c.utf8Size⟩ := by rw [← byteIdx_zero_add_char]
|
||
|
||
@[deprecated zero_add_char_eq (since := "2025-10-10")]
|
||
theorem zero_addChar_eq (c : Char) : (0 : Pos.Raw) + c = ⟨c.utf8Size⟩ :=
|
||
zero_add_char_eq c
|
||
|
||
theorem add_char_right_comm (p : Pos.Raw) (c₁ c₂ : Char) : p + c₁ + c₂ = p + c₂ + c₁ := by
|
||
simpa [Pos.Raw.ext_iff] using Nat.add_right_comm ..
|
||
|
||
@[deprecated add_char_right_comm (since := "2025-10-10")]
|
||
theorem addChar_right_comm (p : Pos.Raw) (c₁ c₂ : Char) : p + c₁ + c₂ = p + c₂ + c₁ :=
|
||
add_char_right_comm p c₁ c₂
|
||
|
||
theorem ne_of_gt {i₁ i₂ : Pos.Raw} (h : i₁ < i₂) : i₂ ≠ i₁ := (ne_of_lt h).symm
|
||
|
||
@[deprecated byteIdx_add_string (since := "2025-10-10")]
|
||
theorem byteIdx_addString (p : Pos.Raw) (s : String) :
|
||
(p + s).byteIdx = p.byteIdx + s.utf8ByteSize := byteIdx_add_string
|
||
|
||
@[deprecated byteIdx_addString (since := "2025-03-18")]
|
||
abbrev addString_byteIdx := @byteIdx_add_string
|
||
|
||
theorem addString_eq (p : Pos.Raw) (s : String) : p + s = ⟨p.byteIdx + s.utf8ByteSize⟩ := rfl
|
||
|
||
theorem byteIdx_zero_add_string (s : String) : ((0 : Pos.Raw) + s).byteIdx = s.utf8ByteSize := by
|
||
simp only [byteIdx_add_string, byteIdx_zero, Nat.zero_add]
|
||
|
||
@[deprecated byteIdx_zero_add_string (since := "2025-10-10")]
|
||
theorem byteIdx_zero_addString (s : String) : ((0 : Pos.Raw) + s).byteIdx = s.utf8ByteSize :=
|
||
byteIdx_zero_add_string s
|
||
|
||
@[deprecated byteIdx_zero_addString (since := "2025-03-18")]
|
||
abbrev zero_addString_byteIdx := @byteIdx_zero_add_string
|
||
|
||
theorem zero_add_string_eq (s : String) : (0 : Pos.Raw) + s = ⟨s.utf8ByteSize⟩ := by
|
||
rw [← byteIdx_zero_add_string]
|
||
|
||
@[deprecated zero_add_string_eq (since := "2025-10-10")]
|
||
theorem zero_addString_eq (s : String) : (0 : Pos.Raw) + s = ⟨s.utf8ByteSize⟩ :=
|
||
zero_add_string_eq s
|
||
|
||
@[simp] theorem mk_le_mk {i₁ i₂ : Nat} : Pos.Raw.mk i₁ ≤ Pos.Raw.mk i₂ ↔ i₁ ≤ i₂ := .rfl
|
||
|
||
@[simp] theorem mk_lt_mk {i₁ i₂ : Nat} : Pos.Raw.mk i₁ < Pos.Raw.mk i₂ ↔ i₁ < i₂ := .rfl
|
||
|
||
@[simp] theorem get!_eq_get (s : String) (p : Pos.Raw) : p.get! s = p.get s := rfl
|
||
|
||
@[simp] theorem get'_eq (s : String) (p : Pos.Raw) (h) : get' s p h = get s p := rfl
|
||
|
||
@[simp] theorem next'_eq (s : String) (p : Pos.Raw) (h) : next' s p h = next s p := rfl
|
||
|
||
end Pos.Raw
|
||
|
||
@[deprecated Pos.Raw.get!_eq_get (since := "2025-10-14")]
|
||
theorem get!_eq_get (s : String) (p : Pos.Raw) : p.get! s = p.get s := rfl
|
||
|
||
@[deprecated Pos.Raw.lt_next (since := "2025-10-10")]
|
||
theorem lt_next' (s : String) (p : Pos.Raw) : p < p.next s :=
|
||
Pos.Raw.lt_next s p
|
||
|
||
@[simp] theorem Pos.Raw.prev_zero (s : String) : Pos.Raw.prev s 0 = 0 := by
|
||
rw [Pos.Raw.prev]
|
||
cases s.data <;> simp [utf8PrevAux, Pos.Raw.le_iff]
|
||
|
||
@[deprecated Pos.Raw.prev_zero (since := "2025-10-10")]
|
||
theorem prev_zero (s : String) : (0 : Pos.Raw).prev s = 0 := by
|
||
exact Pos.Raw.prev_zero s
|
||
|
||
@[deprecated Pos.Raw.get'_eq (since := "2025-10-14")]
|
||
theorem get'_eq (s : String) (p : Pos.Raw) (h) : p.get' s h = p.get s := rfl
|
||
|
||
@[deprecated Pos.Raw.next'_eq (since := "2025-10-14")]
|
||
theorem next'_eq (s : String) (p : Pos.Raw) (h) : p.next' s h = p.next s := rfl
|
||
|
||
-- `toSubstring'` is just a synonym for `toSubstring` without the `@[inline]` attribute
|
||
-- so for proving can be unfolded.
|
||
attribute [simp] toSubstring'
|
||
|
||
end String
|
||
|
||
namespace Char
|
||
|
||
theorem toString_eq_singleton {c : Char} : c.toString = String.singleton c := rfl
|
||
|
||
@[simp] theorem length_toString (c : Char) : c.toString.length = 1 := by
|
||
simp [toString_eq_singleton]
|
||
|
||
end Char
|
||
|
||
open String
|
||
|
||
namespace Substring
|
||
|
||
@[simp] theorem prev_zero (s : Substring) : s.prev 0 = 0 := by simp [prev]
|
||
|
||
@[simp] theorem prevn_zero (s : Substring) : ∀ n, s.prevn n 0 = 0
|
||
| 0 => rfl
|
||
| n+1 => by simp [prevn, prevn_zero s n]
|
||
|
||
end Substring
|