This PR moves some more material out of `Init.Data.String.Basic` and fixes the incorrect name `String.Pos.Raw.IsValidForSlice.le_utf8ByteSize`.
572 lines
19 KiB
Text
572 lines
19 KiB
Text
/-
|
||
Copyright (c) 2016 Microsoft Corporation. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
Author: Leonardo de Moura, Mario Carneiro
|
||
-/
|
||
module
|
||
|
||
prelude
|
||
public import Init.Data.ByteArray.Basic
|
||
public import Init.Data.String.PosRaw
|
||
import Init.Data.ByteArray.Lemmas
|
||
|
||
/-!
|
||
# Preliminary developments for strings
|
||
|
||
This file contains the material about strings which we can write down without the results in
|
||
`Init.Data.String.Decode`, i.e., without knowing about the bijection between `String` and
|
||
`List Char` given by UTF-8 decoding and encoding.
|
||
|
||
Note that this file, despite being called `Defs`, contains quite a few lemmas.
|
||
-/
|
||
|
||
public section
|
||
|
||
@[simp]
|
||
theorem List.utf8Encode_nil : List.utf8Encode [] = ByteArray.empty := by simp [utf8Encode]
|
||
|
||
theorem List.utf8Encode_singleton {c : Char} : [c].utf8Encode = (String.utf8EncodeChar c).toByteArray := by
|
||
simp [utf8Encode]
|
||
|
||
@[simp]
|
||
theorem List.utf8Encode_append {l l' : List Char} :
|
||
(l ++ l').utf8Encode = l.utf8Encode ++ l'.utf8Encode := by
|
||
simp [utf8Encode]
|
||
|
||
theorem List.utf8Encode_cons {c : Char} {l : List Char} : (c :: l).utf8Encode = [c].utf8Encode ++ l.utf8Encode := by
|
||
rw [← singleton_append, List.utf8Encode_append]
|
||
|
||
@[simp]
|
||
theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] := by
|
||
fun_cases String.utf8EncodeChar with simp
|
||
|
||
@[simp]
|
||
theorem List.utf8Encode_eq_empty {l : List Char} : l.utf8Encode = ByteArray.empty ↔ l = [] := by
|
||
simp [utf8Encode, ← List.eq_nil_iff_forall_not_mem]
|
||
|
||
theorem ByteArray.isValidUTF8_utf8Encode {l : List Char} : IsValidUTF8 l.utf8Encode :=
|
||
.intro l rfl
|
||
|
||
@[simp]
|
||
theorem ByteArray.isValidUTF8_empty : IsValidUTF8 ByteArray.empty :=
|
||
.intro [] (by simp)
|
||
|
||
theorem Char.isValidUTF8_toByteArray_utf8EncodeChar {c : Char} :
|
||
ByteArray.IsValidUTF8 (String.utf8EncodeChar c).toByteArray :=
|
||
.intro [c] (by simp [List.utf8Encode_singleton])
|
||
|
||
theorem ByteArray.IsValidUTF8.append {b b' : ByteArray} (h : IsValidUTF8 b) (h' : IsValidUTF8 b') :
|
||
IsValidUTF8 (b ++ b') := by
|
||
rcases h with ⟨m, rfl⟩
|
||
rcases h' with ⟨m', rfl⟩
|
||
exact .intro (m ++ m') (by simp)
|
||
|
||
/--
|
||
Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into
|
||
the corresponding string.
|
||
-/
|
||
@[inline, expose]
|
||
def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUTF8) : String :=
|
||
.ofByteArray a h
|
||
|
||
/--
|
||
Encodes a string in UTF-8 as an array of bytes.
|
||
-/
|
||
@[extern "lean_string_to_utf8"]
|
||
def String.toUTF8 (a : @& String) : ByteArray :=
|
||
a.bytes
|
||
|
||
@[simp] theorem String.toUTF8_eq_bytes {s : String} : s.toUTF8 = s.bytes := (rfl)
|
||
|
||
@[simp] theorem String.bytes_empty : "".bytes = ByteArray.empty := (rfl)
|
||
|
||
/--
|
||
Appends two strings. Usually accessed via the `++` operator.
|
||
|
||
The internal implementation will perform destructive updates if the string is not shared.
|
||
|
||
Examples:
|
||
* `"abc".append "def" = "abcdef"`
|
||
* `"abc" ++ "def" = "abcdef"`
|
||
* `"" ++ "" = ""`
|
||
-/
|
||
@[extern "lean_string_append", expose]
|
||
def String.append (s : String) (t : @& String) : String where
|
||
bytes := s.bytes ++ t.bytes
|
||
isValidUTF8 := s.isValidUTF8.append t.isValidUTF8
|
||
|
||
instance : Append String where
|
||
append s t := s.append t
|
||
|
||
@[simp]
|
||
theorem String.bytes_append {s t : String} : (s ++ t).bytes = s.bytes ++ t.bytes := (rfl)
|
||
|
||
theorem String.bytes_inj {s t : String} : s.bytes = t.bytes ↔ s = t := by
|
||
refine ⟨fun h => ?_, (· ▸ rfl)⟩
|
||
rcases s with ⟨s⟩
|
||
rcases t with ⟨t⟩
|
||
subst h
|
||
rfl
|
||
|
||
@[simp] theorem List.bytes_asString {l : List Char} : l.asString.bytes = l.utf8Encode := by
|
||
simp [List.asString, String.mk]
|
||
|
||
theorem String.exists_eq_asString (s : String) :
|
||
∃ l : List Char, s = l.asString := by
|
||
rcases s with ⟨_, ⟨l, rfl⟩⟩
|
||
refine ⟨l, by simp [← String.bytes_inj]⟩
|
||
|
||
@[simp]
|
||
theorem String.utf8ByteSize_empty : "".utf8ByteSize = 0 := (rfl)
|
||
|
||
@[simp]
|
||
theorem String.utf8ByteSize_append {s t : String} :
|
||
(s ++ t).utf8ByteSize = s.utf8ByteSize + t.utf8ByteSize := by
|
||
simp [utf8ByteSize]
|
||
|
||
@[simp]
|
||
theorem String.size_bytes {s : String} : s.bytes.size = s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem String.bytes_push {s : String} {c : Char} : (s.push c).bytes = s.bytes ++ [c].utf8Encode := by
|
||
simp [push]
|
||
|
||
namespace String
|
||
|
||
@[deprecated rawEndPos (since := "2025-10-20")]
|
||
def endPos (s : String) : String.Pos.Raw :=
|
||
s.rawEndPos
|
||
|
||
/-- The start position of the string, as a `String.Pos.Raw.` -/
|
||
def rawStartPos (_s : String) : String.Pos.Raw :=
|
||
0
|
||
|
||
@[simp]
|
||
theorem rawStartPos_eq {s : String} : s.rawStartPos = 0 := (rfl)
|
||
|
||
@[simp]
|
||
theorem byteIdx_rawEndPos {s : String} : s.rawEndPos.byteIdx = s.utf8ByteSize := rfl
|
||
|
||
@[deprecated byteIdx_rawEndPos (since := "2025-10-20")]
|
||
theorem byteIdx_endPos {s : String} : s.rawEndPos.byteIdx = s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_ofByteArray {b : ByteArray} {h} :
|
||
(String.ofByteArray b h).utf8ByteSize = b.size := rfl
|
||
|
||
@[simp]
|
||
theorem bytes_singleton {c : Char} : (String.singleton c).bytes = [c].utf8Encode := by
|
||
simp [singleton]
|
||
|
||
theorem singleton_eq_asString {c : Char} : String.singleton c = [c].asString := by
|
||
simp [← String.bytes_inj]
|
||
|
||
@[simp]
|
||
theorem append_singleton {s : String} {c : Char} : s ++ singleton c = s.push c := by
|
||
simp [← bytes_inj]
|
||
|
||
@[simp]
|
||
theorem append_left_inj {s₁ s₂ : String} (t : String) :
|
||
s₁ ++ t = s₂ ++ t ↔ s₁ = s₂ := by
|
||
simp [← bytes_inj]
|
||
|
||
theorem append_assoc {s₁ s₂ s₃ : String} : s₁ ++ s₂ ++ s₃ = s₁ ++ (s₂ ++ s₃) := by
|
||
simp [← bytes_inj, ByteArray.append_assoc]
|
||
|
||
@[simp]
|
||
theorem utf8ByteSize_eq_zero_iff {s : String} : s.utf8ByteSize = 0 ↔ s = "" := by
|
||
refine ⟨fun h => ?_, fun h => h ▸ utf8ByteSize_empty⟩
|
||
simpa [← bytes_inj, ← ByteArray.size_eq_zero_iff] using h
|
||
|
||
theorem rawEndPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" := by
|
||
simp
|
||
|
||
@[deprecated rawEndPos_eq_zero_iff (since := "2025-10-20")]
|
||
theorem endPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" :=
|
||
rawEndPos_eq_zero_iff
|
||
|
||
/--
|
||
Adds multiple repetitions of a character to the end of a string.
|
||
|
||
Returns `s`, with `n` repetitions of `c` at the end. Internally, the implementation repeatedly calls
|
||
`String.push`, so the string is modified in-place if there is a unique reference to it.
|
||
|
||
Examples:
|
||
* `"indeed".pushn '!' 2 = "indeed!!"`
|
||
* `"indeed".pushn '!' 0 = "indeed"`
|
||
* `"".pushn ' ' 4 = " "`
|
||
-/
|
||
@[inline] def pushn (s : String) (c : Char) (n : Nat) : String :=
|
||
n.repeat (fun s => s.push c) s
|
||
|
||
theorem pushn_eq_repeat_push {s : String} {c : Char} {n : Nat} :
|
||
s.pushn c n = n.repeat (fun s => s.push c) s := (rfl)
|
||
|
||
@[export lean_string_pushn]
|
||
def Internal.pushnImpl (s : String) (c : Char) (n : Nat) : String :=
|
||
String.pushn s c n
|
||
|
||
/--
|
||
Checks whether a string is empty.
|
||
|
||
Empty strings are equal to `""` and have length and end position `0`.
|
||
|
||
Examples:
|
||
* `"".isEmpty = true`
|
||
* `"empty".isEmpty = false`
|
||
* `" ".isEmpty = false`
|
||
-/
|
||
@[inline] def isEmpty (s : String) : Bool :=
|
||
s.utf8ByteSize == 0
|
||
|
||
@[export lean_string_isempty]
|
||
def Internal.isEmptyImpl (s : String) : Bool :=
|
||
String.isEmpty s
|
||
|
||
/--
|
||
Appends all the strings in a list of strings, in order.
|
||
|
||
Use `String.intercalate` to place a separator string between the strings in a list.
|
||
|
||
Examples:
|
||
* `String.join ["gr", "ee", "n"] = "green"`
|
||
* `String.join ["b", "", "l", "", "ue"] = "blue"`
|
||
* `String.join [] = ""`
|
||
-/
|
||
@[inline] def join (l : List String) : String :=
|
||
l.foldl (fun r s => r ++ s) ""
|
||
|
||
/--
|
||
Appends the strings in a list of strings, placing the separator `s` between each pair.
|
||
|
||
Examples:
|
||
* `", ".intercalate ["red", "green", "blue"] = "red, green, blue"`
|
||
* `" and ".intercalate ["tea", "coffee"] = "tea and coffee"`
|
||
* `" | ".intercalate ["M", "", "N"] = "M | | N"`
|
||
-/
|
||
def intercalate (s : String) : List String → String
|
||
| [] => ""
|
||
| a :: as => go a s as
|
||
where go (acc : String) (s : String) : List String → String
|
||
| a :: as => go (acc ++ s ++ a) s as
|
||
| [] => acc
|
||
|
||
@[export lean_string_intercalate]
|
||
def Internal.intercalateImpl (s : String) : List String → String :=
|
||
String.intercalate s
|
||
|
||
/--
|
||
Predicate for validity of positions inside a `String`.
|
||
|
||
There are multiple equivalent definitions for validity.
|
||
|
||
We say that a position is valid if the string obtained by taking all of the bytes up to, but
|
||
excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_zero`.
|
||
|
||
Similarly, a position is valid if the string obtained by taking all of the bytes starting at the
|
||
given position is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_utf8ByteSize`.
|
||
|
||
An equivalent condition is that the position is the length of the UTF-8 encoding of
|
||
some prefix of the characters of the string; see `Pos.isValid_iff_exists_append` and
|
||
`Pos.isValid_iff_exists_take_data`.
|
||
|
||
Another equivalent condition that can be checked efficiently is that the position is either the
|
||
end position or strictly smaller than the end position and the byte at the position satisfies
|
||
`UInt8.IsUTF8FirstByte`; see `Pos.isValid_iff_isUTF8FirstByte`.
|
||
|
||
Examples:
|
||
* `String.Pos.IsValid "abc" ⟨0⟩`
|
||
* `String.Pos.IsValid "abc" ⟨1⟩`
|
||
* `String.Pos.IsValid "abc" ⟨3⟩`
|
||
* `¬ String.Pos.IsValid "abc" ⟨4⟩`
|
||
* `String.Pos.IsValid "𝒫(A)" ⟨0⟩`
|
||
* `¬ String.Pos.IsValid "𝒫(A)" ⟨1⟩`
|
||
* `¬ String.Pos.IsValid "𝒫(A)" ⟨2⟩`
|
||
* `¬ String.Pos.IsValid "𝒫(A)" ⟨3⟩`
|
||
* `String.Pos.IsValid "𝒫(A)" ⟨4⟩`
|
||
-/
|
||
structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where private mk ::
|
||
le_rawEndPos : off ≤ s.rawEndPos
|
||
isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8
|
||
|
||
theorem Pos.Raw.IsValid.le_utf8ByteSize {s : String} {off : String.Pos.Raw} (h : off.IsValid s) :
|
||
off.byteIdx ≤ s.utf8ByteSize := by
|
||
simpa [Pos.Raw.le_iff] using h.le_rawEndPos
|
||
|
||
theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} :
|
||
p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 :=
|
||
⟨fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩
|
||
|
||
@[deprecated le_rawEndPos (since := "2025-10-20")]
|
||
theorem Pos.Raw.IsValid.le_endPos {s : String} {off : String.Pos.Raw} (h : off.IsValid s) :
|
||
off ≤ s.rawEndPos :=
|
||
h.le_rawEndPos
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_zero {s : String} : (0 : Pos.Raw).IsValid s where
|
||
le_rawEndPos := by simp [Pos.Raw.le_iff]
|
||
isValidUTF8_extract_zero := by simp
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_rawEndPos {s : String} : s.rawEndPos.IsValid s where
|
||
le_rawEndPos := by simp
|
||
isValidUTF8_extract_zero := by simp [← size_bytes, s.isValidUTF8]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.isValid_empty_iff {p : Pos.Raw} : p.IsValid "" ↔ p = 0 := by
|
||
refine ⟨?_, ?_⟩
|
||
· rintro ⟨h₁, h₂⟩
|
||
simp only [le_iff, byteIdx_rawEndPos, utf8ByteSize_empty, Nat.le_zero_eq] at h₁
|
||
ext
|
||
omega
|
||
· rintro rfl
|
||
simp
|
||
|
||
/--
|
||
A `ValidPos s` is a byte offset in `s` together with a proof that this position is at a UTF-8
|
||
character boundary.
|
||
-/
|
||
@[ext]
|
||
structure ValidPos (s : String) where
|
||
/-- The underlying byte offset of the `ValidPos`. -/
|
||
offset : Pos.Raw
|
||
/-- The proof that `offset` is valid for the string `s`. -/
|
||
isValid : offset.IsValid s
|
||
deriving @[expose] DecidableEq
|
||
|
||
/-- The start position of `s`, as an `s.ValidPos`. -/
|
||
@[inline, expose]
|
||
def startValidPos (s : String) : s.ValidPos where
|
||
offset := 0
|
||
isValid := by simp
|
||
|
||
@[simp]
|
||
theorem offset_startValidPos {s : String} : s.startValidPos.offset = 0 := (rfl)
|
||
|
||
instance {s : String} : Inhabited s.ValidPos where
|
||
default := s.startValidPos
|
||
|
||
/-- The past-the-end position of `s`, as an `s.ValidPos`. -/
|
||
@[inline, expose]
|
||
def endValidPos (s : String) : s.ValidPos where
|
||
offset := s.rawEndPos
|
||
isValid := by simp
|
||
|
||
@[simp]
|
||
theorem offset_endValidPos {s : String} : s.endValidPos.offset = s.rawEndPos := (rfl)
|
||
|
||
instance {s : String} : LE s.ValidPos where
|
||
le l r := l.offset ≤ r.offset
|
||
|
||
instance {s : String} : LT s.ValidPos where
|
||
lt l r := l.offset < r.offset
|
||
|
||
theorem ValidPos.le_iff {s : String} {l r : s.ValidPos} : l ≤ r ↔ l.offset ≤ r.offset :=
|
||
Iff.rfl
|
||
|
||
theorem ValidPos.lt_iff {s : String} {l r : s.ValidPos} : l < r ↔ l.offset < r.offset :=
|
||
Iff.rfl
|
||
|
||
instance {s : String} (l r : s.ValidPos) : Decidable (l ≤ r) :=
|
||
decidable_of_iff' _ ValidPos.le_iff
|
||
|
||
instance {s : String} (l r : s.ValidPos) : Decidable (l < r) :=
|
||
decidable_of_iff' _ ValidPos.lt_iff
|
||
|
||
/--
|
||
A region or slice of some underlying string.
|
||
|
||
A slice consists of a string together with the start and end byte positions of a region of
|
||
interest. Actually extracting a substring requires copying and memory allocation, while many
|
||
slices of the same underlying string may exist with very little overhead. While this could be achieved by tracking the bounds by hand, the slice API is much more convenient.
|
||
|
||
`String.Slice` bundles proofs to ensure that the start and end positions always delineate a valid
|
||
string. For this reason, it should be preferred over `Substring`.
|
||
-/
|
||
structure Slice where
|
||
/-- The underlying strings. -/
|
||
str : String
|
||
/-- The byte position of the start of the string slice. -/
|
||
startInclusive : str.ValidPos
|
||
/-- The byte position of the end of the string slice. -/
|
||
endExclusive : str.ValidPos
|
||
/-- The slice is not degenerate (but it may be empty). -/
|
||
startInclusive_le_endExclusive : startInclusive ≤ endExclusive
|
||
|
||
instance : Inhabited Slice where
|
||
default := ⟨"", "".startValidPos, "".startValidPos, by simp [ValidPos.le_iff]⟩
|
||
|
||
/--
|
||
Returns a slice that contains the entire string.
|
||
-/
|
||
@[inline, expose] -- expose for the defeq `s.toSlice.str = s`.
|
||
def toSlice (s : String) : Slice where
|
||
str := s
|
||
startInclusive := s.startValidPos
|
||
endExclusive := s.endValidPos
|
||
startInclusive_le_endExclusive := by simp [ValidPos.le_iff, Pos.Raw.le_iff]
|
||
|
||
/-- The number of bytes of the UTF-8 encoding of the string slice. -/
|
||
@[expose]
|
||
def Slice.utf8ByteSize (s : Slice) : Nat :=
|
||
s.startInclusive.offset.byteDistance s.endExclusive.offset
|
||
|
||
theorem Slice.utf8ByteSize_eq {s : Slice} :
|
||
s.utf8ByteSize = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx := (rfl)
|
||
|
||
instance : HAdd Pos.Raw Slice Pos.Raw where
|
||
hAdd p s := { byteIdx := p.byteIdx + s.utf8ByteSize }
|
||
|
||
instance : HAdd Slice Pos.Raw Pos.Raw where
|
||
hAdd s p := { byteIdx := s.utf8ByteSize + p.byteIdx }
|
||
|
||
instance : HSub Pos.Raw Slice Pos.Raw where
|
||
hSub p s := { byteIdx := p.byteIdx - s.utf8ByteSize }
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_add_slide {p : Pos.Raw} {s : Slice} :
|
||
(p + s).byteIdx = p.byteIdx + s.utf8ByteSize := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_slice_add {s : Slice} {p : Pos.Raw} :
|
||
(s + p).byteIdx = s.utf8ByteSize + p.byteIdx := rfl
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.byteIdx_sub_slice {p : Pos.Raw} {s : Slice} :
|
||
(p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
|
||
|
||
/-- The end position of a slice, as a `Pos.Raw`. -/
|
||
@[expose]
|
||
def Slice.rawEndPos (s : Slice) : Pos.Raw where
|
||
byteIdx := s.utf8ByteSize
|
||
|
||
@[simp]
|
||
theorem Slice.byteIdx_rawEndPos {s : Slice} : s.rawEndPos.byteIdx = s.utf8ByteSize := (rfl)
|
||
|
||
/-- Criterion for validity of positions in string slices. -/
|
||
structure Pos.Raw.IsValidForSlice (s : Slice) (p : Pos.Raw) : Prop where
|
||
le_rawEndPos : p ≤ s.rawEndPos
|
||
isValid_offsetBy : (p.offsetBy s.startInclusive.offset).IsValid s.str
|
||
|
||
theorem Pos.Raw.IsValidForSlice.le_utf8ByteSize {s : Slice} {p : Pos.Raw}
|
||
(h : p.IsValidForSlice s) : p.byteIdx ≤ s.utf8ByteSize := by
|
||
simpa [Pos.Raw.le_iff] using h.le_rawEndPos
|
||
|
||
/--
|
||
Accesses the indicated byte in the UTF-8 encoding of a string slice.
|
||
|
||
At runtime, this function is implemented by efficient, constant-time code.
|
||
-/
|
||
@[inline, expose]
|
||
def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=
|
||
s.str.getUTF8Byte (p.offsetBy s.startInclusive.offset) (by
|
||
have := s.endExclusive.isValid.le_rawEndPos
|
||
simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.le_iff, byteIdx_rawEndPos,
|
||
Pos.Raw.byteIdx_offsetBy] at *
|
||
omega)
|
||
|
||
/--
|
||
Accesses the indicated byte in the UTF-8 encoding of the string slice, or panics if the position
|
||
is out-of-bounds.
|
||
-/
|
||
@[expose]
|
||
def Slice.getUTF8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
|
||
if h : p < s.rawEndPos then
|
||
s.getUTF8Byte p h
|
||
else
|
||
panic! "String slice access is out of bounds."
|
||
|
||
/--
|
||
A `Slice.Pos s` is a byte offset in `s` together with a proof that this position is at a UTF-8
|
||
character boundary.
|
||
-/
|
||
@[ext]
|
||
structure Slice.Pos (s : Slice) where
|
||
/-- The underlying byte offset of the `Slice.Pos`. -/
|
||
offset : String.Pos.Raw
|
||
/-- The proof that `offset` is valid for the string slice `s`. -/
|
||
isValidForSlice : offset.IsValidForSlice s
|
||
deriving @[expose] DecidableEq
|
||
|
||
/-- The start position of `s`, as an `s.Pos`. -/
|
||
@[inline, expose]
|
||
def Slice.startPos (s : Slice) : s.Pos where
|
||
offset := 0
|
||
isValidForSlice := ⟨by simp [Pos.Raw.le_iff], by simpa using s.startInclusive.isValid⟩
|
||
|
||
@[simp]
|
||
theorem Slice.offset_startPos {s : Slice} : s.startPos.offset = 0 := (rfl)
|
||
|
||
instance {s : Slice} : Inhabited s.Pos where
|
||
default := s.startPos
|
||
|
||
@[simp]
|
||
theorem Slice.offset_startInclusive_add_self {s : Slice} :
|
||
s.startInclusive.offset + s = s.endExclusive.offset := by
|
||
have := s.startInclusive_le_endExclusive
|
||
simp_all [String.Pos.Raw.ext_iff, ValidPos.le_iff, Pos.Raw.le_iff, utf8ByteSize_eq]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_rawEndPos_left {p : Pos.Raw} {s : String} :
|
||
s.rawEndPos.offsetBy p = p + s := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_rawEndPos_right {p : Pos.Raw} {s : String} :
|
||
p.offsetBy s.rawEndPos = s + p := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_sliceRawEndPos_left {p : Pos.Raw} {s : Slice} :
|
||
s.rawEndPos.offsetBy p = p + s := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
@[simp]
|
||
theorem Pos.Raw.offsetBy_sliceRawEndPos_right {p : Pos.Raw} {s : Slice} :
|
||
p.offsetBy s.rawEndPos = s + p := by
|
||
simp [Pos.Raw.ext_iff]
|
||
|
||
/-- The past-the-end position of `s`, as an `s.Pos`. -/
|
||
@[inline, expose]
|
||
def Slice.endPos (s : Slice) : s.Pos where
|
||
offset := s.rawEndPos
|
||
isValidForSlice := ⟨by simp, by simpa using s.endExclusive.isValid⟩
|
||
|
||
@[simp]
|
||
theorem Slice.offset_endPos {s : Slice} : s.endPos.offset = s.rawEndPos := (rfl)
|
||
|
||
instance {s : Slice} : LE s.Pos where
|
||
le l r := l.offset ≤ r.offset
|
||
|
||
instance {s : Slice} : LT s.Pos where
|
||
lt l r := l.offset < r.offset
|
||
|
||
theorem Slice.Pos.le_iff {s : Slice} {l r : s.Pos} : l ≤ r ↔ l.offset ≤ r.offset :=
|
||
Iff.rfl
|
||
|
||
theorem Slice.Pos.lt_iff {s : Slice} {l r : s.Pos} : l < r ↔ l.offset < r.offset :=
|
||
Iff.rfl
|
||
|
||
instance {s : Slice} (l r : s.Pos) : Decidable (l ≤ r) :=
|
||
decidable_of_iff' _ Slice.Pos.le_iff
|
||
|
||
instance {s : Slice} (l r : s.Pos) : Decidable (l < r) :=
|
||
decidable_of_iff' _ Slice.Pos.lt_iff
|
||
|
||
/-- Returns the byte at a position in a slice that is not the end position. -/
|
||
@[inline, expose]
|
||
def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=
|
||
s.getUTF8Byte pos.offset (by
|
||
have := pos.isValidForSlice.le_rawEndPos
|
||
simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff]
|
||
omega)
|
||
|
||
@[simp] theorem default_eq : default = "" := rfl
|
||
|
||
@[simp]
|
||
theorem mk_eq_asString (s : List Char) : String.mk s = List.asString s := rfl
|
||
|
||
theorem push_eq_append (c : Char) : String.push s c = s ++ singleton c := by
|
||
simp
|
||
|
||
end String
|