feat: redefine String, part one (#10304)

This PR redefines `String` to be the type of byte arrays `b` for which
`b.IsValidUtf8`.

This moves the data model of strings much closer to the actual data
representation at runtime.

In the near future, we will

- provide variants of `String.Pos` and `Substring` that only allow for
valid positions
- redefine all `String` functions to be much closer to their C++
implementations

In the near-to-medium future we will then provide comprehensive
verification of `String` based on these refactors.
This commit is contained in:
Markus Himmel 2025-09-18 13:36:52 +02:00 committed by GitHub
parent 02ca710872
commit 197bc6cb66
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 2478 additions and 486 deletions

View file

@ -1580,6 +1580,7 @@ instance {p q : Prop} [d : Decidable (p ↔ q)] : Decidable (p = q) :=
gen_injective_theorems% Array
gen_injective_theorems% BitVec
gen_injective_theorems% ByteArray
gen_injective_theorems% Char
gen_injective_theorems% DoResultBC
gen_injective_theorems% DoResultPR

View file

@ -1100,6 +1100,10 @@ theorem toInt_setWidth' {m n : Nat} (p : m ≤ n) {x : BitVec m} :
rw [setWidth'_eq, toFin_setWidth, Fin.val_ofNat, Fin.coe_castLE, val_toFin,
Nat.mod_eq_of_lt (by apply BitVec.toNat_lt_twoPow_of_le p)]
theorem toNat_setWidth_of_le {w w' : Nat} {b : BitVec w} (h : w ≤ w') : (b.setWidth w').toNat = b.toNat := by
rw [BitVec.toNat_setWidth, Nat.mod_eq_of_lt]
exact BitVec.toNat_lt_twoPow_of_le h
/-! ## extractLsb -/
@[simp, grind =]
@ -1287,6 +1291,17 @@ theorem extractLsb'_eq_zero {x : BitVec w} {start : Nat} :
ext i hi
omega
theorem extractLsb'_setWidth_of_le {b : BitVec w} {start len w' : Nat} (h : start + len ≤ w') :
(b.setWidth w').extractLsb' start len = b.extractLsb' start len := by
ext i h_i
simp
omega
theorem setWidth_extractLsb'_of_le {c : BitVec w} (h : len₁ ≤ len₂) :
(c.extractLsb' start len₂).setWidth len₁ = c.extractLsb' start len₁ := by
ext i hi
simp [show i < len₂ by omega]
/-! ### allOnes -/
@[simp, grind =] theorem toNat_allOnes : (allOnes v).toNat = 2^v - 1 := by
@ -1530,6 +1545,12 @@ theorem extractLsb_and {x : BitVec w} {hi lo : Nat} :
@[simp, grind =] theorem ofNat_and {x y : Nat} : BitVec.ofNat w (x &&& y) = BitVec.ofNat w x &&& BitVec.ofNat w y :=
eq_of_toNat_eq (by simp [Nat.and_mod_two_pow])
theorem and_or_distrib_left {x y z : BitVec w} : x &&& (y ||| z) = (x &&& y) ||| (x &&& z) :=
BitVec.eq_of_getElem_eq (by simp [Bool.and_or_distrib_left])
theorem and_or_distrib_right {x y z : BitVec w} : (x ||| y) &&& z = (x &&& z) ||| (y &&& z) :=
BitVec.eq_of_getElem_eq (by simp [Bool.and_or_distrib_right])
/-! ### xor -/
@[simp, grind =] theorem toNat_xor (x y : BitVec v) :
@ -2180,6 +2201,10 @@ theorem msb_ushiftRight {x : BitVec w} {n : Nat} :
have := lt_of_getLsbD ha
omega
theorem setWidth_ushiftRight_eq_extractLsb {b : BitVec w} : (b >>> w').setWidth w'' = b.extractLsb' w' w'' := by
ext i hi
simp
/-! ### ushiftRight reductions from BitVec to Nat -/
@[simp, grind =]
@ -2970,10 +2995,9 @@ theorem shiftLeft_eq_concat_of_lt {x : BitVec w} {n : Nat} (hn : n < w) :
/-- Combine adjacent `extractLsb'` operations into a single `extractLsb'`. -/
theorem extractLsb'_append_extractLsb'_eq_extractLsb' {x : BitVec w} (h : start₂ = start₁ + len₁) :
((x.extractLsb' start₂ len₂) ++ (x.extractLsb' start₁ len₁)) =
(x.extractLsb' start₁ (len₁ + len₂)).cast (by omega) := by
x.extractLsb' start₁ (len₂ + len₁) := by
ext i h
simp only [getElem_append, getElem_extractLsb', dite_eq_ite, getElem_cast, ite_eq_left_iff,
Nat.not_lt]
simp only [getElem_append, getElem_extractLsb', dite_eq_ite, ite_eq_left_iff, Nat.not_lt]
intro hi
congr 1
omega
@ -3085,6 +3109,51 @@ theorem extractLsb'_append_eq_of_le {v w} {xhi : BitVec v} {xlo : BitVec w}
extractLsb' start len (xhi ++ xlo) = extractLsb' (start - w) len xhi := by
simp [extractLsb'_append_eq_ite, show ¬ start < w by omega]
theorem extractLsb'_append_eq_left {a : BitVec w} {b : BitVec w'} : (a ++ b).extractLsb' w' w = a := by
simp [BitVec.extractLsb'_append_eq_of_le]
theorem extractLsb'_append_eq_right {a : BitVec w} {b : BitVec w'} : (a ++ b).extractLsb' 0 w' = b := by
simp [BitVec.extractLsb'_append_eq_of_add_le]
theorem setWidth_append_eq_right {a : BitVec w} {b : BitVec w'} : (a ++ b).setWidth w' = b := by
ext i hi
simp [getLsbD_append, hi]
theorem append_left_inj {s₁ s₂ : BitVec w} (t : BitVec w') : s₁ ++ t = s₂ ++ t ↔ s₁ = s₂ := by
refine ⟨fun h => ?_, fun h => h ▸ rfl⟩
ext i hi
simpa [getElem_append, dif_neg] using congrArg (·[i + w']'(by omega)) h
theorem append_right_inj (s : BitVec w) {t₁ t₂ : BitVec w'} : s ++ t₁ = s ++ t₂ ↔ t₁ = t₂ := by
refine ⟨fun h => ?_, fun h => h ▸ rfl⟩
ext i hi
simpa [getElem_append, hi] using congrArg (·[i]) h
theorem setWidth_append_eq_shiftLeft_setWidth_or {b : BitVec w} {b' : BitVec w'} :
(b ++ b').setWidth w'' = (b.setWidth w'' <<< w') ||| b'.setWidth w'' := by
ext i hi
simp only [getElem_setWidth, getElem_or, getElem_shiftLeft]
rw [getLsbD_append]
split <;> simp_all
theorem setWidth_append_append_eq_shiftLeft_setWidth_or {b : BitVec w} {b' : BitVec w'} {b'' : BitVec w''} :
(b ++ b' ++ b'').setWidth w''' = (b.setWidth w''' <<< (w' + w'')) ||| (b'.setWidth w''' <<< w'') ||| b''.setWidth w''' := by
rw [BitVec.setWidth_append_eq_shiftLeft_setWidth_or,
BitVec.setWidth_append_eq_shiftLeft_setWidth_or,
BitVec.shiftLeft_or_distrib, BitVec.shiftLeft_add]
theorem setWidth_append_append_append_eq_shiftLeft_setWidth_or {b : BitVec w} {b' : BitVec w'} {b'' : BitVec w''} {b''' : BitVec w'''} :
(b ++ b' ++ b'' ++ b''').setWidth w'''' = (b.setWidth w'''' <<< (w' + w'' + w''')) ||| (b'.setWidth w'''' <<< (w'' + w''')) |||
(b''.setWidth w'''' <<< w''') ||| b'''.setWidth w'''' := by
simp only [BitVec.setWidth_append_eq_shiftLeft_setWidth_or, BitVec.shiftLeft_or_distrib, BitVec.shiftLeft_add]
theorem and_setWidth_allOnes (w' w : Nat) (b : BitVec (w' + w)) :
b &&& (BitVec.allOnes w).setWidth (w' + w) = 0#w' ++ b.setWidth w := by
ext i hi
simp only [getElem_and, getElem_setWidth, getLsbD_allOnes]
rw [BitVec.getElem_append]
split <;> simp_all
/-! ### rev -/
@[grind =]
@ -4041,6 +4110,9 @@ instance instLawfulOrderLT : LawfulOrderLT (BitVec n) := by
apply LawfulOrderLT.of_le
simpa using fun _ _ => BitVec.lt_asymm
theorem length_pos_of_lt {b b' : BitVec w} (h : b < b') : 0 < w :=
length_pos_of_ne (BitVec.ne_of_lt h)
protected theorem umod_lt (x : BitVec n) {y : BitVec n} : 0 < y → x % y < y := by
simp only [ofNat_eq_ofNat, lt_def, toNat_ofNat, Nat.zero_mod]
apply Nat.mod_lt
@ -4112,6 +4184,14 @@ theorem lt_of_msb_false_of_msb_true {x y : BitVec w} (hx : x.msb = false) (hy :
simp
omega
theorem lt_add_one {b : BitVec w} (h : b ≠ allOnes w) : b < b + 1 := by
simp only [ne_eq, ← toNat_inj, toNat_allOnes] at h
simp only [BitVec.lt_def, ofNat_eq_ofNat, toNat_add, toNat_ofNat, Nat.add_mod_mod]
rw [Nat.mod_eq_of_lt]
· exact Nat.lt_add_one _
· have := b.toNat_lt_twoPow_of_le (Nat.le_refl _)
omega
/-! ### udiv -/
theorem udiv_def {x y : BitVec n} : x / y = BitVec.ofNat n (x.toNat / y.toNat) := by
@ -5257,7 +5337,7 @@ theorem replicate_succ' {x : BitVec w} :
(replicate n x ++ x).cast (by rw [Nat.mul_succ]) := by
simp [replicate_append_self]
theorem BitVec.setWidth_add_eq_mod {x y : BitVec w} : BitVec.setWidth i (x + y) = (BitVec.setWidth i x + BitVec.setWidth i y) % (BitVec.twoPow i w) := by
theorem setWidth_add_eq_mod {x y : BitVec w} : BitVec.setWidth i (x + y) = (BitVec.setWidth i x + BitVec.setWidth i y) % (BitVec.twoPow i w) := by
apply BitVec.eq_of_toNat_eq
rw [toNat_setWidth]
simp only [toNat_setWidth, toNat_add, toNat_umod, Nat.add_mod_mod, Nat.mod_add_mod, toNat_twoPow]
@ -5266,6 +5346,14 @@ theorem BitVec.setWidth_add_eq_mod {x y : BitVec w} : BitVec.setWidth i (x + y)
· have hk : 2 ^ w < 2 ^ i := Nat.pow_lt_pow_of_lt (by decide) (Nat.lt_of_not_le h)
rw [Nat.mod_eq_of_lt hk, Nat.mod_mod_eq_mod_mod_of_dvd (Nat.pow_dvd_pow _ (Nat.le_of_not_le h))]
theorem setWidth_setWidth_eq_self {a : BitVec w} {w' : Nat} (h : a < BitVec.twoPow w w') : (a.setWidth w').setWidth w = a := by
by_cases hw : w' < w
· simp only [toNat_eq, toNat_setWidth]
rw [Nat.mod_mod_of_dvd' (Nat.pow_dvd_pow _ (Nat.le_of_lt hw)), Nat.mod_eq_of_lt]
rwa [BitVec.lt_def, BitVec.toNat_twoPow_of_lt hw] at h
· rw [BitVec.lt_def, BitVec.toNat_twoPow_of_le (by omega)] at h
simp at h
/-! ### intMin -/
@[grind =]
@ -5928,6 +6016,12 @@ theorem getElem_eq_true_of_lt_of_le {x : BitVec w} (hk' : k < w) (hlt: x.toNat <
omega
· simp [show w ≤ k + k' by omega] at hk'
theorem not_lt_iff {b : BitVec w} : ~~~b < b ↔ 0 < w ∧ b.msb = true := by
refine ⟨fun h => ?_, fun ⟨hw, hb⟩ => ?_⟩
· have := length_pos_of_lt h
exact ⟨this, by rwa [← ult_iff_lt, ult_eq_msb_of_msb_neq (by simp_all)] at h⟩
· rwa [← ult_iff_lt, ult_eq_msb_of_msb_neq (by simp_all)]
/-! ### Count leading zeros -/
theorem clzAuxRec_zero (x : BitVec w) :

View file

@ -7,6 +7,8 @@ module
prelude
public import Init.Data.ByteArray.Basic
public import Init.Data.ByteArray.Bootstrap
public import Init.Data.ByteArray.Extra
public import Init.Data.ByteArray.Lemmas
public section

View file

@ -11,16 +11,11 @@ public import Init.Data.UInt.Basic
public import Init.Data.UInt.BasicAux
import all Init.Data.UInt.BasicAux
public import Init.Data.Option.Basic
public import Init.Data.Array.Extract
@[expose] public section
universe u
structure ByteArray where
data : Array UInt8
attribute [extern "lean_byte_array_mk"] ByteArray.mk
attribute [extern "lean_byte_array_data"] ByteArray.data
namespace ByteArray
deriving instance BEq for ByteArray
@ -30,29 +25,15 @@ attribute [ext] ByteArray
instance : DecidableEq ByteArray :=
fun _ _ => decidable_of_decidable_of_iff ByteArray.ext_iff.symm
@[extern "lean_mk_empty_byte_array"]
def emptyWithCapacity (c : @& Nat) : ByteArray :=
{ data := #[] }
@[deprecated emptyWithCapacity (since := "2025-03-12")]
abbrev mkEmpty := emptyWithCapacity
def empty : ByteArray := emptyWithCapacity 0
instance : Inhabited ByteArray where
default := empty
instance : EmptyCollection ByteArray where
emptyCollection := ByteArray.empty
@[extern "lean_byte_array_push"]
def push : ByteArray → UInt8 → ByteArray
| ⟨bs⟩, b => ⟨bs.push b⟩
@[extern "lean_byte_array_size"]
def size : (@& ByteArray) → Nat
| ⟨bs⟩ => bs.size
@[extern "lean_sarray_size", simp]
def usize (a : @& ByteArray) : USize :=
a.size.toUSize
@ -106,11 +87,31 @@ def copySlice (src : @& ByteArray) (srcOff : Nat) (dest : ByteArray) (destOff le
def extract (a : ByteArray) (b e : Nat) : ByteArray :=
a.copySlice b empty 0 (e - b)
protected def append (a : ByteArray) (b : ByteArray) : ByteArray :=
protected def fastAppend (a : ByteArray) (b : ByteArray) : ByteArray :=
-- we assume that `append`s may be repeated, so use asymptotic growing; use `copySlice` directly to customize
b.copySlice 0 a a.size b.size false
instance : Append ByteArray := ⟨ByteArray.append⟩
@[simp]
theorem size_data {a : ByteArray} :
a.data.size = a.size := rfl
@[csimp]
theorem append_eq_fastAppend : @ByteArray.append = @ByteArray.fastAppend := by
funext a b
ext1
apply Array.ext'
simp [ByteArray.fastAppend, copySlice, ← size_data, - Array.append_assoc]
-- Needs to come after the `csimp` lemma
instance : Append ByteArray where
append := ByteArray.append
@[simp]
theorem append_eq {a b : ByteArray} : a.append b = a ++ b := rfl
@[simp]
theorem fastAppend_eq {a b : ByteArray} : a.fastAppend b = a ++ b := by
simp [← append_eq_fastAppend]
def toList (bs : ByteArray) : List UInt8 :=
let rec loop (i : Nat) (r : List UInt8) :=
@ -350,13 +351,4 @@ def prevn : Iterator → Nat → Iterator
end Iterator
end ByteArray
/--
Converts a list of bytes into a `ByteArray`.
-/
def List.toByteArray (bs : List UInt8) : ByteArray :=
let rec loop
| [], r => r
| b::bs, r => loop bs (r.push b)
loop bs ByteArray.empty
instance : ToString ByteArray := ⟨fun bs => bs.toList.toString⟩

View file

@ -0,0 +1,53 @@
/-
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Markus Himmel
-/
module
prelude
public import Init.Prelude
public import Init.Data.List.Basic
public section
namespace ByteArray
@[simp]
theorem data_push {a : ByteArray} {b : UInt8} : (a.push b).data = a.data.push b := rfl
@[expose]
protected def append (a b : ByteArray) : ByteArray :=
⟨⟨a.data.toList ++ b.data.toList⟩⟩
@[simp]
theorem toList_data_append' {a b : ByteArray} :
(a.append b).data.toList = a.data.toList ++ b.data.toList := by
have ⟨⟨a⟩⟩ := a
have ⟨⟨b⟩⟩ := b
rfl
theorem ext : {x y : ByteArray} → x.data = y.data → x = y
| ⟨_⟩, ⟨_⟩, rfl => rfl
end ByteArray
@[simp]
theorem List.toList_data_toByteArray {l : List UInt8} :
l.toByteArray.data.toList = l := by
rw [List.toByteArray]
suffices ∀ a b, (List.toByteArray.loop a b).data.toList = b.data.toList ++ a by
simpa using this l ByteArray.empty
intro a b
fun_induction List.toByteArray.loop a b with simp_all [toList_push]
where
toList_push {xs : Array UInt8} {x : UInt8} : (xs.push x).toList = xs.toList ++ [x] := by
have ⟨xs⟩ := xs
simp [Array.push, List.concat_eq_append]
theorem List.toByteArray_append' {l l' : List UInt8} :
(l ++ l').toByteArray = l.toByteArray.append l'.toByteArray :=
ByteArray.ext (ext (by simp))
where
ext : {x y : Array UInt8} → x.toList = y.toList → x = y
| ⟨_⟩, ⟨_⟩, rfl => rfl

View file

@ -0,0 +1,252 @@
/-
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Markus Himmel
-/
module
prelude
public import Init.Data.ByteArray.Basic
public import Init.Data.Array.Extract
public section
@[simp]
theorem ByteArray.data_empty : ByteArray.empty.data = #[] := rfl
@[simp]
theorem ByteArray.data_extract {a : ByteArray} {b e : Nat} :
(a.extract b e).data = a.data.extract b e := by
simp [extract, copySlice]
by_cases b ≤ e
· rw [(by omega : b + (e - b) = e)]
· rw [Array.extract_eq_empty_of_le (by omega), Array.extract_eq_empty_of_le (by omega)]
@[simp]
theorem ByteArray.extract_zero_size {b : ByteArray} : b.extract 0 b.size = b := by
ext1
simp
@[simp]
theorem ByteArray.extract_same {b : ByteArray} {i : Nat} : b.extract i i = ByteArray.empty := by
ext1
simp [Nat.min_le_left]
theorem ByteArray.fastAppend_eq_copySlice {a b : ByteArray} :
a.fastAppend b = b.copySlice 0 a a.size b.size false := rfl
@[simp]
theorem List.toByteArray_append {l l' : List UInt8} :
(l ++ l').toByteArray = l.toByteArray ++ l'.toByteArray := by
simp [List.toByteArray_append']
@[simp]
theorem ByteArray.toList_data_append {l l' : ByteArray} :
(l ++ l').data.toList = l.data.toList ++ l'.data.toList := by
simp [← append_eq]
@[simp]
theorem ByteArray.data_append {l l' : ByteArray} :
(l ++ l').data = l.data ++ l'.data := by
simp [← Array.toList_inj]
@[simp]
theorem ByteArray.size_empty : ByteArray.empty.size = 0 := by
simp [← ByteArray.size_data]
@[simp]
theorem List.data_toByteArray {l : List UInt8} :
l.toByteArray.data = l.toArray := by
rw [List.toByteArray]
suffices ∀ a b, (List.toByteArray.loop a b).data = b.data ++ a.toArray by
simpa using this l ByteArray.empty
intro a b
fun_induction List.toByteArray.loop a b with simp_all
@[simp]
theorem List.size_toByteArray {l : List UInt8} :
l.toByteArray.size = l.length := by
simp [← ByteArray.size_data]
@[simp]
theorem List.toByteArray_nil : List.toByteArray [] = ByteArray.empty := rfl
@[simp]
theorem ByteArray.empty_append {b : ByteArray} : ByteArray.empty ++ b = b := by
ext1
simp
@[simp]
theorem ByteArray.append_empty {b : ByteArray} : b ++ ByteArray.empty = b := by
ext1
simp
@[simp, grind]
theorem ByteArray.size_append {a b : ByteArray} : (a ++ b).size = a.size + b.size := by
simp [← size_data]
@[simp]
theorem ByteArray.size_eq_zero_iff {a : ByteArray} : a.size = 0 ↔ a = ByteArray.empty := by
refine ⟨fun h => ?_, fun h => h ▸ ByteArray.size_empty⟩
ext1
simp [← Array.size_eq_zero_iff, h]
theorem ByteArray.getElem_eq_getElem_data {a : ByteArray} {i : Nat} {h : i < a.size} :
a[i] = a.data[i]'(by simpa [← size_data]) := rfl
@[simp]
theorem ByteArray.getElem_append_left {i : Nat} {a b : ByteArray} {h : i < (a ++ b).size}
(hlt : i < a.size) : (a ++ b)[i] = a[i] := by
simp only [getElem_eq_getElem_data, data_append]
rw [Array.getElem_append_left (by simpa)]
theorem ByteArray.getElem_append_right {i : Nat} {a b : ByteArray} {h : i < (a ++ b).size}
(hle : a.size ≤ i) : (a ++ b)[i] = b[i - a.size]'(by simp_all; omega) := by
simp only [getElem_eq_getElem_data, data_append]
rw [Array.getElem_append_right (by simpa)]
simp
@[simp]
theorem List.getElem_toByteArray {l : List UInt8} {i : Nat} {h : i < l.toByteArray.size} :
l.toByteArray[i]'h = l[i]'(by simp_all) := by
simp [ByteArray.getElem_eq_getElem_data]
theorem List.getElem_eq_getElem_toByteArray {l : List UInt8} {i : Nat} {h : i < l.length} :
l[i]'h = l.toByteArray[i]'(by simp_all) := by
simp
@[simp]
theorem ByteArray.size_extract {a : ByteArray} {b e : Nat} :
(a.extract b e).size = min e a.size - b := by
simp [← size_data]
@[simp]
theorem ByteArray.extract_eq_empty_iff {b : ByteArray} {i j : Nat} : b.extract i j = ByteArray.empty ↔ min j b.size ≤ i := by
rw [← size_eq_zero_iff, size_extract]
omega
@[simp]
theorem ByteArray.extract_add_left {b : ByteArray} {i j : Nat} : b.extract (i + j) i = ByteArray.empty := by
simp only [extract_eq_empty_iff]
exact Nat.le_trans (Nat.min_le_left _ _) (by simp)
@[simp]
theorem ByteArray.append_eq_empty_iff {a b : ByteArray} :
a ++ b = ByteArray.empty ↔ a = ByteArray.empty ∧ b = ByteArray.empty := by
simp [← size_eq_zero_iff, size_append]
@[simp]
theorem List.toByteArray_eq_empty {l : List UInt8} :
l.toByteArray = ByteArray.empty ↔ l = [] := by
simp [← ByteArray.size_eq_zero_iff]
theorem ByteArray.append_right_inj {ys₁ ys₂ : ByteArray} (xs : ByteArray) :
xs ++ ys₁ = xs ++ ys₂ ↔ ys₁ = ys₂ := by
simp [ByteArray.ext_iff, Array.append_right_inj]
@[simp]
theorem ByteArray.extract_append_extract {a : ByteArray} {i j k : Nat} :
a.extract i j ++ a.extract j k = a.extract (min i j) (max j k) := by
ext1
simp
theorem ByteArray.extract_eq_extract_append_extract {a : ByteArray} {i k : Nat} (j : Nat)
(hi : i ≤ j) (hk : j ≤ k) :
a.extract i k = a.extract i j ++ a.extract j k := by
simp
rw [Nat.min_eq_left hi, Nat.max_eq_right hk]
theorem ByteArray.append_inj_left {xs₁ xs₂ ys₁ ys₂ : ByteArray} (h : xs₁ ++ ys₁ = xs₂ ++ ys₂) (hl : xs₁.size = xs₂.size) : xs₁ = xs₂ := by
simp only [ByteArray.ext_iff, ← ByteArray.size_data, ByteArray.data_append] at *
exact Array.append_inj_left h hl
theorem ByteArray.extract_append_eq_right {a b : ByteArray} {i : Nat} (hi : i = a.size) :
(a ++ b).extract i (a ++ b).size = b := by
subst hi
ext1
simp [← size_data]
theorem ByteArray.extract_append_eq_left {a b : ByteArray} {i : Nat} (hi : i = a.size) :
(a ++ b).extract 0 i = a := by
subst hi
ext1
simp
theorem ByteArray.extract_append_size_left {a b : ByteArray} {i : Nat} :
(a ++ b).extract i a.size = a.extract i a.size := by
ext1
simp
theorem ByteArray.extract_append_size_add {a b : ByteArray} {i j : Nat} :
(a ++ b).extract (a.size + i) (a.size + j) = b.extract i j := by
ext1
simp
theorem ByteArray.extract_append {as bs : ByteArray} {i j : Nat} :
(as ++ bs).extract i j = as.extract i j ++ bs.extract (i - as.size) (j - as.size) := by
ext1
simp
theorem ByteArray.extract_append_size_add' {a b : ByteArray} {i j k : Nat} (h : k = a.size) :
(a ++ b).extract (k + i) (k + j) = b.extract i j := by
cases h
rw [extract_append_size_add]
theorem ByteArray.extract_extract {a : ByteArray} {i j k l : Nat} :
(a.extract i j).extract k l = a.extract (i + k) (min (i + l) j) := by
ext1
simp
theorem ByteArray.getElem_extract_aux {xs : ByteArray} {start stop : Nat} (h : i < (xs.extract start stop).size) :
start + i < xs.size := by
rw [size_extract] at h; apply Nat.add_lt_of_lt_sub'; apply Nat.lt_of_lt_of_le h
apply Nat.sub_le_sub_right; apply Nat.min_le_right
theorem ByteArray.getElem_extract {i : Nat} {b : ByteArray} {start stop : Nat}
(h) : (b.extract start stop)[i]'h = b[start + i]'(getElem_extract_aux h) := by
simp [getElem_eq_getElem_data]
theorem ByteArray.extract_eq_extract_left {a : ByteArray} {i i' j : Nat} :
a.extract i j = a.extract i' j ↔ min j a.size - i = min j a.size - i' := by
simp [ByteArray.ext_iff, Array.extract_eq_extract_left]
theorem ByteArray.extract_add_one {a : ByteArray} {i : Nat} (ha : i + 1 ≤ a.size) :
a.extract i (i + 1) = [a[i]].toByteArray := by
ext
· simp
omega
· rename_i j hj hj'
obtain rfl : j = 0 := by simpa using hj'
simp [ByteArray.getElem_eq_getElem_data]
theorem ByteArray.extract_add_two {a : ByteArray} {i : Nat} (ha : i + 2 ≤ a.size) :
a.extract i (i + 2) = [a[i], a[i + 1]].toByteArray := by
rw [extract_eq_extract_append_extract (i + 1) (by simp) (by omega),
extract_add_one (by omega), extract_add_one (by omega)]
simp [← List.toByteArray_append]
theorem ByteArray.extract_add_three {a : ByteArray} {i : Nat} (ha : i + 3 ≤ a.size) :
a.extract i (i + 3) = [a[i], a[i + 1], a[i + 2]].toByteArray := by
rw [extract_eq_extract_append_extract (i + 1) (by simp) (by omega),
extract_add_one (by omega), extract_add_two (by omega)]
simp [← List.toByteArray_append]
theorem ByteArray.extract_add_four {a : ByteArray} {i : Nat} (ha : i + 4 ≤ a.size) :
a.extract i (i + 4) = [a[i], a[i + 1], a[i + 2], a[i + 3]].toByteArray := by
rw [extract_eq_extract_append_extract (i + 1) (by simp) (by omega),
extract_add_one (by omega), extract_add_three (by omega)]
simp [← List.toByteArray_append]
theorem ByteArray.append_assoc {a b c : ByteArray} : a ++ b ++ c = a ++ (b ++ c) := by
ext1
simp
@[simp]
theorem ByteArray.toList_empty : ByteArray.empty.toList = [] := by
simp [ByteArray.toList, ByteArray.toList.loop]
theorem ByteArray.copySlice_eq_append {src : ByteArray} {srcOff : Nat} {dest : ByteArray} {destOff len : Nat} {exact : Bool} :
ByteArray.copySlice src srcOff dest destOff len exact =
dest.extract 0 destOff ++ src.extract srcOff (srcOff +len) ++ dest.extract (destOff + min len (src.data.size - srcOff)) dest.data.size := by
ext1
simp [copySlice]

View file

@ -475,21 +475,6 @@ We define the basic functional programming operations on `List`:
/-! ### map -/
/--
Applies a function to each element of the list, returning the resulting list of values.
`O(|l|)`.
Examples:
* `[a, b, c].map f = [f a, f b, f c]`
* `[].map Nat.succ = []`
* `["one", "two", "three"].map (·.length) = [3, 3, 5]`
* `["one", "two", "three"].map (·.reverse) = ["eno", "owt", "eerht"]`
-/
@[specialize] def map (f : α → β) : (l : List α) → List β
| [] => []
| a::as => f a :: map f as
@[simp, grind =] theorem map_nil {f : α → β} : map f [] = [] := rfl
@[simp, grind =] theorem map_cons {f : α → β} {a : α} {l : List α} : map f (a :: l) = f a :: map f l := rfl
@ -606,20 +591,6 @@ Appends two lists. Normally used via the `++` operator.
Appending lists takes time proportional to the length of the first list: `O(|xs|)`.
Examples:
* `[1, 2, 3] ++ [4, 5] = [1, 2, 3, 4, 5]`.
* `[] ++ [4, 5] = [4, 5]`.
* `[1, 2, 3] ++ [] = [1, 2, 3]`.
-/
protected def append : (xs ys : List α) → List α
| [], bs => bs
| a::as, bs => a :: List.append as bs
/--
Appends two lists. Normally used via the `++` operator.
Appending lists takes time proportional to the length of the first list: `O(|xs|)`.
This is a tail-recursive version of `List.append`.
Examples:
@ -691,18 +662,6 @@ theorem reverseAux_eq_append {as bs : List α} : reverseAux as bs = reverseAux a
/-! ### flatten -/
/--
Concatenates a list of lists into a single list, preserving the order of the elements.
`O(|flatten L|)`.
Examples:
* `[["a"], ["b", "c"]].flatten = ["a", "b", "c"]`
* `[["a"], [], ["b", "c"], ["d", "e", "f"]].flatten = ["a", "b", "c", "d", "e", "f"]`
-/
def flatten : List (List α) → List α
| [] => []
| l :: L => l ++ flatten L
@[simp, grind =] theorem flatten_nil : List.flatten ([] : List (List α)) = [] := rfl
@[simp, grind =] theorem flatten_cons : (l :: L).flatten = l ++ L.flatten := rfl
@ -721,20 +680,14 @@ Examples:
/-! ### flatMap -/
/--
Applies a function that returns a list to each element of a list, and concatenates the resulting
lists.
Examples:
* `[2, 3, 2].flatMap List.range = [0, 1, 0, 1, 2, 0, 1]`
* `["red", "blue"].flatMap String.toList = ['r', 'e', 'd', 'b', 'l', 'u', 'e']`
-/
@[inline] def flatMap {α : Type u} {β : Type v} (b : α → List β) (as : List α) : List β := flatten (map b as)
@[simp, grind =] theorem flatMap_nil {f : α → List β} : List.flatMap f [] = [] := by simp [List.flatMap]
@[simp, grind =] theorem flatMap_cons {x : α} {xs : List α} {f : α → List β} :
List.flatMap f (x :: xs) = f x ++ List.flatMap f xs := by simp [List.flatMap]
@[simp, grind _=_] theorem flatMap_append {xs ys : List α} {f : α → List β} :
(xs ++ ys).flatMap f = xs.flatMap f ++ ys.flatMap f := by
induction xs; {rfl}; simp_all [flatMap_cons, append_assoc]
/-! ### replicate -/
/--

View file

@ -348,6 +348,18 @@ theorem ext_getElem {l₁ l₂ : List α} (hl : length l₁ = length l₂)
theorem getElem?_concat_length {l : List α} {a : α} : (l ++ [a])[l.length]? = some a := by
simp
theorem eq_getElem_of_length_eq_one : (l : List α) → (hl : l.length = 1) → l = [l[0]'(hl ▸ by decide)]
| [_], _ => rfl
theorem eq_getElem_of_length_eq_two : (l : List α) → (hl : l.length = 2) → l = [l[0]'(hl ▸ by decide), l[1]'(hl ▸ by decide)]
| [_, _], _ => rfl
theorem eq_getElem_of_length_eq_three : (l : List α) → (hl : l.length = 3) → l = [l[0]'(hl ▸ by decide), l[1]'(hl ▸ by decide), l[2]'(hl ▸ by decide)]
| [_, _, _], _ => rfl
theorem eq_getElem_of_length_eq_four : (l : List α) → (hl : l.length = 4) → l = [l[0]'(hl ▸ by decide), l[1]'(hl ▸ by decide), l[2]'(hl ▸ by decide), l[3]'(hl ▸ by decide)]
| [_, _, _, _], _ => rfl
/-! ### getD
We simplify away `getD`, replacing `getD l n a` with `(l[n]?).getD a`.
@ -2136,10 +2148,6 @@ theorem flatMap_singleton (f : α → List β) (x : α) : [x].flatMap f = f x :=
simp only [findSome?_cons]
split <;> simp_all
@[simp, grind _=_] theorem flatMap_append {xs ys : List α} {f : α → List β} :
(xs ++ ys).flatMap f = xs.flatMap f ++ ys.flatMap f := by
induction xs; {rfl}; simp_all [flatMap_cons, append_assoc]
theorem flatMap_assoc {l : List α} {f : α → List β} {g : β → List γ} :
(l.flatMap f).flatMap g = l.flatMap fun x => (f x).flatMap g := by
induction l <;> simp [*]

View file

@ -171,7 +171,7 @@ theorem pairwise_append_comm {R : αα → Prop} (s : ∀ {x y}, R x y →
induction L with
| nil => simp
| cons l L IH =>
simp only [flatten, pairwise_append, IH, mem_flatten, exists_imp, and_imp, forall_mem_cons,
simp only [flatten_cons, pairwise_append, IH, mem_flatten, exists_imp, and_imp, forall_mem_cons,
pairwise_cons, and_assoc, and_congr_right_iff]
rw [and_comm, and_congr_left_iff]
intros; exact ⟨fun h l' b c d e => h c d e l' b, fun h c d e l' b => h l' b c d e⟩

View file

@ -8,6 +8,7 @@ module
prelude
public import Init.Data.String.Basic
public import Init.Data.String.Bootstrap
public import Init.Data.String.Decode
public import Init.Data.String.Extra
public import Init.Data.String.Lemmas
public import Init.Data.String.Repr

View file

@ -9,11 +9,370 @@ prelude
public import Init.Data.List.Basic
public import Init.Data.Char.Basic
public import Init.Data.String.Bootstrap
public import Init.Data.ByteArray.Basic
public import Init.Data.String.Decode
import Init.Data.ByteArray.Lemmas
public section
universe u
section
@[simp]
theorem List.utf8Encode_nil : List.utf8Encode [] = ByteArray.empty := by simp [utf8Encode]
theorem List.utf8Encode_singleton {c : Char} : [c].utf8Encode = (String.utf8EncodeChar c).toByteArray := by
simp [utf8Encode]
@[simp]
theorem List.utf8Encode_append {l l' : List Char} :
(l ++ l').utf8Encode = l.utf8Encode ++ l'.utf8Encode := by
simp [utf8Encode]
theorem List.utf8Encode_cons {c : Char} {l : List Char} : (c :: l).utf8Encode = [c].utf8Encode ++ l.utf8Encode := by
rw [← singleton_append, List.utf8Encode_append]
@[simp]
theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] := by
fun_cases String.utf8EncodeChar with simp
@[simp]
theorem List.utf8Encode_eq_empty {l : List Char} : l.utf8Encode = ByteArray.empty ↔ l = [] := by
simp [utf8Encode, ← List.eq_nil_iff_forall_not_mem]
theorem ByteArray.isValidUtf8_utf8Encode {l : List Char} : IsValidUtf8 l.utf8Encode :=
.intro l rfl
@[simp]
theorem ByteArray.isValidUtf8_empty : IsValidUtf8 ByteArray.empty :=
.intro [] (by simp)
theorem Char.isValidUtf8_toByteArray_utf8EncodeChar {c : Char} :
ByteArray.IsValidUtf8 (String.utf8EncodeChar c).toByteArray :=
.intro [c] (by simp [List.utf8Encode_singleton])
theorem ByteArray.IsValidUtf8.append {b b' : ByteArray} (h : IsValidUtf8 b) (h' : IsValidUtf8 b') :
IsValidUtf8 (b ++ b') := by
rcases h with ⟨m, rfl⟩
rcases h' with ⟨m', rfl⟩
exact .intro (m ++ m') (by simp)
theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
IsValidUtf8 ([c].utf8Encode ++ b) ↔ IsValidUtf8 b := by
refine ⟨?_, fun h => IsValidUtf8.append isValidUtf8_utf8Encode h⟩
rintro ⟨l, hl⟩
match l with
| [] => simp at hl
| d::l =>
obtain rfl : c = d := by
replace hl := congrArg (fun l => utf8DecodeChar? l 0) hl
simpa [List.utf8DecodeChar?_utf8Encode_singleton_append,
List.utf8DecodeChar?_utf8Encode_cons] using hl
rw [← List.singleton_append (l := l), List.utf8Encode_append,
ByteArray.append_right_inj] at hl
exact hl ▸ isValidUtf8_utf8Encode
@[expose]
def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) :=
go (b.size + 1) 0 #[] (by simp) (by simp)
where
go (fuel : Nat) (i : Nat) (acc : Array Char) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Option (Array Char) :=
match fuel, hf with
| fuel + 1, _ =>
if i = b.size then
some acc
else
match h : utf8DecodeChar? b i with
| none => none
| some c => go fuel (i + c.utf8Size) (acc.push c)
(le_size_of_utf8DecodeChar?_eq_some h)
(have := c.utf8Size_pos; have := le_size_of_utf8DecodeChar?_eq_some h; by omega)
termination_by structural fuel
theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'}
(hbb' : b = b') (hii' : i = i') (hacc : acc = acc') :
ByteArray.utf8Decode?.go b fuel i acc hi hf = ByteArray.utf8Decode?.go b' fuel' i' acc' hi' hf' := by
subst hbb' hii' hacc
fun_induction ByteArray.utf8Decode?.go b fuel i acc hi hf generalizing fuel' with
| case1 =>
rw [go.eq_def]
split
simp
| case2 =>
rw [go.eq_def]
split <;> split
· simp_all
· split <;> simp_all
| case3 =>
conv => rhs; rw [go.eq_def]
split <;> split
· simp_all
· split
· simp_all
· rename_i c₁ hc₁ ih _ _ _ _ _ c₂ hc₂
obtain rfl : c₁ = c₂ := by rw [← Option.some_inj, ← hc₁, ← hc₂]
apply ih
@[simp]
theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] := by
simp [utf8Decode?, utf8Decode?.go]
private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
(ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUtf8 (b.extract i b.size) := by
fun_induction ByteArray.utf8Decode?.go with
| case1 => simp
| case2 fuel i hi hf acc h₁ h₂ =>
simp only [Option.isSome_none, Bool.false_eq_true, false_iff]
rintro ⟨l, hl⟩
have : l ≠ [] := by
rintro rfl
simp at hl
omega
rw [← l.cons_head_tail this] at hl
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract, hl, List.utf8DecodeChar?_utf8Encode_cons] at h₂
simp at h₂
| case3 i acc hi fuel hf h₁ c h₂ ih =>
rw [ih]
have h₂' := h₂
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂'
obtain ⟨l, hl⟩ := exists_of_utf8DecodeChar?_eq_some h₂'
rw [ByteArray.extract_eq_extract_append_extract (i := i) (i + c.utf8Size) (by omega)
(le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢
rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega),
← List.utf8Encode_singleton, isValidUtf8_utf8Encode_singleton_append_iff]
theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} :
b.utf8Decode?.isSome ↔ IsValidUtf8 b := by
rw [utf8Decode?, isSome_utf8Decode?go_iff, extract_zero_size]
@[simp]
theorem String.bytes_empty : "".bytes = ByteArray.empty := (rfl)
/--
Appends two strings. Usually accessed via the `++` operator.
The internal implementation will perform destructive updates if the string is not shared.
Examples:
* `"abc".append "def" = "abcdef"`
* `"abc" ++ "def" = "abcdef"`
* `"" ++ "" = ""`
-/
@[extern "lean_string_append", expose]
def String.append (s t : String) : String where
bytes := s.bytes ++ t.bytes
isValidUtf8 := s.isValidUtf8.append t.isValidUtf8
instance : Append String where
append s t := s.append t
@[simp]
theorem String.bytes_append {s t : String} : (s ++ t).bytes = s.bytes ++ t.bytes := (rfl)
theorem String.bytes_inj {s t : String} : s.bytes = t.bytes ↔ s = t := by
refine ⟨fun h => ?_, (· ▸ rfl)⟩
rcases s with ⟨s⟩
rcases t with ⟨t⟩
subst h
rfl
@[simp]
theorem String.empty_append {s : String} : "" ++ s = s := by
simp [← String.bytes_inj]
@[simp]
theorem String.append_empty {s : String} : s ++ "" = s := by
simp [← String.bytes_inj]
@[simp] theorem List.bytes_asString {l : List Char} : l.asString.bytes = l.utf8Encode := by
simp [List.asString, String.mk]
@[simp]
theorem List.asString_nil : List.asString [] = "" := by
simp [← String.bytes_inj]
@[simp]
theorem List.asString_append {l₁ l₂ : List Char} : (l₁ ++ l₂).asString = l₁.asString ++ l₂.asString := by
simp [← String.bytes_inj]
@[expose]
def String.Internal.toArray (b : String) : Array Char :=
b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUtf8)
@[simp]
theorem String.Internal.toArray_empty : String.Internal.toArray "" = #[] := by
simp [toArray]
@[extern "lean_string_data", expose]
def String.data (b : String) : List Char :=
(String.Internal.toArray b).toList
@[simp]
theorem String.data_empty : "".data = [] := by
simp [data]
/--
Returns the length of a string in Unicode code points.
Examples:
* `"".length = 0`
* `"abc".length = 3`
* `"L∃∀N".length = 4`
-/
@[extern "lean_string_length"]
def String.length (b : String) : Nat :=
b.data.length
@[simp]
theorem String.Internal.size_toArray {b : String} : (String.Internal.toArray b).size = b.length :=
(rfl)
@[simp]
theorem String.length_data {b : String} : b.data.length = b.length := (rfl)
theorem String.exists_eq_asString (s : String) :
∃ l : List Char, s = l.asString := by
rcases s with ⟨_, ⟨l, rfl⟩⟩
refine ⟨l, by simp [← String.bytes_inj]⟩
private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
utf8Decode?.go b fuel i acc hi hf = (utf8Decode?.go (b.extract i b.size) fuel 0 #[] (by simp) (by simp [hf])).map (acc ++ ·) := by
fun_cases utf8Decode?.go b fuel i acc hi hf with
| case1 =>
rw [utf8Decode?.go]
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
List.nil_append]
rw [if_pos (by omega)]
simp
| case2 fuel hf₁ h₁ h₂ hf₂ =>
rw [utf8Decode?.go]
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
List.nil_append]
rw [if_neg (by omega)]
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
split <;> simp_all
| case3 fuel hf₁ h₁ c h₂ hf₂ =>
conv => rhs; rw [utf8Decode?.go]
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
List.nil_append]
rw [if_neg (by omega)]
rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
split
· simp_all
· rename_i c' hc'
obtain rfl : c = c' := by
rw [← Option.some_inj, ← h₂, hc']
have := c.utf8Size_pos
conv => lhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
conv => rhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Option.map_map, ByteArray.extract_extract]
have : (fun x => acc ++ x) ∘ (fun x => #[c] ++ x) = fun x => acc.push c ++ x := by funext; simp
simp [(by omega : i + (b.size - i) = b.size), this]
theorem ByteArray.utf8Decode?_utf8Encode_singleton_append {l : ByteArray} {c : Char} :
([c].utf8Encode ++ l).utf8Decode? = l.utf8Decode?.map (#[c] ++ ·) := by
rw [utf8Decode?, utf8Decode?.go,
if_neg (by simp [List.utf8Encode_singleton]; have := c.utf8Size_pos; omega)]
split
· simp_all [List.utf8DecodeChar?_utf8Encode_singleton_append]
· rename_i d h
obtain rfl : c = d := by simpa [List.utf8DecodeChar?_utf8Encode_singleton_append] using h
rw [utf8Decode?go_eq_utf8Decode?go_extract, utf8Decode?]
simp only [List.push_toArray, List.nil_append, Nat.zero_add]
congr 1
apply ByteArray.utf8Decode?.go.congr _ rfl rfl
apply extract_append_eq_right
simp [List.utf8Encode_singleton]
@[simp]
theorem List.utf8Decode?_utf8Encode {l : List Char} :
l.utf8Encode.utf8Decode? = some l.toArray := by
induction l with
| nil => simp
| cons c l ih =>
rw [← List.singleton_append, List.utf8Encode_append]
simp only [ByteArray.utf8Decode?_utf8Encode_singleton_append, cons_append, nil_append,
Option.map_eq_some_iff, Array.append_eq_toArray_iff, cons.injEq, true_and]
refine ⟨l.toArray, ih, by simp⟩
@[simp]
theorem ByteArray.utf8Encode_get_utf8Decode? {b : ByteArray} {h} :
(b.utf8Decode?.get h).toList.utf8Encode = b := by
obtain ⟨l, rfl⟩ := isSome_utf8Decode?_iff.1 h
simp
@[simp]
theorem List.data_asString {l : List Char} : l.asString.data = l := by
simp [String.data, String.Internal.toArray]
@[simp]
theorem String.asString_data {b : String} : b.data.asString = b := by
obtain ⟨l, rfl⟩ := String.exists_eq_asString b
rw [List.data_asString]
theorem List.asString_injective {l₁ l₂ : List Char} (h : l₁.asString = l₂.asString) : l₁ = l₂ := by
simpa using congrArg String.data h
theorem List.asString_inj {l₁ l₂ : List Char} : l₁.asString = l₂.asString ↔ l₁ = l₂ :=
⟨asString_injective, (· ▸ rfl)⟩
theorem String.data_injective {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ := by
simpa using congrArg List.asString h
theorem String.data_inj {s₁ s₂ : String} : s₁.data = s₂.data ↔ s₁ = s₂ :=
⟨data_injective, (· ▸ rfl)⟩
@[simp]
theorem String.data_append {l₁ l₂ : String} : (l₁ ++ l₂).data = l₁.data ++ l₂.data := by
apply List.asString_injective
simp
@[simp]
theorem String.utf8encode_data {b : String} : b.data.utf8Encode = b.bytes := by
have := congrArg String.bytes (String.asString_data (b := b))
rwa [← List.bytes_asString]
@[simp]
theorem String.utf8ByteSize_empty : "".utf8ByteSize = 0 := (rfl)
@[simp]
theorem String.utf8ByteSize_append {s t : String} :
(s ++ t).utf8ByteSize = s.utf8ByteSize + t.utf8ByteSize := by
simp [utf8ByteSize]
@[simp]
theorem String.size_bytes {s : String} : s.bytes.size = s.utf8ByteSize := rfl
@[simp]
theorem String.bytes_push {s : String} {c : Char} : (s.push c).bytes = s.bytes ++ [c].utf8Encode := by
simp [push]
-- This is just to keep the proof of `set_next_add` below from breaking; if that lemma goes away
-- or the proof is rewritten, it can be removed.
private noncomputable def String.utf8ByteSize' : String → Nat
| s => go s.data
where
go : List Char → Nat
| [] => 0
| c::cs => go cs + c.utf8Size
private theorem String.utf8ByteSize'_eq (s : String) : s.utf8ByteSize' = s.utf8ByteSize := by
suffices ∀ l, utf8ByteSize'.go l = l.asString.utf8ByteSize by
obtain ⟨m, rfl⟩ := s.exists_eq_asString
rw [utf8ByteSize', this, asString_data]
intro l
induction l with
| nil => simp [utf8ByteSize'.go]
| cons c cs ih =>
rw [utf8ByteSize'.go, ih, ← List.singleton_append, List.asString_append,
utf8ByteSize_append, Nat.add_comm]
congr
rw [← size_bytes, List.bytes_asString, List.utf8Encode_singleton,
List.size_toByteArray, length_utf8EncodeChar]
end
namespace String
instance : HAdd String.Pos String.Pos String.Pos where
@ -54,8 +413,6 @@ instance : LT String :=
instance decidableLT (s₁ s₂ : @& String) : Decidable (s₁ < s₂) :=
List.decidableLT s₁.data s₂.data
/--
Non-strict inequality on strings, typically used via the `≤` operator.
@ -69,32 +426,6 @@ instance : LE String :=
instance decLE (s₁ s₂ : String) : Decidable (s₁ ≤ s₂) :=
inferInstanceAs (Decidable (Not _))
/--
Returns the length of a string in Unicode code points.
Examples:
* `"".length = 0`
* `"abc".length = 3`
* `"L∃∀N".length = 4`
-/
@[extern "lean_string_length", expose]
def length : (@& String) → Nat
| ⟨s⟩ => s.length
/--
Appends two strings. Usually accessed via the `++` operator.
The internal implementation will perform destructive updates if the string is not shared.
Examples:
* `"abc".append "def" = "abcdef"`
* `"abc" ++ "def" = "abcdef"`
* `"" ++ "" = ""`
-/
@[extern "lean_string_append", expose]
def append : String → (@& String) → String
| ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩
/--
Converts a string to a list of characters.
@ -153,8 +484,7 @@ Examples:
-/
@[extern "lean_string_utf8_get", expose]
def get (s : @& String) (p : @& Pos) : Char :=
match s with
| ⟨s⟩ => utf8GetAux s 0 p
utf8GetAux s.data 0 p
def utf8GetAux? : List Char → Pos → Pos → Option Char
| [], _, _ => none
@ -175,7 +505,7 @@ Examples:
-/
@[extern "lean_string_utf8_get_opt"]
def get? : (@& String) → (@& Pos) → Option Char
| ⟨s⟩, p => utf8GetAux? s 0 p
| s, p => utf8GetAux? s.data 0 p
/--
Returns the character at position `p` of a string. Panics if `p` is not a valid position.
@ -191,7 +521,7 @@ Examples
@[extern "lean_string_utf8_get_bang", expose]
def get! (s : @& String) (p : @& Pos) : Char :=
match s with
| ⟨s⟩ => utf8GetAux s 0 p
| s => utf8GetAux s.data 0 p
def utf8SetAux (c' : Char) : List Char → Pos → Pos → List Char
| [], _, _ => []
@ -214,7 +544,7 @@ Examples:
-/
@[extern "lean_string_utf8_set"]
def set : String → (@& Pos) → Char → String
| ⟨s⟩, i, c => ⟨utf8SetAux c s 0 i⟩
| s, i, c => (utf8SetAux c s.data 0 i).asString
/--
Replaces the character at position `p` in the string `s` with the result of applying `f` to that
@ -270,7 +600,7 @@ Examples:
-/
@[extern "lean_string_utf8_prev", expose]
def prev : (@& String) → (@& Pos) → Pos
| ⟨s⟩, p => utf8PrevAux s 0 p
| s, p => utf8PrevAux s.data 0 p
/--
Returns the first character in `s`. If `s = ""`, returns `(default : Char)`.
@ -336,7 +666,7 @@ Examples:
@[extern "lean_string_utf8_get_fast", expose]
def get' (s : @& String) (p : @& Pos) (h : ¬ s.atEnd p) : Char :=
match s with
| ⟨s⟩ => utf8GetAux s 0 p
| s => utf8GetAux s.data 0 p
/--
Returns the next position in a string after position `p`. The result is unspecified if `p` is not a
@ -360,16 +690,6 @@ def next' (s : @& String) (p : @& Pos) (h : ¬ s.atEnd p) : Pos :=
let c := get s p
p + c
theorem _root_.Char.utf8Size_pos (c : Char) : 0 < c.utf8Size := by
repeat first | apply iteInduction (motive := (0 < ·)) <;> intros | decide
theorem _root_.Char.utf8Size_le_four (c : Char) : c.utf8Size ≤ 4 := by
repeat first | apply iteInduction (motive := (· ≤ 4)) <;> intros | decide
theorem _root_.Char.utf8Size_eq (c : Char) : c.utf8Size = 1 c.utf8Size = 2 c.utf8Size = 3 c.utf8Size = 4 := by
match c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with
| 1, _, _ | 2, _, _ | 3, _, _ | 4, _, _ => simp
@[deprecated Char.utf8Size_pos (since := "2026-06-04")] abbrev one_le_csize := Char.utf8Size_pos
@[simp] theorem pos_lt_eq (p₁ p₂ : Pos) : (p₁ < p₂) = (p₁.1 < p₂.1) := rfl
@ -534,7 +854,7 @@ Examples:
-/
@[extern "lean_string_utf8_extract", expose]
def extract : (@& String) → (@& Pos) → (@& Pos) → String
| ⟨s⟩, b, e => if b.byteIdx ≥ e.byteIdx then "" else ⟨go₁ s 0 b e⟩
| s, b, e => if b.byteIdx ≥ e.byteIdx then "" else (go₁ s.data 0 b e).asString
where
go₁ : List Char → Pos → Pos → Pos → List Char
| [], _, _, _ => []
@ -1030,37 +1350,31 @@ theorem utf8SetAux_of_gt (c' : Char) : ∀ (cs : List Char) {i p : Pos}, i > p
theorem set_next_add (s : String) (i : Pos) (c : Char) (b₁ b₂)
(h : (s.next i).1 + b₁ = s.endPos.1 + b₂) :
((s.set i c).next i).1 + b₁ = (s.set i c).endPos.1 + b₂ := by
simp [next, get, set, endPos, utf8ByteSize] at h ⊢
simp [next, get, set, endPos, utf8ByteSize'_eq, utf8ByteSize'] at h ⊢
rw [Nat.add_comm i.1, Nat.add_assoc] at h ⊢
let rec foo : ∀ cs a b₁ b₂,
(utf8GetAux cs a i).utf8Size + b₁ = utf8ByteSize.go cs + b₂ →
(utf8GetAux (utf8SetAux c cs a i) a i).utf8Size + b₁ = utf8ByteSize.go (utf8SetAux c cs a i) + b₂
(utf8GetAux cs a i).utf8Size + b₁ = utf8ByteSize'.go cs + b₂ →
(utf8GetAux (utf8SetAux c cs a i) a i).utf8Size + b₁ = utf8ByteSize'.go (utf8SetAux c cs a i) + b₂
| [], _, _, _, h => h
| c'::cs, a, b₁, b₂, h => by
unfold utf8SetAux
apply iteInduction (motive := fun p => (utf8GetAux p a i).utf8Size + b₁ = utf8ByteSize.go p + b₂) <;>
intro h' <;> simp [utf8GetAux, h', utf8ByteSize.go] at h ⊢
apply iteInduction (motive := fun p => (utf8GetAux p a i).utf8Size + b₁ = utf8ByteSize'.go p + b₂) <;>
intro h' <;> simp [utf8GetAux, h', utf8ByteSize'.go] at h ⊢
next =>
rw [Nat.add_assoc, Nat.add_left_comm] at h ⊢; rw [Nat.add_left_cancel h]
next =>
rw [Nat.add_assoc] at h ⊢
refine foo cs (a + c') b₁ (c'.utf8Size + b₂) h
exact foo s.1 0 _ _ h
exact foo s.data 0 _ _ h
theorem mapAux_lemma (s : String) (i : Pos) (c : Char) (h : ¬s.atEnd i) :
(s.set i c).endPos.1 - ((s.set i c).next i).1 < s.endPos.1 - i.1 :=
(s.set i c).endPos.1 - ((s.set i c).next i).1 < s.endPos.1 - i.1 := by
suffices (s.set i c).endPos.1 - ((s.set i c).next i).1 = s.endPos.1 - (s.next i).1 by
rw [this]
apply Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (lt_next ..)
Nat.sub.elim (motive := (_ = ·)) _ _
(fun _ k e =>
have := set_next_add _ _ _ k 0 e.symm
Nat.sub_eq_of_eq_add <| this.symm.trans <| Nat.add_comm ..)
(fun h => by
have ⟨k, e⟩ := Nat.le.dest h
rw [Nat.succ_add] at e
have : ((s.set i c).next i).1 = _ := set_next_add _ _ c 0 k.succ e.symm
exact Nat.sub_eq_zero_of_le (this ▸ Nat.le_add_right ..))
have := set_next_add s i c (s.endPos.byteIdx - (s.next i).byteIdx) 0
have := set_next_add s i c 0 ((s.next i).byteIdx - s.endPos.byteIdx)
omega
@[specialize] def mapAux (f : Char → Char) (i : Pos) (s : String) : String :=
if h : s.atEnd i then s
@ -2044,40 +2358,51 @@ def stripSuffix (s : String) (suff : String) : String :=
end String
namespace Char
@[simp] theorem length_toString (c : Char) : c.toString.length = 1 := rfl
end Char
namespace String
@[ext]
theorem ext {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ :=
show ⟨s₁.data⟩ = (⟨s₂.data⟩ : String) from h ▸ rfl
theorem ext_iff {s₁ s₂ : String} : s₁ = s₂ ↔ s₁.data = s₂.data := ⟨fun h => h ▸ rfl, ext⟩
data_injective h
@[simp] theorem default_eq : default = "" := rfl
@[simp] theorem length_mk (s : List Char) : (String.mk s).length = s.length := rfl
@[simp]
theorem String.mk_eq_asString (s : List Char) : String.mk s = List.asString s := rfl
@[simp] theorem length_empty : "".length = 0 := rfl
@[simp]
theorem _root_.List.length_asString (s : List Char) : (List.asString s).length = s.length := by
rw [← length_data, List.data_asString]
@[simp] theorem length_singleton (c : Char) : (String.singleton c).length = 1 := rfl
@[simp] theorem length_empty : "".length = 0 := by simp [← length_data, data_empty]
@[simp]
theorem bytes_singleton {c : Char} : (String.singleton c).bytes = [c].utf8Encode := by
simp [singleton]
theorem singleton_eq {c : Char} : String.singleton c = [c].asString := by
simp [← bytes_inj]
@[simp] theorem data_singleton (c : Char) : (String.singleton c).data = [c] := by
simp [singleton_eq]
@[simp]
theorem length_singleton {c : Char} : (String.singleton c).length = 1 := by
simp [← length_data]
theorem push_eq_append (c : Char) : String.push s c = s ++ singleton c := by
simp [← bytes_inj]
@[simp] theorem data_push (c : Char) : (String.push s c).data = s.data ++ [c] := by
simp [push_eq_append]
@[simp] theorem length_push (c : Char) : (String.push s c).length = s.length + 1 := by
rw [push, length_mk, List.length_append, List.length_singleton, Nat.succ.injEq]
rfl
simp [← length_data]
@[simp] theorem length_pushn (c : Char) (n : Nat) : (pushn s c n).length = s.length + n := by
unfold pushn; induction n <;> simp [Nat.repeat, Nat.add_assoc, *]
@[simp] theorem length_append (s t : String) : (s ++ t).length = s.length + t.length := by
simp only [length, append, List.length_append]
@[simp] theorem data_push (s : String) (c : Char) : (s.push c).data = s.data ++ [c] := rfl
@[simp] theorem data_append (s t : String) : (s ++ t).data = s.data ++ t.data := rfl
simp [← length_data]
attribute [simp] toList -- prefer `String.data` over `String.toList` in lemmas
@ -2161,10 +2486,8 @@ end Pos
theorem lt_next' (s : String) (p : Pos) : p < next s p := lt_next ..
@[simp] theorem prev_zero (s : String) : prev s 0 = 0 := by
cases s with | mk cs
cases cs
next => rfl
next => simp [prev, utf8PrevAux, Pos.le_iff]
rw [prev]
cases s.data <;> simp [utf8PrevAux, Pos.le_iff]
@[simp] theorem get'_eq (s : String) (p : Pos) (h) : get' s p h = get s p := rfl
@ -2174,19 +2497,20 @@ theorem lt_next' (s : String) (p : Pos) : p < next s p := lt_next ..
-- so for proving can be unfolded.
attribute [simp] toSubstring'
theorem singleton_eq (c : Char) : singleton c = ⟨[c]⟩ := rfl
@[simp] theorem data_singleton (c : Char) : (singleton c).data = [c] := rfl
@[simp] theorem append_empty (s : String) : s ++ "" = s := ext (List.append_nil _)
@[simp] theorem empty_append (s : String) : "" ++ s = s := rfl
theorem append_assoc (s₁ s₂ s₃ : String) : (s₁ ++ s₂) ++ s₃ = s₁ ++ (s₂ ++ s₃) :=
ext (List.append_assoc ..)
ext (by simp [data_append])
end String
namespace Char
theorem toString_eq_singleton {c : Char} : c.toString = String.singleton c := rfl
@[simp] theorem length_toString (c : Char) : c.toString.length = 1 := by
simp [toString_eq_singleton]
end Char
open String
namespace Substring

View file

@ -8,6 +8,7 @@ module
prelude
public import Init.Data.List.Basic
public import Init.Data.Char.Basic
public import Init.Data.ByteArray.Bootstrap
public section
@ -31,7 +32,11 @@ Examples:
-/
@[extern "lean_string_push", expose]
def push : String → Char → String
| ⟨s⟩, c => ⟨s ++ [c]⟩
| ⟨b, h⟩, c => ⟨b.append (List.utf8Encode [c]), ?pf⟩
where finally
have ⟨m, hm⟩ := h
cases hm
exact .intro (m ++ [c]) (by simp [List.utf8Encode, List.toByteArray_append'])
/--
Returns a new string that contains only the character `c`.
@ -124,8 +129,21 @@ Examples:
* `[].asString = ""`
* `['a', 'a', 'a'].asString = "aaa"`
-/
@[extern "lean_string_mk", expose]
def String.mk (data : List Char) : String :=
⟨List.utf8Encode data,.intro data rfl⟩
/--
Creates a string that contains the characters in a list, in order.
Examples:
* `['L', '∃', '∀', 'N'].asString = "L∃∀N"`
* `[].asString = ""`
* `['a', 'a', 'a'].asString = "aaa"`
-/
@[expose]
def List.asString (s : List Char) : String :=
⟨s⟩
String.mk s
namespace Substring.Internal

File diff suppressed because it is too large Load diff

View file

@ -133,92 +133,15 @@ the corresponding string, or panics if the array is not a valid UTF-8 encoding o
@[inline] def fromUTF8! (a : ByteArray) : String :=
if h : validateUTF8 a then fromUTF8 a h else panic! "invalid UTF-8 string"
/--
Returns the sequence of bytes in a character's UTF-8 encoding.
-/
def utf8EncodeCharFast (c : Char) : List UInt8 :=
let v := c.val
if v ≤ 0x7f then
[v.toUInt8]
else if v ≤ 0x7ff then
[(v >>> 6).toUInt8 &&& 0x1f ||| 0xc0,
v.toUInt8 &&& 0x3f ||| 0x80]
else if v ≤ 0xffff then
[(v >>> 12).toUInt8 &&& 0x0f ||| 0xe0,
(v >>> 6).toUInt8 &&& 0x3f ||| 0x80,
v.toUInt8 &&& 0x3f ||| 0x80]
else
[(v >>> 18).toUInt8 &&& 0x07 ||| 0xf0,
(v >>> 12).toUInt8 &&& 0x3f ||| 0x80,
(v >>> 6).toUInt8 &&& 0x3f ||| 0x80,
v.toUInt8 &&& 0x3f ||| 0x80]
private theorem Nat.add_two_pow_eq_or_of_lt {b : Nat} (i : Nat) (b_lt : b < 2 ^ i) (a : Nat) :
b + 2 ^ i * a = b ||| 2 ^ i * a := by
rw [Nat.add_comm, Nat.or_comm, Nat.two_pow_add_eq_or_of_lt b_lt]
@[csimp]
theorem utf8EncodeChar_eq_utf8EncodeCharFast : @utf8EncodeChar = @utf8EncodeCharFast := by
funext c
simp only [utf8EncodeChar, utf8EncodeCharFast, UInt8.ofNat_uInt32ToNat, UInt8.ofNat_add,
UInt8.reduceOfNat, UInt32.le_iff_toNat_le, UInt32.reduceToNat]
split
· rfl
· split
· simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
refine ⟨?_, ?_⟩
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 5 (by omega) 6,
Nat.and_two_pow_sub_one_eq_mod _ 5, Nat.shiftRight_eq_div_pow,
Nat.mod_eq_of_lt (b := 256) (by omega)]
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd _ (by decide)]
· split
· simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
refine ⟨?_, ?_, ?_⟩
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 4 (by omega) 14,
Nat.and_two_pow_sub_one_eq_mod _ 4, Nat.shiftRight_eq_div_pow,
Nat.mod_eq_of_lt (b := 256) (by omega)]
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 6) (by decide)]
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd c.val.toNat (by decide)]
· simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
refine ⟨?_, ?_, ?_, ?_⟩
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 3 (by omega) 30,
Nat.and_two_pow_sub_one_eq_mod _ 3, Nat.shiftRight_eq_div_pow,
Nat.mod_mod_of_dvd _ (by decide)]
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 12) (by decide)]
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 6) (by decide)]
· rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd c.val.toNat (by decide)]
@[simp] theorem length_utf8EncodeChar (c : Char) : (utf8EncodeChar c).length = c.utf8Size := by
simp [Char.utf8Size, utf8EncodeChar_eq_utf8EncodeCharFast, utf8EncodeCharFast]
cases Decidable.em (c.val ≤ 0x7f) <;> simp [*]
cases Decidable.em (c.val ≤ 0x7ff) <;> simp [*]
cases Decidable.em (c.val ≤ 0xffff) <;> simp [*]
/--
Encodes a string in UTF-8 as an array of bytes.
-/
@[extern "lean_string_to_utf8"]
def toUTF8 (a : @& String) : ByteArray :=
⟨⟨a.data.flatMap utf8EncodeChar⟩⟩
a.bytes
@[simp] theorem size_toUTF8 (s : String) : s.toUTF8.size = s.utf8ByteSize := by
simp [toUTF8, ByteArray.size, Array.size, utf8ByteSize, List.flatMap]
induction s.data <;> simp [List.map, utf8ByteSize.go, Nat.add_comm, *]
rfl
/--
Accesses the indicated byte in the UTF-8 encoding of a string.

View file

@ -1327,3 +1327,14 @@ theorem UInt64.right_le_or {a b : UInt64} : b ≤ a ||| b := by
simpa [UInt64.le_iff_toNat_le] using Nat.right_le_or
theorem USize.right_le_or {a b : USize} : b ≤ a ||| b := by
simpa [USize.le_iff_toNat_le] using Nat.right_le_or
theorem UInt8.and_lt_add_one {b c : UInt8} (h : c ≠ -1) : b &&& c < c + 1 :=
UInt8.lt_of_le_of_lt UInt8.and_le_right (UInt8.lt_add_one h)
theorem UInt16.and_lt_add_one {b c : UInt16} (h : c ≠ -1) : b &&& c < c + 1 :=
UInt16.lt_of_le_of_lt UInt16.and_le_right (UInt16.lt_add_one h)
theorem UInt32.and_lt_add_one {b c : UInt32} (h : c ≠ -1) : b &&& c < c + 1 :=
UInt32.lt_of_le_of_lt UInt32.and_le_right (UInt32.lt_add_one h)
theorem UInt64.and_lt_add_one {b c : UInt64} (h : c ≠ -1) : b &&& c < c + 1 :=
UInt64.lt_of_le_of_lt UInt64.and_le_right (UInt64.lt_add_one h)
theorem USize.and_lt_add_one {b c : USize} (h : c ≠ -1) : b &&& c < c + 1 :=
USize.lt_of_le_of_lt USize.and_le_right (USize.lt_add_one h)

View file

@ -3163,3 +3163,15 @@ protected theorem USize.sub_lt {a b : USize} (hb : 0 < b) (hab : b ≤ a) : a -
rw [lt_iff_toNat_lt, USize.toNat_sub_of_le _ _ hab]
refine Nat.sub_lt ?_ (USize.lt_iff_toNat_lt.1 hb)
exact USize.lt_iff_toNat_lt.1 (USize.lt_of_lt_of_le hb hab)
theorem UInt8.lt_add_one {c : UInt8} (h : c ≠ -1) : c < c + 1 :=
UInt8.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt8.toBitVec_inj] using h))
theorem UInt16.lt_add_one {c : UInt16} (h : c ≠ -1) : c < c + 1 :=
UInt16.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt16.toBitVec_inj] using h))
theorem UInt32.lt_add_one {c : UInt32} (h : c ≠ -1) : c < c + 1 :=
UInt32.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt32.toBitVec_inj] using h))
theorem UInt64.lt_add_one {c : UInt64} (h : c ≠ -1) : c < c + 1 :=
UInt64.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt64.toBitVec_inj] using h))
theorem USize.lt_add_one {c : USize} (h : c ≠ -1) : c < c + 1 :=
USize.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one
(by simpa [← USize.toBitVec_inj, BitVec.neg_one_eq_allOnes] using h))

View file

@ -3010,218 +3010,56 @@ def List.concat {α : Type u} : List αα → List α
| cons a as, b => cons a (concat as b)
/--
Returns the sequence of bytes in a character's UTF-8 encoding.
Appends two lists. Normally used via the `++` operator.
Appending lists takes time proportional to the length of the first list: `O(|xs|)`.
Examples:
* `[1, 2, 3] ++ [4, 5] = [1, 2, 3, 4, 5]`.
* `[] ++ [4, 5] = [4, 5]`.
* `[1, 2, 3] ++ [] = [1, 2, 3]`.
-/
def String.utf8EncodeChar (c : Char) : List UInt8 :=
let v := c.val.toNat
ite (LE.le v 0x7f)
(List.cons (UInt8.ofNat v) List.nil)
(ite (LE.le v 0x7ff)
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x20) 0xc0))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
List.nil))
(ite (LE.le v 0xffff)
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x10) 0xe0))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
List.nil)))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 262144) 0x08) 0xf0))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x40) 0x80))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
List.nil))))))
protected def List.append : (xs ys : List α) → List α
| nil, bs => bs
| cons a as, bs => cons a (List.append as bs)
/--
A string is a sequence of Unicode code points.
Concatenates a list of lists into a single list, preserving the order of the elements.
At runtime, strings are represented by [dynamic arrays](https://en.wikipedia.org/wiki/Dynamic_array)
of bytes using the UTF-8 encoding. Both the size in bytes (`String.utf8ByteSize`) and in characters
(`String.length`) are cached and take constant time. Many operations on strings perform in-place
modifications when the reference to the string is unique.
`O(|flatten L|)`.
Examples:
* `[["a"], ["b", "c"]].flatten = ["a", "b", "c"]`
* `[["a"], [], ["b", "c"], ["d", "e", "f"]].flatten = ["a", "b", "c", "d", "e", "f"]`
-/
structure String where
/-- Pack a `List Char` into a `String`. This function is overridden by the
compiler and is O(n) in the length of the list. -/
mk ::
/-- Unpack `String` into a `List Char`. This function is overridden by the
compiler and is O(n) in the length of the list. -/
data : List Char
attribute [extern "lean_string_mk"] String.mk
attribute [extern "lean_string_data"] String.data
def List.flatten : List (List α) → List α
| nil => nil
| cons l L => List.append l (flatten L)
/--
Decides whether two strings are equal. Normally used via the `DecidableEq String` instance and the
`=` operator.
Applies a function to each element of the list, returning the resulting list of values.
At runtime, this function is overridden with an efficient native implementation.
`O(|l|)`.
Examples:
* `[a, b, c].map f = [f a, f b, f c]`
* `[].map Nat.succ = []`
* `["one", "two", "three"].map (·.length) = [3, 3, 5]`
* `["one", "two", "three"].map (·.reverse) = ["eno", "owt", "eerht"]`
-/
@[extern "lean_string_dec_eq"]
def String.decEq (s₁ s₂ : @& String) : Decidable (Eq s₁ s₂) :=
match s₁, s₂ with
| ⟨s₁⟩, ⟨s₂⟩ =>
dite (Eq s₁ s₂) (fun h => isTrue (congrArg _ h)) (fun h => isFalse (fun h' => String.noConfusion h' (fun h' => absurd h' h)))
instance : DecidableEq String := String.decEq
@[specialize] def List.map (f : α → β) : (l : List α) → List β
| nil => nil
| cons a as => cons (f a) (map f as)
/--
A byte position in a `String`, according to its UTF-8 encoding.
Applies a function that returns a list to each element of a list, and concatenates the resulting
lists.
Character positions (counting the Unicode code points rather than bytes) are represented by plain
`Nat`s. Indexing a `String` by a `String.Pos` takes constant time, while character positions need to
be translated internally to byte positions, which takes linear time.
A byte position `p` is *valid* for a string `s` if `0 ≤ p ≤ s.endPos` and `p` lies on a UTF-8
character boundary.
Examples:
* `[2, 3, 2].flatMap List.range = [0, 1, 0, 1, 2, 0, 1]`
* `["red", "blue"].flatMap String.toList = ['r', 'e', 'd', 'b', 'l', 'u', 'e']`
-/
structure String.Pos where
/-- Get the underlying byte index of a `String.Pos` -/
byteIdx : Nat := 0
instance : Inhabited String.Pos where
default := {}
instance : DecidableEq String.Pos :=
fun ⟨a⟩ ⟨b⟩ => match decEq a b with
| isTrue h => isTrue (h ▸ rfl)
| isFalse h => isFalse (fun he => String.Pos.noConfusion he fun he => absurd he h)
/--
A region or slice of some underlying string.
A substring contains an string together with the start and end byte positions of a region of
interest. Actually extracting a substring requires copying and memory allocation, while many
substrings of the same underlying string may exist with very little overhead, and they are more
convenient than tracking the bounds by hand.
Using its constructor explicitly, it is possible to construct a `Substring` in which one or both of
the positions is invalid for the string. Many operations will return unexpected or confusing results
if the start and stop positions are not valid. Instead, it's better to use API functions that ensure
the validity of the positions in a substring to create and manipulate them.
-/
structure Substring where
/-- The underlying string. -/
str : String
/-- The byte position of the start of the string slice. -/
startPos : String.Pos
/-- The byte position of the end of the string slice. -/
stopPos : String.Pos
instance : Inhabited Substring where
default := ⟨"", {}, {}⟩
/--
The number of bytes used by the string's UTF-8 encoding.
-/
@[inline, expose] def Substring.bsize : Substring → Nat
| ⟨_, b, e⟩ => e.byteIdx.sub b.byteIdx
/--
The number of bytes used by the string's UTF-8 encoding.
At runtime, this function takes constant time because the byte length of strings is cached.
-/
@[extern "lean_string_utf8_byte_size"]
def String.utf8ByteSize : (@& String) → Nat
| ⟨s⟩ => go s
where
go : List Char → Nat
| .nil => 0
| .cons c cs => hAdd (go cs) c.utf8Size
/--
A UTF-8 byte position that points at the end of a string, just after the last character.
* `"abc".endPos = ⟨3⟩`
* `"L∃∀N".endPos = ⟨8⟩`
-/
@[inline] def String.endPos (s : String) : String.Pos where
byteIdx := utf8ByteSize s
/--
Converts a `String` into a `Substring` that denotes the entire string.
-/
@[inline] def String.toSubstring (s : String) : Substring where
str := s
startPos := {}
stopPos := s.endPos
/--
Converts a `String` into a `Substring` that denotes the entire string.
This is a version of `String.toSubstring` that doesn't have an `@[inline]` annotation.
-/
def String.toSubstring' (s : String) : Substring :=
s.toSubstring
/--
This function will cast a value of type `α` to type `β`, and is a no-op in the
compiler. This function is **extremely dangerous** because there is no guarantee
that types `α` and `β` have the same data representation, and this can lead to
memory unsafety. It is also logically unsound, since you could just cast
`True` to `False`. For all those reasons this function is marked as `unsafe`.
It is implemented by lifting both `α` and `β` into a common universe, and then
using `cast (lcProof : ULift (PLift α) = ULift (PLift β))` to actually perform
the cast. All these operations are no-ops in the compiler.
Using this function correctly requires some knowledge of the data representation
of the source and target types. Some general classes of casts which are safe in
the current runtime:
* `Array α` to `Array β` where `α` and `β` have compatible representations,
or more generally for other inductive types.
* `Quot α r` and `α`.
* `@Subtype α p` and `α`, or generally any structure containing only one
non-`Prop` field of type `α`.
* Casting `α` to/from `NonScalar` when `α` is a boxed generic type
(i.e. a function that accepts an arbitrary type `α` and is not specialized to
a scalar type like `UInt8`).
-/
unsafe def unsafeCast {α : Sort u} {β : Sort v} (a : α) : β :=
PLift.down (ULift.down.{max u v} (cast lcProof (ULift.up.{max u v} (PLift.up a))))
/-- Auxiliary definition for `panic`. -/
/-
This is a workaround for `panic` occurring in monadic code. See issue #695.
The `panicCore` definition cannot be specialized since it is an extern.
When `panic` occurs in monadic code, the `Inhabited α` parameter depends on a
`[inst : Monad m]` instance. The `inst` parameter will not be eliminated during
specialization if it occurs inside of a binder (to avoid work duplication), and
will prevent the actual monad from being "copied" to the code being specialized.
When we reimplement the specializer, we may consider copying `inst` if it also
occurs outside binders or if it is an instance.
-/
@[never_extract, extern "lean_panic_fn"]
def panicCore {α : Sort u} [Inhabited α] (msg : String) : α := default
/--
`(panic "msg" : α)` has a built-in implementation which prints `msg` to
the error buffer. It *does not* terminate execution, and because it is a safe
function, it still has to return an element of `α`, so it takes `[Inhabited α]`
and returns `default`. It is primarily intended for debugging in pure contexts,
and assertion failures.
Because this is a pure function with side effects, it is marked as
`@[never_extract]` so that the compiler will not perform common sub-expression
elimination and other optimizations that assume that the expression is pure.
-/
@[noinline, never_extract]
def panic {α : Sort u} [Inhabited α] (msg : String) : α :=
panicCore msg
-- TODO: this be applied directly to `Inhabited`'s definition when we remove the above workaround
attribute [nospecialize] Inhabited
@[inline] def List.flatMap {α : Type u} {β : Type v} (b : α → List β) (as : List α) : List β := flatten (map b as)
/--
`Array α` is the type of [dynamic arrays](https://en.wikipedia.org/wiki/Dynamic_array) with elements
@ -3452,6 +3290,276 @@ def Array.extract (as : Array α) (start : Nat := 0) (stop : Nat := as.size) : A
let sz' := Nat.sub (min stop as.size) start
loop sz' start (emptyWithCapacity sz')
/-- `ByteArray` is like `Array UInt8`, but with an efficient run-time representation as a packed
byte buffer. -/
structure ByteArray where
/-- The data contained in the byte array. -/
data : Array UInt8
attribute [extern "lean_byte_array_mk"] ByteArray.mk
attribute [extern "lean_byte_array_data"] ByteArray.data
/--
Constructs a new empty byte array with initial capacity `c`.
-/
@[extern "lean_mk_empty_byte_array"]
def ByteArray.emptyWithCapacity (c : @& Nat) : ByteArray :=
{ data := Array.empty }
/--
Constructs a new empty byte array with initial capacity `0`.
Use `ByteArray.emptyWithCapacity` to create an array with a greater initial capacity.
-/
def ByteArray.empty : ByteArray := emptyWithCapacity 0
/--
Adds an element to the end of an array. The resulting array's size is one greater than the input
array. If there are no other references to the array, then it is modified in-place.
This takes amortized `O(1)` time because `Array α` is represented by a dynamic array.
-/
@[extern "lean_byte_array_push"]
def ByteArray.push : ByteArray → UInt8 → ByteArray
| ⟨bs⟩, b => ⟨bs.push b⟩
/--
Converts a list of bytes into a `ByteArray`.
-/
def List.toByteArray (bs : List UInt8) : ByteArray :=
let rec loop
| nil, r => r
| cons b bs, r => loop bs (r.push b)
loop bs ByteArray.empty
/-- Returns the size of the byte array. -/
@[extern "lean_byte_array_size"]
def ByteArray.size : (@& ByteArray) → Nat
| ⟨bs⟩ => bs.size
/--
Returns the sequence of bytes in a character's UTF-8 encoding.
-/
def String.utf8EncodeChar (c : Char) : List UInt8 :=
let v := c.val.toNat
ite (LE.le v 0x7f)
(List.cons (UInt8.ofNat v) List.nil)
(ite (LE.le v 0x7ff)
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x20) 0xc0))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
List.nil))
(ite (LE.le v 0xffff)
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x10) 0xe0))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
List.nil)))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 262144) 0x08) 0xf0))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x40) 0x80))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
(List.cons
(UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
List.nil))))))
/-- Encode a list of characters (Unicode scalar value) in UTF-8. This is an inefficient model
implementation. Use `List.asString` instead. -/
def List.utf8Encode (l : List Char) : ByteArray :=
l.flatMap String.utf8EncodeChar |>.toByteArray
/-- A byte array is valid UTF-8 if it is of the form `List.Internal.utf8Encode m` for some `m`.
Note that in order for this definition to be well-behaved it is necessary to know that this `m`
is unique. To show this, one defines UTF-8 decoding and shows that encoding and decoding are
mutually inverse. -/
inductive ByteArray.IsValidUtf8 (b : ByteArray) : Prop
/-- Show that a byte -/
| intro (m : List Char) (hm : Eq b (List.utf8Encode m))
/--
A string is a sequence of Unicode scalar values.
At runtime, strings are represented by [dynamic arrays](https://en.wikipedia.org/wiki/Dynamic_array)
of bytes using the UTF-8 encoding. Both the size in bytes (`String.utf8ByteSize`) and in characters
(`String.length`) are cached and take constant time. Many operations on strings perform in-place
modifications when the reference to the string is unique.
-/
structure String where ofByteArray ::
/-- The bytes of the UTF-8 encoding of the string. -/
bytes : ByteArray
/-- The bytes of the string form valid UTF-8. -/
isValidUtf8 : ByteArray.IsValidUtf8 bytes
attribute [extern "lean_string_to_utf8"] String.bytes
/--
Decides whether two strings are equal. Normally used via the `DecidableEq String` instance and the
`=` operator.
At runtime, this function is overridden with an efficient native implementation.
-/
@[extern "lean_string_dec_eq"]
def String.decEq (s₁ s₂ : @& String) : Decidable (Eq s₁ s₂) :=
match s₁, s₂ with
| ⟨⟨⟨s₁⟩⟩, _⟩, ⟨⟨⟨s₂⟩⟩, _⟩ =>
dite (Eq s₁ s₂) (fun h => match s₁, s₂, h with | _, _, Eq.refl _ => isTrue rfl)
(fun h => isFalse
(fun h' => h (congrArg (fun s => Array.toList (ByteArray.data (String.bytes s))) h')))
instance : DecidableEq String := String.decEq
/--
A byte position in a `String`, according to its UTF-8 encoding.
Character positions (counting the Unicode code points rather than bytes) are represented by plain
`Nat`s. Indexing a `String` by a `String.Pos` takes constant time, while character positions need to
be translated internally to byte positions, which takes linear time.
A byte position `p` is *valid* for a string `s` if `0 ≤ p ≤ s.endPos` and `p` lies on a UTF-8
character boundary.
-/
structure String.Pos where
/-- Get the underlying byte index of a `String.Pos` -/
byteIdx : Nat := 0
instance : Inhabited String.Pos where
default := {}
instance : DecidableEq String.Pos :=
fun ⟨a⟩ ⟨b⟩ => match decEq a b with
| isTrue h => isTrue (h ▸ rfl)
| isFalse h => isFalse (fun he => String.Pos.noConfusion he fun he => absurd he h)
/--
A region or slice of some underlying string.
A substring contains an string together with the start and end byte positions of a region of
interest. Actually extracting a substring requires copying and memory allocation, while many
substrings of the same underlying string may exist with very little overhead, and they are more
convenient than tracking the bounds by hand.
Using its constructor explicitly, it is possible to construct a `Substring` in which one or both of
the positions is invalid for the string. Many operations will return unexpected or confusing results
if the start and stop positions are not valid. Instead, it's better to use API functions that ensure
the validity of the positions in a substring to create and manipulate them.
-/
structure Substring where
/-- The underlying string. -/
str : String
/-- The byte position of the start of the string slice. -/
startPos : String.Pos
/-- The byte position of the end of the string slice. -/
stopPos : String.Pos
instance : Inhabited Substring where
default := ⟨"", {}, {}⟩
/--
The number of bytes used by the string's UTF-8 encoding.
-/
@[inline, expose] def Substring.bsize : Substring → Nat
| ⟨_, b, e⟩ => e.byteIdx.sub b.byteIdx
/--
The number of bytes used by the string's UTF-8 encoding.
At runtime, this function takes constant time because the byte length of strings is cached.
-/
@[extern "lean_string_utf8_byte_size"]
def String.utf8ByteSize (s : @& String) : Nat :=
s.bytes.size
/--
A UTF-8 byte position that points at the end of a string, just after the last character.
* `"abc".endPos = ⟨3⟩`
* `"L∃∀N".endPos = ⟨8⟩`
-/
@[inline] def String.endPos (s : String) : String.Pos where
byteIdx := utf8ByteSize s
/--
Converts a `String` into a `Substring` that denotes the entire string.
-/
@[inline] def String.toSubstring (s : String) : Substring where
str := s
startPos := {}
stopPos := s.endPos
/--
Converts a `String` into a `Substring` that denotes the entire string.
This is a version of `String.toSubstring` that doesn't have an `@[inline]` annotation.
-/
def String.toSubstring' (s : String) : Substring :=
s.toSubstring
/--
This function will cast a value of type `α` to type `β`, and is a no-op in the
compiler. This function is **extremely dangerous** because there is no guarantee
that types `α` and `β` have the same data representation, and this can lead to
memory unsafety. It is also logically unsound, since you could just cast
`True` to `False`. For all those reasons this function is marked as `unsafe`.
It is implemented by lifting both `α` and `β` into a common universe, and then
using `cast (lcProof : ULift (PLift α) = ULift (PLift β))` to actually perform
the cast. All these operations are no-ops in the compiler.
Using this function correctly requires some knowledge of the data representation
of the source and target types. Some general classes of casts which are safe in
the current runtime:
* `Array α` to `Array β` where `α` and `β` have compatible representations,
or more generally for other inductive types.
* `Quot α r` and `α`.
* `@Subtype α p` and `α`, or generally any structure containing only one
non-`Prop` field of type `α`.
* Casting `α` to/from `NonScalar` when `α` is a boxed generic type
(i.e. a function that accepts an arbitrary type `α` and is not specialized to
a scalar type like `UInt8`).
-/
unsafe def unsafeCast {α : Sort u} {β : Sort v} (a : α) : β :=
PLift.down (ULift.down.{max u v} (cast lcProof (ULift.up.{max u v} (PLift.up a))))
/-- Auxiliary definition for `panic`. -/
/-
This is a workaround for `panic` occurring in monadic code. See issue #695.
The `panicCore` definition cannot be specialized since it is an extern.
When `panic` occurs in monadic code, the `Inhabited α` parameter depends on a
`[inst : Monad m]` instance. The `inst` parameter will not be eliminated during
specialization if it occurs inside of a binder (to avoid work duplication), and
will prevent the actual monad from being "copied" to the code being specialized.
When we reimplement the specializer, we may consider copying `inst` if it also
occurs outside binders or if it is an instance.
-/
@[never_extract, extern "lean_panic_fn"]
def panicCore {α : Sort u} [Inhabited α] (msg : String) : α := default
/--
`(panic "msg" : α)` has a built-in implementation which prints `msg` to
the error buffer. It *does not* terminate execution, and because it is a safe
function, it still has to return an element of `α`, so it takes `[Inhabited α]`
and returns `default`. It is primarily intended for debugging in pure contexts,
and assertion failures.
Because this is a pure function with side effects, it is marked as
`@[never_extract]` so that the compiler will not perform common sub-expression
elimination and other optimizations that assume that the expression is pure.
-/
@[noinline, never_extract]
def panic {α : Sort u} [Inhabited α] (msg : String) : α :=
panicCore msg
-- TODO: this be applied directly to `Inhabited`'s definition when we remove the above workaround
attribute [nospecialize] Inhabited
/--
The `>>=` operator is overloaded via instances of `bind`.

View file

@ -84,10 +84,11 @@ deriving instance SizeOf for USize
deriving instance SizeOf for Char
deriving instance SizeOf for Option
deriving instance SizeOf for List
deriving instance SizeOf for Array
deriving instance SizeOf for ByteArray
deriving instance SizeOf for String
deriving instance SizeOf for String.Pos
deriving instance SizeOf for Substring
deriving instance SizeOf for Array
deriving instance SizeOf for Except
deriving instance SizeOf for EStateM.Result

View file

@ -611,7 +611,7 @@ mutual
asStringFn (atomicFn (noSpaceBefore >> repFn count (satisfyFn (· == char) s!"'{tok count}'"))))
where
tok (count : Nat) : String := ⟨List.replicate count char⟩
tok (count : Nat) : String := (List.replicate count char).asString
opener (ctxt : InlineCtxt) : ParserFn :=
match getter ctxt with
| none => many1Fn (satisfyFn (· == char) s!"any number of {char}s")

View file

@ -631,7 +631,7 @@
"end": {"line": 294, "character": 20}},
"contents":
{"value":
"```lean\nList.map.{u, v} {α : Type u} {β : Type v} (f : α → β) (l : List α) : List β\n```\n***\nApplies a function to each element of the list, returning the resulting list of values.\n\n`O(|l|)`.\n\nExamples:\n* `[a, b, c].map f = [f a, f b, f c]`\n* `[].map Nat.succ = []`\n* `[\"one\", \"two\", \"three\"].map (·.length) = [3, 3, 5]`\n* `[\"one\", \"two\", \"three\"].map (·.reverse) = [\"eno\", \"owt\", \"eerht\"]`\n\n***\n*import Init.Data.List.Basic*",
"```lean\nList.map.{u_1, u_2} {α : Type u_1} {β : Type u_2} (f : α → β) (l : List α) : List β\n```\n***\nApplies a function to each element of the list, returning the resulting list of values.\n\n`O(|l|)`.\n\nExamples:\n* `[a, b, c].map f = [f a, f b, f c]`\n* `[].map Nat.succ = []`\n* `[\"one\", \"two\", \"three\"].map (·.length) = [3, 3, 5]`\n* `[\"one\", \"two\", \"three\"].map (·.reverse) = [\"eno\", \"owt\", \"eerht\"]`\n\n***\n*import Init.Prelude*",
"kind": "markdown"}}
{"textDocument": {"uri": "file:///hover.lean"},
"position": {"line": 297, "character": 26}}

View file

@ -5,8 +5,8 @@
/-!
Defined without named arguments, prints without named arguments.
-/
/-- info: String.append : String → String → String -/
#guard_msgs in #check String.append
/-- info: Nat.add : Nat → Nat → Nat -/
#guard_msgs in #check Nat.add
/-!
The List argument is not named, it is not printed as a named argument.

View file

@ -85,7 +85,7 @@ def removeRightmostZeros (s : String) : String :=
if a != '0'
then aux [] (a :: (buff ++ res)) as
else aux (a :: buff) res as
⟨aux [] [] s.data⟩
(aux [] [] s.data).asString
protected def Literal.toString : Literal → String
| bool b => toString b

View file

@ -85,7 +85,7 @@ def removeRightmostZeros (s : String) : String :=
if a != '0'
then aux [] (a :: (buff ++ res)) as
else aux (a :: buff) res as
⟨aux [] [] s.data⟩
(aux [] [] s.data).asString
protected def Literal.toString : Literal → String
| bool b => toString b

View file

@ -11,8 +11,8 @@ example (x : Option Nat) (f : Nat → Nat) : (x.map f).isSome = x.isSome := by
case case2 => simp
/--
info: List.map.fun_cases.{u} {α : Type u} (motive : List α → Prop) (case1 : motive [])
(case2 : ∀ (a : α) (as : List α), motive (a :: as)) (x✝ : List α) : motive x✝
info: List.map.fun_cases.{u_1} {α : Type u_1} (motive : List α → Prop) (case1 : motive [])
(case2 : ∀ (head : α) (as : List α), motive (head :: as)) (x✝ : List α) : motive x✝
-/
#guard_msgs in
#check List.map.fun_cases

View file

@ -78,7 +78,7 @@ namespace NonMutual
def lex [Monad m] [MonadExceptOf LexErr m] (current? : Option (List Char × Nat)) (it : String.Iterator) : m (List Token) := do
let currTok := fun
| (cs, n) => { text := {data := cs.reverse}, tok := Tok.num n }
| (cs, n) => { text := cs.reverse.asString , tok := Tok.num n }
if it.atEnd then
return current?.toList.map currTok
else

View file

@ -1,17 +1,17 @@
/--
info: equations:
@[defeq] theorem List.append.eq_1.{u} : ∀ {α : Type u} (x : List α), [].append x = x
@[defeq] theorem List.append.eq_2.{u} : ∀ {α : Type u} (x : List α) (a : α) (l : List α),
(a :: l).append x = a :: l.append x
@[defeq] theorem List.append.eq_1.{u_1} : ∀ {α : Type u_1} (x : List α), [].append x = x
@[defeq] theorem List.append.eq_2.{u_1} : ∀ {α : Type u_1} (x : List α) (a : α) (as : List α),
(a :: as).append x = a :: as.append x
-/
#guard_msgs in
#print eqns List.append
/--
info: equations:
@[defeq] theorem List.append.eq_1.{u} : ∀ {α : Type u} (x : List α), [].append x = x
@[defeq] theorem List.append.eq_2.{u} : ∀ {α : Type u} (x : List α) (a : α) (l : List α),
(a :: l).append x = a :: l.append x
@[defeq] theorem List.append.eq_1.{u_1} : ∀ {α : Type u_1} (x : List α), [].append x = x
@[defeq] theorem List.append.eq_2.{u_1} : ∀ {α : Type u_1} (x : List α) (a : α) (as : List α),
(a :: as).append x = a :: as.append x
-/
#guard_msgs in
#print equations List.append

View file

@ -4,10 +4,11 @@ setLit.lean:22:19-22:21: error: overloaded, errors
Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command.
Fields missing: `data`
Fields missing: `bytes`, `isValidUtf8`
Hint: Add missing fields:
̲d̲a̲t̲a̲ ̲:̲=̲ ̲_̲ ̲
̲b̲y̲t̲e̲s̲ ̲:̲=̲ ̲_̲
̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲t̲f̲8̲ ̲:̲=̲ ̲_̲ ̲
setLit.lean:24:31-24:38: error: overloaded, errors
failed to synthesize
Singleton Nat String

View file

@ -4,7 +4,7 @@
13
101
558
311
313
310
312
11
InfTree.node.sizeOf_spec.{u} {α : Type u} [SizeOf α] (children : Nat → InfTree α) : sizeOf (InfTree.node children) = 1

View file

@ -20,7 +20,7 @@ it₁.remainingToString ++ "-" ++ it₂.remainingToString
#eval "αβγ".mkIterator.next.1
#eval "αβγ".mkIterator.next.next.1
#eval "αβγ".mkIterator.next.2
#eval "αβ".1
#eval "αβ".data
#eval "αβ".push 'a'
#eval g "α"
#eval "".mkIterator.curr