refactor: use String.ofList and String.toList for String <-> List Char conversion (#11017)

This PR establishes `String.ofList` and `String.toList` as the preferred
method for converting between strings and lists of characters and
deprecates the alternatives `String.mk`, `List.asString` and
`String.data`.
This commit is contained in:
Markus Himmel 2025-10-31 15:41:23 +01:00 committed by GitHub
parent c41cb64ca7
commit 377f149862
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 233 additions and 138 deletions

View file

@ -203,8 +203,8 @@ If `n` is `0`, then one digit is returned. Otherwise, `⌊(n + 3) / 4⌋` digits
-- `Internal` string functions by moving this definition out to a separate file that can live
-- downstream of `Init.Data.String.Basic`.
protected def toHex {n : Nat} (x : BitVec n) : String :=
let s := (Nat.toDigits 16 x.toNat).asString
let t := (List.replicate ((n+3) / 4 - String.Internal.length s) '0').asString
let s := String.ofList (Nat.toDigits 16 x.toNat)
let t := String.ofList (List.replicate ((n+3) / 4 - String.Internal.length s) '0')
String.Internal.append t s
/-- `BitVec` representation. -/

View file

@ -226,7 +226,7 @@ Examples:
-/
@[extern "lean_string_of_usize"]
protected def _root_.USize.repr (n : @& USize) : String :=
(toDigits 10 n.toNat).asString
String.ofList (toDigits 10 n.toNat)
/-- We statically allocate and memoize reprs for small natural numbers. -/
private def reprArray : Array String := Id.run do
@ -235,14 +235,14 @@ private def reprArray : Array String := Id.run do
def reprFast (n : Nat) : String :=
if h : n < Nat.reprArray.size then Nat.reprArray.getInternal n h else
if h : n < USize.size then (USize.ofNatLT n h).repr
else (toDigits 10 n).asString
else String.ofList (toDigits 10 n)
/--
Converts a natural number to its decimal string representation.
-/
@[implemented_by reprFast]
protected def repr (n : Nat) : String :=
(toDigits 10 n).asString
String.ofList (toDigits 10 n)
/--
Converts a natural number less than `10` to the corresponding Unicode superscript digit character.
@ -293,7 +293,7 @@ Examples:
* `Nat.toSuperscriptString 35 = "³⁵"`
-/
def toSuperscriptString (n : Nat) : String :=
(toSuperDigits n).asString
String.ofList (toSuperDigits n)
/--
Converts a natural number less than `10` to the corresponding Unicode subscript digit character.
@ -344,7 +344,7 @@ Examples:
* `Nat.toSubscriptString 35 = "₃₅"`
-/
def toSubscriptString (n : Nat) : String :=
(toSubDigits n).asString
String.ofList (toSubDigits n)
end Nat

View file

@ -220,13 +220,23 @@ theorem String.append_empty {s : String} : s ++ "" = s := by
simp [← String.bytes_inj]
@[simp]
theorem List.asString_nil : List.asString [] = "" := by
simp [← String.bytes_inj]
theorem String.ofList_nil : String.ofList [] = "" :=
rfl
@[deprecated String.ofList_nil (since := "2025-10-30")]
theorem List.asString_nil : String.ofList [] = "" :=
String.ofList_nil
@[simp]
theorem List.asString_append {l₁ l₂ : List Char} : (l₁ ++ l₂).asString = l₁.asString ++ l₂.asString := by
theorem String.ofList_append {l₁ l₂ : List Char} :
String.ofList (l₁ ++ l₂) = String.ofList l₁ ++ String.ofList l₂ := by
simp [← String.bytes_inj]
@[deprecated String.ofList_append (since := "2025-10-30")]
theorem List.asString_append {l₁ l₂ : List Char} :
String.ofList (l₁ ++ l₂) = String.ofList l₁ ++ String.ofList l₂ :=
String.ofList_append
@[expose]
def String.Internal.toArray (b : String) : Array Char :=
b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUTF8)
@ -261,13 +271,17 @@ Examples:
* `"".toList = []`
* `"\n".toList = ['\n']`
-/
@[extern "lean_string_data", expose]
@[extern "lean_string_data", expose, deprecated String.toList (since := "2025-10-30")]
def String.data (b : String) : List Char :=
(String.Internal.toArray b).toList
@[simp]
theorem String.data_empty : "".data = [] := by
simp [data]
theorem String.toList_empty : "".toList = [] := by
simp [toList]
@[deprecated String.toList_empty (since := "2025-10-30")]
theorem String.data_empty : "".toList = [] :=
toList_empty
/--
Returns the length of a string in Unicode code points.
@ -279,14 +293,17 @@ Examples:
-/
@[extern "lean_string_length", expose]
def String.length (b : @& String) : Nat :=
b.data.length
b.toList.length
@[simp]
theorem String.Internal.size_toArray {b : String} : (String.Internal.toArray b).size = b.length :=
(rfl)
@[simp]
theorem String.length_data {b : String} : b.data.length = b.length := (rfl)
theorem String.length_toList {s : String} : s.toList.length = s.length := (rfl)
@[deprecated String.length_toList (since := "2025-10-30")]
theorem String.length_data {b : String} : b.toList.length = b.length := (rfl)
private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
utf8Decode?.go b fuel i acc hi hf = (utf8Decode?.go (b.extract i b.size) fuel 0 #[] (by simp) (by simp [hf])).map (acc ++ ·) := by
@ -356,58 +373,101 @@ theorem ByteArray.utf8Encode_get_utf8Decode? {b : ByteArray} {h} :
simp
@[simp]
theorem List.data_asString {l : List Char} : l.asString.data = l := by
simp [String.data, String.Internal.toArray]
theorem String.toList_ofList {l : List Char} : (String.ofList l).toList = l := by
simp [String.toList, String.Internal.toArray]
@[deprecated String.toList_ofList (since := "2025-10-30")]
theorem List.data_asString {l : List Char} : (String.ofList l).toList = l :=
String.toList_ofList
@[simp]
theorem String.asString_data {b : String} : b.data.asString = b := by
obtain ⟨l, rfl⟩ := String.exists_eq_asString b
rw [List.data_asString]
theorem List.asString_injective {l₁ l₂ : List Char} (h : l₁.asString = l₂.asString) : l₁ = l₂ := by
simpa using congrArg String.data h
theorem List.asString_inj {l₁ l₂ : List Char} : l₁.asString = l₂.asString ↔ l₁ = l₂ :=
⟨asString_injective, (· ▸ rfl)⟩
theorem String.data_injective {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ := by
simpa using congrArg List.asString h
theorem String.data_inj {s₁ s₂ : String} : s₁.data = s₂.data ↔ s₁ = s₂ :=
⟨data_injective, (· ▸ rfl)⟩
@[simp]
theorem String.data_append {l₁ l₂ : String} : (l₁ ++ l₂).data = l₁.data ++ l₂.data := by
apply List.asString_injective
theorem String.ofList_toList {s : String} : String.ofList s.toList = s := by
obtain ⟨l, rfl⟩ := s.exists_eq_ofList
simp
@[simp]
theorem String.utf8encode_data {b : String} : b.data.utf8Encode = b.bytes := by
have := congrArg String.bytes (String.asString_data (b := b))
rwa [← List.bytes_asString]
@[deprecated String.ofList_toList (since := "2025-10-30")]
theorem String.asString_data {b : String} : String.ofList b.toList = b :=
String.ofList_toList
theorem String.ofList_injective {l₁ l₂ : List Char} (h : String.ofList l₁ = String.ofList l₂) : l₁ = l₂ := by
simpa using congrArg String.toList h
@[deprecated String.ofList_injective (since := "2025-10-30")]
theorem List.asString_injective {l₁ l₂ : List Char} (h : String.ofList l₁ = String.ofList l₂) : l₁ = l₂ :=
String.ofList_injective h
theorem String.ofList_inj {l₁ l₂ : List Char} : String.ofList l₁ = String.ofList l₂ ↔ l₁ = l₂ :=
⟨ofList_injective, (· ▸ rfl)⟩
@[deprecated String.ofList_inj (since := "2025-10-30")]
theorem List.asString_inj {l₁ l₂ : List Char} : String.ofList l₁ = String.ofList l₂ ↔ l₁ = l₂ :=
String.ofList_inj
theorem String.toList_injective {s₁ s₂ : String} (h : s₁.toList = s₂.toList) : s₁ = s₂ := by
simpa using congrArg String.ofList h
@[deprecated String.toList_injective (since := "2025-10-30")]
theorem String.data_injective {s₁ s₂ : String} (h : s₁.toList = s₂.toList) : s₁ = s₂ :=
String.toList_injective h
theorem String.toList_inj {s₁ s₂ : String} : s₁.toList = s₂.toList ↔ s₁ = s₂ :=
⟨toList_injective, (· ▸ rfl)⟩
@[deprecated String.toList_inj (since := "2025-10-30")]
theorem String.data_inj {s₁ s₂ : String} : s₁.toList = s₂.toList ↔ s₁ = s₂ :=
String.toList_inj
@[simp]
theorem String.data_eq_nil_iff {b : String} : b.data = [] ↔ b = "" := by
rw [← List.asString_inj, asString_data, List.asString_nil]
theorem String.toList_append {s t : String} : (s ++ t).toList = s.toList ++ t.toList := by
simp [← String.ofList_inj]
@[deprecated String.toList_append (since := "2025-10-30")]
theorem String.data_append {l₁ l₂ : String} : (l₁ ++ l₂).toList = l₁.toList ++ l₂.toList :=
String.toList_append
@[simp]
theorem List.asString_eq_empty_iff {l : List Char} : l.asString = "" ↔ l = [] := by
rw [← String.data_inj, List.data_asString, String.data_empty]
theorem String.utf8Encode_toList {b : String} : b.toList.utf8Encode = b.bytes := by
have := congrArg String.bytes (String.ofList_toList (s := b))
rwa [← String.bytes_ofList]
@[deprecated String.utf8Encode_toList (since := "2025-10-30")]
theorem String.utf8encode_data {b : String} : b.toList.utf8Encode = b.bytes :=
String.utf8Encode_toList
@[simp]
theorem List.length_asString {l : List Char} : l.asString.length = l.length := by
rw [← String.length_data, List.data_asString]
theorem String.toList_eq_nil_iff {b : String} : b.toList = [] ↔ b = "" := by
rw [← String.ofList_inj, ofList_toList, String.ofList_nil]
@[deprecated String.toList_eq_nil_iff (since := "2025-10-30")]
theorem String.data_eq_nil_iff {b : String} : b.toList = [] ↔ b = "" :=
String.toList_eq_nil_iff
@[simp]
theorem String.ofList_eq_empty_iff {l : List Char} : String.ofList l = "" ↔ l = [] := by
rw [← String.toList_inj, String.toList_ofList, String.toList_empty]
@[deprecated String.ofList_eq_empty_iff (since := "2025-10-30")]
theorem List.asString_eq_empty_iff {l : List Char} : String.ofList l = "" ↔ l = [] :=
String.ofList_eq_empty_iff
@[simp]
theorem String.length_ofList {l : List Char} : (String.ofList l).length = l.length := by
rw [← String.length_toList, String.toList_ofList]
@[deprecated String.length_ofList (since := "2025-10-30")]
theorem List.length_asString {l : List Char} : (String.ofList l).length = l.length :=
String.length_ofList
end
namespace String
instance : LT String :=
⟨fun s₁ s₂ => s₁.data < s₂.data⟩
⟨fun s₁ s₂ => s₁.toList < s₂.toList
@[extern "lean_string_dec_lt"]
instance decidableLT (s₁ s₂ : @& String) : Decidable (s₁ < s₂) :=
List.decidableLT s₁.data s₂.data
List.decidableLT s₁.toList s₂.toList
/--
Non-strict inequality on strings, typically used via the `≤` operator.
@ -441,7 +501,7 @@ theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char
open List in
theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
∃ m₁ m₂ : List Char, m₁.utf8Encode = s.bytes.extract 0 p.byteIdx ∧ (m₁ ++ m₂).asString = s := by
∃ m₁ m₂ : List Char, m₁.utf8Encode = s.bytes.extract 0 p.byteIdx ∧ String.ofList (m₁ ++ m₂) = s := by
obtain ⟨l, hl⟩ := s.isValidUTF8
obtain ⟨m₁, hm₁⟩ := h.isValidUTF8_extract_zero
suffices m₁ <+: l by
@ -457,11 +517,11 @@ theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) :
theorem Pos.Raw.IsValid.isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) :
ByteArray.IsValidUTF8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by
obtain ⟨m₁, m₂, hm, rfl⟩ := h.exists
simp only [List.asString_append, bytes_append, List.bytes_asString]
simp only [String.ofList_append, bytes_append, String.bytes_ofList]
rw [ByteArray.extract_append_eq_right]
· exact ByteArray.isValidUTF8_utf8Encode
· rw [hm]
simp only [List.asString_append, bytes_append, List.bytes_asString, ByteArray.size_extract,
simp only [String.ofList_append, bytes_append, String.bytes_ofList, ByteArray.size_extract,
ByteArray.size_append, Nat.sub_zero]
refine (Nat.min_eq_left ?_).symm
simpa [utf8ByteSize, Pos.Raw.le_iff] using h.le_rawEndPos
@ -480,44 +540,54 @@ theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} :
refine isValid_iff_isValidUTF8_extract_zero.2 ⟨by simp [Pos.Raw.le_iff], ?_⟩
simpa [ByteArray.extract_append_eq_left] using s₁.isValidUTF8
theorem Pos.Raw.isValid_asString {l : List Char} {p : Pos.Raw} :
p.IsValid l.asString ↔ ∃ i, p.byteIdx = (l.take i).asString.utf8ByteSize := by
theorem Pos.Raw.isValid_ofList {l : List Char} {p : Pos.Raw} :
p.IsValid (ofList l) ↔ ∃ i, p.byteIdx = (ofList (l.take i)).utf8ByteSize := by
rw [isValid_iff_exists_append]
refine ⟨?_, ?_⟩
· rintro ⟨t₁, t₂, ht, rfl⟩
refine ⟨t₁.length, ?_⟩
have := congrArg String.data ht
simp only [List.data_asString, String.data_append] at this
have := congrArg String.toList ht
simp only [String.toList_ofList, String.toList_append] at this
simp [this]
· rintro ⟨i, hi⟩
refine ⟨(l.take i).asString, (l.drop i).asString, ?_, ?_⟩
· simp [← List.asString_append]
refine ⟨ofList (l.take i), ofList (l.drop i), ?_, ?_⟩
· simp [← String.ofList_append]
· simpa [Pos.Raw.ext_iff]
@[deprecated Pos.Raw.isValid_ofList (since := "2025-10-30")]
theorem Pos.Raw.isValid_asString {l : List Char} {p : Pos.Raw} :
p.IsValid (ofList l) ↔ ∃ i, p.byteIdx = (ofList (l.take i)).utf8ByteSize :=
Pos.Raw.isValid_ofList
theorem Pos.Raw.isValid_iff_exists_take_toList {s : String} {p : Pos.Raw} :
p.IsValid s ↔ ∃ i, p.byteIdx = (ofList (s.toList.take i)).utf8ByteSize := by
obtain ⟨l, rfl⟩ := s.exists_eq_ofList
simp [isValid_ofList]
@[deprecated Pos.Raw.isValid_iff_exists_take_toList (since := "2025-10-30")]
theorem Pos.Raw.isValid_iff_exists_take_data {s : String} {p : Pos.Raw} :
p.IsValid s ↔ ∃ i, p.byteIdx = (s.data.take i).asString.utf8ByteSize := by
obtain ⟨l, rfl⟩ := s.exists_eq_asString
simp [isValid_asString]
p.IsValid s ↔ ∃ i, p.byteIdx = (ofList (s.toList.take i)).utf8ByteSize :=
Pos.Raw.isValid_iff_exists_take_toList
@[simp]
theorem Pos.Raw.isValid_singleton {c : Char} {p : Pos.Raw} :
p.IsValid (String.singleton c) ↔ p = 0 p.byteIdx = c.utf8Size := by
rw [singleton_eq_asString, Pos.Raw.isValid_asString]
rw [singleton_eq_ofList, Pos.Raw.isValid_ofList]
refine ⟨?_, ?_⟩
· rintro ⟨i, hi'⟩
obtain ⟨rfl, hi⟩ : i = 0 1 ≤ i := by omega
· simp [Pos.Raw.ext_iff, hi']
· rw [hi', List.take_of_length_le (by simpa)]
simp [← singleton_eq_asString]
simp [← singleton_eq_ofList]
· rintro (rfl|hi)
· exact ⟨0, by simp⟩
· exact ⟨1, by simp [hi, ← singleton_eq_asString]⟩
· exact ⟨1, by simp [hi, ← singleton_eq_ofList]⟩
theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
p.IsValid (s ++ t) ↔ p.IsValid s (s.rawEndPos ≤ p ∧ (p - s).IsValid t) := by
obtain ⟨s, rfl⟩ := exists_eq_asString s
obtain ⟨t, rfl⟩ := exists_eq_asString t
rw [← List.asString_append, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString]
obtain ⟨s, rfl⟩ := exists_eq_ofList s
obtain ⟨t, rfl⟩ := exists_eq_ofList t
rw [← String.ofList_append, Pos.Raw.isValid_ofList, Pos.Raw.isValid_ofList, Pos.Raw.isValid_ofList]
refine ⟨?_, ?_⟩
· rintro ⟨j, hj⟩
by_cases h : j ≤ s.length
@ -531,7 +601,7 @@ theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
· refine ⟨s.length + j, ?_⟩
simp only [Pos.Raw.byteIdx_sub_string, byteIdx_rawEndPos, Pos.Raw.le_iff] at hj h
simp only [List.take_append, List.take_of_length_le (i := s.length + j) (l := s) (by omega),
Nat.add_sub_cancel_left, List.asString_append, utf8ByteSize_append]
Nat.add_sub_cancel_left, String.ofList_append, utf8ByteSize_append]
omega
theorem Pos.Raw.IsValid.append_left {t : String} {p : Pos.Raw} (h : p.IsValid t) (s : String) :
@ -573,11 +643,11 @@ theorem endPos_push {s : String} {c : Char} : (s.push c).rawEndPos = s.rawEndPos
theorem push_induction (s : String) (motive : String → Prop) (empty : motive "")
(push : ∀ b c, motive b → motive (b.push c)) : motive s := by
obtain ⟨m, rfl⟩ := s.exists_eq_asString
apply append_singleton_induction m (motive ·.asString)
obtain ⟨m, rfl⟩ := s.exists_eq_ofList
apply append_singleton_induction m (motive <| ofList ·)
· simpa
· intro l c hl
rw [List.asString_append, ← singleton_eq_asString, append_singleton]
rw [String.ofList_append, ← singleton_eq_ofList, append_singleton]
exact push _ _ hl
where
append_singleton_induction (l : List Char) (motive : List Char → Prop) (nil : motive [])
@ -1210,25 +1280,30 @@ theorem isSome_utf8DecodeChar?_zero {b : String} (hb : b ≠ "") : (b.bytes.utf8
rw [eq_comm, rawEndPos_eq_zero_iff]
exact fun h => (hb h).elim
theorem head_data {b : String} {h} :
b.data.head h = b.bytes.utf8DecodeChar 0 (isSome_utf8DecodeChar?_zero (by simpa using h)) := by
obtain ⟨l, rfl⟩ := b.exists_eq_asString
theorem head_toList {b : String} {h} :
b.toList.head h = b.bytes.utf8DecodeChar 0 (isSome_utf8DecodeChar?_zero (by simpa using h)) := by
obtain ⟨l, rfl⟩ := b.exists_eq_ofList
match l with
| [] => simp at h
| c::cs => simp
@[deprecated head_toList (since := "2025-10-30")]
theorem head_data {b : String} {h} :
b.toList.head h = b.bytes.utf8DecodeChar 0 (isSome_utf8DecodeChar?_zero (by simpa using h)) :=
head_toList
theorem get_startValidPos {b : String} (h) :
b.startValidPos.get h = b.data.head (by rwa [ne_eq, data_eq_nil_iff, ← startValidPos_eq_endValidPos_iff]) :=
head_data.symm
b.startValidPos.get h = b.toList.head (by rwa [ne_eq, toList_eq_nil_iff, ← startValidPos_eq_endValidPos_iff]) :=
head_toList.symm
theorem eq_singleton_append {s : String} (h : s.startValidPos ≠ s.endValidPos) :
∃ t, s = singleton (s.startValidPos.get h) ++ t := by
obtain ⟨m, rfl⟩ := s.exists_eq_asString
obtain ⟨m, rfl⟩ := s.exists_eq_ofList
have hm : m ≠ [] := by
rwa [ne_eq, ← List.asString_eq_empty_iff, ← startValidPos_eq_endValidPos_iff]
refine ⟨m.tail.asString, ?_⟩
rwa [ne_eq, ← String.ofList_eq_empty_iff, ← startValidPos_eq_endValidPos_iff]
refine ⟨ofList m.tail, ?_⟩
rw (occs := [1]) [← List.cons_head_tail hm]
rw [← List.singleton_append, List.asString_append, append_left_inj, ← singleton_eq_asString,
rw [← List.singleton_append, String.ofList_append, append_left_inj, ← singleton_eq_ofList,
get_startValidPos]
simp
@ -1713,11 +1788,11 @@ Examples:
-/
@[extern "lean_string_utf8_get", expose]
def Pos.Raw.get (s : @& String) (p : @& Pos.Raw) : Char :=
utf8GetAux s.data 0 p
utf8GetAux s.toList 0 p
@[extern "lean_string_utf8_get", expose, deprecated Pos.Raw.get (since := "2025-10-14")]
def get (s : @& String) (p : @& Pos.Raw) : Char :=
Pos.Raw.utf8GetAux s.data 0 p
Pos.Raw.utf8GetAux s.toList 0 p
@[expose]
def Pos.Raw.utf8GetAux? : List Char → Pos.Raw → Pos.Raw → Option Char
@ -1745,11 +1820,11 @@ Examples:
-/
@[extern "lean_string_utf8_get_opt", expose]
def Pos.Raw.get? : (@& String) → (@& Pos.Raw) → Option Char
| s, p => utf8GetAux? s.data 0 p
| s, p => utf8GetAux? s.toList 0 p
@[extern "lean_string_utf8_get_opt", expose, deprecated Pos.Raw.get? (since := "2025-10-14")]
def get? : (@& String) → (@& Pos.Raw) → Option Char
| s, p => Pos.Raw.utf8GetAux? s.data 0 p
| s, p => Pos.Raw.utf8GetAux? s.toList 0 p
/--
Returns the character at position `p` of a string. Panics if `p` is not a valid position.
@ -1768,12 +1843,12 @@ Examples
@[extern "lean_string_utf8_get_bang", expose]
def Pos.Raw.get! (s : @& String) (p : @& Pos.Raw) : Char :=
match s with
| s => Pos.Raw.utf8GetAux s.data 0 p
| s => Pos.Raw.utf8GetAux s.toList 0 p
@[extern "lean_string_utf8_get_bang", expose, deprecated Pos.Raw.get! (since := "2025-10-14")]
def get! (s : @& String) (p : @& Pos.Raw) : Char :=
match s with
| s => Pos.Raw.utf8GetAux s.data 0 p
| s => Pos.Raw.utf8GetAux s.toList 0 p
@[expose]
def Pos.Raw.utf8SetAux (c' : Char) : List Char → Pos.Raw → Pos.Raw → List Char
@ -1927,11 +2002,11 @@ Examples:
-/
@[extern "lean_string_utf8_prev", expose]
def Pos.Raw.prev : (@& String) → (@& Pos.Raw) → Pos.Raw
| s, p => utf8PrevAux s.data 0 p
| s, p => utf8PrevAux s.toList 0 p
@[extern "lean_string_utf8_prev", expose, deprecated Pos.Raw.prev (since := "2025-10-14")]
def prev : (@& String) → (@& Pos.Raw) → Pos.Raw
| s, p => Pos.Raw.utf8PrevAux s.data 0 p
| s, p => Pos.Raw.utf8PrevAux s.toList 0 p
/--
Returns the first character in `s`. If `s = ""`, returns `(default : Char)`.
@ -2011,12 +2086,12 @@ Examples:
@[extern "lean_string_utf8_get_fast", expose]
def Pos.Raw.get' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Char :=
match s with
| s => Pos.Raw.utf8GetAux s.data 0 p
| s => Pos.Raw.utf8GetAux s.toList 0 p
@[extern "lean_string_utf8_get_fast", expose, deprecated Pos.Raw.get' (since := "2025-10-14")]
def get' (s : @& String) (p : @& Pos.Raw) (h : ¬ p.atEnd s) : Char :=
match s with
| s => Pos.Raw.utf8GetAux s.data 0 p
| s => Pos.Raw.utf8GetAux s.toList 0 p
/--
Returns the next position in a string after position `p`. The result is unspecified if `p` is not a
@ -2209,7 +2284,7 @@ Examples:
-/
@[extern "lean_string_utf8_extract", expose]
def Pos.Raw.extract : (@& String) → (@& Pos.Raw) → (@& Pos.Raw) → String
| s, b, e => if b.byteIdx ≥ e.byteIdx then "" else (go₁ s.data 0 b e).asString
| s, b, e => if b.byteIdx ≥ e.byteIdx then "" else ofList (go₁ s.toList 0 b e)
where
go₁ : List Char → Pos.Raw → Pos.Raw → Pos.Raw → List Char
| [], _, _, _ => []
@ -2563,36 +2638,44 @@ end String
namespace String
@[ext]
theorem ext {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ :=
data_injective h
theorem ext {s₁ s₂ : String} (h : s₁.toList = s₂.toList) : s₁ = s₂ :=
toList_injective h
@[simp] theorem length_empty : "".length = 0 := by simp [← length_data, data_empty]
@[simp] theorem length_empty : "".length = 0 := by simp [← length_toList, toList_empty]
theorem singleton_eq {c : Char} : String.singleton c = [c].asString := by
simp [← bytes_inj]
@[deprecated singleton_eq_ofList (since := "2025-10-30")]
theorem singleton_eq {c : Char} : String.singleton c = ofList [c] :=
singleton_eq_ofList
@[simp] theorem data_singleton (c : Char) : (String.singleton c).data = [c] := by
simp [singleton_eq]
@[simp] theorem toList_singleton (c : Char) : (String.singleton c).toList = [c] := by
simp [singleton_eq_ofList]
@[deprecated toList_singleton (since := "2025-10-30")]
theorem data_singleton (c : Char) : (String.singleton c).toList = [c] :=
toList_singleton c
@[simp]
theorem length_singleton {c : Char} : (String.singleton c).length = 1 := by
simp [← length_data]
simp [← length_toList]
@[simp] theorem data_push (c : Char) : (String.push s c).data = s.data ++ [c] := by
@[simp]
theorem toList_push (c : Char) : (String.push s c).toList = s.toList ++ [c] := by
simp [← append_singleton]
@[deprecated toList_push (since := "2025-10-30")]
theorem data_push (c : Char) : (String.push s c).toList = s.toList ++ [c] :=
toList_push c
@[simp] theorem length_push (c : Char) : (String.push s c).length = s.length + 1 := by
simp [← length_data]
simp [← length_toList]
@[simp] theorem length_pushn (c : Char) (n : Nat) : (pushn s c n).length = s.length + n := by
rw [pushn_eq_repeat_push]; induction n <;> simp [Nat.repeat, Nat.add_assoc, *]
@[simp] theorem length_append (s t : String) : (s ++ t).length = s.length + t.length := by
simp [← length_data]
simp [← length_toList]
attribute [simp] toList -- prefer `String.data` over `String.toList` in lemmas
theorem lt_iff {s t : String} : s < t ↔ s.data < t.data := .rfl
theorem lt_iff {s t : String} : s < t ↔ s.toList < t.toList := .rfl
namespace Pos.Raw
@ -2613,7 +2696,7 @@ theorem lt_next' (s : String) (p : Pos.Raw) : p < p.next s :=
@[simp] theorem Pos.Raw.prev_zero (s : String) : Pos.Raw.prev s 0 = 0 := by
rw [Pos.Raw.prev]
cases s.data <;> simp [utf8PrevAux, Pos.Raw.le_iff]
cases s.toList <;> simp [utf8PrevAux, Pos.Raw.le_iff]
@[deprecated Pos.Raw.prev_zero (since := "2025-10-10")]
theorem prev_zero (s : String) : (0 : Pos.Raw).prev s = 0 := by

View file

@ -138,7 +138,7 @@ Examples:
def String.ofList (data : List Char) : String :=
⟨List.utf8Encode data,.intro data rfl⟩
@[extern "lean_string_mk", expose]
@[extern "lean_string_mk", expose, deprecated String.ofList (since := "2025-10-30")]
def String.mk (data : List Char) : String :=
⟨List.utf8Encode data,.intro data rfl⟩
@ -150,9 +150,9 @@ Examples:
* `[].asString = ""`
* `['a', 'a', 'a'].asString = "aaa"`
-/
@[expose, inline]
@[expose, inline, deprecated String.ofList (since := "2025-10-30")]
def List.asString (s : List Char) : String :=
String.mk s
String.ofList s
namespace Substring.Internal

View file

@ -108,14 +108,23 @@ theorem String.bytes_inj {s t : String} : s.bytes = t.bytes ↔ s = t := by
subst h
rfl
@[simp] theorem List.bytes_asString {l : List Char} : l.asString.bytes = l.utf8Encode := by
simp [List.asString, String.mk]
@[simp] theorem String.bytes_ofList {l : List Char} : (String.ofList l).bytes = l.utf8Encode := by
simp [String.ofList]
theorem String.exists_eq_asString (s : String) :
∃ l : List Char, s = l.asString := by
@[deprecated String.bytes_ofList (since := "2025-10-30")]
theorem List.bytes_asString {l : List Char} : (String.ofList l).bytes = l.utf8Encode :=
String.bytes_ofList
theorem String.exists_eq_ofList (s : String) :
∃ l : List Char, s = String.ofList l := by
rcases s with ⟨_, ⟨l, rfl⟩⟩
refine ⟨l, by simp [← String.bytes_inj]⟩
@[deprecated String.exists_eq_ofList (since := "2025-10-30")]
theorem String.exists_eq_asString (s : String) :
∃ l : List Char, s = String.ofList l :=
s.exists_eq_ofList
@[simp]
theorem String.utf8ByteSize_empty : "".utf8ByteSize = 0 := (rfl)
@ -158,9 +167,13 @@ theorem utf8ByteSize_ofByteArray {b : ByteArray} {h} :
theorem bytes_singleton {c : Char} : (String.singleton c).bytes = [c].utf8Encode := by
simp [singleton]
theorem singleton_eq_asString {c : Char} : String.singleton c = [c].asString := by
theorem singleton_eq_ofList {c : Char} : String.singleton c = String.ofList [c] := by
simp [← String.bytes_inj]
@[deprecated singleton_eq_ofList (since := "2025-10-30")]
theorem singleton_eq_asString {c : Char} : String.singleton c = String.ofList [c] :=
singleton_eq_ofList
@[simp]
theorem append_singleton {s : String} {c : Char} : s ++ singleton c = s.push c := by
simp [← bytes_inj]
@ -586,9 +599,6 @@ def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=
@[simp] theorem default_eq : default = "" := rfl
@[simp]
theorem mk_eq_asString (s : List Char) : String.mk s = List.asString s := rfl
theorem push_eq_append (c : Char) : String.push s c = s ++ singleton c := by
simp

View file

@ -19,10 +19,12 @@ open Std
namespace String
protected theorem data_eq_of_eq {a b : String} (h : a = b) : a.data = b.data :=
@[deprecated toList_inj (since := "2025-10-30")]
protected theorem data_eq_of_eq {a b : String} (h : a = b) : a.toList = b.toList :=
h ▸ rfl
protected theorem ne_of_data_ne {a b : String} (h : a.data ≠ b.data) : a ≠ b :=
fun h' => absurd (String.data_eq_of_eq h') h
@[deprecated toList_inj (since := "2025-10-30")]
protected theorem ne_of_data_ne {a b : String} (h : a.toList ≠ b.toList) : a ≠ b := by
simpa [← toList_inj]
@[simp] protected theorem not_le {a b : String} : ¬ a ≤ b ↔ b < a := Decidable.not_not
@[simp] protected theorem not_lt {a b : String} : ¬ a < b ↔ b ≤ a := Iff.rfl
@ -34,7 +36,7 @@ attribute [local instance] Char.notLTTrans Char.notLTAntisymm Char.notLTTotal
protected theorem le_trans {a b c : String} : a ≤ b → b ≤ c → a ≤ c := List.le_trans
protected theorem lt_trans {a b c : String} : a < b → b < c → a < c := List.lt_trans
protected theorem le_total (a b : String) : a ≤ b b ≤ a := List.le_total _ _
protected theorem le_antisymm {a b : String} : a ≤ b → b ≤ a → a = b := fun h₁ h₂ => String.ext (List.le_antisymm (as := a.data) (bs := b.data) h₁ h₂)
protected theorem le_antisymm {a b : String} : a ≤ b → b ≤ a → a = b := fun h₁ h₂ => String.ext (List.le_antisymm (as := a.toList) (bs := b.toList) h₁ h₂)
protected theorem lt_asymm {a b : String} (h : a < b) : ¬ b < a := List.lt_asymm h
protected theorem ne_of_lt {a b : String} (h : a < b) : a ≠ b := by
have := String.lt_irrefl a

View file

@ -160,11 +160,11 @@ Examples:
-/
@[extern "lean_string_utf8_set", expose]
def Pos.Raw.set : String → (@& Pos.Raw) → Char → String
| s, i, c => (Pos.Raw.utf8SetAux c s.data 0 i).asString
| s, i, c => ofList (Pos.Raw.utf8SetAux c s.toList 0 i)
@[extern "lean_string_utf8_set", expose, deprecated Pos.Raw.set (since := "2025-10-14")]
def set : String → (@& Pos.Raw) → Char → String
| s, i, c => (Pos.Raw.utf8SetAux c s.data 0 i).asString
| s, i, c => ofList (Pos.Raw.utf8SetAux c s.toList 0 i)
/--
Replaces the character at position `p` in the string `s` with the result of applying `f` to that

View file

@ -621,7 +621,7 @@ mutual
asStringFn (atomicFn (noSpaceBefore >> repFn count (satisfyFn (· == char) s!"'{tok count}'"))))
where
tok (count : Nat) : String := (List.replicate count char).asString
tok (count : Nat) : String := String.ofList (List.replicate count char)
opener (ctxt : InlineCtxt) : ParserFn :=
match getter ctxt with
| none => many1Fn (satisfyFn (· == char) s!"any number of {char}s")
@ -665,7 +665,7 @@ mutual
where
opener : ParserFn := asStringFn (many1Fn (satisfyFn (· == '`') s!"any number of backticks"))
closer (count : Nat) : ParserFn :=
asStringFn (atomicFn (repFn count (satisfyFn' (· == '`') s!"expected '{String.mk (.replicate count '`')}' to close inline code"))) >>
asStringFn (atomicFn (repFn count (satisfyFn' (· == '`') s!"expected '{String.ofList (.replicate count '`')}' to close inline code"))) >>
notFollowedByFn (satisfyFn (· == '`') "`") "backtick"
takeBackticksFn : Nat → ParserFn
| 0 => satisfyFn (fun _ => false)
@ -1081,7 +1081,7 @@ mutual
(closeFence l fenceWidth >>
withFence 0 fun info _ c s =>
if (c.fileMap.toPosition info.getPos?.get!).column != col then
s.mkErrorAt s!"closing '{String.mk <| List.replicate fenceWidth ':'}' from directive on line {l} at column {col}, but it's at column {(c.fileMap.toPosition info.getPos?.get!).column}" info.getPos?.get!
s.mkErrorAt s!"closing '{String.ofList <| List.replicate fenceWidth ':'}' from directive on line {l} at column {col}, but it's at column {(c.fileMap.toPosition info.getPos?.get!).column}" info.getPos?.get!
else
s))
@ -1114,7 +1114,7 @@ mutual
else skipFn
closeFence (line width : Nat) :=
let str := String.mk (.replicate width ':')
let str := String.ofList (.replicate width ':')
bolThen (description := s!"closing '{str}' for directive from line {line}")
(eatSpaces >>
asStringFn (strFn str) >> notFollowedByFn (chFn ':') "':'" >>

View file

@ -108,7 +108,7 @@ def mkMissingFieldsHint (fields : Array (Name × Option Expr)) (stx : Syntax) :
match interveningLineEndPos? with
| none => (.line, .nil)
| some interveningLineEndPos =>
(String.mk (List.replicate (indent - col interveningLineEndPos) ' '), .nil)
(String.ofList (List.replicate (indent - col interveningLineEndPos) ' '), .nil)
let suggestionText := preWs ++ suggestionText ++ postWs
let insPos := view.lastFieldTailPos?.getD <| interveningLineEndPos?.getD view.leaderTailPos
let width := Tactic.TryThis.format.inputWidth.get (← getOptions)

View file

@ -86,7 +86,7 @@ where
stringContents : Parser String := attempt do
let escaped := pchar '\\' *> pchar '"'
let cs ← many (notFollowedBy (pchar '"') *> (escaped <|> any))
return String.mk cs.toList
return String.ofList cs.toList
/--
Parses all input up to the next whitespace. If `nonempty` is `true`, fails if there is no input

View file

@ -196,8 +196,8 @@ such as `b̵a̵c̲h̲e̲e̲rs̲`.
-/
private def mkDiffString (ds : Array (Diff.Action × String)) : String :=
let rangeStrs := ds.map fun
| (.insert, s) => String.mk (s.data.flatMap ([·, '\u0332'])) -- U+0332 Combining Low Line
| (.delete, s) => String.mk (s.data.flatMap ([·, '\u0335'])) -- U+0335 Combining Short Stroke Overlay
| (.insert, s) => String.ofList (s.toList.flatMap ([·, '\u0332'])) -- U+0332 Combining Low Line
| (.delete, s) => String.ofList (s.toList.flatMap ([·, '\u0335'])) -- U+0335 Combining Short Stroke Overlay
| (.skip , s) => s
rangeStrs.foldl (· ++ ·) ""
@ -277,7 +277,7 @@ partial def readableDiff (s s' : String) (granularity : DiffGranularity := .auto
-- front and back, or at a single interior point. This will always be fairly readable (and
-- splitting by a larger unit would likely only be worse)
if charArrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then
charArrDiff.map fun (act, cs) => (act, String.mk cs.toList)
charArrDiff.map fun (act, cs) => (act, String.ofList cs.toList)
else if approxEditDistance ≤ maxWordDiffDistance then
wordDiff
else
@ -375,14 +375,14 @@ where
/-- Given a `Char` diff, produces an equivalent `String` diff, joining actions of the same kind. -/
joinCharDiff (d : Array (Diff.Action × Char)) :=
joinEdits d |>.map fun (act, cs) => (act, String.mk cs.toList)
joinEdits d |>.map fun (act, cs) => (act, String.ofList cs.toList)
maxDiff :=
#[(.delete, s), (.insert, s')]
mkWhitespaceDiff (oldWs newWs : String) :=
if !oldWs.contains '\n' then
Diff.diff oldWs.data.toArray newWs.data.toArray |> joinCharDiff
Diff.diff oldWs.toList.toArray newWs.toList.toArray |> joinCharDiff
else
#[(.skip, newWs)]

View file

@ -1894,7 +1894,7 @@ def anchorPrefixToString (numDigits : Nat) (anchorPrefix : UInt64) : String :=
let n := cs.length
let zs := List.replicate (numDigits - n) '0'
let cs := zs ++ cs
cs.asString
String.ofList cs
def anchorToString (numDigits : Nat) (anchor : UInt64) : String :=
anchorPrefixToString numDigits (anchor >>> (64 - 4*numDigits.toUInt64))

View file

@ -31,8 +31,8 @@ private partial def reduceListChar (e : Expr) (s : String) : SimpM DStep := do
else
return .continue
builtin_dsimproc [simp, seval] reduceMk (String.mk _) := fun e => do
unless e.isAppOfArity ``String.mk 1 do return .continue
builtin_dsimproc [simp, seval] reduceOfList (String.ofList _) := fun e => do
unless e.isAppOfArity ``String.ofList 1 do return .continue
reduceListChar e.appArg! ""
@[inline] def reduceBinPred (declName : Name) (arity : Nat) (op : String → String → Bool) (e : Expr) : SimpM Step := do

View file

@ -139,7 +139,7 @@ public def ofHex? (s : String) : Option Hash :=
/-- Returns the hash as 16-digit lowercase hex string. -/
public def hex (self : Hash) : String :=
lpad (Nat.toDigits 16 self.val.toNat).asString '0' 16
lpad (String.ofList <| Nat.toDigits 16 self.val.toNat) '0' 16
public def ofDecimal? (s : String) : Option Hash :=
(inline s.toNat?).map ofNat

View file

@ -65,7 +65,7 @@ public def ppString (s : String) : String :=
| '\\' => s ++ "\\\\"
| _ =>
if c.val < 0x20 || c.val == 0x7F then
s ++ "\\u" ++ lpad (String.mk <| Nat.toDigits 16 c.val.toNat) '0' 4
s ++ "\\u" ++ lpad (String.ofList <| Nat.toDigits 16 c.val.toNat) '0' 4
else
s.push c
s.push '\"'