From 59573646c227d940962c08a1e77ce51177a024ea Mon Sep 17 00:00:00 2001 From: Markus Himmel Date: Thu, 23 Oct 2025 15:57:23 +0200 Subject: [PATCH] chore: more minor `String` improvements (#10930) This PR moves some more material out of `Init.Data.String.Basic` and fixes the incorrect name `String.Pos.Raw.IsValidForSlice.le_utf8ByteSize`. --- src/Init/Data/String/Basic.lean | 81 ++--------------------- src/Init/Data/String/Defs.lean | 82 +++++++++++++++++++++++- src/Init/Data/String/Pattern/Char.lean | 2 +- src/Init/Data/String/Pattern/Pred.lean | 2 +- src/Init/Data/String/Pattern/String.lean | 2 +- src/Init/Data/String/Slice.lean | 2 +- 6 files changed, 91 insertions(+), 80 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index d9b49aa640..868250e8de 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -893,7 +893,7 @@ theorem Slice.Pos.offset_str {s : Slice} {pos : s.Pos} : @[simp] theorem Slice.Pos.offset_str_le_offset_endExclusive {s : Slice} {pos : s.Pos} : pos.str.offset ≤ s.endExclusive.offset := by - have := pos.isValidForSlice.le_utf8ByteSize + have := pos.isValidForSlice.le_rawEndPos have := s.startInclusive_le_endExclusive simp only [Pos.Raw.le_iff, byteIdx_rawEndPos, utf8ByteSize_eq, offset_str, Pos.Raw.byteIdx_offsetBy, ValidPos.le_iff] at * @@ -1012,7 +1012,7 @@ theorem Slice.utf8ByteSize_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos} theorem Pos.Raw.isValidForSlice_replaceStart {s : Slice} {p : s.Pos} {off : Pos.Raw} : off.IsValidForSlice (s.replaceStart p) ↔ (off.offsetBy p.offset).IsValidForSlice s := by refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩ - · have := p.isValidForSlice.le_utf8ByteSize + · have := p.isValidForSlice.le_rawEndPos simp_all [le_iff] omega · simpa [Pos.Raw.offsetBy_assoc] using h₂ @@ -1025,7 +1025,7 @@ theorem Pos.Raw.isValidForSlice_replaceEnd {s : Slice} {p : s.Pos} {off : Pos.Ra refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_, ?_⟩, fun ⟨h₁, ⟨h₂, h₃⟩⟩ => ⟨?_, ?_⟩⟩ · simpa using h₁ · simp only [Slice.rawEndPos_replaceEnd] at h₁ - exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_utf8ByteSize + exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_rawEndPos · simpa using h₂ · simpa using h₁ · simpa using h₃ @@ -1297,7 +1297,7 @@ theorem Slice.Pos.ofReplaceStart_startPos {s : Slice} {pos : s.Pos} : @[simp] theorem Slice.Pos.ofReplaceStart_endPos {s : Slice} {pos : s.Pos} : ofReplaceStart (s.replaceStart pos).endPos = s.endPos := by - have := pos.isValidForSlice.le_utf8ByteSize + have := pos.isValidForSlice.le_rawEndPos simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] theorem Slice.Pos.ofReplaceStart_inj {s : Slice} {p₀ : s.Pos} {pos pos' : (s.replaceStart p₀).Pos} : @@ -1388,7 +1388,7 @@ theorem Slice.Pos.lt_next {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} : @[inline, expose] def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : String.Pos.Raw := go (pos.offset.byteIdx - 1) (by - have := pos.isValidForSlice.le_utf8ByteSize + have := pos.isValidForSlice.le_rawEndPos simp [Pos.Raw.le_iff, Pos.ext_iff] at ⊢ this h omega) where @@ -1603,7 +1603,7 @@ theorem Slice.Pos.prevAux_lt_self {s : Slice} {p : s.Pos} {h} : p.prevAux h < p. omega theorem Slice.Pos.prevAux_lt_rawEndPos {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.rawEndPos := - Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_utf8ByteSize + Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_rawEndPos @[simp] theorem Slice.Pos.prev_ne_endPos {s : Slice} {p : s.Pos} {h} : p.prev h ≠ s.endPos := by @@ -2256,73 +2256,6 @@ Examples: @[inline] def splitOn (s : String) (sep : String := " ") : List String := if sep == "" then [s] else splitOnAux s sep 0 0 0 [] -/-- -Adds multiple repetitions of a character to the end of a string. - -Returns `s`, with `n` repetitions of `c` at the end. Internally, the implementation repeatedly calls -`String.push`, so the string is modified in-place if there is a unique reference to it. - -Examples: - * `"indeed".pushn '!' 2 = "indeed!!"` - * `"indeed".pushn '!' 0 = "indeed"` - * `"".pushn ' ' 4 = " "` --/ -@[inline] def pushn (s : String) (c : Char) (n : Nat) : String := - n.repeat (fun s => s.push c) s - -@[export lean_string_pushn] -def Internal.pushnImpl (s : String) (c : Char) (n : Nat) : String := - String.pushn s c n - -/-- -Checks whether a string is empty. - -Empty strings are equal to `""` and have length and end position `0`. - -Examples: - * `"".isEmpty = true` - * `"empty".isEmpty = false` - * `" ".isEmpty = false` --/ -@[inline] def isEmpty (s : String) : Bool := - s.rawEndPos == 0 - -@[export lean_string_isempty] -def Internal.isEmptyImpl (s : String) : Bool := - String.isEmpty s - -/-- -Appends all the strings in a list of strings, in order. - -Use `String.intercalate` to place a separator string between the strings in a list. - -Examples: - * `String.join ["gr", "ee", "n"] = "green"` - * `String.join ["b", "", "l", "", "ue"] = "blue"` - * `String.join [] = ""` --/ -@[inline] def join (l : List String) : String := - l.foldl (fun r s => r ++ s) "" - -/-- -Appends the strings in a list of strings, placing the separator `s` between each pair. - -Examples: - * `", ".intercalate ["red", "green", "blue"] = "red, green, blue"` - * `" and ".intercalate ["tea", "coffee"] = "tea and coffee"` - * `" | ".intercalate ["M", "", "N"] = "M | | N"` --/ -def intercalate (s : String) : List String → String - | [] => "" - | a :: as => go a s as -where go (acc : String) (s : String) : List String → String - | a :: as => go (acc ++ s ++ a) s as - | [] => acc - -@[export lean_string_intercalate] -def Internal.intercalateImpl (s : String) : List String → String := - String.intercalate s - def offsetOfPosAux (s : String) (pos : Pos.Raw) (i : Pos.Raw) (offset : Nat) : Nat := if i >= pos then offset @@ -2592,7 +2525,7 @@ theorem length_singleton {c : Char} : (String.singleton c).length = 1 := by simp [← length_data] @[simp] theorem length_pushn (c : Char) (n : Nat) : (pushn s c n).length = s.length + n := by - unfold pushn; induction n <;> simp [Nat.repeat, Nat.add_assoc, *] + rw [pushn_eq_repeat_push]; induction n <;> simp [Nat.repeat, Nat.add_assoc, *] @[simp] theorem length_append (s t : String) : (s ++ t).length = s.length + t.length := by simp [← length_data] diff --git a/src/Init/Data/String/Defs.lean b/src/Init/Data/String/Defs.lean index ab4a45afdf..51ad0c1a26 100644 --- a/src/Init/Data/String/Defs.lean +++ b/src/Init/Data/String/Defs.lean @@ -185,6 +185,76 @@ theorem rawEndPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" := by theorem endPos_eq_zero_iff {b : String} : b.rawEndPos = 0 ↔ b = "" := rawEndPos_eq_zero_iff +/-- +Adds multiple repetitions of a character to the end of a string. + +Returns `s`, with `n` repetitions of `c` at the end. Internally, the implementation repeatedly calls +`String.push`, so the string is modified in-place if there is a unique reference to it. + +Examples: + * `"indeed".pushn '!' 2 = "indeed!!"` + * `"indeed".pushn '!' 0 = "indeed"` + * `"".pushn ' ' 4 = " "` +-/ +@[inline] def pushn (s : String) (c : Char) (n : Nat) : String := + n.repeat (fun s => s.push c) s + +theorem pushn_eq_repeat_push {s : String} {c : Char} {n : Nat} : + s.pushn c n = n.repeat (fun s => s.push c) s := (rfl) + +@[export lean_string_pushn] +def Internal.pushnImpl (s : String) (c : Char) (n : Nat) : String := + String.pushn s c n + +/-- +Checks whether a string is empty. + +Empty strings are equal to `""` and have length and end position `0`. + +Examples: + * `"".isEmpty = true` + * `"empty".isEmpty = false` + * `" ".isEmpty = false` +-/ +@[inline] def isEmpty (s : String) : Bool := + s.utf8ByteSize == 0 + +@[export lean_string_isempty] +def Internal.isEmptyImpl (s : String) : Bool := + String.isEmpty s + +/-- +Appends all the strings in a list of strings, in order. + +Use `String.intercalate` to place a separator string between the strings in a list. + +Examples: + * `String.join ["gr", "ee", "n"] = "green"` + * `String.join ["b", "", "l", "", "ue"] = "blue"` + * `String.join [] = ""` +-/ +@[inline] def join (l : List String) : String := + l.foldl (fun r s => r ++ s) "" + +/-- +Appends the strings in a list of strings, placing the separator `s` between each pair. + +Examples: + * `", ".intercalate ["red", "green", "blue"] = "red, green, blue"` + * `" and ".intercalate ["tea", "coffee"] = "tea and coffee"` + * `" | ".intercalate ["M", "", "N"] = "M | | N"` +-/ +def intercalate (s : String) : List String → String + | [] => "" + | a :: as => go a s as +where go (acc : String) (s : String) : List String → String + | a :: as => go (acc ++ s ++ a) s as + | [] => acc + +@[export lean_string_intercalate] +def Internal.intercalateImpl (s : String) : List String → String := + String.intercalate s + /-- Predicate for validity of positions inside a `String`. @@ -219,6 +289,10 @@ structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where priva le_rawEndPos : off ≤ s.rawEndPos isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8 +theorem Pos.Raw.IsValid.le_utf8ByteSize {s : String} {off : String.Pos.Raw} (h : off.IsValid s) : + off.byteIdx ≤ s.utf8ByteSize := by + simpa [Pos.Raw.le_iff] using h.le_rawEndPos + theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} : p.IsValid s ↔ p ≤ s.rawEndPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 := ⟨fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩ @@ -371,9 +445,13 @@ theorem Slice.byteIdx_rawEndPos {s : Slice} : s.rawEndPos.byteIdx = s.utf8ByteSi /-- Criterion for validity of positions in string slices. -/ structure Pos.Raw.IsValidForSlice (s : Slice) (p : Pos.Raw) : Prop where - le_utf8ByteSize : p ≤ s.rawEndPos + le_rawEndPos : p ≤ s.rawEndPos isValid_offsetBy : (p.offsetBy s.startInclusive.offset).IsValid s.str +theorem Pos.Raw.IsValidForSlice.le_utf8ByteSize {s : Slice} {p : Pos.Raw} + (h : p.IsValidForSlice s) : p.byteIdx ≤ s.utf8ByteSize := by + simpa [Pos.Raw.le_iff] using h.le_rawEndPos + /-- Accesses the indicated byte in the UTF-8 encoding of a string slice. @@ -479,7 +557,7 @@ instance {s : Slice} (l r : s.Pos) : Decidable (l < r) := @[inline, expose] def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 := s.getUTF8Byte pos.offset (by - have := pos.isValidForSlice.le_utf8ByteSize + have := pos.isValidForSlice.le_rawEndPos simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff] omega) diff --git a/src/Init/Data/String/Pattern/Char.lean b/src/Init/Data/String/Pattern/Char.lean index a7d3b4ec4f..12a00eebfe 100644 --- a/src/Init/Data/String/Pattern/Char.lean +++ b/src/Init/Data/String/Pattern/Char.lean @@ -71,7 +71,7 @@ def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharSearcher s obtain ⟨_, h1, h2, _⟩ := h' have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1) have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize - simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] at h1 h2 h4 + simp [Pos.ext_iff, String.Pos.Raw.ext_iff] at h1 h2 h4 omega · cases h' · cases h diff --git a/src/Init/Data/String/Pattern/Pred.lean b/src/Init/Data/String/Pattern/Pred.lean index 2ad31534d7..c9afdf568c 100644 --- a/src/Init/Data/String/Pattern/Pred.lean +++ b/src/Init/Data/String/Pattern/Pred.lean @@ -73,7 +73,7 @@ def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharPredSearch obtain ⟨_, h1, h2, _⟩ := h' have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1) have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize - simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] at h1 h2 h4 + simp [Pos.ext_iff, String.Pos.Raw.ext_iff] at h1 h2 h4 omega · cases h' · cases h diff --git a/src/Init/Data/String/Pattern/String.lean b/src/Init/Data/String/Pattern/String.lean index ba549ffe7f..7c2e36bbbe 100644 --- a/src/Init/Data/String/Pattern/String.lean +++ b/src/Init/Data/String/Pattern/String.lean @@ -177,7 +177,7 @@ private def finitenessRelation : apply Prod.Lex.right' · simp · have haux := np.isValidForSlice.le_utf8ByteSize - simp [Slice.Pos.lt_iff, String.Pos.Raw.le_iff, String.Pos.Raw.lt_iff] at h1' haux ⊢ + simp [Slice.Pos.lt_iff, String.Pos.Raw.lt_iff] at h1' haux ⊢ omega · apply Prod.Lex.left simp [h'] diff --git a/src/Init/Data/String/Slice.lean b/src/Init/Data/String/Slice.lean index 4ae58877d2..f227f77c84 100644 --- a/src/Init/Data/String/Slice.lean +++ b/src/Init/Data/String/Slice.lean @@ -861,7 +861,7 @@ private def finitenessRelation [Pure m] : obtain ⟨h1, h2, _⟩ := h' have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1) have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize - simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] at h1 h2 h4 + simp [Pos.ext_iff, String.Pos.Raw.ext_iff] at h1 h2 h4 omega · cases h' · cases h