From 9f41f3324a95595b2126e16507f2e334299ddecd Mon Sep 17 00:00:00 2001 From: Mario Carneiro Date: Thu, 25 Sep 2025 07:08:41 +0200 Subject: [PATCH] fix: make Substring.beq reflexive (#10552) This PR ensures that `Substring.beq` is reflexive, and in particular satisfies the equivalence `ss1 == ss2 <-> ss1.toString = ss2.toString`. Closes #10511. Note: I also fixed a strange line in the `String.extract` documentation which looks like it may have been a copypasta, and added another example to show how invalid UTF8 positions work, but the doc also makes a point of saying that it is unspecified so maybe it would be better not to have the example? :shrug: --- src/Init/Data/String/Basic.lean | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 82b8be79ce..891a9702d5 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -2343,7 +2343,8 @@ Examples: * `"red green blue".extract ⟨3⟩ ⟨0⟩ = ""` * `"red green blue".extract ⟨0⟩ ⟨100⟩ = "red green blue"` * `"red green blue".extract ⟨4⟩ ⟨100⟩ = "green blue"` -* `"L∃∀N".extract ⟨2⟩ ⟨100⟩ = "green blue"` +* `"L∃∀N".extract ⟨1⟩ ⟨2⟩ = "∃∀N"` +* `"L∃∀N".extract ⟨2⟩ ⟨100⟩ = ""` -/ @[extern "lean_string_utf8_extract", expose] def extract : (@& String) → (@& Pos) → (@& Pos) → String @@ -3410,6 +3411,14 @@ def toNat? (s : Substring) : Option Nat := else none +/-- +Given a `Substring`, returns another one which has valid endpoints +and represents the same substring according to `Substring.toString`. +(Note, the substring may still be inverted, i.e. beginning greater than end.) +-/ +def repair : Substring → Substring + | ⟨s, b, e⟩ => ⟨s, if b.isValid s then b else s.endPos, if e.isValid s then e else s.endPos⟩ + /-- Checks whether two substrings represent equal strings. Usually accessed via the `==` operator. @@ -3417,6 +3426,8 @@ Two substrings do not need to have the same underlying string or the same start instead, they are equal if they contain the same sequence of characters. -/ def beq (ss1 ss2 : Substring) : Bool := + let ss1 := ss1.repair + let ss2 := ss2.repair ss1.bsize == ss2.bsize && ss1.str.substrEq ss1.startPos ss2.str ss2.startPos ss1.bsize @[export lean_substring_beq]