diff --git a/src/Init/Data/String.lean b/src/Init/Data/String.lean index 241ff1f743..a677bfa848 100644 --- a/src/Init/Data/String.lean +++ b/src/Init/Data/String.lean @@ -22,3 +22,5 @@ public import Init.Data.String.Substring public import Init.Data.String.TakeDrop public import Init.Data.String.Modify public import Init.Data.String.Termination +public import Init.Data.String.ToSlice +public import Init.Data.String.Search diff --git a/src/Init/Data/String/Modify.lean b/src/Init/Data/String/Modify.lean index 26dac98ad4..66eb8fe73f 100644 --- a/src/Init/Data/String/Modify.lean +++ b/src/Init/Data/String/Modify.lean @@ -208,32 +208,6 @@ Examples: @[inline] def map (f : Char → Char) (s : String) : String := mapAux f s s.startValidPos -/-- -In the string `s`, replaces all occurrences of `pattern` with `replacement`. - -Examples: -* `"red green blue".replace "e" "" = "rd grn blu"` -* `"red green blue".replace "ee" "E" = "red grEn blue"` -* `"red green blue".replace "e" "E" = "rEd grEEn bluE"` --/ -def replace (s pattern replacement : String) : String := - if h : pattern.rawEndPos.1 = 0 then s - else - have hPatt := Nat.zero_lt_of_ne_zero h - let rec loop (acc : String) (accStop pos : String.Pos.Raw) := - if h : pos.byteIdx + pattern.rawEndPos.byteIdx > s.rawEndPos.byteIdx then - acc ++ accStop.extract s s.rawEndPos - else - have := Nat.lt_of_lt_of_le (Nat.add_lt_add_left hPatt _) (Nat.ge_of_not_lt h) - if Pos.Raw.substrEq s pos pattern 0 pattern.rawEndPos.byteIdx then - have := Nat.sub_lt_sub_left this (Nat.add_lt_add_left hPatt _) - loop (acc ++ accStop.extract s pos ++ replacement) (pos + pattern) (pos + pattern) - else - have := Nat.sub_lt_sub_left this (Pos.Raw.lt_next s pos) - loop acc accStop (pos.next s) - termination_by s.rawEndPos.1 - pos.1 - loop "" 0 0 - /-- Replaces each character in `s` with the result of applying `Char.toUpper` to it. diff --git a/src/Init/Data/String/Search.lean b/src/Init/Data/String/Search.lean new file mode 100644 index 0000000000..eee377fa22 --- /dev/null +++ b/src/Init/Data/String/Search.lean @@ -0,0 +1,56 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus Himmel +-/ +module + +prelude +public import Init.Data.String.Slice + +set_option doc.verso true + +/-! +# String operations based on slice operations + +This file contains {name}`String` operations that are implemented by passing to +{name}`String.Slice`. +-/ + +public section + +namespace String + +section +open String.Slice Pattern + +variable {ρ : Type} {σ : Slice → Type} +variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)] +variable [∀ s, Std.Iterators.Finite (σ s) Id] +variable [∀ s, Std.Iterators.IteratorLoop (σ s) Id Id] + + +/-- +Constructs a new string obtained by replacing all occurrences of {name}`pattern` with +{name}`replacement` in {name}`s`. + +This function is generic over all currently supported patterns. The replacement may be a +{name}`String` or a {name}`String.Slice`. + +Examples: +* {lean}`"red green blue".replace 'e' "" = "rd grn blu"` +* {lean}`"red green blue".replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"` +* {lean}`"red green blue".replace "e" "" = "rd grn blu"` +* {lean}`"red green blue".replace "ee" "E" = "red grEn blue"` +* {lean}`"red green blue".replace "e" "E" = "rEd grEEn bluE"` +* {lean}`"aaaaa".replace "aa" "b" = "bba"` +* {lean}`"abc".replace "" "k" = "kakbkck"` +-/ +@[inline] +def replace [ToForwardSearcher ρ σ] [ToSlice α] (s : String) (pattern : ρ) + (replacement : α) : String := + s.toSlice.replace pattern replacement + +end + +end String diff --git a/src/Init/Data/String/Slice.lean b/src/Init/Data/String/Slice.lean index f227f77c84..8fc9111d2f 100644 --- a/src/Init/Data/String/Slice.lean +++ b/src/Init/Data/String/Slice.lean @@ -9,6 +9,7 @@ prelude public import Init.Data.String.Pattern public import Init.Data.Ord.Basic public import Init.Data.Iterators.Combinators.FilterMap +public import Init.Data.String.ToSlice set_option doc.verso true @@ -36,6 +37,12 @@ public section namespace String.Slice +instance : HAppend String String.Slice String where + -- This implementation performs an unnecessary copy which could be avoided by providing a custom + -- C++ implementation for this instance. Note: if `String` had no custom runtime representation + -- at all, then this would be very easy to get right from Lean using `ByteArray.copySlice`. + hAppend s t := s ++ t.copy + open Pattern /-- @@ -349,6 +356,28 @@ Examples: def dropPrefix [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice := dropPrefix? s pat |>.getD s +/-- +Constructs a new string obtained by replacing all occurrences of {name}`pattern` with +{name}`replacement` in {name}`s`. + +This function is generic over all currently supported patterns. The replacement may be a +{name}`String` or a {name}`String.Slice`. + +Examples: +* {lean}`"red green blue".toSlice.replace 'e' "" = "rd grn blu"` +* {lean}`"red green blue".toSlice.replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"` +* {lean}`"red green blue".toSlice.replace "e" "" = "rd grn blu"` +* {lean}`"red green blue".toSlice.replace "ee" "E" = "red grEn blue"` +* {lean}`"red green blue".toSlice.replace "e" "E" = "rEd grEEn bluE"` +* {lean}`"aaaaa".toSlice.replace "aa" "b" = "bba"` +* {lean}`"abc".toSlice.replace "" "k" = "kakbkck"` +-/ +def replace [ToForwardSearcher ρ σ] [ToSlice α] (s : Slice) (pattern : ρ) (replacement : α) : + String := + (ToForwardSearcher.toSearcher s pattern).fold (init := "") (fun + | sofar, .matched .. => sofar ++ ToSlice.toSlice replacement + | sofar, .rejected start stop => sofar ++ s.replaceStartEnd! start stop) + /-- Removes the specified number of characters (Unicode code points) from the start of the slice. diff --git a/src/Init/Data/String/ToSlice.lean b/src/Init/Data/String/ToSlice.lean new file mode 100644 index 0000000000..eb8e52db88 --- /dev/null +++ b/src/Init/Data/String/ToSlice.lean @@ -0,0 +1,37 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus Himmel +-/ +module + +prelude +public import Init.Data.String.Defs + +/-! +# The `ToSlice` typeclass +-/ + +public section + +namespace String + +/-- +Typeclass for types that have a conversion function to `String.Slice`. This typeclass is used to +make some `String`/`String.Slice` functions polymorphic. As such, for now it is not intended that +there instances of this beyond `ToSlice String` and `ToSlice String.Slice`. + +To convert arbitrary data into a string representation, see `ToString` and `Repr`. +-/ +class ToSlice (α : Type u) where + toSlice : α → String.Slice + +@[inline] +instance : ToSlice String.Slice where + toSlice := id + +@[inline] +instance : ToSlice String where + toSlice := toSlice + +end String diff --git a/src/Lean/Compiler/FFI.lean b/src/Lean/Compiler/FFI.lean index c56612b4ce..ab4662a922 100644 --- a/src/Lean/Compiler/FFI.lean +++ b/src/Lean/Compiler/FFI.lean @@ -7,7 +7,7 @@ module prelude public import Init.System.FilePath -import Init.Data.String.Modify +import Init.Data.String.Search public section diff --git a/src/Lean/Data/Lsp/Utf16.lean b/src/Lean/Data/Lsp/Utf16.lean index 8d04c5c62f..4cdadab483 100644 --- a/src/Lean/Data/Lsp/Utf16.lean +++ b/src/Lean/Data/Lsp/Utf16.lean @@ -7,7 +7,6 @@ Authors: Marc Huisinga, Wojciech Nawrocki module prelude -public import Init.Data.String public import Lean.Data.Lsp.BasicAux public import Lean.DeclarationRange diff --git a/src/Lean/DocString/Markdown.lean b/src/Lean/DocString/Markdown.lean index 649b159549..1d32f92579 100644 --- a/src/Lean/DocString/Markdown.lean +++ b/src/Lean/DocString/Markdown.lean @@ -12,7 +12,7 @@ import Init.Data.Ord public import Lean.DocString.Types public import Init.Data.String.TakeDrop import Init.Data.String.Iterator -public import Init.Data.String.Modify +public import Init.Data.String.Search set_option linter.missingDocs true diff --git a/tests/lean/run/string_replace.lean b/tests/lean/run/string_replace.lean new file mode 100644 index 0000000000..9a90ac0b6e --- /dev/null +++ b/tests/lean/run/string_replace.lean @@ -0,0 +1,21 @@ +module + +def isVowel (c : Char) : Bool := + c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u' + +#guard "education".replace isVowel "☃!" = "☃!d☃!c☃!t☃!☃!n" +#guard "red green blue".replace "e" "" = "rd grn blu" +#guard "red green blue".replace 'e' "" = "rd grn blu" +#guard "red green blue".replace "ee" "E" = "red grEn blue" +#guard "red green blue".replace "e" "E" = "rEd grEEn bluE" +#guard "abc".replace "" "k" = "kakbkck" +#guard "ℕ".replace "" "z" = "zℕz" +#guard "𝔸".replace "" "z" = "z𝔸z" +#guard "v̂".replace "" "z" = "zvẑz" +#guard "aaaaa".replace "aa" "b" = "bba" +#guard "v̂fℚo".replace ("ℕfℚoℤ".toSlice.drop 1 |>.dropEnd 1) ("☃🔭🌌".toSlice.dropEnd 1 |>.drop 1) = "v̂🔭" +#guard ("abcde".toSlice.drop 1).replace (· == 'c') "C" = "bCde" +#guard (("ac bc cc cd".toSlice.split " ").map (·.replace 'c' "C") |>.toList) = ["aC", "bC", "CC", "Cd"] +#guard "red green blue".replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl" +#guard "aab".replace "ab" "X" = "aX" +#guard " ℚℚ\n ".replace "ℚ\n" "\n" = " ℚ\n " diff --git a/tests/lean/run/string_slice.lean b/tests/lean/run/string_slice.lean index c5a9414f84..45272340cd 100644 --- a/tests/lean/run/string_slice.lean +++ b/tests/lean/run/string_slice.lean @@ -23,12 +23,14 @@ Tests for `String.Slice` functions #guard ("coffee tea water".toSlice.split ' ').toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice] #guard ("coffee tea water".toSlice.split " tea ").toList == ["coffee".toSlice, "water".toSlice] #guard ("baaab".toSlice.split "aa").toList == ["b".toSlice, "ab".toSlice] +#guard ("aababaaba".toSlice.split "ab").toList == ["a".toSlice, "".toSlice, "a".toSlice, "a".toSlice] #guard ("coffee tea water".toSlice.splitInclusive Char.isWhitespace).toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice] #guard ("coffee tea water".toSlice.splitInclusive ' ').toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice] #guard ("coffee tea water".toSlice.splitInclusive " tea ").toList == ["coffee tea ".toSlice, "water".toSlice] #guard ("a".toSlice.splitInclusive (fun (_ : Char) => true)).toList == ["a".toSlice] #guard ("baaab".toSlice.splitInclusive "aa").toList == ["baa".toSlice, "ab".toSlice] +#guard ("aababaaba".toSlice.splitInclusive "ab").toList == ["aab".toSlice, "ab".toSlice, "aab".toSlice, "a".toSlice] #guard "red green blue".toSlice.drop 4 == "green blue".toSlice #guard "red green blue".toSlice.drop 10 == "blue".toSlice