refactor: redefine String.replace (#10986)
This PR defines `String.Slice.replace` and redefines `String.replace` to use the `Slice` version. The new implementation is generic in the pattern, so it supports things like `"education".replace isVowel "☃!" = "☃!d☃!c☃!t☃!☃!n"`. Since it uses the `ForwardSearcher` infrastructure, `String` patterns are searched using KMP, unlike the previous implementation which had quadratic runtime. As a side effect, the behavior when replacing an empty string now matches that of most other programming languages, namely `"abc".replace "" "k" = "kakbkck"`.
This commit is contained in:
parent
d427423917
commit
167429501b
10 changed files with 149 additions and 29 deletions
|
|
@ -22,3 +22,5 @@ public import Init.Data.String.Substring
|
|||
public import Init.Data.String.TakeDrop
|
||||
public import Init.Data.String.Modify
|
||||
public import Init.Data.String.Termination
|
||||
public import Init.Data.String.ToSlice
|
||||
public import Init.Data.String.Search
|
||||
|
|
|
|||
|
|
@ -208,32 +208,6 @@ Examples:
|
|||
@[inline] def map (f : Char → Char) (s : String) : String :=
|
||||
mapAux f s s.startValidPos
|
||||
|
||||
/--
|
||||
In the string `s`, replaces all occurrences of `pattern` with `replacement`.
|
||||
|
||||
Examples:
|
||||
* `"red green blue".replace "e" "" = "rd grn blu"`
|
||||
* `"red green blue".replace "ee" "E" = "red grEn blue"`
|
||||
* `"red green blue".replace "e" "E" = "rEd grEEn bluE"`
|
||||
-/
|
||||
def replace (s pattern replacement : String) : String :=
|
||||
if h : pattern.rawEndPos.1 = 0 then s
|
||||
else
|
||||
have hPatt := Nat.zero_lt_of_ne_zero h
|
||||
let rec loop (acc : String) (accStop pos : String.Pos.Raw) :=
|
||||
if h : pos.byteIdx + pattern.rawEndPos.byteIdx > s.rawEndPos.byteIdx then
|
||||
acc ++ accStop.extract s s.rawEndPos
|
||||
else
|
||||
have := Nat.lt_of_lt_of_le (Nat.add_lt_add_left hPatt _) (Nat.ge_of_not_lt h)
|
||||
if Pos.Raw.substrEq s pos pattern 0 pattern.rawEndPos.byteIdx then
|
||||
have := Nat.sub_lt_sub_left this (Nat.add_lt_add_left hPatt _)
|
||||
loop (acc ++ accStop.extract s pos ++ replacement) (pos + pattern) (pos + pattern)
|
||||
else
|
||||
have := Nat.sub_lt_sub_left this (Pos.Raw.lt_next s pos)
|
||||
loop acc accStop (pos.next s)
|
||||
termination_by s.rawEndPos.1 - pos.1
|
||||
loop "" 0 0
|
||||
|
||||
/--
|
||||
Replaces each character in `s` with the result of applying `Char.toUpper` to it.
|
||||
|
||||
|
|
|
|||
56
src/Init/Data/String/Search.lean
Normal file
56
src/Init/Data/String/Search.lean
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
/-
|
||||
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Markus Himmel
|
||||
-/
|
||||
module
|
||||
|
||||
prelude
|
||||
public import Init.Data.String.Slice
|
||||
|
||||
set_option doc.verso true
|
||||
|
||||
/-!
|
||||
# String operations based on slice operations
|
||||
|
||||
This file contains {name}`String` operations that are implemented by passing to
|
||||
{name}`String.Slice`.
|
||||
-/
|
||||
|
||||
public section
|
||||
|
||||
namespace String
|
||||
|
||||
section
|
||||
open String.Slice Pattern
|
||||
|
||||
variable {ρ : Type} {σ : Slice → Type}
|
||||
variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)]
|
||||
variable [∀ s, Std.Iterators.Finite (σ s) Id]
|
||||
variable [∀ s, Std.Iterators.IteratorLoop (σ s) Id Id]
|
||||
|
||||
|
||||
/--
|
||||
Constructs a new string obtained by replacing all occurrences of {name}`pattern` with
|
||||
{name}`replacement` in {name}`s`.
|
||||
|
||||
This function is generic over all currently supported patterns. The replacement may be a
|
||||
{name}`String` or a {name}`String.Slice`.
|
||||
|
||||
Examples:
|
||||
* {lean}`"red green blue".replace 'e' "" = "rd grn blu"`
|
||||
* {lean}`"red green blue".replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"`
|
||||
* {lean}`"red green blue".replace "e" "" = "rd grn blu"`
|
||||
* {lean}`"red green blue".replace "ee" "E" = "red grEn blue"`
|
||||
* {lean}`"red green blue".replace "e" "E" = "rEd grEEn bluE"`
|
||||
* {lean}`"aaaaa".replace "aa" "b" = "bba"`
|
||||
* {lean}`"abc".replace "" "k" = "kakbkck"`
|
||||
-/
|
||||
@[inline]
|
||||
def replace [ToForwardSearcher ρ σ] [ToSlice α] (s : String) (pattern : ρ)
|
||||
(replacement : α) : String :=
|
||||
s.toSlice.replace pattern replacement
|
||||
|
||||
end
|
||||
|
||||
end String
|
||||
|
|
@ -9,6 +9,7 @@ prelude
|
|||
public import Init.Data.String.Pattern
|
||||
public import Init.Data.Ord.Basic
|
||||
public import Init.Data.Iterators.Combinators.FilterMap
|
||||
public import Init.Data.String.ToSlice
|
||||
|
||||
set_option doc.verso true
|
||||
|
||||
|
|
@ -36,6 +37,12 @@ public section
|
|||
|
||||
namespace String.Slice
|
||||
|
||||
instance : HAppend String String.Slice String where
|
||||
-- This implementation performs an unnecessary copy which could be avoided by providing a custom
|
||||
-- C++ implementation for this instance. Note: if `String` had no custom runtime representation
|
||||
-- at all, then this would be very easy to get right from Lean using `ByteArray.copySlice`.
|
||||
hAppend s t := s ++ t.copy
|
||||
|
||||
open Pattern
|
||||
|
||||
/--
|
||||
|
|
@ -349,6 +356,28 @@ Examples:
|
|||
def dropPrefix [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
|
||||
dropPrefix? s pat |>.getD s
|
||||
|
||||
/--
|
||||
Constructs a new string obtained by replacing all occurrences of {name}`pattern` with
|
||||
{name}`replacement` in {name}`s`.
|
||||
|
||||
This function is generic over all currently supported patterns. The replacement may be a
|
||||
{name}`String` or a {name}`String.Slice`.
|
||||
|
||||
Examples:
|
||||
* {lean}`"red green blue".toSlice.replace 'e' "" = "rd grn blu"`
|
||||
* {lean}`"red green blue".toSlice.replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"`
|
||||
* {lean}`"red green blue".toSlice.replace "e" "" = "rd grn blu"`
|
||||
* {lean}`"red green blue".toSlice.replace "ee" "E" = "red grEn blue"`
|
||||
* {lean}`"red green blue".toSlice.replace "e" "E" = "rEd grEEn bluE"`
|
||||
* {lean}`"aaaaa".toSlice.replace "aa" "b" = "bba"`
|
||||
* {lean}`"abc".toSlice.replace "" "k" = "kakbkck"`
|
||||
-/
|
||||
def replace [ToForwardSearcher ρ σ] [ToSlice α] (s : Slice) (pattern : ρ) (replacement : α) :
|
||||
String :=
|
||||
(ToForwardSearcher.toSearcher s pattern).fold (init := "") (fun
|
||||
| sofar, .matched .. => sofar ++ ToSlice.toSlice replacement
|
||||
| sofar, .rejected start stop => sofar ++ s.replaceStartEnd! start stop)
|
||||
|
||||
/--
|
||||
Removes the specified number of characters (Unicode code points) from the start of the slice.
|
||||
|
||||
|
|
|
|||
37
src/Init/Data/String/ToSlice.lean
Normal file
37
src/Init/Data/String/ToSlice.lean
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
/-
|
||||
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Markus Himmel
|
||||
-/
|
||||
module
|
||||
|
||||
prelude
|
||||
public import Init.Data.String.Defs
|
||||
|
||||
/-!
|
||||
# The `ToSlice` typeclass
|
||||
-/
|
||||
|
||||
public section
|
||||
|
||||
namespace String
|
||||
|
||||
/--
|
||||
Typeclass for types that have a conversion function to `String.Slice`. This typeclass is used to
|
||||
make some `String`/`String.Slice` functions polymorphic. As such, for now it is not intended that
|
||||
there instances of this beyond `ToSlice String` and `ToSlice String.Slice`.
|
||||
|
||||
To convert arbitrary data into a string representation, see `ToString` and `Repr`.
|
||||
-/
|
||||
class ToSlice (α : Type u) where
|
||||
toSlice : α → String.Slice
|
||||
|
||||
@[inline]
|
||||
instance : ToSlice String.Slice where
|
||||
toSlice := id
|
||||
|
||||
@[inline]
|
||||
instance : ToSlice String where
|
||||
toSlice := toSlice
|
||||
|
||||
end String
|
||||
|
|
@ -7,7 +7,7 @@ module
|
|||
|
||||
prelude
|
||||
public import Init.System.FilePath
|
||||
import Init.Data.String.Modify
|
||||
import Init.Data.String.Search
|
||||
|
||||
public section
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ Authors: Marc Huisinga, Wojciech Nawrocki
|
|||
module
|
||||
|
||||
prelude
|
||||
public import Init.Data.String
|
||||
public import Lean.Data.Lsp.BasicAux
|
||||
public import Lean.DeclarationRange
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import Init.Data.Ord
|
|||
public import Lean.DocString.Types
|
||||
public import Init.Data.String.TakeDrop
|
||||
import Init.Data.String.Iterator
|
||||
public import Init.Data.String.Modify
|
||||
public import Init.Data.String.Search
|
||||
|
||||
set_option linter.missingDocs true
|
||||
|
||||
|
|
|
|||
21
tests/lean/run/string_replace.lean
Normal file
21
tests/lean/run/string_replace.lean
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
module
|
||||
|
||||
def isVowel (c : Char) : Bool :=
|
||||
c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u'
|
||||
|
||||
#guard "education".replace isVowel "☃!" = "☃!d☃!c☃!t☃!☃!n"
|
||||
#guard "red green blue".replace "e" "" = "rd grn blu"
|
||||
#guard "red green blue".replace 'e' "" = "rd grn blu"
|
||||
#guard "red green blue".replace "ee" "E" = "red grEn blue"
|
||||
#guard "red green blue".replace "e" "E" = "rEd grEEn bluE"
|
||||
#guard "abc".replace "" "k" = "kakbkck"
|
||||
#guard "ℕ".replace "" "z" = "zℕz"
|
||||
#guard "𝔸".replace "" "z" = "z𝔸z"
|
||||
#guard "v̂".replace "" "z" = "zvẑz"
|
||||
#guard "aaaaa".replace "aa" "b" = "bba"
|
||||
#guard "v̂fℚo".replace ("ℕfℚoℤ".toSlice.drop 1 |>.dropEnd 1) ("☃🔭🌌".toSlice.dropEnd 1 |>.drop 1) = "v̂🔭"
|
||||
#guard ("abcde".toSlice.drop 1).replace (· == 'c') "C" = "bCde"
|
||||
#guard (("ac bc cc cd".toSlice.split " ").map (·.replace 'c' "C") |>.toList) = ["aC", "bC", "CC", "Cd"]
|
||||
#guard "red green blue".replace (fun c => c == 'u' || c == 'e') "" = "rd grn bl"
|
||||
#guard "aab".replace "ab" "X" = "aX"
|
||||
#guard " ℚℚ\n ".replace "ℚ\n" "\n" = " ℚ\n "
|
||||
|
|
@ -23,12 +23,14 @@ Tests for `String.Slice` functions
|
|||
#guard ("coffee tea water".toSlice.split ' ').toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]
|
||||
#guard ("coffee tea water".toSlice.split " tea ").toList == ["coffee".toSlice, "water".toSlice]
|
||||
#guard ("baaab".toSlice.split "aa").toList == ["b".toSlice, "ab".toSlice]
|
||||
#guard ("aababaaba".toSlice.split "ab").toList == ["a".toSlice, "".toSlice, "a".toSlice, "a".toSlice]
|
||||
|
||||
#guard ("coffee tea water".toSlice.splitInclusive Char.isWhitespace).toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]
|
||||
#guard ("coffee tea water".toSlice.splitInclusive ' ').toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]
|
||||
#guard ("coffee tea water".toSlice.splitInclusive " tea ").toList == ["coffee tea ".toSlice, "water".toSlice]
|
||||
#guard ("a".toSlice.splitInclusive (fun (_ : Char) => true)).toList == ["a".toSlice]
|
||||
#guard ("baaab".toSlice.splitInclusive "aa").toList == ["baa".toSlice, "ab".toSlice]
|
||||
#guard ("aababaaba".toSlice.splitInclusive "ab").toList == ["aab".toSlice, "ab".toSlice, "aab".toSlice, "a".toSlice]
|
||||
|
||||
#guard "red green blue".toSlice.drop 4 == "green blue".toSlice
|
||||
#guard "red green blue".toSlice.drop 10 == "blue".toSlice
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue